* [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf @ 2020-10-29 15:36 Dharmik Thakkar 2020-10-29 15:36 ` [dpdk-dev] [PATCH 2/4] test/lpm: return error on failure " Dharmik Thakkar ` (5 more replies) 0 siblings, 6 replies; 52+ messages in thread From: Dharmik Thakkar @ 2020-10-29 15:36 UTC (permalink / raw) To: Bruce Richardson, Vladimir Medvedkin, Honnappa Nagarahalli, Ruifeng Wang, Gavin Hu Cc: dev, nd, Dharmik Thakkar, stable Fix incorrect calculations for LPM adds, LPM deletes, and average cycles in RCU QSBR perf tests Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") Cc: honnappa.nagarahalli@arm.com Cc: stable@dpdk.org Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> --- app/test/test_lpm_perf.c | 43 ++++++++++++++-------------------------- 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index c5a238b9d1e8..0a2d76a983c3 100644 --- a/app/test/test_lpm_perf.c +++ b/app/test/test_lpm_perf.c @@ -23,7 +23,6 @@ static struct rte_rcu_qsbr *rv; static volatile uint8_t writer_done; static volatile uint32_t thr_id; static uint64_t gwrite_cycles; -static uint64_t gwrites; /* LPM APIs are not thread safe, use mutex to provide thread safety */ static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -60,6 +59,8 @@ static uint32_t num_ldepth_route_entries; #define NUM_ROUTE_ENTRIES num_route_entries #define NUM_LDEPTH_ROUTE_ENTRIES num_ldepth_route_entries +#define TOTAL_WRITES (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) + enum { IP_CLASS_A, IP_CLASS_B, @@ -432,7 +433,6 @@ test_lpm_rcu_qsbr_writer(void *arg) uint8_t core_id = (uint8_t)((uintptr_t)arg); uint32_t next_hop_add = 0xAA; - RTE_SET_USED(arg); /* 2 writer threads are used */ if (core_id % 2 == 0) { si = 0; @@ -472,9 +472,6 @@ test_lpm_rcu_qsbr_writer(void *arg) total_cycles = rte_rdtsc_precise() - begin; __atomic_fetch_add(&gwrite_cycles, total_cycles, 
__ATOMIC_RELAXED); - __atomic_fetch_add(&gwrites, - 2 * NUM_LDEPTH_ROUTE_ENTRIES * RCU_ITERATIONS, - __ATOMIC_RELAXED); return 0; } @@ -528,7 +525,6 @@ test_lpm_rcu_perf_multi_writer(void) writer_done = 0; __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); - __atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED); __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); @@ -548,13 +544,10 @@ test_lpm_rcu_perf_multi_writer(void) if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) goto error; - printf("Total LPM Adds: %d\n", - 2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); - printf("Total LPM Deletes: %d\n", - 2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); + printf("Total LPM Adds: %d\n", TOTAL_WRITES); + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); printf("Average LPM Add/Del: %"PRIu64" cycles\n", - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / - __atomic_load_n(&gwrites, __ATOMIC_RELAXED) + __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / TOTAL_WRITES ); /* Wait and check return value from reader threads */ @@ -581,7 +574,6 @@ test_lpm_rcu_perf_multi_writer(void) writer_done = 0; __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); - __atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED); __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); /* Launch reader threads */ @@ -600,14 +592,11 @@ test_lpm_rcu_perf_multi_writer(void) if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) goto error; - printf("Total LPM Adds: %d\n", - 2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); - printf("Total LPM Deletes: %d\n", - 2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); + printf("Total LPM Adds: %d\n", TOTAL_WRITES); + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); printf("Average LPM Add/Del: %"PRIu64" cycles\n", - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / - __atomic_load_n(&gwrites, __ATOMIC_RELAXED) - ); + __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) + / TOTAL_WRITES); writer_done = 1; /* Wait and check return value from reader threads */ @@ -711,11 +700,10 @@ test_lpm_rcu_perf(void) } 
total_cycles = rte_rdtsc_precise() - begin; - printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); - printf("Total LPM Deletes: %d\n", - ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); + printf("Total LPM Adds: %d\n", TOTAL_WRITES); + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); printf("Average LPM Add/Del: %g cycles\n", - (double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS)); + (double)total_cycles / TOTAL_WRITES); writer_done = 1; /* Wait and check return value from reader threads */ @@ -771,11 +759,10 @@ test_lpm_rcu_perf(void) } total_cycles = rte_rdtsc_precise() - begin; - printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); - printf("Total LPM Deletes: %d\n", - ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); + printf("Total LPM Adds: %d\n", TOTAL_WRITES); + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); printf("Average LPM Add/Del: %g cycles\n", - (double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS)); + (double)total_cycles / TOTAL_WRITES); writer_done = 1; /* Wait and check return value from reader threads */ -- 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH 2/4] test/lpm: return error on failure in rcu qsbr perf 2020-10-29 15:36 [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar @ 2020-10-29 15:36 ` Dharmik Thakkar 2020-10-29 15:36 ` [dpdk-dev] [PATCH 3/4] test/lpm: remove error checking " Dharmik Thakkar ` (4 subsequent siblings) 5 siblings, 0 replies; 52+ messages in thread From: Dharmik Thakkar @ 2020-10-29 15:36 UTC (permalink / raw) To: Bruce Richardson, Vladimir Medvedkin, Gavin Hu, Ruifeng Wang, Honnappa Nagarahalli Cc: dev, nd, Dharmik Thakkar, stable Return error if Add/Delete fail in multiwriter perf test Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") Cc: honnappa.nagarahalli@arm.com Cc: stable@dpdk.org Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> --- app/test/test_lpm_perf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index 0a2d76a983c3..251ea12345ae 100644 --- a/app/test/test_lpm_perf.c +++ b/app/test/test_lpm_perf.c @@ -453,6 +453,8 @@ test_lpm_rcu_qsbr_writer(void *arg) next_hop_add) != 0) { printf("Failed to add iteration %d, route# %d\n", i, j); + pthread_mutex_unlock(&lpm_mutex); + return -1; } pthread_mutex_unlock(&lpm_mutex); } @@ -464,6 +466,8 @@ test_lpm_rcu_qsbr_writer(void *arg) large_ldepth_route_table[j].depth) != 0) { printf("Failed to delete iteration %d, route# %d\n", i, j); + pthread_mutex_unlock(&lpm_mutex); + return -1; } pthread_mutex_unlock(&lpm_mutex); } -- 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH 3/4] test/lpm: remove error checking in rcu qsbr perf 2020-10-29 15:36 [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar 2020-10-29 15:36 ` [dpdk-dev] [PATCH 2/4] test/lpm: return error on failure " Dharmik Thakkar @ 2020-10-29 15:36 ` Dharmik Thakkar 2020-10-29 15:36 ` [dpdk-dev] [PATCH 4/4] test/lpm: avoid code duplication " Dharmik Thakkar ` (3 subsequent siblings) 5 siblings, 0 replies; 52+ messages in thread From: Dharmik Thakkar @ 2020-10-29 15:36 UTC (permalink / raw) To: Bruce Richardson, Vladimir Medvedkin, Gavin Hu, Ruifeng Wang, Honnappa Nagarahalli Cc: dev, nd, Dharmik Thakkar, stable Remove redundant error checking for reader threads since they never return error. Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") Cc: honnappa.nagarahalli@arm.com Cc: stable@dpdk.org Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> --- app/test/test_lpm_perf.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index 251ea12345ae..4f15db4f85ee 100644 --- a/app/test/test_lpm_perf.c +++ b/app/test/test_lpm_perf.c @@ -554,11 +554,10 @@ test_lpm_rcu_perf_multi_writer(void) __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / TOTAL_WRITES ); - /* Wait and check return value from reader threads */ writer_done = 1; + /* Wait until all readers have exited */ for (i = 2; i < num_cores; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - goto error; + rte_eal_wait_lcore(enabled_core_ids[i]); rte_lpm_free(lpm); rte_free(rv); @@ -603,10 +602,9 @@ test_lpm_rcu_perf_multi_writer(void) / TOTAL_WRITES); writer_done = 1; - /* Wait and check return value from reader threads */ + /* Wait until all readers have exited */ for (i = 2; i < num_cores; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - goto error; + rte_eal_wait_lcore(enabled_core_ids[i]); 
rte_lpm_free(lpm); @@ -710,10 +708,9 @@ test_lpm_rcu_perf(void) (double)total_cycles / TOTAL_WRITES); writer_done = 1; - /* Wait and check return value from reader threads */ + /* Wait until all readers have exited */ for (i = 0; i < num_cores; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - goto error; + rte_eal_wait_lcore(enabled_core_ids[i]); rte_lpm_free(lpm); rte_free(rv); @@ -769,11 +766,9 @@ test_lpm_rcu_perf(void) (double)total_cycles / TOTAL_WRITES); writer_done = 1; - /* Wait and check return value from reader threads */ + /* Wait until all readers have exited */ for (i = 0; i < num_cores; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - printf("Warning: lcore %u not finished.\n", - enabled_core_ids[i]); + rte_eal_wait_lcore(enabled_core_ids[i]); rte_lpm_free(lpm); -- 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH 4/4] test/lpm: avoid code duplication in rcu qsbr perf 2020-10-29 15:36 [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar 2020-10-29 15:36 ` [dpdk-dev] [PATCH 2/4] test/lpm: return error on failure " Dharmik Thakkar 2020-10-29 15:36 ` [dpdk-dev] [PATCH 3/4] test/lpm: remove error checking " Dharmik Thakkar @ 2020-10-29 15:36 ` Dharmik Thakkar 2020-11-02 17:17 ` Medvedkin, Vladimir 2020-11-02 10:08 ` [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation " David Marchand ` (2 subsequent siblings) 5 siblings, 1 reply; 52+ messages in thread From: Dharmik Thakkar @ 2020-10-29 15:36 UTC (permalink / raw) To: Bruce Richardson, Vladimir Medvedkin; +Cc: dev, nd, Dharmik Thakkar Avoid code duplication by combining single and multi threaded tests Signed-off-by: Dharmik Thakkar<dharmik.thakkar@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> --- app/test/test_lpm_perf.c | 338 +++++++++------------------------------ 1 file changed, 73 insertions(+), 265 deletions(-) diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index 4f15db4f85ee..08312023b661 100644 --- a/app/test/test_lpm_perf.c +++ b/app/test/test_lpm_perf.c @@ -430,11 +430,16 @@ test_lpm_rcu_qsbr_writer(void *arg) { unsigned int i, j, si, ei; uint64_t begin, total_cycles; - uint8_t core_id = (uint8_t)((uintptr_t)arg); + uint8_t writer_id = (uint8_t)((uintptr_t)arg); uint32_t next_hop_add = 0xAA; - /* 2 writer threads are used */ - if (core_id % 2 == 0) { + /* Single writer (writer_id = 1) */ + if (writer_id == 1) { + si = 0; + ei = NUM_LDEPTH_ROUTE_ENTRIES; + } + /* 2 Writers (writer_id = 2/3)*/ + else if (writer_id == 2) { si = 0; ei = NUM_LDEPTH_ROUTE_ENTRIES / 2; } else { @@ -482,16 +487,17 @@ test_lpm_rcu_qsbr_writer(void *arg) /* * Functional test: - * 2 writers, rest are readers + * 1/2 writers, rest are readers */ static int -test_lpm_rcu_perf_multi_writer(void) +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu) { struct 
rte_lpm_config config; size_t sz; - unsigned int i; + unsigned int i, j; uint16_t core_id; struct rte_lpm_rcu_config rcu_cfg = {0}; + int (*reader_f)(void *arg) = NULL; if (rte_lcore_count() < 3) { printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n"); @@ -504,273 +510,76 @@ test_lpm_rcu_perf_multi_writer(void) num_cores++; } - printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n", - num_cores - 2); - - /* Create LPM table */ - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; - config.flags = 0; - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); - TEST_LPM_ASSERT(lpm != NULL); - - /* Init RCU variable */ - sz = rte_rcu_qsbr_get_memsize(num_cores); - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, - RTE_CACHE_LINE_SIZE); - rte_rcu_qsbr_init(rv, num_cores); - - rcu_cfg.v = rv; - /* Assign the RCU variable to LPM */ - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { - printf("RCU variable assignment failed\n"); - goto error; - } - - writer_done = 0; - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); - - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); - - /* Launch reader threads */ - for (i = 2; i < num_cores; i++) - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, - enabled_core_ids[i]); - - /* Launch writer threads */ - for (i = 0; i < 2; i++) - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, - (void *)(uintptr_t)i, - enabled_core_ids[i]); - - /* Wait for writer threads */ - for (i = 0; i < 2; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - goto error; - - printf("Total LPM Adds: %d\n", TOTAL_WRITES); - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); - printf("Average LPM Add/Del: %"PRIu64" cycles\n", - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / TOTAL_WRITES - ); - - writer_done = 1; - /* Wait until all readers have exited */ - for (i = 2; i < num_cores; i++) - rte_eal_wait_lcore(enabled_core_ids[i]); - - rte_lpm_free(lpm); - rte_free(rv); - 
lpm = NULL; - rv = NULL; - - /* Test without RCU integration */ - printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n", - num_cores - 2); - - /* Create LPM table */ - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; - config.flags = 0; - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); - TEST_LPM_ASSERT(lpm != NULL); - - writer_done = 0; - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); - - /* Launch reader threads */ - for (i = 2; i < num_cores; i++) - rte_eal_remote_launch(test_lpm_reader, NULL, - enabled_core_ids[i]); - - /* Launch writer threads */ - for (i = 0; i < 2; i++) - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, - (void *)(uintptr_t)i, - enabled_core_ids[i]); - - /* Wait for writer threads */ - for (i = 0; i < 2; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - goto error; - - printf("Total LPM Adds: %d\n", TOTAL_WRITES); - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); - printf("Average LPM Add/Del: %"PRIu64" cycles\n", - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) - / TOTAL_WRITES); - - writer_done = 1; - /* Wait until all readers have exited */ - for (i = 2; i < num_cores; i++) - rte_eal_wait_lcore(enabled_core_ids[i]); - - rte_lpm_free(lpm); - - return 0; - -error: - writer_done = 1; - /* Wait until all readers have exited */ - rte_eal_mp_wait_lcore(); - - rte_lpm_free(lpm); - rte_free(rv); - - return -1; -} - -/* - * Functional test: - * Single writer, rest are readers - */ -static int -test_lpm_rcu_perf(void) -{ - struct rte_lpm_config config; - uint64_t begin, total_cycles; - size_t sz; - unsigned int i, j; - uint16_t core_id; - uint32_t next_hop_add = 0xAA; - struct rte_lpm_rcu_config rcu_cfg = {0}; - - if (rte_lcore_count() < 2) { - printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n"); - return TEST_SKIPPED; - } - - num_cores = 0; - RTE_LCORE_FOREACH_WORKER(core_id) { - 
enabled_core_ids[num_cores] = core_id; - num_cores++; - } - - printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n", - num_cores); - - /* Create LPM table */ - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; - config.flags = 0; - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); - TEST_LPM_ASSERT(lpm != NULL); - - /* Init RCU variable */ - sz = rte_rcu_qsbr_get_memsize(num_cores); - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, - RTE_CACHE_LINE_SIZE); - rte_rcu_qsbr_init(rv, num_cores); - - rcu_cfg.v = rv; - /* Assign the RCU variable to LPM */ - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { - printf("RCU variable assignment failed\n"); - goto error; - } - - writer_done = 0; - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); - - /* Launch reader threads */ - for (i = 0; i < num_cores; i++) - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, - enabled_core_ids[i]); - - /* Measure add/delete. */ - begin = rte_rdtsc_precise(); - for (i = 0; i < RCU_ITERATIONS; i++) { - /* Add all the entries */ - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, - large_ldepth_route_table[j].depth, - next_hop_add) != 0) { - printf("Failed to add iteration %d, route# %d\n", - i, j); + for (j = 1; j < 3; j++) { + if (use_rcu) + printf("\nPerf test: %d writer(s), %d reader(s)," + " RCU integration enabled\n", j, num_cores - j); + else + printf("\nPerf test: %d writer(s), %d reader(s)," + " RCU integration disabled\n", j, num_cores - j); + + /* Create LPM table */ + config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; + config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; + config.flags = 0; + lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); + TEST_LPM_ASSERT(lpm != NULL); + + /* Init RCU variable */ + if (use_rcu) { + sz = rte_rcu_qsbr_get_memsize(num_cores); + rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, + RTE_CACHE_LINE_SIZE); + rte_rcu_qsbr_init(rv, 
num_cores); + + rcu_cfg.v = rv; + /* Assign the RCU variable to LPM */ + if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { + printf("RCU variable assignment failed\n"); goto error; } - /* Delete all the entries */ - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) - if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, - large_ldepth_route_table[j].depth) != 0) { - printf("Failed to delete iteration %d, route# %d\n", - i, j); - goto error; - } - } - total_cycles = rte_rdtsc_precise() - begin; + reader_f = test_lpm_rcu_qsbr_reader; + } else + reader_f = test_lpm_reader; - printf("Total LPM Adds: %d\n", TOTAL_WRITES); - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); - printf("Average LPM Add/Del: %g cycles\n", - (double)total_cycles / TOTAL_WRITES); + writer_done = 0; + __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); - writer_done = 1; - /* Wait until all readers have exited */ - for (i = 0; i < num_cores; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]); + __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); - rte_lpm_free(lpm); - rte_free(rv); - lpm = NULL; - rv = NULL; + /* Launch reader threads */ + for (i = j; i < num_cores; i++) + rte_eal_remote_launch(reader_f, NULL, + enabled_core_ids[i]); - /* Test without RCU integration */ - printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n", - num_cores); + /* Launch writer threads */ + for (i = 0; i < j; i++) + rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, + (void *)(uintptr_t)(i + j), + enabled_core_ids[i]); - /* Create LPM table */ - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; - config.flags = 0; - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); - TEST_LPM_ASSERT(lpm != NULL); - - writer_done = 0; - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); - - /* Launch reader threads */ - for (i = 0; i < num_cores; i++) - rte_eal_remote_launch(test_lpm_reader, NULL, - enabled_core_ids[i]); - - /* Measure add/delete. 
*/ - begin = rte_rdtsc_precise(); - for (i = 0; i < RCU_ITERATIONS; i++) { - /* Add all the entries */ - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, - large_ldepth_route_table[j].depth, - next_hop_add) != 0) { - printf("Failed to add iteration %d, route# %d\n", - i, j); + /* Wait for writer threads */ + for (i = 0; i < j; i++) + if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) goto error; - } - /* Delete all the entries */ - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) - if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, - large_ldepth_route_table[j].depth) != 0) { - printf("Failed to delete iteration %d, route# %d\n", - i, j); - goto error; - } + printf("Total LPM Adds: %d\n", TOTAL_WRITES); + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); + printf("Average LPM Add/Del: %"PRIu64" cycles\n", + __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) + / TOTAL_WRITES); + + writer_done = 1; + /* Wait until all readers have exited */ + for (i = j; i < num_cores; i++) + rte_eal_wait_lcore(enabled_core_ids[i]); + + rte_lpm_free(lpm); + rte_free(rv); + lpm = NULL; + rv = NULL; } - total_cycles = rte_rdtsc_precise() - begin; - - printf("Total LPM Adds: %d\n", TOTAL_WRITES); - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); - printf("Average LPM Add/Del: %g cycles\n", - (double)total_cycles / TOTAL_WRITES); - - writer_done = 1; - /* Wait until all readers have exited */ - for (i = 0; i < num_cores; i++) - rte_eal_wait_lcore(enabled_core_ids[i]); - - rte_lpm_free(lpm); return 0; @@ -946,9 +755,8 @@ test_lpm_perf(void) rte_lpm_delete_all(lpm); rte_lpm_free(lpm); - test_lpm_rcu_perf(); - - test_lpm_rcu_perf_multi_writer(); + test_lpm_rcu_perf_multi_writer(0); + test_lpm_rcu_perf_multi_writer(1); return 0; } -- 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH 4/4] test/lpm: avoid code duplication in rcu qsbr perf 2020-10-29 15:36 ` [dpdk-dev] [PATCH 4/4] test/lpm: avoid code duplication " Dharmik Thakkar @ 2020-11-02 17:17 ` Medvedkin, Vladimir 2020-11-02 22:11 ` Dharmik Thakkar 0 siblings, 1 reply; 52+ messages in thread From: Medvedkin, Vladimir @ 2020-11-02 17:17 UTC (permalink / raw) To: Dharmik Thakkar, Bruce Richardson; +Cc: dev, nd Hi Dharmik, Thanks for the patches, see comments inlined On 29/10/2020 15:36, Dharmik Thakkar wrote: > Avoid code duplication by combining single and multi threaded tests > > Signed-off-by: Dharmik Thakkar<dharmik.thakkar@arm.com> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> > --- > app/test/test_lpm_perf.c | 338 +++++++++------------------------------ > 1 file changed, 73 insertions(+), 265 deletions(-) > > diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c > index 4f15db4f85ee..08312023b661 100644 > --- a/app/test/test_lpm_perf.c > +++ b/app/test/test_lpm_perf.c > @@ -430,11 +430,16 @@ test_lpm_rcu_qsbr_writer(void *arg) > { > unsigned int i, j, si, ei; > uint64_t begin, total_cycles; > - uint8_t core_id = (uint8_t)((uintptr_t)arg); > + uint8_t writer_id = (uint8_t)((uintptr_t)arg); > uint32_t next_hop_add = 0xAA; > > - /* 2 writer threads are used */ > - if (core_id % 2 == 0) { > + /* Single writer (writer_id = 1) */ > + if (writer_id == 1) { Probably it would be better to use enum here instead of 1/2/3? 
> + si = 0; > + ei = NUM_LDEPTH_ROUTE_ENTRIES; > + } > + /* 2 Writers (writer_id = 2/3)*/ > + else if (writer_id == 2) { > si = 0; > ei = NUM_LDEPTH_ROUTE_ENTRIES / 2; > } else { > @@ -482,16 +487,17 @@ test_lpm_rcu_qsbr_writer(void *arg) > > /* > * Functional test: > - * 2 writers, rest are readers > + * 1/2 writers, rest are readers > */ > static int > -test_lpm_rcu_perf_multi_writer(void) > +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu) > { > struct rte_lpm_config config; > size_t sz; > - unsigned int i; > + unsigned int i, j; > uint16_t core_id; > struct rte_lpm_rcu_config rcu_cfg = {0}; > + int (*reader_f)(void *arg) = NULL; > > if (rte_lcore_count() < 3) { > printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n"); > @@ -504,273 +510,76 @@ test_lpm_rcu_perf_multi_writer(void) > num_cores++; > } > > - printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n", > - num_cores - 2); > - > - /* Create LPM table */ > - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; > - config.flags = 0; > - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > - TEST_LPM_ASSERT(lpm != NULL); > - > - /* Init RCU variable */ > - sz = rte_rcu_qsbr_get_memsize(num_cores); > - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, > - RTE_CACHE_LINE_SIZE); > - rte_rcu_qsbr_init(rv, num_cores); > - > - rcu_cfg.v = rv; > - /* Assign the RCU variable to LPM */ > - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { > - printf("RCU variable assignment failed\n"); > - goto error; > - } > - > - writer_done = 0; > - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > - > - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > - > - /* Launch reader threads */ > - for (i = 2; i < num_cores; i++) > - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, > - enabled_core_ids[i]); > - > - /* Launch writer threads */ > - for (i = 0; i < 2; i++) > - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, > - (void 
*)(uintptr_t)i, > - enabled_core_ids[i]); > - > - /* Wait for writer threads */ > - for (i = 0; i < 2; i++) > - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > - goto error; > - > - printf("Total LPM Adds: %d\n", TOTAL_WRITES); > - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > - printf("Average LPM Add/Del: %"PRIu64" cycles\n", > - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / TOTAL_WRITES > - ); > - > - writer_done = 1; > - /* Wait until all readers have exited */ > - for (i = 2; i < num_cores; i++) > - rte_eal_wait_lcore(enabled_core_ids[i]); > - > - rte_lpm_free(lpm); > - rte_free(rv); > - lpm = NULL; > - rv = NULL; > - > - /* Test without RCU integration */ > - printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n", > - num_cores - 2); > - > - /* Create LPM table */ > - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; > - config.flags = 0; > - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > - TEST_LPM_ASSERT(lpm != NULL); > - > - writer_done = 0; > - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > - > - /* Launch reader threads */ > - for (i = 2; i < num_cores; i++) > - rte_eal_remote_launch(test_lpm_reader, NULL, > - enabled_core_ids[i]); > - > - /* Launch writer threads */ > - for (i = 0; i < 2; i++) > - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, > - (void *)(uintptr_t)i, > - enabled_core_ids[i]); > - > - /* Wait for writer threads */ > - for (i = 0; i < 2; i++) > - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > - goto error; > - > - printf("Total LPM Adds: %d\n", TOTAL_WRITES); > - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > - printf("Average LPM Add/Del: %"PRIu64" cycles\n", > - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) > - / TOTAL_WRITES); > - > - writer_done = 1; > - /* Wait until all readers have exited */ > - for (i = 2; i < num_cores; i++) > - 
rte_eal_wait_lcore(enabled_core_ids[i]); > - > - rte_lpm_free(lpm); > - > - return 0; > - > -error: > - writer_done = 1; > - /* Wait until all readers have exited */ > - rte_eal_mp_wait_lcore(); > - > - rte_lpm_free(lpm); > - rte_free(rv); > - > - return -1; > -} > - > -/* > - * Functional test: > - * Single writer, rest are readers > - */ > -static int > -test_lpm_rcu_perf(void) > -{ > - struct rte_lpm_config config; > - uint64_t begin, total_cycles; > - size_t sz; > - unsigned int i, j; > - uint16_t core_id; > - uint32_t next_hop_add = 0xAA; > - struct rte_lpm_rcu_config rcu_cfg = {0}; > - > - if (rte_lcore_count() < 2) { > - printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n"); > - return TEST_SKIPPED; > - } > - > - num_cores = 0; > - RTE_LCORE_FOREACH_WORKER(core_id) { > - enabled_core_ids[num_cores] = core_id; > - num_cores++; > - } > - > - printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n", > - num_cores); > - > - /* Create LPM table */ > - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; > - config.flags = 0; > - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > - TEST_LPM_ASSERT(lpm != NULL); > - > - /* Init RCU variable */ > - sz = rte_rcu_qsbr_get_memsize(num_cores); > - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, > - RTE_CACHE_LINE_SIZE); > - rte_rcu_qsbr_init(rv, num_cores); > - > - rcu_cfg.v = rv; > - /* Assign the RCU variable to LPM */ > - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { > - printf("RCU variable assignment failed\n"); > - goto error; > - } > - > - writer_done = 0; > - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > - > - /* Launch reader threads */ > - for (i = 0; i < num_cores; i++) > - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, > - enabled_core_ids[i]); > - > - /* Measure add/delete. 
*/ > - begin = rte_rdtsc_precise(); > - for (i = 0; i < RCU_ITERATIONS; i++) { > - /* Add all the entries */ > - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) > - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, > - large_ldepth_route_table[j].depth, > - next_hop_add) != 0) { > - printf("Failed to add iteration %d, route# %d\n", > - i, j); > + for (j = 1; j < 3; j++) { > + if (use_rcu) > + printf("\nPerf test: %d writer(s), %d reader(s)," > + " RCU integration enabled\n", j, num_cores - j); > + else > + printf("\nPerf test: %d writer(s), %d reader(s)," > + " RCU integration disabled\n", j, num_cores - j); > + > + /* Create LPM table */ > + config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > + config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; > + config.flags = 0; > + lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > + TEST_LPM_ASSERT(lpm != NULL); > + > + /* Init RCU variable */ > + if (use_rcu) { > + sz = rte_rcu_qsbr_get_memsize(num_cores); > + rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, > + RTE_CACHE_LINE_SIZE); > + rte_rcu_qsbr_init(rv, num_cores); > + > + rcu_cfg.v = rv; > + /* Assign the RCU variable to LPM */ > + if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { > + printf("RCU variable assignment failed\n"); > goto error; > } > > - /* Delete all the entries */ > - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) > - if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, > - large_ldepth_route_table[j].depth) != 0) { > - printf("Failed to delete iteration %d, route# %d\n", > - i, j); > - goto error; > - } > - } > - total_cycles = rte_rdtsc_precise() - begin; > + reader_f = test_lpm_rcu_qsbr_reader; > + } else > + reader_f = test_lpm_reader; > > - printf("Total LPM Adds: %d\n", TOTAL_WRITES); > - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > - printf("Average LPM Add/Del: %g cycles\n", > - (double)total_cycles / TOTAL_WRITES); > + writer_done = 0; > + __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > > - writer_done = 1; > - /* Wait 
until all readers have exited */ > - for (i = 0; i < num_cores; i++) > - if (rte_eal_wait_lcore(enabled_core_ids[i]); > + __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > > - rte_lpm_free(lpm); > - rte_free(rv); > - lpm = NULL; > - rv = NULL; > + /* Launch reader threads */ > + for (i = j; i < num_cores; i++) > + rte_eal_remote_launch(reader_f, NULL, > + enabled_core_ids[i]); > > - /* Test without RCU integration */ > - printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n", > - num_cores); > + /* Launch writer threads */ > + for (i = 0; i < j; i++) > + rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, So now even single writer will acquire a lock for every _add/_delete operation. I don't think it is necessary. > + (void *)(uintptr_t)(i + j), > + enabled_core_ids[i]); > > - /* Create LPM table */ > - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; > - config.flags = 0; > - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > - TEST_LPM_ASSERT(lpm != NULL); > - > - writer_done = 0; > - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > - > - /* Launch reader threads */ > - for (i = 0; i < num_cores; i++) > - rte_eal_remote_launch(test_lpm_reader, NULL, > - enabled_core_ids[i]); > - > - /* Measure add/delete. 
*/ > - begin = rte_rdtsc_precise(); > - for (i = 0; i < RCU_ITERATIONS; i++) { > - /* Add all the entries */ > - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) > - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, > - large_ldepth_route_table[j].depth, > - next_hop_add) != 0) { > - printf("Failed to add iteration %d, route# %d\n", > - i, j); > + /* Wait for writer threads */ > + for (i = 0; i < j; i++) > + if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > goto error; > - } > > - /* Delete all the entries */ > - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) > - if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, > - large_ldepth_route_table[j].depth) != 0) { > - printf("Failed to delete iteration %d, route# %d\n", > - i, j); > - goto error; > - } > + printf("Total LPM Adds: %d\n", TOTAL_WRITES); > + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > + printf("Average LPM Add/Del: %"PRIu64" cycles\n", > + __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) > + / TOTAL_WRITES); > + > + writer_done = 1; > + /* Wait until all readers have exited */ > + for (i = j; i < num_cores; i++) > + rte_eal_wait_lcore(enabled_core_ids[i]); > + > + rte_lpm_free(lpm); > + rte_free(rv); > + lpm = NULL; > + rv = NULL; > } > - total_cycles = rte_rdtsc_precise() - begin; > - > - printf("Total LPM Adds: %d\n", TOTAL_WRITES); > - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > - printf("Average LPM Add/Del: %g cycles\n", > - (double)total_cycles / TOTAL_WRITES); > - > - writer_done = 1; > - /* Wait until all readers have exited */ > - for (i = 0; i < num_cores; i++) > - rte_eal_wait_lcore(enabled_core_ids[i]); > - > - rte_lpm_free(lpm); > > return 0; > > @@ -946,9 +755,8 @@ test_lpm_perf(void) > rte_lpm_delete_all(lpm); > rte_lpm_free(lpm); > > - test_lpm_rcu_perf(); > - > - test_lpm_rcu_perf_multi_writer(); > + test_lpm_rcu_perf_multi_writer(0); > + test_lpm_rcu_perf_multi_writer(1); > > return 0; > } > -- Regards, Vladimir ^ permalink raw reply [flat|nested] 52+ messages in 
thread
* Re: [dpdk-dev] [PATCH 4/4] test/lpm: avoid code duplication in rcu qsbr perf 2020-11-02 17:17 ` Medvedkin, Vladimir @ 2020-11-02 22:11 ` Dharmik Thakkar 0 siblings, 0 replies; 52+ messages in thread From: Dharmik Thakkar @ 2020-11-02 22:11 UTC (permalink / raw) To: Medvedkin, Vladimir; +Cc: Bruce Richardson, dev, nd > On Nov 2, 2020, at 11:17 AM, Medvedkin, Vladimir <vladimir.medvedkin@intel.com> wrote: > > Hi Dharmik, > > Thanks for the patches, see comments inlined > > > On 29/10/2020 15:36, Dharmik Thakkar wrote: >> Avoid code duplication by combining single and multi threaded tests >> Signed-off-by: Dharmik Thakkar<dharmik.thakkar@arm.com> >> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> >> --- >> app/test/test_lpm_perf.c | 338 +++++++++------------------------------ >> 1 file changed, 73 insertions(+), 265 deletions(-) >> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c >> index 4f15db4f85ee..08312023b661 100644 >> --- a/app/test/test_lpm_perf.c >> +++ b/app/test/test_lpm_perf.c >> @@ -430,11 +430,16 @@ test_lpm_rcu_qsbr_writer(void *arg) >> { >> unsigned int i, j, si, ei; >> uint64_t begin, total_cycles; >> - uint8_t core_id = (uint8_t)((uintptr_t)arg); >> + uint8_t writer_id = (uint8_t)((uintptr_t)arg); >> uint32_t next_hop_add = 0xAA; >> - /* 2 writer threads are used */ >> - if (core_id % 2 == 0) { >> + /* Single writer (writer_id = 1) */ >> + if (writer_id == 1) { > > Probably it would be better to use enum here instead of 1/2/3? > Yes, I will update the patch. 
>> + si = 0; >> + ei = NUM_LDEPTH_ROUTE_ENTRIES; >> + } >> + /* 2 Writers (writer_id = 2/3)*/ >> + else if (writer_id == 2) { >> si = 0; >> ei = NUM_LDEPTH_ROUTE_ENTRIES / 2; >> } else { >> @@ -482,16 +487,17 @@ test_lpm_rcu_qsbr_writer(void *arg) >> /* >> * Functional test: >> - * 2 writers, rest are readers >> + * 1/2 writers, rest are readers >> */ >> static int >> -test_lpm_rcu_perf_multi_writer(void) >> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu) >> { >> struct rte_lpm_config config; >> size_t sz; >> - unsigned int i; >> + unsigned int i, j; >> uint16_t core_id; >> struct rte_lpm_rcu_config rcu_cfg = {0}; >> + int (*reader_f)(void *arg) = NULL; >> if (rte_lcore_count() < 3) { >> printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n"); >> @@ -504,273 +510,76 @@ test_lpm_rcu_perf_multi_writer(void) >> num_cores++; >> } >> - printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n", >> - num_cores - 2); >> - >> - /* Create LPM table */ >> - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; >> - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; >> - config.flags = 0; >> - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); >> - TEST_LPM_ASSERT(lpm != NULL); >> - >> - /* Init RCU variable */ >> - sz = rte_rcu_qsbr_get_memsize(num_cores); >> - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, >> - RTE_CACHE_LINE_SIZE); >> - rte_rcu_qsbr_init(rv, num_cores); >> - >> - rcu_cfg.v = rv; >> - /* Assign the RCU variable to LPM */ >> - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { >> - printf("RCU variable assignment failed\n"); >> - goto error; >> - } >> - >> - writer_done = 0; >> - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); >> - >> - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); >> - >> - /* Launch reader threads */ >> - for (i = 2; i < num_cores; i++) >> - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, >> - enabled_core_ids[i]); >> - >> - /* Launch writer threads */ >> - for (i = 0; i < 2; i++) >> - 
rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, >> - (void *)(uintptr_t)i, >> - enabled_core_ids[i]); >> - >> - /* Wait for writer threads */ >> - for (i = 0; i < 2; i++) >> - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) >> - goto error; >> - >> - printf("Total LPM Adds: %d\n", TOTAL_WRITES); >> - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); >> - printf("Average LPM Add/Del: %"PRIu64" cycles\n", >> - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / TOTAL_WRITES >> - ); >> - >> - writer_done = 1; >> - /* Wait until all readers have exited */ >> - for (i = 2; i < num_cores; i++) >> - rte_eal_wait_lcore(enabled_core_ids[i]); >> - >> - rte_lpm_free(lpm); >> - rte_free(rv); >> - lpm = NULL; >> - rv = NULL; >> - >> - /* Test without RCU integration */ >> - printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n", >> - num_cores - 2); >> - >> - /* Create LPM table */ >> - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; >> - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; >> - config.flags = 0; >> - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); >> - TEST_LPM_ASSERT(lpm != NULL); >> - >> - writer_done = 0; >> - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); >> - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); >> - >> - /* Launch reader threads */ >> - for (i = 2; i < num_cores; i++) >> - rte_eal_remote_launch(test_lpm_reader, NULL, >> - enabled_core_ids[i]); >> - >> - /* Launch writer threads */ >> - for (i = 0; i < 2; i++) >> - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, >> - (void *)(uintptr_t)i, >> - enabled_core_ids[i]); >> - >> - /* Wait for writer threads */ >> - for (i = 0; i < 2; i++) >> - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) >> - goto error; >> - >> - printf("Total LPM Adds: %d\n", TOTAL_WRITES); >> - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); >> - printf("Average LPM Add/Del: %"PRIu64" cycles\n", >> - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) >> - / TOTAL_WRITES); >> - >> - writer_done = 1; >> 
- /* Wait until all readers have exited */ >> - for (i = 2; i < num_cores; i++) >> - rte_eal_wait_lcore(enabled_core_ids[i]); >> - >> - rte_lpm_free(lpm); >> - >> - return 0; >> - >> -error: >> - writer_done = 1; >> - /* Wait until all readers have exited */ >> - rte_eal_mp_wait_lcore(); >> - >> - rte_lpm_free(lpm); >> - rte_free(rv); >> - >> - return -1; >> -} >> - >> -/* >> - * Functional test: >> - * Single writer, rest are readers >> - */ >> -static int >> -test_lpm_rcu_perf(void) >> -{ >> - struct rte_lpm_config config; >> - uint64_t begin, total_cycles; >> - size_t sz; >> - unsigned int i, j; >> - uint16_t core_id; >> - uint32_t next_hop_add = 0xAA; >> - struct rte_lpm_rcu_config rcu_cfg = {0}; >> - >> - if (rte_lcore_count() < 2) { >> - printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n"); >> - return TEST_SKIPPED; >> - } >> - >> - num_cores = 0; >> - RTE_LCORE_FOREACH_WORKER(core_id) { >> - enabled_core_ids[num_cores] = core_id; >> - num_cores++; >> - } >> - >> - printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n", >> - num_cores); >> - >> - /* Create LPM table */ >> - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; >> - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; >> - config.flags = 0; >> - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); >> - TEST_LPM_ASSERT(lpm != NULL); >> - >> - /* Init RCU variable */ >> - sz = rte_rcu_qsbr_get_memsize(num_cores); >> - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, >> - RTE_CACHE_LINE_SIZE); >> - rte_rcu_qsbr_init(rv, num_cores); >> - >> - rcu_cfg.v = rv; >> - /* Assign the RCU variable to LPM */ >> - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { >> - printf("RCU variable assignment failed\n"); >> - goto error; >> - } >> - >> - writer_done = 0; >> - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); >> - >> - /* Launch reader threads */ >> - for (i = 0; i < num_cores; i++) >> - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, >> - enabled_core_ids[i]); >> - >> 
- /* Measure add/delete. */ >> - begin = rte_rdtsc_precise(); >> - for (i = 0; i < RCU_ITERATIONS; i++) { >> - /* Add all the entries */ >> - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) >> - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, >> - large_ldepth_route_table[j].depth, >> - next_hop_add) != 0) { >> - printf("Failed to add iteration %d, route# %d\n", >> - i, j); >> + for (j = 1; j < 3; j++) { >> + if (use_rcu) >> + printf("\nPerf test: %d writer(s), %d reader(s)," >> + " RCU integration enabled\n", j, num_cores - j); >> + else >> + printf("\nPerf test: %d writer(s), %d reader(s)," >> + " RCU integration disabled\n", j, num_cores - j); >> + >> + /* Create LPM table */ >> + config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; >> + config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; >> + config.flags = 0; >> + lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); >> + TEST_LPM_ASSERT(lpm != NULL); >> + >> + /* Init RCU variable */ >> + if (use_rcu) { >> + sz = rte_rcu_qsbr_get_memsize(num_cores); >> + rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, >> + RTE_CACHE_LINE_SIZE); >> + rte_rcu_qsbr_init(rv, num_cores); >> + >> + rcu_cfg.v = rv; >> + /* Assign the RCU variable to LPM */ >> + if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { >> + printf("RCU variable assignment failed\n"); >> goto error; >> } >> - /* Delete all the entries */ >> - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) >> - if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, >> - large_ldepth_route_table[j].depth) != 0) { >> - printf("Failed to delete iteration %d, route# %d\n", >> - i, j); >> - goto error; >> - } >> - } >> - total_cycles = rte_rdtsc_precise() - begin; >> + reader_f = test_lpm_rcu_qsbr_reader; >> + } else >> + reader_f = test_lpm_reader; >> - printf("Total LPM Adds: %d\n", TOTAL_WRITES); >> - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); >> - printf("Average LPM Add/Del: %g cycles\n", >> - (double)total_cycles / TOTAL_WRITES); >> + writer_done = 0; >> + 
__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); >> - writer_done = 1; >> - /* Wait until all readers have exited */ >> - for (i = 0; i < num_cores; i++) >> - if (rte_eal_wait_lcore(enabled_core_ids[i]); >> + __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); >> - rte_lpm_free(lpm); >> - rte_free(rv); >> - lpm = NULL; >> - rv = NULL; >> + /* Launch reader threads */ >> + for (i = j; i < num_cores; i++) >> + rte_eal_remote_launch(reader_f, NULL, >> + enabled_core_ids[i]); >> - /* Test without RCU integration */ >> - printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n", >> - num_cores); >> + /* Launch writer threads */ >> + for (i = 0; i < j; i++) >> + rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, > > So now even single writer will acquire a lock for every _add/_delete operation. I don't think it is necessary. Yes, agreed it is not necessary. I wanted to avoid additional if () statement, but I can add it in the new version. > >> + (void *)(uintptr_t)(i + j), >> + enabled_core_ids[i]); >> - /* Create LPM table */ >> - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; >> - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; >> - config.flags = 0; >> - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); >> - TEST_LPM_ASSERT(lpm != NULL); >> - >> - writer_done = 0; >> - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); >> - >> - /* Launch reader threads */ >> - for (i = 0; i < num_cores; i++) >> - rte_eal_remote_launch(test_lpm_reader, NULL, >> - enabled_core_ids[i]); >> - >> - /* Measure add/delete. 
*/ >> - begin = rte_rdtsc_precise(); >> - for (i = 0; i < RCU_ITERATIONS; i++) { >> - /* Add all the entries */ >> - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) >> - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, >> - large_ldepth_route_table[j].depth, >> - next_hop_add) != 0) { >> - printf("Failed to add iteration %d, route# %d\n", >> - i, j); >> + /* Wait for writer threads */ >> + for (i = 0; i < j; i++) >> + if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) >> goto error; >> - } >> - /* Delete all the entries */ >> - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) >> - if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, >> - large_ldepth_route_table[j].depth) != 0) { >> - printf("Failed to delete iteration %d, route# %d\n", >> - i, j); >> - goto error; >> - } >> + printf("Total LPM Adds: %d\n", TOTAL_WRITES); >> + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); >> + printf("Average LPM Add/Del: %"PRIu64" cycles\n", >> + __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) >> + / TOTAL_WRITES); >> + >> + writer_done = 1; >> + /* Wait until all readers have exited */ >> + for (i = j; i < num_cores; i++) >> + rte_eal_wait_lcore(enabled_core_ids[i]); >> + >> + rte_lpm_free(lpm); >> + rte_free(rv); >> + lpm = NULL; >> + rv = NULL; >> } >> - total_cycles = rte_rdtsc_precise() - begin; >> - >> - printf("Total LPM Adds: %d\n", TOTAL_WRITES); >> - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); >> - printf("Average LPM Add/Del: %g cycles\n", >> - (double)total_cycles / TOTAL_WRITES); >> - >> - writer_done = 1; >> - /* Wait until all readers have exited */ >> - for (i = 0; i < num_cores; i++) >> - rte_eal_wait_lcore(enabled_core_ids[i]); >> - >> - rte_lpm_free(lpm); >> return 0; >> @@ -946,9 +755,8 @@ test_lpm_perf(void) >> rte_lpm_delete_all(lpm); >> rte_lpm_free(lpm); >> - test_lpm_rcu_perf(); >> - >> - test_lpm_rcu_perf_multi_writer(); >> + test_lpm_rcu_perf_multi_writer(0); >> + test_lpm_rcu_perf_multi_writer(1); >> return 0; >> } > > -- > Regards, > 
Vladimir ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf 2020-10-29 15:36 [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar ` (2 preceding siblings ...) 2020-10-29 15:36 ` [dpdk-dev] [PATCH 4/4] test/lpm: avoid code duplication " Dharmik Thakkar @ 2020-11-02 10:08 ` David Marchand 2020-11-02 15:11 ` Bruce Richardson 2020-11-02 23:51 ` [dpdk-dev] [PATCH v2 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar 5 siblings, 0 replies; 52+ messages in thread From: David Marchand @ 2020-11-02 10:08 UTC (permalink / raw) To: Bruce Richardson, Vladimir Medvedkin Cc: Honnappa Nagarahalli, Ruifeng Wang, Gavin Hu, dev, nd, Dharmik Thakkar, dpdk stable On Thu, Oct 29, 2020 at 4:37 PM Dharmik Thakkar <dharmik.thakkar@arm.com> wrote: > > Fix incorrect calculations for LPM adds, LPM deletes, > and average cycles in RCU QSBR perf tests > > Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") > Cc: honnappa.nagarahalli@arm.com > Cc: stable@dpdk.org > > Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Bruce, Vladimir, reviews for this series please? Thanks. -- David Marchand ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf 2020-10-29 15:36 [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar ` (3 preceding siblings ...) 2020-11-02 10:08 ` [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation " David Marchand @ 2020-11-02 15:11 ` Bruce Richardson 2020-11-02 16:58 ` Dharmik Thakkar 2020-11-02 23:51 ` [dpdk-dev] [PATCH v2 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar 5 siblings, 1 reply; 52+ messages in thread From: Bruce Richardson @ 2020-11-02 15:11 UTC (permalink / raw) To: Dharmik Thakkar Cc: Vladimir Medvedkin, Honnappa Nagarahalli, Ruifeng Wang, Gavin Hu, dev, nd, stable On Thu, Oct 29, 2020 at 10:36:31AM -0500, Dharmik Thakkar wrote: > Fix incorrect calculations for LPM adds, LPM deletes, > and average cycles in RCU QSBR perf tests > To help review this patch, could you provide some more details in the commit log as to what exactly was wrong with the calculation and how this patch fixes things? > Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") > Cc: honnappa.nagarahalli@arm.com > Cc: stable@dpdk.org > > Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf 2020-11-02 15:11 ` Bruce Richardson @ 2020-11-02 16:58 ` Dharmik Thakkar 2020-11-02 17:21 ` Medvedkin, Vladimir 2020-11-02 17:33 ` Bruce Richardson 0 siblings, 2 replies; 52+ messages in thread From: Dharmik Thakkar @ 2020-11-02 16:58 UTC (permalink / raw) To: Bruce Richardson Cc: Vladimir Medvedkin, Honnappa Nagarahalli, Ruifeng Wang, Gavin Hu, dev, nd, dpdk stable > On Nov 2, 2020, at 9:11 AM, Bruce Richardson <bruce.richardson@intel.com> wrote: > > On Thu, Oct 29, 2020 at 10:36:31AM -0500, Dharmik Thakkar wrote: >> Fix incorrect calculations for LPM adds, LPM deletes, >> and average cycles in RCU QSBR perf tests >> > > To help review this patch, could you provide some more details in the > commit log as to what exactly was wrong with the calculation and how this > patch fixes things? > I will update the commit message in the next version. Adding it here as well: Since the RCU QSBR tests run for ‘RCU_ITERATIONS’ and not ‘ITERATIONS’, replace ‘ITERATIONS’ with ‘RCU_ITERATIONS’ for calculating adds, deletes, and cycles. Also, for the multi-writer perf test, each writer only writes half of NUM_LDEPTH_ROUTE_ENTRIES. For 2 writers, total adds (or deletes) should be (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) instead of (2 * RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES). Since, for both the single- and multi-writer tests, total adds/deletes is equal to (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES), this has been replaced with a macro ‘TOTAL_WRITES’; furthermore, ‘gwrites’ has been removed since it is always a fixed value equal to TOTAL_WRITES. >> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") >> Cc: honnappa.nagarahalli@arm.com >> Cc: stable@dpdk.org >> >> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> >> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf 2020-11-02 16:58 ` Dharmik Thakkar @ 2020-11-02 17:21 ` Medvedkin, Vladimir 2020-11-02 17:33 ` Bruce Richardson 1 sibling, 0 replies; 52+ messages in thread From: Medvedkin, Vladimir @ 2020-11-02 17:21 UTC (permalink / raw) To: Dharmik Thakkar, Bruce Richardson Cc: Honnappa Nagarahalli, Ruifeng Wang, Gavin Hu, dev, nd, dpdk stable Hi Dharmik, On 02/11/2020 16:58, Dharmik Thakkar wrote: > >> On Nov 2, 2020, at 9:11 AM, Bruce Richardson <bruce.richardson@intel.com> wrote: >> >> On Thu, Oct 29, 2020 at 10:36:31AM -0500, Dharmik Thakkar wrote: >>> Fix incorrect calculations for LPM adds, LPM deletes, >>> and average cycles in RCU QSBR perf tests >>> >> >> To help review this patch, could you provide some more details in the >> commit log as to what exactly was wrong with the calculation and how this >> patch fixes things? >> > > I will update the commit message in the next version. Adding it here as well: > > Since, rcu qsbr tests run for ‘RCU_ITERATIONS’ and not ‘ITERATIONS’, > replace ‘ITERATIONS’ with ‘RCU_ITERATIONS’ for calculating adds, deletes, and cycles. > > Also, for multi-writer perf test, each writer only writes half of NUM_LDEPTH_ROUTE_ENTRIES. > For 2 writers, total adds (or deletes) should be (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) instead of > (2 * RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES). > > Since, for both the single and multi writer tests, total adds/deletes is equal to (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES), > this has been replaced with a macro ’TOTAL_WRITES’ and furthermore, ‘g_writes’ has been removed since it is always a fixed value > equal to TOTAL_WRITES. > Thanks for the clarification. I left a few comments regarding 4-th patch. First 3 patches LGTM, just put more details in the commit message. 
>>> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") >>> Cc: honnappa.nagarahalli@arm.com >>> Cc: stable@dpdk.org >>> >>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> >>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> > -- Regards, Vladimir ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf 2020-11-02 16:58 ` Dharmik Thakkar 2020-11-02 17:21 ` Medvedkin, Vladimir @ 2020-11-02 17:33 ` Bruce Richardson 1 sibling, 0 replies; 52+ messages in thread From: Bruce Richardson @ 2020-11-02 17:33 UTC (permalink / raw) To: Dharmik Thakkar Cc: Vladimir Medvedkin, Honnappa Nagarahalli, Ruifeng Wang, Gavin Hu, dev, nd, dpdk stable On Mon, Nov 02, 2020 at 04:58:43PM +0000, Dharmik Thakkar wrote: > > > On Nov 2, 2020, at 9:11 AM, Bruce Richardson <bruce.richardson@intel.com> wrote: > > > > On Thu, Oct 29, 2020 at 10:36:31AM -0500, Dharmik Thakkar wrote: > >> Fix incorrect calculations for LPM adds, LPM deletes, > >> and average cycles in RCU QSBR perf tests > >> > > > > To help review this patch, could you provide some more details in the > > commit log as to what exactly was wrong with the calculation and how this > > patch fixes things? > > > > I will update the commit message in the next version. Adding it here as well: > > Since, rcu qsbr tests run for ‘RCU_ITERATIONS’ and not ‘ITERATIONS’, > replace ‘ITERATIONS’ with ‘RCU_ITERATIONS’ for calculating adds, deletes, and cycles. > > Also, for multi-writer perf test, each writer only writes half of NUM_LDEPTH_ROUTE_ENTRIES. > For 2 writers, total adds (or deletes) should be (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) instead of > (2 * RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES). > > Since, for both the single and multi writer tests, total adds/deletes is equal to (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES), > this has been replaced with a macro ’TOTAL_WRITES’ and furthermore, ‘g_writes’ has been removed since it is always a fixed value > equal to TOTAL_WRITES. > Thanks for the clear explanation. ^ permalink raw reply [flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v2 0/4] test/lpm: fix rcu qsbr perf test 2020-10-29 15:36 [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar ` (4 preceding siblings ...) 2020-11-02 15:11 ` Bruce Richardson @ 2020-11-02 23:51 ` Dharmik Thakkar 2020-11-02 23:52 ` [dpdk-dev] [PATCH v2 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar ` (4 more replies) 5 siblings, 5 replies; 52+ messages in thread From: Dharmik Thakkar @ 2020-11-02 23:51 UTC (permalink / raw) Cc: dev, nd, Dharmik Thakkar Fix LPM adds, LPM deletes, and cycle calculation. Return error if LPM add/delete fails in multi-writer test. Remove redundant error checking for readers. Combine single and multi threaded test cases to avoid code duplication. --- v2: - Add more details about the fix to the commit message - Replace hard coded values with an enum - Remove lock acquire/release for single writer Dharmik Thakkar (4): test/lpm: fix cycle calculation in rcu qsbr perf test/lpm: return error on failure in rcu qsbr perf test/lpm: remove error checking in rcu qsbr perf test/lpm: avoid code duplication in rcu qsbr perf app/test/test_lpm_perf.c | 384 ++++++++++----------------------------- 1 file changed, 95 insertions(+), 289 deletions(-) -- 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v2 1/4] test/lpm: fix cycle calculation in rcu qsbr perf 2020-11-02 23:51 ` [dpdk-dev] [PATCH v2 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar @ 2020-11-02 23:52 ` Dharmik Thakkar 2020-11-03 1:30 ` Honnappa Nagarahalli 2020-11-02 23:52 ` [dpdk-dev] [PATCH v2 2/4] test/lpm: return error on failure " Dharmik Thakkar ` (3 subsequent siblings) 4 siblings, 1 reply; 52+ messages in thread From: Dharmik Thakkar @ 2020-11-02 23:52 UTC (permalink / raw) To: Bruce Richardson, Vladimir Medvedkin, Honnappa Nagarahalli, Ruifeng Wang, Gavin Hu Cc: dev, nd, Dharmik Thakkar, stable Fix incorrect calculations for LPM adds, LPM deletes, and average cycles in RCU QSBR perf tests. Since the RCU QSBR tests run for 'RCU_ITERATIONS' and not 'ITERATIONS', replace 'ITERATIONS' with 'RCU_ITERATIONS' for calculating adds, deletes, and cycles. Also, for the multi-writer perf test, each writer only writes half of NUM_LDEPTH_ROUTE_ENTRIES. For 2 writers, total adds (or deletes) should be (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) instead of (2 * RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES). Since, for both the single- and multi-writer tests, total adds/deletes is equal to (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES), this has been replaced with a macro 'TOTAL_WRITES'; furthermore, 'gwrites' has been removed since it is always a fixed value equal to TOTAL_WRITES. 
Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") Cc: honnappa.nagarahalli@arm.com Cc: stable@dpdk.org Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> --- app/test/test_lpm_perf.c | 45 ++++++++++++++-------------------------- 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index c5a238b9d1e8..45164b23214b 100644 --- a/app/test/test_lpm_perf.c +++ b/app/test/test_lpm_perf.c @@ -23,7 +23,6 @@ static struct rte_rcu_qsbr *rv; static volatile uint8_t writer_done; static volatile uint32_t thr_id; static uint64_t gwrite_cycles; -static uint64_t gwrites; /* LPM APIs are not thread safe, use mutex to provide thread safety */ static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -60,6 +59,8 @@ static uint32_t num_ldepth_route_entries; #define NUM_ROUTE_ENTRIES num_route_entries #define NUM_LDEPTH_ROUTE_ENTRIES num_ldepth_route_entries +#define TOTAL_WRITES (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) + enum { IP_CLASS_A, IP_CLASS_B, @@ -432,7 +433,6 @@ test_lpm_rcu_qsbr_writer(void *arg) uint8_t core_id = (uint8_t)((uintptr_t)arg); uint32_t next_hop_add = 0xAA; - RTE_SET_USED(arg); /* 2 writer threads are used */ if (core_id % 2 == 0) { si = 0; @@ -472,9 +472,6 @@ test_lpm_rcu_qsbr_writer(void *arg) total_cycles = rte_rdtsc_precise() - begin; __atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED); - __atomic_fetch_add(&gwrites, - 2 * NUM_LDEPTH_ROUTE_ENTRIES * RCU_ITERATIONS, - __ATOMIC_RELAXED); return 0; } @@ -528,7 +525,6 @@ test_lpm_rcu_perf_multi_writer(void) writer_done = 0; __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); - __atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED); __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); @@ -548,14 +544,11 @@ test_lpm_rcu_perf_multi_writer(void) if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) goto error; - printf("Total LPM Adds: %d\n", - 2 * ITERATIONS * 
NUM_LDEPTH_ROUTE_ENTRIES); - printf("Total LPM Deletes: %d\n", - 2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); + printf("Total LPM Adds: %d\n", TOTAL_WRITES); + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); printf("Average LPM Add/Del: %"PRIu64" cycles\n", - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / - __atomic_load_n(&gwrites, __ATOMIC_RELAXED) - ); + __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) + / TOTAL_WRITES); /* Wait and check return value from reader threads */ writer_done = 1; @@ -581,7 +574,6 @@ test_lpm_rcu_perf_multi_writer(void) writer_done = 0; __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); - __atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED); __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); /* Launch reader threads */ @@ -600,14 +592,11 @@ test_lpm_rcu_perf_multi_writer(void) if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) goto error; - printf("Total LPM Adds: %d\n", - 2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); - printf("Total LPM Deletes: %d\n", - 2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); + printf("Total LPM Adds: %d\n", TOTAL_WRITES); + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); printf("Average LPM Add/Del: %"PRIu64" cycles\n", - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / - __atomic_load_n(&gwrites, __ATOMIC_RELAXED) - ); + __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) + / TOTAL_WRITES); writer_done = 1; /* Wait and check return value from reader threads */ @@ -711,11 +700,10 @@ test_lpm_rcu_perf(void) } total_cycles = rte_rdtsc_precise() - begin; - printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); - printf("Total LPM Deletes: %d\n", - ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); + printf("Total LPM Adds: %d\n", TOTAL_WRITES); + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); printf("Average LPM Add/Del: %g cycles\n", - (double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS)); + (double)total_cycles / TOTAL_WRITES); writer_done = 1; /* Wait and check return value from reader threads 
*/ @@ -771,11 +759,10 @@ test_lpm_rcu_perf(void) } total_cycles = rte_rdtsc_precise() - begin; - printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); - printf("Total LPM Deletes: %d\n", - ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); + printf("Total LPM Adds: %d\n", TOTAL_WRITES); + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); printf("Average LPM Add/Del: %g cycles\n", - (double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS)); + (double)total_cycles / TOTAL_WRITES); writer_done = 1; /* Wait and check return value from reader threads */ -- 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/4] test/lpm: fix cycle calculation in rcu qsbr perf 2020-11-02 23:52 ` [dpdk-dev] [PATCH v2 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar @ 2020-11-03 1:30 ` Honnappa Nagarahalli 0 siblings, 0 replies; 52+ messages in thread From: Honnappa Nagarahalli @ 2020-11-03 1:30 UTC (permalink / raw) To: Dharmik Thakkar, Bruce Richardson, Vladimir Medvedkin, Ruifeng Wang, Gavin Hu Cc: dev, nd, Dharmik Thakkar, stable, Honnappa Nagarahalli, nd <snip> > > Fix incorrect calculations for LPM adds, LPM deletes, and average cycles in > RCU QSBR perf tests > > Since, rcu qsbr tests run for 'RCU_ITERATIONS' and not 'ITERATIONS', replace > 'ITERATIONS' with 'RCU_ITERATIONS' > for calculating adds, deletes, and cycles. > > Also, for multi-writer perf test, each writer only writes half of > NUM_LDEPTH_ROUTE_ENTRIES. > For 2 writers, total adds (or deletes) should be (RCU_ITERATIONS * > NUM_LDEPTH_ROUTE_ENTRIES) instead of > (2 * RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES). > > Since, for both the single and multi writer tests, total adds/deletes is equal to > (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES), this has been replaced > with a macro 'TOTAL_WRITES' and furthermore, 'g_writes' has been removed > since it is always a fixed value equal to TOTAL_WRITES. 
> > Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") > Cc: honnappa.nagarahalli@arm.com > Cc: stable@dpdk.org > > Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Looks good Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalliu@arm.com> > --- > app/test/test_lpm_perf.c | 45 ++++++++++++++-------------------------- > 1 file changed, 16 insertions(+), 29 deletions(-) > > diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index > c5a238b9d1e8..45164b23214b 100644 > --- a/app/test/test_lpm_perf.c > +++ b/app/test/test_lpm_perf.c > @@ -23,7 +23,6 @@ static struct rte_rcu_qsbr *rv; static volatile uint8_t > writer_done; static volatile uint32_t thr_id; static uint64_t gwrite_cycles; - > static uint64_t gwrites; > /* LPM APIs are not thread safe, use mutex to provide thread safety */ > static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER; > > @@ -60,6 +59,8 @@ static uint32_t num_ldepth_route_entries; #define > NUM_ROUTE_ENTRIES num_route_entries #define > NUM_LDEPTH_ROUTE_ENTRIES num_ldepth_route_entries > > +#define TOTAL_WRITES (RCU_ITERATIONS * > NUM_LDEPTH_ROUTE_ENTRIES) > + > enum { > IP_CLASS_A, > IP_CLASS_B, > @@ -432,7 +433,6 @@ test_lpm_rcu_qsbr_writer(void *arg) > uint8_t core_id = (uint8_t)((uintptr_t)arg); > uint32_t next_hop_add = 0xAA; > > - RTE_SET_USED(arg); > /* 2 writer threads are used */ > if (core_id % 2 == 0) { > si = 0; > @@ -472,9 +472,6 @@ test_lpm_rcu_qsbr_writer(void *arg) > total_cycles = rte_rdtsc_precise() - begin; > > __atomic_fetch_add(&gwrite_cycles, total_cycles, > __ATOMIC_RELAXED); > - __atomic_fetch_add(&gwrites, > - 2 * NUM_LDEPTH_ROUTE_ENTRIES * > RCU_ITERATIONS, > - __ATOMIC_RELAXED); > > return 0; > } > @@ -528,7 +525,6 @@ test_lpm_rcu_perf_multi_writer(void) > > writer_done = 0; > __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > - __atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED); > > __atomic_store_n(&thr_id, 0, 
__ATOMIC_SEQ_CST); > > @@ -548,14 +544,11 @@ test_lpm_rcu_perf_multi_writer(void) > if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > goto error; > > - printf("Total LPM Adds: %d\n", > - 2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); > - printf("Total LPM Deletes: %d\n", > - 2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); > + printf("Total LPM Adds: %d\n", TOTAL_WRITES); > + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > printf("Average LPM Add/Del: %"PRIu64" cycles\n", > - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / > - __atomic_load_n(&gwrites, __ATOMIC_RELAXED) > - ); > + __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) > + / TOTAL_WRITES); > > /* Wait and check return value from reader threads */ > writer_done = 1; > @@ -581,7 +574,6 @@ test_lpm_rcu_perf_multi_writer(void) > > writer_done = 0; > __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > - __atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED); > __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > > /* Launch reader threads */ > @@ -600,14 +592,11 @@ test_lpm_rcu_perf_multi_writer(void) > if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > goto error; > > - printf("Total LPM Adds: %d\n", > - 2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); > - printf("Total LPM Deletes: %d\n", > - 2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); > + printf("Total LPM Adds: %d\n", TOTAL_WRITES); > + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > printf("Average LPM Add/Del: %"PRIu64" cycles\n", > - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / > - __atomic_load_n(&gwrites, __ATOMIC_RELAXED) > - ); > + __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) > + / TOTAL_WRITES); > > writer_done = 1; > /* Wait and check return value from reader threads */ @@ -711,11 > +700,10 @@ test_lpm_rcu_perf(void) > } > total_cycles = rte_rdtsc_precise() - begin; > > - printf("Total LPM Adds: %d\n", ITERATIONS * > NUM_LDEPTH_ROUTE_ENTRIES); > - printf("Total LPM Deletes: %d\n", > - ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); > + printf("Total 
LPM Adds: %d\n", TOTAL_WRITES); > + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > printf("Average LPM Add/Del: %g cycles\n", > - (double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * > ITERATIONS)); > + (double)total_cycles / TOTAL_WRITES); > > writer_done = 1; > /* Wait and check return value from reader threads */ @@ -771,11 > +759,10 @@ test_lpm_rcu_perf(void) > } > total_cycles = rte_rdtsc_precise() - begin; > > - printf("Total LPM Adds: %d\n", ITERATIONS * > NUM_LDEPTH_ROUTE_ENTRIES); > - printf("Total LPM Deletes: %d\n", > - ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); > + printf("Total LPM Adds: %d\n", TOTAL_WRITES); > + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > printf("Average LPM Add/Del: %g cycles\n", > - (double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * > ITERATIONS)); > + (double)total_cycles / TOTAL_WRITES); > > writer_done = 1; > /* Wait and check return value from reader threads */ > -- > 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v2 2/4] test/lpm: return error on failure in rcu qsbr perf 2020-11-02 23:51 ` [dpdk-dev] [PATCH v2 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar 2020-11-02 23:52 ` [dpdk-dev] [PATCH v2 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar @ 2020-11-02 23:52 ` Dharmik Thakkar 2020-11-03 1:28 ` Honnappa Nagarahalli 2020-11-02 23:52 ` [dpdk-dev] [PATCH v2 3/4] test/lpm: remove error checking " Dharmik Thakkar ` (2 subsequent siblings) 4 siblings, 1 reply; 52+ messages in thread From: Dharmik Thakkar @ 2020-11-02 23:52 UTC (permalink / raw) To: Bruce Richardson, Vladimir Medvedkin, Honnappa Nagarahalli, Gavin Hu, Ruifeng Wang Cc: dev, nd, Dharmik Thakkar, stable Return error if Add/Delete fail in multiwriter perf test Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") Cc: honnappa.nagarahalli@arm.com Cc: stable@dpdk.org Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> --- app/test/test_lpm_perf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index 45164b23214b..55084816ab91 100644 --- a/app/test/test_lpm_perf.c +++ b/app/test/test_lpm_perf.c @@ -453,6 +453,8 @@ test_lpm_rcu_qsbr_writer(void *arg) next_hop_add) != 0) { printf("Failed to add iteration %d, route# %d\n", i, j); + pthread_mutex_unlock(&lpm_mutex); + return -1; } pthread_mutex_unlock(&lpm_mutex); } @@ -464,6 +466,8 @@ test_lpm_rcu_qsbr_writer(void *arg) large_ldepth_route_table[j].depth) != 0) { printf("Failed to delete iteration %d, route# %d\n", i, j); + pthread_mutex_unlock(&lpm_mutex); + return -1; } pthread_mutex_unlock(&lpm_mutex); } -- 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v2 2/4] test/lpm: return error on failure in rcu qsbr perf 2020-11-02 23:52 ` [dpdk-dev] [PATCH v2 2/4] test/lpm: return error on failure " Dharmik Thakkar @ 2020-11-03 1:28 ` Honnappa Nagarahalli 2020-11-03 4:42 ` Dharmik Thakkar 0 siblings, 1 reply; 52+ messages in thread From: Honnappa Nagarahalli @ 2020-11-03 1:28 UTC (permalink / raw) To: Dharmik Thakkar, Bruce Richardson, Vladimir Medvedkin, Gavin Hu, Ruifeng Wang Cc: dev, nd, Dharmik Thakkar, stable, Honnappa Nagarahalli, nd <snip> > > Return error if Add/Delete fail in multiwriter perf test > > Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") > Cc: honnappa.nagarahalli@arm.com > Cc: stable@dpdk.org > > Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> > --- > app/test/test_lpm_perf.c | 4 ++++ > 1 file changed, 4 insertions(+) > > diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index > 45164b23214b..55084816ab91 100644 > --- a/app/test/test_lpm_perf.c > +++ b/app/test/test_lpm_perf.c > @@ -453,6 +453,8 @@ test_lpm_rcu_qsbr_writer(void *arg) > next_hop_add) != 0) { > printf("Failed to add iteration %d, route# > %d\n", > i, j); > + pthread_mutex_unlock(&lpm_mutex); > + return -1; Would be good to use the "goto error" method used in this file in other functions. > } > pthread_mutex_unlock(&lpm_mutex); > } > @@ -464,6 +466,8 @@ test_lpm_rcu_qsbr_writer(void *arg) > large_ldepth_route_table[j].depth) != 0) { > printf("Failed to delete iteration %d, route# > %d\n", > i, j); > + pthread_mutex_unlock(&lpm_mutex); > + return -1; > } > pthread_mutex_unlock(&lpm_mutex); > } > -- > 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v2 2/4] test/lpm: return error on failure in rcu qsbr perf 2020-11-03 1:28 ` Honnappa Nagarahalli @ 2020-11-03 4:42 ` Dharmik Thakkar 0 siblings, 0 replies; 52+ messages in thread From: Dharmik Thakkar @ 2020-11-03 4:42 UTC (permalink / raw) To: Honnappa Nagarahalli Cc: Bruce Richardson, Vladimir Medvedkin, Gavin Hu, Ruifeng Wang, dev, nd, stable > On Nov 2, 2020, at 7:28 PM, Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote: > > <snip> > >> >> Return error if Add/Delete fail in multiwriter perf test >> >> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") >> Cc: honnappa.nagarahalli@arm.com >> Cc: stable@dpdk.org >> >> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> >> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> >> --- >> app/test/test_lpm_perf.c | 4 ++++ >> 1 file changed, 4 insertions(+) >> >> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index >> 45164b23214b..55084816ab91 100644 >> --- a/app/test/test_lpm_perf.c >> +++ b/app/test/test_lpm_perf.c >> @@ -453,6 +453,8 @@ test_lpm_rcu_qsbr_writer(void *arg) >> next_hop_add) != 0) { >> printf("Failed to add iteration %d, route# >> %d\n", >> i, j); >> + pthread_mutex_unlock(&lpm_mutex); >> + return -1; > Would be good to use the "goto error" method used in this file in other functions. Yes, will update in the next version. > >> } >> pthread_mutex_unlock(&lpm_mutex); >> } >> @@ -464,6 +466,8 @@ test_lpm_rcu_qsbr_writer(void *arg) >> large_ldepth_route_table[j].depth) != 0) { >> printf("Failed to delete iteration %d, route# >> %d\n", >> i, j); >> + pthread_mutex_unlock(&lpm_mutex); >> + return -1; >> } >> pthread_mutex_unlock(&lpm_mutex); >> } >> -- >> 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v2 3/4] test/lpm: remove error checking in rcu qsbr perf 2020-11-02 23:51 ` [dpdk-dev] [PATCH v2 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar 2020-11-02 23:52 ` [dpdk-dev] [PATCH v2 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar 2020-11-02 23:52 ` [dpdk-dev] [PATCH v2 2/4] test/lpm: return error on failure " Dharmik Thakkar @ 2020-11-02 23:52 ` Dharmik Thakkar 2020-11-03 1:21 ` Honnappa Nagarahalli 2020-11-02 23:52 ` [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication " Dharmik Thakkar 2020-11-03 5:12 ` [dpdk-dev] [PATCH v3 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar 4 siblings, 1 reply; 52+ messages in thread From: Dharmik Thakkar @ 2020-11-02 23:52 UTC (permalink / raw) To: Bruce Richardson, Vladimir Medvedkin, Ruifeng Wang, Honnappa Nagarahalli, Gavin Hu Cc: dev, nd, Dharmik Thakkar, stable Remove redundant error checking for reader threads since they never return error. Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") Cc: honnappa.nagarahalli@arm.com Cc: stable@dpdk.org Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> --- app/test/test_lpm_perf.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index 55084816ab91..224c92fa3d65 100644 --- a/app/test/test_lpm_perf.c +++ b/app/test/test_lpm_perf.c @@ -554,11 +554,10 @@ test_lpm_rcu_perf_multi_writer(void) __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / TOTAL_WRITES); - /* Wait and check return value from reader threads */ writer_done = 1; + /* Wait until all readers have exited */ for (i = 2; i < num_cores; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - goto error; + rte_eal_wait_lcore(enabled_core_ids[i]); rte_lpm_free(lpm); rte_free(rv); @@ -603,10 +602,9 @@ test_lpm_rcu_perf_multi_writer(void) / TOTAL_WRITES); writer_done = 1; - /* Wait and check return value from reader 
threads */ + /* Wait until all readers have exited */ for (i = 2; i < num_cores; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - goto error; + rte_eal_wait_lcore(enabled_core_ids[i]); rte_lpm_free(lpm); @@ -710,10 +708,9 @@ test_lpm_rcu_perf(void) (double)total_cycles / TOTAL_WRITES); writer_done = 1; - /* Wait and check return value from reader threads */ + /* Wait until all readers have exited */ for (i = 0; i < num_cores; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - goto error; + if (rte_eal_wait_lcore(enabled_core_ids[i]); rte_lpm_free(lpm); rte_free(rv); @@ -769,11 +766,9 @@ test_lpm_rcu_perf(void) (double)total_cycles / TOTAL_WRITES); writer_done = 1; - /* Wait and check return value from reader threads */ + /* Wait until all readers have exited */ for (i = 0; i < num_cores; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - printf("Warning: lcore %u not finished.\n", - enabled_core_ids[i]); + rte_eal_wait_lcore(enabled_core_ids[i]); rte_lpm_free(lpm); -- 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v2 3/4] test/lpm: remove error checking in rcu qsbr perf 2020-11-02 23:52 ` [dpdk-dev] [PATCH v2 3/4] test/lpm: remove error checking " Dharmik Thakkar @ 2020-11-03 1:21 ` Honnappa Nagarahalli 2020-11-03 4:56 ` Dharmik Thakkar 0 siblings, 1 reply; 52+ messages in thread From: Honnappa Nagarahalli @ 2020-11-03 1:21 UTC (permalink / raw) To: Dharmik Thakkar, Bruce Richardson, Vladimir Medvedkin, Ruifeng Wang, Gavin Hu Cc: dev, nd, Dharmik Thakkar, stable, Honnappa Nagarahalli, nd <snip> > > Remove redundant error checking for reader threads since they never return > error. > > Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") > Cc: honnappa.nagarahalli@arm.com > Cc: stable@dpdk.org > > Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> > --- > app/test/test_lpm_perf.c | 21 ++++++++------------- > 1 file changed, 8 insertions(+), 13 deletions(-) > > diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index > 55084816ab91..224c92fa3d65 100644 > --- a/app/test/test_lpm_perf.c > +++ b/app/test/test_lpm_perf.c > @@ -554,11 +554,10 @@ test_lpm_rcu_perf_multi_writer(void) > __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) > / TOTAL_WRITES); > > - /* Wait and check return value from reader threads */ > writer_done = 1; > + /* Wait until all readers have exited */ > for (i = 2; i < num_cores; i++) > - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > - goto error; > + rte_eal_wait_lcore(enabled_core_ids[i]); > > rte_lpm_free(lpm); > rte_free(rv); > @@ -603,10 +602,9 @@ test_lpm_rcu_perf_multi_writer(void) > / TOTAL_WRITES); > > writer_done = 1; > - /* Wait and check return value from reader threads */ > + /* Wait until all readers have exited */ > for (i = 2; i < num_cores; i++) > - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > - goto error; > + rte_eal_wait_lcore(enabled_core_ids[i]); > > rte_lpm_free(lpm); > > @@ -710,10 +708,9 @@ test_lpm_rcu_perf(void) > 
(double)total_cycles / TOTAL_WRITES); > > writer_done = 1; > - /* Wait and check return value from reader threads */ > + /* Wait until all readers have exited */ > for (i = 0; i < num_cores; i++) > - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > - goto error; > + if (rte_eal_wait_lcore(enabled_core_ids[i]); ^^ Do we need the 'if' statement? > > rte_lpm_free(lpm); > rte_free(rv); > @@ -769,11 +766,9 @@ test_lpm_rcu_perf(void) > (double)total_cycles / TOTAL_WRITES); > > writer_done = 1; > - /* Wait and check return value from reader threads */ > + /* Wait until all readers have exited */ > for (i = 0; i < num_cores; i++) > - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > - printf("Warning: lcore %u not finished.\n", > - enabled_core_ids[i]); > + rte_eal_wait_lcore(enabled_core_ids[i]); > > rte_lpm_free(lpm); > > -- > 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v2 3/4] test/lpm: remove error checking in rcu qsbr perf 2020-11-03 1:21 ` Honnappa Nagarahalli @ 2020-11-03 4:56 ` Dharmik Thakkar 0 siblings, 0 replies; 52+ messages in thread From: Dharmik Thakkar @ 2020-11-03 4:56 UTC (permalink / raw) To: Honnappa Nagarahalli Cc: Bruce Richardson, Vladimir Medvedkin, Ruifeng Wang, Gavin Hu, dev, nd, stable > On Nov 2, 2020, at 7:21 PM, Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote: > > <snip> > >> >> Remove redundant error checking for reader threads since they never return >> error. >> >> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") >> Cc: honnappa.nagarahalli@arm.com >> Cc: stable@dpdk.org >> >> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> >> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> >> --- >> app/test/test_lpm_perf.c | 21 ++++++++------------- >> 1 file changed, 8 insertions(+), 13 deletions(-) >> >> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index >> 55084816ab91..224c92fa3d65 100644 >> --- a/app/test/test_lpm_perf.c >> +++ b/app/test/test_lpm_perf.c >> @@ -554,11 +554,10 @@ test_lpm_rcu_perf_multi_writer(void) >> __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) >> / TOTAL_WRITES); >> >> - /* Wait and check return value from reader threads */ >> writer_done = 1; >> + /* Wait until all readers have exited */ >> for (i = 2; i < num_cores; i++) >> - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) >> - goto error; >> + rte_eal_wait_lcore(enabled_core_ids[i]); >> >> rte_lpm_free(lpm); >> rte_free(rv); >> @@ -603,10 +602,9 @@ test_lpm_rcu_perf_multi_writer(void) >> / TOTAL_WRITES); >> >> writer_done = 1; >> - /* Wait and check return value from reader threads */ >> + /* Wait until all readers have exited */ >> for (i = 2; i < num_cores; i++) >> - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) >> - goto error; >> + rte_eal_wait_lcore(enabled_core_ids[i]); >> >> rte_lpm_free(lpm); >> >> @@ -710,10 +708,9 @@ test_lpm_rcu_perf(void) >> 
(double)total_cycles / TOTAL_WRITES); >> >> writer_done = 1; >> - /* Wait and check return value from reader threads */ >> + /* Wait until all readers have exited */ >> for (i = 0; i < num_cores; i++) >> - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) >> - goto error; >> + if (rte_eal_wait_lcore(enabled_core_ids[i]); > ^^ Do we need the 'if' statement? No, will remove in the next version. >> >> rte_lpm_free(lpm); >> rte_free(rv); >> @@ -769,11 +766,9 @@ test_lpm_rcu_perf(void) >> (double)total_cycles / TOTAL_WRITES); >> >> writer_done = 1; >> - /* Wait and check return value from reader threads */ >> + /* Wait until all readers have exited */ >> for (i = 0; i < num_cores; i++) >> - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) >> - printf("Warning: lcore %u not finished.\n", >> - enabled_core_ids[i]); >> + rte_eal_wait_lcore(enabled_core_ids[i]); >> >> rte_lpm_free(lpm); >> >> -- >> 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication in rcu qsbr perf 2020-11-02 23:51 ` [dpdk-dev] [PATCH v2 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar ` (2 preceding siblings ...) 2020-11-02 23:52 ` [dpdk-dev] [PATCH v2 3/4] test/lpm: remove error checking " Dharmik Thakkar @ 2020-11-02 23:52 ` Dharmik Thakkar 2020-11-03 4:21 ` Honnappa Nagarahalli 2020-11-03 5:12 ` [dpdk-dev] [PATCH v3 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar 4 siblings, 1 reply; 52+ messages in thread From: Dharmik Thakkar @ 2020-11-02 23:52 UTC (permalink / raw) To: Bruce Richardson, Vladimir Medvedkin; +Cc: dev, nd, Dharmik Thakkar Avoid code duplication by combining single and multi threaded tests Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> --- app/test/test_lpm_perf.c | 362 ++++++++++----------------------------- 1 file changed, 91 insertions(+), 271 deletions(-) diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index 224c92fa3d65..229c835c23f7 100644 --- a/app/test/test_lpm_perf.c +++ b/app/test/test_lpm_perf.c @@ -67,6 +67,12 @@ enum { IP_CLASS_C }; +enum { + SINGLE_WRITER = 1, + MULTI_WRITER_1, + MULTI_WRITER_2 +}; + /* struct route_rule_count defines the total number of rules in following a/b/c * each item in a[]/b[]/c[] is the number of common IP address class A/B/C, not * including the ones for private local network. 
@@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg) { unsigned int i, j, si, ei; uint64_t begin, total_cycles; - uint8_t core_id = (uint8_t)((uintptr_t)arg); + uint8_t writer_id = (uint8_t)((uintptr_t)arg); uint32_t next_hop_add = 0xAA; - /* 2 writer threads are used */ - if (core_id % 2 == 0) { + /* Single writer (writer_id = 1) */ + if (writer_id == SINGLE_WRITER) { + si = 0; + ei = NUM_LDEPTH_ROUTE_ENTRIES; + } + /* 2 Writers (writer_id = 2/3)*/ + else if (writer_id == MULTI_WRITER_1) { si = 0; ei = NUM_LDEPTH_ROUTE_ENTRIES / 2; } else { @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg) for (i = 0; i < RCU_ITERATIONS; i++) { /* Add all the entries */ for (j = si; j < ei; j++) { - pthread_mutex_lock(&lpm_mutex); + if (writer_id != SINGLE_WRITER) + pthread_mutex_lock(&lpm_mutex); if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, large_ldepth_route_table[j].depth, next_hop_add) != 0) { printf("Failed to add iteration %d, route# %d\n", i, j); - pthread_mutex_unlock(&lpm_mutex); + if (writer_id != SINGLE_WRITER) + pthread_mutex_unlock(&lpm_mutex); return -1; } - pthread_mutex_unlock(&lpm_mutex); + if (writer_id != SINGLE_WRITER) + pthread_mutex_unlock(&lpm_mutex); } /* Delete all the entries */ for (j = si; j < ei; j++) { - pthread_mutex_lock(&lpm_mutex); + if (writer_id != SINGLE_WRITER) + pthread_mutex_lock(&lpm_mutex); if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, large_ldepth_route_table[j].depth) != 0) { printf("Failed to delete iteration %d, route# %d\n", i, j); - pthread_mutex_unlock(&lpm_mutex); + if (writer_id != SINGLE_WRITER) + pthread_mutex_unlock(&lpm_mutex); return -1; } - pthread_mutex_unlock(&lpm_mutex); + if (writer_id != SINGLE_WRITER) + pthread_mutex_unlock(&lpm_mutex); } } @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg) /* * Functional test: - * 2 writers, rest are readers + * 1/2 writers, rest are readers */ static int -test_lpm_rcu_perf_multi_writer(void) +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu) { 
struct rte_lpm_config config; size_t sz; - unsigned int i; + unsigned int i, j; uint16_t core_id; struct rte_lpm_rcu_config rcu_cfg = {0}; + int (*reader_f)(void *arg) = NULL; if (rte_lcore_count() < 3) { printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n"); @@ -504,273 +522,76 @@ test_lpm_rcu_perf_multi_writer(void) num_cores++; } - printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n", - num_cores - 2); - - /* Create LPM table */ - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; - config.flags = 0; - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); - TEST_LPM_ASSERT(lpm != NULL); - - /* Init RCU variable */ - sz = rte_rcu_qsbr_get_memsize(num_cores); - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, - RTE_CACHE_LINE_SIZE); - rte_rcu_qsbr_init(rv, num_cores); - - rcu_cfg.v = rv; - /* Assign the RCU variable to LPM */ - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { - printf("RCU variable assignment failed\n"); - goto error; - } - - writer_done = 0; - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); - - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); - - /* Launch reader threads */ - for (i = 2; i < num_cores; i++) - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, - enabled_core_ids[i]); - - /* Launch writer threads */ - for (i = 0; i < 2; i++) - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, - (void *)(uintptr_t)i, - enabled_core_ids[i]); - - /* Wait for writer threads */ - for (i = 0; i < 2; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - goto error; - - printf("Total LPM Adds: %d\n", TOTAL_WRITES); - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); - printf("Average LPM Add/Del: %"PRIu64" cycles\n", - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) - / TOTAL_WRITES); - - writer_done = 1; - /* Wait until all readers have exited */ - for (i = 2; i < num_cores; i++) - rte_eal_wait_lcore(enabled_core_ids[i]); - - rte_lpm_free(lpm); - 
rte_free(rv); - lpm = NULL; - rv = NULL; - - /* Test without RCU integration */ - printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n", - num_cores - 2); - - /* Create LPM table */ - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; - config.flags = 0; - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); - TEST_LPM_ASSERT(lpm != NULL); - - writer_done = 0; - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); - - /* Launch reader threads */ - for (i = 2; i < num_cores; i++) - rte_eal_remote_launch(test_lpm_reader, NULL, - enabled_core_ids[i]); - - /* Launch writer threads */ - for (i = 0; i < 2; i++) - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, - (void *)(uintptr_t)i, - enabled_core_ids[i]); - - /* Wait for writer threads */ - for (i = 0; i < 2; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - goto error; - - printf("Total LPM Adds: %d\n", TOTAL_WRITES); - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); - printf("Average LPM Add/Del: %"PRIu64" cycles\n", - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) - / TOTAL_WRITES); - - writer_done = 1; - /* Wait until all readers have exited */ - for (i = 2; i < num_cores; i++) - rte_eal_wait_lcore(enabled_core_ids[i]); - - rte_lpm_free(lpm); - - return 0; - -error: - writer_done = 1; - /* Wait until all readers have exited */ - rte_eal_mp_wait_lcore(); - - rte_lpm_free(lpm); - rte_free(rv); - - return -1; -} - -/* - * Functional test: - * Single writer, rest are readers - */ -static int -test_lpm_rcu_perf(void) -{ - struct rte_lpm_config config; - uint64_t begin, total_cycles; - size_t sz; - unsigned int i, j; - uint16_t core_id; - uint32_t next_hop_add = 0xAA; - struct rte_lpm_rcu_config rcu_cfg = {0}; - - if (rte_lcore_count() < 2) { - printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n"); - return TEST_SKIPPED; - } - - num_cores = 0; - 
RTE_LCORE_FOREACH_WORKER(core_id) { - enabled_core_ids[num_cores] = core_id; - num_cores++; - } - - printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n", - num_cores); - - /* Create LPM table */ - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; - config.flags = 0; - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); - TEST_LPM_ASSERT(lpm != NULL); - - /* Init RCU variable */ - sz = rte_rcu_qsbr_get_memsize(num_cores); - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, - RTE_CACHE_LINE_SIZE); - rte_rcu_qsbr_init(rv, num_cores); - - rcu_cfg.v = rv; - /* Assign the RCU variable to LPM */ - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { - printf("RCU variable assignment failed\n"); - goto error; - } - - writer_done = 0; - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); - - /* Launch reader threads */ - for (i = 0; i < num_cores; i++) - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, - enabled_core_ids[i]); - - /* Measure add/delete. 
*/ - begin = rte_rdtsc_precise(); - for (i = 0; i < RCU_ITERATIONS; i++) { - /* Add all the entries */ - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, - large_ldepth_route_table[j].depth, - next_hop_add) != 0) { - printf("Failed to add iteration %d, route# %d\n", - i, j); + for (j = 1; j < 3; j++) { + if (use_rcu) + printf("\nPerf test: %d writer(s), %d reader(s)," + " RCU integration enabled\n", j, num_cores - j); + else + printf("\nPerf test: %d writer(s), %d reader(s)," + " RCU integration disabled\n", j, num_cores - j); + + /* Create LPM table */ + config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; + config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; + config.flags = 0; + lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); + TEST_LPM_ASSERT(lpm != NULL); + + /* Init RCU variable */ + if (use_rcu) { + sz = rte_rcu_qsbr_get_memsize(num_cores); + rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, + RTE_CACHE_LINE_SIZE); + rte_rcu_qsbr_init(rv, num_cores); + + rcu_cfg.v = rv; + /* Assign the RCU variable to LPM */ + if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { + printf("RCU variable assignment failed\n"); goto error; } - /* Delete all the entries */ - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) - if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, - large_ldepth_route_table[j].depth) != 0) { - printf("Failed to delete iteration %d, route# %d\n", - i, j); - goto error; - } - } - total_cycles = rte_rdtsc_precise() - begin; + reader_f = test_lpm_rcu_qsbr_reader; + } else + reader_f = test_lpm_reader; - printf("Total LPM Adds: %d\n", TOTAL_WRITES); - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); - printf("Average LPM Add/Del: %g cycles\n", - (double)total_cycles / TOTAL_WRITES); + writer_done = 0; + __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); - writer_done = 1; - /* Wait until all readers have exited */ - for (i = 0; i < num_cores; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]); - - 
rte_lpm_free(lpm); - rte_free(rv); - lpm = NULL; - rv = NULL; - - /* Test without RCU integration */ - printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n", - num_cores); - - /* Create LPM table */ - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; - config.flags = 0; - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); - TEST_LPM_ASSERT(lpm != NULL); + __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); - writer_done = 0; - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); + /* Launch reader threads */ + for (i = j; i < num_cores; i++) + rte_eal_remote_launch(reader_f, NULL, + enabled_core_ids[i]); - /* Launch reader threads */ - for (i = 0; i < num_cores; i++) - rte_eal_remote_launch(test_lpm_reader, NULL, - enabled_core_ids[i]); + /* Launch writer threads */ + for (i = 0; i < j; i++) + rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, + (void *)(uintptr_t)(i + j), + enabled_core_ids[i]); - /* Measure add/delete. */ - begin = rte_rdtsc_precise(); - for (i = 0; i < RCU_ITERATIONS; i++) { - /* Add all the entries */ - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, - large_ldepth_route_table[j].depth, - next_hop_add) != 0) { - printf("Failed to add iteration %d, route# %d\n", - i, j); + /* Wait for writer threads */ + for (i = 0; i < j; i++) + if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) goto error; - } - /* Delete all the entries */ - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) - if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, - large_ldepth_route_table[j].depth) != 0) { - printf("Failed to delete iteration %d, route# %d\n", - i, j); - goto error; - } + printf("Total LPM Adds: %d\n", TOTAL_WRITES); + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); + printf("Average LPM Add/Del: %"PRIu64" cycles\n", + __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) + / TOTAL_WRITES); + + writer_done = 1; + /* Wait until all readers have exited */ + 
for (i = j; i < num_cores; i++) + rte_eal_wait_lcore(enabled_core_ids[i]); + + rte_lpm_free(lpm); + rte_free(rv); + lpm = NULL; + rv = NULL; } - total_cycles = rte_rdtsc_precise() - begin; - - printf("Total LPM Adds: %d\n", TOTAL_WRITES); - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); - printf("Average LPM Add/Del: %g cycles\n", - (double)total_cycles / TOTAL_WRITES); - - writer_done = 1; - /* Wait until all readers have exited */ - for (i = 0; i < num_cores; i++) - rte_eal_wait_lcore(enabled_core_ids[i]); - - rte_lpm_free(lpm); return 0; @@ -946,9 +767,8 @@ test_lpm_perf(void) rte_lpm_delete_all(lpm); rte_lpm_free(lpm); - test_lpm_rcu_perf(); - - test_lpm_rcu_perf_multi_writer(); + test_lpm_rcu_perf_multi_writer(0); + test_lpm_rcu_perf_multi_writer(1); return 0; } -- 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication in rcu qsbr perf 2020-11-02 23:52 ` [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication " Dharmik Thakkar @ 2020-11-03 4:21 ` Honnappa Nagarahalli 2020-11-03 4:33 ` Dharmik Thakkar 0 siblings, 1 reply; 52+ messages in thread From: Honnappa Nagarahalli @ 2020-11-03 4:21 UTC (permalink / raw) To: Dharmik Thakkar, Bruce Richardson, Vladimir Medvedkin Cc: dev, nd, Dharmik Thakkar, Honnappa Nagarahalli, nd <snip> > > Avoid code duplication by combining single and multi threaded tests > > Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> > --- > app/test/test_lpm_perf.c | 362 ++++++++++----------------------------- > 1 file changed, 91 insertions(+), 271 deletions(-) > > diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index > 224c92fa3d65..229c835c23f7 100644 > --- a/app/test/test_lpm_perf.c > +++ b/app/test/test_lpm_perf.c > @@ -67,6 +67,12 @@ enum { > IP_CLASS_C > }; > > +enum { > + SINGLE_WRITER = 1, > + MULTI_WRITER_1, > + MULTI_WRITER_2 > +}; Do we need this? Can we use the number of cores instead? > + > /* struct route_rule_count defines the total number of rules in following > a/b/c > * each item in a[]/b[]/c[] is the number of common IP address class A/B/C, > not > * including the ones for private local network. 
> @@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg) { > unsigned int i, j, si, ei; > uint64_t begin, total_cycles; > - uint8_t core_id = (uint8_t)((uintptr_t)arg); > + uint8_t writer_id = (uint8_t)((uintptr_t)arg); > uint32_t next_hop_add = 0xAA; > > - /* 2 writer threads are used */ > - if (core_id % 2 == 0) { > + /* Single writer (writer_id = 1) */ > + if (writer_id == SINGLE_WRITER) { > + si = 0; > + ei = NUM_LDEPTH_ROUTE_ENTRIES; > + } > + /* 2 Writers (writer_id = 2/3)*/ > + else if (writer_id == MULTI_WRITER_1) { > si = 0; > ei = NUM_LDEPTH_ROUTE_ENTRIES / 2; > } else { > @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg) > for (i = 0; i < RCU_ITERATIONS; i++) { > /* Add all the entries */ > for (j = si; j < ei; j++) { > - pthread_mutex_lock(&lpm_mutex); > + if (writer_id != SINGLE_WRITER) > + pthread_mutex_lock(&lpm_mutex); > if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, > large_ldepth_route_table[j].depth, > next_hop_add) != 0) { > printf("Failed to add iteration %d, route# > %d\n", > i, j); > - pthread_mutex_unlock(&lpm_mutex); > + if (writer_id != SINGLE_WRITER) > + > pthread_mutex_unlock(&lpm_mutex); > return -1; > } > - pthread_mutex_unlock(&lpm_mutex); > + if (writer_id != SINGLE_WRITER) > + pthread_mutex_unlock(&lpm_mutex); > } > > /* Delete all the entries */ > for (j = si; j < ei; j++) { > - pthread_mutex_lock(&lpm_mutex); > + if (writer_id != SINGLE_WRITER) > + pthread_mutex_lock(&lpm_mutex); > if (rte_lpm_delete(lpm, > large_ldepth_route_table[j].ip, > large_ldepth_route_table[j].depth) != 0) { > printf("Failed to delete iteration %d, route# > %d\n", > i, j); > - pthread_mutex_unlock(&lpm_mutex); > + if (writer_id != SINGLE_WRITER) > + > pthread_mutex_unlock(&lpm_mutex); > return -1; > } > - pthread_mutex_unlock(&lpm_mutex); > + if (writer_id != SINGLE_WRITER) > + pthread_mutex_unlock(&lpm_mutex); > } > } > > @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg) > > /* > * Functional test: > - * 2 writers, rest are 
readers > + * 1/2 writers, rest are readers > */ > static int > -test_lpm_rcu_perf_multi_writer(void) > +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu) > { > struct rte_lpm_config config; > size_t sz; > - unsigned int i; > + unsigned int i, j; > uint16_t core_id; > struct rte_lpm_rcu_config rcu_cfg = {0}; > + int (*reader_f)(void *arg) = NULL; > > if (rte_lcore_count() < 3) { > printf("Not enough cores for lpm_rcu_perf_autotest, > expecting at least 3\n"); @@ -504,273 +522,76 @@ > test_lpm_rcu_perf_multi_writer(void) > num_cores++; > } > > - printf("\nPerf test: 2 writers, %d readers, RCU integration > enabled\n", > - num_cores - 2); > - > - /* Create LPM table */ > - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; > - config.flags = 0; > - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > - TEST_LPM_ASSERT(lpm != NULL); > - > - /* Init RCU variable */ > - sz = rte_rcu_qsbr_get_memsize(num_cores); > - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, > - RTE_CACHE_LINE_SIZE); > - rte_rcu_qsbr_init(rv, num_cores); > - > - rcu_cfg.v = rv; > - /* Assign the RCU variable to LPM */ > - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { > - printf("RCU variable assignment failed\n"); > - goto error; > - } > - > - writer_done = 0; > - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > - > - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > - > - /* Launch reader threads */ > - for (i = 2; i < num_cores; i++) > - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, > - enabled_core_ids[i]); > - > - /* Launch writer threads */ > - for (i = 0; i < 2; i++) > - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, > - (void *)(uintptr_t)i, > - enabled_core_ids[i]); > - > - /* Wait for writer threads */ > - for (i = 0; i < 2; i++) > - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > - goto error; > - > - printf("Total LPM Adds: %d\n", TOTAL_WRITES); > - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > - 
printf("Average LPM Add/Del: %"PRIu64" cycles\n", > - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) > - / TOTAL_WRITES); > - > - writer_done = 1; > - /* Wait until all readers have exited */ > - for (i = 2; i < num_cores; i++) > - rte_eal_wait_lcore(enabled_core_ids[i]); > - > - rte_lpm_free(lpm); > - rte_free(rv); > - lpm = NULL; > - rv = NULL; > - > - /* Test without RCU integration */ > - printf("\nPerf test: 2 writers, %d readers, RCU integration > disabled\n", > - num_cores - 2); > - > - /* Create LPM table */ > - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; > - config.flags = 0; > - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > - TEST_LPM_ASSERT(lpm != NULL); > - > - writer_done = 0; > - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > - > - /* Launch reader threads */ > - for (i = 2; i < num_cores; i++) > - rte_eal_remote_launch(test_lpm_reader, NULL, > - enabled_core_ids[i]); > - > - /* Launch writer threads */ > - for (i = 0; i < 2; i++) > - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, > - (void *)(uintptr_t)i, > - enabled_core_ids[i]); > - > - /* Wait for writer threads */ > - for (i = 0; i < 2; i++) > - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > - goto error; > - > - printf("Total LPM Adds: %d\n", TOTAL_WRITES); > - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > - printf("Average LPM Add/Del: %"PRIu64" cycles\n", > - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) > - / TOTAL_WRITES); > - > - writer_done = 1; > - /* Wait until all readers have exited */ > - for (i = 2; i < num_cores; i++) > - rte_eal_wait_lcore(enabled_core_ids[i]); > - > - rte_lpm_free(lpm); > - > - return 0; > - > -error: > - writer_done = 1; > - /* Wait until all readers have exited */ > - rte_eal_mp_wait_lcore(); > - > - rte_lpm_free(lpm); > - rte_free(rv); > - > - return -1; > -} > - > -/* > - * Functional test: > - * Single writer, rest 
are readers > - */ > -static int > -test_lpm_rcu_perf(void) > -{ > - struct rte_lpm_config config; > - uint64_t begin, total_cycles; > - size_t sz; > - unsigned int i, j; > - uint16_t core_id; > - uint32_t next_hop_add = 0xAA; > - struct rte_lpm_rcu_config rcu_cfg = {0}; > - > - if (rte_lcore_count() < 2) { > - printf("Not enough cores for lpm_rcu_perf_autotest, > expecting at least 2\n"); > - return TEST_SKIPPED; > - } > - > - num_cores = 0; > - RTE_LCORE_FOREACH_WORKER(core_id) { > - enabled_core_ids[num_cores] = core_id; > - num_cores++; > - } > - > - printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n", > - num_cores); > - > - /* Create LPM table */ > - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; > - config.flags = 0; > - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > - TEST_LPM_ASSERT(lpm != NULL); > - > - /* Init RCU variable */ > - sz = rte_rcu_qsbr_get_memsize(num_cores); > - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, > - RTE_CACHE_LINE_SIZE); > - rte_rcu_qsbr_init(rv, num_cores); > - > - rcu_cfg.v = rv; > - /* Assign the RCU variable to LPM */ > - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { > - printf("RCU variable assignment failed\n"); > - goto error; > - } > - > - writer_done = 0; > - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > - > - /* Launch reader threads */ > - for (i = 0; i < num_cores; i++) > - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, > - enabled_core_ids[i]); > - > - /* Measure add/delete. 
*/ > - begin = rte_rdtsc_precise(); > - for (i = 0; i < RCU_ITERATIONS; i++) { > - /* Add all the entries */ > - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) > - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, > - large_ldepth_route_table[j].depth, > - next_hop_add) != 0) { > - printf("Failed to add iteration %d, route# > %d\n", > - i, j); > + for (j = 1; j < 3; j++) { > + if (use_rcu) > + printf("\nPerf test: %d writer(s), %d reader(s)," > + " RCU integration enabled\n", j, num_cores - j); > + else > + printf("\nPerf test: %d writer(s), %d reader(s)," > + " RCU integration disabled\n", j, num_cores - j); > + > + /* Create LPM table */ > + config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > + config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; > + config.flags = 0; > + lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > + TEST_LPM_ASSERT(lpm != NULL); > + > + /* Init RCU variable */ > + if (use_rcu) { > + sz = rte_rcu_qsbr_get_memsize(num_cores); > + rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, > + > RTE_CACHE_LINE_SIZE); > + rte_rcu_qsbr_init(rv, num_cores); > + > + rcu_cfg.v = rv; > + /* Assign the RCU variable to LPM */ > + if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { > + printf("RCU variable assignment failed\n"); > goto error; > } > > - /* Delete all the entries */ > - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) > - if (rte_lpm_delete(lpm, > large_ldepth_route_table[j].ip, > - large_ldepth_route_table[j].depth) != 0) { > - printf("Failed to delete iteration %d, route# > %d\n", > - i, j); > - goto error; > - } > - } > - total_cycles = rte_rdtsc_precise() - begin; > + reader_f = test_lpm_rcu_qsbr_reader; > + } else > + reader_f = test_lpm_reader; > > - printf("Total LPM Adds: %d\n", TOTAL_WRITES); > - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > - printf("Average LPM Add/Del: %g cycles\n", > - (double)total_cycles / TOTAL_WRITES); > + writer_done = 0; > + __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > > - writer_done = 1; > - /* 
Wait until all readers have exited */ > - for (i = 0; i < num_cores; i++) > - if (rte_eal_wait_lcore(enabled_core_ids[i]); > - > - rte_lpm_free(lpm); > - rte_free(rv); > - lpm = NULL; > - rv = NULL; > - > - /* Test without RCU integration */ > - printf("\nPerf test: 1 writer, %d readers, RCU integration > disabled\n", > - num_cores); > - > - /* Create LPM table */ > - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; > - config.flags = 0; > - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > - TEST_LPM_ASSERT(lpm != NULL); > + __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > > - writer_done = 0; > - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > + /* Launch reader threads */ > + for (i = j; i < num_cores; i++) > + rte_eal_remote_launch(reader_f, NULL, > + enabled_core_ids[i]); > > - /* Launch reader threads */ > - for (i = 0; i < num_cores; i++) > - rte_eal_remote_launch(test_lpm_reader, NULL, > - enabled_core_ids[i]); > + /* Launch writer threads */ > + for (i = 0; i < j; i++) > + rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, > + (void *)(uintptr_t)(i + j), This can be just 'j'? > + enabled_core_ids[i]); > > - /* Measure add/delete. 
*/ > - begin = rte_rdtsc_precise(); > - for (i = 0; i < RCU_ITERATIONS; i++) { > - /* Add all the entries */ > - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) > - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, > - large_ldepth_route_table[j].depth, > - next_hop_add) != 0) { > - printf("Failed to add iteration %d, route# > %d\n", > - i, j); > + /* Wait for writer threads */ > + for (i = 0; i < j; i++) > + if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > goto error; > - } > > - /* Delete all the entries */ > - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) > - if (rte_lpm_delete(lpm, > large_ldepth_route_table[j].ip, > - large_ldepth_route_table[j].depth) != 0) { > - printf("Failed to delete iteration %d, route# > %d\n", > - i, j); > - goto error; > - } > + printf("Total LPM Adds: %d\n", TOTAL_WRITES); > + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > + printf("Average LPM Add/Del: %"PRIu64" cycles\n", > + __atomic_load_n(&gwrite_cycles, > __ATOMIC_RELAXED) > + / TOTAL_WRITES); > + > + writer_done = 1; > + /* Wait until all readers have exited */ > + for (i = j; i < num_cores; i++) > + rte_eal_wait_lcore(enabled_core_ids[i]); > + > + rte_lpm_free(lpm); > + rte_free(rv); > + lpm = NULL; > + rv = NULL; > } > - total_cycles = rte_rdtsc_precise() - begin; > - > - printf("Total LPM Adds: %d\n", TOTAL_WRITES); > - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > - printf("Average LPM Add/Del: %g cycles\n", > - (double)total_cycles / TOTAL_WRITES); > - > - writer_done = 1; > - /* Wait until all readers have exited */ > - for (i = 0; i < num_cores; i++) > - rte_eal_wait_lcore(enabled_core_ids[i]); > - > - rte_lpm_free(lpm); > > return 0; > > @@ -946,9 +767,8 @@ test_lpm_perf(void) > rte_lpm_delete_all(lpm); > rte_lpm_free(lpm); > > - test_lpm_rcu_perf(); > - > - test_lpm_rcu_perf_multi_writer(); > + test_lpm_rcu_perf_multi_writer(0); > + test_lpm_rcu_perf_multi_writer(1); > > return 0; > } > -- > 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in 
thread
* Re: [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication in rcu qsbr perf 2020-11-03 4:21 ` Honnappa Nagarahalli @ 2020-11-03 4:33 ` Dharmik Thakkar 2020-11-03 5:32 ` Honnappa Nagarahalli 0 siblings, 1 reply; 52+ messages in thread From: Dharmik Thakkar @ 2020-11-03 4:33 UTC (permalink / raw) To: Honnappa Nagarahalli; +Cc: Bruce Richardson, Vladimir Medvedkin, dev, nd > On Nov 2, 2020, at 10:21 PM, Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote: > > <snip> >> >> Avoid code duplication by combining single and multi threaded tests >> >> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> >> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> >> --- >> app/test/test_lpm_perf.c | 362 ++++++++++----------------------------- >> 1 file changed, 91 insertions(+), 271 deletions(-) >> >> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index >> 224c92fa3d65..229c835c23f7 100644 >> --- a/app/test/test_lpm_perf.c >> +++ b/app/test/test_lpm_perf.c >> @@ -67,6 +67,12 @@ enum { >> IP_CLASS_C >> }; >> >> +enum { >> + SINGLE_WRITER = 1, >> + MULTI_WRITER_1, >> + MULTI_WRITER_2 >> +}; > Do we need this? Can we use the number of cores instead? > There are 3 combinations of writes (adds/deletes): 1. Write all the entries - in case of a single writer 2. Write half of the entries - in case of multiple writers 3. Write remaining half of the entries - in case of multiple writers So, I think this is required. >> + >> /* struct route_rule_count defines the total number of rules in following >> a/b/c >> * each item in a[]/b[]/c[] is the number of common IP address class A/B/C, >> not >> * including the ones for private local network. 
>> @@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg) { >> unsigned int i, j, si, ei; >> uint64_t begin, total_cycles; >> - uint8_t core_id = (uint8_t)((uintptr_t)arg); >> + uint8_t writer_id = (uint8_t)((uintptr_t)arg); >> uint32_t next_hop_add = 0xAA; >> >> - /* 2 writer threads are used */ >> - if (core_id % 2 == 0) { >> + /* Single writer (writer_id = 1) */ >> + if (writer_id == SINGLE_WRITER) { >> + si = 0; >> + ei = NUM_LDEPTH_ROUTE_ENTRIES; >> + } >> + /* 2 Writers (writer_id = 2/3)*/ >> + else if (writer_id == MULTI_WRITER_1) { >> si = 0; >> ei = NUM_LDEPTH_ROUTE_ENTRIES / 2; >> } else { >> @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg) >> for (i = 0; i < RCU_ITERATIONS; i++) { >> /* Add all the entries */ >> for (j = si; j < ei; j++) { >> - pthread_mutex_lock(&lpm_mutex); >> + if (writer_id != SINGLE_WRITER) >> + pthread_mutex_lock(&lpm_mutex); >> if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, >> large_ldepth_route_table[j].depth, >> next_hop_add) != 0) { >> printf("Failed to add iteration %d, route# >> %d\n", >> i, j); >> - pthread_mutex_unlock(&lpm_mutex); >> + if (writer_id != SINGLE_WRITER) >> + >> pthread_mutex_unlock(&lpm_mutex); >> return -1; >> } >> - pthread_mutex_unlock(&lpm_mutex); >> + if (writer_id != SINGLE_WRITER) >> + pthread_mutex_unlock(&lpm_mutex); >> } >> >> /* Delete all the entries */ >> for (j = si; j < ei; j++) { >> - pthread_mutex_lock(&lpm_mutex); >> + if (writer_id != SINGLE_WRITER) >> + pthread_mutex_lock(&lpm_mutex); >> if (rte_lpm_delete(lpm, >> large_ldepth_route_table[j].ip, >> large_ldepth_route_table[j].depth) != 0) { >> printf("Failed to delete iteration %d, route# >> %d\n", >> i, j); >> - pthread_mutex_unlock(&lpm_mutex); >> + if (writer_id != SINGLE_WRITER) >> + >> pthread_mutex_unlock(&lpm_mutex); >> return -1; >> } >> - pthread_mutex_unlock(&lpm_mutex); >> + if (writer_id != SINGLE_WRITER) >> + pthread_mutex_unlock(&lpm_mutex); >> } >> } >> >> @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void 
*arg) >> >> /* >> * Functional test: >> - * 2 writers, rest are readers >> + * 1/2 writers, rest are readers >> */ >> static int >> -test_lpm_rcu_perf_multi_writer(void) >> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu) >> { >> struct rte_lpm_config config; >> size_t sz; >> - unsigned int i; >> + unsigned int i, j; >> uint16_t core_id; >> struct rte_lpm_rcu_config rcu_cfg = {0}; >> + int (*reader_f)(void *arg) = NULL; >> >> if (rte_lcore_count() < 3) { >> printf("Not enough cores for lpm_rcu_perf_autotest, >> expecting at least 3\n"); @@ -504,273 +522,76 @@ >> test_lpm_rcu_perf_multi_writer(void) >> num_cores++; >> } >> >> - printf("\nPerf test: 2 writers, %d readers, RCU integration >> enabled\n", >> - num_cores - 2); >> - >> - /* Create LPM table */ >> - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; >> - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; >> - config.flags = 0; >> - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); >> - TEST_LPM_ASSERT(lpm != NULL); >> - >> - /* Init RCU variable */ >> - sz = rte_rcu_qsbr_get_memsize(num_cores); >> - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, >> - RTE_CACHE_LINE_SIZE); >> - rte_rcu_qsbr_init(rv, num_cores); >> - >> - rcu_cfg.v = rv; >> - /* Assign the RCU variable to LPM */ >> - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { >> - printf("RCU variable assignment failed\n"); >> - goto error; >> - } >> - >> - writer_done = 0; >> - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); >> - >> - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); >> - >> - /* Launch reader threads */ >> - for (i = 2; i < num_cores; i++) >> - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, >> - enabled_core_ids[i]); >> - >> - /* Launch writer threads */ >> - for (i = 0; i < 2; i++) >> - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, >> - (void *)(uintptr_t)i, >> - enabled_core_ids[i]); >> - >> - /* Wait for writer threads */ >> - for (i = 0; i < 2; i++) >> - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) >> - goto 
error; >> - >> - printf("Total LPM Adds: %d\n", TOTAL_WRITES); >> - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); >> - printf("Average LPM Add/Del: %"PRIu64" cycles\n", >> - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) >> - / TOTAL_WRITES); >> - >> - writer_done = 1; >> - /* Wait until all readers have exited */ >> - for (i = 2; i < num_cores; i++) >> - rte_eal_wait_lcore(enabled_core_ids[i]); >> - >> - rte_lpm_free(lpm); >> - rte_free(rv); >> - lpm = NULL; >> - rv = NULL; >> - >> - /* Test without RCU integration */ >> - printf("\nPerf test: 2 writers, %d readers, RCU integration >> disabled\n", >> - num_cores - 2); >> - >> - /* Create LPM table */ >> - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; >> - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; >> - config.flags = 0; >> - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); >> - TEST_LPM_ASSERT(lpm != NULL); >> - >> - writer_done = 0; >> - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); >> - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); >> - >> - /* Launch reader threads */ >> - for (i = 2; i < num_cores; i++) >> - rte_eal_remote_launch(test_lpm_reader, NULL, >> - enabled_core_ids[i]); >> - >> - /* Launch writer threads */ >> - for (i = 0; i < 2; i++) >> - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, >> - (void *)(uintptr_t)i, >> - enabled_core_ids[i]); >> - >> - /* Wait for writer threads */ >> - for (i = 0; i < 2; i++) >> - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) >> - goto error; >> - >> - printf("Total LPM Adds: %d\n", TOTAL_WRITES); >> - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); >> - printf("Average LPM Add/Del: %"PRIu64" cycles\n", >> - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) >> - / TOTAL_WRITES); >> - >> - writer_done = 1; >> - /* Wait until all readers have exited */ >> - for (i = 2; i < num_cores; i++) >> - rte_eal_wait_lcore(enabled_core_ids[i]); >> - >> - rte_lpm_free(lpm); >> - >> - return 0; >> - >> -error: >> - writer_done = 1; >> - /* Wait until 
all readers have exited */ >> - rte_eal_mp_wait_lcore(); >> - >> - rte_lpm_free(lpm); >> - rte_free(rv); >> - >> - return -1; >> -} >> - >> -/* >> - * Functional test: >> - * Single writer, rest are readers >> - */ >> -static int >> -test_lpm_rcu_perf(void) >> -{ >> - struct rte_lpm_config config; >> - uint64_t begin, total_cycles; >> - size_t sz; >> - unsigned int i, j; >> - uint16_t core_id; >> - uint32_t next_hop_add = 0xAA; >> - struct rte_lpm_rcu_config rcu_cfg = {0}; >> - >> - if (rte_lcore_count() < 2) { >> - printf("Not enough cores for lpm_rcu_perf_autotest, >> expecting at least 2\n"); >> - return TEST_SKIPPED; >> - } >> - >> - num_cores = 0; >> - RTE_LCORE_FOREACH_WORKER(core_id) { >> - enabled_core_ids[num_cores] = core_id; >> - num_cores++; >> - } >> - >> - printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n", >> - num_cores); >> - >> - /* Create LPM table */ >> - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; >> - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; >> - config.flags = 0; >> - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); >> - TEST_LPM_ASSERT(lpm != NULL); >> - >> - /* Init RCU variable */ >> - sz = rte_rcu_qsbr_get_memsize(num_cores); >> - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, >> - RTE_CACHE_LINE_SIZE); >> - rte_rcu_qsbr_init(rv, num_cores); >> - >> - rcu_cfg.v = rv; >> - /* Assign the RCU variable to LPM */ >> - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { >> - printf("RCU variable assignment failed\n"); >> - goto error; >> - } >> - >> - writer_done = 0; >> - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); >> - >> - /* Launch reader threads */ >> - for (i = 0; i < num_cores; i++) >> - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, >> - enabled_core_ids[i]); >> - >> - /* Measure add/delete. 
*/ >> - begin = rte_rdtsc_precise(); >> - for (i = 0; i < RCU_ITERATIONS; i++) { >> - /* Add all the entries */ >> - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) >> - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, >> - large_ldepth_route_table[j].depth, >> - next_hop_add) != 0) { >> - printf("Failed to add iteration %d, route# >> %d\n", >> - i, j); >> + for (j = 1; j < 3; j++) { >> + if (use_rcu) >> + printf("\nPerf test: %d writer(s), %d reader(s)," >> + " RCU integration enabled\n", j, num_cores - j); >> + else >> + printf("\nPerf test: %d writer(s), %d reader(s)," >> + " RCU integration disabled\n", j, num_cores - j); >> + >> + /* Create LPM table */ >> + config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; >> + config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; >> + config.flags = 0; >> + lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); >> + TEST_LPM_ASSERT(lpm != NULL); >> + >> + /* Init RCU variable */ >> + if (use_rcu) { >> + sz = rte_rcu_qsbr_get_memsize(num_cores); >> + rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, >> + >> RTE_CACHE_LINE_SIZE); >> + rte_rcu_qsbr_init(rv, num_cores); >> + >> + rcu_cfg.v = rv; >> + /* Assign the RCU variable to LPM */ >> + if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { >> + printf("RCU variable assignment failed\n"); >> goto error; >> } >> >> - /* Delete all the entries */ >> - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) >> - if (rte_lpm_delete(lpm, >> large_ldepth_route_table[j].ip, >> - large_ldepth_route_table[j].depth) != 0) { >> - printf("Failed to delete iteration %d, route# >> %d\n", >> - i, j); >> - goto error; >> - } >> - } >> - total_cycles = rte_rdtsc_precise() - begin; >> + reader_f = test_lpm_rcu_qsbr_reader; >> + } else >> + reader_f = test_lpm_reader; >> >> - printf("Total LPM Adds: %d\n", TOTAL_WRITES); >> - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); >> - printf("Average LPM Add/Del: %g cycles\n", >> - (double)total_cycles / TOTAL_WRITES); >> + writer_done = 0; >> + 
__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); >> >> - writer_done = 1; >> - /* Wait until all readers have exited */ >> - for (i = 0; i < num_cores; i++) >> - if (rte_eal_wait_lcore(enabled_core_ids[i]); >> - >> - rte_lpm_free(lpm); >> - rte_free(rv); >> - lpm = NULL; >> - rv = NULL; >> - >> - /* Test without RCU integration */ >> - printf("\nPerf test: 1 writer, %d readers, RCU integration >> disabled\n", >> - num_cores); >> - >> - /* Create LPM table */ >> - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; >> - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; >> - config.flags = 0; >> - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); >> - TEST_LPM_ASSERT(lpm != NULL); >> + __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); >> >> - writer_done = 0; >> - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); >> + /* Launch reader threads */ >> + for (i = j; i < num_cores; i++) >> + rte_eal_remote_launch(reader_f, NULL, >> + enabled_core_ids[i]); >> >> - /* Launch reader threads */ >> - for (i = 0; i < num_cores; i++) >> - rte_eal_remote_launch(test_lpm_reader, NULL, >> - enabled_core_ids[i]); >> + /* Launch writer threads */ >> + for (i = 0; i < j; i++) >> + rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, >> + (void *)(uintptr_t)(i + j), > This can be just 'j'? > >> + enabled_core_ids[i]); >> >> - /* Measure add/delete. 
*/ >> - begin = rte_rdtsc_precise(); >> - for (i = 0; i < RCU_ITERATIONS; i++) { >> - /* Add all the entries */ >> - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) >> - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, >> - large_ldepth_route_table[j].depth, >> - next_hop_add) != 0) { >> - printf("Failed to add iteration %d, route# >> %d\n", >> - i, j); >> + /* Wait for writer threads */ >> + for (i = 0; i < j; i++) >> + if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) >> goto error; >> - } >> >> - /* Delete all the entries */ >> - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) >> - if (rte_lpm_delete(lpm, >> large_ldepth_route_table[j].ip, >> - large_ldepth_route_table[j].depth) != 0) { >> - printf("Failed to delete iteration %d, route# >> %d\n", >> - i, j); >> - goto error; >> - } >> + printf("Total LPM Adds: %d\n", TOTAL_WRITES); >> + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); >> + printf("Average LPM Add/Del: %"PRIu64" cycles\n", >> + __atomic_load_n(&gwrite_cycles, >> __ATOMIC_RELAXED) >> + / TOTAL_WRITES); >> + >> + writer_done = 1; >> + /* Wait until all readers have exited */ >> + for (i = j; i < num_cores; i++) >> + rte_eal_wait_lcore(enabled_core_ids[i]); >> + >> + rte_lpm_free(lpm); >> + rte_free(rv); >> + lpm = NULL; >> + rv = NULL; >> } >> - total_cycles = rte_rdtsc_precise() - begin; >> - >> - printf("Total LPM Adds: %d\n", TOTAL_WRITES); >> - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); >> - printf("Average LPM Add/Del: %g cycles\n", >> - (double)total_cycles / TOTAL_WRITES); >> - >> - writer_done = 1; >> - /* Wait until all readers have exited */ >> - for (i = 0; i < num_cores; i++) >> - rte_eal_wait_lcore(enabled_core_ids[i]); >> - >> - rte_lpm_free(lpm); >> >> return 0; >> >> @@ -946,9 +767,8 @@ test_lpm_perf(void) >> rte_lpm_delete_all(lpm); >> rte_lpm_free(lpm); >> >> - test_lpm_rcu_perf(); >> - >> - test_lpm_rcu_perf_multi_writer(); >> + test_lpm_rcu_perf_multi_writer(0); >> + test_lpm_rcu_perf_multi_writer(1); >> >> return 0; >> } 
>> -- >> 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication in rcu qsbr perf 2020-11-03 4:33 ` Dharmik Thakkar @ 2020-11-03 5:32 ` Honnappa Nagarahalli 2020-11-03 14:03 ` Dharmik Thakkar 0 siblings, 1 reply; 52+ messages in thread From: Honnappa Nagarahalli @ 2020-11-03 5:32 UTC (permalink / raw) To: Dharmik Thakkar Cc: Bruce Richardson, Vladimir Medvedkin, dev, nd, Honnappa Nagarahalli, nd <snip> > >> > >> Avoid code duplication by combining single and multi threaded tests > >> > >> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> > >> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> > >> --- > >> app/test/test_lpm_perf.c | 362 > >> ++++++++++----------------------------- > >> 1 file changed, 91 insertions(+), 271 deletions(-) > >> > >> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c > >> index > >> 224c92fa3d65..229c835c23f7 100644 > >> --- a/app/test/test_lpm_perf.c > >> +++ b/app/test/test_lpm_perf.c > >> @@ -67,6 +67,12 @@ enum { > >> IP_CLASS_C > >> }; > >> > >> +enum { > >> +SINGLE_WRITER = 1, > >> +MULTI_WRITER_1, > >> +MULTI_WRITER_2 > >> +}; > > Do we need this? Can we use the number of cores instead? > > > > There are 3 combinations of writes (adds/deletes): > 1. Write all the entries - in case of a single writer 2. Write half of the entries - > in case of multiple writers 3. Write remaining half of the entries - in case of > multiple writers > > So, I think this is required. IMO, this is not scalable. Essentially, we need 2 parameters to divide the routes among the writer threads: 1) the total number of writers and 2) the core ID in the linear space. Creating a structure with these 2 parameters and passing it to the writer thread would be better and more scalable. > > >> + > >> /* struct route_rule_count defines the total number of rules in > >> following a/b/c > >> * each item in a[]/b[]/c[] is the number of common IP address class > >> A/B/C, not > >> * including the ones for private local network. 
> >> @@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg) { > unsigned > >> int i, j, si, ei; uint64_t begin, total_cycles; -uint8_t core_id = > >> (uint8_t)((uintptr_t)arg); > >> +uint8_t writer_id = (uint8_t)((uintptr_t)arg); > >> uint32_t next_hop_add = 0xAA; > >> > >> -/* 2 writer threads are used */ > >> -if (core_id % 2 == 0) { > >> +/* Single writer (writer_id = 1) */ > >> +if (writer_id == SINGLE_WRITER) { > >> +si = 0; > >> +ei = NUM_LDEPTH_ROUTE_ENTRIES; > >> +} > >> +/* 2 Writers (writer_id = 2/3)*/ > >> +else if (writer_id == MULTI_WRITER_1) { > >> si = 0; > >> ei = NUM_LDEPTH_ROUTE_ENTRIES / 2; > >> } else { > >> @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg) for (i = 0; > >> i < RCU_ITERATIONS; i++) { > >> /* Add all the entries */ > >> for (j = si; j < ei; j++) { > >> -pthread_mutex_lock(&lpm_mutex); > >> +if (writer_id != SINGLE_WRITER) > >> +pthread_mutex_lock(&lpm_mutex); > >> if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, > >> large_ldepth_route_table[j].depth, > >> next_hop_add) != 0) { > >> printf("Failed to add iteration %d, route# %d\n", i, j); > >> -pthread_mutex_unlock(&lpm_mutex); > >> +if (writer_id != SINGLE_WRITER) > >> + > >> pthread_mutex_unlock(&lpm_mutex); > >> return -1; > >> } > >> -pthread_mutex_unlock(&lpm_mutex); > >> +if (writer_id != SINGLE_WRITER) > >> +pthread_mutex_unlock(&lpm_mutex); > >> } > >> > >> /* Delete all the entries */ > >> for (j = si; j < ei; j++) { > >> -pthread_mutex_lock(&lpm_mutex); > >> +if (writer_id != SINGLE_WRITER) > >> +pthread_mutex_lock(&lpm_mutex); > >> if (rte_lpm_delete(lpm, > >> large_ldepth_route_table[j].ip, > >> large_ldepth_route_table[j].depth) != 0) { printf("Failed to delete > >> iteration %d, route# %d\n", i, j); -pthread_mutex_unlock(&lpm_mutex); > >> +if (writer_id != SINGLE_WRITER) > >> + > >> pthread_mutex_unlock(&lpm_mutex); > >> return -1; > >> } > >> -pthread_mutex_unlock(&lpm_mutex); > >> +if (writer_id != SINGLE_WRITER) > >> 
+pthread_mutex_unlock(&lpm_mutex); > >> } > >> } > >> > >> @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg) > >> > >> /* > >> * Functional test: > >> - * 2 writers, rest are readers > >> + * 1/2 writers, rest are readers > >> */ > >> static int > >> -test_lpm_rcu_perf_multi_writer(void) > >> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu) > >> { > >> struct rte_lpm_config config; > >> size_t sz; > >> -unsigned int i; > >> +unsigned int i, j; > >> uint16_t core_id; > >> struct rte_lpm_rcu_config rcu_cfg = {0}; > >> +int (*reader_f)(void *arg) = NULL; > >> > >> if (rte_lcore_count() < 3) { > >> printf("Not enough cores for lpm_rcu_perf_autotest, expecting at > >> least 3\n"); @@ -504,273 +522,76 @@ > >> test_lpm_rcu_perf_multi_writer(void) > >> num_cores++; > >> } > >> > >> -printf("\nPerf test: 2 writers, %d readers, RCU integration > >> enabled\n", -num_cores - 2); > >> - > >> -/* Create LPM table */ > >> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - > config.number_tbl8s = > >> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm = > >> rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > >> -TEST_LPM_ASSERT(lpm != NULL); > >> - > >> -/* Init RCU variable */ > >> -sz = rte_rcu_qsbr_get_memsize(num_cores); > >> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, > >> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores); > >> - > >> -rcu_cfg.v = rv; > >> -/* Assign the RCU variable to LPM */ -if (rte_lpm_rcu_qsbr_add(lpm, > >> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); -goto > >> error; -} > >> - > >> -writer_done = 0; > >> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > >> - > >> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > >> - > >> -/* Launch reader threads */ > >> -for (i = 2; i < num_cores; i++) > >> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, > >> -enabled_core_ids[i]); > >> - > >> -/* Launch writer threads */ > >> -for (i = 0; i < 2; i++) > >> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, > >> 
-(void *)(uintptr_t)i, > >> -enabled_core_ids[i]); > >> - > >> -/* Wait for writer threads */ > >> -for (i = 0; i < 2; i++) > >> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error; > >> - > >> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM > >> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: > >> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles, > >> __ATOMIC_RELAXED) -/ TOTAL_WRITES); > >> - > >> -writer_done = 1; > >> -/* Wait until all readers have exited */ -for (i = 2; i < num_cores; > >> i++) -rte_eal_wait_lcore(enabled_core_ids[i]); > >> - > >> -rte_lpm_free(lpm); > >> -rte_free(rv); > >> -lpm = NULL; > >> -rv = NULL; > >> - > >> -/* Test without RCU integration */ > >> -printf("\nPerf test: 2 writers, %d readers, RCU integration > >> disabled\n", -num_cores - 2); > >> - > >> -/* Create LPM table */ > >> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - > config.number_tbl8s = > >> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm = > >> rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > >> -TEST_LPM_ASSERT(lpm != NULL); > >> - > >> -writer_done = 0; > >> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > >> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > >> - > >> -/* Launch reader threads */ > >> -for (i = 2; i < num_cores; i++) > >> -rte_eal_remote_launch(test_lpm_reader, NULL, -enabled_core_ids[i]); > >> - > >> -/* Launch writer threads */ > >> -for (i = 0; i < 2; i++) > >> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, > >> -(void *)(uintptr_t)i, > >> -enabled_core_ids[i]); > >> - > >> -/* Wait for writer threads */ > >> -for (i = 0; i < 2; i++) > >> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error; > >> - > >> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM > >> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: > >> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles, > >> __ATOMIC_RELAXED) -/ TOTAL_WRITES); > >> - > >> -writer_done = 1; > >> -/* Wait 
until all readers have exited */ -for (i = 2; i < num_cores; > >> i++) -rte_eal_wait_lcore(enabled_core_ids[i]); > >> - > >> -rte_lpm_free(lpm); > >> - > >> -return 0; > >> - > >> -error: > >> -writer_done = 1; > >> -/* Wait until all readers have exited */ -rte_eal_mp_wait_lcore(); > >> - > >> -rte_lpm_free(lpm); > >> -rte_free(rv); > >> - > >> -return -1; > >> -} > >> - > >> -/* > >> - * Functional test: > >> - * Single writer, rest are readers > >> - */ > >> -static int > >> -test_lpm_rcu_perf(void) > >> -{ > >> -struct rte_lpm_config config; > >> -uint64_t begin, total_cycles; > >> -size_t sz; > >> -unsigned int i, j; > >> -uint16_t core_id; > >> -uint32_t next_hop_add = 0xAA; > >> -struct rte_lpm_rcu_config rcu_cfg = {0}; > >> - > >> -if (rte_lcore_count() < 2) { > >> -printf("Not enough cores for lpm_rcu_perf_autotest, expecting at > >> least 2\n"); -return TEST_SKIPPED; -} > >> - > >> -num_cores = 0; > >> -RTE_LCORE_FOREACH_WORKER(core_id) { > >> -enabled_core_ids[num_cores] = core_id; -num_cores++; -} > >> - > >> -printf("\nPerf test: 1 writer, %d readers, RCU integration > >> enabled\n", -num_cores); > >> - > >> -/* Create LPM table */ > >> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - > config.number_tbl8s = > >> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm = > >> rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > >> -TEST_LPM_ASSERT(lpm != NULL); > >> - > >> -/* Init RCU variable */ > >> -sz = rte_rcu_qsbr_get_memsize(num_cores); > >> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, > >> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores); > >> - > >> -rcu_cfg.v = rv; > >> -/* Assign the RCU variable to LPM */ -if (rte_lpm_rcu_qsbr_add(lpm, > >> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); -goto > >> error; -} > >> - > >> -writer_done = 0; > >> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > >> - > >> -/* Launch reader threads */ > >> -for (i = 0; i < num_cores; i++) > >> 
-rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, > >> -enabled_core_ids[i]); > >> - > >> -/* Measure add/delete. */ > >> -begin = rte_rdtsc_precise(); > >> -for (i = 0; i < RCU_ITERATIONS; i++) { > >> -/* Add all the entries */ > >> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if (rte_lpm_add(lpm, > >> large_ldepth_route_table[j].ip, -large_ldepth_route_table[j].depth, > >> -next_hop_add) != 0) { > >> -printf("Failed to add iteration %d, route# %d\n", -i, j); > >> +for (j = 1; j < 3; j++) { > >> +if (use_rcu) > >> +printf("\nPerf test: %d writer(s), %d reader(s)," > >> + " RCU integration enabled\n", j, num_cores - j); else > >> +printf("\nPerf test: %d writer(s), %d reader(s)," > >> + " RCU integration disabled\n", j, num_cores - j); > >> + > >> +/* Create LPM table */ > >> +config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > config.number_tbl8s = > >> +NUM_LDEPTH_ROUTE_ENTRIES; config.flags = 0; lpm = > >> +rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > >> +TEST_LPM_ASSERT(lpm != NULL); > >> + > >> +/* Init RCU variable */ > >> +if (use_rcu) { > >> +sz = rte_rcu_qsbr_get_memsize(num_cores); > >> +rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, > >> + > >> RTE_CACHE_LINE_SIZE); > >> +rte_rcu_qsbr_init(rv, num_cores); > >> + > >> +rcu_cfg.v = rv; > >> +/* Assign the RCU variable to LPM */ if (rte_lpm_rcu_qsbr_add(lpm, > >> +&rcu_cfg) != 0) { printf("RCU variable assignment failed\n"); > >> goto error; > >> } > >> > >> -/* Delete all the entries */ > >> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if > >> (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, > >> -large_ldepth_route_table[j].depth) != 0) { -printf("Failed to delete > >> iteration %d, route# %d\n", -i, j); -goto error; -} -} -total_cycles > >> = rte_rdtsc_precise() - begin; > >> +reader_f = test_lpm_rcu_qsbr_reader; } else reader_f = > >> +test_lpm_reader; > >> > >> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM > >> Deletes: %d\n", TOTAL_WRITES); -printf("Average 
LPM Add/Del: %g > >> cycles\n", -(double)total_cycles / TOTAL_WRITES); > >> +writer_done = 0; > >> +__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > >> > >> -writer_done = 1; > >> -/* Wait until all readers have exited */ -for (i = 0; i < num_cores; > >> i++) -if (rte_eal_wait_lcore(enabled_core_ids[i]); > >> - > >> -rte_lpm_free(lpm); > >> -rte_free(rv); > >> -lpm = NULL; > >> -rv = NULL; > >> - > >> -/* Test without RCU integration */ > >> -printf("\nPerf test: 1 writer, %d readers, RCU integration > >> disabled\n", -num_cores); > >> - > >> -/* Create LPM table */ > >> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - > config.number_tbl8s = > >> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm = > >> rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > >> -TEST_LPM_ASSERT(lpm != NULL); > >> +__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > >> > >> -writer_done = 0; > >> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > >> +/* Launch reader threads */ > >> +for (i = j; i < num_cores; i++) > >> +rte_eal_remote_launch(reader_f, NULL, > >> +enabled_core_ids[i]); > >> > >> -/* Launch reader threads */ > >> -for (i = 0; i < num_cores; i++) > >> -rte_eal_remote_launch(test_lpm_reader, NULL, > >> -enabled_core_ids[i]); > >> +/* Launch writer threads */ > >> +for (i = 0; i < j; i++) > >> +rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, > >> +(void *)(uintptr_t)(i + j), > > This can be just 'j'? > > > >> +enabled_core_ids[i]); > >> > >> -/* Measure add/delete. 
*/ > >> -begin = rte_rdtsc_precise(); > >> -for (i = 0; i < RCU_ITERATIONS; i++) { > >> -/* Add all the entries */ > >> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) > >> -if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, > >> -large_ldepth_route_table[j].depth, > >> -next_hop_add) != 0) { > >> -printf("Failed to add iteration %d, route# > >> %d\n", > >> -i, j); > >> +/* Wait for writer threads */ > >> +for (i = 0; i < j; i++) > >> +if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > >> goto error; > >> -} > >> > >> -/* Delete all the entries */ > >> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) > >> -if (rte_lpm_delete(lpm, > >> large_ldepth_route_table[j].ip, > >> -large_ldepth_route_table[j].depth) != 0) { > >> -printf("Failed to delete iteration %d, route# > >> %d\n", > >> -i, j); > >> -goto error; > >> -} > >> +printf("Total LPM Adds: %d\n", TOTAL_WRITES); > >> +printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > >> +printf("Average LPM Add/Del: %"PRIu64" cycles\n", > >> +__atomic_load_n(&gwrite_cycles, > >> __ATOMIC_RELAXED) > >> +/ TOTAL_WRITES); > >> + > >> +writer_done = 1; > >> +/* Wait until all readers have exited */ > >> +for (i = j; i < num_cores; i++) > >> +rte_eal_wait_lcore(enabled_core_ids[i]); > >> + > >> +rte_lpm_free(lpm); > >> +rte_free(rv); > >> +lpm = NULL; > >> +rv = NULL; > >> } > >> -total_cycles = rte_rdtsc_precise() - begin; > >> - > >> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); > >> -printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > >> -printf("Average LPM Add/Del: %g cycles\n", > >> -(double)total_cycles / TOTAL_WRITES); > >> - > >> -writer_done = 1; > >> -/* Wait until all readers have exited */ > >> -for (i = 0; i < num_cores; i++) > >> -rte_eal_wait_lcore(enabled_core_ids[i]); > >> - > >> -rte_lpm_free(lpm); > >> > >> return 0; > >> > >> @@ -946,9 +767,8 @@ test_lpm_perf(void) > >> rte_lpm_delete_all(lpm); > >> rte_lpm_free(lpm); > >> > >> -test_lpm_rcu_perf(); > >> - > >> -test_lpm_rcu_perf_multi_writer(); > >> 
+test_lpm_rcu_perf_multi_writer(0); > >> +test_lpm_rcu_perf_multi_writer(1); > >> > >> return 0; > >> } > >> -- > >> 2.17.1 > ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication in rcu qsbr perf 2020-11-03 5:32 ` Honnappa Nagarahalli @ 2020-11-03 14:03 ` Dharmik Thakkar 2020-11-03 14:51 ` Honnappa Nagarahalli 2020-11-03 18:01 ` Medvedkin, Vladimir 0 siblings, 2 replies; 52+ messages in thread From: Dharmik Thakkar @ 2020-11-03 14:03 UTC (permalink / raw) To: Honnappa Nagarahalli; +Cc: Bruce Richardson, Vladimir Medvedkin, dev, nd > On Nov 2, 2020, at 11:32 PM, Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote: > > <snip> > >>>> >>>> Avoid code duplication by combining single and multi threaded tests >>>> >>>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> >>>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> >>>> --- >>>> app/test/test_lpm_perf.c | 362 >>>> ++++++++++----------------------------- >>>> 1 file changed, 91 insertions(+), 271 deletions(-) >>>> >>>> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c >>>> index >>>> 224c92fa3d65..229c835c23f7 100644 >>>> --- a/app/test/test_lpm_perf.c >>>> +++ b/app/test/test_lpm_perf.c >>>> @@ -67,6 +67,12 @@ enum { >>>> IP_CLASS_C >>>> }; >>>> >>>> +enum { >>>> +SINGLE_WRITER = 1, >>>> +MULTI_WRITER_1, >>>> +MULTI_WRITER_2 >>>> +}; >>> Do we need this? Can we use the number of cores instead? >>> >> >> There are 3 combinations of writes (adds/deletes): >> 1. Write all the entries - in case of a single writer 2. Write half of the entries - >> in case of multiple writers 3. Write remaining half of the entries - in case of >> multiple writers >> >> So, I think this is required. > IMO, this is not scalable. Essentially, we need 2 parameters to divide the routes among each writer thread. We need 2 parameters, 1) total number of writers 2) the core ID in the linear space. > Creating a structure with these 2 and passing that to the writer thread would be better and scalable. Yes, agreed this is only applicable for 2 writers. Currently, the multi writer test is only limited to a maximum of 2 writers. 
To support a larger number of writers, we need something like this (which I believe is in line with your suggestion): 1. Calculate what each writer will write: single_insert = NUM_LDEPTH_ROUTE_ENTRIES / num_writers 2. Pass the core ID in linear space as an argument to the writer function: pos_core 3. Calculate si and ei in the writer function: si = pos_core * single_insert; ei = si + single_insert I can update the patch to enable more than 2 writers. Do you also suggest we expand the scope of the test to cover more than 2 writers? This will increase the running time of the test (which currently is significant even with 2 writers). > >> >>>> + >>>> /* struct route_rule_count defines the total number of rules in >>>> following a/b/c >>>> * each item in a[]/b[]/c[] is the number of common IP address class >>>> A/B/C, not >>>> * including the ones for private local network. >>>> @@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg) { >> unsigned >>>> int i, j, si, ei; uint64_t begin, total_cycles; -uint8_t core_id = >>>> (uint8_t)((uintptr_t)arg); >>>> +uint8_t writer_id = (uint8_t)((uintptr_t)arg); >>>> uint32_t next_hop_add = 0xAA; >>>> >>>> -/* 2 writer threads are used */ >>>> -if (core_id % 2 == 0) { >>>> +/* Single writer (writer_id = 1) */ >>>> +if (writer_id == SINGLE_WRITER) { >>>> +si = 0; >>>> +ei = NUM_LDEPTH_ROUTE_ENTRIES; >>>> +} >>>> +/* 2 Writers (writer_id = 2/3)*/ >>>> +else if (writer_id == MULTI_WRITER_1) { >>>> si = 0; >>>> ei = NUM_LDEPTH_ROUTE_ENTRIES / 2; >>>> } else { >>>> @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg) for (i = 0; >>>> i < RCU_ITERATIONS; i++) { >>>> /* Add all the entries */ >>>> for (j = si; j < ei; j++) { >>>> -pthread_mutex_lock(&lpm_mutex); >>>> +if (writer_id != SINGLE_WRITER) >>>> +pthread_mutex_lock(&lpm_mutex); >>>> if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, >>>> large_ldepth_route_table[j].depth, >>>> next_hop_add) != 0) { >>>> printf("Failed to add iteration %d, route# %d\n", i, j); >>>>
-pthread_mutex_unlock(&lpm_mutex); >>>> +if (writer_id != SINGLE_WRITER) >>>> + >>>> pthread_mutex_unlock(&lpm_mutex); >>>> return -1; >>>> } >>>> -pthread_mutex_unlock(&lpm_mutex); >>>> +if (writer_id != SINGLE_WRITER) >>>> +pthread_mutex_unlock(&lpm_mutex); >>>> } >>>> >>>> /* Delete all the entries */ >>>> for (j = si; j < ei; j++) { >>>> -pthread_mutex_lock(&lpm_mutex); >>>> +if (writer_id != SINGLE_WRITER) >>>> +pthread_mutex_lock(&lpm_mutex); >>>> if (rte_lpm_delete(lpm, >>>> large_ldepth_route_table[j].ip, >>>> large_ldepth_route_table[j].depth) != 0) { printf("Failed to delete >>>> iteration %d, route# %d\n", i, j); -pthread_mutex_unlock(&lpm_mutex); >>>> +if (writer_id != SINGLE_WRITER) >>>> + >>>> pthread_mutex_unlock(&lpm_mutex); >>>> return -1; >>>> } >>>> -pthread_mutex_unlock(&lpm_mutex); >>>> +if (writer_id != SINGLE_WRITER) >>>> +pthread_mutex_unlock(&lpm_mutex); >>>> } >>>> } >>>> >>>> @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg) >>>> >>>> /* >>>> * Functional test: >>>> - * 2 writers, rest are readers >>>> + * 1/2 writers, rest are readers >>>> */ >>>> static int >>>> -test_lpm_rcu_perf_multi_writer(void) >>>> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu) >>>> { >>>> struct rte_lpm_config config; >>>> size_t sz; >>>> -unsigned int i; >>>> +unsigned int i, j; >>>> uint16_t core_id; >>>> struct rte_lpm_rcu_config rcu_cfg = {0}; >>>> +int (*reader_f)(void *arg) = NULL; >>>> >>>> if (rte_lcore_count() < 3) { >>>> printf("Not enough cores for lpm_rcu_perf_autotest, expecting at >>>> least 3\n"); @@ -504,273 +522,76 @@ >>>> test_lpm_rcu_perf_multi_writer(void) >>>> num_cores++; >>>> } >>>> >>>> -printf("\nPerf test: 2 writers, %d readers, RCU integration >>>> enabled\n", -num_cores - 2); >>>> - >>>> -/* Create LPM table */ >>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - >> config.number_tbl8s = >>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm = >>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config); >>>> -TEST_LPM_ASSERT(lpm != 
NULL); >>>> - >>>> -/* Init RCU variable */ >>>> -sz = rte_rcu_qsbr_get_memsize(num_cores); >>>> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, >>>> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores); >>>> - >>>> -rcu_cfg.v = rv; >>>> -/* Assign the RCU variable to LPM */ -if (rte_lpm_rcu_qsbr_add(lpm, >>>> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); -goto >>>> error; -} >>>> - >>>> -writer_done = 0; >>>> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); >>>> - >>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); >>>> - >>>> -/* Launch reader threads */ >>>> -for (i = 2; i < num_cores; i++) >>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, >>>> -enabled_core_ids[i]); >>>> - >>>> -/* Launch writer threads */ >>>> -for (i = 0; i < 2; i++) >>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, >>>> -(void *)(uintptr_t)i, >>>> -enabled_core_ids[i]); >>>> - >>>> -/* Wait for writer threads */ >>>> -for (i = 0; i < 2; i++) >>>> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error; >>>> - >>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM >>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: >>>> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles, >>>> __ATOMIC_RELAXED) -/ TOTAL_WRITES); >>>> - >>>> -writer_done = 1; >>>> -/* Wait until all readers have exited */ -for (i = 2; i < num_cores; >>>> i++) -rte_eal_wait_lcore(enabled_core_ids[i]); >>>> - >>>> -rte_lpm_free(lpm); >>>> -rte_free(rv); >>>> -lpm = NULL; >>>> -rv = NULL; >>>> - >>>> -/* Test without RCU integration */ >>>> -printf("\nPerf test: 2 writers, %d readers, RCU integration >>>> disabled\n", -num_cores - 2); >>>> - >>>> -/* Create LPM table */ >>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - >> config.number_tbl8s = >>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm = >>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config); >>>> -TEST_LPM_ASSERT(lpm != NULL); >>>> - >>>> -writer_done = 0; >>>> 
-__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); >>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); >>>> - >>>> -/* Launch reader threads */ >>>> -for (i = 2; i < num_cores; i++) >>>> -rte_eal_remote_launch(test_lpm_reader, NULL, -enabled_core_ids[i]); >>>> - >>>> -/* Launch writer threads */ >>>> -for (i = 0; i < 2; i++) >>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, >>>> -(void *)(uintptr_t)i, >>>> -enabled_core_ids[i]); >>>> - >>>> -/* Wait for writer threads */ >>>> -for (i = 0; i < 2; i++) >>>> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error; >>>> - >>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM >>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: >>>> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles, >>>> __ATOMIC_RELAXED) -/ TOTAL_WRITES); >>>> - >>>> -writer_done = 1; >>>> -/* Wait until all readers have exited */ -for (i = 2; i < num_cores; >>>> i++) -rte_eal_wait_lcore(enabled_core_ids[i]); >>>> - >>>> -rte_lpm_free(lpm); >>>> - >>>> -return 0; >>>> - >>>> -error: >>>> -writer_done = 1; >>>> -/* Wait until all readers have exited */ -rte_eal_mp_wait_lcore(); >>>> - >>>> -rte_lpm_free(lpm); >>>> -rte_free(rv); >>>> - >>>> -return -1; >>>> -} >>>> - >>>> -/* >>>> - * Functional test: >>>> - * Single writer, rest are readers >>>> - */ >>>> -static int >>>> -test_lpm_rcu_perf(void) >>>> -{ >>>> -struct rte_lpm_config config; >>>> -uint64_t begin, total_cycles; >>>> -size_t sz; >>>> -unsigned int i, j; >>>> -uint16_t core_id; >>>> -uint32_t next_hop_add = 0xAA; >>>> -struct rte_lpm_rcu_config rcu_cfg = {0}; >>>> - >>>> -if (rte_lcore_count() < 2) { >>>> -printf("Not enough cores for lpm_rcu_perf_autotest, expecting at >>>> least 2\n"); -return TEST_SKIPPED; -} >>>> - >>>> -num_cores = 0; >>>> -RTE_LCORE_FOREACH_WORKER(core_id) { >>>> -enabled_core_ids[num_cores] = core_id; -num_cores++; -} >>>> - >>>> -printf("\nPerf test: 1 writer, %d readers, RCU integration >>>> enabled\n", -num_cores); 
>>>> - >>>> -/* Create LPM table */ >>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - >> config.number_tbl8s = >>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm = >>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config); >>>> -TEST_LPM_ASSERT(lpm != NULL); >>>> - >>>> -/* Init RCU variable */ >>>> -sz = rte_rcu_qsbr_get_memsize(num_cores); >>>> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, >>>> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores); >>>> - >>>> -rcu_cfg.v = rv; >>>> -/* Assign the RCU variable to LPM */ -if (rte_lpm_rcu_qsbr_add(lpm, >>>> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); -goto >>>> error; -} >>>> - >>>> -writer_done = 0; >>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); >>>> - >>>> -/* Launch reader threads */ >>>> -for (i = 0; i < num_cores; i++) >>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, >>>> -enabled_core_ids[i]); >>>> - >>>> -/* Measure add/delete. */ >>>> -begin = rte_rdtsc_precise(); >>>> -for (i = 0; i < RCU_ITERATIONS; i++) { >>>> -/* Add all the entries */ >>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if (rte_lpm_add(lpm, >>>> large_ldepth_route_table[j].ip, -large_ldepth_route_table[j].depth, >>>> -next_hop_add) != 0) { >>>> -printf("Failed to add iteration %d, route# %d\n", -i, j); >>>> +for (j = 1; j < 3; j++) { >>>> +if (use_rcu) >>>> +printf("\nPerf test: %d writer(s), %d reader(s)," >>>> + " RCU integration enabled\n", j, num_cores - j); else >>>> +printf("\nPerf test: %d writer(s), %d reader(s)," >>>> + " RCU integration disabled\n", j, num_cores - j); >>>> + >>>> +/* Create LPM table */ >>>> +config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; >> config.number_tbl8s = >>>> +NUM_LDEPTH_ROUTE_ENTRIES; config.flags = 0; lpm = >>>> +rte_lpm_create(__func__, SOCKET_ID_ANY, &config); >>>> +TEST_LPM_ASSERT(lpm != NULL); >>>> + >>>> +/* Init RCU variable */ >>>> +if (use_rcu) { >>>> +sz = rte_rcu_qsbr_get_memsize(num_cores); >>>> +rv = (struct rte_rcu_qsbr 
*)rte_zmalloc("rcu0", sz, >>>> + >>>> RTE_CACHE_LINE_SIZE); >>>> +rte_rcu_qsbr_init(rv, num_cores); >>>> + >>>> +rcu_cfg.v = rv; >>>> +/* Assign the RCU variable to LPM */ if (rte_lpm_rcu_qsbr_add(lpm, >>>> +&rcu_cfg) != 0) { printf("RCU variable assignment failed\n"); >>>> goto error; >>>> } >>>> >>>> -/* Delete all the entries */ >>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if >>>> (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, >>>> -large_ldepth_route_table[j].depth) != 0) { -printf("Failed to delete >>>> iteration %d, route# %d\n", -i, j); -goto error; -} -} -total_cycles >>>> = rte_rdtsc_precise() - begin; >>>> +reader_f = test_lpm_rcu_qsbr_reader; } else reader_f = >>>> +test_lpm_reader; >>>> >>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM >>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: %g >>>> cycles\n", -(double)total_cycles / TOTAL_WRITES); >>>> +writer_done = 0; >>>> +__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); >>>> >>>> -writer_done = 1; >>>> -/* Wait until all readers have exited */ -for (i = 0; i < num_cores; >>>> i++) -if (rte_eal_wait_lcore(enabled_core_ids[i]); >>>> - >>>> -rte_lpm_free(lpm); >>>> -rte_free(rv); >>>> -lpm = NULL; >>>> -rv = NULL; >>>> - >>>> -/* Test without RCU integration */ >>>> -printf("\nPerf test: 1 writer, %d readers, RCU integration >>>> disabled\n", -num_cores); >>>> - >>>> -/* Create LPM table */ >>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - >> config.number_tbl8s = >>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm = >>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config); >>>> -TEST_LPM_ASSERT(lpm != NULL); >>>> +__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); >>>> >>>> -writer_done = 0; >>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); >>>> +/* Launch reader threads */ >>>> +for (i = j; i < num_cores; i++) >>>> +rte_eal_remote_launch(reader_f, NULL, >>>> +enabled_core_ids[i]); >>>> >>>> -/* Launch reader threads */ >>>> -for (i = 0; i < 
num_cores; i++) >>>> -rte_eal_remote_launch(test_lpm_reader, NULL, >>>> -enabled_core_ids[i]); >>>> +/* Launch writer threads */ >>>> +for (i = 0; i < j; i++) >>>> +rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, >>>> +(void *)(uintptr_t)(i + j), >>> This can be just 'j'? >>> >>>> +enabled_core_ids[i]); >>>> >>>> -/* Measure add/delete. */ >>>> -begin = rte_rdtsc_precise(); >>>> -for (i = 0; i < RCU_ITERATIONS; i++) { >>>> -/* Add all the entries */ >>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) >>>> -if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, >>>> -large_ldepth_route_table[j].depth, >>>> -next_hop_add) != 0) { >>>> -printf("Failed to add iteration %d, route# >>>> %d\n", >>>> -i, j); >>>> +/* Wait for writer threads */ >>>> +for (i = 0; i < j; i++) >>>> +if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) >>>> goto error; >>>> -} >>>> >>>> -/* Delete all the entries */ >>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) >>>> -if (rte_lpm_delete(lpm, >>>> large_ldepth_route_table[j].ip, >>>> -large_ldepth_route_table[j].depth) != 0) { >>>> -printf("Failed to delete iteration %d, route# >>>> %d\n", >>>> -i, j); >>>> -goto error; >>>> -} >>>> +printf("Total LPM Adds: %d\n", TOTAL_WRITES); >>>> +printf("Total LPM Deletes: %d\n", TOTAL_WRITES); >>>> +printf("Average LPM Add/Del: %"PRIu64" cycles\n", >>>> +__atomic_load_n(&gwrite_cycles, >>>> __ATOMIC_RELAXED) >>>> +/ TOTAL_WRITES); >>>> + >>>> +writer_done = 1; >>>> +/* Wait until all readers have exited */ >>>> +for (i = j; i < num_cores; i++) >>>> +rte_eal_wait_lcore(enabled_core_ids[i]); >>>> + >>>> +rte_lpm_free(lpm); >>>> +rte_free(rv); >>>> +lpm = NULL; >>>> +rv = NULL; >>>> } >>>> -total_cycles = rte_rdtsc_precise() - begin; >>>> - >>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); >>>> -printf("Total LPM Deletes: %d\n", TOTAL_WRITES); >>>> -printf("Average LPM Add/Del: %g cycles\n", >>>> -(double)total_cycles / TOTAL_WRITES); >>>> - >>>> -writer_done = 1; >>>> -/* Wait until all readers have 
exited */ >>>> -for (i = 0; i < num_cores; i++) >>>> -rte_eal_wait_lcore(enabled_core_ids[i]); >>>> - >>>> -rte_lpm_free(lpm); >>>> >>>> return 0; >>>> >>>> @@ -946,9 +767,8 @@ test_lpm_perf(void) >>>> rte_lpm_delete_all(lpm); >>>> rte_lpm_free(lpm); >>>> >>>> -test_lpm_rcu_perf(); >>>> - >>>> -test_lpm_rcu_perf_multi_writer(); >>>> +test_lpm_rcu_perf_multi_writer(0); >>>> +test_lpm_rcu_perf_multi_writer(1); >>>> >>>> return 0; >>>> } >>>> -- >>>> 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication in rcu qsbr perf 2020-11-03 14:03 ` Dharmik Thakkar @ 2020-11-03 14:51 ` Honnappa Nagarahalli 2020-11-03 18:01 ` Medvedkin, Vladimir 1 sibling, 0 replies; 52+ messages in thread From: Honnappa Nagarahalli @ 2020-11-03 14:51 UTC (permalink / raw) To: Dharmik Thakkar Cc: Bruce Richardson, Vladimir Medvedkin, dev, nd, Honnappa Nagarahalli, nd <snip> > >>>> > >>>> Avoid code duplication by combining single and multi threaded tests > >>>> > >>>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> > >>>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> > >>>> --- > >>>> app/test/test_lpm_perf.c | 362 > >>>> ++++++++++----------------------------- > >>>> 1 file changed, 91 insertions(+), 271 deletions(-) > >>>> > >>>> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c > >>>> index > >>>> 224c92fa3d65..229c835c23f7 100644 > >>>> --- a/app/test/test_lpm_perf.c > >>>> +++ b/app/test/test_lpm_perf.c > >>>> @@ -67,6 +67,12 @@ enum { > >>>> IP_CLASS_C > >>>> }; > >>>> > >>>> +enum { > >>>> +SINGLE_WRITER = 1, > >>>> +MULTI_WRITER_1, > >>>> +MULTI_WRITER_2 > >>>> +}; > >>> Do we need this? Can we use the number of cores instead? > >>> > >> > >> There are 3 combinations of writes (adds/deletes): > >> 1. Write all the entries - in case of a single writer 2. Write half > >> of the entries - in case of multiple writers 3. Write remaining half > >> of the entries - in case of multiple writers > >> > >> So, I think this is required. > > IMO, this is not scalable. Essentially, we need 2 parameters to divide the > routes among each writer thread. We need 2 parameters, 1) total number of > writers 2) the core ID in the linear space. > > Creating a structure with these 2 and passing that to the writer thread > would be better and scalable. > > Yes, agreed this is only applicable for 2 writers. Currently, the multi writer > test is only limited to a maximum of 2 writers. 
> To support more number of writers, we need something like this (which I > believe is in lines with your suggestion): > 1. Calculate what each writer will write: single_insert = TOTAL_WRITES / > num_writers 2. Pass core ID in linear space as an argument to the writer > function: pos_core 3. Calculate si and ei in the writer function: si = pos_core * > single_insert; ei = si + single_insert > > I can update the patch to enable more than 2 writers. > Do you also suggest we expand the scope of the test to test with more than > 2 writers? > This will increase the time for which the test is running (which currently is > significant even with 2 writers). Agree, no to increasing the number of writers. Yes for making the code more generic. > > > > >> > >>>> + > >>>> /* struct route_rule_count defines the total number of rules in > >>>> following a/b/c > >>>> * each item in a[]/b[]/c[] is the number of common IP address class > >>>> A/B/C, not > >>>> * including the ones for private local network. 
> >>>> @@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg) { > >> unsigned > >>>> int i, j, si, ei; uint64_t begin, total_cycles; -uint8_t core_id = > >>>> (uint8_t)((uintptr_t)arg); > >>>> +uint8_t writer_id = (uint8_t)((uintptr_t)arg); > >>>> uint32_t next_hop_add = 0xAA; > >>>> > >>>> -/* 2 writer threads are used */ > >>>> -if (core_id % 2 == 0) { > >>>> +/* Single writer (writer_id = 1) */ if (writer_id == > >>>> +SINGLE_WRITER) { si = 0; ei = NUM_LDEPTH_ROUTE_ENTRIES; } > >>>> +/* 2 Writers (writer_id = 2/3)*/ > >>>> +else if (writer_id == MULTI_WRITER_1) { > >>>> si = 0; > >>>> ei = NUM_LDEPTH_ROUTE_ENTRIES / 2; > >>>> } else { > >>>> @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg) for (i = > >>>> 0; i < RCU_ITERATIONS; i++) { > >>>> /* Add all the entries */ > >>>> for (j = si; j < ei; j++) { > >>>> -pthread_mutex_lock(&lpm_mutex); > >>>> +if (writer_id != SINGLE_WRITER) > >>>> +pthread_mutex_lock(&lpm_mutex); > >>>> if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, > >>>> large_ldepth_route_table[j].depth, > >>>> next_hop_add) != 0) { > >>>> printf("Failed to add iteration %d, route# %d\n", i, j); > >>>> -pthread_mutex_unlock(&lpm_mutex); > >>>> +if (writer_id != SINGLE_WRITER) > >>>> + > >>>> pthread_mutex_unlock(&lpm_mutex); > >>>> return -1; > >>>> } > >>>> -pthread_mutex_unlock(&lpm_mutex); > >>>> +if (writer_id != SINGLE_WRITER) > >>>> +pthread_mutex_unlock(&lpm_mutex); > >>>> } > >>>> > >>>> /* Delete all the entries */ > >>>> for (j = si; j < ei; j++) { > >>>> -pthread_mutex_lock(&lpm_mutex); > >>>> +if (writer_id != SINGLE_WRITER) > >>>> +pthread_mutex_lock(&lpm_mutex); > >>>> if (rte_lpm_delete(lpm, > >>>> large_ldepth_route_table[j].ip, > >>>> large_ldepth_route_table[j].depth) != 0) { printf("Failed to delete > >>>> iteration %d, route# %d\n", i, j); > >>>> -pthread_mutex_unlock(&lpm_mutex); > >>>> +if (writer_id != SINGLE_WRITER) > >>>> + > >>>> pthread_mutex_unlock(&lpm_mutex); > >>>> return -1; > >>>> } > >>>> 
-pthread_mutex_unlock(&lpm_mutex); > >>>> +if (writer_id != SINGLE_WRITER) > >>>> +pthread_mutex_unlock(&lpm_mutex); > >>>> } > >>>> } > >>>> > >>>> @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg) > >>>> > >>>> /* > >>>> * Functional test: > >>>> - * 2 writers, rest are readers > >>>> + * 1/2 writers, rest are readers > >>>> */ > >>>> static int > >>>> -test_lpm_rcu_perf_multi_writer(void) > >>>> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu) > >>>> { > >>>> struct rte_lpm_config config; > >>>> size_t sz; > >>>> -unsigned int i; > >>>> +unsigned int i, j; > >>>> uint16_t core_id; > >>>> struct rte_lpm_rcu_config rcu_cfg = {0}; > >>>> +int (*reader_f)(void *arg) = NULL; > >>>> > >>>> if (rte_lcore_count() < 3) { > >>>> printf("Not enough cores for lpm_rcu_perf_autotest, expecting at > >>>> least 3\n"); @@ -504,273 +522,76 @@ > >>>> test_lpm_rcu_perf_multi_writer(void) > >>>> num_cores++; > >>>> } > >>>> > >>>> -printf("\nPerf test: 2 writers, %d readers, RCU integration > >>>> enabled\n", -num_cores - 2); > >>>> - > >>>> -/* Create LPM table */ > >>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - > >> config.number_tbl8s = > >>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm = > >>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > >>>> -TEST_LPM_ASSERT(lpm != NULL); > >>>> - > >>>> -/* Init RCU variable */ > >>>> -sz = rte_rcu_qsbr_get_memsize(num_cores); > >>>> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, > >>>> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores); > >>>> - > >>>> -rcu_cfg.v = rv; > >>>> -/* Assign the RCU variable to LPM */ -if > >>>> (rte_lpm_rcu_qsbr_add(lpm, > >>>> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); > >>>> -goto error; -} > >>>> - > >>>> -writer_done = 0; > >>>> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > >>>> - > >>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > >>>> - > >>>> -/* Launch reader threads */ > >>>> -for (i = 2; i < num_cores; i++) > >>>> 
-rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, > >>>> -enabled_core_ids[i]); > >>>> - > >>>> -/* Launch writer threads */ > >>>> -for (i = 0; i < 2; i++) > >>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, > >>>> -(void *)(uintptr_t)i, > >>>> -enabled_core_ids[i]); > >>>> - > >>>> -/* Wait for writer threads */ > >>>> -for (i = 0; i < 2; i++) > >>>> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error; > >>>> - > >>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM > >>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: > >>>> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles, > >>>> __ATOMIC_RELAXED) -/ TOTAL_WRITES); > >>>> - > >>>> -writer_done = 1; > >>>> -/* Wait until all readers have exited */ -for (i = 2; i < > >>>> num_cores; > >>>> i++) -rte_eal_wait_lcore(enabled_core_ids[i]); > >>>> - > >>>> -rte_lpm_free(lpm); > >>>> -rte_free(rv); > >>>> -lpm = NULL; > >>>> -rv = NULL; > >>>> - > >>>> -/* Test without RCU integration */ -printf("\nPerf test: 2 > >>>> writers, %d readers, RCU integration disabled\n", -num_cores - 2); > >>>> - > >>>> -/* Create LPM table */ > >>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - > >> config.number_tbl8s = > >>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm = > >>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > >>>> -TEST_LPM_ASSERT(lpm != NULL); > >>>> - > >>>> -writer_done = 0; > >>>> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > >>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > >>>> - > >>>> -/* Launch reader threads */ > >>>> -for (i = 2; i < num_cores; i++) > >>>> -rte_eal_remote_launch(test_lpm_reader, NULL, > >>>> -enabled_core_ids[i]); > >>>> - > >>>> -/* Launch writer threads */ > >>>> -for (i = 0; i < 2; i++) > >>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, > >>>> -(void *)(uintptr_t)i, > >>>> -enabled_core_ids[i]); > >>>> - > >>>> -/* Wait for writer threads */ > >>>> -for (i = 0; i < 2; i++) > >>>> -if 
(rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error; > >>>> - > >>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM > >>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: > >>>> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles, > >>>> __ATOMIC_RELAXED) -/ TOTAL_WRITES); > >>>> - > >>>> -writer_done = 1; > >>>> -/* Wait until all readers have exited */ -for (i = 2; i < > >>>> num_cores; > >>>> i++) -rte_eal_wait_lcore(enabled_core_ids[i]); > >>>> - > >>>> -rte_lpm_free(lpm); > >>>> - > >>>> -return 0; > >>>> - > >>>> -error: > >>>> -writer_done = 1; > >>>> -/* Wait until all readers have exited */ -rte_eal_mp_wait_lcore(); > >>>> - > >>>> -rte_lpm_free(lpm); > >>>> -rte_free(rv); > >>>> - > >>>> -return -1; > >>>> -} > >>>> - > >>>> -/* > >>>> - * Functional test: > >>>> - * Single writer, rest are readers > >>>> - */ > >>>> -static int > >>>> -test_lpm_rcu_perf(void) > >>>> -{ > >>>> -struct rte_lpm_config config; > >>>> -uint64_t begin, total_cycles; > >>>> -size_t sz; > >>>> -unsigned int i, j; > >>>> -uint16_t core_id; > >>>> -uint32_t next_hop_add = 0xAA; > >>>> -struct rte_lpm_rcu_config rcu_cfg = {0}; > >>>> - > >>>> -if (rte_lcore_count() < 2) { > >>>> -printf("Not enough cores for lpm_rcu_perf_autotest, expecting at > >>>> least 2\n"); -return TEST_SKIPPED; -} > >>>> - > >>>> -num_cores = 0; > >>>> -RTE_LCORE_FOREACH_WORKER(core_id) { - > enabled_core_ids[num_cores] = > >>>> core_id; -num_cores++; -} > >>>> - > >>>> -printf("\nPerf test: 1 writer, %d readers, RCU integration > >>>> enabled\n", -num_cores); > >>>> - > >>>> -/* Create LPM table */ > >>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - > >> config.number_tbl8s = > >>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm = > >>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > >>>> -TEST_LPM_ASSERT(lpm != NULL); > >>>> - > >>>> -/* Init RCU variable */ > >>>> -sz = rte_rcu_qsbr_get_memsize(num_cores); > >>>> -rv = (struct rte_rcu_qsbr 
*)rte_zmalloc("rcu0", sz, > >>>> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores); > >>>> - > >>>> -rcu_cfg.v = rv; > >>>> -/* Assign the RCU variable to LPM */ -if > >>>> (rte_lpm_rcu_qsbr_add(lpm, > >>>> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); > >>>> -goto error; -} > >>>> - > >>>> -writer_done = 0; > >>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > >>>> - > >>>> -/* Launch reader threads */ > >>>> -for (i = 0; i < num_cores; i++) > >>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, > >>>> -enabled_core_ids[i]); > >>>> - > >>>> -/* Measure add/delete. */ > >>>> -begin = rte_rdtsc_precise(); > >>>> -for (i = 0; i < RCU_ITERATIONS; i++) { > >>>> -/* Add all the entries */ > >>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if > >>>> (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, > >>>> -large_ldepth_route_table[j].depth, > >>>> -next_hop_add) != 0) { > >>>> -printf("Failed to add iteration %d, route# %d\n", -i, j); > >>>> +for (j = 1; j < 3; j++) { > >>>> +if (use_rcu) > >>>> +printf("\nPerf test: %d writer(s), %d reader(s)," > >>>> + " RCU integration enabled\n", j, num_cores - j); else > >>>> +printf("\nPerf test: %d writer(s), %d reader(s)," > >>>> + " RCU integration disabled\n", j, num_cores - j); > >>>> + > >>>> +/* Create LPM table */ > >>>> +config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > >> config.number_tbl8s = > >>>> +NUM_LDEPTH_ROUTE_ENTRIES; config.flags = 0; lpm = > >>>> +rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > >>>> +TEST_LPM_ASSERT(lpm != NULL); > >>>> + > >>>> +/* Init RCU variable */ > >>>> +if (use_rcu) { > >>>> +sz = rte_rcu_qsbr_get_memsize(num_cores); > >>>> +rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, > >>>> + > >>>> RTE_CACHE_LINE_SIZE); > >>>> +rte_rcu_qsbr_init(rv, num_cores); > >>>> + > >>>> +rcu_cfg.v = rv; > >>>> +/* Assign the RCU variable to LPM */ if (rte_lpm_rcu_qsbr_add(lpm, > >>>> +&rcu_cfg) != 0) { printf("RCU variable assignment failed\n"); > >>>> goto 
error; > >>>> } > >>>> > >>>> -/* Delete all the entries */ > >>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if > >>>> (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, > >>>> -large_ldepth_route_table[j].depth) != 0) { -printf("Failed to > >>>> delete iteration %d, route# %d\n", -i, j); -goto error; -} -} > >>>> -total_cycles = rte_rdtsc_precise() - begin; > >>>> +reader_f = test_lpm_rcu_qsbr_reader; } else reader_f = > >>>> +test_lpm_reader; > >>>> > >>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM > >>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: %g > >>>> cycles\n", -(double)total_cycles / TOTAL_WRITES); > >>>> +writer_done = 0; > >>>> +__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > >>>> > >>>> -writer_done = 1; > >>>> -/* Wait until all readers have exited */ -for (i = 0; i < > >>>> num_cores; > >>>> i++) -if (rte_eal_wait_lcore(enabled_core_ids[i]); > >>>> - > >>>> -rte_lpm_free(lpm); > >>>> -rte_free(rv); > >>>> -lpm = NULL; > >>>> -rv = NULL; > >>>> - > >>>> -/* Test without RCU integration */ -printf("\nPerf test: 1 writer, > >>>> %d readers, RCU integration disabled\n", -num_cores); > >>>> - > >>>> -/* Create LPM table */ > >>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - > >> config.number_tbl8s = > >>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm = > >>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > >>>> -TEST_LPM_ASSERT(lpm != NULL); > >>>> +__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > >>>> > >>>> -writer_done = 0; > >>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > >>>> +/* Launch reader threads */ > >>>> +for (i = j; i < num_cores; i++) > >>>> +rte_eal_remote_launch(reader_f, NULL, enabled_core_ids[i]); > >>>> > >>>> -/* Launch reader threads */ > >>>> -for (i = 0; i < num_cores; i++) > >>>> -rte_eal_remote_launch(test_lpm_reader, NULL, > >>>> -enabled_core_ids[i]); > >>>> +/* Launch writer threads */ > >>>> +for (i = 0; i < j; i++) > >>>> 
+rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, > >>>> +(void *)(uintptr_t)(i + j), > >>> This can be just 'j'? > >>> > >>>> +enabled_core_ids[i]); > >>>> > >>>> -/* Measure add/delete. */ > >>>> -begin = rte_rdtsc_precise(); > >>>> -for (i = 0; i < RCU_ITERATIONS; i++) { > >>>> -/* Add all the entries */ > >>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if > >>>> (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, > >>>> -large_ldepth_route_table[j].depth, > >>>> -next_hop_add) != 0) { > >>>> -printf("Failed to add iteration %d, route# %d\n", -i, j); > >>>> +/* Wait for writer threads */ > >>>> +for (i = 0; i < j; i++) > >>>> +if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > >>>> goto error; > >>>> -} > >>>> > >>>> -/* Delete all the entries */ > >>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if > >>>> (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, > >>>> -large_ldepth_route_table[j].depth) != 0) { -printf("Failed to > >>>> delete iteration %d, route# %d\n", -i, j); -goto error; -} > >>>> +printf("Total LPM Adds: %d\n", TOTAL_WRITES); printf("Total LPM > >>>> +Deletes: %d\n", TOTAL_WRITES); printf("Average LPM Add/Del: > >>>> +%"PRIu64" cycles\n", __atomic_load_n(&gwrite_cycles, > >>>> __ATOMIC_RELAXED) > >>>> +/ TOTAL_WRITES); > >>>> + > >>>> +writer_done = 1; > >>>> +/* Wait until all readers have exited */ for (i = j; i < > >>>> +num_cores; i++) rte_eal_wait_lcore(enabled_core_ids[i]); > >>>> + > >>>> +rte_lpm_free(lpm); > >>>> +rte_free(rv); > >>>> +lpm = NULL; > >>>> +rv = NULL; > >>>> } > >>>> -total_cycles = rte_rdtsc_precise() - begin; > >>>> - > >>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM > >>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: %g > >>>> cycles\n", -(double)total_cycles / TOTAL_WRITES); > >>>> - > >>>> -writer_done = 1; > >>>> -/* Wait until all readers have exited */ -for (i = 0; i < > >>>> num_cores; i++) -rte_eal_wait_lcore(enabled_core_ids[i]); > >>>> - > >>>> 
-rte_lpm_free(lpm); > >>>> > >>>> return 0; > >>>> > >>>> @@ -946,9 +767,8 @@ test_lpm_perf(void) rte_lpm_delete_all(lpm); > >>>> rte_lpm_free(lpm); > >>>> > >>>> -test_lpm_rcu_perf(); > >>>> - > >>>> -test_lpm_rcu_perf_multi_writer(); > >>>> +test_lpm_rcu_perf_multi_writer(0); > >>>> +test_lpm_rcu_perf_multi_writer(1); > >>>> > >>>> return 0; > >>>> } > >>>> -- > >>>> 2.17.1 > ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication in rcu qsbr perf 2020-11-03 14:03 ` Dharmik Thakkar 2020-11-03 14:51 ` Honnappa Nagarahalli @ 2020-11-03 18:01 ` Medvedkin, Vladimir 1 sibling, 0 replies; 52+ messages in thread From: Medvedkin, Vladimir @ 2020-11-03 18:01 UTC (permalink / raw) To: Dharmik Thakkar, Honnappa Nagarahalli; +Cc: Bruce Richardson, dev, nd Hi, On 03/11/2020 14:03, Dharmik Thakkar wrote: > > >> On Nov 2, 2020, at 11:32 PM, Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote: >> >> <snip> >> >>>>> >>>>> Avoid code duplication by combining single and multi threaded tests >>>>> >>>>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> >>>>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> >>>>> --- >>>>> app/test/test_lpm_perf.c | 362 >>>>> ++++++++++----------------------------- >>>>> 1 file changed, 91 insertions(+), 271 deletions(-) >>>>> >>>>> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c >>>>> index >>>>> 224c92fa3d65..229c835c23f7 100644 >>>>> --- a/app/test/test_lpm_perf.c >>>>> +++ b/app/test/test_lpm_perf.c >>>>> @@ -67,6 +67,12 @@ enum { >>>>> IP_CLASS_C >>>>> }; >>>>> >>>>> +enum { >>>>> +SINGLE_WRITER = 1, >>>>> +MULTI_WRITER_1, >>>>> +MULTI_WRITER_2 >>>>> +}; >>>> Do we need this? Can we use the number of cores instead? >>>> >>> >>> There are 3 combinations of writes (adds/deletes): >>> 1. Write all the entries - in case of a single writer 2. Write half of the entries - >>> in case of multiple writers 3. Write remaining half of the entries - in case of >>> multiple writers >>> >>> So, I think this is required. >> IMO, this is not scalable. Essentially, we need 2 parameters to divide the routes among each writer thread. We need 2 parameters, 1) total number of writers 2) the core ID in the linear space. >> Creating a structure with these 2 and passing that to the writer thread would be better and scalable. > > Yes, agreed this is only applicable for 2 writers. 
Currently, the multi writer test is limited to a maximum of 2 writers. > To support a larger number of writers, we need something like this (which I believe is in line with your suggestion): > 1. Calculate what each writer will write: single_insert = NUM_LDEPTH_ROUTE_ENTRIES / num_writers > 2. Pass core ID in linear space as an argument to the writer function: pos_core > 3. Calculate si and ei in the writer function: si = pos_core * single_insert; ei = si + single_insert > I agree with Honnappa's suggestion; to me it looks good, better than the previous implementation. > I can update the patch to enable more than 2 writers. > Do you also suggest we expand the scope of the test to test with more than 2 writers? > This will increase the time for which the test is running (which currently is significant even with 2 writers). > I don't see any reason to increase the number of writers beyond 2. >> >>> >>>>> + >>>>> /* struct route_rule_count defines the total number of rules in >>>>> following a/b/c >>>>> * each item in a[]/b[]/c[] is the number of common IP address class >>>>> A/B/C, not >>>>> * including the ones for private local network. 
>>>>> @@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg) { >>> unsigned >>>>> int i, j, si, ei; uint64_t begin, total_cycles; -uint8_t core_id = >>>>> (uint8_t)((uintptr_t)arg); >>>>> +uint8_t writer_id = (uint8_t)((uintptr_t)arg); >>>>> uint32_t next_hop_add = 0xAA; >>>>> >>>>> -/* 2 writer threads are used */ >>>>> -if (core_id % 2 == 0) { >>>>> +/* Single writer (writer_id = 1) */ >>>>> +if (writer_id == SINGLE_WRITER) { >>>>> +si = 0; >>>>> +ei = NUM_LDEPTH_ROUTE_ENTRIES; >>>>> +} >>>>> +/* 2 Writers (writer_id = 2/3)*/ >>>>> +else if (writer_id == MULTI_WRITER_1) { >>>>> si = 0; >>>>> ei = NUM_LDEPTH_ROUTE_ENTRIES / 2; >>>>> } else { >>>>> @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg) for (i = 0; >>>>> i < RCU_ITERATIONS; i++) { >>>>> /* Add all the entries */ >>>>> for (j = si; j < ei; j++) { >>>>> -pthread_mutex_lock(&lpm_mutex); >>>>> +if (writer_id != SINGLE_WRITER) >>>>> +pthread_mutex_lock(&lpm_mutex); >>>>> if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, >>>>> large_ldepth_route_table[j].depth, >>>>> next_hop_add) != 0) { >>>>> printf("Failed to add iteration %d, route# %d\n", i, j); >>>>> -pthread_mutex_unlock(&lpm_mutex); >>>>> +if (writer_id != SINGLE_WRITER) >>>>> + >>>>> pthread_mutex_unlock(&lpm_mutex); >>>>> return -1; >>>>> } >>>>> -pthread_mutex_unlock(&lpm_mutex); >>>>> +if (writer_id != SINGLE_WRITER) >>>>> +pthread_mutex_unlock(&lpm_mutex); >>>>> } >>>>> >>>>> /* Delete all the entries */ >>>>> for (j = si; j < ei; j++) { >>>>> -pthread_mutex_lock(&lpm_mutex); >>>>> +if (writer_id != SINGLE_WRITER) >>>>> +pthread_mutex_lock(&lpm_mutex); >>>>> if (rte_lpm_delete(lpm, >>>>> large_ldepth_route_table[j].ip, >>>>> large_ldepth_route_table[j].depth) != 0) { printf("Failed to delete >>>>> iteration %d, route# %d\n", i, j); -pthread_mutex_unlock(&lpm_mutex); >>>>> +if (writer_id != SINGLE_WRITER) >>>>> + >>>>> pthread_mutex_unlock(&lpm_mutex); >>>>> return -1; >>>>> } >>>>> -pthread_mutex_unlock(&lpm_mutex); >>>>> +if 
(writer_id != SINGLE_WRITER) >>>>> +pthread_mutex_unlock(&lpm_mutex); >>>>> } >>>>> } >>>>> >>>>> @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg) >>>>> >>>>> /* >>>>> * Functional test: >>>>> - * 2 writers, rest are readers >>>>> + * 1/2 writers, rest are readers >>>>> */ >>>>> static int >>>>> -test_lpm_rcu_perf_multi_writer(void) >>>>> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu) >>>>> { >>>>> struct rte_lpm_config config; >>>>> size_t sz; >>>>> -unsigned int i; >>>>> +unsigned int i, j; >>>>> uint16_t core_id; >>>>> struct rte_lpm_rcu_config rcu_cfg = {0}; >>>>> +int (*reader_f)(void *arg) = NULL; >>>>> >>>>> if (rte_lcore_count() < 3) { >>>>> printf("Not enough cores for lpm_rcu_perf_autotest, expecting at >>>>> least 3\n"); @@ -504,273 +522,76 @@ >>>>> test_lpm_rcu_perf_multi_writer(void) >>>>> num_cores++; >>>>> } >>>>> >>>>> -printf("\nPerf test: 2 writers, %d readers, RCU integration >>>>> enabled\n", -num_cores - 2); >>>>> - >>>>> -/* Create LPM table */ >>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - >>> config.number_tbl8s = >>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm = >>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config); >>>>> -TEST_LPM_ASSERT(lpm != NULL); >>>>> - >>>>> -/* Init RCU variable */ >>>>> -sz = rte_rcu_qsbr_get_memsize(num_cores); >>>>> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, >>>>> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores); >>>>> - >>>>> -rcu_cfg.v = rv; >>>>> -/* Assign the RCU variable to LPM */ -if (rte_lpm_rcu_qsbr_add(lpm, >>>>> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); -goto >>>>> error; -} >>>>> - >>>>> -writer_done = 0; >>>>> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); >>>>> - >>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); >>>>> - >>>>> -/* Launch reader threads */ >>>>> -for (i = 2; i < num_cores; i++) >>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, >>>>> -enabled_core_ids[i]); >>>>> - >>>>> -/* Launch writer threads 
*/ >>>>> -for (i = 0; i < 2; i++) >>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, >>>>> -(void *)(uintptr_t)i, >>>>> -enabled_core_ids[i]); >>>>> - >>>>> -/* Wait for writer threads */ >>>>> -for (i = 0; i < 2; i++) >>>>> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error; >>>>> - >>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM >>>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: >>>>> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles, >>>>> __ATOMIC_RELAXED) -/ TOTAL_WRITES); >>>>> - >>>>> -writer_done = 1; >>>>> -/* Wait until all readers have exited */ -for (i = 2; i < num_cores; >>>>> i++) -rte_eal_wait_lcore(enabled_core_ids[i]); >>>>> - >>>>> -rte_lpm_free(lpm); >>>>> -rte_free(rv); >>>>> -lpm = NULL; >>>>> -rv = NULL; >>>>> - >>>>> -/* Test without RCU integration */ >>>>> -printf("\nPerf test: 2 writers, %d readers, RCU integration >>>>> disabled\n", -num_cores - 2); >>>>> - >>>>> -/* Create LPM table */ >>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - >>> config.number_tbl8s = >>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm = >>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config); >>>>> -TEST_LPM_ASSERT(lpm != NULL); >>>>> - >>>>> -writer_done = 0; >>>>> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); >>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); >>>>> - >>>>> -/* Launch reader threads */ >>>>> -for (i = 2; i < num_cores; i++) >>>>> -rte_eal_remote_launch(test_lpm_reader, NULL, -enabled_core_ids[i]); >>>>> - >>>>> -/* Launch writer threads */ >>>>> -for (i = 0; i < 2; i++) >>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, >>>>> -(void *)(uintptr_t)i, >>>>> -enabled_core_ids[i]); >>>>> - >>>>> -/* Wait for writer threads */ >>>>> -for (i = 0; i < 2; i++) >>>>> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error; >>>>> - >>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM >>>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM 
Add/Del: >>>>> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles, >>>>> __ATOMIC_RELAXED) -/ TOTAL_WRITES); >>>>> - >>>>> -writer_done = 1; >>>>> -/* Wait until all readers have exited */ -for (i = 2; i < num_cores; >>>>> i++) -rte_eal_wait_lcore(enabled_core_ids[i]); >>>>> - >>>>> -rte_lpm_free(lpm); >>>>> - >>>>> -return 0; >>>>> - >>>>> -error: >>>>> -writer_done = 1; >>>>> -/* Wait until all readers have exited */ -rte_eal_mp_wait_lcore(); >>>>> - >>>>> -rte_lpm_free(lpm); >>>>> -rte_free(rv); >>>>> - >>>>> -return -1; >>>>> -} >>>>> - >>>>> -/* >>>>> - * Functional test: >>>>> - * Single writer, rest are readers >>>>> - */ >>>>> -static int >>>>> -test_lpm_rcu_perf(void) >>>>> -{ >>>>> -struct rte_lpm_config config; >>>>> -uint64_t begin, total_cycles; >>>>> -size_t sz; >>>>> -unsigned int i, j; >>>>> -uint16_t core_id; >>>>> -uint32_t next_hop_add = 0xAA; >>>>> -struct rte_lpm_rcu_config rcu_cfg = {0}; >>>>> - >>>>> -if (rte_lcore_count() < 2) { >>>>> -printf("Not enough cores for lpm_rcu_perf_autotest, expecting at >>>>> least 2\n"); -return TEST_SKIPPED; -} >>>>> - >>>>> -num_cores = 0; >>>>> -RTE_LCORE_FOREACH_WORKER(core_id) { >>>>> -enabled_core_ids[num_cores] = core_id; -num_cores++; -} >>>>> - >>>>> -printf("\nPerf test: 1 writer, %d readers, RCU integration >>>>> enabled\n", -num_cores); >>>>> - >>>>> -/* Create LPM table */ >>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - >>> config.number_tbl8s = >>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm = >>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config); >>>>> -TEST_LPM_ASSERT(lpm != NULL); >>>>> - >>>>> -/* Init RCU variable */ >>>>> -sz = rte_rcu_qsbr_get_memsize(num_cores); >>>>> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, >>>>> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores); >>>>> - >>>>> -rcu_cfg.v = rv; >>>>> -/* Assign the RCU variable to LPM */ -if (rte_lpm_rcu_qsbr_add(lpm, >>>>> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); -goto >>>>> error; 
-} >>>>> - >>>>> -writer_done = 0; >>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); >>>>> - >>>>> -/* Launch reader threads */ >>>>> -for (i = 0; i < num_cores; i++) >>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, >>>>> -enabled_core_ids[i]); >>>>> - >>>>> -/* Measure add/delete. */ >>>>> -begin = rte_rdtsc_precise(); >>>>> -for (i = 0; i < RCU_ITERATIONS; i++) { >>>>> -/* Add all the entries */ >>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if (rte_lpm_add(lpm, >>>>> large_ldepth_route_table[j].ip, -large_ldepth_route_table[j].depth, >>>>> -next_hop_add) != 0) { >>>>> -printf("Failed to add iteration %d, route# %d\n", -i, j); >>>>> +for (j = 1; j < 3; j++) { >>>>> +if (use_rcu) >>>>> +printf("\nPerf test: %d writer(s), %d reader(s)," >>>>> + " RCU integration enabled\n", j, num_cores - j); else >>>>> +printf("\nPerf test: %d writer(s), %d reader(s)," >>>>> + " RCU integration disabled\n", j, num_cores - j); >>>>> + >>>>> +/* Create LPM table */ >>>>> +config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; >>> config.number_tbl8s = >>>>> +NUM_LDEPTH_ROUTE_ENTRIES; config.flags = 0; lpm = >>>>> +rte_lpm_create(__func__, SOCKET_ID_ANY, &config); >>>>> +TEST_LPM_ASSERT(lpm != NULL); >>>>> + >>>>> +/* Init RCU variable */ >>>>> +if (use_rcu) { >>>>> +sz = rte_rcu_qsbr_get_memsize(num_cores); >>>>> +rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, >>>>> + >>>>> RTE_CACHE_LINE_SIZE); >>>>> +rte_rcu_qsbr_init(rv, num_cores); >>>>> + >>>>> +rcu_cfg.v = rv; >>>>> +/* Assign the RCU variable to LPM */ if (rte_lpm_rcu_qsbr_add(lpm, >>>>> +&rcu_cfg) != 0) { printf("RCU variable assignment failed\n"); >>>>> goto error; >>>>> } >>>>> >>>>> -/* Delete all the entries */ >>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if >>>>> (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, >>>>> -large_ldepth_route_table[j].depth) != 0) { -printf("Failed to delete >>>>> iteration %d, route# %d\n", -i, j); -goto error; -} -} -total_cycles >>>>> = rte_rdtsc_precise() 
- begin; >>>>> +reader_f = test_lpm_rcu_qsbr_reader; } else reader_f = >>>>> +test_lpm_reader; >>>>> >>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM >>>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: %g >>>>> cycles\n", -(double)total_cycles / TOTAL_WRITES); >>>>> +writer_done = 0; >>>>> +__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); >>>>> >>>>> -writer_done = 1; >>>>> -/* Wait until all readers have exited */ -for (i = 0; i < num_cores; >>>>> i++) -if (rte_eal_wait_lcore(enabled_core_ids[i]); >>>>> - >>>>> -rte_lpm_free(lpm); >>>>> -rte_free(rv); >>>>> -lpm = NULL; >>>>> -rv = NULL; >>>>> - >>>>> -/* Test without RCU integration */ >>>>> -printf("\nPerf test: 1 writer, %d readers, RCU integration >>>>> disabled\n", -num_cores); >>>>> - >>>>> -/* Create LPM table */ >>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - >>> config.number_tbl8s = >>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm = >>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config); >>>>> -TEST_LPM_ASSERT(lpm != NULL); >>>>> +__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); >>>>> >>>>> -writer_done = 0; >>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); >>>>> +/* Launch reader threads */ >>>>> +for (i = j; i < num_cores; i++) >>>>> +rte_eal_remote_launch(reader_f, NULL, >>>>> +enabled_core_ids[i]); >>>>> >>>>> -/* Launch reader threads */ >>>>> -for (i = 0; i < num_cores; i++) >>>>> -rte_eal_remote_launch(test_lpm_reader, NULL, >>>>> -enabled_core_ids[i]); >>>>> +/* Launch writer threads */ >>>>> +for (i = 0; i < j; i++) >>>>> +rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, >>>>> +(void *)(uintptr_t)(i + j), >>>> This can be just 'j'? >>>> >>>>> +enabled_core_ids[i]); >>>>> >>>>> -/* Measure add/delete. 
*/ >>>>> -begin = rte_rdtsc_precise(); >>>>> -for (i = 0; i < RCU_ITERATIONS; i++) { >>>>> -/* Add all the entries */ >>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) >>>>> -if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, >>>>> -large_ldepth_route_table[j].depth, >>>>> -next_hop_add) != 0) { >>>>> -printf("Failed to add iteration %d, route# >>>>> %d\n", >>>>> -i, j); >>>>> +/* Wait for writer threads */ >>>>> +for (i = 0; i < j; i++) >>>>> +if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) >>>>> goto error; >>>>> -} >>>>> >>>>> -/* Delete all the entries */ >>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) >>>>> -if (rte_lpm_delete(lpm, >>>>> large_ldepth_route_table[j].ip, >>>>> -large_ldepth_route_table[j].depth) != 0) { >>>>> -printf("Failed to delete iteration %d, route# >>>>> %d\n", >>>>> -i, j); >>>>> -goto error; >>>>> -} >>>>> +printf("Total LPM Adds: %d\n", TOTAL_WRITES); >>>>> +printf("Total LPM Deletes: %d\n", TOTAL_WRITES); >>>>> +printf("Average LPM Add/Del: %"PRIu64" cycles\n", >>>>> +__atomic_load_n(&gwrite_cycles, >>>>> __ATOMIC_RELAXED) >>>>> +/ TOTAL_WRITES); >>>>> + >>>>> +writer_done = 1; >>>>> +/* Wait until all readers have exited */ >>>>> +for (i = j; i < num_cores; i++) >>>>> +rte_eal_wait_lcore(enabled_core_ids[i]); >>>>> + >>>>> +rte_lpm_free(lpm); >>>>> +rte_free(rv); >>>>> +lpm = NULL; >>>>> +rv = NULL; >>>>> } >>>>> -total_cycles = rte_rdtsc_precise() - begin; >>>>> - >>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); >>>>> -printf("Total LPM Deletes: %d\n", TOTAL_WRITES); >>>>> -printf("Average LPM Add/Del: %g cycles\n", >>>>> -(double)total_cycles / TOTAL_WRITES); >>>>> - >>>>> -writer_done = 1; >>>>> -/* Wait until all readers have exited */ >>>>> -for (i = 0; i < num_cores; i++) >>>>> -rte_eal_wait_lcore(enabled_core_ids[i]); >>>>> - >>>>> -rte_lpm_free(lpm); >>>>> >>>>> return 0; >>>>> >>>>> @@ -946,9 +767,8 @@ test_lpm_perf(void) >>>>> rte_lpm_delete_all(lpm); >>>>> rte_lpm_free(lpm); >>>>> >>>>> 
-test_lpm_rcu_perf(); >>>>> - >>>>> -test_lpm_rcu_perf_multi_writer(); >>>>> +test_lpm_rcu_perf_multi_writer(0); >>>>> +test_lpm_rcu_perf_multi_writer(1); >>>>> >>>>> return 0; >>>>> } >>>>> -- >>>>> 2.17.1 > -- Regards, Vladimir ^ permalink raw reply [flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v3 0/4] test/lpm: fix rcu qsbr perf test 2020-11-02 23:51 ` [dpdk-dev] [PATCH v2 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar ` (3 preceding siblings ...) 2020-11-02 23:52 ` [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication " Dharmik Thakkar @ 2020-11-03 5:12 ` Dharmik Thakkar 2020-11-03 5:12 ` [dpdk-dev] [PATCH v3 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar ` (4 more replies) 4 siblings, 5 replies; 52+ messages in thread From: Dharmik Thakkar @ 2020-11-03 5:12 UTC (permalink / raw) Cc: dev, nd, Dharmik Thakkar Fix LPM adds, LPM deletes, and cycle calculation. Return error if LPM add/delete fails in multi-writer test. Remove redundant error checking for readers. Combine single and multi threaded test cases to avoid code duplication. --- v3: - Add 'goto error' - Remove unnecessary if statement v2: - Add more details about the fix to the commit message - Replace hard coded values with an enum - Remove lock acquire/release for single writer Dharmik Thakkar (4): test/lpm: fix cycle calculation in rcu qsbr perf test/lpm: return error on failure in rcu qsbr perf test/lpm: remove error checking in rcu qsbr perf test/lpm: avoid code duplication in rcu qsbr perf app/test/test_lpm_perf.c | 381 ++++++++++----------------------------- 1 file changed, 94 insertions(+), 287 deletions(-) -- 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v3 1/4] test/lpm: fix cycle calculation in rcu qsbr perf 2020-11-03 5:12 ` [dpdk-dev] [PATCH v3 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar @ 2020-11-03 5:12 ` Dharmik Thakkar 2020-11-03 5:12 ` [dpdk-dev] [PATCH v3 2/4] test/lpm: return error on failure " Dharmik Thakkar ` (3 subsequent siblings) 4 siblings, 0 replies; 52+ messages in thread From: Dharmik Thakkar @ 2020-11-03 5:12 UTC (permalink / raw) To: Bruce Richardson, Vladimir Medvedkin, Gavin Hu, Honnappa Nagarahalli, Ruifeng Wang Cc: dev, nd, Dharmik Thakkar, stable Fix incorrect calculations for LPM adds, LPM deletes, and average cycles in RCU QSBR perf tests. Since the RCU QSBR tests run for 'RCU_ITERATIONS' and not 'ITERATIONS', replace 'ITERATIONS' with 'RCU_ITERATIONS' for calculating adds, deletes, and cycles. Also, for the multi-writer perf test, each writer only writes half of NUM_LDEPTH_ROUTE_ENTRIES. For 2 writers, total adds (or deletes) should be (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) instead of (2 * RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES). Since, for both the single and multi writer tests, the total adds/deletes is equal to (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES), this has been replaced with the macro 'TOTAL_WRITES'; furthermore, 'gwrites' has been removed since it is always a fixed value equal to TOTAL_WRITES. 
Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") Cc: honnappa.nagarahalli@arm.com Cc: stable@dpdk.org Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> --- app/test/test_lpm_perf.c | 45 ++++++++++++++-------------------------- 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index c5a238b9d1e8..45164b23214b 100644 --- a/app/test/test_lpm_perf.c +++ b/app/test/test_lpm_perf.c @@ -23,7 +23,6 @@ static struct rte_rcu_qsbr *rv; static volatile uint8_t writer_done; static volatile uint32_t thr_id; static uint64_t gwrite_cycles; -static uint64_t gwrites; /* LPM APIs are not thread safe, use mutex to provide thread safety */ static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -60,6 +59,8 @@ static uint32_t num_ldepth_route_entries; #define NUM_ROUTE_ENTRIES num_route_entries #define NUM_LDEPTH_ROUTE_ENTRIES num_ldepth_route_entries +#define TOTAL_WRITES (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) + enum { IP_CLASS_A, IP_CLASS_B, @@ -432,7 +433,6 @@ test_lpm_rcu_qsbr_writer(void *arg) uint8_t core_id = (uint8_t)((uintptr_t)arg); uint32_t next_hop_add = 0xAA; - RTE_SET_USED(arg); /* 2 writer threads are used */ if (core_id % 2 == 0) { si = 0; @@ -472,9 +472,6 @@ test_lpm_rcu_qsbr_writer(void *arg) total_cycles = rte_rdtsc_precise() - begin; __atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED); - __atomic_fetch_add(&gwrites, - 2 * NUM_LDEPTH_ROUTE_ENTRIES * RCU_ITERATIONS, - __ATOMIC_RELAXED); return 0; } @@ -528,7 +525,6 @@ test_lpm_rcu_perf_multi_writer(void) writer_done = 0; __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); - __atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED); __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); @@ -548,14 +544,11 @@ test_lpm_rcu_perf_multi_writer(void) if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) goto error; - 
printf("Total LPM Adds: %d\n", - 2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); - printf("Total LPM Deletes: %d\n", - 2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); + printf("Total LPM Adds: %d\n", TOTAL_WRITES); + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); printf("Average LPM Add/Del: %"PRIu64" cycles\n", - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / - __atomic_load_n(&gwrites, __ATOMIC_RELAXED) - ); + __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) + / TOTAL_WRITES); /* Wait and check return value from reader threads */ writer_done = 1; @@ -581,7 +574,6 @@ test_lpm_rcu_perf_multi_writer(void) writer_done = 0; __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); - __atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED); __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); /* Launch reader threads */ @@ -600,14 +592,11 @@ test_lpm_rcu_perf_multi_writer(void) if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) goto error; - printf("Total LPM Adds: %d\n", - 2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); - printf("Total LPM Deletes: %d\n", - 2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); + printf("Total LPM Adds: %d\n", TOTAL_WRITES); + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); printf("Average LPM Add/Del: %"PRIu64" cycles\n", - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / - __atomic_load_n(&gwrites, __ATOMIC_RELAXED) - ); + __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) + / TOTAL_WRITES); writer_done = 1; /* Wait and check return value from reader threads */ @@ -711,11 +700,10 @@ test_lpm_rcu_perf(void) } total_cycles = rte_rdtsc_precise() - begin; - printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); - printf("Total LPM Deletes: %d\n", - ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); + printf("Total LPM Adds: %d\n", TOTAL_WRITES); + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); printf("Average LPM Add/Del: %g cycles\n", - (double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS)); + (double)total_cycles / TOTAL_WRITES); writer_done = 1; /* 
Wait and check return value from reader threads */ @@ -771,11 +759,10 @@ test_lpm_rcu_perf(void) } total_cycles = rte_rdtsc_precise() - begin; - printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); - printf("Total LPM Deletes: %d\n", - ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); + printf("Total LPM Adds: %d\n", TOTAL_WRITES); + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); printf("Average LPM Add/Del: %g cycles\n", - (double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS)); + (double)total_cycles / TOTAL_WRITES); writer_done = 1; /* Wait and check return value from reader threads */ -- 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v3 2/4] test/lpm: return error on failure in rcu qsbr perf 2020-11-03 5:12 ` [dpdk-dev] [PATCH v3 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar 2020-11-03 5:12 ` [dpdk-dev] [PATCH v3 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar @ 2020-11-03 5:12 ` Dharmik Thakkar 2020-11-03 5:21 ` Honnappa Nagarahalli 2020-11-03 5:12 ` [dpdk-dev] [PATCH v3 3/4] test/lpm: remove error checking " Dharmik Thakkar ` (2 subsequent siblings) 4 siblings, 1 reply; 52+ messages in thread From: Dharmik Thakkar @ 2020-11-03 5:12 UTC (permalink / raw) To: Bruce Richardson, Vladimir Medvedkin, Ruifeng Wang, Gavin Hu, Honnappa Nagarahalli Cc: dev, nd, Dharmik Thakkar, stable Return error if Add/Delete fail in multiwriter perf test Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") Cc: honnappa.nagarahalli@arm.com Cc: stable@dpdk.org Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> --- app/test/test_lpm_perf.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index 45164b23214b..fc4c9b60cbbc 100644 --- a/app/test/test_lpm_perf.c +++ b/app/test/test_lpm_perf.c @@ -453,6 +453,7 @@ test_lpm_rcu_qsbr_writer(void *arg) next_hop_add) != 0) { printf("Failed to add iteration %d, route# %d\n", i, j); + goto error; } pthread_mutex_unlock(&lpm_mutex); } @@ -464,6 +465,7 @@ test_lpm_rcu_qsbr_writer(void *arg) large_ldepth_route_table[j].depth) != 0) { printf("Failed to delete iteration %d, route# %d\n", i, j); + goto error; } pthread_mutex_unlock(&lpm_mutex); } @@ -474,6 +476,10 @@ test_lpm_rcu_qsbr_writer(void *arg) __atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED); return 0; + +error: + pthread_mutex_unlock(&lpm_mutex); + return -1; } /* -- 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v3 2/4] test/lpm: return error on failure in rcu qsbr perf 2020-11-03 5:12 ` [dpdk-dev] [PATCH v3 2/4] test/lpm: return error on failure " Dharmik Thakkar @ 2020-11-03 5:21 ` Honnappa Nagarahalli 0 siblings, 0 replies; 52+ messages in thread From: Honnappa Nagarahalli @ 2020-11-03 5:21 UTC (permalink / raw) To: Dharmik Thakkar, Bruce Richardson, Vladimir Medvedkin, Ruifeng Wang, Gavin Hu Cc: dev, nd, Dharmik Thakkar, stable, Honnappa Nagarahalli, nd <snip> > > Return error if Add/Delete fail in multiwriter perf test > > Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") > Cc: honnappa.nagarahalli@arm.com > Cc: stable@dpdk.org > > Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Looks good Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> > --- > app/test/test_lpm_perf.c | 6 ++++++ > 1 file changed, 6 insertions(+) > > diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index > 45164b23214b..fc4c9b60cbbc 100644 > --- a/app/test/test_lpm_perf.c > +++ b/app/test/test_lpm_perf.c > @@ -453,6 +453,7 @@ test_lpm_rcu_qsbr_writer(void *arg) > next_hop_add) != 0) { > printf("Failed to add iteration %d, route# > %d\n", > i, j); > + goto error; > } > pthread_mutex_unlock(&lpm_mutex); > } > @@ -464,6 +465,7 @@ test_lpm_rcu_qsbr_writer(void *arg) > large_ldepth_route_table[j].depth) != 0) { > printf("Failed to delete iteration %d, route# > %d\n", > i, j); > + goto error; > } > pthread_mutex_unlock(&lpm_mutex); > } > @@ -474,6 +476,10 @@ test_lpm_rcu_qsbr_writer(void *arg) > __atomic_fetch_add(&gwrite_cycles, total_cycles, > __ATOMIC_RELAXED); > > return 0; > + > +error: > + pthread_mutex_unlock(&lpm_mutex); > + return -1; > } > > /* > -- > 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v3 3/4] test/lpm: remove error checking in rcu qsbr perf 2020-11-03 5:12 ` [dpdk-dev] [PATCH v3 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar 2020-11-03 5:12 ` [dpdk-dev] [PATCH v3 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar 2020-11-03 5:12 ` [dpdk-dev] [PATCH v3 2/4] test/lpm: return error on failure " Dharmik Thakkar @ 2020-11-03 5:12 ` Dharmik Thakkar 2020-11-03 5:22 ` Honnappa Nagarahalli 2020-11-03 5:12 ` [dpdk-dev] [PATCH v3 4/4] test/lpm: avoid code duplication " Dharmik Thakkar 2020-11-03 22:23 ` [dpdk-dev] [PATCH v4 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar 4 siblings, 1 reply; 52+ messages in thread From: Dharmik Thakkar @ 2020-11-03 5:12 UTC (permalink / raw) To: Bruce Richardson, Vladimir Medvedkin, Gavin Hu, Honnappa Nagarahalli, Ruifeng Wang Cc: dev, nd, Dharmik Thakkar, stable Remove redundant error checking for reader threads since they never return error. Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") Cc: honnappa.nagarahalli@arm.com Cc: stable@dpdk.org Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> --- app/test/test_lpm_perf.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index fc4c9b60cbbc..fa6ebc4f7547 100644 --- a/app/test/test_lpm_perf.c +++ b/app/test/test_lpm_perf.c @@ -556,11 +556,10 @@ test_lpm_rcu_perf_multi_writer(void) __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / TOTAL_WRITES); - /* Wait and check return value from reader threads */ writer_done = 1; + /* Wait until all readers have exited */ for (i = 2; i < num_cores; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - goto error; + rte_eal_wait_lcore(enabled_core_ids[i]); rte_lpm_free(lpm); rte_free(rv); @@ -605,10 +604,9 @@ test_lpm_rcu_perf_multi_writer(void) / TOTAL_WRITES); writer_done = 1; - /* Wait and check return value from reader 
threads */ + /* Wait until all readers have exited */ for (i = 2; i < num_cores; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - goto error; + rte_eal_wait_lcore(enabled_core_ids[i]); rte_lpm_free(lpm); @@ -712,10 +710,9 @@ test_lpm_rcu_perf(void) (double)total_cycles / TOTAL_WRITES); writer_done = 1; - /* Wait and check return value from reader threads */ + /* Wait until all readers have exited */ for (i = 0; i < num_cores; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - goto error; + rte_eal_wait_lcore(enabled_core_ids[i]); rte_lpm_free(lpm); rte_free(rv); @@ -771,11 +768,9 @@ test_lpm_rcu_perf(void) (double)total_cycles / TOTAL_WRITES); writer_done = 1; - /* Wait and check return value from reader threads */ + /* Wait until all readers have exited */ for (i = 0; i < num_cores; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - printf("Warning: lcore %u not finished.\n", - enabled_core_ids[i]); + rte_eal_wait_lcore(enabled_core_ids[i]); rte_lpm_free(lpm); -- 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v3 3/4] test/lpm: remove error checking in rcu qsbr perf 2020-11-03 5:12 ` [dpdk-dev] [PATCH v3 3/4] test/lpm: remove error checking " Dharmik Thakkar @ 2020-11-03 5:22 ` Honnappa Nagarahalli 0 siblings, 0 replies; 52+ messages in thread From: Honnappa Nagarahalli @ 2020-11-03 5:22 UTC (permalink / raw) To: Dharmik Thakkar, Bruce Richardson, Vladimir Medvedkin, Gavin Hu, Ruifeng Wang Cc: dev, nd, Dharmik Thakkar, stable, Honnappa Nagarahalli, nd > -----Original Message----- > From: Dharmik Thakkar <dharmik.thakkar@arm.com> > Sent: Monday, November 2, 2020 11:12 PM > To: Bruce Richardson <bruce.richardson@intel.com>; Vladimir Medvedkin > <vladimir.medvedkin@intel.com>; Gavin Hu <Gavin.Hu@arm.com>; > Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang > <Ruifeng.Wang@arm.com> > Cc: dev@dpdk.org; nd <nd@arm.com>; Dharmik Thakkar > <Dharmik.Thakkar@arm.com>; stable@dpdk.org > Subject: [PATCH v3 3/4] test/lpm: remove error checking in rcu qsbr perf > > Remove redundant error checking for reader threads since they never return > error. 
> > Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") > Cc: honnappa.nagarahalli@arm.com > Cc: stable@dpdk.org > > Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Looks good Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> > --- > app/test/test_lpm_perf.c | 21 ++++++++------------- > 1 file changed, 8 insertions(+), 13 deletions(-) > > diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index > fc4c9b60cbbc..fa6ebc4f7547 100644 > --- a/app/test/test_lpm_perf.c > +++ b/app/test/test_lpm_perf.c > @@ -556,11 +556,10 @@ test_lpm_rcu_perf_multi_writer(void) > __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) > / TOTAL_WRITES); > > - /* Wait and check return value from reader threads */ > writer_done = 1; > + /* Wait until all readers have exited */ > for (i = 2; i < num_cores; i++) > - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > - goto error; > + rte_eal_wait_lcore(enabled_core_ids[i]); > > rte_lpm_free(lpm); > rte_free(rv); > @@ -605,10 +604,9 @@ test_lpm_rcu_perf_multi_writer(void) > / TOTAL_WRITES); > > writer_done = 1; > - /* Wait and check return value from reader threads */ > + /* Wait until all readers have exited */ > for (i = 2; i < num_cores; i++) > - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > - goto error; > + rte_eal_wait_lcore(enabled_core_ids[i]); > > rte_lpm_free(lpm); > > @@ -712,10 +710,9 @@ test_lpm_rcu_perf(void) > (double)total_cycles / TOTAL_WRITES); > > writer_done = 1; > - /* Wait and check return value from reader threads */ > + /* Wait until all readers have exited */ > for (i = 0; i < num_cores; i++) > - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > - goto error; > + rte_eal_wait_lcore(enabled_core_ids[i]); > > rte_lpm_free(lpm); > rte_free(rv); > @@ -771,11 +768,9 @@ test_lpm_rcu_perf(void) > (double)total_cycles / TOTAL_WRITES); > > writer_done = 1; > - /* Wait and check return value from reader threads */ > + /* Wait until 
all readers have exited */ > for (i = 0; i < num_cores; i++) > - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > - printf("Warning: lcore %u not finished.\n", > - enabled_core_ids[i]); > + rte_eal_wait_lcore(enabled_core_ids[i]); > > rte_lpm_free(lpm); > > -- > 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v3 4/4] test/lpm: avoid code duplication in rcu qsbr perf 2020-11-03 5:12 ` [dpdk-dev] [PATCH v3 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar ` (2 preceding siblings ...) 2020-11-03 5:12 ` [dpdk-dev] [PATCH v3 3/4] test/lpm: remove error checking " Dharmik Thakkar @ 2020-11-03 5:12 ` Dharmik Thakkar 2020-11-03 22:23 ` [dpdk-dev] [PATCH v4 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar 4 siblings, 0 replies; 52+ messages in thread From: Dharmik Thakkar @ 2020-11-03 5:12 UTC (permalink / raw) To: Bruce Richardson, Vladimir Medvedkin; +Cc: dev, nd, Dharmik Thakkar Avoid code duplication by combining single and multi threaded tests Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> --- app/test/test_lpm_perf.c | 359 ++++++++++----------------------------- 1 file changed, 89 insertions(+), 270 deletions(-) diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index fa6ebc4f7547..147801634210 100644 --- a/app/test/test_lpm_perf.c +++ b/app/test/test_lpm_perf.c @@ -67,6 +67,12 @@ enum { IP_CLASS_C }; +enum { + SINGLE_WRITER = 1, + MULTI_WRITER_1, + MULTI_WRITER_2 +}; + /* struct route_rule_count defines the total number of rules in following a/b/c * each item in a[]/b[]/c[] is the number of common IP address class A/B/C, not * including the ones for private local network. 
@@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg) { unsigned int i, j, si, ei; uint64_t begin, total_cycles; - uint8_t core_id = (uint8_t)((uintptr_t)arg); + uint8_t writer_id = (uint8_t)((uintptr_t)arg); uint32_t next_hop_add = 0xAA; - /* 2 writer threads are used */ - if (core_id % 2 == 0) { + /* Single writer (writer_id = 1) */ + if (writer_id == SINGLE_WRITER) { + si = 0; + ei = NUM_LDEPTH_ROUTE_ENTRIES; + } + /* 2 Writers (writer_id = 2/3)*/ + else if (writer_id == MULTI_WRITER_1) { si = 0; ei = NUM_LDEPTH_ROUTE_ENTRIES / 2; } else { @@ -447,7 +458,8 @@ test_lpm_rcu_qsbr_writer(void *arg) for (i = 0; i < RCU_ITERATIONS; i++) { /* Add all the entries */ for (j = si; j < ei; j++) { - pthread_mutex_lock(&lpm_mutex); + if (writer_id != SINGLE_WRITER) + pthread_mutex_lock(&lpm_mutex); if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, large_ldepth_route_table[j].depth, next_hop_add) != 0) { @@ -455,19 +467,22 @@ test_lpm_rcu_qsbr_writer(void *arg) i, j); goto error; } - pthread_mutex_unlock(&lpm_mutex); + if (writer_id != SINGLE_WRITER) + pthread_mutex_unlock(&lpm_mutex); } /* Delete all the entries */ for (j = si; j < ei; j++) { - pthread_mutex_lock(&lpm_mutex); + if (writer_id != SINGLE_WRITER) + pthread_mutex_lock(&lpm_mutex); if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, large_ldepth_route_table[j].depth) != 0) { printf("Failed to delete iteration %d, route# %d\n", i, j); goto error; } - pthread_mutex_unlock(&lpm_mutex); + if (writer_id != SINGLE_WRITER) + pthread_mutex_unlock(&lpm_mutex); } } @@ -478,22 +493,24 @@ test_lpm_rcu_qsbr_writer(void *arg) return 0; error: - pthread_mutex_unlock(&lpm_mutex); + if (writer_id != SINGLE_WRITER) + pthread_mutex_unlock(&lpm_mutex); return -1; } /* * Functional test: - * 2 writers, rest are readers + * 1/2 writers, rest are readers */ static int -test_lpm_rcu_perf_multi_writer(void) +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu) { struct rte_lpm_config config; size_t sz; - unsigned int i; + unsigned 
int i, j; uint16_t core_id; struct rte_lpm_rcu_config rcu_cfg = {0}; + int (*reader_f)(void *arg) = NULL; if (rte_lcore_count() < 3) { printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n"); @@ -506,273 +523,76 @@ test_lpm_rcu_perf_multi_writer(void) num_cores++; } - printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n", - num_cores - 2); - - /* Create LPM table */ - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; - config.flags = 0; - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); - TEST_LPM_ASSERT(lpm != NULL); - - /* Init RCU variable */ - sz = rte_rcu_qsbr_get_memsize(num_cores); - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, - RTE_CACHE_LINE_SIZE); - rte_rcu_qsbr_init(rv, num_cores); - - rcu_cfg.v = rv; - /* Assign the RCU variable to LPM */ - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { - printf("RCU variable assignment failed\n"); - goto error; - } - - writer_done = 0; - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); - - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); - - /* Launch reader threads */ - for (i = 2; i < num_cores; i++) - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, - enabled_core_ids[i]); - - /* Launch writer threads */ - for (i = 0; i < 2; i++) - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, - (void *)(uintptr_t)i, - enabled_core_ids[i]); - - /* Wait for writer threads */ - for (i = 0; i < 2; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - goto error; - - printf("Total LPM Adds: %d\n", TOTAL_WRITES); - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); - printf("Average LPM Add/Del: %"PRIu64" cycles\n", - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) - / TOTAL_WRITES); - - writer_done = 1; - /* Wait until all readers have exited */ - for (i = 2; i < num_cores; i++) - rte_eal_wait_lcore(enabled_core_ids[i]); - - rte_lpm_free(lpm); - rte_free(rv); - lpm = NULL; - rv = NULL; - - /* Test without RCU integration */ 
- printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n", - num_cores - 2); - - /* Create LPM table */ - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; - config.flags = 0; - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); - TEST_LPM_ASSERT(lpm != NULL); - - writer_done = 0; - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); - - /* Launch reader threads */ - for (i = 2; i < num_cores; i++) - rte_eal_remote_launch(test_lpm_reader, NULL, - enabled_core_ids[i]); - - /* Launch writer threads */ - for (i = 0; i < 2; i++) - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, - (void *)(uintptr_t)i, - enabled_core_ids[i]); - - /* Wait for writer threads */ - for (i = 0; i < 2; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - goto error; - - printf("Total LPM Adds: %d\n", TOTAL_WRITES); - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); - printf("Average LPM Add/Del: %"PRIu64" cycles\n", - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) - / TOTAL_WRITES); - - writer_done = 1; - /* Wait until all readers have exited */ - for (i = 2; i < num_cores; i++) - rte_eal_wait_lcore(enabled_core_ids[i]); - - rte_lpm_free(lpm); - - return 0; - -error: - writer_done = 1; - /* Wait until all readers have exited */ - rte_eal_mp_wait_lcore(); - - rte_lpm_free(lpm); - rte_free(rv); - - return -1; -} - -/* - * Functional test: - * Single writer, rest are readers - */ -static int -test_lpm_rcu_perf(void) -{ - struct rte_lpm_config config; - uint64_t begin, total_cycles; - size_t sz; - unsigned int i, j; - uint16_t core_id; - uint32_t next_hop_add = 0xAA; - struct rte_lpm_rcu_config rcu_cfg = {0}; - - if (rte_lcore_count() < 2) { - printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n"); - return TEST_SKIPPED; - } - - num_cores = 0; - RTE_LCORE_FOREACH_WORKER(core_id) { - enabled_core_ids[num_cores] = core_id; - num_cores++; - } - - 
printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n", - num_cores); - - /* Create LPM table */ - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; - config.flags = 0; - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); - TEST_LPM_ASSERT(lpm != NULL); - - /* Init RCU variable */ - sz = rte_rcu_qsbr_get_memsize(num_cores); - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, - RTE_CACHE_LINE_SIZE); - rte_rcu_qsbr_init(rv, num_cores); - - rcu_cfg.v = rv; - /* Assign the RCU variable to LPM */ - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { - printf("RCU variable assignment failed\n"); - goto error; - } - - writer_done = 0; - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); - - /* Launch reader threads */ - for (i = 0; i < num_cores; i++) - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, - enabled_core_ids[i]); - - /* Measure add/delete. */ - begin = rte_rdtsc_precise(); - for (i = 0; i < RCU_ITERATIONS; i++) { - /* Add all the entries */ - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, - large_ldepth_route_table[j].depth, - next_hop_add) != 0) { - printf("Failed to add iteration %d, route# %d\n", - i, j); + for (j = 1; j < 3; j++) { + if (use_rcu) + printf("\nPerf test: %d writer(s), %d reader(s)," + " RCU integration enabled\n", j, num_cores - j); + else + printf("\nPerf test: %d writer(s), %d reader(s)," + " RCU integration disabled\n", j, num_cores - j); + + /* Create LPM table */ + config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; + config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; + config.flags = 0; + lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); + TEST_LPM_ASSERT(lpm != NULL); + + /* Init RCU variable */ + if (use_rcu) { + sz = rte_rcu_qsbr_get_memsize(num_cores); + rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, + RTE_CACHE_LINE_SIZE); + rte_rcu_qsbr_init(rv, num_cores); + + rcu_cfg.v = rv; + /* Assign the RCU variable to 
LPM */ + if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { + printf("RCU variable assignment failed\n"); goto error; } - /* Delete all the entries */ - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) - if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, - large_ldepth_route_table[j].depth) != 0) { - printf("Failed to delete iteration %d, route# %d\n", - i, j); - goto error; - } - } - total_cycles = rte_rdtsc_precise() - begin; + reader_f = test_lpm_rcu_qsbr_reader; + } else + reader_f = test_lpm_reader; - printf("Total LPM Adds: %d\n", TOTAL_WRITES); - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); - printf("Average LPM Add/Del: %g cycles\n", - (double)total_cycles / TOTAL_WRITES); + writer_done = 0; + __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); - writer_done = 1; - /* Wait until all readers have exited */ - for (i = 0; i < num_cores; i++) - rte_eal_wait_lcore(enabled_core_ids[i]); - - rte_lpm_free(lpm); - rte_free(rv); - lpm = NULL; - rv = NULL; - - /* Test without RCU integration */ - printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n", - num_cores); - - /* Create LPM table */ - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; - config.flags = 0; - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); - TEST_LPM_ASSERT(lpm != NULL); + __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); - writer_done = 0; - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); + /* Launch reader threads */ + for (i = j; i < num_cores; i++) + rte_eal_remote_launch(reader_f, NULL, + enabled_core_ids[i]); - /* Launch reader threads */ - for (i = 0; i < num_cores; i++) - rte_eal_remote_launch(test_lpm_reader, NULL, - enabled_core_ids[i]); + /* Launch writer threads */ + for (i = 0; i < j; i++) + rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, + (void *)(uintptr_t)(i + j), + enabled_core_ids[i]); - /* Measure add/delete. 
*/ - begin = rte_rdtsc_precise(); - for (i = 0; i < RCU_ITERATIONS; i++) { - /* Add all the entries */ - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, - large_ldepth_route_table[j].depth, - next_hop_add) != 0) { - printf("Failed to add iteration %d, route# %d\n", - i, j); + /* Wait for writer threads */ + for (i = 0; i < j; i++) + if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) goto error; - } - /* Delete all the entries */ - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) - if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, - large_ldepth_route_table[j].depth) != 0) { - printf("Failed to delete iteration %d, route# %d\n", - i, j); - goto error; - } + printf("Total LPM Adds: %d\n", TOTAL_WRITES); + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); + printf("Average LPM Add/Del: %"PRIu64" cycles\n", + __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) + / TOTAL_WRITES); + + writer_done = 1; + /* Wait until all readers have exited */ + for (i = j; i < num_cores; i++) + rte_eal_wait_lcore(enabled_core_ids[i]); + + rte_lpm_free(lpm); + rte_free(rv); + lpm = NULL; + rv = NULL; } - total_cycles = rte_rdtsc_precise() - begin; - - printf("Total LPM Adds: %d\n", TOTAL_WRITES); - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); - printf("Average LPM Add/Del: %g cycles\n", - (double)total_cycles / TOTAL_WRITES); - - writer_done = 1; - /* Wait until all readers have exited */ - for (i = 0; i < num_cores; i++) - rte_eal_wait_lcore(enabled_core_ids[i]); - - rte_lpm_free(lpm); return 0; @@ -948,9 +768,8 @@ test_lpm_perf(void) rte_lpm_delete_all(lpm); rte_lpm_free(lpm); - test_lpm_rcu_perf(); - - test_lpm_rcu_perf_multi_writer(); + test_lpm_rcu_perf_multi_writer(0); + test_lpm_rcu_perf_multi_writer(1); return 0; } -- 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v4 0/4] test/lpm: fix rcu qsbr perf test 2020-11-03 5:12 ` [dpdk-dev] [PATCH v3 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar ` (3 preceding siblings ...) 2020-11-03 5:12 ` [dpdk-dev] [PATCH v3 4/4] test/lpm: avoid code duplication " Dharmik Thakkar @ 2020-11-03 22:23 ` Dharmik Thakkar 2020-11-03 22:23 ` [dpdk-dev] [PATCH v4 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar ` (4 more replies) 4 siblings, 5 replies; 52+ messages in thread From: Dharmik Thakkar @ 2020-11-03 22:23 UTC (permalink / raw) Cc: dev, nd, Dharmik Thakkar Fix LPM adds, LPM deletes, and cycle calculation. Return error if LPM add/delete fails in multi-writer test. Return error if single or multi writer test fails. Remove redundant error checking for readers. Combine single and multi threaded test cases to avoid code duplication. --- v4: - Return error if rcu qsbr test fails - Improve multi writer test to enable more than 2 writers v3: - Add 'goto error' - Remove unnecessary if statement v2: - Add more details about the fix to the commit message - Replace hard coded values with an enum - Remove lock acquire/release for single writer Dharmik Thakkar (4): test/lpm: fix cycle calculation in rcu qsbr perf test/lpm: return error on failure in rcu qsbr perf test/lpm: remove error checking in rcu qsbr perf test/lpm: avoid code duplication in rcu qsbr perf app/test/test_lpm_perf.c | 383 ++++++++++----------------------------- 1 file changed, 91 insertions(+), 292 deletions(-) -- 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v4 1/4] test/lpm: fix cycle calculation in rcu qsbr perf 2020-11-03 22:23 ` [dpdk-dev] [PATCH v4 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar @ 2020-11-03 22:23 ` Dharmik Thakkar 2020-11-03 22:23 ` [dpdk-dev] [PATCH v4 2/4] test/lpm: return error on failure " Dharmik Thakkar ` (3 subsequent siblings) 4 siblings, 0 replies; 52+ messages in thread From: Dharmik Thakkar @ 2020-11-03 22:23 UTC (permalink / raw) To: Bruce Richardson, Vladimir Medvedkin, Ruifeng Wang, Honnappa Nagarahalli, Gavin Hu Cc: dev, nd, Dharmik Thakkar, stable Fix incorrect calculations for LPM adds, LPM deletes, and average cycles in RCU QSBR perf tests. Since rcu qsbr tests run for 'RCU_ITERATIONS' and not 'ITERATIONS', replace 'ITERATIONS' with 'RCU_ITERATIONS' for calculating adds, deletes, and cycles. Also, for multi-writer perf test, each writer only writes half of NUM_LDEPTH_ROUTE_ENTRIES. For 2 writers, total adds (or deletes) should be (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) instead of (2 * RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES). Since, for both the single and multi writer tests, total adds/deletes is equal to (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES), this has been replaced with a macro 'TOTAL_WRITES' and furthermore, 'gwrites' has been removed since it is always a fixed value equal to TOTAL_WRITES.
Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") Cc: honnappa.nagarahalli@arm.com Cc: stable@dpdk.org Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> --- app/test/test_lpm_perf.c | 45 ++++++++++++++-------------------------- 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index c5a238b9d1e8..45164b23214b 100644 --- a/app/test/test_lpm_perf.c +++ b/app/test/test_lpm_perf.c @@ -23,7 +23,6 @@ static struct rte_rcu_qsbr *rv; static volatile uint8_t writer_done; static volatile uint32_t thr_id; static uint64_t gwrite_cycles; -static uint64_t gwrites; /* LPM APIs are not thread safe, use mutex to provide thread safety */ static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -60,6 +59,8 @@ static uint32_t num_ldepth_route_entries; #define NUM_ROUTE_ENTRIES num_route_entries #define NUM_LDEPTH_ROUTE_ENTRIES num_ldepth_route_entries +#define TOTAL_WRITES (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) + enum { IP_CLASS_A, IP_CLASS_B, @@ -432,7 +433,6 @@ test_lpm_rcu_qsbr_writer(void *arg) uint8_t core_id = (uint8_t)((uintptr_t)arg); uint32_t next_hop_add = 0xAA; - RTE_SET_USED(arg); /* 2 writer threads are used */ if (core_id % 2 == 0) { si = 0; @@ -472,9 +472,6 @@ test_lpm_rcu_qsbr_writer(void *arg) total_cycles = rte_rdtsc_precise() - begin; __atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED); - __atomic_fetch_add(&gwrites, - 2 * NUM_LDEPTH_ROUTE_ENTRIES * RCU_ITERATIONS, - __ATOMIC_RELAXED); return 0; } @@ -528,7 +525,6 @@ test_lpm_rcu_perf_multi_writer(void) writer_done = 0; __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); - __atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED); __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); @@ -548,14 +544,11 @@ test_lpm_rcu_perf_multi_writer(void) if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) goto error; - 
printf("Total LPM Adds: %d\n", - 2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); - printf("Total LPM Deletes: %d\n", - 2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); + printf("Total LPM Adds: %d\n", TOTAL_WRITES); + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); printf("Average LPM Add/Del: %"PRIu64" cycles\n", - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / - __atomic_load_n(&gwrites, __ATOMIC_RELAXED) - ); + __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) + / TOTAL_WRITES); /* Wait and check return value from reader threads */ writer_done = 1; @@ -581,7 +574,6 @@ test_lpm_rcu_perf_multi_writer(void) writer_done = 0; __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); - __atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED); __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); /* Launch reader threads */ @@ -600,14 +592,11 @@ test_lpm_rcu_perf_multi_writer(void) if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) goto error; - printf("Total LPM Adds: %d\n", - 2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); - printf("Total LPM Deletes: %d\n", - 2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); + printf("Total LPM Adds: %d\n", TOTAL_WRITES); + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); printf("Average LPM Add/Del: %"PRIu64" cycles\n", - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / - __atomic_load_n(&gwrites, __ATOMIC_RELAXED) - ); + __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) + / TOTAL_WRITES); writer_done = 1; /* Wait and check return value from reader threads */ @@ -711,11 +700,10 @@ test_lpm_rcu_perf(void) } total_cycles = rte_rdtsc_precise() - begin; - printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); - printf("Total LPM Deletes: %d\n", - ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); + printf("Total LPM Adds: %d\n", TOTAL_WRITES); + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); printf("Average LPM Add/Del: %g cycles\n", - (double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS)); + (double)total_cycles / TOTAL_WRITES); writer_done = 1; /* 
Wait and check return value from reader threads */ @@ -771,11 +759,10 @@ test_lpm_rcu_perf(void) } total_cycles = rte_rdtsc_precise() - begin; - printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); - printf("Total LPM Deletes: %d\n", - ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); + printf("Total LPM Adds: %d\n", TOTAL_WRITES); + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); printf("Average LPM Add/Del: %g cycles\n", - (double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS)); + (double)total_cycles / TOTAL_WRITES); writer_done = 1; /* Wait and check return value from reader threads */ -- 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v4 2/4] test/lpm: return error on failure in rcu qsbr perf 2020-11-03 22:23 ` [dpdk-dev] [PATCH v4 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar 2020-11-03 22:23 ` [dpdk-dev] [PATCH v4 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar @ 2020-11-03 22:23 ` Dharmik Thakkar 2020-11-03 22:23 ` [dpdk-dev] [PATCH v4 3/4] test/lpm: remove error checking " Dharmik Thakkar ` (2 subsequent siblings) 4 siblings, 0 replies; 52+ messages in thread From: Dharmik Thakkar @ 2020-11-03 22:23 UTC (permalink / raw) To: Bruce Richardson, Vladimir Medvedkin, Honnappa Nagarahalli, Ruifeng Wang, Gavin Hu Cc: dev, nd, Dharmik Thakkar, stable Return error if Add/Delete fail in multiwriter perf test Return error if single or multi writer test fails Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") Cc: honnappa.nagarahalli@arm.com Cc: stable@dpdk.org Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> --- app/test/test_lpm_perf.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index 45164b23214b..873ecf511c97 100644 --- a/app/test/test_lpm_perf.c +++ b/app/test/test_lpm_perf.c @@ -453,6 +453,7 @@ test_lpm_rcu_qsbr_writer(void *arg) next_hop_add) != 0) { printf("Failed to add iteration %d, route# %d\n", i, j); + goto error; } pthread_mutex_unlock(&lpm_mutex); } @@ -464,6 +465,7 @@ test_lpm_rcu_qsbr_writer(void *arg) large_ldepth_route_table[j].depth) != 0) { printf("Failed to delete iteration %d, route# %d\n", i, j); + goto error; } pthread_mutex_unlock(&lpm_mutex); } @@ -474,6 +476,10 @@ test_lpm_rcu_qsbr_writer(void *arg) __atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED); return 0; + +error: + pthread_mutex_unlock(&lpm_mutex); + return -1; } /* @@ -947,9 +953,11 @@ test_lpm_perf(void) rte_lpm_delete_all(lpm); 
rte_lpm_free(lpm); - test_lpm_rcu_perf(); + if (test_lpm_rcu_perf() < 0) + return -1; - test_lpm_rcu_perf_multi_writer(); + if (test_lpm_rcu_perf_multi_writer() < 0) + return -1; return 0; } -- 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v4 3/4] test/lpm: remove error checking in rcu qsbr perf 2020-11-03 22:23 ` [dpdk-dev] [PATCH v4 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar 2020-11-03 22:23 ` [dpdk-dev] [PATCH v4 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar 2020-11-03 22:23 ` [dpdk-dev] [PATCH v4 2/4] test/lpm: return error on failure " Dharmik Thakkar @ 2020-11-03 22:23 ` Dharmik Thakkar 2020-11-03 22:23 ` [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication " Dharmik Thakkar 2020-11-04 18:58 ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar 4 siblings, 0 replies; 52+ messages in thread From: Dharmik Thakkar @ 2020-11-03 22:23 UTC (permalink / raw) To: Bruce Richardson, Vladimir Medvedkin, Honnappa Nagarahalli, Gavin Hu, Ruifeng Wang Cc: dev, nd, Dharmik Thakkar, stable Remove redundant error checking for reader threads since they never return error. Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") Cc: honnappa.nagarahalli@arm.com Cc: stable@dpdk.org Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> --- app/test/test_lpm_perf.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index 873ecf511c97..c8e70ec89ff5 100644 --- a/app/test/test_lpm_perf.c +++ b/app/test/test_lpm_perf.c @@ -556,11 +556,10 @@ test_lpm_rcu_perf_multi_writer(void) __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / TOTAL_WRITES); - /* Wait and check return value from reader threads */ writer_done = 1; + /* Wait until all readers have exited */ for (i = 2; i < num_cores; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - goto error; + rte_eal_wait_lcore(enabled_core_ids[i]); rte_lpm_free(lpm); rte_free(rv); @@ -605,10 +604,9 @@ test_lpm_rcu_perf_multi_writer(void) / TOTAL_WRITES); writer_done = 1; - /* Wait and 
check return value from reader threads */ + /* Wait until all readers have exited */ for (i = 2; i < num_cores; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - goto error; + rte_eal_wait_lcore(enabled_core_ids[i]); rte_lpm_free(lpm); @@ -712,10 +710,9 @@ test_lpm_rcu_perf(void) (double)total_cycles / TOTAL_WRITES); writer_done = 1; - /* Wait and check return value from reader threads */ + /* Wait until all readers have exited */ for (i = 0; i < num_cores; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - goto error; + rte_eal_wait_lcore(enabled_core_ids[i]); rte_lpm_free(lpm); rte_free(rv); @@ -771,11 +768,9 @@ test_lpm_rcu_perf(void) (double)total_cycles / TOTAL_WRITES); writer_done = 1; - /* Wait and check return value from reader threads */ + /* Wait until all readers have exited */ for (i = 0; i < num_cores; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - printf("Warning: lcore %u not finished.\n", - enabled_core_ids[i]); + rte_eal_wait_lcore(enabled_core_ids[i]); rte_lpm_free(lpm); -- 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication in rcu qsbr perf 2020-11-03 22:23 ` [dpdk-dev] [PATCH v4 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar ` (2 preceding siblings ...) 2020-11-03 22:23 ` [dpdk-dev] [PATCH v4 3/4] test/lpm: remove error checking " Dharmik Thakkar @ 2020-11-03 22:23 ` Dharmik Thakkar 2020-11-03 22:35 ` Honnappa Nagarahalli 2020-11-04 15:46 ` Medvedkin, Vladimir 2020-11-04 18:58 ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar 4 siblings, 2 replies; 52+ messages in thread From: Dharmik Thakkar @ 2020-11-03 22:23 UTC (permalink / raw) To: Bruce Richardson, Vladimir Medvedkin; +Cc: dev, nd, Dharmik Thakkar Avoid code duplication by combining single and multi threaded tests Also, enable support for more than 2 writers Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> --- app/test/test_lpm_perf.c | 359 +++++++++------------------------------ 1 file changed, 84 insertions(+), 275 deletions(-) diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index c8e70ec89ff5..a1485e74e77f 100644 --- a/app/test/test_lpm_perf.c +++ b/app/test/test_lpm_perf.c @@ -23,6 +23,7 @@ static struct rte_rcu_qsbr *rv; static volatile uint8_t writer_done; static volatile uint32_t thr_id; static uint64_t gwrite_cycles; +static uint32_t single_insert; /* LPM APIs are not thread safe, use mutex to provide thread safety */ static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -430,24 +431,21 @@ test_lpm_rcu_qsbr_writer(void *arg) { unsigned int i, j, si, ei; uint64_t begin, total_cycles; - uint8_t core_id = (uint8_t)((uintptr_t)arg); uint32_t next_hop_add = 0xAA; + bool single_writer = (single_insert == NUM_LDEPTH_ROUTE_ENTRIES) ? 
+ true : false; + uint8_t pos_core = (uint8_t)((uintptr_t)arg); - /* 2 writer threads are used */ - if (core_id % 2 == 0) { - si = 0; - ei = NUM_LDEPTH_ROUTE_ENTRIES / 2; - } else { - si = NUM_LDEPTH_ROUTE_ENTRIES / 2; - ei = NUM_LDEPTH_ROUTE_ENTRIES; - } + si = pos_core * single_insert; + ei = si + single_insert; /* Measure add/delete. */ begin = rte_rdtsc_precise(); for (i = 0; i < RCU_ITERATIONS; i++) { /* Add all the entries */ for (j = si; j < ei; j++) { - pthread_mutex_lock(&lpm_mutex); + if (!single_writer) + pthread_mutex_lock(&lpm_mutex); if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, large_ldepth_route_table[j].depth, next_hop_add) != 0) { @@ -455,19 +453,22 @@ test_lpm_rcu_qsbr_writer(void *arg) i, j); goto error; } - pthread_mutex_unlock(&lpm_mutex); + if (!single_writer) + pthread_mutex_unlock(&lpm_mutex); } /* Delete all the entries */ for (j = si; j < ei; j++) { - pthread_mutex_lock(&lpm_mutex); + if (!single_writer) + pthread_mutex_lock(&lpm_mutex); if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, large_ldepth_route_table[j].depth) != 0) { printf("Failed to delete iteration %d, route# %d\n", i, j); goto error; } - pthread_mutex_unlock(&lpm_mutex); + if (!single_writer) + pthread_mutex_unlock(&lpm_mutex); } } @@ -478,22 +479,24 @@ test_lpm_rcu_qsbr_writer(void *arg) return 0; error: - pthread_mutex_unlock(&lpm_mutex); + if (!single_writer) + pthread_mutex_unlock(&lpm_mutex); return -1; } /* * Functional test: - * 2 writers, rest are readers + * 1/2 writers, rest are readers */ static int -test_lpm_rcu_perf_multi_writer(void) +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu) { struct rte_lpm_config config; size_t sz; - unsigned int i; + unsigned int i, j; uint16_t core_id; struct rte_lpm_rcu_config rcu_cfg = {0}; + int (*reader_f)(void *arg) = NULL; if (rte_lcore_count() < 3) { printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n"); @@ -506,273 +509,79 @@ test_lpm_rcu_perf_multi_writer(void) num_cores++; } - 
printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n", - num_cores - 2); - - /* Create LPM table */ - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; - config.flags = 0; - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); - TEST_LPM_ASSERT(lpm != NULL); - - /* Init RCU variable */ - sz = rte_rcu_qsbr_get_memsize(num_cores); - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, - RTE_CACHE_LINE_SIZE); - rte_rcu_qsbr_init(rv, num_cores); - - rcu_cfg.v = rv; - /* Assign the RCU variable to LPM */ - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { - printf("RCU variable assignment failed\n"); - goto error; - } - - writer_done = 0; - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); - - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); - - /* Launch reader threads */ - for (i = 2; i < num_cores; i++) - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, - enabled_core_ids[i]); - - /* Launch writer threads */ - for (i = 0; i < 2; i++) - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, - (void *)(uintptr_t)i, - enabled_core_ids[i]); - - /* Wait for writer threads */ - for (i = 0; i < 2; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - goto error; - - printf("Total LPM Adds: %d\n", TOTAL_WRITES); - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); - printf("Average LPM Add/Del: %"PRIu64" cycles\n", - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) - / TOTAL_WRITES); - - writer_done = 1; - /* Wait until all readers have exited */ - for (i = 2; i < num_cores; i++) - rte_eal_wait_lcore(enabled_core_ids[i]); - - rte_lpm_free(lpm); - rte_free(rv); - lpm = NULL; - rv = NULL; - - /* Test without RCU integration */ - printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n", - num_cores - 2); - - /* Create LPM table */ - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; - config.flags = 0; - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, 
&config); - TEST_LPM_ASSERT(lpm != NULL); - - writer_done = 0; - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); - - /* Launch reader threads */ - for (i = 2; i < num_cores; i++) - rte_eal_remote_launch(test_lpm_reader, NULL, - enabled_core_ids[i]); - - /* Launch writer threads */ - for (i = 0; i < 2; i++) - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, - (void *)(uintptr_t)i, - enabled_core_ids[i]); - - /* Wait for writer threads */ - for (i = 0; i < 2; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - goto error; - - printf("Total LPM Adds: %d\n", TOTAL_WRITES); - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); - printf("Average LPM Add/Del: %"PRIu64" cycles\n", - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) - / TOTAL_WRITES); - - writer_done = 1; - /* Wait until all readers have exited */ - for (i = 2; i < num_cores; i++) - rte_eal_wait_lcore(enabled_core_ids[i]); - - rte_lpm_free(lpm); - - return 0; - -error: - writer_done = 1; - /* Wait until all readers have exited */ - rte_eal_mp_wait_lcore(); - - rte_lpm_free(lpm); - rte_free(rv); - - return -1; -} - -/* - * Functional test: - * Single writer, rest are readers - */ -static int -test_lpm_rcu_perf(void) -{ - struct rte_lpm_config config; - uint64_t begin, total_cycles; - size_t sz; - unsigned int i, j; - uint16_t core_id; - uint32_t next_hop_add = 0xAA; - struct rte_lpm_rcu_config rcu_cfg = {0}; - - if (rte_lcore_count() < 2) { - printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n"); - return TEST_SKIPPED; - } - - num_cores = 0; - RTE_LCORE_FOREACH_WORKER(core_id) { - enabled_core_ids[num_cores] = core_id; - num_cores++; - } - - printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n", - num_cores); - - /* Create LPM table */ - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; - config.flags = 0; - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); - 
TEST_LPM_ASSERT(lpm != NULL); - - /* Init RCU variable */ - sz = rte_rcu_qsbr_get_memsize(num_cores); - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, - RTE_CACHE_LINE_SIZE); - rte_rcu_qsbr_init(rv, num_cores); - - rcu_cfg.v = rv; - /* Assign the RCU variable to LPM */ - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { - printf("RCU variable assignment failed\n"); - goto error; - } - - writer_done = 0; - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); - - /* Launch reader threads */ - for (i = 0; i < num_cores; i++) - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, - enabled_core_ids[i]); - - /* Measure add/delete. */ - begin = rte_rdtsc_precise(); - for (i = 0; i < RCU_ITERATIONS; i++) { - /* Add all the entries */ - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, - large_ldepth_route_table[j].depth, - next_hop_add) != 0) { - printf("Failed to add iteration %d, route# %d\n", - i, j); - goto error; - } - - /* Delete all the entries */ - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) - if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, - large_ldepth_route_table[j].depth) != 0) { - printf("Failed to delete iteration %d, route# %d\n", - i, j); + for (j = 1; j < 3; j++) { + if (use_rcu) + printf("\nPerf test: %d writer(s), %d reader(s)," + " RCU integration enabled\n", j, num_cores - j); + else + printf("\nPerf test: %d writer(s), %d reader(s)," + " RCU integration disabled\n", j, num_cores - j); + + /* Calculate writes by each writer */ + single_insert = NUM_LDEPTH_ROUTE_ENTRIES / j; + + /* Create LPM table */ + config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; + config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; + config.flags = 0; + lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); + TEST_LPM_ASSERT(lpm != NULL); + + /* Init RCU variable */ + if (use_rcu) { + sz = rte_rcu_qsbr_get_memsize(num_cores); + rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, + RTE_CACHE_LINE_SIZE); + 
rte_rcu_qsbr_init(rv, num_cores); + + rcu_cfg.v = rv; + /* Assign the RCU variable to LPM */ + if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { + printf("RCU variable assignment failed\n"); goto error; } - } - total_cycles = rte_rdtsc_precise() - begin; - printf("Total LPM Adds: %d\n", TOTAL_WRITES); - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); - printf("Average LPM Add/Del: %g cycles\n", - (double)total_cycles / TOTAL_WRITES); + reader_f = test_lpm_rcu_qsbr_reader; + } else + reader_f = test_lpm_reader; - writer_done = 1; - /* Wait until all readers have exited */ - for (i = 0; i < num_cores; i++) - rte_eal_wait_lcore(enabled_core_ids[i]); - - rte_lpm_free(lpm); - rte_free(rv); - lpm = NULL; - rv = NULL; + writer_done = 0; + __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); - /* Test without RCU integration */ - printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n", - num_cores); - - /* Create LPM table */ - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; - config.flags = 0; - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); - TEST_LPM_ASSERT(lpm != NULL); + __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); - writer_done = 0; - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); + /* Launch reader threads */ + for (i = j; i < num_cores; i++) + rte_eal_remote_launch(reader_f, NULL, + enabled_core_ids[i]); - /* Launch reader threads */ - for (i = 0; i < num_cores; i++) - rte_eal_remote_launch(test_lpm_reader, NULL, - enabled_core_ids[i]); + /* Launch writer threads */ + for (i = 0; i < j; i++) + rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, + (void *)(uintptr_t)i, + enabled_core_ids[i]); - /* Measure add/delete. 
*/ - begin = rte_rdtsc_precise(); - for (i = 0; i < RCU_ITERATIONS; i++) { - /* Add all the entries */ - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, - large_ldepth_route_table[j].depth, - next_hop_add) != 0) { - printf("Failed to add iteration %d, route# %d\n", - i, j); + /* Wait for writer threads */ + for (i = 0; i < j; i++) + if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) goto error; - } - /* Delete all the entries */ - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) - if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, - large_ldepth_route_table[j].depth) != 0) { - printf("Failed to delete iteration %d, route# %d\n", - i, j); - goto error; - } + printf("Total LPM Adds: %d\n", TOTAL_WRITES); + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); + printf("Average LPM Add/Del: %"PRIu64" cycles\n", + __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) + / TOTAL_WRITES); + + writer_done = 1; + /* Wait until all readers have exited */ + for (i = j; i < num_cores; i++) + rte_eal_wait_lcore(enabled_core_ids[i]); + + rte_lpm_free(lpm); + rte_free(rv); + lpm = NULL; + rv = NULL; } - total_cycles = rte_rdtsc_precise() - begin; - - printf("Total LPM Adds: %d\n", TOTAL_WRITES); - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); - printf("Average LPM Add/Del: %g cycles\n", - (double)total_cycles / TOTAL_WRITES); - - writer_done = 1; - /* Wait until all readers have exited */ - for (i = 0; i < num_cores; i++) - rte_eal_wait_lcore(enabled_core_ids[i]); - - rte_lpm_free(lpm); return 0; @@ -948,10 +757,10 @@ test_lpm_perf(void) rte_lpm_delete_all(lpm); rte_lpm_free(lpm); - if (test_lpm_rcu_perf() < 0) + if (test_lpm_rcu_perf_multi_writer(0) < 0) return -1; - if (test_lpm_rcu_perf_multi_writer() < 0) + if (test_lpm_rcu_perf_multi_writer(1) < 0) return -1; return 0; -- 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication in rcu qsbr perf 2020-11-03 22:23 ` [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication " Dharmik Thakkar @ 2020-11-03 22:35 ` Honnappa Nagarahalli 2020-11-04 15:46 ` Medvedkin, Vladimir 1 sibling, 0 replies; 52+ messages in thread From: Honnappa Nagarahalli @ 2020-11-03 22:35 UTC (permalink / raw) To: Dharmik Thakkar, Bruce Richardson, Vladimir Medvedkin Cc: dev, nd, Dharmik Thakkar, Honnappa Nagarahalli, nd > -----Original Message----- > From: dev <dev-bounces@dpdk.org> On Behalf Of Dharmik Thakkar > Sent: Tuesday, November 3, 2020 4:24 PM > To: Bruce Richardson <bruce.richardson@intel.com>; Vladimir Medvedkin > <vladimir.medvedkin@intel.com> > Cc: dev@dpdk.org; nd <nd@arm.com>; Dharmik Thakkar > <Dharmik.Thakkar@arm.com> > Subject: [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication in rcu > qsbr perf > > Avoid code duplication by combining single and multi threaded tests > > Also, enable support for more than 2 writers > > Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Looks good Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> > --- > app/test/test_lpm_perf.c | 359 +++++++++------------------------------ > 1 file changed, 84 insertions(+), 275 deletions(-) > > diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index > c8e70ec89ff5..a1485e74e77f 100644 > --- a/app/test/test_lpm_perf.c > +++ b/app/test/test_lpm_perf.c > @@ -23,6 +23,7 @@ static struct rte_rcu_qsbr *rv; static volatile uint8_t > writer_done; static volatile uint32_t thr_id; static uint64_t gwrite_cycles; > +static uint32_t single_insert; > /* LPM APIs are not thread safe, use mutex to provide thread safety */ > static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER; > > @@ -430,24 +431,21 @@ test_lpm_rcu_qsbr_writer(void *arg) { > unsigned int i, j, si, ei; > uint64_t begin, total_cycles; > - uint8_t core_id = 
(uint8_t)((uintptr_t)arg); > uint32_t next_hop_add = 0xAA; > + bool single_writer = (single_insert == > NUM_LDEPTH_ROUTE_ENTRIES) ? > + true : false; > + uint8_t pos_core = (uint8_t)((uintptr_t)arg); > > - /* 2 writer threads are used */ > - if (core_id % 2 == 0) { > - si = 0; > - ei = NUM_LDEPTH_ROUTE_ENTRIES / 2; > - } else { > - si = NUM_LDEPTH_ROUTE_ENTRIES / 2; > - ei = NUM_LDEPTH_ROUTE_ENTRIES; > - } > + si = pos_core * single_insert; > + ei = si + single_insert; > > /* Measure add/delete. */ > begin = rte_rdtsc_precise(); > for (i = 0; i < RCU_ITERATIONS; i++) { > /* Add all the entries */ > for (j = si; j < ei; j++) { > - pthread_mutex_lock(&lpm_mutex); > + if (!single_writer) > + pthread_mutex_lock(&lpm_mutex); > if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, > large_ldepth_route_table[j].depth, > next_hop_add) != 0) { > @@ -455,19 +453,22 @@ test_lpm_rcu_qsbr_writer(void *arg) > i, j); > goto error; > } > - pthread_mutex_unlock(&lpm_mutex); > + if (!single_writer) > + pthread_mutex_unlock(&lpm_mutex); > } > > /* Delete all the entries */ > for (j = si; j < ei; j++) { > - pthread_mutex_lock(&lpm_mutex); > + if (!single_writer) > + pthread_mutex_lock(&lpm_mutex); > if (rte_lpm_delete(lpm, > large_ldepth_route_table[j].ip, > large_ldepth_route_table[j].depth) != 0) { > printf("Failed to delete iteration %d, route# > %d\n", > i, j); > goto error; > } > - pthread_mutex_unlock(&lpm_mutex); > + if (!single_writer) > + pthread_mutex_unlock(&lpm_mutex); > } > } > > @@ -478,22 +479,24 @@ test_lpm_rcu_qsbr_writer(void *arg) > return 0; > > error: > - pthread_mutex_unlock(&lpm_mutex); > + if (!single_writer) > + pthread_mutex_unlock(&lpm_mutex); > return -1; > } > > /* > * Functional test: > - * 2 writers, rest are readers > + * 1/2 writers, rest are readers > */ > static int > -test_lpm_rcu_perf_multi_writer(void) > +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu) > { > struct rte_lpm_config config; > size_t sz; > - unsigned int i; > + unsigned int i, j; > 
uint16_t core_id; > struct rte_lpm_rcu_config rcu_cfg = {0}; > + int (*reader_f)(void *arg) = NULL; > > if (rte_lcore_count() < 3) { > printf("Not enough cores for lpm_rcu_perf_autotest, > expecting at least 3\n"); @@ -506,273 +509,79 @@ > test_lpm_rcu_perf_multi_writer(void) > num_cores++; > } > > - printf("\nPerf test: 2 writers, %d readers, RCU integration > enabled\n", > - num_cores - 2); > - > - /* Create LPM table */ > - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; > - config.flags = 0; > - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > - TEST_LPM_ASSERT(lpm != NULL); > - > - /* Init RCU variable */ > - sz = rte_rcu_qsbr_get_memsize(num_cores); > - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, > - RTE_CACHE_LINE_SIZE); > - rte_rcu_qsbr_init(rv, num_cores); > - > - rcu_cfg.v = rv; > - /* Assign the RCU variable to LPM */ > - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { > - printf("RCU variable assignment failed\n"); > - goto error; > - } > - > - writer_done = 0; > - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > - > - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > - > - /* Launch reader threads */ > - for (i = 2; i < num_cores; i++) > - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, > - enabled_core_ids[i]); > - > - /* Launch writer threads */ > - for (i = 0; i < 2; i++) > - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, > - (void *)(uintptr_t)i, > - enabled_core_ids[i]); > - > - /* Wait for writer threads */ > - for (i = 0; i < 2; i++) > - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > - goto error; > - > - printf("Total LPM Adds: %d\n", TOTAL_WRITES); > - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > - printf("Average LPM Add/Del: %"PRIu64" cycles\n", > - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) > - / TOTAL_WRITES); > - > - writer_done = 1; > - /* Wait until all readers have exited */ > - for (i = 2; i < num_cores; i++) > - 
rte_eal_wait_lcore(enabled_core_ids[i]); > - > - rte_lpm_free(lpm); > - rte_free(rv); > - lpm = NULL; > - rv = NULL; > - > - /* Test without RCU integration */ > - printf("\nPerf test: 2 writers, %d readers, RCU integration > disabled\n", > - num_cores - 2); > - > - /* Create LPM table */ > - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; > - config.flags = 0; > - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > - TEST_LPM_ASSERT(lpm != NULL); > - > - writer_done = 0; > - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > - > - /* Launch reader threads */ > - for (i = 2; i < num_cores; i++) > - rte_eal_remote_launch(test_lpm_reader, NULL, > - enabled_core_ids[i]); > - > - /* Launch writer threads */ > - for (i = 0; i < 2; i++) > - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, > - (void *)(uintptr_t)i, > - enabled_core_ids[i]); > - > - /* Wait for writer threads */ > - for (i = 0; i < 2; i++) > - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > - goto error; > - > - printf("Total LPM Adds: %d\n", TOTAL_WRITES); > - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > - printf("Average LPM Add/Del: %"PRIu64" cycles\n", > - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) > - / TOTAL_WRITES); > - > - writer_done = 1; > - /* Wait until all readers have exited */ > - for (i = 2; i < num_cores; i++) > - rte_eal_wait_lcore(enabled_core_ids[i]); > - > - rte_lpm_free(lpm); > - > - return 0; > - > -error: > - writer_done = 1; > - /* Wait until all readers have exited */ > - rte_eal_mp_wait_lcore(); > - > - rte_lpm_free(lpm); > - rte_free(rv); > - > - return -1; > -} > - > -/* > - * Functional test: > - * Single writer, rest are readers > - */ > -static int > -test_lpm_rcu_perf(void) > -{ > - struct rte_lpm_config config; > - uint64_t begin, total_cycles; > - size_t sz; > - unsigned int i, j; > - uint16_t core_id; > - uint32_t next_hop_add = 0xAA; > - struct 
rte_lpm_rcu_config rcu_cfg = {0}; > - > - if (rte_lcore_count() < 2) { > - printf("Not enough cores for lpm_rcu_perf_autotest, > expecting at least 2\n"); > - return TEST_SKIPPED; > - } > - > - num_cores = 0; > - RTE_LCORE_FOREACH_WORKER(core_id) { > - enabled_core_ids[num_cores] = core_id; > - num_cores++; > - } > - > - printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n", > - num_cores); > - > - /* Create LPM table */ > - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; > - config.flags = 0; > - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > - TEST_LPM_ASSERT(lpm != NULL); > - > - /* Init RCU variable */ > - sz = rte_rcu_qsbr_get_memsize(num_cores); > - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, > - RTE_CACHE_LINE_SIZE); > - rte_rcu_qsbr_init(rv, num_cores); > - > - rcu_cfg.v = rv; > - /* Assign the RCU variable to LPM */ > - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { > - printf("RCU variable assignment failed\n"); > - goto error; > - } > - > - writer_done = 0; > - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > - > - /* Launch reader threads */ > - for (i = 0; i < num_cores; i++) > - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, > - enabled_core_ids[i]); > - > - /* Measure add/delete. 
*/ > - begin = rte_rdtsc_precise(); > - for (i = 0; i < RCU_ITERATIONS; i++) { > - /* Add all the entries */ > - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) > - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, > - large_ldepth_route_table[j].depth, > - next_hop_add) != 0) { > - printf("Failed to add iteration %d, route# > %d\n", > - i, j); > - goto error; > - } > - > - /* Delete all the entries */ > - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) > - if (rte_lpm_delete(lpm, > large_ldepth_route_table[j].ip, > - large_ldepth_route_table[j].depth) != 0) { > - printf("Failed to delete iteration %d, route# > %d\n", > - i, j); > + for (j = 1; j < 3; j++) { > + if (use_rcu) > + printf("\nPerf test: %d writer(s), %d reader(s)," > + " RCU integration enabled\n", j, num_cores - j); > + else > + printf("\nPerf test: %d writer(s), %d reader(s)," > + " RCU integration disabled\n", j, num_cores - j); > + > + /* Calculate writes by each writer */ > + single_insert = NUM_LDEPTH_ROUTE_ENTRIES / j; > + > + /* Create LPM table */ > + config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > + config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; > + config.flags = 0; > + lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > + TEST_LPM_ASSERT(lpm != NULL); > + > + /* Init RCU variable */ > + if (use_rcu) { > + sz = rte_rcu_qsbr_get_memsize(num_cores); > + rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, > + > RTE_CACHE_LINE_SIZE); > + rte_rcu_qsbr_init(rv, num_cores); > + > + rcu_cfg.v = rv; > + /* Assign the RCU variable to LPM */ > + if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { > + printf("RCU variable assignment failed\n"); > goto error; > } > - } > - total_cycles = rte_rdtsc_precise() - begin; > > - printf("Total LPM Adds: %d\n", TOTAL_WRITES); > - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > - printf("Average LPM Add/Del: %g cycles\n", > - (double)total_cycles / TOTAL_WRITES); > + reader_f = test_lpm_rcu_qsbr_reader; > + } else > + reader_f = test_lpm_reader; > > - 
writer_done = 1; > - /* Wait until all readers have exited */ > - for (i = 0; i < num_cores; i++) > - rte_eal_wait_lcore(enabled_core_ids[i]); > - > - rte_lpm_free(lpm); > - rte_free(rv); > - lpm = NULL; > - rv = NULL; > + writer_done = 0; > + __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > > - /* Test without RCU integration */ > - printf("\nPerf test: 1 writer, %d readers, RCU integration > disabled\n", > - num_cores); > - > - /* Create LPM table */ > - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; > - config.flags = 0; > - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > - TEST_LPM_ASSERT(lpm != NULL); > + __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > > - writer_done = 0; > - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > + /* Launch reader threads */ > + for (i = j; i < num_cores; i++) > + rte_eal_remote_launch(reader_f, NULL, > + enabled_core_ids[i]); > > - /* Launch reader threads */ > - for (i = 0; i < num_cores; i++) > - rte_eal_remote_launch(test_lpm_reader, NULL, > - enabled_core_ids[i]); > + /* Launch writer threads */ > + for (i = 0; i < j; i++) > + rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, > + (void *)(uintptr_t)i, > + enabled_core_ids[i]); > > - /* Measure add/delete. 
*/ > - begin = rte_rdtsc_precise(); > - for (i = 0; i < RCU_ITERATIONS; i++) { > - /* Add all the entries */ > - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) > - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, > - large_ldepth_route_table[j].depth, > - next_hop_add) != 0) { > - printf("Failed to add iteration %d, route# > %d\n", > - i, j); > + /* Wait for writer threads */ > + for (i = 0; i < j; i++) > + if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > goto error; > - } > > - /* Delete all the entries */ > - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) > - if (rte_lpm_delete(lpm, > large_ldepth_route_table[j].ip, > - large_ldepth_route_table[j].depth) != 0) { > - printf("Failed to delete iteration %d, route# > %d\n", > - i, j); > - goto error; > - } > + printf("Total LPM Adds: %d\n", TOTAL_WRITES); > + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > + printf("Average LPM Add/Del: %"PRIu64" cycles\n", > + __atomic_load_n(&gwrite_cycles, > __ATOMIC_RELAXED) > + / TOTAL_WRITES); > + > + writer_done = 1; > + /* Wait until all readers have exited */ > + for (i = j; i < num_cores; i++) > + rte_eal_wait_lcore(enabled_core_ids[i]); > + > + rte_lpm_free(lpm); > + rte_free(rv); > + lpm = NULL; > + rv = NULL; > } > - total_cycles = rte_rdtsc_precise() - begin; > - > - printf("Total LPM Adds: %d\n", TOTAL_WRITES); > - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > - printf("Average LPM Add/Del: %g cycles\n", > - (double)total_cycles / TOTAL_WRITES); > - > - writer_done = 1; > - /* Wait until all readers have exited */ > - for (i = 0; i < num_cores; i++) > - rte_eal_wait_lcore(enabled_core_ids[i]); > - > - rte_lpm_free(lpm); > > return 0; > > @@ -948,10 +757,10 @@ test_lpm_perf(void) > rte_lpm_delete_all(lpm); > rte_lpm_free(lpm); > > - if (test_lpm_rcu_perf() < 0) > + if (test_lpm_rcu_perf_multi_writer(0) < 0) > return -1; > > - if (test_lpm_rcu_perf_multi_writer() < 0) > + if (test_lpm_rcu_perf_multi_writer(1) < 0) > return -1; > > return 0; > -- > 2.17.1 ^ 
permalink raw reply [flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication in rcu qsbr perf 2020-11-03 22:23 ` [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication " Dharmik Thakkar 2020-11-03 22:35 ` Honnappa Nagarahalli @ 2020-11-04 15:46 ` Medvedkin, Vladimir 2020-11-04 16:49 ` Dharmik Thakkar 1 sibling, 1 reply; 52+ messages in thread From: Medvedkin, Vladimir @ 2020-11-04 15:46 UTC (permalink / raw) To: Dharmik Thakkar, Bruce Richardson; +Cc: dev, nd Hi Thakkar, On 03/11/2020 22:23, Dharmik Thakkar wrote: > Avoid code duplication by combining single and multi threaded tests > > Also, enable support for more than 2 writers > > Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> > --- > app/test/test_lpm_perf.c | 359 +++++++++------------------------------ > 1 file changed, 84 insertions(+), 275 deletions(-) > > diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c > index c8e70ec89ff5..a1485e74e77f 100644 > --- a/app/test/test_lpm_perf.c > +++ b/app/test/test_lpm_perf.c > @@ -23,6 +23,7 @@ static struct rte_rcu_qsbr *rv; > static volatile uint8_t writer_done; > static volatile uint32_t thr_id; > static uint64_t gwrite_cycles; > +static uint32_t single_insert; > /* LPM APIs are not thread safe, use mutex to provide thread safety */ > static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER; > > @@ -430,24 +431,21 @@ test_lpm_rcu_qsbr_writer(void *arg) > { > unsigned int i, j, si, ei; > uint64_t begin, total_cycles; > - uint8_t core_id = (uint8_t)((uintptr_t)arg); > uint32_t next_hop_add = 0xAA; > + bool single_writer = (single_insert == NUM_LDEPTH_ROUTE_ENTRIES) ? 
> + true : false; > + uint8_t pos_core = (uint8_t)((uintptr_t)arg); > > - /* 2 writer threads are used */ > - if (core_id % 2 == 0) { > - si = 0; > - ei = NUM_LDEPTH_ROUTE_ENTRIES / 2; > - } else { > - si = NUM_LDEPTH_ROUTE_ENTRIES / 2; > - ei = NUM_LDEPTH_ROUTE_ENTRIES; > - } > + si = pos_core * single_insert; > + ei = si + single_insert; > In this case, given that you are doing "single_insert = NUM_LDEPTH_ROUTE_ENTRIES / j;" below, the number of ldepth_routes must be a multiple of the number of writers, so some number of routes can be skipped in the opposite case. Consider something like: number_of_writers = j; ... si = (pos_core * NUM_LDEPTH_ROUTE_ENTRIES)/number_of_writers; ei = ((pos_core + 1 ) * NUM_LDEPTH_ROUTE_ENTRIES)/number_of_writers; > /* Measure add/delete. */ > begin = rte_rdtsc_precise(); > for (i = 0; i < RCU_ITERATIONS; i++) { > /* Add all the entries */ > for (j = si; j < ei; j++) { > - pthread_mutex_lock(&lpm_mutex); > + if (!single_writer) > + pthread_mutex_lock(&lpm_mutex); > if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, > large_ldepth_route_table[j].depth, > next_hop_add) != 0) { > @@ -455,19 +453,22 @@ test_lpm_rcu_qsbr_writer(void *arg) > i, j); > goto error; > } > - pthread_mutex_unlock(&lpm_mutex); > + if (!single_writer) > + pthread_mutex_unlock(&lpm_mutex); > } > > /* Delete all the entries */ > for (j = si; j < ei; j++) { > - pthread_mutex_lock(&lpm_mutex); > + if (!single_writer) > + pthread_mutex_lock(&lpm_mutex); > if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, > large_ldepth_route_table[j].depth) != 0) { > printf("Failed to delete iteration %d, route# %d\n", > i, j); > goto error; > } > - pthread_mutex_unlock(&lpm_mutex); > + if (!single_writer) > + pthread_mutex_unlock(&lpm_mutex); > } > } > > @@ -478,22 +479,24 @@ test_lpm_rcu_qsbr_writer(void *arg) > return 0; > > error: > - pthread_mutex_unlock(&lpm_mutex); > + if (!single_writer) > + pthread_mutex_unlock(&lpm_mutex); > return -1; > } > > /* > * Functional test: 
> - * 2 writers, rest are readers > + * 1/2 writers, rest are readers > */ > static int > -test_lpm_rcu_perf_multi_writer(void) > +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu) > { > struct rte_lpm_config config; > size_t sz; > - unsigned int i; > + unsigned int i, j; > uint16_t core_id; > struct rte_lpm_rcu_config rcu_cfg = {0}; > + int (*reader_f)(void *arg) = NULL; > > if (rte_lcore_count() < 3) { > printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n"); > @@ -506,273 +509,79 @@ test_lpm_rcu_perf_multi_writer(void) > num_cores++; > } > > - printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n", > - num_cores - 2); > - > - /* Create LPM table */ > - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; > - config.flags = 0; > - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > - TEST_LPM_ASSERT(lpm != NULL); > - > - /* Init RCU variable */ > - sz = rte_rcu_qsbr_get_memsize(num_cores); > - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, > - RTE_CACHE_LINE_SIZE); > - rte_rcu_qsbr_init(rv, num_cores); > - > - rcu_cfg.v = rv; > - /* Assign the RCU variable to LPM */ > - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { > - printf("RCU variable assignment failed\n"); > - goto error; > - } > - > - writer_done = 0; > - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > - > - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > - > - /* Launch reader threads */ > - for (i = 2; i < num_cores; i++) > - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, > - enabled_core_ids[i]); > - > - /* Launch writer threads */ > - for (i = 0; i < 2; i++) > - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, > - (void *)(uintptr_t)i, > - enabled_core_ids[i]); > - > - /* Wait for writer threads */ > - for (i = 0; i < 2; i++) > - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > - goto error; > - > - printf("Total LPM Adds: %d\n", TOTAL_WRITES); > - printf("Total LPM Deletes: %d\n", 
TOTAL_WRITES); > - printf("Average LPM Add/Del: %"PRIu64" cycles\n", > - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) > - / TOTAL_WRITES); > - > - writer_done = 1; > - /* Wait until all readers have exited */ > - for (i = 2; i < num_cores; i++) > - rte_eal_wait_lcore(enabled_core_ids[i]); > - > - rte_lpm_free(lpm); > - rte_free(rv); > - lpm = NULL; > - rv = NULL; > - > - /* Test without RCU integration */ > - printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n", > - num_cores - 2); > - > - /* Create LPM table */ > - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; > - config.flags = 0; > - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > - TEST_LPM_ASSERT(lpm != NULL); > - > - writer_done = 0; > - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > - > - /* Launch reader threads */ > - for (i = 2; i < num_cores; i++) > - rte_eal_remote_launch(test_lpm_reader, NULL, > - enabled_core_ids[i]); > - > - /* Launch writer threads */ > - for (i = 0; i < 2; i++) > - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, > - (void *)(uintptr_t)i, > - enabled_core_ids[i]); > - > - /* Wait for writer threads */ > - for (i = 0; i < 2; i++) > - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > - goto error; > - > - printf("Total LPM Adds: %d\n", TOTAL_WRITES); > - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > - printf("Average LPM Add/Del: %"PRIu64" cycles\n", > - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) > - / TOTAL_WRITES); > - > - writer_done = 1; > - /* Wait until all readers have exited */ > - for (i = 2; i < num_cores; i++) > - rte_eal_wait_lcore(enabled_core_ids[i]); > - > - rte_lpm_free(lpm); > - > - return 0; > - > -error: > - writer_done = 1; > - /* Wait until all readers have exited */ > - rte_eal_mp_wait_lcore(); > - > - rte_lpm_free(lpm); > - rte_free(rv); > - > - return -1; > -} > - > -/* > - * Functional test: > - * 
Single writer, rest are readers > - */ > -static int > -test_lpm_rcu_perf(void) > -{ > - struct rte_lpm_config config; > - uint64_t begin, total_cycles; > - size_t sz; > - unsigned int i, j; > - uint16_t core_id; > - uint32_t next_hop_add = 0xAA; > - struct rte_lpm_rcu_config rcu_cfg = {0}; > - > - if (rte_lcore_count() < 2) { > - printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n"); > - return TEST_SKIPPED; > - } > - > - num_cores = 0; > - RTE_LCORE_FOREACH_WORKER(core_id) { > - enabled_core_ids[num_cores] = core_id; > - num_cores++; > - } > - > - printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n", > - num_cores); > - > - /* Create LPM table */ > - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; > - config.flags = 0; > - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > - TEST_LPM_ASSERT(lpm != NULL); > - > - /* Init RCU variable */ > - sz = rte_rcu_qsbr_get_memsize(num_cores); > - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, > - RTE_CACHE_LINE_SIZE); > - rte_rcu_qsbr_init(rv, num_cores); > - > - rcu_cfg.v = rv; > - /* Assign the RCU variable to LPM */ > - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { > - printf("RCU variable assignment failed\n"); > - goto error; > - } > - > - writer_done = 0; > - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > - > - /* Launch reader threads */ > - for (i = 0; i < num_cores; i++) > - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, > - enabled_core_ids[i]); > - > - /* Measure add/delete. 
*/ > - begin = rte_rdtsc_precise(); > - for (i = 0; i < RCU_ITERATIONS; i++) { > - /* Add all the entries */ > - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) > - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, > - large_ldepth_route_table[j].depth, > - next_hop_add) != 0) { > - printf("Failed to add iteration %d, route# %d\n", > - i, j); > - goto error; > - } > - > - /* Delete all the entries */ > - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) > - if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, > - large_ldepth_route_table[j].depth) != 0) { > - printf("Failed to delete iteration %d, route# %d\n", > - i, j); > + for (j = 1; j < 3; j++) { > + if (use_rcu) > + printf("\nPerf test: %d writer(s), %d reader(s)," > + " RCU integration enabled\n", j, num_cores - j); > + else > + printf("\nPerf test: %d writer(s), %d reader(s)," > + " RCU integration disabled\n", j, num_cores - j); > + > + /* Calculate writes by each writer */ > + single_insert = NUM_LDEPTH_ROUTE_ENTRIES / j; > + > + /* Create LPM table */ > + config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > + config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; > + config.flags = 0; > + lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > + TEST_LPM_ASSERT(lpm != NULL); > + > + /* Init RCU variable */ > + if (use_rcu) { > + sz = rte_rcu_qsbr_get_memsize(num_cores); > + rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, > + RTE_CACHE_LINE_SIZE); > + rte_rcu_qsbr_init(rv, num_cores); > + > + rcu_cfg.v = rv; > + /* Assign the RCU variable to LPM */ > + if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { > + printf("RCU variable assignment failed\n"); > goto error; > } > - } > - total_cycles = rte_rdtsc_precise() - begin; > > - printf("Total LPM Adds: %d\n", TOTAL_WRITES); > - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > - printf("Average LPM Add/Del: %g cycles\n", > - (double)total_cycles / TOTAL_WRITES); > + reader_f = test_lpm_rcu_qsbr_reader; > + } else > + reader_f = test_lpm_reader; > > - writer_done = 
1; > - /* Wait until all readers have exited */ > - for (i = 0; i < num_cores; i++) > - rte_eal_wait_lcore(enabled_core_ids[i]); > - > - rte_lpm_free(lpm); > - rte_free(rv); > - lpm = NULL; > - rv = NULL; > + writer_done = 0; > + __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > > - /* Test without RCU integration */ > - printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n", > - num_cores); > - > - /* Create LPM table */ > - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; > - config.flags = 0; > - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > - TEST_LPM_ASSERT(lpm != NULL); > + __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > > - writer_done = 0; > - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > + /* Launch reader threads */ > + for (i = j; i < num_cores; i++) > + rte_eal_remote_launch(reader_f, NULL, > + enabled_core_ids[i]); > > - /* Launch reader threads */ > - for (i = 0; i < num_cores; i++) > - rte_eal_remote_launch(test_lpm_reader, NULL, > - enabled_core_ids[i]); > + /* Launch writer threads */ > + for (i = 0; i < j; i++) > + rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, > + (void *)(uintptr_t)i, > + enabled_core_ids[i]); > > - /* Measure add/delete. 
*/ > - begin = rte_rdtsc_precise(); > - for (i = 0; i < RCU_ITERATIONS; i++) { > - /* Add all the entries */ > - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) > - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, > - large_ldepth_route_table[j].depth, > - next_hop_add) != 0) { > - printf("Failed to add iteration %d, route# %d\n", > - i, j); > + /* Wait for writer threads */ > + for (i = 0; i < j; i++) > + if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > goto error; > - } > > - /* Delete all the entries */ > - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) > - if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, > - large_ldepth_route_table[j].depth) != 0) { > - printf("Failed to delete iteration %d, route# %d\n", > - i, j); > - goto error; > - } > + printf("Total LPM Adds: %d\n", TOTAL_WRITES); > + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > + printf("Average LPM Add/Del: %"PRIu64" cycles\n", > + __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) > + / TOTAL_WRITES); > + > + writer_done = 1; > + /* Wait until all readers have exited */ > + for (i = j; i < num_cores; i++) > + rte_eal_wait_lcore(enabled_core_ids[i]); > + > + rte_lpm_free(lpm); > + rte_free(rv); > + lpm = NULL; > + rv = NULL; > } > - total_cycles = rte_rdtsc_precise() - begin; > - > - printf("Total LPM Adds: %d\n", TOTAL_WRITES); > - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > - printf("Average LPM Add/Del: %g cycles\n", > - (double)total_cycles / TOTAL_WRITES); > - > - writer_done = 1; > - /* Wait until all readers have exited */ > - for (i = 0; i < num_cores; i++) > - rte_eal_wait_lcore(enabled_core_ids[i]); > - > - rte_lpm_free(lpm); > > return 0; > > @@ -948,10 +757,10 @@ test_lpm_perf(void) > rte_lpm_delete_all(lpm); > rte_lpm_free(lpm); > > - if (test_lpm_rcu_perf() < 0) > + if (test_lpm_rcu_perf_multi_writer(0) < 0) > return -1; > > - if (test_lpm_rcu_perf_multi_writer() < 0) > + if (test_lpm_rcu_perf_multi_writer(1) < 0) > return -1; > > return 0; > -- Regards, Vladimir ^ 
permalink raw reply [flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication in rcu qsbr perf 2020-11-04 15:46 ` Medvedkin, Vladimir @ 2020-11-04 16:49 ` Dharmik Thakkar 0 siblings, 0 replies; 52+ messages in thread From: Dharmik Thakkar @ 2020-11-04 16:49 UTC (permalink / raw) To: Medvedkin, Vladimir; +Cc: Bruce Richardson, dev, nd > On Nov 4, 2020, at 9:46 AM, Medvedkin, Vladimir <vladimir.medvedkin@intel.com> wrote: > > Hi Thakkar, > > On 03/11/2020 22:23, Dharmik Thakkar wrote: >> Avoid code duplication by combining single and multi threaded tests >> Also, enable support for more than 2 writers >> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> >> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> >> --- >> app/test/test_lpm_perf.c | 359 +++++++++------------------------------ >> 1 file changed, 84 insertions(+), 275 deletions(-) >> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c >> index c8e70ec89ff5..a1485e74e77f 100644 >> --- a/app/test/test_lpm_perf.c >> +++ b/app/test/test_lpm_perf.c >> @@ -23,6 +23,7 @@ static struct rte_rcu_qsbr *rv; >> static volatile uint8_t writer_done; >> static volatile uint32_t thr_id; >> static uint64_t gwrite_cycles; >> +static uint32_t single_insert; >> /* LPM APIs are not thread safe, use mutex to provide thread safety */ >> static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER; >> @@ -430,24 +431,21 @@ test_lpm_rcu_qsbr_writer(void *arg) >> { >> unsigned int i, j, si, ei; >> uint64_t begin, total_cycles; >> - uint8_t core_id = (uint8_t)((uintptr_t)arg); >> uint32_t next_hop_add = 0xAA; >> + bool single_writer = (single_insert == NUM_LDEPTH_ROUTE_ENTRIES) ? 
>> + true : false; >> + uint8_t pos_core = (uint8_t)((uintptr_t)arg); >> - /* 2 writer threads are used */ >> - if (core_id % 2 == 0) { >> - si = 0; >> - ei = NUM_LDEPTH_ROUTE_ENTRIES / 2; >> - } else { >> - si = NUM_LDEPTH_ROUTE_ENTRIES / 2; >> - ei = NUM_LDEPTH_ROUTE_ENTRIES; >> - } >> + si = pos_core * single_insert; >> + ei = si + single_insert; > > In this case, given that you are doing > "single_insert = NUM_LDEPTH_ROUTE_ENTRIES / j;" > below, the number of ldepth_routes must be a multiple of the number of writers, so some number of routes can be skipped in the opposite case. Consider something like: > > number_of_writers = j; > ... > si = (pos_core * NUM_LDEPTH_ROUTE_ENTRIES)/number_of_writers; > ei = ((pos_core + 1 ) * NUM_LDEPTH_ROUTE_ENTRIES)/number_of_writers; > Yes, agreed some routes can be skipped. I will update the patch with the above changes. Thanks! > >> /* Measure add/delete. */ >> begin = rte_rdtsc_precise(); >> for (i = 0; i < RCU_ITERATIONS; i++) { >> /* Add all the entries */ >> for (j = si; j < ei; j++) { >> - pthread_mutex_lock(&lpm_mutex); >> + if (!single_writer) >> + pthread_mutex_lock(&lpm_mutex); >> if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, >> large_ldepth_route_table[j].depth, >> next_hop_add) != 0) { >> @@ -455,19 +453,22 @@ test_lpm_rcu_qsbr_writer(void *arg) >> i, j); >> goto error; >> } >> - pthread_mutex_unlock(&lpm_mutex); >> + if (!single_writer) >> + pthread_mutex_unlock(&lpm_mutex); >> } >> /* Delete all the entries */ >> for (j = si; j < ei; j++) { >> - pthread_mutex_lock(&lpm_mutex); >> + if (!single_writer) >> + pthread_mutex_lock(&lpm_mutex); >> if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, >> large_ldepth_route_table[j].depth) != 0) { >> printf("Failed to delete iteration %d, route# %d\n", >> i, j); >> goto error; >> } >> - pthread_mutex_unlock(&lpm_mutex); >> + if (!single_writer) >> + pthread_mutex_unlock(&lpm_mutex); >> } >> } >> @@ -478,22 +479,24 @@ test_lpm_rcu_qsbr_writer(void *arg) >> return 
0; >> error: >> - pthread_mutex_unlock(&lpm_mutex); >> + if (!single_writer) >> + pthread_mutex_unlock(&lpm_mutex); >> return -1; >> } >> /* >> * Functional test: >> - * 2 writers, rest are readers >> + * 1/2 writers, rest are readers >> */ >> static int >> -test_lpm_rcu_perf_multi_writer(void) >> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu) >> { >> struct rte_lpm_config config; >> size_t sz; >> - unsigned int i; >> + unsigned int i, j; >> uint16_t core_id; >> struct rte_lpm_rcu_config rcu_cfg = {0}; >> + int (*reader_f)(void *arg) = NULL; >> if (rte_lcore_count() < 3) { >> printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n"); >> @@ -506,273 +509,79 @@ test_lpm_rcu_perf_multi_writer(void) >> num_cores++; >> } >> - printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n", >> - num_cores - 2); >> - >> - /* Create LPM table */ >> - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; >> - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; >> - config.flags = 0; >> - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); >> - TEST_LPM_ASSERT(lpm != NULL); >> - >> - /* Init RCU variable */ >> - sz = rte_rcu_qsbr_get_memsize(num_cores); >> - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, >> - RTE_CACHE_LINE_SIZE); >> - rte_rcu_qsbr_init(rv, num_cores); >> - >> - rcu_cfg.v = rv; >> - /* Assign the RCU variable to LPM */ >> - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { >> - printf("RCU variable assignment failed\n"); >> - goto error; >> - } >> - >> - writer_done = 0; >> - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); >> - >> - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); >> - >> - /* Launch reader threads */ >> - for (i = 2; i < num_cores; i++) >> - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, >> - enabled_core_ids[i]); >> - >> - /* Launch writer threads */ >> - for (i = 0; i < 2; i++) >> - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, >> - (void *)(uintptr_t)i, >> - enabled_core_ids[i]); >> - >> - /* Wait for 
writer threads */ >> - for (i = 0; i < 2; i++) >> - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) >> - goto error; >> - >> - printf("Total LPM Adds: %d\n", TOTAL_WRITES); >> - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); >> - printf("Average LPM Add/Del: %"PRIu64" cycles\n", >> - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) >> - / TOTAL_WRITES); >> - >> - writer_done = 1; >> - /* Wait until all readers have exited */ >> - for (i = 2; i < num_cores; i++) >> - rte_eal_wait_lcore(enabled_core_ids[i]); >> - >> - rte_lpm_free(lpm); >> - rte_free(rv); >> - lpm = NULL; >> - rv = NULL; >> - >> - /* Test without RCU integration */ >> - printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n", >> - num_cores - 2); >> - >> - /* Create LPM table */ >> - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; >> - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; >> - config.flags = 0; >> - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); >> - TEST_LPM_ASSERT(lpm != NULL); >> - >> - writer_done = 0; >> - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); >> - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); >> - >> - /* Launch reader threads */ >> - for (i = 2; i < num_cores; i++) >> - rte_eal_remote_launch(test_lpm_reader, NULL, >> - enabled_core_ids[i]); >> - >> - /* Launch writer threads */ >> - for (i = 0; i < 2; i++) >> - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, >> - (void *)(uintptr_t)i, >> - enabled_core_ids[i]); >> - >> - /* Wait for writer threads */ >> - for (i = 0; i < 2; i++) >> - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) >> - goto error; >> - >> - printf("Total LPM Adds: %d\n", TOTAL_WRITES); >> - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); >> - printf("Average LPM Add/Del: %"PRIu64" cycles\n", >> - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) >> - / TOTAL_WRITES); >> - >> - writer_done = 1; >> - /* Wait until all readers have exited */ >> - for (i = 2; i < num_cores; i++) >> - rte_eal_wait_lcore(enabled_core_ids[i]); 
>> - >> - rte_lpm_free(lpm); >> - >> - return 0; >> - >> -error: >> - writer_done = 1; >> - /* Wait until all readers have exited */ >> - rte_eal_mp_wait_lcore(); >> - >> - rte_lpm_free(lpm); >> - rte_free(rv); >> - >> - return -1; >> -} >> - >> -/* >> - * Functional test: >> - * Single writer, rest are readers >> - */ >> -static int >> -test_lpm_rcu_perf(void) >> -{ >> - struct rte_lpm_config config; >> - uint64_t begin, total_cycles; >> - size_t sz; >> - unsigned int i, j; >> - uint16_t core_id; >> - uint32_t next_hop_add = 0xAA; >> - struct rte_lpm_rcu_config rcu_cfg = {0}; >> - >> - if (rte_lcore_count() < 2) { >> - printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n"); >> - return TEST_SKIPPED; >> - } >> - >> - num_cores = 0; >> - RTE_LCORE_FOREACH_WORKER(core_id) { >> - enabled_core_ids[num_cores] = core_id; >> - num_cores++; >> - } >> - >> - printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n", >> - num_cores); >> - >> - /* Create LPM table */ >> - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; >> - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; >> - config.flags = 0; >> - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); >> - TEST_LPM_ASSERT(lpm != NULL); >> - >> - /* Init RCU variable */ >> - sz = rte_rcu_qsbr_get_memsize(num_cores); >> - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, >> - RTE_CACHE_LINE_SIZE); >> - rte_rcu_qsbr_init(rv, num_cores); >> - >> - rcu_cfg.v = rv; >> - /* Assign the RCU variable to LPM */ >> - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { >> - printf("RCU variable assignment failed\n"); >> - goto error; >> - } >> - >> - writer_done = 0; >> - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); >> - >> - /* Launch reader threads */ >> - for (i = 0; i < num_cores; i++) >> - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, >> - enabled_core_ids[i]); >> - >> - /* Measure add/delete. 
*/ >> - begin = rte_rdtsc_precise(); >> - for (i = 0; i < RCU_ITERATIONS; i++) { >> - /* Add all the entries */ >> - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) >> - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, >> - large_ldepth_route_table[j].depth, >> - next_hop_add) != 0) { >> - printf("Failed to add iteration %d, route# %d\n", >> - i, j); >> - goto error; >> - } >> - >> - /* Delete all the entries */ >> - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) >> - if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, >> - large_ldepth_route_table[j].depth) != 0) { >> - printf("Failed to delete iteration %d, route# %d\n", >> - i, j); >> + for (j = 1; j < 3; j++) { >> + if (use_rcu) >> + printf("\nPerf test: %d writer(s), %d reader(s)," >> + " RCU integration enabled\n", j, num_cores - j); >> + else >> + printf("\nPerf test: %d writer(s), %d reader(s)," >> + " RCU integration disabled\n", j, num_cores - j); >> + >> + /* Calculate writes by each writer */ >> + single_insert = NUM_LDEPTH_ROUTE_ENTRIES / j; >> + >> + /* Create LPM table */ >> + config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; >> + config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; >> + config.flags = 0; >> + lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); >> + TEST_LPM_ASSERT(lpm != NULL); >> + >> + /* Init RCU variable */ >> + if (use_rcu) { >> + sz = rte_rcu_qsbr_get_memsize(num_cores); >> + rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, >> + RTE_CACHE_LINE_SIZE); >> + rte_rcu_qsbr_init(rv, num_cores); >> + >> + rcu_cfg.v = rv; >> + /* Assign the RCU variable to LPM */ >> + if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { >> + printf("RCU variable assignment failed\n"); >> goto error; >> } >> - } >> - total_cycles = rte_rdtsc_precise() - begin; >> - printf("Total LPM Adds: %d\n", TOTAL_WRITES); >> - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); >> - printf("Average LPM Add/Del: %g cycles\n", >> - (double)total_cycles / TOTAL_WRITES); >> + reader_f = test_lpm_rcu_qsbr_reader; >> + } 
else >> + reader_f = test_lpm_reader; >> - writer_done = 1; >> - /* Wait until all readers have exited */ >> - for (i = 0; i < num_cores; i++) >> - rte_eal_wait_lcore(enabled_core_ids[i]); >> - >> - rte_lpm_free(lpm); >> - rte_free(rv); >> - lpm = NULL; >> - rv = NULL; >> + writer_done = 0; >> + __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); >> - /* Test without RCU integration */ >> - printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n", >> - num_cores); >> - >> - /* Create LPM table */ >> - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; >> - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; >> - config.flags = 0; >> - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); >> - TEST_LPM_ASSERT(lpm != NULL); >> + __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); >> - writer_done = 0; >> - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); >> + /* Launch reader threads */ >> + for (i = j; i < num_cores; i++) >> + rte_eal_remote_launch(reader_f, NULL, >> + enabled_core_ids[i]); >> - /* Launch reader threads */ >> - for (i = 0; i < num_cores; i++) >> - rte_eal_remote_launch(test_lpm_reader, NULL, >> - enabled_core_ids[i]); >> + /* Launch writer threads */ >> + for (i = 0; i < j; i++) >> + rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, >> + (void *)(uintptr_t)i, >> + enabled_core_ids[i]); >> - /* Measure add/delete. 
*/ >> - begin = rte_rdtsc_precise(); >> - for (i = 0; i < RCU_ITERATIONS; i++) { >> - /* Add all the entries */ >> - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) >> - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, >> - large_ldepth_route_table[j].depth, >> - next_hop_add) != 0) { >> - printf("Failed to add iteration %d, route# %d\n", >> - i, j); >> + /* Wait for writer threads */ >> + for (i = 0; i < j; i++) >> + if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) >> goto error; >> - } >> - /* Delete all the entries */ >> - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) >> - if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, >> - large_ldepth_route_table[j].depth) != 0) { >> - printf("Failed to delete iteration %d, route# %d\n", >> - i, j); >> - goto error; >> - } >> + printf("Total LPM Adds: %d\n", TOTAL_WRITES); >> + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); >> + printf("Average LPM Add/Del: %"PRIu64" cycles\n", >> + __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) >> + / TOTAL_WRITES); >> + >> + writer_done = 1; >> + /* Wait until all readers have exited */ >> + for (i = j; i < num_cores; i++) >> + rte_eal_wait_lcore(enabled_core_ids[i]); >> + >> + rte_lpm_free(lpm); >> + rte_free(rv); >> + lpm = NULL; >> + rv = NULL; >> } >> - total_cycles = rte_rdtsc_precise() - begin; >> - >> - printf("Total LPM Adds: %d\n", TOTAL_WRITES); >> - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); >> - printf("Average LPM Add/Del: %g cycles\n", >> - (double)total_cycles / TOTAL_WRITES); >> - >> - writer_done = 1; >> - /* Wait until all readers have exited */ >> - for (i = 0; i < num_cores; i++) >> - rte_eal_wait_lcore(enabled_core_ids[i]); >> - >> - rte_lpm_free(lpm); >> return 0; >> @@ -948,10 +757,10 @@ test_lpm_perf(void) >> rte_lpm_delete_all(lpm); >> rte_lpm_free(lpm); >> - if (test_lpm_rcu_perf() < 0) >> + if (test_lpm_rcu_perf_multi_writer(0) < 0) >> return -1; >> - if (test_lpm_rcu_perf_multi_writer() < 0) >> + if (test_lpm_rcu_perf_multi_writer(1) < 0) 
>> return -1; >> return 0; > > -- > Regards, > Vladimir ^ permalink raw reply [flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test 2020-11-03 22:23 ` [dpdk-dev] [PATCH v4 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar ` (3 preceding siblings ...) 2020-11-03 22:23 ` [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication " Dharmik Thakkar @ 2020-11-04 18:58 ` Dharmik Thakkar 2020-11-04 18:58 ` [dpdk-dev] [PATCH v5 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar ` (4 more replies) 4 siblings, 5 replies; 52+ messages in thread From: Dharmik Thakkar @ 2020-11-04 18:58 UTC (permalink / raw) Cc: dev, nd, Dharmik Thakkar Fix LPM adds, LPM deletes, and cycle calculation. Return error if LPM add/delete fails in multi-writer test. Return error if single or multi writer test fails. Remove redundant error checking for readers. Combine single and multi threaded test cases to avoid code duplication. --- v5: - Update logic for routes inserted by each writer in multi writer test to avoid possibility of routes getting skipped v4: - Return error if rcu qsbr test fails - Improve multi writer test to enable more than 2 writers v3: - Add 'goto error' - Remove unnecessary if statement v2: - Add more details about the fix to the commit message - Replace hard coded values with an enum - Remove lock acquire/release for single writer Dharmik Thakkar (4): test/lpm: fix cycle calculation in rcu qsbr perf test/lpm: return error on failure in rcu qsbr perf test/lpm: remove error checking in rcu qsbr perf test/lpm: avoid code duplication in rcu qsbr perf app/test/test_lpm_perf.c | 380 +++++++++------------------------------ 1 file changed, 88 insertions(+), 292 deletions(-) -- 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v5 1/4] test/lpm: fix cycle calculation in rcu qsbr perf 2020-11-04 18:58 ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar @ 2020-11-04 18:58 ` Dharmik Thakkar 2020-11-04 19:34 ` Medvedkin, Vladimir 2020-11-04 18:58 ` [dpdk-dev] [PATCH v5 2/4] test/lpm: return error on failure " Dharmik Thakkar ` (3 subsequent siblings) 4 siblings, 1 reply; 52+ messages in thread From: Dharmik Thakkar @ 2020-11-04 18:58 UTC (permalink / raw) To: Bruce Richardson, Vladimir Medvedkin, Gavin Hu, Ruifeng Wang, Honnappa Nagarahalli Cc: dev, nd, Dharmik Thakkar, stable Fix incorrect calculations for LPM adds, LPM deletes, and average cycles in RCU QSBR perf tests. Since the RCU QSBR tests run for 'RCU_ITERATIONS' and not 'ITERATIONS', replace 'ITERATIONS' with 'RCU_ITERATIONS' for calculating adds, deletes, and cycles. Also, for multi-writer perf test, each writer only writes half of NUM_LDEPTH_ROUTE_ENTRIES. For 2 writers, total adds (or deletes) should be (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) instead of (2 * RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES). Since, for both the single and multi writer tests, total adds/deletes is equal to (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES), this has been replaced with a macro 'TOTAL_WRITES' and furthermore, 'gwrites' has been removed since it is always a fixed value equal to TOTAL_WRITES.
Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") Cc: honnappa.nagarahalli@arm.com Cc: stable@dpdk.org Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> --- app/test/test_lpm_perf.c | 45 ++++++++++++++-------------------------- 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index c5a238b9d1e8..45164b23214b 100644 --- a/app/test/test_lpm_perf.c +++ b/app/test/test_lpm_perf.c @@ -23,7 +23,6 @@ static struct rte_rcu_qsbr *rv; static volatile uint8_t writer_done; static volatile uint32_t thr_id; static uint64_t gwrite_cycles; -static uint64_t gwrites; /* LPM APIs are not thread safe, use mutex to provide thread safety */ static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -60,6 +59,8 @@ static uint32_t num_ldepth_route_entries; #define NUM_ROUTE_ENTRIES num_route_entries #define NUM_LDEPTH_ROUTE_ENTRIES num_ldepth_route_entries +#define TOTAL_WRITES (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) + enum { IP_CLASS_A, IP_CLASS_B, @@ -432,7 +433,6 @@ test_lpm_rcu_qsbr_writer(void *arg) uint8_t core_id = (uint8_t)((uintptr_t)arg); uint32_t next_hop_add = 0xAA; - RTE_SET_USED(arg); /* 2 writer threads are used */ if (core_id % 2 == 0) { si = 0; @@ -472,9 +472,6 @@ test_lpm_rcu_qsbr_writer(void *arg) total_cycles = rte_rdtsc_precise() - begin; __atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED); - __atomic_fetch_add(&gwrites, - 2 * NUM_LDEPTH_ROUTE_ENTRIES * RCU_ITERATIONS, - __ATOMIC_RELAXED); return 0; } @@ -528,7 +525,6 @@ test_lpm_rcu_perf_multi_writer(void) writer_done = 0; __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); - __atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED); __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); @@ -548,14 +544,11 @@ test_lpm_rcu_perf_multi_writer(void) if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) goto error; - 
printf("Total LPM Adds: %d\n", - 2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); - printf("Total LPM Deletes: %d\n", - 2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); + printf("Total LPM Adds: %d\n", TOTAL_WRITES); + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); printf("Average LPM Add/Del: %"PRIu64" cycles\n", - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / - __atomic_load_n(&gwrites, __ATOMIC_RELAXED) - ); + __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) + / TOTAL_WRITES); /* Wait and check return value from reader threads */ writer_done = 1; @@ -581,7 +574,6 @@ test_lpm_rcu_perf_multi_writer(void) writer_done = 0; __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); - __atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED); __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); /* Launch reader threads */ @@ -600,14 +592,11 @@ test_lpm_rcu_perf_multi_writer(void) if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) goto error; - printf("Total LPM Adds: %d\n", - 2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); - printf("Total LPM Deletes: %d\n", - 2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); + printf("Total LPM Adds: %d\n", TOTAL_WRITES); + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); printf("Average LPM Add/Del: %"PRIu64" cycles\n", - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / - __atomic_load_n(&gwrites, __ATOMIC_RELAXED) - ); + __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) + / TOTAL_WRITES); writer_done = 1; /* Wait and check return value from reader threads */ @@ -711,11 +700,10 @@ test_lpm_rcu_perf(void) } total_cycles = rte_rdtsc_precise() - begin; - printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); - printf("Total LPM Deletes: %d\n", - ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); + printf("Total LPM Adds: %d\n", TOTAL_WRITES); + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); printf("Average LPM Add/Del: %g cycles\n", - (double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS)); + (double)total_cycles / TOTAL_WRITES); writer_done = 1; /* 
Wait and check return value from reader threads */ @@ -771,11 +759,10 @@ test_lpm_rcu_perf(void) } total_cycles = rte_rdtsc_precise() - begin; - printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); - printf("Total LPM Deletes: %d\n", - ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); + printf("Total LPM Adds: %d\n", TOTAL_WRITES); + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); printf("Average LPM Add/Del: %g cycles\n", - (double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS)); + (double)total_cycles / TOTAL_WRITES); writer_done = 1; /* Wait and check return value from reader threads */ -- 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v5 1/4] test/lpm: fix cycle calculation in rcu qsbr perf 2020-11-04 18:58 ` [dpdk-dev] [PATCH v5 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar @ 2020-11-04 19:34 ` Medvedkin, Vladimir 0 siblings, 0 replies; 52+ messages in thread From: Medvedkin, Vladimir @ 2020-11-04 19:34 UTC (permalink / raw) To: Dharmik Thakkar, Bruce Richardson, Gavin Hu, Ruifeng Wang, Honnappa Nagarahalli Cc: dev, nd, stable On 04/11/2020 18:58, Dharmik Thakkar wrote: > Fix incorrect calculations for LPM adds, LPM deletes, > and average cycles in RCU QSBR perf tests > > Since, rcu qsbr tests run for 'RCU_ITERATIONS' and not > 'ITERATIONS', replace 'ITERATIONS' with 'RCU_ITERATIONS' > for calculating adds, deletes, and cycles. > > Also, for multi-writer perf test, each writer only writes > half of NUM_LDEPTH_ROUTE_ENTRIES. > For 2 writers, total adds (or deletes) should be > (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) instead of > (2 * RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES). > > Since, for both the single and multi writer tests, total adds/deletes > is equal to (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES), > this has been replaced with a macro 'TOTAL_WRITES' and furthermore, > 'g_writes' has been removed since it is always a fixed value > equal to TOTAL_WRITES. 
> > Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") > Cc: honnappa.nagarahalli@arm.com > Cc: stable@dpdk.org > > Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> > Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> > --- > app/test/test_lpm_perf.c | 45 ++++++++++++++-------------------------- > 1 file changed, 16 insertions(+), 29 deletions(-) > > diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c > index c5a238b9d1e8..45164b23214b 100644 > --- a/app/test/test_lpm_perf.c > +++ b/app/test/test_lpm_perf.c > @@ -23,7 +23,6 @@ static struct rte_rcu_qsbr *rv; > static volatile uint8_t writer_done; > static volatile uint32_t thr_id; > static uint64_t gwrite_cycles; > -static uint64_t gwrites; > /* LPM APIs are not thread safe, use mutex to provide thread safety */ > static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER; > > @@ -60,6 +59,8 @@ static uint32_t num_ldepth_route_entries; > #define NUM_ROUTE_ENTRIES num_route_entries > #define NUM_LDEPTH_ROUTE_ENTRIES num_ldepth_route_entries > > +#define TOTAL_WRITES (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) > + > enum { > IP_CLASS_A, > IP_CLASS_B, > @@ -432,7 +433,6 @@ test_lpm_rcu_qsbr_writer(void *arg) > uint8_t core_id = (uint8_t)((uintptr_t)arg); > uint32_t next_hop_add = 0xAA; > > - RTE_SET_USED(arg); > /* 2 writer threads are used */ > if (core_id % 2 == 0) { > si = 0; > @@ -472,9 +472,6 @@ test_lpm_rcu_qsbr_writer(void *arg) > total_cycles = rte_rdtsc_precise() - begin; > > __atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED); > - __atomic_fetch_add(&gwrites, > - 2 * NUM_LDEPTH_ROUTE_ENTRIES * RCU_ITERATIONS, > - __ATOMIC_RELAXED); > > return 0; > } > @@ -528,7 +525,6 @@ test_lpm_rcu_perf_multi_writer(void) > > writer_done = 0; > __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > - __atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED); > > __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > > @@ 
-548,14 +544,11 @@ test_lpm_rcu_perf_multi_writer(void) > if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > goto error; > > - printf("Total LPM Adds: %d\n", > - 2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); > - printf("Total LPM Deletes: %d\n", > - 2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); > + printf("Total LPM Adds: %d\n", TOTAL_WRITES); > + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > printf("Average LPM Add/Del: %"PRIu64" cycles\n", > - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / > - __atomic_load_n(&gwrites, __ATOMIC_RELAXED) > - ); > + __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) > + / TOTAL_WRITES); > > /* Wait and check return value from reader threads */ > writer_done = 1; > @@ -581,7 +574,6 @@ test_lpm_rcu_perf_multi_writer(void) > > writer_done = 0; > __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > - __atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED); > __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > > /* Launch reader threads */ > @@ -600,14 +592,11 @@ test_lpm_rcu_perf_multi_writer(void) > if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > goto error; > > - printf("Total LPM Adds: %d\n", > - 2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); > - printf("Total LPM Deletes: %d\n", > - 2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); > + printf("Total LPM Adds: %d\n", TOTAL_WRITES); > + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > printf("Average LPM Add/Del: %"PRIu64" cycles\n", > - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / > - __atomic_load_n(&gwrites, __ATOMIC_RELAXED) > - ); > + __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) > + / TOTAL_WRITES); > > writer_done = 1; > /* Wait and check return value from reader threads */ > @@ -711,11 +700,10 @@ test_lpm_rcu_perf(void) > } > total_cycles = rte_rdtsc_precise() - begin; > > - printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); > - printf("Total LPM Deletes: %d\n", > - ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); > + printf("Total LPM Adds: %d\n", 
TOTAL_WRITES); > + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > printf("Average LPM Add/Del: %g cycles\n", > - (double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS)); > + (double)total_cycles / TOTAL_WRITES); > > writer_done = 1; > /* Wait and check return value from reader threads */ > @@ -771,11 +759,10 @@ test_lpm_rcu_perf(void) > } > total_cycles = rte_rdtsc_precise() - begin; > > - printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); > - printf("Total LPM Deletes: %d\n", > - ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES); > + printf("Total LPM Adds: %d\n", TOTAL_WRITES); > + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > printf("Average LPM Add/Del: %g cycles\n", > - (double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS)); > + (double)total_cycles / TOTAL_WRITES); > > writer_done = 1; > /* Wait and check return value from reader threads */ > Acked-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com> -- Regards, Vladimir ^ permalink raw reply [flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v5 2/4] test/lpm: return error on failure in rcu qsbr perf 2020-11-04 18:58 ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar 2020-11-04 18:58 ` [dpdk-dev] [PATCH v5 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar @ 2020-11-04 18:58 ` Dharmik Thakkar 2020-11-04 19:35 ` Medvedkin, Vladimir 2020-11-04 18:58 ` [dpdk-dev] [PATCH v5 3/4] test/lpm: remove error checking " Dharmik Thakkar ` (2 subsequent siblings) 4 siblings, 1 reply; 52+ messages in thread From: Dharmik Thakkar @ 2020-11-04 18:58 UTC (permalink / raw) To: Bruce Richardson, Vladimir Medvedkin, Gavin Hu, Honnappa Nagarahalli, Ruifeng Wang Cc: dev, nd, Dharmik Thakkar, stable Return error if Add/Delete fail in multiwriter perf test Return error if single or multi writer test fails Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") Cc: honnappa.nagarahalli@arm.com Cc: stable@dpdk.org Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> --- app/test/test_lpm_perf.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index 45164b23214b..873ecf511c97 100644 --- a/app/test/test_lpm_perf.c +++ b/app/test/test_lpm_perf.c @@ -453,6 +453,7 @@ test_lpm_rcu_qsbr_writer(void *arg) next_hop_add) != 0) { printf("Failed to add iteration %d, route# %d\n", i, j); + goto error; } pthread_mutex_unlock(&lpm_mutex); } @@ -464,6 +465,7 @@ test_lpm_rcu_qsbr_writer(void *arg) large_ldepth_route_table[j].depth) != 0) { printf("Failed to delete iteration %d, route# %d\n", i, j); + goto error; } pthread_mutex_unlock(&lpm_mutex); } @@ -474,6 +476,10 @@ test_lpm_rcu_qsbr_writer(void *arg) __atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED); return 0; + +error: + pthread_mutex_unlock(&lpm_mutex); + return -1; } /* @@ -947,9 +953,11 @@ 
test_lpm_perf(void) rte_lpm_delete_all(lpm); rte_lpm_free(lpm); - test_lpm_rcu_perf(); + if (test_lpm_rcu_perf() < 0) + return -1; - test_lpm_rcu_perf_multi_writer(); + if (test_lpm_rcu_perf_multi_writer() < 0) + return -1; return 0; } -- 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v5 2/4] test/lpm: return error on failure in rcu qsbr perf 2020-11-04 18:58 ` [dpdk-dev] [PATCH v5 2/4] test/lpm: return error on failure " Dharmik Thakkar @ 2020-11-04 19:35 ` Medvedkin, Vladimir 0 siblings, 0 replies; 52+ messages in thread From: Medvedkin, Vladimir @ 2020-11-04 19:35 UTC (permalink / raw) To: Dharmik Thakkar, Bruce Richardson, Gavin Hu, Honnappa Nagarahalli, Ruifeng Wang Cc: dev, nd, stable On 04/11/2020 18:58, Dharmik Thakkar wrote: > Return error if Add/Delete fail in multiwriter perf test > > Return error if single or multi writer test fails > > Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") > Cc: honnappa.nagarahalli@arm.com > Cc: stable@dpdk.org > > Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> > Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> > --- > app/test/test_lpm_perf.c | 12 ++++++++++-- > 1 file changed, 10 insertions(+), 2 deletions(-) > > diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c > index 45164b23214b..873ecf511c97 100644 > --- a/app/test/test_lpm_perf.c > +++ b/app/test/test_lpm_perf.c > @@ -453,6 +453,7 @@ test_lpm_rcu_qsbr_writer(void *arg) > next_hop_add) != 0) { > printf("Failed to add iteration %d, route# %d\n", > i, j); > + goto error; > } > pthread_mutex_unlock(&lpm_mutex); > } > @@ -464,6 +465,7 @@ test_lpm_rcu_qsbr_writer(void *arg) > large_ldepth_route_table[j].depth) != 0) { > printf("Failed to delete iteration %d, route# %d\n", > i, j); > + goto error; > } > pthread_mutex_unlock(&lpm_mutex); > } > @@ -474,6 +476,10 @@ test_lpm_rcu_qsbr_writer(void *arg) > __atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED); > > return 0; > + > +error: > + pthread_mutex_unlock(&lpm_mutex); > + return -1; > } > > /* > @@ -947,9 +953,11 @@ test_lpm_perf(void) > rte_lpm_delete_all(lpm); > rte_lpm_free(lpm); > > - test_lpm_rcu_perf(); > + if (test_lpm_rcu_perf() < 0) > + return -1; > > - 
test_lpm_rcu_perf_multi_writer(); > + if (test_lpm_rcu_perf_multi_writer() < 0) > + return -1; > > return 0; > } > Acked-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com> -- Regards, Vladimir ^ permalink raw reply [flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v5 3/4] test/lpm: remove error checking in rcu qsbr perf 2020-11-04 18:58 ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar 2020-11-04 18:58 ` [dpdk-dev] [PATCH v5 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar 2020-11-04 18:58 ` [dpdk-dev] [PATCH v5 2/4] test/lpm: return error on failure " Dharmik Thakkar @ 2020-11-04 18:58 ` Dharmik Thakkar 2020-11-04 19:35 ` Medvedkin, Vladimir 2020-11-04 18:58 ` [dpdk-dev] [PATCH v5 4/4] test/lpm: avoid code duplication " Dharmik Thakkar 2020-11-05 15:58 ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test David Marchand 4 siblings, 1 reply; 52+ messages in thread From: Dharmik Thakkar @ 2020-11-04 18:58 UTC (permalink / raw) To: Bruce Richardson, Vladimir Medvedkin, Honnappa Nagarahalli, Ruifeng Wang, Gavin Hu Cc: dev, nd, Dharmik Thakkar, stable Remove redundant error checking for reader threads since they never return error. Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") Cc: honnappa.nagarahalli@arm.com Cc: stable@dpdk.org Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> --- app/test/test_lpm_perf.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index 873ecf511c97..c8e70ec89ff5 100644 --- a/app/test/test_lpm_perf.c +++ b/app/test/test_lpm_perf.c @@ -556,11 +556,10 @@ test_lpm_rcu_perf_multi_writer(void) __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / TOTAL_WRITES); - /* Wait and check return value from reader threads */ writer_done = 1; + /* Wait until all readers have exited */ for (i = 2; i < num_cores; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - goto error; + rte_eal_wait_lcore(enabled_core_ids[i]); rte_lpm_free(lpm); rte_free(rv); @@ -605,10 +604,9 @@ test_lpm_rcu_perf_multi_writer(void) / 
TOTAL_WRITES); writer_done = 1; - /* Wait and check return value from reader threads */ + /* Wait until all readers have exited */ for (i = 2; i < num_cores; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - goto error; + rte_eal_wait_lcore(enabled_core_ids[i]); rte_lpm_free(lpm); @@ -712,10 +710,9 @@ test_lpm_rcu_perf(void) (double)total_cycles / TOTAL_WRITES); writer_done = 1; - /* Wait and check return value from reader threads */ + /* Wait until all readers have exited */ for (i = 0; i < num_cores; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - goto error; + rte_eal_wait_lcore(enabled_core_ids[i]); rte_lpm_free(lpm); rte_free(rv); @@ -771,11 +768,9 @@ test_lpm_rcu_perf(void) (double)total_cycles / TOTAL_WRITES); writer_done = 1; - /* Wait and check return value from reader threads */ + /* Wait until all readers have exited */ for (i = 0; i < num_cores; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - printf("Warning: lcore %u not finished.\n", - enabled_core_ids[i]); + rte_eal_wait_lcore(enabled_core_ids[i]); rte_lpm_free(lpm); -- 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v5 3/4] test/lpm: remove error checking in rcu qsbr perf 2020-11-04 18:58 ` [dpdk-dev] [PATCH v5 3/4] test/lpm: remove error checking " Dharmik Thakkar @ 2020-11-04 19:35 ` Medvedkin, Vladimir 0 siblings, 0 replies; 52+ messages in thread From: Medvedkin, Vladimir @ 2020-11-04 19:35 UTC (permalink / raw) To: Dharmik Thakkar, Bruce Richardson, Honnappa Nagarahalli, Ruifeng Wang, Gavin Hu Cc: dev, nd, stable On 04/11/2020 18:58, Dharmik Thakkar wrote: > Remove redundant error checking for reader threads > since they never return error. > > Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests") > Cc: honnappa.nagarahalli@arm.com > Cc: stable@dpdk.org > > Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> > Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> > --- > app/test/test_lpm_perf.c | 21 ++++++++------------- > 1 file changed, 8 insertions(+), 13 deletions(-) > > diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c > index 873ecf511c97..c8e70ec89ff5 100644 > --- a/app/test/test_lpm_perf.c > +++ b/app/test/test_lpm_perf.c > @@ -556,11 +556,10 @@ test_lpm_rcu_perf_multi_writer(void) > __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) > / TOTAL_WRITES); > > - /* Wait and check return value from reader threads */ > writer_done = 1; > + /* Wait until all readers have exited */ > for (i = 2; i < num_cores; i++) > - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > - goto error; > + rte_eal_wait_lcore(enabled_core_ids[i]); > > rte_lpm_free(lpm); > rte_free(rv); > @@ -605,10 +604,9 @@ test_lpm_rcu_perf_multi_writer(void) > / TOTAL_WRITES); > > writer_done = 1; > - /* Wait and check return value from reader threads */ > + /* Wait until all readers have exited */ > for (i = 2; i < num_cores; i++) > - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > - goto error; > + rte_eal_wait_lcore(enabled_core_ids[i]); > > rte_lpm_free(lpm); > > @@ -712,10 +710,9 @@ 
test_lpm_rcu_perf(void) > (double)total_cycles / TOTAL_WRITES); > > writer_done = 1; > - /* Wait and check return value from reader threads */ > + /* Wait until all readers have exited */ > for (i = 0; i < num_cores; i++) > - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > - goto error; > + rte_eal_wait_lcore(enabled_core_ids[i]); > > rte_lpm_free(lpm); > rte_free(rv); > @@ -771,11 +768,9 @@ test_lpm_rcu_perf(void) > (double)total_cycles / TOTAL_WRITES); > > writer_done = 1; > - /* Wait and check return value from reader threads */ > + /* Wait until all readers have exited */ > for (i = 0; i < num_cores; i++) > - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > - printf("Warning: lcore %u not finished.\n", > - enabled_core_ids[i]); > + rte_eal_wait_lcore(enabled_core_ids[i]); > > rte_lpm_free(lpm); > > Acked-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com> -- Regards, Vladimir ^ permalink raw reply [flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v5 4/4] test/lpm: avoid code duplication in rcu qsbr perf 2020-11-04 18:58 ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar ` (2 preceding siblings ...) 2020-11-04 18:58 ` [dpdk-dev] [PATCH v5 3/4] test/lpm: remove error checking " Dharmik Thakkar @ 2020-11-04 18:58 ` Dharmik Thakkar 2020-11-04 19:35 ` Medvedkin, Vladimir 2020-11-05 15:58 ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test David Marchand 4 siblings, 1 reply; 52+ messages in thread From: Dharmik Thakkar @ 2020-11-04 18:58 UTC (permalink / raw) To: Bruce Richardson, Vladimir Medvedkin; +Cc: dev, nd, Dharmik Thakkar Avoid code duplication by combining single and multi threaded tests Also, enable support for more than 2 writers Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> --- app/test/test_lpm_perf.c | 356 +++++++++------------------------------ 1 file changed, 81 insertions(+), 275 deletions(-) diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index c8e70ec89ff5..2bed00d0648f 100644 --- a/app/test/test_lpm_perf.c +++ b/app/test/test_lpm_perf.c @@ -23,6 +23,7 @@ static struct rte_rcu_qsbr *rv; static volatile uint8_t writer_done; static volatile uint32_t thr_id; static uint64_t gwrite_cycles; +static uint32_t num_writers; /* LPM APIs are not thread safe, use mutex to provide thread safety */ static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -430,24 +431,19 @@ test_lpm_rcu_qsbr_writer(void *arg) { unsigned int i, j, si, ei; uint64_t begin, total_cycles; - uint8_t core_id = (uint8_t)((uintptr_t)arg); uint32_t next_hop_add = 0xAA; + uint8_t pos_core = (uint8_t)((uintptr_t)arg); - /* 2 writer threads are used */ - if (core_id % 2 == 0) { - si = 0; - ei = NUM_LDEPTH_ROUTE_ENTRIES / 2; - } else { - si = NUM_LDEPTH_ROUTE_ENTRIES / 2; - ei = NUM_LDEPTH_ROUTE_ENTRIES; - } + si = (pos_core * 
NUM_LDEPTH_ROUTE_ENTRIES) / num_writers; + ei = ((pos_core + 1) * NUM_LDEPTH_ROUTE_ENTRIES) / num_writers; /* Measure add/delete. */ begin = rte_rdtsc_precise(); for (i = 0; i < RCU_ITERATIONS; i++) { /* Add all the entries */ for (j = si; j < ei; j++) { - pthread_mutex_lock(&lpm_mutex); + if (num_writers > 1) + pthread_mutex_lock(&lpm_mutex); if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, large_ldepth_route_table[j].depth, next_hop_add) != 0) { @@ -455,19 +451,22 @@ test_lpm_rcu_qsbr_writer(void *arg) i, j); goto error; } - pthread_mutex_unlock(&lpm_mutex); + if (num_writers > 1) + pthread_mutex_unlock(&lpm_mutex); } /* Delete all the entries */ for (j = si; j < ei; j++) { - pthread_mutex_lock(&lpm_mutex); + if (num_writers > 1) + pthread_mutex_lock(&lpm_mutex); if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, large_ldepth_route_table[j].depth) != 0) { printf("Failed to delete iteration %d, route# %d\n", i, j); goto error; } - pthread_mutex_unlock(&lpm_mutex); + if (num_writers > 1) + pthread_mutex_unlock(&lpm_mutex); } } @@ -478,22 +477,24 @@ test_lpm_rcu_qsbr_writer(void *arg) return 0; error: - pthread_mutex_unlock(&lpm_mutex); + if (num_writers > 1) + pthread_mutex_unlock(&lpm_mutex); return -1; } /* * Functional test: - * 2 writers, rest are readers + * 1/2 writers, rest are readers */ static int -test_lpm_rcu_perf_multi_writer(void) +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu) { struct rte_lpm_config config; size_t sz; - unsigned int i; + unsigned int i, j; uint16_t core_id; struct rte_lpm_rcu_config rcu_cfg = {0}; + int (*reader_f)(void *arg) = NULL; if (rte_lcore_count() < 3) { printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n"); @@ -506,273 +507,78 @@ test_lpm_rcu_perf_multi_writer(void) num_cores++; } - printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n", - num_cores - 2); - - /* Create LPM table */ - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; 
- config.flags = 0; - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); - TEST_LPM_ASSERT(lpm != NULL); - - /* Init RCU variable */ - sz = rte_rcu_qsbr_get_memsize(num_cores); - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, - RTE_CACHE_LINE_SIZE); - rte_rcu_qsbr_init(rv, num_cores); - - rcu_cfg.v = rv; - /* Assign the RCU variable to LPM */ - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { - printf("RCU variable assignment failed\n"); - goto error; - } - - writer_done = 0; - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); - - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); - - /* Launch reader threads */ - for (i = 2; i < num_cores; i++) - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, - enabled_core_ids[i]); - - /* Launch writer threads */ - for (i = 0; i < 2; i++) - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, - (void *)(uintptr_t)i, - enabled_core_ids[i]); - - /* Wait for writer threads */ - for (i = 0; i < 2; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - goto error; - - printf("Total LPM Adds: %d\n", TOTAL_WRITES); - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); - printf("Average LPM Add/Del: %"PRIu64" cycles\n", - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) - / TOTAL_WRITES); - - writer_done = 1; - /* Wait until all readers have exited */ - for (i = 2; i < num_cores; i++) - rte_eal_wait_lcore(enabled_core_ids[i]); - - rte_lpm_free(lpm); - rte_free(rv); - lpm = NULL; - rv = NULL; - - /* Test without RCU integration */ - printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n", - num_cores - 2); - - /* Create LPM table */ - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; - config.flags = 0; - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); - TEST_LPM_ASSERT(lpm != NULL); - - writer_done = 0; - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); - - /* Launch reader threads */ - for (i = 2; i 
< num_cores; i++) - rte_eal_remote_launch(test_lpm_reader, NULL, - enabled_core_ids[i]); - - /* Launch writer threads */ - for (i = 0; i < 2; i++) - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, - (void *)(uintptr_t)i, - enabled_core_ids[i]); - - /* Wait for writer threads */ - for (i = 0; i < 2; i++) - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) - goto error; - - printf("Total LPM Adds: %d\n", TOTAL_WRITES); - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); - printf("Average LPM Add/Del: %"PRIu64" cycles\n", - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) - / TOTAL_WRITES); - - writer_done = 1; - /* Wait until all readers have exited */ - for (i = 2; i < num_cores; i++) - rte_eal_wait_lcore(enabled_core_ids[i]); - - rte_lpm_free(lpm); - - return 0; - -error: - writer_done = 1; - /* Wait until all readers have exited */ - rte_eal_mp_wait_lcore(); - - rte_lpm_free(lpm); - rte_free(rv); - - return -1; -} - -/* - * Functional test: - * Single writer, rest are readers - */ -static int -test_lpm_rcu_perf(void) -{ - struct rte_lpm_config config; - uint64_t begin, total_cycles; - size_t sz; - unsigned int i, j; - uint16_t core_id; - uint32_t next_hop_add = 0xAA; - struct rte_lpm_rcu_config rcu_cfg = {0}; - - if (rte_lcore_count() < 2) { - printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n"); - return TEST_SKIPPED; - } - - num_cores = 0; - RTE_LCORE_FOREACH_WORKER(core_id) { - enabled_core_ids[num_cores] = core_id; - num_cores++; - } - - printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n", - num_cores); - - /* Create LPM table */ - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; - config.flags = 0; - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); - TEST_LPM_ASSERT(lpm != NULL); - - /* Init RCU variable */ - sz = rte_rcu_qsbr_get_memsize(num_cores); - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, - RTE_CACHE_LINE_SIZE); - rte_rcu_qsbr_init(rv, num_cores); - - 
rcu_cfg.v = rv; - /* Assign the RCU variable to LPM */ - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { - printf("RCU variable assignment failed\n"); - goto error; - } - - writer_done = 0; - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); - - /* Launch reader threads */ - for (i = 0; i < num_cores; i++) - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, - enabled_core_ids[i]); - - /* Measure add/delete. */ - begin = rte_rdtsc_precise(); - for (i = 0; i < RCU_ITERATIONS; i++) { - /* Add all the entries */ - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, - large_ldepth_route_table[j].depth, - next_hop_add) != 0) { - printf("Failed to add iteration %d, route# %d\n", - i, j); - goto error; - } - - /* Delete all the entries */ - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) - if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, - large_ldepth_route_table[j].depth) != 0) { - printf("Failed to delete iteration %d, route# %d\n", - i, j); + for (j = 1; j < 3; j++) { + if (use_rcu) + printf("\nPerf test: %d writer(s), %d reader(s)," + " RCU integration enabled\n", j, num_cores - j); + else + printf("\nPerf test: %d writer(s), %d reader(s)," + " RCU integration disabled\n", j, num_cores - j); + + num_writers = j; + + /* Create LPM table */ + config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; + config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; + config.flags = 0; + lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); + TEST_LPM_ASSERT(lpm != NULL); + + /* Init RCU variable */ + if (use_rcu) { + sz = rte_rcu_qsbr_get_memsize(num_cores); + rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, + RTE_CACHE_LINE_SIZE); + rte_rcu_qsbr_init(rv, num_cores); + + rcu_cfg.v = rv; + /* Assign the RCU variable to LPM */ + if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { + printf("RCU variable assignment failed\n"); goto error; } - } - total_cycles = rte_rdtsc_precise() - begin; - printf("Total LPM Adds: %d\n", TOTAL_WRITES); - 
printf("Total LPM Deletes: %d\n", TOTAL_WRITES); - printf("Average LPM Add/Del: %g cycles\n", - (double)total_cycles / TOTAL_WRITES); + reader_f = test_lpm_rcu_qsbr_reader; + } else + reader_f = test_lpm_reader; - writer_done = 1; - /* Wait until all readers have exited */ - for (i = 0; i < num_cores; i++) - rte_eal_wait_lcore(enabled_core_ids[i]); - - rte_lpm_free(lpm); - rte_free(rv); - lpm = NULL; - rv = NULL; + writer_done = 0; + __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); - /* Test without RCU integration */ - printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n", - num_cores); - - /* Create LPM table */ - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; - config.flags = 0; - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); - TEST_LPM_ASSERT(lpm != NULL); + __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); - writer_done = 0; - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); + /* Launch reader threads */ + for (i = j; i < num_cores; i++) + rte_eal_remote_launch(reader_f, NULL, + enabled_core_ids[i]); - /* Launch reader threads */ - for (i = 0; i < num_cores; i++) - rte_eal_remote_launch(test_lpm_reader, NULL, - enabled_core_ids[i]); + /* Launch writer threads */ + for (i = 0; i < j; i++) + rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, + (void *)(uintptr_t)i, + enabled_core_ids[i]); - /* Measure add/delete. 
*/ - begin = rte_rdtsc_precise(); - for (i = 0; i < RCU_ITERATIONS; i++) { - /* Add all the entries */ - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, - large_ldepth_route_table[j].depth, - next_hop_add) != 0) { - printf("Failed to add iteration %d, route# %d\n", - i, j); + /* Wait for writer threads */ + for (i = 0; i < j; i++) + if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) goto error; - } - /* Delete all the entries */ - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) - if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, - large_ldepth_route_table[j].depth) != 0) { - printf("Failed to delete iteration %d, route# %d\n", - i, j); - goto error; - } + printf("Total LPM Adds: %d\n", TOTAL_WRITES); + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); + printf("Average LPM Add/Del: %"PRIu64" cycles\n", + __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) + / TOTAL_WRITES); + + writer_done = 1; + /* Wait until all readers have exited */ + for (i = j; i < num_cores; i++) + rte_eal_wait_lcore(enabled_core_ids[i]); + + rte_lpm_free(lpm); + rte_free(rv); + lpm = NULL; + rv = NULL; } - total_cycles = rte_rdtsc_precise() - begin; - - printf("Total LPM Adds: %d\n", TOTAL_WRITES); - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); - printf("Average LPM Add/Del: %g cycles\n", - (double)total_cycles / TOTAL_WRITES); - - writer_done = 1; - /* Wait until all readers have exited */ - for (i = 0; i < num_cores; i++) - rte_eal_wait_lcore(enabled_core_ids[i]); - - rte_lpm_free(lpm); return 0; @@ -948,10 +754,10 @@ test_lpm_perf(void) rte_lpm_delete_all(lpm); rte_lpm_free(lpm); - if (test_lpm_rcu_perf() < 0) + if (test_lpm_rcu_perf_multi_writer(0) < 0) return -1; - if (test_lpm_rcu_perf_multi_writer() < 0) + if (test_lpm_rcu_perf_multi_writer(1) < 0) return -1; return 0; -- 2.17.1 ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v5 4/4] test/lpm: avoid code duplication in rcu qsbr perf 2020-11-04 18:58 ` [dpdk-dev] [PATCH v5 4/4] test/lpm: avoid code duplication " Dharmik Thakkar @ 2020-11-04 19:35 ` Medvedkin, Vladimir 0 siblings, 0 replies; 52+ messages in thread From: Medvedkin, Vladimir @ 2020-11-04 19:35 UTC (permalink / raw) To: Dharmik Thakkar, Bruce Richardson; +Cc: dev, nd On 04/11/2020 18:58, Dharmik Thakkar wrote: > Avoid code duplication by combining single and multi threaded tests > > Also, enable support for more than 2 writers > > Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> > Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> > --- > app/test/test_lpm_perf.c | 356 +++++++++------------------------------ > 1 file changed, 81 insertions(+), 275 deletions(-) > > diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c > index c8e70ec89ff5..2bed00d0648f 100644 > --- a/app/test/test_lpm_perf.c > +++ b/app/test/test_lpm_perf.c > @@ -23,6 +23,7 @@ static struct rte_rcu_qsbr *rv; > static volatile uint8_t writer_done; > static volatile uint32_t thr_id; > static uint64_t gwrite_cycles; > +static uint32_t num_writers; > /* LPM APIs are not thread safe, use mutex to provide thread safety */ > static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER; > > @@ -430,24 +431,19 @@ test_lpm_rcu_qsbr_writer(void *arg) > { > unsigned int i, j, si, ei; > uint64_t begin, total_cycles; > - uint8_t core_id = (uint8_t)((uintptr_t)arg); > uint32_t next_hop_add = 0xAA; > + uint8_t pos_core = (uint8_t)((uintptr_t)arg); > > - /* 2 writer threads are used */ > - if (core_id % 2 == 0) { > - si = 0; > - ei = NUM_LDEPTH_ROUTE_ENTRIES / 2; > - } else { > - si = NUM_LDEPTH_ROUTE_ENTRIES / 2; > - ei = NUM_LDEPTH_ROUTE_ENTRIES; > - } > + si = (pos_core * NUM_LDEPTH_ROUTE_ENTRIES) / num_writers; > + ei = ((pos_core + 1) * NUM_LDEPTH_ROUTE_ENTRIES) / num_writers; > > /* Measure add/delete. 
*/ > begin = rte_rdtsc_precise(); > for (i = 0; i < RCU_ITERATIONS; i++) { > /* Add all the entries */ > for (j = si; j < ei; j++) { > - pthread_mutex_lock(&lpm_mutex); > + if (num_writers > 1) > + pthread_mutex_lock(&lpm_mutex); > if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, > large_ldepth_route_table[j].depth, > next_hop_add) != 0) { > @@ -455,19 +451,22 @@ test_lpm_rcu_qsbr_writer(void *arg) > i, j); > goto error; > } > - pthread_mutex_unlock(&lpm_mutex); > + if (num_writers > 1) > + pthread_mutex_unlock(&lpm_mutex); > } > > /* Delete all the entries */ > for (j = si; j < ei; j++) { > - pthread_mutex_lock(&lpm_mutex); > + if (num_writers > 1) > + pthread_mutex_lock(&lpm_mutex); > if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, > large_ldepth_route_table[j].depth) != 0) { > printf("Failed to delete iteration %d, route# %d\n", > i, j); > goto error; > } > - pthread_mutex_unlock(&lpm_mutex); > + if (num_writers > 1) > + pthread_mutex_unlock(&lpm_mutex); > } > } > > @@ -478,22 +477,24 @@ test_lpm_rcu_qsbr_writer(void *arg) > return 0; > > error: > - pthread_mutex_unlock(&lpm_mutex); > + if (num_writers > 1) > + pthread_mutex_unlock(&lpm_mutex); > return -1; > } > > /* > * Functional test: > - * 2 writers, rest are readers > + * 1/2 writers, rest are readers > */ > static int > -test_lpm_rcu_perf_multi_writer(void) > +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu) > { > struct rte_lpm_config config; > size_t sz; > - unsigned int i; > + unsigned int i, j; > uint16_t core_id; > struct rte_lpm_rcu_config rcu_cfg = {0}; > + int (*reader_f)(void *arg) = NULL; > > if (rte_lcore_count() < 3) { > printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n"); > @@ -506,273 +507,78 @@ test_lpm_rcu_perf_multi_writer(void) > num_cores++; > } > > - printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n", > - num_cores - 2); > - > - /* Create LPM table */ > - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > - config.number_tbl8s = 
NUM_LDEPTH_ROUTE_ENTRIES; > - config.flags = 0; > - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > - TEST_LPM_ASSERT(lpm != NULL); > - > - /* Init RCU variable */ > - sz = rte_rcu_qsbr_get_memsize(num_cores); > - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, > - RTE_CACHE_LINE_SIZE); > - rte_rcu_qsbr_init(rv, num_cores); > - > - rcu_cfg.v = rv; > - /* Assign the RCU variable to LPM */ > - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { > - printf("RCU variable assignment failed\n"); > - goto error; > - } > - > - writer_done = 0; > - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > - > - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > - > - /* Launch reader threads */ > - for (i = 2; i < num_cores; i++) > - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, > - enabled_core_ids[i]); > - > - /* Launch writer threads */ > - for (i = 0; i < 2; i++) > - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, > - (void *)(uintptr_t)i, > - enabled_core_ids[i]); > - > - /* Wait for writer threads */ > - for (i = 0; i < 2; i++) > - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > - goto error; > - > - printf("Total LPM Adds: %d\n", TOTAL_WRITES); > - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > - printf("Average LPM Add/Del: %"PRIu64" cycles\n", > - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) > - / TOTAL_WRITES); > - > - writer_done = 1; > - /* Wait until all readers have exited */ > - for (i = 2; i < num_cores; i++) > - rte_eal_wait_lcore(enabled_core_ids[i]); > - > - rte_lpm_free(lpm); > - rte_free(rv); > - lpm = NULL; > - rv = NULL; > - > - /* Test without RCU integration */ > - printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n", > - num_cores - 2); > - > - /* Create LPM table */ > - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; > - config.flags = 0; > - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > - TEST_LPM_ASSERT(lpm != NULL); > - > - writer_done = 
0; > - __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > - > - /* Launch reader threads */ > - for (i = 2; i < num_cores; i++) > - rte_eal_remote_launch(test_lpm_reader, NULL, > - enabled_core_ids[i]); > - > - /* Launch writer threads */ > - for (i = 0; i < 2; i++) > - rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, > - (void *)(uintptr_t)i, > - enabled_core_ids[i]); > - > - /* Wait for writer threads */ > - for (i = 0; i < 2; i++) > - if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > - goto error; > - > - printf("Total LPM Adds: %d\n", TOTAL_WRITES); > - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > - printf("Average LPM Add/Del: %"PRIu64" cycles\n", > - __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) > - / TOTAL_WRITES); > - > - writer_done = 1; > - /* Wait until all readers have exited */ > - for (i = 2; i < num_cores; i++) > - rte_eal_wait_lcore(enabled_core_ids[i]); > - > - rte_lpm_free(lpm); > - > - return 0; > - > -error: > - writer_done = 1; > - /* Wait until all readers have exited */ > - rte_eal_mp_wait_lcore(); > - > - rte_lpm_free(lpm); > - rte_free(rv); > - > - return -1; > -} > - > -/* > - * Functional test: > - * Single writer, rest are readers > - */ > -static int > -test_lpm_rcu_perf(void) > -{ > - struct rte_lpm_config config; > - uint64_t begin, total_cycles; > - size_t sz; > - unsigned int i, j; > - uint16_t core_id; > - uint32_t next_hop_add = 0xAA; > - struct rte_lpm_rcu_config rcu_cfg = {0}; > - > - if (rte_lcore_count() < 2) { > - printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n"); > - return TEST_SKIPPED; > - } > - > - num_cores = 0; > - RTE_LCORE_FOREACH_WORKER(core_id) { > - enabled_core_ids[num_cores] = core_id; > - num_cores++; > - } > - > - printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n", > - num_cores); > - > - /* Create LPM table */ > - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > - config.number_tbl8s = 
NUM_LDEPTH_ROUTE_ENTRIES; > - config.flags = 0; > - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > - TEST_LPM_ASSERT(lpm != NULL); > - > - /* Init RCU variable */ > - sz = rte_rcu_qsbr_get_memsize(num_cores); > - rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, > - RTE_CACHE_LINE_SIZE); > - rte_rcu_qsbr_init(rv, num_cores); > - > - rcu_cfg.v = rv; > - /* Assign the RCU variable to LPM */ > - if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { > - printf("RCU variable assignment failed\n"); > - goto error; > - } > - > - writer_done = 0; > - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > - > - /* Launch reader threads */ > - for (i = 0; i < num_cores; i++) > - rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL, > - enabled_core_ids[i]); > - > - /* Measure add/delete. */ > - begin = rte_rdtsc_precise(); > - for (i = 0; i < RCU_ITERATIONS; i++) { > - /* Add all the entries */ > - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) > - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, > - large_ldepth_route_table[j].depth, > - next_hop_add) != 0) { > - printf("Failed to add iteration %d, route# %d\n", > - i, j); > - goto error; > - } > - > - /* Delete all the entries */ > - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) > - if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, > - large_ldepth_route_table[j].depth) != 0) { > - printf("Failed to delete iteration %d, route# %d\n", > - i, j); > + for (j = 1; j < 3; j++) { > + if (use_rcu) > + printf("\nPerf test: %d writer(s), %d reader(s)," > + " RCU integration enabled\n", j, num_cores - j); > + else > + printf("\nPerf test: %d writer(s), %d reader(s)," > + " RCU integration disabled\n", j, num_cores - j); > + > + num_writers = j; > + > + /* Create LPM table */ > + config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > + config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; > + config.flags = 0; > + lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > + TEST_LPM_ASSERT(lpm != NULL); > + > + /* Init RCU variable */ 
> + if (use_rcu) { > + sz = rte_rcu_qsbr_get_memsize(num_cores); > + rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz, > + RTE_CACHE_LINE_SIZE); > + rte_rcu_qsbr_init(rv, num_cores); > + > + rcu_cfg.v = rv; > + /* Assign the RCU variable to LPM */ > + if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) { > + printf("RCU variable assignment failed\n"); > goto error; > } > - } > - total_cycles = rte_rdtsc_precise() - begin; > > - printf("Total LPM Adds: %d\n", TOTAL_WRITES); > - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > - printf("Average LPM Add/Del: %g cycles\n", > - (double)total_cycles / TOTAL_WRITES); > + reader_f = test_lpm_rcu_qsbr_reader; > + } else > + reader_f = test_lpm_reader; > > - writer_done = 1; > - /* Wait until all readers have exited */ > - for (i = 0; i < num_cores; i++) > - rte_eal_wait_lcore(enabled_core_ids[i]); > - > - rte_lpm_free(lpm); > - rte_free(rv); > - lpm = NULL; > - rv = NULL; > + writer_done = 0; > + __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED); > > - /* Test without RCU integration */ > - printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n", > - num_cores); > - > - /* Create LPM table */ > - config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; > - config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES; > - config.flags = 0; > - lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); > - TEST_LPM_ASSERT(lpm != NULL); > + __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > > - writer_done = 0; > - __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST); > + /* Launch reader threads */ > + for (i = j; i < num_cores; i++) > + rte_eal_remote_launch(reader_f, NULL, > + enabled_core_ids[i]); > > - /* Launch reader threads */ > - for (i = 0; i < num_cores; i++) > - rte_eal_remote_launch(test_lpm_reader, NULL, > - enabled_core_ids[i]); > + /* Launch writer threads */ > + for (i = 0; i < j; i++) > + rte_eal_remote_launch(test_lpm_rcu_qsbr_writer, > + (void *)(uintptr_t)i, > + enabled_core_ids[i]); > > - /* Measure add/delete. 
*/ > - begin = rte_rdtsc_precise(); > - for (i = 0; i < RCU_ITERATIONS; i++) { > - /* Add all the entries */ > - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) > - if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip, > - large_ldepth_route_table[j].depth, > - next_hop_add) != 0) { > - printf("Failed to add iteration %d, route# %d\n", > - i, j); > + /* Wait for writer threads */ > + for (i = 0; i < j; i++) > + if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) > goto error; > - } > > - /* Delete all the entries */ > - for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) > - if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip, > - large_ldepth_route_table[j].depth) != 0) { > - printf("Failed to delete iteration %d, route# %d\n", > - i, j); > - goto error; > - } > + printf("Total LPM Adds: %d\n", TOTAL_WRITES); > + printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > + printf("Average LPM Add/Del: %"PRIu64" cycles\n", > + __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) > + / TOTAL_WRITES); > + > + writer_done = 1; > + /* Wait until all readers have exited */ > + for (i = j; i < num_cores; i++) > + rte_eal_wait_lcore(enabled_core_ids[i]); > + > + rte_lpm_free(lpm); > + rte_free(rv); > + lpm = NULL; > + rv = NULL; > } > - total_cycles = rte_rdtsc_precise() - begin; > - > - printf("Total LPM Adds: %d\n", TOTAL_WRITES); > - printf("Total LPM Deletes: %d\n", TOTAL_WRITES); > - printf("Average LPM Add/Del: %g cycles\n", > - (double)total_cycles / TOTAL_WRITES); > - > - writer_done = 1; > - /* Wait until all readers have exited */ > - for (i = 0; i < num_cores; i++) > - rte_eal_wait_lcore(enabled_core_ids[i]); > - > - rte_lpm_free(lpm); > > return 0; > > @@ -948,10 +754,10 @@ test_lpm_perf(void) > rte_lpm_delete_all(lpm); > rte_lpm_free(lpm); > > - if (test_lpm_rcu_perf() < 0) > + if (test_lpm_rcu_perf_multi_writer(0) < 0) > return -1; > > - if (test_lpm_rcu_perf_multi_writer() < 0) > + if (test_lpm_rcu_perf_multi_writer(1) < 0) > return -1; > > return 0; > Acked-by: Vladimir 
Medvedkin <vladimir.medvedkin@intel.com> -- Regards, Vladimir ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test 2020-11-04 18:58 ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar ` (3 preceding siblings ...) 2020-11-04 18:58 ` [dpdk-dev] [PATCH v5 4/4] test/lpm: avoid code duplication " Dharmik Thakkar @ 2020-11-05 15:58 ` David Marchand 4 siblings, 0 replies; 52+ messages in thread From: David Marchand @ 2020-11-05 15:58 UTC (permalink / raw) To: Dharmik Thakkar; +Cc: dev, nd On Wed, Nov 4, 2020 at 7:59 PM Dharmik Thakkar <dharmik.thakkar@arm.com> wrote: > > Fix LPM adds, LPM deletes, and cycle calculation. > Return error if LPM add/delete fails in multi-writer test. > Return error if single-writer or multi-writer test fails. > Remove redundant error checking for readers. > Combine single-threaded and multi-threaded test cases to avoid code duplication. Series applied, thanks Dharmik. -- David Marchand ^ permalink raw reply [flat|nested] 52+ messages in thread
end of thread, other threads:[~2020-11-05 15:59 UTC | newest] Thread overview: 52+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2020-10-29 15:36 [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar 2020-10-29 15:36 ` [dpdk-dev] [PATCH 2/4] test/lpm: return error on failure " Dharmik Thakkar 2020-10-29 15:36 ` [dpdk-dev] [PATCH 3/4] test/lpm: remove error checking " Dharmik Thakkar 2020-10-29 15:36 ` [dpdk-dev] [PATCH 4/4] test/lpm: avoid code duplication " Dharmik Thakkar 2020-11-02 17:17 ` Medvedkin, Vladimir 2020-11-02 22:11 ` Dharmik Thakkar 2020-11-02 10:08 ` [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation " David Marchand 2020-11-02 15:11 ` Bruce Richardson 2020-11-02 16:58 ` Dharmik Thakkar 2020-11-02 17:21 ` Medvedkin, Vladimir 2020-11-02 17:33 ` Bruce Richardson 2020-11-02 23:51 ` [dpdk-dev] [PATCH v2 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar 2020-11-02 23:52 ` [dpdk-dev] [PATCH v2 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar 2020-11-03 1:30 ` Honnappa Nagarahalli 2020-11-02 23:52 ` [dpdk-dev] [PATCH v2 2/4] test/lpm: return error on failure " Dharmik Thakkar 2020-11-03 1:28 ` Honnappa Nagarahalli 2020-11-03 4:42 ` Dharmik Thakkar 2020-11-02 23:52 ` [dpdk-dev] [PATCH v2 3/4] test/lpm: remove error checking " Dharmik Thakkar 2020-11-03 1:21 ` Honnappa Nagarahalli 2020-11-03 4:56 ` Dharmik Thakkar 2020-11-02 23:52 ` [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication " Dharmik Thakkar 2020-11-03 4:21 ` Honnappa Nagarahalli 2020-11-03 4:33 ` Dharmik Thakkar 2020-11-03 5:32 ` Honnappa Nagarahalli 2020-11-03 14:03 ` Dharmik Thakkar 2020-11-03 14:51 ` Honnappa Nagarahalli 2020-11-03 18:01 ` Medvedkin, Vladimir 2020-11-03 5:12 ` [dpdk-dev] [PATCH v3 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar 2020-11-03 5:12 ` [dpdk-dev] [PATCH v3 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar 2020-11-03 5:12 
` [dpdk-dev] [PATCH v3 2/4] test/lpm: return error on failure " Dharmik Thakkar 2020-11-03 5:21 ` Honnappa Nagarahalli 2020-11-03 5:12 ` [dpdk-dev] [PATCH v3 3/4] test/lpm: remove error checking " Dharmik Thakkar 2020-11-03 5:22 ` Honnappa Nagarahalli 2020-11-03 5:12 ` [dpdk-dev] [PATCH v3 4/4] test/lpm: avoid code duplication " Dharmik Thakkar 2020-11-03 22:23 ` [dpdk-dev] [PATCH v4 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar 2020-11-03 22:23 ` [dpdk-dev] [PATCH v4 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar 2020-11-03 22:23 ` [dpdk-dev] [PATCH v4 2/4] test/lpm: return error on failure " Dharmik Thakkar 2020-11-03 22:23 ` [dpdk-dev] [PATCH v4 3/4] test/lpm: remove error checking " Dharmik Thakkar 2020-11-03 22:23 ` [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication " Dharmik Thakkar 2020-11-03 22:35 ` Honnappa Nagarahalli 2020-11-04 15:46 ` Medvedkin, Vladimir 2020-11-04 16:49 ` Dharmik Thakkar 2020-11-04 18:58 ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar 2020-11-04 18:58 ` [dpdk-dev] [PATCH v5 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar 2020-11-04 19:34 ` Medvedkin, Vladimir 2020-11-04 18:58 ` [dpdk-dev] [PATCH v5 2/4] test/lpm: return error on failure " Dharmik Thakkar 2020-11-04 19:35 ` Medvedkin, Vladimir 2020-11-04 18:58 ` [dpdk-dev] [PATCH v5 3/4] test/lpm: remove error checking " Dharmik Thakkar 2020-11-04 19:35 ` Medvedkin, Vladimir 2020-11-04 18:58 ` [dpdk-dev] [PATCH v5 4/4] test/lpm: avoid code duplication " Dharmik Thakkar 2020-11-04 19:35 ` Medvedkin, Vladimir 2020-11-05 15:58 ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test David Marchand
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as the URLs for the NNTP newsgroup(s).