From mboxrd@z Thu Jan  1 00:00:00 1970
From: Konstantin Ananyev <konstantin.ananyev@intel.com>
To: dev@dpdk.org
Cc: honnappa.nagarahalli@arm.com, david.marchand@redhat.com,
	jielong.zjl@antfin.com, Konstantin Ananyev <konstantin.ananyev@intel.com>
Date: Thu, 2 Apr 2020 23:09:51 +0100
Message-Id: <20200402220959.29885-2-konstantin.ananyev@intel.com>
X-Mailer: git-send-email 2.18.0
In-Reply-To: <20200402220959.29885-1-konstantin.ananyev@intel.com>
References: <20200331164330.28854-1-konstantin.ananyev@intel.com>
 <20200402220959.29885-1-konstantin.ananyev@intel.com>
Subject: [dpdk-dev] [PATCH v2 1/9] test/ring: add contention stress test

Introduce a new test-case to measure ring performance under contention
(multiple producers/consumers). It starts a dequeue/enqueue loop on all
available slave lcores.
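
The test registers the ring_stress_autotest command with the test binary,
so it can be launched from the interactive prompt. Illustrative invocation
only; the binary path and lcore list depend on the build and platform:

  ./build/app/test/dpdk-test -l 0-8
  RTE>>ring_stress_autotest

Each worker reports DEQ+ENQ call/object/cycle counters; the PRCS variant
additionally collects min/max cycles per call.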
Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
---
 app/test/Makefile                |   2 +
 app/test/meson.build             |   2 +
 app/test/test_ring_mpmc_stress.c |  31 +++
 app/test/test_ring_stress.c      |  48 ++++
 app/test/test_ring_stress.h      |  35 +++
 app/test/test_ring_stress_impl.h | 444 +++++++++++++++++++++++++++++++
 6 files changed, 562 insertions(+)
 create mode 100644 app/test/test_ring_mpmc_stress.c
 create mode 100644 app/test/test_ring_stress.c
 create mode 100644 app/test/test_ring_stress.h
 create mode 100644 app/test/test_ring_stress_impl.h

diff --git a/app/test/Makefile b/app/test/Makefile
index 1f080d162..4eefaa887 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -77,7 +77,9 @@ SRCS-y += test_external_mem.c
 SRCS-y += test_rand_perf.c
 
 SRCS-y += test_ring.c
+SRCS-y += test_ring_mpmc_stress.c
 SRCS-y += test_ring_perf.c
+SRCS-y += test_ring_stress.c
 SRCS-y += test_pmd_perf.c
 
 ifeq ($(CONFIG_RTE_LIBRTE_TABLE),y)
diff --git a/app/test/meson.build b/app/test/meson.build
index 351d29cb6..827b04886 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -100,7 +100,9 @@ test_sources = files('commands.c',
	'test_rib.c',
	'test_rib6.c',
	'test_ring.c',
+	'test_ring_mpmc_stress.c',
	'test_ring_perf.c',
+	'test_ring_stress.c',
	'test_rwlock.c',
	'test_sched.c',
	'test_service_cores.c',
diff --git a/app/test/test_ring_mpmc_stress.c b/app/test/test_ring_mpmc_stress.c
new file mode 100644
index 000000000..1524b0248
--- /dev/null
+++ b/app/test/test_ring_mpmc_stress.c
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include "test_ring_stress_impl.h"
+
+static inline uint32_t
+_st_ring_dequeue_bulk(struct rte_ring *r, void **obj, uint32_t n,
+	uint32_t *avail)
+{
+	return rte_ring_mc_dequeue_bulk(r, obj, n, avail);
+}
+
+static inline uint32_t
+_st_ring_enqueue_bulk(struct rte_ring *r, void * const *obj, uint32_t n,
+	uint32_t *free)
+{
+	return rte_ring_mp_enqueue_bulk(r, obj, n, free);
+}
+
+static int
+_st_ring_init(struct rte_ring *r, const char *name, uint32_t num)
+{
+	return rte_ring_init(r, name, num, 0);
+}
+
+const struct test test_ring_mpmc_stress = {
+	.name = "MP/MC",
+	.nb_case = RTE_DIM(tests),
+	.cases = tests,
+};
diff --git a/app/test/test_ring_stress.c b/app/test/test_ring_stress.c
new file mode 100644
index 000000000..60706f799
--- /dev/null
+++ b/app/test/test_ring_stress.c
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include "test_ring_stress.h"
+
+static int
+run_test(const struct test *test)
+{
+	int32_t rc;
+	uint32_t i, k;
+
+	for (i = 0, k = 0; i != test->nb_case; i++) {
+
+		printf("TEST-CASE %s %s START\n",
+			test->name, test->cases[i].name);
+
+		rc = test->cases[i].func(test->cases[i].wfunc);
+		k += (rc == 0);
+
+		if (rc != 0)
+			printf("TEST-CASE %s %s FAILED\n",
+				test->name, test->cases[i].name);
+		else
+			printf("TEST-CASE %s %s OK\n",
+				test->name, test->cases[i].name);
+	}
+
+	return k;
+}
+
+static int
+test_ring_stress(void)
+{
+	uint32_t n, k;
+
+	n = 0;
+	k = 0;
+
+	n += test_ring_mpmc_stress.nb_case;
+	k += run_test(&test_ring_mpmc_stress);
+
+	printf("Number of tests:\t%u\nSuccess:\t%u\nFailed:\t%u\n",
+		n, k, n - k);
+	return (k != n);
+}
+
+REGISTER_TEST_COMMAND(ring_stress_autotest, test_ring_stress);
diff --git a/app/test/test_ring_stress.h b/app/test/test_ring_stress.h
new file mode 100644
index 000000000..60eac6216
--- /dev/null
+++ b/app/test/test_ring_stress.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+
+#include <rte_ring.h>
+#include <rte_cycles.h>
+#include <rte_launch.h>
+#include <rte_lcore.h>
+#include <rte_malloc.h>
+#include <rte_random.h>
+#include <rte_spinlock.h>
+
+#include "test.h"
+
+struct test_case {
+	const char *name;
+	int (*func)(int (*)(void *));
+	int (*wfunc)(void *arg);
+};
+
+struct test {
+	const char *name;
+	uint32_t nb_case;
+	const struct test_case *cases;
+};
+
+extern const struct test test_ring_mpmc_stress;
diff --git a/app/test/test_ring_stress_impl.h b/app/test/test_ring_stress_impl.h
new file mode 100644
index 000000000..11476d28c
--- /dev/null
+++ b/app/test/test_ring_stress_impl.h
@@ -0,0 +1,444 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include "test_ring_stress.h"
+
+/*
+ * Measures performance of ring enqueue/dequeue under high contention
+ */
+
+#define RING_NAME	"RING_STRESS"
+#define BULK_NUM	32
+#define RING_SIZE	(2 * BULK_NUM * RTE_MAX_LCORE)
+
+enum {
+	WRK_CMD_STOP,
+	WRK_CMD_RUN,
+};
+
+static volatile uint32_t wrk_cmd __rte_cache_aligned;
+
+/* test run-time in seconds */
+static const uint32_t run_time = 60;
+static const uint32_t verbose;
+
+struct lcore_stat {
+	uint64_t nb_cycle;
+	struct {
+		uint64_t nb_call;
+		uint64_t nb_obj;
+		uint64_t nb_cycle;
+		uint64_t max_cycle;
+		uint64_t min_cycle;
+	} op;
+};
+
+struct lcore_arg {
+	struct rte_ring *rng;
+	struct lcore_stat stats;
+} __rte_cache_aligned;
+
+struct ring_elem {
+	uint32_t cnt[RTE_CACHE_LINE_SIZE / sizeof(uint32_t)];
+} __rte_cache_aligned;
+
+/*
+ * redefinable functions
+ */
+static uint32_t
+_st_ring_dequeue_bulk(struct rte_ring *r, void **obj, uint32_t n,
+	uint32_t *avail);
+
+static uint32_t
+_st_ring_enqueue_bulk(struct rte_ring *r, void * const *obj, uint32_t n,
+	uint32_t *free);
+
+static int
+_st_ring_init(struct rte_ring *r, const char *name, uint32_t num);
+
+
+static void
+lcore_stat_update(struct lcore_stat *ls, uint64_t call, uint64_t obj,
+	uint64_t tm, int32_t prcs)
+{
+	ls->op.nb_call += call;
+	ls->op.nb_obj += obj;
+	ls->op.nb_cycle += tm;
+	if (prcs) {
+		ls->op.max_cycle = RTE_MAX(ls->op.max_cycle, tm);
+		ls->op.min_cycle = RTE_MIN(ls->op.min_cycle, tm);
+	}
+}
+
+static void
+lcore_op_stat_aggr(struct lcore_stat *ms, const struct lcore_stat *ls)
+{
+
+	ms->op.nb_call += ls->op.nb_call;
+	ms->op.nb_obj += ls->op.nb_obj;
+	ms->op.nb_cycle += ls->op.nb_cycle;
+	ms->op.max_cycle = RTE_MAX(ms->op.max_cycle, ls->op.max_cycle);
+	ms->op.min_cycle = RTE_MIN(ms->op.min_cycle, ls->op.min_cycle);
+}
+
+static void
+lcore_stat_aggr(struct lcore_stat *ms, const struct lcore_stat *ls)
+{
+	ms->nb_cycle = RTE_MAX(ms->nb_cycle, ls->nb_cycle);
+	lcore_op_stat_aggr(ms, ls);
+}
+
+static void
+lcore_stat_dump(FILE *f, uint32_t lc, const struct lcore_stat *ls)
+{
+	long double st;
+
+	st = (long double)rte_get_timer_hz() / US_PER_S;
+
+	if (lc == UINT32_MAX)
+		fprintf(f, "%s(AGGREGATE)={\n", __func__);
+	else
+		fprintf(f, "%s(lc=%u)={\n", __func__, lc);
+
+	fprintf(f, "\tnb_cycle=%" PRIu64 "(%.2Lf usec),\n",
+		ls->nb_cycle, (long double)ls->nb_cycle / st);
+
+	fprintf(f, "\tDEQ+ENQ={\n");
+
+	fprintf(f, "\t\tnb_call=%" PRIu64 ",\n", ls->op.nb_call);
+	fprintf(f, "\t\tnb_obj=%" PRIu64 ",\n", ls->op.nb_obj);
+	fprintf(f, "\t\tnb_cycle=%" PRIu64 ",\n", ls->op.nb_cycle);
+	fprintf(f, "\t\tobj/call(avg): %.2Lf\n",
+		(long double)ls->op.nb_obj / ls->op.nb_call);
+	fprintf(f, "\t\tcycles/obj(avg): %.2Lf\n",
+		(long double)ls->op.nb_cycle / ls->op.nb_obj);
+	fprintf(f, "\t\tcycles/call(avg): %.2Lf\n",
+		(long double)ls->op.nb_cycle / ls->op.nb_call);
+
+	/* if min/max cycles per call stats were collected */
+	if (ls->op.min_cycle != UINT64_MAX) {
+		fprintf(f, "\t\tmax cycles/call=%" PRIu64 "(%.2Lf usec),\n",
+			ls->op.max_cycle,
+			(long double)ls->op.max_cycle / st);
+		fprintf(f, "\t\tmin cycles/call=%" PRIu64 "(%.2Lf usec),\n",
+			ls->op.min_cycle,
+			(long double)ls->op.min_cycle / st);
+	}
+
+	fprintf(f, "\t},\n");
+	fprintf(f, "};\n");
+}
+
+static void
+fill_ring_elm(struct ring_elem *elm, uint32_t fill)
+{
+	uint32_t i;
+
+	for (i = 0; i != RTE_DIM(elm->cnt); i++)
+		elm->cnt[i] = fill;
+}
+
+static int32_t
+check_updt_elem(struct ring_elem *elm[], uint32_t num,
+	const struct ring_elem *check, const struct ring_elem *fill)
+{
+	uint32_t i;
+
+	static rte_spinlock_t dump_lock;
+
+	for (i = 0; i != num; i++) {
+		if (memcmp(check, elm[i], sizeof(*check)) != 0) {
+			rte_spinlock_lock(&dump_lock);
+			printf("%s(lc=%u, num=%u) failed at %u-th iter, "
+				"offending object: %p\n",
+				__func__, rte_lcore_id(), num, i, elm[i]);
+			rte_memdump(stdout, "expected", check, sizeof(*check));
+			rte_memdump(stdout, "result", elm[i], sizeof(*elm[i]));
+			rte_spinlock_unlock(&dump_lock);
+			return -EINVAL;
+		}
+		memcpy(elm[i], fill, sizeof(*elm[i]));
+	}
+
+	return 0;
+}
+
+static int
+check_ring_op(uint32_t exp, uint32_t res, uint32_t lc,
+	const char *fname, const char *opname)
+{
+	if (exp != res) {
+		printf("%s(lc=%u) failure: %s expected: %u, returned %u\n",
+			fname, lc, opname, exp, res);
+		return -ENOSPC;
+	}
+	return 0;
+}
+
+static int
+test_worker_prcs(void *arg)
+{
+	int32_t rc;
+	uint32_t lc, n, num;
+	uint64_t cl, tm0, tm1;
+	struct lcore_arg *la;
+	struct ring_elem def_elm, loc_elm;
+	struct ring_elem *obj[2 * BULK_NUM];
+
+	la = arg;
+	lc = rte_lcore_id();
+
+	fill_ring_elm(&def_elm, UINT32_MAX);
+	fill_ring_elm(&loc_elm, lc);
+
+	while (wrk_cmd != WRK_CMD_RUN) {
+		rte_smp_rmb();
+		rte_pause();
+	}
+
+	cl = rte_rdtsc_precise();
+
+	do {
+		/* num in interval [7/8, 11/8] of BULK_NUM */
+		num = 7 * BULK_NUM / 8 + rte_rand() % (BULK_NUM / 2);
+
+		/* reset all pointer values */
+		memset(obj, 0, sizeof(obj));
+
+		/* dequeue num elems */
+		tm0 = rte_rdtsc_precise();
+		n = _st_ring_dequeue_bulk(la->rng, (void **)obj, num, NULL);
+		tm0 = rte_rdtsc_precise() - tm0;
+
+		/* check return value and objects */
+		rc = check_ring_op(num, n, lc, __func__,
+			RTE_STR(_st_ring_dequeue_bulk));
+		if (rc == 0)
+			rc = check_updt_elem(obj, num, &def_elm, &loc_elm);
+		if (rc != 0)
+			break;
+
+		/* enqueue num elems */
+		rte_compiler_barrier();
+		rc = check_updt_elem(obj, num, &loc_elm, &def_elm);
+		if (rc != 0)
+			break;
+
+		tm1 = rte_rdtsc_precise();
+		n = _st_ring_enqueue_bulk(la->rng, (void **)obj, num, NULL);
+		tm1 = rte_rdtsc_precise() - tm1;
+
+		/* check return value */
+		rc = check_ring_op(num, n, lc, __func__,
+			RTE_STR(_st_ring_enqueue_bulk));
+		if (rc != 0)
+			break;
+
+		lcore_stat_update(&la->stats, 1, num, tm0 + tm1, 1);
+
+	} while (wrk_cmd == WRK_CMD_RUN);
+
+	la->stats.nb_cycle = rte_rdtsc_precise() - cl;
+	return rc;
+}
+
+static int
+test_worker_avg(void *arg)
+{
+	int32_t rc;
+	uint32_t lc, n, num;
+	uint64_t cl;
+	struct lcore_arg *la;
+	struct ring_elem def_elm, loc_elm;
+	struct ring_elem *obj[2 * BULK_NUM];
+
+	la = arg;
+	lc = rte_lcore_id();
+
+	fill_ring_elm(&def_elm, UINT32_MAX);
+	fill_ring_elm(&loc_elm, lc);
+
+	while (wrk_cmd != WRK_CMD_RUN) {
+		rte_smp_rmb();
+		rte_pause();
+	}
+
+	cl = rte_rdtsc_precise();
+
+	do {
+		/* num in interval [7/8, 11/8] of BULK_NUM */
+		num = 7 * BULK_NUM / 8 + rte_rand() % (BULK_NUM / 2);
+
+		/* reset all pointer values */
+		memset(obj, 0, sizeof(obj));
+
+		/* dequeue num elems */
+		n = _st_ring_dequeue_bulk(la->rng, (void **)obj, num, NULL);
+
+		/* check return value and objects */
+		rc = check_ring_op(num, n, lc, __func__,
+			RTE_STR(_st_ring_dequeue_bulk));
+		if (rc == 0)
+			rc = check_updt_elem(obj, num, &def_elm, &loc_elm);
+		if (rc != 0)
+			break;
+
+		/* enqueue num elems */
+		rte_compiler_barrier();
+		rc = check_updt_elem(obj, num, &loc_elm, &def_elm);
+		if (rc != 0)
+			break;
+
+		n = _st_ring_enqueue_bulk(la->rng, (void **)obj, num, NULL);
+
+		/* check return value */
+		rc = check_ring_op(num, n, lc, __func__,
+			RTE_STR(_st_ring_enqueue_bulk));
+		if (rc != 0)
+			break;
+
+		lcore_stat_update(&la->stats, 1, num, 0, 0);
+
+	} while (wrk_cmd == WRK_CMD_RUN);
+
+	/* final stats update */
+	cl = rte_rdtsc_precise() - cl;
+	lcore_stat_update(&la->stats, 0, 0, cl, 0);
+	la->stats.nb_cycle = cl;
+
+	return rc;
+}
+
+static void
+mt1_fini(struct rte_ring *rng, void *data)
+{
+	rte_free(rng);
+	rte_free(data);
+}
+
+static int
+mt1_init(struct rte_ring **rng, void **data, uint32_t num)
+{
+	int32_t rc;
+	size_t sz;
+	uint32_t i, nr;
+	struct rte_ring *r;
+	struct ring_elem *elm;
+	void *p;
+
+	*rng = NULL;
+	*data = NULL;
+
+	sz = num * sizeof(*elm);
+	elm = rte_zmalloc(NULL, sz, __alignof__(*elm));
+	if (elm == NULL) {
+		printf("%s: alloc(%zu) for %u elems data failed\n",
+			__func__, sz, num);
+		return -ENOMEM;
+	}
+
+	*data = elm;
+
+	/* alloc ring */
+	nr = 2 * num;
+	sz = rte_ring_get_memsize(nr);
+	r = rte_zmalloc(NULL, sz, __alignof__(*r));
+	if (r == NULL) {
+		printf("%s: alloc(%zu) for FIFO with %u elems failed\n",
+			__func__, sz, nr);
+		return -ENOMEM;
+	}
+
+	*rng = r;
+
+	rc = _st_ring_init(r, RING_NAME, nr);
+	if (rc != 0) {
+		printf("%s: _st_ring_init(%p, %u) failed, error: %d(%s)\n",
+			__func__, r, nr, rc, strerror(-rc));
+		return rc;
+	}
+
+	for (i = 0; i != num; i++) {
+		fill_ring_elm(elm + i, UINT32_MAX);
+		p = elm + i;
+		if (_st_ring_enqueue_bulk(r, &p, 1, NULL) != 1)
+			break;
+	}
+
+	if (i != num) {
+		printf("%s: _st_ring_enqueue(%p, %u) returned %u\n",
+			__func__, r, num, i);
+		return -ENOSPC;
+	}
+
+	return 0;
+}
+
+static int
+test_mt1(int (*test)(void *))
+{
+	int32_t rc;
+	uint32_t lc, mc;
+	struct rte_ring *r;
+	void *data;
+	struct lcore_arg arg[RTE_MAX_LCORE];
+
+	static const struct lcore_stat init_stat = {
+		.op.min_cycle = UINT64_MAX,
+	};
+
+	rc = mt1_init(&r, &data, RING_SIZE);
+	if (rc != 0) {
+		mt1_fini(r, data);
+		return rc;
+	}
+
+	memset(arg, 0, sizeof(arg));
+
+	/* launch on all slaves */
+	RTE_LCORE_FOREACH_SLAVE(lc) {
+		arg[lc].rng = r;
+		arg[lc].stats = init_stat;
+		rte_eal_remote_launch(test, &arg[lc], lc);
+	}
+
+	/* signal workers to start the test */
+	wrk_cmd = WRK_CMD_RUN;
+	rte_smp_wmb();
+
+	usleep(run_time * US_PER_S);
+
+	/* signal workers to stop the test */
+	wrk_cmd = WRK_CMD_STOP;
+	rte_smp_wmb();
+
+	/* wait for slaves and collect stats. */
+	mc = rte_lcore_id();
+	arg[mc].stats = init_stat;
+
+	rc = 0;
+	RTE_LCORE_FOREACH_SLAVE(lc) {
+		rc |= rte_eal_wait_lcore(lc);
+		lcore_stat_aggr(&arg[mc].stats, &arg[lc].stats);
+		if (verbose != 0)
+			lcore_stat_dump(stdout, lc, &arg[lc].stats);
+	}
+
+	lcore_stat_dump(stdout, UINT32_MAX, &arg[mc].stats);
+	mt1_fini(r, data);
+	return rc;
+}
+
+static const struct test_case tests[] = {
+	{
+		.name = "MT-WRK_ENQ_DEQ-MST_NONE-PRCS",
+		.func = test_mt1,
+		.wfunc = test_worker_prcs,
+	},
+	{
+		.name = "MT-WRK_ENQ_DEQ-MST_NONE-AVG",
+		.func = test_mt1,
+		.wfunc = test_worker_avg,
+	},
+};
-- 
2.17.1