From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id E4DD1A04F6; Wed, 11 Dec 2019 16:51:56 +0100 (CET) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id B25052C6A; Wed, 11 Dec 2019 16:51:56 +0100 (CET) Received: from mga18.intel.com (mga18.intel.com [134.134.136.126]) by dpdk.org (Postfix) with ESMTP id EFA4D1D9E for ; Wed, 11 Dec 2019 16:51:53 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga001.fm.intel.com ([10.253.24.23]) by orsmga106.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 11 Dec 2019 07:51:42 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.69,301,1571727600"; d="scan'208";a="220487724" Received: from atrybulx-mobl.ger.corp.intel.com (HELO build-VirtualBox.isw.intel.com) ([10.103.104.115]) by fmsmga001.fm.intel.com with ESMTP; 11 Dec 2019 07:51:40 -0800 From: Artur Trybula To: dev@dpdk.org, fiona.trahe@intel.com, shallyv@marvell.com, adamx.dybkowski@intel.com, marcinx.danilewicz@intel.com, arturx.trybula@intel.com, akhil.goyal@nxp.com Date: Wed, 11 Dec 2019 16:50:00 +0100 Message-Id: <20191211155000.26610-1-arturx.trybula@intel.com> X-Mailer: git-send-email 2.17.1 Subject: [dpdk-dev] [PATCH] test/compress: add cycle-count mode to the perf tool X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" This commit adds cycle-count mode to the compression perf tool. The new mode enhances the compression performance tool to allow cycle-count measurement of both hardware and software PMDs. 
Signed-off-by: Artur Trybula --- app/test-compress-perf/Makefile | 3 +- app/test-compress-perf/comp_perf.h | 6 +- app/test-compress-perf/comp_perf_options.h | 7 +- .../comp_perf_options_parse.c | 35 +- .../comp_perf_test_common.c | 23 +- .../comp_perf_test_common.h | 2 +- .../comp_perf_test_cyclecount.c | 614 ++++++++++++++++++ .../comp_perf_test_cyclecount.h | 24 + ...enchmark.c => comp_perf_test_throughput.c} | 10 +- ...enchmark.h => comp_perf_test_throughput.h} | 6 +- .../comp_perf_test_verify.c | 4 +- app/test-compress-perf/main.c | 65 +- app/test-compress-perf/meson.build | 3 +- 13 files changed, 755 insertions(+), 47 deletions(-) create mode 100644 app/test-compress-perf/comp_perf_test_cyclecount.c create mode 100644 app/test-compress-perf/comp_perf_test_cyclecount.h rename app/test-compress-perf/{comp_perf_test_benchmark.c => comp_perf_test_throughput.c} (97%) rename app/test-compress-perf/{comp_perf_test_benchmark.h => comp_perf_test_throughput.h} (80%) diff --git a/app/test-compress-perf/Makefile b/app/test-compress-perf/Makefile index d1a6820e6..ad3b91d0a 100644 --- a/app/test-compress-perf/Makefile +++ b/app/test-compress-perf/Makefile @@ -13,7 +13,8 @@ CFLAGS += -O3 SRCS-y := main.c SRCS-y += comp_perf_options_parse.c SRCS-y += comp_perf_test_verify.c -SRCS-y += comp_perf_test_benchmark.c +SRCS-y += comp_perf_test_throughput.c +SRCS-y += comp_perf_test_cyclecount.c SRCS-y += comp_perf_test_common.c include $(RTE_SDK)/mk/rte.app.mk diff --git a/app/test-compress-perf/comp_perf.h b/app/test-compress-perf/comp_perf.h index 57289b07a..997d46b59 100644 --- a/app/test-compress-perf/comp_perf.h +++ b/app/test-compress-perf/comp_perf.h @@ -26,15 +26,15 @@ struct cperf_test { /* Needed for weak functions*/ void * -cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused, +cperf_throughput_test_constructor(uint8_t dev_id __rte_unused, uint16_t qp_id __rte_unused, struct comp_test_data *options __rte_unused); void -cperf_benchmark_test_destructor(void *arg 
__rte_unused); +cperf_throughput_test_destructor(void *arg __rte_unused); int -cperf_benchmark_test_runner(void *test_ctx __rte_unused); +cperf_throughput_test_runner(void *test_ctx __rte_unused); void * cperf_verify_test_constructor(uint8_t dev_id __rte_unused, diff --git a/app/test-compress-perf/comp_perf_options.h b/app/test-compress-perf/comp_perf_options.h index 2c26511ef..0b777521c 100644 --- a/app/test-compress-perf/comp_perf_options.h +++ b/app/test-compress-perf/comp_perf_options.h @@ -24,8 +24,9 @@ enum cleanup_st { }; enum cperf_test_type { - CPERF_TEST_TYPE_BENCHMARK, - CPERF_TEST_TYPE_VERIFY + CPERF_TEST_TYPE_THROUGHPUT, + CPERF_TEST_TYPE_VERIFY, + CPERF_TEST_TYPE_PMDCC }; enum comp_operation { @@ -68,6 +69,8 @@ struct comp_test_data { double ratio; enum cleanup_st cleanup; int perf_comp_force_stop; + + uint32_t cyclecount_delay; }; int diff --git a/app/test-compress-perf/comp_perf_options_parse.c b/app/test-compress-perf/comp_perf_options_parse.c index 12d0a6caf..04a8d2fbe 100644 --- a/app/test-compress-perf/comp_perf_options_parse.c +++ b/app/test-compress-perf/comp_perf_options_parse.c @@ -30,6 +30,9 @@ #define CPERF_WINDOW_SIZE ("window-sz") #define CPERF_EXTERNAL_MBUFS ("external-mbufs") +/* cyclecount-specific options */ +#define CPERF_CYCLECOUNT_DELAY_US ("cc-delay-us") + struct name_id_map { const char *name; uint32_t id; @@ -39,7 +42,7 @@ static void usage(char *progname) { printf("%s [EAL options] --\n" - " --ptest benchmark / verify :" + " --ptest throughput / verify / pmd-cyclecount\n" " --driver-name NAME: compress driver to use\n" " --input-file NAME: file to compress and decompress\n" " --extended-input-sz N: extend file data up to this size (default: no extension)\n" @@ -61,6 +64,8 @@ usage(char *progname) " (e.g.: 15 => 32k, default: max supported by PMD)\n" " --external-mbufs: use memzones as external buffers instead of\n" " keeping the data directly in mbuf area\n" + " --cc-delay-us N: delay between enqueue and dequeue operations in 
microseconds\n" + " valid only for cyclecount perf test (default: 500 us)\n" " -h: prints this help\n", progname); } @@ -85,12 +90,16 @@ parse_cperf_test_type(struct comp_test_data *test_data, const char *arg) { struct name_id_map cperftest_namemap[] = { { - comp_perf_test_type_strs[CPERF_TEST_TYPE_BENCHMARK], - CPERF_TEST_TYPE_BENCHMARK + comp_perf_test_type_strs[CPERF_TEST_TYPE_THROUGHPUT], + CPERF_TEST_TYPE_THROUGHPUT }, { comp_perf_test_type_strs[CPERF_TEST_TYPE_VERIFY], CPERF_TEST_TYPE_VERIFY + }, + { + comp_perf_test_type_strs[CPERF_TEST_TYPE_PMDCC], + CPERF_TEST_TYPE_PMDCC } }; @@ -531,17 +540,28 @@ parse_external_mbufs(struct comp_test_data *test_data, return 0; } +static int +parse_cyclecount_delay_us(struct comp_test_data *test_data, + const char *arg) +{ + int ret = parse_uint32_t(&(test_data->cyclecount_delay), arg); + + if (ret) { + RTE_LOG(ERR, USER1, "Failed to parse cyclecount delay\n"); + return -1; + } + return 0; +} + typedef int (*option_parser_t)(struct comp_test_data *test_data, const char *arg); struct long_opt_parser { const char *lgopt_name; option_parser_t parser_fn; - }; static struct option lgopts[] = { - { CPERF_PTEST_TYPE, required_argument, 0, 0 }, { CPERF_DRIVER_NAME, required_argument, 0, 0 }, { CPERF_TEST_FILE, required_argument, 0, 0 }, @@ -556,6 +576,7 @@ static struct option lgopts[] = { { CPERF_LEVEL, required_argument, 0, 0 }, { CPERF_WINDOW_SIZE, required_argument, 0, 0 }, { CPERF_EXTERNAL_MBUFS, 0, 0, 0 }, + { CPERF_CYCLECOUNT_DELAY_US, required_argument, 0, 0 }, { NULL, 0, 0, 0 } }; @@ -577,6 +598,7 @@ comp_perf_opts_parse_long(int opt_idx, struct comp_test_data *test_data) { CPERF_LEVEL, parse_level }, { CPERF_WINDOW_SIZE, parse_window_sz }, { CPERF_EXTERNAL_MBUFS, parse_external_mbufs }, + { CPERF_CYCLECOUNT_DELAY_US, parse_cyclecount_delay_us }, }; unsigned int i; @@ -631,8 +653,9 @@ comp_perf_options_default(struct comp_test_data *test_data) test_data->level_lst.min = RTE_COMP_LEVEL_MIN; test_data->level_lst.max = 
RTE_COMP_LEVEL_MAX; test_data->level_lst.inc = 1; - test_data->test = CPERF_TEST_TYPE_BENCHMARK; + test_data->test = CPERF_TEST_TYPE_THROUGHPUT; test_data->use_external_mbufs = 0; + test_data->cyclecount_delay = 500; } int diff --git a/app/test-compress-perf/comp_perf_test_common.c b/app/test-compress-perf/comp_perf_test_common.c index 1b8985b43..b402a0d83 100644 --- a/app/test-compress-perf/comp_perf_test_common.c +++ b/app/test-compress-perf/comp_perf_test_common.c @@ -9,7 +9,8 @@ #include "comp_perf.h" #include "comp_perf_options.h" -#include "comp_perf_test_benchmark.h" +#include "comp_perf_test_throughput.h" +#include "comp_perf_test_cyclecount.h" #include "comp_perf_test_common.h" #include "comp_perf_test_verify.h" @@ -276,9 +277,11 @@ comp_perf_allocate_memory(struct comp_test_data *test_data, snprintf(pool_name, sizeof(pool_name), "op_pool_%u_qp_%u", mem->dev_id, mem->qp_id); + + /* one mempool for both src and dst mbufs */ mem->op_pool = rte_comp_op_pool_create(pool_name, - mem->total_bufs, - 0, 0, rte_socket_id()); + mem->total_bufs * 2, + 0, 0, rte_socket_id()); if (mem->op_pool == NULL) { RTE_LOG(ERR, USER1, "Comp op mempool could not be created\n"); return -1; @@ -495,20 +498,24 @@ prepare_bufs(struct comp_test_data *test_data, struct cperf_mem_resources *mem) } void -print_test_dynamics(void) +print_test_dynamics(const struct comp_test_data *test_data) { uint32_t opt_total_segs = DIV_CEIL(buffer_info.input_data_sz, MAX_SEG_SIZE); if (buffer_info.total_buffs > 1) { - printf("\nWarning: for the current input parameters, number" + if (test_data->test == CPERF_TEST_TYPE_THROUGHPUT) { + printf("\nWarning: for the current input parameters, number" " of ops is higher than one, which may result" " in sub-optimal performance.\n"); - printf("To improve the performance (for the current" + printf("To improve the performance (for the current" " input data) following parameters are" " suggested:\n"); - printf(" * Segment size: %d\n", MAX_SEG_SIZE); - printf(" * 
Number of segments: %u\n", opt_total_segs); + printf(" * Segment size: %d\n", + MAX_SEG_SIZE); + printf(" * Number of segments: %u\n", + opt_total_segs); + } } else if (buffer_info.total_buffs == 1) { printf("\nInfo: there is only one op with %u segments -" " the compression ratio is the best.\n", diff --git a/app/test-compress-perf/comp_perf_test_common.h b/app/test-compress-perf/comp_perf_test_common.h index 920642888..72705c6a2 100644 --- a/app/test-compress-perf/comp_perf_test_common.h +++ b/app/test-compress-perf/comp_perf_test_common.h @@ -49,6 +49,6 @@ int prepare_bufs(struct comp_test_data *test_data, struct cperf_mem_resources *mem); void -print_test_dynamics(void); +print_test_dynamics(const struct comp_test_data *test_data); #endif /* _COMP_PERF_TEST_COMMON_H_ */ diff --git a/app/test-compress-perf/comp_perf_test_cyclecount.c b/app/test-compress-perf/comp_perf_test_cyclecount.c new file mode 100644 index 000000000..55559a7d5 --- /dev/null +++ b/app/test-compress-perf/comp_perf_test_cyclecount.c @@ -0,0 +1,614 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2019 Intel Corporation + */ + +#include +#include +#include +#include +#include "rte_spinlock.h" +#include + +#include "comp_perf_test_cyclecount.h" + +struct cperf_cyclecount_ctx { + struct cperf_verify_ctx ver; + + uint32_t ops_enq_retries; + uint32_t ops_deq_retries; + + uint64_t duration_op; + uint64_t duration_enq; + uint64_t duration_deq; +}; + +void +cperf_cyclecount_test_destructor(void *arg) +{ + struct cperf_cyclecount_ctx *ctx = arg; + + if (arg) { + comp_perf_free_memory(ctx->ver.options, &ctx->ver.mem); + rte_free(arg); + } +} + +void * +cperf_cyclecount_test_constructor(uint8_t dev_id, uint16_t qp_id, + struct comp_test_data *options) +{ + struct cperf_cyclecount_ctx *ctx = NULL; + + ctx = rte_malloc(NULL, sizeof(struct cperf_cyclecount_ctx), 0); + + if (ctx == NULL) + return NULL; + + ctx->ver.mem.dev_id = dev_id; + ctx->ver.mem.qp_id = qp_id; + ctx->ver.options = options; 
+ ctx->ver.silent = 1; /* ver. part will be silent */ + + if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem) + && !prepare_bufs(ctx->ver.options, &ctx->ver.mem)) + return ctx; + + cperf_cyclecount_test_destructor(ctx); + return NULL; +} + +static int +cperf_cyclecount_op_setup(struct rte_comp_op **ops, + struct cperf_cyclecount_ctx *ctx, + struct rte_mbuf **input_bufs, + struct rte_mbuf **output_bufs, + void *priv_xform, + uint32_t out_seg_sz) +{ + struct comp_test_data *test_data = ctx->ver.options; + struct cperf_mem_resources *mem = &ctx->ver.mem; + + uint32_t i, iter, num_iter; + int res = 0; + uint16_t ops_needed; + + num_iter = test_data->num_iter; + + for (iter = 0; iter < num_iter; iter++) { + uint32_t remaining_ops = mem->total_bufs; + uint32_t total_deq_ops = 0; + uint32_t total_enq_ops = 0; + uint16_t num_enq = 0; + uint16_t num_deq = 0; + + while (remaining_ops > 0) { + uint16_t num_ops = RTE_MIN(remaining_ops, + test_data->burst_sz); + ops_needed = num_ops; + + /* Allocate compression operations */ + if (ops_needed && rte_mempool_get_bulk( + mem->op_pool, + (void **)ops, + ops_needed) != 0) { + RTE_LOG(ERR, USER1, + "Cyclecount: could not allocate enough operations\n"); + res = -1; + goto end; + } + + for (i = 0; i < ops_needed; i++) { + + /* Calculate next buffer to attach */ + /* to operation */ + uint32_t buf_id = total_enq_ops + i; + uint16_t op_id = i; + + /* Reset all data in output buffers */ + struct rte_mbuf *m = output_bufs[buf_id]; + + m->pkt_len = out_seg_sz * m->nb_segs; + while (m) { + m->data_len = m->buf_len - m->data_off; + m = m->next; + } + ops[op_id]->m_src = input_bufs[buf_id]; + ops[op_id]->m_dst = output_bufs[buf_id]; + ops[op_id]->src.offset = 0; + ops[op_id]->src.length = + rte_pktmbuf_pkt_len(input_bufs[buf_id]); + ops[op_id]->dst.offset = 0; + ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL; + ops[op_id]->input_chksum = buf_id; + ops[op_id]->private_xform = priv_xform; + } + + /* E N Q U E U I N G */ + /* assuming 
that all ops are enqueued */ + /* instead of the real enqueue operation */ + num_enq = num_ops; + + remaining_ops -= num_enq; + total_enq_ops += num_enq; + + /* D E Q U E U I N G */ + /* assuming that all ops dequeued */ + /* instead of the real dequeue operation */ + num_deq = num_ops; + + total_deq_ops += num_deq; + rte_mempool_put_bulk(mem->op_pool, + (void **)ops, num_deq); + } + } + return res; +end: + rte_mempool_put_bulk(mem->op_pool, (void **)ops, ops_needed); + rte_free(ops); + + return res; +} + +static int +main_loop(struct cperf_cyclecount_ctx *ctx, enum rte_comp_xform_type type) +{ + struct comp_test_data *test_data = ctx->ver.options; + struct cperf_mem_resources *mem = &ctx->ver.mem; + uint8_t dev_id = mem->dev_id; + uint32_t i, iter, num_iter; + struct rte_comp_op **ops, **deq_ops; + void *priv_xform = NULL; + struct rte_comp_xform xform; + struct rte_mbuf **input_bufs, **output_bufs; + int ret, res = 0; + int allocated = 0; + uint32_t out_seg_sz; + + uint64_t tsc_start, tsc_end, tsc_duration; + + if (test_data == NULL || !test_data->burst_sz) { + RTE_LOG(ERR, USER1, "Unknown burst size\n"); + return -1; + } + ctx->duration_enq = 0; + ctx->duration_deq = 0; + ctx->ops_enq_retries = 0; + ctx->ops_deq_retries = 0; + + /* one array for both enqueue and dequeue */ + ops = rte_zmalloc_socket(NULL, + 2 * mem->total_bufs * sizeof(struct rte_comp_op *), + 0, rte_socket_id()); + + if (ops == NULL) { + RTE_LOG(ERR, USER1, + "Can't allocate memory for ops strucures\n"); + return -1; + } + + deq_ops = &ops[mem->total_bufs]; + + if (type == RTE_COMP_COMPRESS) { + xform = (struct rte_comp_xform) { + .type = RTE_COMP_COMPRESS, + .compress = { + .algo = RTE_COMP_ALGO_DEFLATE, + .deflate.huffman = test_data->huffman_enc, + .level = test_data->level, + .window_size = test_data->window_sz, + .chksum = RTE_COMP_CHECKSUM_NONE, + .hash_algo = RTE_COMP_HASH_ALGO_NONE + } + }; + input_bufs = mem->decomp_bufs; + output_bufs = mem->comp_bufs; + out_seg_sz = 
test_data->out_seg_sz; + } else { + xform = (struct rte_comp_xform) { + .type = RTE_COMP_DECOMPRESS, + .decompress = { + .algo = RTE_COMP_ALGO_DEFLATE, + .chksum = RTE_COMP_CHECKSUM_NONE, + .window_size = test_data->window_sz, + .hash_algo = RTE_COMP_HASH_ALGO_NONE + } + }; + input_bufs = mem->comp_bufs; + output_bufs = mem->decomp_bufs; + out_seg_sz = test_data->seg_sz; + } + + /* Create private xform */ + if (rte_compressdev_private_xform_create(dev_id, &xform, + &priv_xform) < 0) { + RTE_LOG(ERR, USER1, "Private xform could not be created\n"); + res = -1; + goto end; + } + + tsc_start = rte_rdtsc_precise(); + ret = cperf_cyclecount_op_setup(ops, + ctx, + input_bufs, + output_bufs, + priv_xform, + out_seg_sz); + + tsc_end = rte_rdtsc_precise(); + + /* ret value check postponed a bit to cancel extra 'if' bias */ + if (ret < 0) { + RTE_LOG(ERR, USER1, "Setup function failed\n"); + res = -1; + goto end; + } + + tsc_duration = tsc_end - tsc_start; + ctx->duration_op = tsc_duration; + + num_iter = test_data->num_iter; + for (iter = 0; iter < num_iter; iter++) { + uint32_t total_ops = mem->total_bufs; + uint32_t remaining_ops = mem->total_bufs; + uint32_t total_deq_ops = 0; + uint32_t total_enq_ops = 0; + uint16_t ops_unused = 0; + uint16_t num_enq = 0; + uint16_t num_deq = 0; + + while (remaining_ops > 0) { + uint16_t num_ops = RTE_MIN(remaining_ops, + test_data->burst_sz); + uint16_t ops_needed = num_ops - ops_unused; + + /* + * Move the unused operations from the previous + * enqueue_burst call to the front, to maintain order + */ + if ((ops_unused > 0) && (num_enq > 0)) { + size_t nb_b_to_mov = + ops_unused * sizeof(struct rte_comp_op *); + + memmove(ops, &ops[num_enq], nb_b_to_mov); + } + + /* Allocate compression operations */ + if (ops_needed && rte_mempool_get_bulk( + mem->op_pool, + (void **)ops, + ops_needed) != 0) { + RTE_LOG(ERR, USER1, + "Could not allocate enough operations\n"); + res = -1; + goto end; + } + allocated += ops_needed; + + for (i = 0; i < 
ops_needed; i++) { + /* + * Calculate next buffer to attach to operation + */ + uint32_t buf_id = total_enq_ops + i + + ops_unused; + uint16_t op_id = ops_unused + i; + /* Reset all data in output buffers */ + struct rte_mbuf *m = output_bufs[buf_id]; + + m->pkt_len = out_seg_sz * m->nb_segs; + while (m) { + m->data_len = m->buf_len - m->data_off; + m = m->next; + } + ops[op_id]->m_src = input_bufs[buf_id]; + ops[op_id]->m_dst = output_bufs[buf_id]; + ops[op_id]->src.offset = 0; + ops[op_id]->src.length = + rte_pktmbuf_pkt_len(input_bufs[buf_id]); + ops[op_id]->dst.offset = 0; + ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL; + ops[op_id]->input_chksum = buf_id; + ops[op_id]->private_xform = priv_xform; + } + + if (unlikely(test_data->perf_comp_force_stop)) + goto end; + + tsc_start = rte_rdtsc_precise(); + num_enq = rte_compressdev_enqueue_burst(dev_id, + mem->qp_id, ops, + num_ops); + tsc_end = rte_rdtsc_precise(); + tsc_duration = tsc_end - tsc_start; + ctx->duration_enq += tsc_duration; + + if (num_enq < num_ops) + ctx->ops_enq_retries++; + + if (test_data->cyclecount_delay) + rte_delay_us_block(test_data->cyclecount_delay); + + if (num_enq == 0) { + struct rte_compressdev_stats stats; + + rte_compressdev_stats_get(dev_id, &stats); + if (stats.enqueue_err_count) { + res = -1; + goto end; + } + } + + ops_unused = num_ops - num_enq; + remaining_ops -= num_enq; + total_enq_ops += num_enq; + + tsc_start = rte_rdtsc_precise(); + num_deq = rte_compressdev_dequeue_burst(dev_id, + mem->qp_id, + deq_ops, + allocated); + tsc_end = rte_rdtsc_precise(); + tsc_duration = tsc_end - tsc_start; + ctx->duration_deq += tsc_duration; + + if (num_deq < allocated) + ctx->ops_deq_retries++; + + total_deq_ops += num_deq; + + if (iter == num_iter - 1) { + for (i = 0; i < num_deq; i++) { + struct rte_comp_op *op = deq_ops[i]; + + if (op->status != + RTE_COMP_OP_STATUS_SUCCESS) { + RTE_LOG(ERR, USER1, "Some operations were not successful\n"); + goto end; + } + + struct rte_mbuf *m = 
op->m_dst; + + m->pkt_len = op->produced; + uint32_t remaining_data = op->produced; + uint16_t data_to_append; + + while (remaining_data > 0) { + data_to_append = + RTE_MIN(remaining_data, + out_seg_sz); + m->data_len = data_to_append; + remaining_data -= + data_to_append; + m = m->next; + } + } + } + rte_mempool_put_bulk(mem->op_pool, + (void **)deq_ops, num_deq); + allocated -= num_deq; + } + + /* Dequeue the last operations */ + while (total_deq_ops < total_ops) { + if (unlikely(test_data->perf_comp_force_stop)) + goto end; + + tsc_start = rte_rdtsc_precise(); + num_deq = rte_compressdev_dequeue_burst(dev_id, + mem->qp_id, + deq_ops, + test_data->burst_sz); + tsc_end = rte_rdtsc_precise(); + tsc_duration = tsc_end - tsc_start; + ctx->duration_deq += tsc_duration; + ctx->ops_deq_retries++; + + if (num_deq == 0) { + struct rte_compressdev_stats stats; + + rte_compressdev_stats_get(dev_id, &stats); + if (stats.dequeue_err_count) { + res = -1; + goto end; + } + } + total_deq_ops += num_deq; + + if (iter == num_iter - 1) { + for (i = 0; i < num_deq; i++) { + struct rte_comp_op *op = deq_ops[i]; + + if (op->status != + RTE_COMP_OP_STATUS_SUCCESS) { + RTE_LOG(ERR, USER1, "Some operations were not successful\n"); + goto end; + } + + struct rte_mbuf *m = op->m_dst; + + m->pkt_len = op->produced; + uint32_t remaining_data = op->produced; + uint16_t data_to_append; + + while (remaining_data > 0) { + data_to_append = + RTE_MIN(remaining_data, + out_seg_sz); + m->data_len = data_to_append; + remaining_data -= + data_to_append; + m = m->next; + } + } + } + rte_mempool_put_bulk(mem->op_pool, + (void **)deq_ops, num_deq); + allocated -= num_deq; + } + } + allocated = 0; + +end: + if (allocated) + rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated); + rte_compressdev_private_xform_free(dev_id, priv_xform); + rte_free(ops); + + if (test_data->perf_comp_force_stop) { + RTE_LOG(ERR, USER1, + "lcore: %d Perf. 
test has been aborted by user\n", + mem->lcore_id); + res = -1; + } + return res; +} + +int +cperf_cyclecount_test_runner(void *test_ctx) +{ + struct cperf_cyclecount_ctx *ctx = test_ctx; + struct comp_test_data *test_data = ctx->ver.options; + uint32_t lcore = rte_lcore_id(); + static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0); + static rte_spinlock_t print_spinlock; + int i; + + uint32_t ops_enq_retries_comp; + uint32_t ops_deq_retries_comp; + + uint32_t ops_enq_retries_decomp; + uint32_t ops_deq_retries_decomp; + + uint32_t duration_setup_per_op; + + uint32_t duration_enq_per_op_comp; + uint32_t duration_deq_per_op_comp; + + uint32_t duration_enq_per_op_decomp; + uint32_t duration_deq_per_op_decomp; + + ctx->ver.mem.lcore_id = lcore; + + /* + * printing information about current compression thread + */ + if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once)) + printf(" lcore: %u," + " driver name: %s," + " device name: %s," + " device id: %u," + " socket id: %u," + " queue pair id: %u\n", + lcore, + ctx->ver.options->driver_name, + rte_compressdev_name_get(ctx->ver.mem.dev_id), + ctx->ver.mem.dev_id, + rte_compressdev_socket_id(ctx->ver.mem.dev_id), + ctx->ver.mem.qp_id); + + /* + * First the verification part is needed + */ + if (cperf_verify_test_runner(&ctx->ver)) + return EXIT_FAILURE; + + /* + * Run the tests twice, discarding the first performance + * results, before the cache is warmed up + */ + + /* C O M P R E S S */ + for (i = 0; i < 2; i++) { + if (main_loop(ctx, RTE_COMP_COMPRESS) < 0) + return EXIT_FAILURE; + } + + ops_enq_retries_comp = ctx->ops_enq_retries; + ops_deq_retries_comp = ctx->ops_deq_retries; + + duration_enq_per_op_comp = ctx->duration_enq / + (ctx->ver.mem.total_bufs * test_data->num_iter); + duration_deq_per_op_comp = ctx->duration_deq / + (ctx->ver.mem.total_bufs * test_data->num_iter); + + /* D E C O M P R E S S */ + for (i = 0; i < 2; i++) { + if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0) + return EXIT_FAILURE; + } + 
+ ops_enq_retries_decomp = ctx->ops_enq_retries; + ops_deq_retries_decomp = ctx->ops_deq_retries; + + duration_enq_per_op_decomp = ctx->duration_enq / + (ctx->ver.mem.total_bufs * test_data->num_iter); + duration_deq_per_op_decomp = ctx->duration_deq / + (ctx->ver.mem.total_bufs * test_data->num_iter); + + duration_setup_per_op = ctx->duration_op / + (ctx->ver.mem.total_bufs * test_data->num_iter); + + /* R E P O R T processing */ + if (rte_atomic16_test_and_set(&display_once)) { + + rte_spinlock_lock(&print_spinlock); + + printf("\nLegend for the table\n" + " - Retries section: number of retries for the following operations:\n" + " [C-e] - compression enqueue\n" + " [C-d] - compression dequeue\n" + " [D-e] - decompression enqueue\n" + " [D-d] - decompression dequeue\n" + " - Cycles section: number of cycles per 'op' for the following operations:\n" + " setup/op - memory allocation, op configuration and memory dealocation\n" + " [C-e] - compression enqueue\n" + " [C-d] - compression dequeue\n" + " [D-e] - decompression enqueue\n" + " [D-d] - decompression dequeue\n\n"); + + printf("\n%12s%6s%12s%17s", + "lcore id", "Level", "Comp size", "Comp ratio [%]"); + + printf(" |%10s %6s %8s %6s %8s", + " Retries:", + "[C-e]", "[C-d]", + "[D-e]", "[D-d]"); + + printf(" |%9s %9s %9s %9s %9s %9s\n", + " Cycles:", + "setup/op", + "[C-e]", "[C-d]", + "[D-e]", "[D-d]"); + + rte_spinlock_unlock(&print_spinlock); + } + + rte_spinlock_lock(&print_spinlock); + + printf("%12u" + "%6u" + "%12zu" + "%17.2f", + ctx->ver.mem.lcore_id, + test_data->level, + ctx->ver.comp_data_sz, + ctx->ver.ratio); + + printf(" |%10s %6u %8u %6u %8u", + " ", + ops_enq_retries_comp, + ops_deq_retries_comp, + ops_enq_retries_decomp, + ops_deq_retries_decomp); + + printf(" |%9s %9u %9u %9u %9u %9u\n", + " ", + duration_setup_per_op, + duration_enq_per_op_comp, + duration_deq_per_op_comp, + duration_enq_per_op_decomp, + duration_deq_per_op_decomp); + + rte_spinlock_unlock(&print_spinlock); + + return 
EXIT_SUCCESS; +} diff --git a/app/test-compress-perf/comp_perf_test_cyclecount.h b/app/test-compress-perf/comp_perf_test_cyclecount.h new file mode 100644 index 000000000..8e1b4d9e9 --- /dev/null +++ b/app/test-compress-perf/comp_perf_test_cyclecount.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2019 Intel Corporation + */ + +#ifndef _COMP_PERF_TEST_CYCLECOUNT_ +#define _COMP_PERF_TEST_CYCLECOUNT_ + +#include + +#include "comp_perf_options.h" +#include "comp_perf_test_common.h" +#include "comp_perf_test_verify.h" + +void +cperf_cyclecount_test_destructor(void *arg); + +int +cperf_cyclecount_test_runner(void *test_ctx); + +void * +cperf_cyclecount_test_constructor(uint8_t dev_id, uint16_t qp_id, + struct comp_test_data *options); + +#endif diff --git a/app/test-compress-perf/comp_perf_test_benchmark.c b/app/test-compress-perf/comp_perf_test_throughput.c similarity index 97% rename from app/test-compress-perf/comp_perf_test_benchmark.c rename to app/test-compress-perf/comp_perf_test_throughput.c index 0c6bb9b45..13922b658 100644 --- a/app/test-compress-perf/comp_perf_test_benchmark.c +++ b/app/test-compress-perf/comp_perf_test_throughput.c @@ -8,10 +8,10 @@ #include #include -#include "comp_perf_test_benchmark.h" +#include "comp_perf_test_throughput.h" void -cperf_benchmark_test_destructor(void *arg) +cperf_throughput_test_destructor(void *arg) { if (arg) { comp_perf_free_memory( @@ -22,7 +22,7 @@ cperf_benchmark_test_destructor(void *arg) } void * -cperf_benchmark_test_constructor(uint8_t dev_id, uint16_t qp_id, +cperf_throughput_test_constructor(uint8_t dev_id, uint16_t qp_id, struct comp_test_data *options) { struct cperf_benchmark_ctx *ctx = NULL; @@ -41,7 +41,7 @@ cperf_benchmark_test_constructor(uint8_t dev_id, uint16_t qp_id, && !prepare_bufs(ctx->ver.options, &ctx->ver.mem)) return ctx; - cperf_benchmark_test_destructor(ctx); + cperf_throughput_test_destructor(ctx); return NULL; } @@ -324,7 +324,7 @@ main_loop(struct 
cperf_benchmark_ctx *ctx, enum rte_comp_xform_type type) } int -cperf_benchmark_test_runner(void *test_ctx) +cperf_throughput_test_runner(void *test_ctx) { struct cperf_benchmark_ctx *ctx = test_ctx; struct comp_test_data *test_data = ctx->ver.options; diff --git a/app/test-compress-perf/comp_perf_test_benchmark.h b/app/test-compress-perf/comp_perf_test_throughput.h similarity index 80% rename from app/test-compress-perf/comp_perf_test_benchmark.h rename to app/test-compress-perf/comp_perf_test_throughput.h index d9b2694b8..467e3aa78 100644 --- a/app/test-compress-perf/comp_perf_test_benchmark.h +++ b/app/test-compress-perf/comp_perf_test_throughput.h @@ -24,13 +24,13 @@ struct cperf_benchmark_ctx { }; void -cperf_benchmark_test_destructor(void *arg); +cperf_throughput_test_destructor(void *arg); int -cperf_benchmark_test_runner(void *test_ctx); +cperf_throughput_test_runner(void *test_ctx); void * -cperf_benchmark_test_constructor(uint8_t dev_id, uint16_t qp_id, +cperf_throughput_test_constructor(uint8_t dev_id, uint16_t qp_id, struct comp_test_data *options); #endif diff --git a/app/test-compress-perf/comp_perf_test_verify.c b/app/test-compress-perf/comp_perf_test_verify.c index 758a22ff5..5e13257b7 100644 --- a/app/test-compress-perf/comp_perf_test_verify.c +++ b/app/test-compress-perf/comp_perf_test_verify.c @@ -48,8 +48,8 @@ static int main_loop(struct cperf_verify_ctx *ctx, enum rte_comp_xform_type type) { struct comp_test_data *test_data = ctx->options; - uint8_t *output_data_ptr; - size_t *output_data_sz; + uint8_t *output_data_ptr = NULL; + size_t *output_data_sz = NULL; struct cperf_mem_resources *mem = &ctx->mem; uint8_t dev_id = mem->dev_id; diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c index 6b56dd680..ed21605d8 100644 --- a/app/test-compress-perf/main.c +++ b/app/test-compress-perf/main.c @@ -11,32 +11,41 @@ #include #include -#include "comp_perf_options.h" -#include "comp_perf_test_verify.h" -#include 
"comp_perf_test_benchmark.h" #include "comp_perf.h" +#include "comp_perf_options.h" #include "comp_perf_test_common.h" +#include "comp_perf_test_cyclecount.h" +#include "comp_perf_test_throughput.h" +#include "comp_perf_test_verify.h" #define NUM_MAX_XFORMS 16 #define NUM_MAX_INFLIGHT_OPS 512 __extension__ const char *comp_perf_test_type_strs[] = { - [CPERF_TEST_TYPE_BENCHMARK] = "benchmark", - [CPERF_TEST_TYPE_VERIFY] = "verify" + [CPERF_TEST_TYPE_THROUGHPUT] = "throughput", + [CPERF_TEST_TYPE_VERIFY] = "verify", + [CPERF_TEST_TYPE_PMDCC] = "pmd-cyclecount" }; __extension__ static const struct cperf_test cperf_testmap[] = { - [CPERF_TEST_TYPE_BENCHMARK] = { - cperf_benchmark_test_constructor, - cperf_benchmark_test_runner, - cperf_benchmark_test_destructor + [CPERF_TEST_TYPE_THROUGHPUT] = { + cperf_throughput_test_constructor, + cperf_throughput_test_runner, + cperf_throughput_test_destructor + }, [CPERF_TEST_TYPE_VERIFY] = { cperf_verify_test_constructor, cperf_verify_test_runner, cperf_verify_test_destructor + }, + + [CPERF_TEST_TYPE_PMDCC] = { + cperf_cyclecount_test_constructor, + cperf_cyclecount_test_runner, + cperf_cyclecount_test_destructor } }; @@ -116,7 +125,8 @@ comp_perf_initialize_compressdev(struct comp_test_data *test_data, enabled_cdev_count = rte_compressdev_devices_get(test_data->driver_name, enabled_cdevs, RTE_COMPRESS_MAX_DEVS); if (enabled_cdev_count == 0) { - RTE_LOG(ERR, USER1, "No compress devices type %s available\n", + RTE_LOG(ERR, USER1, "No compress devices type %s available," + " please check the list of specified devices in EAL section\n", test_data->driver_name); return -EINVAL; } @@ -270,6 +280,7 @@ comp_perf_dump_input_data(struct comp_test_data *test_data) data += data_to_read; } + printf("\n"); if (test_data->input_data_sz > actual_file_sz) RTE_LOG(INFO, USER1, "%zu bytes read from file %s, extending the file %.2f times\n", @@ -365,9 +376,12 @@ main(int argc, char **argv) else test_data->level = test_data->level_lst.list[0]; - 
printf("App uses socket: %u\n", rte_socket_id()); + printf("\nApp uses socket: %u\n", rte_socket_id()); printf("Burst size = %u\n", test_data->burst_sz); printf("Input data size = %zu\n", test_data->input_data_sz); + if (test_data->test == CPERF_TEST_TYPE_PMDCC) + printf("Cycle-count delay = %u [us]\n", + test_data->cyclecount_delay); test_data->cleanup = ST_DURING_TEST; total_nb_qps = nb_compressdevs * test_data->nb_qps; @@ -394,7 +408,7 @@ main(int argc, char **argv) i++; } - print_test_dynamics(); /* constructors must be executed first */ + print_test_dynamics(test_data); while (test_data->level <= test_data->level_lst.max) { @@ -472,7 +486,28 @@ main(int argc, char **argv) } __rte_weak void * -cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused, +cperf_cyclecount_test_constructor(uint8_t dev_id __rte_unused, + uint16_t qp_id __rte_unused, + struct comp_test_data *options __rte_unused) +{ + RTE_LOG(INFO, USER1, "Cycle count test is not supported yet\n"); + return NULL; +} + +__rte_weak void +cperf_cyclecount_test_destructor(void *arg __rte_unused) +{ + RTE_LOG(INFO, USER1, "Something wrong happened!!!\n"); +} + +__rte_weak int +cperf_cyclecount_test_runner(void *test_ctx __rte_unused) +{ + return 0; +} + +__rte_weak void * +cperf_throughput_test_constructor(uint8_t dev_id __rte_unused, uint16_t qp_id __rte_unused, struct comp_test_data *options __rte_unused) { @@ -481,13 +516,13 @@ cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused, } __rte_weak void -cperf_benchmark_test_destructor(void *arg __rte_unused) +cperf_throughput_test_destructor(void *arg __rte_unused) { } __rte_weak int -cperf_benchmark_test_runner(void *test_ctx __rte_unused) +cperf_throughput_test_runner(void *test_ctx __rte_unused) { return 0; } diff --git a/app/test-compress-perf/meson.build b/app/test-compress-perf/meson.build index 1136f04bc..1fe26cc14 100644 --- a/app/test-compress-perf/meson.build +++ b/app/test-compress-perf/meson.build @@ -5,6 +5,7 @@ 
allow_experimental_apis = true sources = files('comp_perf_options_parse.c', 'main.c', 'comp_perf_test_verify.c', - 'comp_perf_test_benchmark.c', + 'comp_perf_test_throughput.c', + 'comp_perf_test_cyclecount.c', 'comp_perf_test_common.c') deps = ['compressdev'] -- 2.17.1