* [dpdk-dev] [PATCH] test/compress: add cycle-count mode to the perf tool
From: Artur Trybula @ 2019-12-11 15:50 UTC
To: dev, fiona.trahe, shallyv, adamx.dybkowski, marcinx.danilewicz,
arturx.trybula, akhil.goyal
This commit adds a cycle-count mode to the compression perf tool.
The new mode allows cycle-count measurement of both hardware and
software PMDs.
Signed-off-by: Artur Trybula <arturx.trybula@intel.com>
---
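For illustration, the new pmd-cyclecount mode could be invoked along these
lines; the binary name, EAL core/device arguments and the input file are
placeholders for this example and are not taken from this patch:

    # core, PCI device and input file below are assumptions for the example
    ./dpdk-test-compress-perf -l 4 -w 0000:3d:00.0 -- \
        --driver-name compress_qat \
        --input-file ./corpus.bin \
        --ptest pmd-cyclecount \
        --cc-delay-us 500

The --cc-delay-us value applies only to the pmd-cyclecount test (default:
500 us); the remaining options behave as in the existing throughput and
verify modes.
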
app/test-compress-perf/Makefile | 3 +-
app/test-compress-perf/comp_perf.h | 6 +-
app/test-compress-perf/comp_perf_options.h | 7 +-
.../comp_perf_options_parse.c | 35 +-
.../comp_perf_test_common.c | 23 +-
.../comp_perf_test_common.h | 2 +-
.../comp_perf_test_cyclecount.c | 614 ++++++++++++++++++
.../comp_perf_test_cyclecount.h | 24 +
...enchmark.c => comp_perf_test_throughput.c} | 10 +-
...enchmark.h => comp_perf_test_throughput.h} | 6 +-
.../comp_perf_test_verify.c | 4 +-
app/test-compress-perf/main.c | 65 +-
app/test-compress-perf/meson.build | 3 +-
13 files changed, 755 insertions(+), 47 deletions(-)
create mode 100644 app/test-compress-perf/comp_perf_test_cyclecount.c
create mode 100644 app/test-compress-perf/comp_perf_test_cyclecount.h
rename app/test-compress-perf/{comp_perf_test_benchmark.c => comp_perf_test_throughput.c} (97%)
rename app/test-compress-perf/{comp_perf_test_benchmark.h => comp_perf_test_throughput.h} (80%)
diff --git a/app/test-compress-perf/Makefile b/app/test-compress-perf/Makefile
index d1a6820e6..ad3b91d0a 100644
--- a/app/test-compress-perf/Makefile
+++ b/app/test-compress-perf/Makefile
@@ -13,7 +13,8 @@ CFLAGS += -O3
SRCS-y := main.c
SRCS-y += comp_perf_options_parse.c
SRCS-y += comp_perf_test_verify.c
-SRCS-y += comp_perf_test_benchmark.c
+SRCS-y += comp_perf_test_throughput.c
+SRCS-y += comp_perf_test_cyclecount.c
SRCS-y += comp_perf_test_common.c
include $(RTE_SDK)/mk/rte.app.mk
diff --git a/app/test-compress-perf/comp_perf.h b/app/test-compress-perf/comp_perf.h
index 57289b07a..997d46b59 100644
--- a/app/test-compress-perf/comp_perf.h
+++ b/app/test-compress-perf/comp_perf.h
@@ -26,15 +26,15 @@ struct cperf_test {
/* Needed for weak functions*/
void *
-cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused,
+cperf_throughput_test_constructor(uint8_t dev_id __rte_unused,
uint16_t qp_id __rte_unused,
struct comp_test_data *options __rte_unused);
void
-cperf_benchmark_test_destructor(void *arg __rte_unused);
+cperf_throughput_test_destructor(void *arg __rte_unused);
int
-cperf_benchmark_test_runner(void *test_ctx __rte_unused);
+cperf_throughput_test_runner(void *test_ctx __rte_unused);
void *
cperf_verify_test_constructor(uint8_t dev_id __rte_unused,
diff --git a/app/test-compress-perf/comp_perf_options.h b/app/test-compress-perf/comp_perf_options.h
index 2c26511ef..0b777521c 100644
--- a/app/test-compress-perf/comp_perf_options.h
+++ b/app/test-compress-perf/comp_perf_options.h
@@ -24,8 +24,9 @@ enum cleanup_st {
};
enum cperf_test_type {
- CPERF_TEST_TYPE_BENCHMARK,
- CPERF_TEST_TYPE_VERIFY
+ CPERF_TEST_TYPE_THROUGHPUT,
+ CPERF_TEST_TYPE_VERIFY,
+ CPERF_TEST_TYPE_PMDCC
};
enum comp_operation {
@@ -68,6 +69,8 @@ struct comp_test_data {
double ratio;
enum cleanup_st cleanup;
int perf_comp_force_stop;
+
+ uint32_t cyclecount_delay;
};
int
diff --git a/app/test-compress-perf/comp_perf_options_parse.c b/app/test-compress-perf/comp_perf_options_parse.c
index 12d0a6caf..04a8d2fbe 100644
--- a/app/test-compress-perf/comp_perf_options_parse.c
+++ b/app/test-compress-perf/comp_perf_options_parse.c
@@ -30,6 +30,9 @@
#define CPERF_WINDOW_SIZE ("window-sz")
#define CPERF_EXTERNAL_MBUFS ("external-mbufs")
+/* cyclecount-specific options */
+#define CPERF_CYCLECOUNT_DELAY_US ("cc-delay-us")
+
struct name_id_map {
const char *name;
uint32_t id;
@@ -39,7 +42,7 @@ static void
usage(char *progname)
{
printf("%s [EAL options] --\n"
- " --ptest benchmark / verify :"
+ " --ptest throughput / verify / pmd-cyclecount\n"
" --driver-name NAME: compress driver to use\n"
" --input-file NAME: file to compress and decompress\n"
" --extended-input-sz N: extend file data up to this size (default: no extension)\n"
@@ -61,6 +64,8 @@ usage(char *progname)
" (e.g.: 15 => 32k, default: max supported by PMD)\n"
" --external-mbufs: use memzones as external buffers instead of\n"
" keeping the data directly in mbuf area\n"
+ " --cc-delay-us N: delay between enqueue and dequeue operations in microseconds\n"
+ " valid only for cyclecount perf test (default: 500 us)\n"
" -h: prints this help\n",
progname);
}
@@ -85,12 +90,16 @@ parse_cperf_test_type(struct comp_test_data *test_data, const char *arg)
{
struct name_id_map cperftest_namemap[] = {
{
- comp_perf_test_type_strs[CPERF_TEST_TYPE_BENCHMARK],
- CPERF_TEST_TYPE_BENCHMARK
+ comp_perf_test_type_strs[CPERF_TEST_TYPE_THROUGHPUT],
+ CPERF_TEST_TYPE_THROUGHPUT
},
{
comp_perf_test_type_strs[CPERF_TEST_TYPE_VERIFY],
CPERF_TEST_TYPE_VERIFY
+ },
+ {
+ comp_perf_test_type_strs[CPERF_TEST_TYPE_PMDCC],
+ CPERF_TEST_TYPE_PMDCC
}
};
@@ -531,17 +540,28 @@ parse_external_mbufs(struct comp_test_data *test_data,
return 0;
}
+static int
+parse_cyclecount_delay_us(struct comp_test_data *test_data,
+ const char *arg)
+{
+ int ret = parse_uint32_t(&(test_data->cyclecount_delay), arg);
+
+ if (ret) {
+ RTE_LOG(ERR, USER1, "Failed to parse cyclecount delay\n");
+ return -1;
+ }
+ return 0;
+}
+
typedef int (*option_parser_t)(struct comp_test_data *test_data,
const char *arg);
struct long_opt_parser {
const char *lgopt_name;
option_parser_t parser_fn;
-
};
static struct option lgopts[] = {
-
{ CPERF_PTEST_TYPE, required_argument, 0, 0 },
{ CPERF_DRIVER_NAME, required_argument, 0, 0 },
{ CPERF_TEST_FILE, required_argument, 0, 0 },
@@ -556,6 +576,7 @@ static struct option lgopts[] = {
{ CPERF_LEVEL, required_argument, 0, 0 },
{ CPERF_WINDOW_SIZE, required_argument, 0, 0 },
{ CPERF_EXTERNAL_MBUFS, 0, 0, 0 },
+ { CPERF_CYCLECOUNT_DELAY_US, required_argument, 0, 0 },
{ NULL, 0, 0, 0 }
};
@@ -577,6 +598,7 @@ comp_perf_opts_parse_long(int opt_idx, struct comp_test_data *test_data)
{ CPERF_LEVEL, parse_level },
{ CPERF_WINDOW_SIZE, parse_window_sz },
{ CPERF_EXTERNAL_MBUFS, parse_external_mbufs },
+ { CPERF_CYCLECOUNT_DELAY_US, parse_cyclecount_delay_us },
};
unsigned int i;
@@ -631,8 +653,9 @@ comp_perf_options_default(struct comp_test_data *test_data)
test_data->level_lst.min = RTE_COMP_LEVEL_MIN;
test_data->level_lst.max = RTE_COMP_LEVEL_MAX;
test_data->level_lst.inc = 1;
- test_data->test = CPERF_TEST_TYPE_BENCHMARK;
+ test_data->test = CPERF_TEST_TYPE_THROUGHPUT;
test_data->use_external_mbufs = 0;
+ test_data->cyclecount_delay = 500;
}
int
diff --git a/app/test-compress-perf/comp_perf_test_common.c b/app/test-compress-perf/comp_perf_test_common.c
index 1b8985b43..b402a0d83 100644
--- a/app/test-compress-perf/comp_perf_test_common.c
+++ b/app/test-compress-perf/comp_perf_test_common.c
@@ -9,7 +9,8 @@
#include "comp_perf.h"
#include "comp_perf_options.h"
-#include "comp_perf_test_benchmark.h"
+#include "comp_perf_test_throughput.h"
+#include "comp_perf_test_cyclecount.h"
#include "comp_perf_test_common.h"
#include "comp_perf_test_verify.h"
@@ -276,9 +277,11 @@ comp_perf_allocate_memory(struct comp_test_data *test_data,
snprintf(pool_name, sizeof(pool_name), "op_pool_%u_qp_%u",
mem->dev_id, mem->qp_id);
+
+ /* one mempool for both src and dst mbufs */
mem->op_pool = rte_comp_op_pool_create(pool_name,
- mem->total_bufs,
- 0, 0, rte_socket_id());
+ mem->total_bufs * 2,
+ 0, 0, rte_socket_id());
if (mem->op_pool == NULL) {
RTE_LOG(ERR, USER1, "Comp op mempool could not be created\n");
return -1;
@@ -495,20 +498,24 @@ prepare_bufs(struct comp_test_data *test_data, struct cperf_mem_resources *mem)
}
void
-print_test_dynamics(void)
+print_test_dynamics(const struct comp_test_data *test_data)
{
uint32_t opt_total_segs = DIV_CEIL(buffer_info.input_data_sz,
MAX_SEG_SIZE);
if (buffer_info.total_buffs > 1) {
- printf("\nWarning: for the current input parameters, number"
+ if (test_data->test == CPERF_TEST_TYPE_THROUGHPUT) {
+ printf("\nWarning: for the current input parameters, number"
" of ops is higher than one, which may result"
" in sub-optimal performance.\n");
- printf("To improve the performance (for the current"
+ printf("To improve the performance (for the current"
" input data) following parameters are"
" suggested:\n");
- printf(" * Segment size: %d\n", MAX_SEG_SIZE);
- printf(" * Number of segments: %u\n", opt_total_segs);
+ printf(" * Segment size: %d\n",
+ MAX_SEG_SIZE);
+ printf(" * Number of segments: %u\n",
+ opt_total_segs);
+ }
} else if (buffer_info.total_buffs == 1) {
printf("\nInfo: there is only one op with %u segments -"
" the compression ratio is the best.\n",
diff --git a/app/test-compress-perf/comp_perf_test_common.h b/app/test-compress-perf/comp_perf_test_common.h
index 920642888..72705c6a2 100644
--- a/app/test-compress-perf/comp_perf_test_common.h
+++ b/app/test-compress-perf/comp_perf_test_common.h
@@ -49,6 +49,6 @@ int
prepare_bufs(struct comp_test_data *test_data, struct cperf_mem_resources *mem);
void
-print_test_dynamics(void);
+print_test_dynamics(const struct comp_test_data *test_data);
#endif /* _COMP_PERF_TEST_COMMON_H_ */
diff --git a/app/test-compress-perf/comp_perf_test_cyclecount.c b/app/test-compress-perf/comp_perf_test_cyclecount.c
new file mode 100644
index 000000000..55559a7d5
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_test_cyclecount.c
@@ -0,0 +1,614 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_cycles.h>
+#include "rte_spinlock.h"
+#include <rte_compressdev.h>
+
+#include "comp_perf_test_cyclecount.h"
+
+struct cperf_cyclecount_ctx {
+ struct cperf_verify_ctx ver;
+
+ uint32_t ops_enq_retries;
+ uint32_t ops_deq_retries;
+
+ uint64_t duration_op;
+ uint64_t duration_enq;
+ uint64_t duration_deq;
+};
+
+void
+cperf_cyclecount_test_destructor(void *arg)
+{
+ struct cperf_cyclecount_ctx *ctx = arg;
+
+ if (arg) {
+ comp_perf_free_memory(ctx->ver.options, &ctx->ver.mem);
+ rte_free(arg);
+ }
+}
+
+void *
+cperf_cyclecount_test_constructor(uint8_t dev_id, uint16_t qp_id,
+ struct comp_test_data *options)
+{
+ struct cperf_cyclecount_ctx *ctx = NULL;
+
+ ctx = rte_malloc(NULL, sizeof(struct cperf_cyclecount_ctx), 0);
+
+ if (ctx == NULL)
+ return NULL;
+
+ ctx->ver.mem.dev_id = dev_id;
+ ctx->ver.mem.qp_id = qp_id;
+ ctx->ver.options = options;
+ ctx->ver.silent = 1; /* ver. part will be silent */
+
+ if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem)
+ && !prepare_bufs(ctx->ver.options, &ctx->ver.mem))
+ return ctx;
+
+ cperf_cyclecount_test_destructor(ctx);
+ return NULL;
+}
+
+static int
+cperf_cyclecount_op_setup(struct rte_comp_op **ops,
+ struct cperf_cyclecount_ctx *ctx,
+ struct rte_mbuf **input_bufs,
+ struct rte_mbuf **output_bufs,
+ void *priv_xform,
+ uint32_t out_seg_sz)
+{
+ struct comp_test_data *test_data = ctx->ver.options;
+ struct cperf_mem_resources *mem = &ctx->ver.mem;
+
+ uint32_t i, iter, num_iter;
+ int res = 0;
+ uint16_t ops_needed;
+
+ num_iter = test_data->num_iter;
+
+ for (iter = 0; iter < num_iter; iter++) {
+ uint32_t remaining_ops = mem->total_bufs;
+ uint32_t total_deq_ops = 0;
+ uint32_t total_enq_ops = 0;
+ uint16_t num_enq = 0;
+ uint16_t num_deq = 0;
+
+ while (remaining_ops > 0) {
+ uint16_t num_ops = RTE_MIN(remaining_ops,
+ test_data->burst_sz);
+ ops_needed = num_ops;
+
+ /* Allocate compression operations */
+ if (ops_needed && rte_mempool_get_bulk(
+ mem->op_pool,
+ (void **)ops,
+ ops_needed) != 0) {
+ RTE_LOG(ERR, USER1,
+ "Cyclecount: could not allocate enough operations\n");
+ res = -1;
+ goto end;
+ }
+
+ for (i = 0; i < ops_needed; i++) {
+
+ /* Calculate next buffer to attach */
+ /* to operation */
+ uint32_t buf_id = total_enq_ops + i;
+ uint16_t op_id = i;
+
+ /* Reset all data in output buffers */
+ struct rte_mbuf *m = output_bufs[buf_id];
+
+ m->pkt_len = out_seg_sz * m->nb_segs;
+ while (m) {
+ m->data_len = m->buf_len - m->data_off;
+ m = m->next;
+ }
+ ops[op_id]->m_src = input_bufs[buf_id];
+ ops[op_id]->m_dst = output_bufs[buf_id];
+ ops[op_id]->src.offset = 0;
+ ops[op_id]->src.length =
+ rte_pktmbuf_pkt_len(input_bufs[buf_id]);
+ ops[op_id]->dst.offset = 0;
+ ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
+ ops[op_id]->input_chksum = buf_id;
+ ops[op_id]->private_xform = priv_xform;
+ }
+
+ /* E N Q U E U I N G */
+ /* assuming that all ops are enqueued */
+ /* instead of the real enqueue operation */
+ num_enq = num_ops;
+
+ remaining_ops -= num_enq;
+ total_enq_ops += num_enq;
+
+ /* D E Q U E U I N G */
+ /* assuming that all ops dequeued */
+ /* instead of the real dequeue operation */
+ num_deq = num_ops;
+
+ total_deq_ops += num_deq;
+ rte_mempool_put_bulk(mem->op_pool,
+ (void **)ops, num_deq);
+ }
+ }
+ return res;
+end:
+ rte_mempool_put_bulk(mem->op_pool, (void **)ops, ops_needed);
+ rte_free(ops);
+
+ return res;
+}
+
+static int
+main_loop(struct cperf_cyclecount_ctx *ctx, enum rte_comp_xform_type type)
+{
+ struct comp_test_data *test_data = ctx->ver.options;
+ struct cperf_mem_resources *mem = &ctx->ver.mem;
+ uint8_t dev_id = mem->dev_id;
+ uint32_t i, iter, num_iter;
+ struct rte_comp_op **ops, **deq_ops;
+ void *priv_xform = NULL;
+ struct rte_comp_xform xform;
+ struct rte_mbuf **input_bufs, **output_bufs;
+ int ret, res = 0;
+ int allocated = 0;
+ uint32_t out_seg_sz;
+
+ uint64_t tsc_start, tsc_end, tsc_duration;
+
+ if (test_data == NULL || !test_data->burst_sz) {
+ RTE_LOG(ERR, USER1, "Unknown burst size\n");
+ return -1;
+ }
+ ctx->duration_enq = 0;
+ ctx->duration_deq = 0;
+ ctx->ops_enq_retries = 0;
+ ctx->ops_deq_retries = 0;
+
+ /* one array for both enqueue and dequeue */
+ ops = rte_zmalloc_socket(NULL,
+ 2 * mem->total_bufs * sizeof(struct rte_comp_op *),
+ 0, rte_socket_id());
+
+ if (ops == NULL) {
+ RTE_LOG(ERR, USER1,
+ "Can't allocate memory for ops strucures\n");
+ return -1;
+ }
+
+ deq_ops = &ops[mem->total_bufs];
+
+ if (type == RTE_COMP_COMPRESS) {
+ xform = (struct rte_comp_xform) {
+ .type = RTE_COMP_COMPRESS,
+ .compress = {
+ .algo = RTE_COMP_ALGO_DEFLATE,
+ .deflate.huffman = test_data->huffman_enc,
+ .level = test_data->level,
+ .window_size = test_data->window_sz,
+ .chksum = RTE_COMP_CHECKSUM_NONE,
+ .hash_algo = RTE_COMP_HASH_ALGO_NONE
+ }
+ };
+ input_bufs = mem->decomp_bufs;
+ output_bufs = mem->comp_bufs;
+ out_seg_sz = test_data->out_seg_sz;
+ } else {
+ xform = (struct rte_comp_xform) {
+ .type = RTE_COMP_DECOMPRESS,
+ .decompress = {
+ .algo = RTE_COMP_ALGO_DEFLATE,
+ .chksum = RTE_COMP_CHECKSUM_NONE,
+ .window_size = test_data->window_sz,
+ .hash_algo = RTE_COMP_HASH_ALGO_NONE
+ }
+ };
+ input_bufs = mem->comp_bufs;
+ output_bufs = mem->decomp_bufs;
+ out_seg_sz = test_data->seg_sz;
+ }
+
+ /* Create private xform */
+ if (rte_compressdev_private_xform_create(dev_id, &xform,
+ &priv_xform) < 0) {
+ RTE_LOG(ERR, USER1, "Private xform could not be created\n");
+ res = -1;
+ goto end;
+ }
+
+ tsc_start = rte_rdtsc_precise();
+ ret = cperf_cyclecount_op_setup(ops,
+ ctx,
+ input_bufs,
+ output_bufs,
+ priv_xform,
+ out_seg_sz);
+
+ tsc_end = rte_rdtsc_precise();
+
+ /* ret value check postponed a bit to cancel extra 'if' bias */
+ if (ret < 0) {
+ RTE_LOG(ERR, USER1, "Setup function failed\n");
+ res = -1;
+ goto end;
+ }
+
+ tsc_duration = tsc_end - tsc_start;
+ ctx->duration_op = tsc_duration;
+
+ num_iter = test_data->num_iter;
+ for (iter = 0; iter < num_iter; iter++) {
+ uint32_t total_ops = mem->total_bufs;
+ uint32_t remaining_ops = mem->total_bufs;
+ uint32_t total_deq_ops = 0;
+ uint32_t total_enq_ops = 0;
+ uint16_t ops_unused = 0;
+ uint16_t num_enq = 0;
+ uint16_t num_deq = 0;
+
+ while (remaining_ops > 0) {
+ uint16_t num_ops = RTE_MIN(remaining_ops,
+ test_data->burst_sz);
+ uint16_t ops_needed = num_ops - ops_unused;
+
+ /*
+ * Move the unused operations from the previous
+ * enqueue_burst call to the front, to maintain order
+ */
+ if ((ops_unused > 0) && (num_enq > 0)) {
+ size_t nb_b_to_mov =
+ ops_unused * sizeof(struct rte_comp_op *);
+
+ memmove(ops, &ops[num_enq], nb_b_to_mov);
+ }
+
+ /* Allocate compression operations */
+ if (ops_needed && rte_mempool_get_bulk(
+ mem->op_pool,
+ (void **)ops,
+ ops_needed) != 0) {
+ RTE_LOG(ERR, USER1,
+ "Could not allocate enough operations\n");
+ res = -1;
+ goto end;
+ }
+ allocated += ops_needed;
+
+ for (i = 0; i < ops_needed; i++) {
+ /*
+ * Calculate next buffer to attach to operation
+ */
+ uint32_t buf_id = total_enq_ops + i +
+ ops_unused;
+ uint16_t op_id = ops_unused + i;
+ /* Reset all data in output buffers */
+ struct rte_mbuf *m = output_bufs[buf_id];
+
+ m->pkt_len = out_seg_sz * m->nb_segs;
+ while (m) {
+ m->data_len = m->buf_len - m->data_off;
+ m = m->next;
+ }
+ ops[op_id]->m_src = input_bufs[buf_id];
+ ops[op_id]->m_dst = output_bufs[buf_id];
+ ops[op_id]->src.offset = 0;
+ ops[op_id]->src.length =
+ rte_pktmbuf_pkt_len(input_bufs[buf_id]);
+ ops[op_id]->dst.offset = 0;
+ ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
+ ops[op_id]->input_chksum = buf_id;
+ ops[op_id]->private_xform = priv_xform;
+ }
+
+ if (unlikely(test_data->perf_comp_force_stop))
+ goto end;
+
+ tsc_start = rte_rdtsc_precise();
+ num_enq = rte_compressdev_enqueue_burst(dev_id,
+ mem->qp_id, ops,
+ num_ops);
+ tsc_end = rte_rdtsc_precise();
+ tsc_duration = tsc_end - tsc_start;
+ ctx->duration_enq += tsc_duration;
+
+ if (num_enq < num_ops)
+ ctx->ops_enq_retries++;
+
+ if (test_data->cyclecount_delay)
+ rte_delay_us_block(test_data->cyclecount_delay);
+
+ if (num_enq == 0) {
+ struct rte_compressdev_stats stats;
+
+ rte_compressdev_stats_get(dev_id, &stats);
+ if (stats.enqueue_err_count) {
+ res = -1;
+ goto end;
+ }
+ }
+
+ ops_unused = num_ops - num_enq;
+ remaining_ops -= num_enq;
+ total_enq_ops += num_enq;
+
+ tsc_start = rte_rdtsc_precise();
+ num_deq = rte_compressdev_dequeue_burst(dev_id,
+ mem->qp_id,
+ deq_ops,
+ allocated);
+ tsc_end = rte_rdtsc_precise();
+ tsc_duration = tsc_end - tsc_start;
+ ctx->duration_deq += tsc_duration;
+
+ if (num_deq < allocated)
+ ctx->ops_deq_retries++;
+
+ total_deq_ops += num_deq;
+
+ if (iter == num_iter - 1) {
+ for (i = 0; i < num_deq; i++) {
+ struct rte_comp_op *op = deq_ops[i];
+
+ if (op->status !=
+ RTE_COMP_OP_STATUS_SUCCESS) {
+ RTE_LOG(ERR, USER1, "Some operations were not successful\n");
+ goto end;
+ }
+
+ struct rte_mbuf *m = op->m_dst;
+
+ m->pkt_len = op->produced;
+ uint32_t remaining_data = op->produced;
+ uint16_t data_to_append;
+
+ while (remaining_data > 0) {
+ data_to_append =
+ RTE_MIN(remaining_data,
+ out_seg_sz);
+ m->data_len = data_to_append;
+ remaining_data -=
+ data_to_append;
+ m = m->next;
+ }
+ }
+ }
+ rte_mempool_put_bulk(mem->op_pool,
+ (void **)deq_ops, num_deq);
+ allocated -= num_deq;
+ }
+
+ /* Dequeue the last operations */
+ while (total_deq_ops < total_ops) {
+ if (unlikely(test_data->perf_comp_force_stop))
+ goto end;
+
+ tsc_start = rte_rdtsc_precise();
+ num_deq = rte_compressdev_dequeue_burst(dev_id,
+ mem->qp_id,
+ deq_ops,
+ test_data->burst_sz);
+ tsc_end = rte_rdtsc_precise();
+ tsc_duration = tsc_end - tsc_start;
+ ctx->duration_deq += tsc_duration;
+ ctx->ops_deq_retries++;
+
+ if (num_deq == 0) {
+ struct rte_compressdev_stats stats;
+
+ rte_compressdev_stats_get(dev_id, &stats);
+ if (stats.dequeue_err_count) {
+ res = -1;
+ goto end;
+ }
+ }
+ total_deq_ops += num_deq;
+
+ if (iter == num_iter - 1) {
+ for (i = 0; i < num_deq; i++) {
+ struct rte_comp_op *op = deq_ops[i];
+
+ if (op->status !=
+ RTE_COMP_OP_STATUS_SUCCESS) {
+ RTE_LOG(ERR, USER1, "Some operations were not successful\n");
+ goto end;
+ }
+
+ struct rte_mbuf *m = op->m_dst;
+
+ m->pkt_len = op->produced;
+ uint32_t remaining_data = op->produced;
+ uint16_t data_to_append;
+
+ while (remaining_data > 0) {
+ data_to_append =
+ RTE_MIN(remaining_data,
+ out_seg_sz);
+ m->data_len = data_to_append;
+ remaining_data -=
+ data_to_append;
+ m = m->next;
+ }
+ }
+ }
+ rte_mempool_put_bulk(mem->op_pool,
+ (void **)deq_ops, num_deq);
+ allocated -= num_deq;
+ }
+ }
+ allocated = 0;
+
+end:
+ if (allocated)
+ rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated);
+ rte_compressdev_private_xform_free(dev_id, priv_xform);
+ rte_free(ops);
+
+ if (test_data->perf_comp_force_stop) {
+ RTE_LOG(ERR, USER1,
+ "lcore: %d Perf. test has been aborted by user\n",
+ mem->lcore_id);
+ res = -1;
+ }
+ return res;
+}
+
+int
+cperf_cyclecount_test_runner(void *test_ctx)
+{
+ struct cperf_cyclecount_ctx *ctx = test_ctx;
+ struct comp_test_data *test_data = ctx->ver.options;
+ uint32_t lcore = rte_lcore_id();
+ static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
+ static rte_spinlock_t print_spinlock;
+ int i;
+
+ uint32_t ops_enq_retries_comp;
+ uint32_t ops_deq_retries_comp;
+
+ uint32_t ops_enq_retries_decomp;
+ uint32_t ops_deq_retries_decomp;
+
+ uint32_t duration_setup_per_op;
+
+ uint32_t duration_enq_per_op_comp;
+ uint32_t duration_deq_per_op_comp;
+
+ uint32_t duration_enq_per_op_decomp;
+ uint32_t duration_deq_per_op_decomp;
+
+ ctx->ver.mem.lcore_id = lcore;
+
+ /*
+ * printing information about current compression thread
+ */
+ if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
+ printf(" lcore: %u,"
+ " driver name: %s,"
+ " device name: %s,"
+ " device id: %u,"
+ " socket id: %u,"
+ " queue pair id: %u\n",
+ lcore,
+ ctx->ver.options->driver_name,
+ rte_compressdev_name_get(ctx->ver.mem.dev_id),
+ ctx->ver.mem.dev_id,
+ rte_compressdev_socket_id(ctx->ver.mem.dev_id),
+ ctx->ver.mem.qp_id);
+
+ /*
+ * First the verification part is needed
+ */
+ if (cperf_verify_test_runner(&ctx->ver))
+ return EXIT_FAILURE;
+
+ /*
+ * Run the tests twice, discarding the first performance
+ * results, before the cache is warmed up
+ */
+
+ /* C O M P R E S S */
+ for (i = 0; i < 2; i++) {
+ if (main_loop(ctx, RTE_COMP_COMPRESS) < 0)
+ return EXIT_FAILURE;
+ }
+
+ ops_enq_retries_comp = ctx->ops_enq_retries;
+ ops_deq_retries_comp = ctx->ops_deq_retries;
+
+ duration_enq_per_op_comp = ctx->duration_enq /
+ (ctx->ver.mem.total_bufs * test_data->num_iter);
+ duration_deq_per_op_comp = ctx->duration_deq /
+ (ctx->ver.mem.total_bufs * test_data->num_iter);
+
+ /* D E C O M P R E S S */
+ for (i = 0; i < 2; i++) {
+ if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0)
+ return EXIT_FAILURE;
+ }
+
+ ops_enq_retries_decomp = ctx->ops_enq_retries;
+ ops_deq_retries_decomp = ctx->ops_deq_retries;
+
+ duration_enq_per_op_decomp = ctx->duration_enq /
+ (ctx->ver.mem.total_bufs * test_data->num_iter);
+ duration_deq_per_op_decomp = ctx->duration_deq /
+ (ctx->ver.mem.total_bufs * test_data->num_iter);
+
+ duration_setup_per_op = ctx->duration_op /
+ (ctx->ver.mem.total_bufs * test_data->num_iter);
+
+ /* R E P O R T processing */
+ if (rte_atomic16_test_and_set(&display_once)) {
+
+ rte_spinlock_lock(&print_spinlock);
+
+ printf("\nLegend for the table\n"
+ " - Retries section: number of retries for the following operations:\n"
+ " [C-e] - compression enqueue\n"
+ " [C-d] - compression dequeue\n"
+ " [D-e] - decompression enqueue\n"
+ " [D-d] - decompression dequeue\n"
+ " - Cycles section: number of cycles per 'op' for the following operations:\n"
+ " setup/op - memory allocation, op configuration and memory dealocation\n"
+ " [C-e] - compression enqueue\n"
+ " [C-d] - compression dequeue\n"
+ " [D-e] - decompression enqueue\n"
+ " [D-d] - decompression dequeue\n\n");
+
+ printf("\n%12s%6s%12s%17s",
+ "lcore id", "Level", "Comp size", "Comp ratio [%]");
+
+ printf(" |%10s %6s %8s %6s %8s",
+ " Retries:",
+ "[C-e]", "[C-d]",
+ "[D-e]", "[D-d]");
+
+ printf(" |%9s %9s %9s %9s %9s %9s\n",
+ " Cycles:",
+ "setup/op",
+ "[C-e]", "[C-d]",
+ "[D-e]", "[D-d]");
+
+ rte_spinlock_unlock(&print_spinlock);
+ }
+
+ rte_spinlock_lock(&print_spinlock);
+
+ printf("%12u"
+ "%6u"
+ "%12zu"
+ "%17.2f",
+ ctx->ver.mem.lcore_id,
+ test_data->level,
+ ctx->ver.comp_data_sz,
+ ctx->ver.ratio);
+
+ printf(" |%10s %6u %8u %6u %8u",
+ " ",
+ ops_enq_retries_comp,
+ ops_deq_retries_comp,
+ ops_enq_retries_decomp,
+ ops_deq_retries_decomp);
+
+ printf(" |%9s %9u %9u %9u %9u %9u\n",
+ " ",
+ duration_setup_per_op,
+ duration_enq_per_op_comp,
+ duration_deq_per_op_comp,
+ duration_enq_per_op_decomp,
+ duration_deq_per_op_decomp);
+
+ rte_spinlock_unlock(&print_spinlock);
+
+ return EXIT_SUCCESS;
+}
diff --git a/app/test-compress-perf/comp_perf_test_cyclecount.h b/app/test-compress-perf/comp_perf_test_cyclecount.h
new file mode 100644
index 000000000..8e1b4d9e9
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_test_cyclecount.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#ifndef _COMP_PERF_TEST_CYCLECOUNT_
+#define _COMP_PERF_TEST_CYCLECOUNT_
+
+#include <stdint.h>
+
+#include "comp_perf_options.h"
+#include "comp_perf_test_common.h"
+#include "comp_perf_test_verify.h"
+
+void
+cperf_cyclecount_test_destructor(void *arg);
+
+int
+cperf_cyclecount_test_runner(void *test_ctx);
+
+void *
+cperf_cyclecount_test_constructor(uint8_t dev_id, uint16_t qp_id,
+ struct comp_test_data *options);
+
+#endif
diff --git a/app/test-compress-perf/comp_perf_test_benchmark.c b/app/test-compress-perf/comp_perf_test_throughput.c
similarity index 97%
rename from app/test-compress-perf/comp_perf_test_benchmark.c
rename to app/test-compress-perf/comp_perf_test_throughput.c
index 0c6bb9b45..13922b658 100644
--- a/app/test-compress-perf/comp_perf_test_benchmark.c
+++ b/app/test-compress-perf/comp_perf_test_throughput.c
@@ -8,10 +8,10 @@
#include <rte_cycles.h>
#include <rte_compressdev.h>
-#include "comp_perf_test_benchmark.h"
+#include "comp_perf_test_throughput.h"
void
-cperf_benchmark_test_destructor(void *arg)
+cperf_throughput_test_destructor(void *arg)
{
if (arg) {
comp_perf_free_memory(
@@ -22,7 +22,7 @@ cperf_benchmark_test_destructor(void *arg)
}
void *
-cperf_benchmark_test_constructor(uint8_t dev_id, uint16_t qp_id,
+cperf_throughput_test_constructor(uint8_t dev_id, uint16_t qp_id,
struct comp_test_data *options)
{
struct cperf_benchmark_ctx *ctx = NULL;
@@ -41,7 +41,7 @@ cperf_benchmark_test_constructor(uint8_t dev_id, uint16_t qp_id,
&& !prepare_bufs(ctx->ver.options, &ctx->ver.mem))
return ctx;
- cperf_benchmark_test_destructor(ctx);
+ cperf_throughput_test_destructor(ctx);
return NULL;
}
@@ -324,7 +324,7 @@ main_loop(struct cperf_benchmark_ctx *ctx, enum rte_comp_xform_type type)
}
int
-cperf_benchmark_test_runner(void *test_ctx)
+cperf_throughput_test_runner(void *test_ctx)
{
struct cperf_benchmark_ctx *ctx = test_ctx;
struct comp_test_data *test_data = ctx->ver.options;
diff --git a/app/test-compress-perf/comp_perf_test_benchmark.h b/app/test-compress-perf/comp_perf_test_throughput.h
similarity index 80%
rename from app/test-compress-perf/comp_perf_test_benchmark.h
rename to app/test-compress-perf/comp_perf_test_throughput.h
index d9b2694b8..467e3aa78 100644
--- a/app/test-compress-perf/comp_perf_test_benchmark.h
+++ b/app/test-compress-perf/comp_perf_test_throughput.h
@@ -24,13 +24,13 @@ struct cperf_benchmark_ctx {
};
void
-cperf_benchmark_test_destructor(void *arg);
+cperf_throughput_test_destructor(void *arg);
int
-cperf_benchmark_test_runner(void *test_ctx);
+cperf_throughput_test_runner(void *test_ctx);
void *
-cperf_benchmark_test_constructor(uint8_t dev_id, uint16_t qp_id,
+cperf_throughput_test_constructor(uint8_t dev_id, uint16_t qp_id,
struct comp_test_data *options);
#endif
diff --git a/app/test-compress-perf/comp_perf_test_verify.c b/app/test-compress-perf/comp_perf_test_verify.c
index 758a22ff5..5e13257b7 100644
--- a/app/test-compress-perf/comp_perf_test_verify.c
+++ b/app/test-compress-perf/comp_perf_test_verify.c
@@ -48,8 +48,8 @@ static int
main_loop(struct cperf_verify_ctx *ctx, enum rte_comp_xform_type type)
{
struct comp_test_data *test_data = ctx->options;
- uint8_t *output_data_ptr;
- size_t *output_data_sz;
+ uint8_t *output_data_ptr = NULL;
+ size_t *output_data_sz = NULL;
struct cperf_mem_resources *mem = &ctx->mem;
uint8_t dev_id = mem->dev_id;
diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c
index 6b56dd680..ed21605d8 100644
--- a/app/test-compress-perf/main.c
+++ b/app/test-compress-perf/main.c
@@ -11,32 +11,41 @@
#include <rte_log.h>
#include <rte_compressdev.h>
-#include "comp_perf_options.h"
-#include "comp_perf_test_verify.h"
-#include "comp_perf_test_benchmark.h"
#include "comp_perf.h"
+#include "comp_perf_options.h"
#include "comp_perf_test_common.h"
+#include "comp_perf_test_cyclecount.h"
+#include "comp_perf_test_throughput.h"
+#include "comp_perf_test_verify.h"
#define NUM_MAX_XFORMS 16
#define NUM_MAX_INFLIGHT_OPS 512
__extension__
const char *comp_perf_test_type_strs[] = {
- [CPERF_TEST_TYPE_BENCHMARK] = "benchmark",
- [CPERF_TEST_TYPE_VERIFY] = "verify"
+ [CPERF_TEST_TYPE_THROUGHPUT] = "throughput",
+ [CPERF_TEST_TYPE_VERIFY] = "verify",
+ [CPERF_TEST_TYPE_PMDCC] = "pmd-cyclecount"
};
__extension__
static const struct cperf_test cperf_testmap[] = {
- [CPERF_TEST_TYPE_BENCHMARK] = {
- cperf_benchmark_test_constructor,
- cperf_benchmark_test_runner,
- cperf_benchmark_test_destructor
+ [CPERF_TEST_TYPE_THROUGHPUT] = {
+ cperf_throughput_test_constructor,
+ cperf_throughput_test_runner,
+ cperf_throughput_test_destructor
+
},
[CPERF_TEST_TYPE_VERIFY] = {
cperf_verify_test_constructor,
cperf_verify_test_runner,
cperf_verify_test_destructor
+ },
+
+ [CPERF_TEST_TYPE_PMDCC] = {
+ cperf_cyclecount_test_constructor,
+ cperf_cyclecount_test_runner,
+ cperf_cyclecount_test_destructor
}
};
@@ -116,7 +125,8 @@ comp_perf_initialize_compressdev(struct comp_test_data *test_data,
enabled_cdev_count = rte_compressdev_devices_get(test_data->driver_name,
enabled_cdevs, RTE_COMPRESS_MAX_DEVS);
if (enabled_cdev_count == 0) {
- RTE_LOG(ERR, USER1, "No compress devices type %s available\n",
+ RTE_LOG(ERR, USER1, "No compress devices type %s available,"
+ " please check the list of specified devices in EAL section\n",
test_data->driver_name);
return -EINVAL;
}
@@ -270,6 +280,7 @@ comp_perf_dump_input_data(struct comp_test_data *test_data)
data += data_to_read;
}
+ printf("\n");
if (test_data->input_data_sz > actual_file_sz)
RTE_LOG(INFO, USER1,
"%zu bytes read from file %s, extending the file %.2f times\n",
@@ -365,9 +376,12 @@ main(int argc, char **argv)
else
test_data->level = test_data->level_lst.list[0];
- printf("App uses socket: %u\n", rte_socket_id());
+ printf("\nApp uses socket: %u\n", rte_socket_id());
printf("Burst size = %u\n", test_data->burst_sz);
printf("Input data size = %zu\n", test_data->input_data_sz);
+ if (test_data->test == CPERF_TEST_TYPE_PMDCC)
+ printf("Cycle-count delay = %u [us]\n",
+ test_data->cyclecount_delay);
test_data->cleanup = ST_DURING_TEST;
total_nb_qps = nb_compressdevs * test_data->nb_qps;
@@ -394,7 +408,7 @@ main(int argc, char **argv)
i++;
}
- print_test_dynamics(); /* constructors must be executed first */
+ print_test_dynamics(test_data);
while (test_data->level <= test_data->level_lst.max) {
@@ -472,7 +486,28 @@ main(int argc, char **argv)
}
__rte_weak void *
-cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused,
+cperf_cyclecount_test_constructor(uint8_t dev_id __rte_unused,
+ uint16_t qp_id __rte_unused,
+ struct comp_test_data *options __rte_unused)
+{
+ RTE_LOG(INFO, USER1, "Cycle count test is not supported yet\n");
+ return NULL;
+}
+
+__rte_weak void
+cperf_cyclecount_test_destructor(void *arg __rte_unused)
+{
+ RTE_LOG(INFO, USER1, "Something wrong happened!!!\n");
+}
+
+__rte_weak int
+cperf_cyclecount_test_runner(void *test_ctx __rte_unused)
+{
+ return 0;
+}
+
+__rte_weak void *
+cperf_throughput_test_constructor(uint8_t dev_id __rte_unused,
uint16_t qp_id __rte_unused,
struct comp_test_data *options __rte_unused)
{
@@ -481,13 +516,13 @@ cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused,
}
__rte_weak void
-cperf_benchmark_test_destructor(void *arg __rte_unused)
+cperf_throughput_test_destructor(void *arg __rte_unused)
{
}
__rte_weak int
-cperf_benchmark_test_runner(void *test_ctx __rte_unused)
+cperf_throughput_test_runner(void *test_ctx __rte_unused)
{
return 0;
}
diff --git a/app/test-compress-perf/meson.build b/app/test-compress-perf/meson.build
index 1136f04bc..1fe26cc14 100644
--- a/app/test-compress-perf/meson.build
+++ b/app/test-compress-perf/meson.build
@@ -5,6 +5,7 @@ allow_experimental_apis = true
sources = files('comp_perf_options_parse.c',
'main.c',
'comp_perf_test_verify.c',
- 'comp_perf_test_benchmark.c',
+ 'comp_perf_test_throughput.c',
+ 'comp_perf_test_cyclecount.c',
'comp_perf_test_common.c')
deps = ['compressdev']
--
2.17.1
* Re: [dpdk-dev] [PATCH] test/compress: add cycle-count mode to the perf tool
From: Trahe, Fiona @ 2020-01-13 15:18 UTC
To: Trybula, ArturX, dev, shallyv, Dybkowski, AdamX, Danilewicz,
MarcinX, akhil.goyal
Cc: Trahe, Fiona
> -----Original Message-----
> From: Trybula, ArturX <arturx.trybula@intel.com>
> Sent: Wednesday, December 11, 2019 3:50 PM
> To: dev@dpdk.org; Trahe, Fiona <fiona.trahe@intel.com>; shallyv@marvell.com; Dybkowski, AdamX
> <adamx.dybkowski@intel.com>; Danilewicz, MarcinX <marcinx.danilewicz@intel.com>; Trybula,
> ArturX <arturx.trybula@intel.com>; akhil.goyal@nxp.com
> Subject: [PATCH] test/compress: add cycle-count mode to the perf tool
>
> This commit adds a cycle-count mode to the compression perf tool.
> The new mode allows cycle-count measurement of both hardware and
> software PMDs.
>
> Signed-off-by: Artur Trybula <arturx.trybula@intel.com>
Acked-by: Fiona Trahe <fiona.trahe@intel.com>
* Re: [dpdk-dev] [PATCH] test/compress: add cycle-count mode to the perf tool
From: Akhil Goyal @ 2020-01-28 6:10 UTC
To: Trahe, Fiona, Trybula, ArturX, dev, shallyv, Dybkowski, AdamX,
Danilewicz, MarcinX
> >
> > This commit adds a cycle-count mode to the compression perf tool.
> > The new mode allows cycle-count measurement of both hardware and
> > software PMDs.
> >
> > Signed-off-by: Artur Trybula <arturx.trybula@intel.com>
> Acked-by: Fiona Trahe <fiona.trahe@intel.com>
Applied to dpdk-next-crypto
Thanks.