DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH 0/3] app: add initial version of compress-perf
@ 2018-10-01 13:27 Tomasz Jozwiak
  2018-10-01 13:27 ` [dpdk-dev] [PATCH 1/3] app/compress-perf: add parser Tomasz Jozwiak
                   ` (3 more replies)
  0 siblings, 4 replies; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-10-01 13:27 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, akhil.goyal, pablo.de.lara.guarch

This patchset adds the initial version of the compression
performance test.

Tomasz Jozwiak (3):
  app/compress-perf: add parser
  app/compress-perf: add performance measurement
  doc/guides/tools: add doc file

 MAINTAINERS                                      |   5 +
 app/Makefile                                     |   4 +
 app/meson.build                                  |   1 +
 app/test-compress-perf/Makefile                  |  16 +
 app/test-compress-perf/comp_perf_options.h       |  59 ++
 app/test-compress-perf/comp_perf_options_parse.c | 596 +++++++++++++++
 app/test-compress-perf/main.c                    | 896 +++++++++++++++++++++++
 app/test-compress-perf/meson.build               |   7 +
 config/common_base                               |   5 +
 doc/guides/tools/comp_perf.rst                   |  73 ++
 10 files changed, 1662 insertions(+)
 create mode 100644 app/test-compress-perf/Makefile
 create mode 100644 app/test-compress-perf/comp_perf_options.h
 create mode 100644 app/test-compress-perf/comp_perf_options_parse.c
 create mode 100644 app/test-compress-perf/main.c
 create mode 100644 app/test-compress-perf/meson.build
 create mode 100644 doc/guides/tools/comp_perf.rst

-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH 1/3] app/compress-perf: add parser
  2018-10-01 13:27 [dpdk-dev] [PATCH 0/3] app: add initial version of compress-perf Tomasz Jozwiak
@ 2018-10-01 13:27 ` Tomasz Jozwiak
  2018-10-01 13:27 ` [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement Tomasz Jozwiak
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-10-01 13:27 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, akhil.goyal, pablo.de.lara.guarch
  Cc: De, Lara, Guarch

Added parser part into compression perf. test.

Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
---
 app/Makefile                                     |   4 +
 app/meson.build                                  |   1 +
 app/test-compress-perf/Makefile                  |  16 +
 app/test-compress-perf/comp_perf_options.h       |  59 +++
 app/test-compress-perf/comp_perf_options_parse.c | 596 +++++++++++++++++++++++
 app/test-compress-perf/main.c                    |  52 ++
 app/test-compress-perf/meson.build               |   7 +
 config/common_base                               |   5 +
 8 files changed, 740 insertions(+)
 create mode 100644 app/test-compress-perf/Makefile
 create mode 100644 app/test-compress-perf/comp_perf_options.h
 create mode 100644 app/test-compress-perf/comp_perf_options_parse.c
 create mode 100644 app/test-compress-perf/main.c
 create mode 100644 app/test-compress-perf/meson.build

diff --git a/app/Makefile b/app/Makefile
index 069fa98..d6641ef 100644
--- a/app/Makefile
+++ b/app/Makefile
@@ -11,6 +11,10 @@ ifeq ($(CONFIG_RTE_LIBRTE_BBDEV),y)
 DIRS-$(CONFIG_RTE_TEST_BBDEV) += test-bbdev
 endif
 
+ifeq ($(CONFIG_RTE_LIBRTE_COMPRESSDEV),y)
+DIRS-$(CONFIG_RTE_APP_COMPRESS_PERF) += test-compress-perf
+endif
+
 ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
 DIRS-$(CONFIG_RTE_APP_CRYPTO_PERF) += test-crypto-perf
 endif
diff --git a/app/meson.build b/app/meson.build
index 99e0b93..c2ebb11 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -4,6 +4,7 @@
 apps = ['pdump',
 	'proc-info',
 	'test-bbdev',
+	'test-compress-perf',
 	'test-crypto-perf',
 	'test-eventdev',
 	'test-pmd']
diff --git a/app/test-compress-perf/Makefile b/app/test-compress-perf/Makefile
new file mode 100644
index 0000000..8aa7a22
--- /dev/null
+++ b/app/test-compress-perf/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+APP = dpdk-test-compress-perf
+
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+CFLAGS += -O3
+
+# all sources are stored in SRCS-y
+SRCS-y := main.c
+SRCS-y += comp_perf_options_parse.c
+
+include $(RTE_SDK)/mk/rte.app.mk
diff --git a/app/test-compress-perf/comp_perf_options.h b/app/test-compress-perf/comp_perf_options.h
new file mode 100644
index 0000000..7516ea0
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_options.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#define MAX_DRIVER_NAME		64
+#define MAX_INPUT_FILE_NAME	64
+#define MAX_LIST		32
+
+/*
+ * Operation(s) exercised by the test, selected with --operation
+ * ("comp", "decomp" or "comp_and_decomp").
+ */
+enum comp_operation {
+	COMPRESS_ONLY,
+	DECOMPRESS_ONLY,
+	COMPRESS_DECOMPRESS
+};
+
+/*
+ * A set of small integer values given either as a "min:inc:max" range
+ * or as an explicit comma separated list.
+ */
+struct range_list {
+	uint8_t min;	/* lowest value of the range or list */
+	uint8_t max;	/* highest value of the range or list */
+	uint8_t inc;	/* step between two values of a range */
+	uint8_t count;	/* number of valid entries in list[] */
+	uint8_t list[MAX_LIST];	/* explicit values when given as a list */
+};
+
+/*
+ * Complete state of one compression performance run: the user options,
+ * the buffers and mempools built from them and the measured durations.
+ */
+struct comp_test_data {
+	/* Compress driver name (--driver-name) */
+	char driver_name[MAX_DRIVER_NAME];
+	/* Path of the file used as test input (--input-file) */
+	char input_file[MAX_INPUT_FILE_NAME];
+	/* Destination mbufs for the compression direction */
+	struct rte_mbuf **comp_bufs;
+	/* Mbufs filled with the input data */
+	struct rte_mbuf **decomp_bufs;
+	/* Number of entries in comp_bufs[] and decomp_bufs[] */
+	uint32_t total_bufs;
+	/* Input data read from input_file and its size in bytes */
+	uint8_t *input_data;
+	size_t input_data_sz;
+	uint8_t *compressed_data;
+	uint8_t *decompressed_data;
+	struct rte_mempool *comp_buf_pool;
+	struct rte_mempool *decomp_buf_pool;
+	struct rte_mempool *op_pool;
+	/* Compress device id, -1 until a device has been selected */
+	int8_t cdev_id;
+	/* Size of one mbuf segment in bytes (--seg-sz) */
+	uint16_t seg_sz;
+	uint16_t burst_sz;
+	uint32_t pool_sz;
+	uint32_t num_iter;
+	/* Maximum number of segments chained in one mbuf */
+	uint16_t max_sgl_segs;
+	enum rte_comp_huffman huffman_enc;
+	enum comp_operation test_op;
+	/* Base two log of the window size, -1 means "use the PMD maximum" */
+	int window_sz;
+	/* Compression levels to test */
+	struct range_list level;
+	/* Store TSC duration for all levels (including level 0) */
+	uint64_t comp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
+	uint64_t decomp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
+};
+
+/*
+ * Parse the application command line (the part after the EAL options)
+ * into test_data. Returns 0 on success and a negative value when an
+ * option is unknown or its argument is rejected.
+ */
+int
+comp_perf_options_parse(struct comp_test_data *test_data, int argc,
+			char **argv);
+
+/* Fill test_data with the default value of every option. */
+void
+comp_perf_options_default(struct comp_test_data *test_data);
+
+/*
+ * Verify that the mandatory options (driver name and input file) were
+ * set. Returns 0 when they are, -1 otherwise.
+ */
+int
+comp_perf_options_check(struct comp_test_data *test_data);
diff --git a/app/test-compress-perf/comp_perf_options_parse.c b/app/test-compress-perf/comp_perf_options_parse.c
new file mode 100644
index 0000000..bef4d2f
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_options_parse.c
@@ -0,0 +1,596 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <getopt.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include <rte_string_fns.h>
+#include <rte_comp.h>
+
+#include "comp_perf_options.h"
+
+#define CPERF_DRIVER_NAME	("driver-name")
+#define CPERF_TEST_FILE		("input-file")
+#define CPERF_SEG_SIZE		("seg-sz")
+#define CPERF_BURST_SIZE	("burst-sz")
+#define CPERF_EXTENDED_SIZE	("extended-input-sz")
+#define CPERF_POOL_SIZE		("pool-sz")
+#define CPERF_MAX_SGL_SEGS	("max-num-sgl-segs")
+#define CPERF_NUM_ITER		("num-iter")
+#define CPERF_OPTYPE		("operation")
+#define CPERF_HUFFMAN_ENC	("huffman-enc")
+#define CPERF_LEVEL		("compress-level")
+#define CPERF_WINDOW_SIZE	("window-sz")
+
+/* One entry of a table translating an option keyword into a numeric id. */
+struct name_id_map {
+	const char *name;	/* keyword as typed on the command line */
+	uint32_t id;		/* value the keyword stands for */
+};
+
+/*
+ * Print the command line help on stdout.
+ * progname is shown in front of the "[EAL options] --" synopsis.
+ */
+static void
+usage(char *progname)
+{
+	printf("%s [EAL options] --\n"
+		" --driver-name NAME: compress driver to use\n"
+		" --input-file NAME: file to compress and decompress\n"
+		" --extended-input-sz N: extend file data up to this size (default: no extension)\n"
+		" --seg-sz N: size of segment to store the data (default: 2048)\n"
+		" --burst-sz N: compress operation burst size\n"
+		" --pool-sz N: mempool size for compress operations/mbufs\n"
+		"		(default: 8192)\n"
+		" --max-num-sgl-segs N: maximum number of segments for each mbuf\n"
+		"		(default: 65535)\n"
+		" --num-iter N: number of times the file will be\n"
+		"		compressed/decompressed (default: 10000)\n"
+		" --operation [comp/decomp/comp_and_decomp]: perform test on\n"
+		"		compression, decompression or both operations\n"
+		" --huffman-enc [fixed/dynamic/default]: Huffman encoding\n"
+		"		(default: dynamic)\n"
+		" --compress-level N: compression level, which could be a single value, list or range\n"
+		"		(default: range between 1 and 9)\n"
+		" --window-sz N: base two log value of compression window size\n"
+		"		(e.g.: 15 => 32k, default: max supported by PMD)\n"
+		" -h: prints this help\n",
+		progname);
+}
+
+/*
+ * Translate a keyword into its id using a name/id table.
+ *
+ * map/map_len describe the table, str_key is the keyword to look up.
+ * Returns the id of the first entry whose name equals str_key, or -1
+ * when the keyword is not in the table.
+ */
+static int
+get_str_key_id_mapping(struct name_id_map *map, unsigned int map_len,
+		const char *str_key)
+{
+	unsigned int idx = 0;
+
+	while (idx < map_len) {
+		const struct name_id_map *entry = &map[idx++];
+
+		if (!strcmp(str_key, entry->name))
+			return entry->id;
+	}
+
+	return -1;
+}
+
+/*
+ * Parse arg as an unsigned base-10 integer that fits in 32 bits.
+ *
+ * Returns 0 and stores the number in *value on success, -1 when arg is
+ * NULL, does not start with a digit or has trailing characters, and
+ * -ERANGE when the number does not fit in a uint32_t.
+ * *value is left untouched on failure.
+ */
+static int
+parse_uint32_t(uint32_t *value, const char *arg)
+{
+	char *end = NULL;
+	unsigned long n;
+
+	/*
+	 * Check the string that was passed in rather than the getopt
+	 * global. Requiring a leading digit also rejects empty strings
+	 * and signs, which strtoul() would otherwise accept and negate.
+	 */
+	if (arg == NULL || arg[0] < '0' || arg[0] > '9')
+		return -1;
+
+	errno = 0;
+	n = strtoul(arg, &end, 10);
+
+	if (end == NULL || *end != '\0')
+		return -1;
+
+	/* ERANGE covers platforms where unsigned long is only 32 bits */
+	if (errno == ERANGE || n > UINT32_MAX)
+		return -ERANGE;
+
+	*value = (uint32_t) n;
+
+	return 0;
+}
+
+/*
+ * Parse arg as an unsigned integer that fits in 16 bits.
+ *
+ * The text is converted by parse_uint32_t() and then narrowed.
+ * Returns 0 and stores the number in *value on success, the negative
+ * result of parse_uint32_t() when the conversion fails, and -ERANGE
+ * when the number is larger than UINT16_MAX.
+ */
+static int
+parse_uint16_t(uint16_t *value, const char *arg)
+{
+	uint32_t wide = 0;
+	int status;
+
+	status = parse_uint32_t(&wide, arg);
+	if (status < 0)
+		return status;
+
+	if (wide > UINT16_MAX)
+		return -ERANGE;
+
+	*value = (uint16_t) wide;
+	return 0;
+}
+
+/*
+ * Read one unsigned base-10 field of a "min:inc:max" range.
+ *
+ * The field at *cursor must start with a digit, fit in a uint8_t and be
+ * followed by the character delim. On success the number is stored in
+ * *out and *cursor is moved past the delimiter (it stays on the
+ * terminating NUL when delim is '\0').
+ * Returns 0 on success, -1 otherwise.
+ */
+static int
+parse_range_field(const char **cursor, char delim, uint8_t *out)
+{
+	const char *str = *cursor;
+	char *end = NULL;
+	unsigned long n;
+
+	if (str[0] < '0' || str[0] > '9')
+		return -1;
+
+	errno = 0;
+	n = strtoul(str, &end, 10);
+	if (errno != 0 || n > UINT8_MAX || *end != delim)
+		return -1;
+
+	*out = (uint8_t) n;
+	*cursor = (delim == '\0') ? end : end + 1;
+
+	return 0;
+}
+
+/*
+ * Parse a range written as "min:inc:max".
+ *
+ * Each of the three fields must be a plain decimal number no larger than
+ * UINT8_MAX, inc must not be 0 and max must not be lower than min.
+ * Returns 0 and fills *min, *max and *inc on success. Returns -1 on any
+ * error, in which case none of the outputs is modified, so a caller can
+ * safely fall back to another syntax.
+ */
+static int
+parse_range(const char *arg, uint8_t *min, uint8_t *max, uint8_t *inc)
+{
+	const char *cursor = arg;
+	uint8_t lo = 0;
+	uint8_t step = 0;
+	uint8_t hi = 0;
+
+	if (arg == NULL)
+		return -1;
+
+	/* Exactly three fields: two ':' separators, then end of string */
+	if (parse_range_field(&cursor, ':', &lo) < 0 ||
+			parse_range_field(&cursor, ':', &step) < 0 ||
+			parse_range_field(&cursor, '\0', &hi) < 0)
+		return -1;
+
+	if (step == 0 || hi < lo)
+		return -1;
+
+	/* Commit only once the whole range has been validated */
+	*min = lo;
+	*inc = step;
+	*max = hi;
+
+	return 0;
+}
+
+/*
+ * Parse a comma separated list of values such as "1,3,9".
+ *
+ * Every element must be a plain decimal number no larger than UINT8_MAX.
+ * At most MAX_LIST elements are stored in list[]; if more are given a
+ * warning is logged and the remainder is ignored.
+ *
+ * Returns the number of elements stored, or -1 when an element is
+ * malformed or out of range. *min and *max (either may be NULL) receive
+ * the lowest and highest stored value and are only written on success;
+ * list[] may already hold the elements parsed before the error.
+ */
+static int
+parse_list(const char *arg, uint8_t *list, uint8_t *min, uint8_t *max)
+{
+	const char *cursor = arg;
+	uint8_t count = 0;
+	uint8_t lowest = UINT8_MAX;
+	uint8_t highest = 0;
+
+	if (arg == NULL)
+		return -1;
+
+	for (;;) {
+		char *end = NULL;
+		unsigned long n;
+
+		if (count == MAX_LIST) {
+			RTE_LOG(WARNING, USER1,
+				"Using only the first %u sizes\n",
+					MAX_LIST);
+			break;
+		}
+
+		/* Reject empty elements, signs and stray characters */
+		if (cursor[0] < '0' || cursor[0] > '9')
+			return -1;
+
+		errno = 0;
+		n = strtoul(cursor, &end, 10);
+		if (errno != 0 || n > UINT8_MAX ||
+				(*end != ',' && *end != '\0'))
+			return -1;
+
+		list[count++] = (uint8_t) n;
+
+		if (n < lowest)
+			lowest = (uint8_t) n;
+		if (n > highest)
+			highest = (uint8_t) n;
+
+		if (*end == '\0')
+			break;
+
+		/* Step over the ',' to the next element */
+		cursor = end + 1;
+	}
+
+	if (min)
+		*min = lowest;
+	if (max)
+		*max = highest;
+
+	return count;
+}
+
+/*
+ * Handler of --num-iter: store the iteration count in test_data.
+ * Returns 0 on success, -1 when arg is not a valid number or is 0.
+ */
+static int
+parse_num_iter(struct comp_test_data *test_data, const char *arg)
+{
+	if (parse_uint32_t(&test_data->num_iter, arg) != 0) {
+		RTE_LOG(ERR, USER1, "Failed to parse total iteration count\n");
+		return -1;
+	}
+
+	if (!test_data->num_iter) {
+		RTE_LOG(ERR, USER1,
+				"Total number of iterations must be higher than 0\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Handler of --pool-sz: store the mempool size in test_data.
+ * Returns 0 on success, -1 when arg is not a valid number or is 0.
+ */
+static int
+parse_pool_sz(struct comp_test_data *test_data, const char *arg)
+{
+	int ret = parse_uint32_t(&test_data->pool_sz, arg);
+
+	if (ret) {
+		/* The newline was missing, gluing the next log line to it */
+		RTE_LOG(ERR, USER1, "Failed to parse pool size\n");
+		return -1;
+	}
+
+	if (test_data->pool_sz == 0) {
+		RTE_LOG(ERR, USER1, "Pool size must be higher than 0\n");
+		return -1;
+	}
+
+	return ret;
+}
+
+/*
+ * Handler of --burst-sz: store the burst size in test_data.
+ * Returns 0 on success, -1 when arg is not a valid 16-bit number or is 0.
+ */
+static int
+parse_burst_sz(struct comp_test_data *test_data, const char *arg)
+{
+	if (parse_uint16_t(&test_data->burst_sz, arg) != 0) {
+		RTE_LOG(ERR, USER1, "Failed to parse burst size/s\n");
+		return -1;
+	}
+
+	if (!test_data->burst_sz) {
+		RTE_LOG(ERR, USER1, "Burst size must be higher than 0\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Handler of --extended-input-sz: store the requested input size in
+ * test_data->input_data_sz.
+ * Returns 0 on success, -1 when arg is not a valid number or is 0.
+ */
+static int
+parse_extended_input_sz(struct comp_test_data *test_data, const char *arg)
+{
+	uint32_t requested_sz;
+
+	if (parse_uint32_t(&requested_sz, arg) != 0) {
+		RTE_LOG(ERR, USER1, "Failed to parse extended input size\n");
+		return -1;
+	}
+
+	/* The size is stored before the zero check */
+	test_data->input_data_sz = requested_sz;
+	if (!requested_sz) {
+		RTE_LOG(ERR, USER1,
+			"Extended file size must be higher than 0\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Handler of --seg-sz: store the segment size in test_data.
+ * Returns 0 on success, -1 when arg is not a valid 16-bit number or is 0.
+ */
+static int
+parse_seg_sz(struct comp_test_data *test_data, const char *arg)
+{
+	if (parse_uint16_t(&test_data->seg_sz, arg) != 0) {
+		RTE_LOG(ERR, USER1, "Failed to parse segment size\n");
+		return -1;
+	}
+
+	if (!test_data->seg_sz) {
+		RTE_LOG(ERR, USER1, "Segment size must be higher than 0\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Handler of --max-num-sgl-segs: store the maximum number of segments
+ * per mbuf chain in test_data.
+ * Returns 0 on success, -1 when arg is not a valid 16-bit number or is 0.
+ */
+static int
+parse_max_num_sgl_segs(struct comp_test_data *test_data, const char *arg)
+{
+	if (parse_uint16_t(&test_data->max_sgl_segs, arg) != 0) {
+		RTE_LOG(ERR, USER1,
+			"Failed to parse max number of segments per mbuf chain\n");
+		return -1;
+	}
+
+	if (!test_data->max_sgl_segs) {
+		RTE_LOG(ERR, USER1, "Max number of segments per mbuf chain "
+			"must be higher than 0\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Handler of --window-sz: store the base two log of the window size in
+ * test_data->window_sz.
+ * Returns 0 on success, -1 when arg is not a valid 16-bit number, in
+ * which case window_sz keeps its previous value.
+ */
+static int
+parse_window_sz(struct comp_test_data *test_data, const char *arg)
+{
+	/*
+	 * window_sz is an int: parse into a correctly typed temporary
+	 * instead of writing 16 bits through a cast pointer, which would
+	 * leave the other half of the -1 default in place.
+	 */
+	uint16_t window_log2 = 0;
+	int ret = parse_uint16_t(&window_log2, arg);
+
+	if (ret) {
+		RTE_LOG(ERR, USER1, "Failed to parse window size\n");
+		return -1;
+	}
+
+	test_data->window_sz = window_log2;
+
+	return 0;
+}
+
+/*
+ * Handler of --driver-name: copy the driver name into test_data.
+ * Returns 0 on success, -1 when the name does not fit in the buffer.
+ */
+static int
+parse_driver_name(struct comp_test_data *test_data, const char *arg)
+{
+	const size_t capacity = sizeof(test_data->driver_name);
+
+	/* One byte of the buffer is needed for the terminating NUL */
+	if (strlen(arg) >= capacity)
+		return -1;
+
+	rte_strlcpy(test_data->driver_name, arg, capacity);
+	return 0;
+}
+
+/*
+ * Handler of --input-file: copy the input file name into test_data.
+ * Returns 0 on success, -1 when the name does not fit in the buffer.
+ */
+static int
+parse_test_file(struct comp_test_data *test_data, const char *arg)
+{
+	const size_t capacity = sizeof(test_data->input_file);
+
+	/* One byte of the buffer is needed for the terminating NUL */
+	if (strlen(arg) >= capacity)
+		return -1;
+
+	rte_strlcpy(test_data->input_file, arg, capacity);
+	return 0;
+}
+
+/*
+ * Handler of --operation: translate "comp", "decomp" or
+ * "comp_and_decomp" into test_data->test_op.
+ * Returns 0 on success, -1 when the keyword is not recognised.
+ */
+static int
+parse_op_type(struct comp_test_data *test_data, const char *arg)
+{
+	struct name_id_map op_names[] = {
+		{ "comp",		COMPRESS_ONLY },
+		{ "decomp",		DECOMPRESS_ONLY },
+		{ "comp_and_decomp",	COMPRESS_DECOMPRESS },
+	};
+	int op;
+
+	op = get_str_key_id_mapping(op_names, RTE_DIM(op_names), arg);
+	if (op >= 0) {
+		test_data->test_op = (enum comp_operation)op;
+		return 0;
+	}
+
+	RTE_LOG(ERR, USER1, "Invalid operation type specified\n");
+	return -1;
+}
+
+/*
+ * Handler of --huffman-enc: translate "default", "fixed" or "dynamic"
+ * into test_data->huffman_enc.
+ * Returns 0 on success, -1 when the keyword is not recognised.
+ */
+static int
+parse_huffman_enc(struct comp_test_data *test_data, const char *arg)
+{
+	struct name_id_map huffman_namemap[] = {
+		{
+			"default",
+			RTE_COMP_HUFFMAN_DEFAULT
+		},
+		{
+			"fixed",
+			RTE_COMP_HUFFMAN_FIXED
+		},
+		{
+			"dynamic",
+			RTE_COMP_HUFFMAN_DYNAMIC
+		}
+	};
+
+	int id = get_str_key_id_mapping(huffman_namemap,
+			RTE_DIM(huffman_namemap), arg);
+	if (id < 0) {
+		RTE_LOG(ERR, USER1, "Invalid Huffman encoding specified\n");
+		return -1;
+	}
+
+	test_data->huffman_enc = (enum rte_comp_huffman)id;
+
+	return 0;
+}
+
+/*
+ * Handler of --compress-level: accept either a "min:inc:max" range or a
+ * comma separated list (a single value is a one element list).
+ *
+ * For a range, level.min/max/inc are set and level.count is reset to 0.
+ * For a list, level.list/count/min/max are set and level.inc is left
+ * unchanged.
+ * Returns 0 on success, -1 when arg matches neither syntax or when the
+ * highest level exceeds RTE_COMP_LEVEL_MAX.
+ */
+static int
+parse_level(struct comp_test_data *test_data, const char *arg)
+{
+	uint8_t range_min = 0;
+	uint8_t range_max = 0;
+	uint8_t range_inc = 0;
+	int ret;
+
+	/*
+	 * Try parsing the argument as a range, if it fails,
+	 * parse it as a list. The range goes to temporaries so that a
+	 * failed attempt cannot disturb the current settings.
+	 */
+	if (parse_range(arg, &range_min, &range_max, &range_inc) == 0) {
+		test_data->level.min = range_min;
+		test_data->level.max = range_max;
+		test_data->level.inc = range_inc;
+		test_data->level.count = 0;
+	} else {
+		ret = parse_list(arg, test_data->level.list,
+					&test_data->level.min,
+					&test_data->level.max);
+		if (ret < 0) {
+			RTE_LOG(ERR, USER1,
+				"Failed to parse compression level/s\n");
+			return -1;
+		}
+		test_data->level.count = ret;
+	}
+
+	/*
+	 * The per-level result arrays hold RTE_COMP_LEVEL_MAX + 1 entries,
+	 * so the bound has to hold for ranges as well as for lists.
+	 */
+	if (test_data->level.max > RTE_COMP_LEVEL_MAX) {
+		RTE_LOG(ERR, USER1, "Level cannot be higher than %u\n",
+				RTE_COMP_LEVEL_MAX);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Handler of one long option: consumes the option argument arg and
+ * updates test_data. Returns 0 on success, a negative value on error.
+ */
+typedef int (*option_parser_t)(struct comp_test_data *test_data,
+		const char *arg);
+
+/* Associates the name of a long option with the handler parsing it. */
+struct long_opt_parser {
+	const char *lgopt_name;
+	option_parser_t parser_fn;
+
+};
+
+/*
+ * Long options handed to getopt_long(). Every option takes an argument
+ * and has flag == NULL and val == 0, so getopt_long() returns 0 for all
+ * of them and the option is identified through the returned index.
+ */
+static struct option lgopts[] = {
+
+	{ CPERF_DRIVER_NAME, required_argument, 0, 0 },
+	{ CPERF_TEST_FILE, required_argument, 0, 0 },
+	{ CPERF_SEG_SIZE, required_argument, 0, 0 },
+	{ CPERF_BURST_SIZE, required_argument, 0, 0 },
+	{ CPERF_EXTENDED_SIZE, required_argument, 0, 0 },
+	{ CPERF_POOL_SIZE, required_argument, 0, 0 },
+	{ CPERF_MAX_SGL_SEGS, required_argument, 0, 0},
+	{ CPERF_NUM_ITER, required_argument, 0, 0 },
+	{ CPERF_OPTYPE,	required_argument, 0, 0 },
+	{ CPERF_HUFFMAN_ENC, required_argument, 0, 0 },
+	{ CPERF_LEVEL, required_argument, 0, 0 },
+	{ CPERF_WINDOW_SIZE, required_argument, 0, 0 },
+	{ NULL, 0, 0, 0 }
+};
+/*
+ * Run the handler of the long option lgopts[opt_idx] on the current
+ * getopt argument (optarg).
+ *
+ * Returns the handler's result, or -EINVAL when no handler is registered
+ * for the option.
+ */
+static int
+comp_perf_opts_parse_long(int opt_idx, struct comp_test_data *test_data)
+{
+	static const struct long_opt_parser parsermap[] = {
+		{ CPERF_DRIVER_NAME,	parse_driver_name },
+		{ CPERF_TEST_FILE,	parse_test_file },
+		{ CPERF_SEG_SIZE,	parse_seg_sz },
+		{ CPERF_BURST_SIZE,	parse_burst_sz },
+		{ CPERF_EXTENDED_SIZE,	parse_extended_input_sz },
+		{ CPERF_POOL_SIZE,	parse_pool_sz },
+		{ CPERF_MAX_SGL_SEGS,	parse_max_num_sgl_segs },
+		{ CPERF_NUM_ITER,	parse_num_iter },
+		{ CPERF_OPTYPE,		parse_op_type },
+		{ CPERF_HUFFMAN_ENC,	parse_huffman_enc },
+		{ CPERF_LEVEL,		parse_level },
+		{ CPERF_WINDOW_SIZE,	parse_window_sz },
+	};
+	const char *opt_name = lgopts[opt_idx].name;
+	unsigned int i;
+
+	for (i = 0; i < RTE_DIM(parsermap); i++) {
+		/*
+		 * Compare the whole name: a length-limited comparison
+		 * would pick the wrong handler as soon as one option
+		 * name is a prefix of another.
+		 */
+		if (strcmp(opt_name, parsermap[i].lgopt_name) == 0)
+			return parsermap[i].parser_fn(test_data, optarg);
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Parse the application options found in argv into test_data.
+ *
+ * "-h" prints the help and terminates the program through rte_exit().
+ * Returns 0 when every option was accepted, the negative result of the
+ * failing option handler, or -EINVAL (after printing the help) when an
+ * option is not recognised.
+ */
+int
+comp_perf_options_parse(struct comp_test_data *test_data, int argc, char **argv)
+{
+	int opt, opt_idx;
+
+	while ((opt = getopt_long(argc, argv, "h", lgopts, &opt_idx)) != EOF) {
+		if (opt == 'h') {
+			usage(argv[0]);
+			rte_exit(EXIT_SUCCESS, "Displayed help\n");
+		} else if (opt == 0) {
+			/* long options */
+			int status = comp_perf_opts_parse_long(opt_idx,
+					test_data);
+
+			if (status != 0)
+				return status;
+		} else {
+			usage(argv[0]);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Load the default value of every option into test_data.
+ * Fields that are not listed here are left untouched.
+ */
+void
+comp_perf_options_default(struct comp_test_data *test_data)
+{
+	/* No device selected yet, window size chosen from the PMD */
+	test_data->cdev_id = -1;
+	test_data->window_sz = -1;
+
+	/* Buffer geometry */
+	test_data->seg_sz = 2048;
+	test_data->max_sgl_segs = UINT16_MAX;
+	test_data->pool_sz = 8192;
+	test_data->burst_sz = 32;
+
+	/* Workload */
+	test_data->num_iter = 10000;
+	test_data->test_op = COMPRESS_DECOMPRESS;
+	test_data->huffman_enc = RTE_COMP_HUFFMAN_DYNAMIC;
+
+	/* Compression levels 1 to 9 in steps of 1 */
+	test_data->level.min = 1;
+	test_data->level.inc = 1;
+	test_data->level.max = 9;
+}
+
+/*
+ * Make sure the mandatory options were given.
+ * Returns 0 when both the driver name and the input file name are set,
+ * -1 (after logging which one is missing) otherwise.
+ */
+int
+comp_perf_options_check(struct comp_test_data *test_data)
+{
+	if (test_data->driver_name[0] == '\0') {
+		RTE_LOG(ERR, USER1, "Driver name has to be set\n");
+		return -1;
+	}
+
+	if (test_data->input_file[0] == '\0') {
+		RTE_LOG(ERR, USER1, "Input file name has to be set\n");
+		return -1;
+	}
+
+	return 0;
+}
diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c
new file mode 100644
index 0000000..f52b98d
--- /dev/null
+++ b/app/test-compress-perf/main.c
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_compressdev.h>
+
+#include "comp_perf_options.h"
+
+int
+main(int argc, char **argv)
+{
+	int ret;
+	struct comp_test_data *test_data;
+
+	/* Initialise DPDK EAL */
+	ret = rte_eal_init(argc, argv);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "Invalid EAL arguments!\n");
+	/* Skip the arguments consumed by the EAL */
+	argc -= ret;
+	argv += ret;
+
+	/* Zeroed test state, allocated on the socket of the current lcore */
+	test_data = rte_zmalloc_socket(NULL, sizeof(struct comp_test_data),
+					0, rte_socket_id());
+
+	if (test_data == NULL)
+		rte_exit(EXIT_FAILURE, "Cannot reserve memory in socket %d\n",
+				rte_socket_id());
+
+	/* Defaults first, then the command line overrides them */
+	comp_perf_options_default(test_data);
+
+	if (comp_perf_options_parse(test_data, argc, argv) < 0) {
+		RTE_LOG(ERR, USER1,
+			"Parsing one or more user options failed\n");
+		ret = EXIT_FAILURE;
+		goto err;
+	}
+
+	if (comp_perf_options_check(test_data) < 0) {
+		ret = EXIT_FAILURE;
+		goto err;
+	}
+
+	ret = EXIT_SUCCESS;
+
+	/* Common exit path, also reached on success */
+err:
+	rte_free(test_data);
+
+	return ret;
+}
diff --git a/app/test-compress-perf/meson.build b/app/test-compress-perf/meson.build
new file mode 100644
index 0000000..ba6d64d
--- /dev/null
+++ b/app/test-compress-perf/meson.build
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+allow_experimental_apis = true
+sources = files('comp_perf_options_parse.c',
+		'main.c')
+deps = ['compressdev']
diff --git a/config/common_base b/config/common_base
index 155c7d4..23bf58a0 100644
--- a/config/common_base
+++ b/config/common_base
@@ -902,6 +902,11 @@ CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
 CONFIG_RTE_TEST_BBDEV=y
 
 #
+# Compile the compression performance application
+#
+CONFIG_RTE_APP_COMPRESS_PERF=y
+
+#
 # Compile the crypto performance application
 #
 CONFIG_RTE_APP_CRYPTO_PERF=y
-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
  2018-10-01 13:27 [dpdk-dev] [PATCH 0/3] app: add initial version of compress-perf Tomasz Jozwiak
  2018-10-01 13:27 ` [dpdk-dev] [PATCH 1/3] app/compress-perf: add parser Tomasz Jozwiak
@ 2018-10-01 13:27 ` Tomasz Jozwiak
  2018-10-12 10:15   ` Verma, Shally
  2018-10-01 13:27 ` [dpdk-dev] [PATCH 3/3] doc/guides/tools: add doc file Tomasz Jozwiak
  2018-11-02  9:43 ` [dpdk-dev] [PATCH v2 0/3] add initial version of compress-perf Tomasz Jozwiak
  3 siblings, 1 reply; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-10-01 13:27 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, akhil.goyal, pablo.de.lara.guarch
  Cc: De, Lara, Guarch

Added performance measurement part into compression perf. test.

Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
---
 app/test-compress-perf/main.c | 844 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 844 insertions(+)

diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c
index f52b98d..093dfaf 100644
--- a/app/test-compress-perf/main.c
+++ b/app/test-compress-perf/main.c
@@ -5,13 +5,721 @@
 #include <rte_malloc.h>
 #include <rte_eal.h>
 #include <rte_log.h>
+#include <rte_cycles.h>
 #include <rte_compressdev.h>
 
 #include "comp_perf_options.h"
 
+#define NUM_MAX_XFORMS 16
+#define NUM_MAX_INFLIGHT_OPS 512
+#define EXPANSE_RATIO 1.05
+#define MIN_ISAL_SIZE 8
+
+#define DIV_CEIL(a, b)  ((a) / (b) + ((a) % (b) != 0))
+
+/*
+ * Check that size is one of the values described by range.
+ *
+ * A value is supported when it lies within [range->min, range->max] and,
+ * if range->increment is not 0, is a whole number of increments above
+ * range->min.
+ * Returns 0 when size is supported, -1 otherwise.
+ */
+static int
+param_range_check(uint16_t size, const struct rte_param_log2_range *range)
+{
+	/* Check lower/upper bounds */
+	if (size < range->min || size > range->max)
+		return -1;
+
+	/* If range is actually only one value, size is correct */
+	if (range->increment == 0)
+		return 0;
+
+	/* Inside the bounds: supported only on an increment boundary */
+	return ((size - range->min) % range->increment == 0) ? 0 : -1;
+}
+
+/*
+ * Compare the requested test options with the DEFLATE capabilities of
+ * the selected compress device (test_data->cdev_id).
+ *
+ * Besides checking, two options are adjusted: window_sz is set to the
+ * device maximum when it was left at -1, and max_sgl_segs is lowered to
+ * 1 when the device cannot process chained mbufs.
+ * Returns 0 when the device can run the test, -1 otherwise.
+ */
+static int
+comp_perf_check_capabilities(struct comp_test_data *test_data)
+{
+	const struct rte_compressdev_capabilities *cap;
+
+	cap = rte_compressdev_capability_get(test_data->cdev_id,
+					     RTE_COMP_ALGO_DEFLATE);
+
+	if (cap == NULL) {
+		RTE_LOG(ERR, USER1,
+			"Compress device does not support DEFLATE\n");
+		return -1;
+	}
+
+	uint64_t comp_flags = cap->comp_feature_flags;
+
+	/* Huffman encoding */
+	if (test_data->huffman_enc == RTE_COMP_HUFFMAN_FIXED &&
+			(comp_flags & RTE_COMP_FF_HUFFMAN_FIXED) == 0) {
+		RTE_LOG(ERR, USER1,
+			"Compress device does not support Fixed Huffman\n");
+		return -1;
+	}
+
+	if (test_data->huffman_enc == RTE_COMP_HUFFMAN_DYNAMIC &&
+			(comp_flags & RTE_COMP_FF_HUFFMAN_DYNAMIC) == 0) {
+		RTE_LOG(ERR, USER1,
+			"Compress device does not support Dynamic Huffman\n");
+		return -1;
+	}
+
+	/* Window size */
+	if (test_data->window_sz != -1) {
+		if (param_range_check(test_data->window_sz, &cap->window_size)
+				< 0) {
+			RTE_LOG(ERR, USER1,
+				"Compress device does not support "
+				"this window size\n");
+			return -1;
+		}
+	} else
+		/* Set window size to PMD maximum if none was specified */
+		test_data->window_sz = cap->window_size.max;
+
+	/* Check if chained mbufs is supported */
+	if (test_data->max_sgl_segs > 1  &&
+			(comp_flags & RTE_COMP_FF_OOP_SGL_IN_SGL_OUT) == 0) {
+		RTE_LOG(INFO, USER1, "Compress device does not support "
+				"chained mbufs. Max SGL segments set to 1\n");
+		test_data->max_sgl_segs = 1;
+	}
+
+	/* Level 0 support */
+	if (test_data->level.min == 0 &&
+			(comp_flags & RTE_COMP_FF_NONCOMPRESSED_BLOCKS) == 0) {
+		RTE_LOG(ERR, USER1, "Compress device does not support "
+				"level 0 (no compression)\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Create the mempools and allocate the buffers needed for a run, all
+ * sized from test_data->input_data_sz, seg_sz and max_sgl_segs.
+ *
+ * Sets comp_buf_pool, decomp_buf_pool, op_pool, total_bufs,
+ * compressed_data, decompressed_data, comp_bufs and decomp_bufs.
+ * Returns 0 on success and -1 on the first failure; what was allocated
+ * before the failure is not released here.
+ */
+static int
+comp_perf_allocate_memory(struct comp_test_data *test_data)
+{
+	/* Number of segments for input and output
+	 * (compression and decompression)
+	 */
+	uint32_t total_segs = DIV_CEIL(test_data->input_data_sz,
+			test_data->seg_sz);
+	test_data->comp_buf_pool = rte_pktmbuf_pool_create("comp_buf_pool",
+				total_segs,
+				0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
+				rte_socket_id());
+	if (test_data->comp_buf_pool == NULL) {
+		RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
+		return -1;
+	}
+
+	test_data->decomp_buf_pool = rte_pktmbuf_pool_create("decomp_buf_pool",
+				total_segs,
+				0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
+				rte_socket_id());
+	if (test_data->decomp_buf_pool == NULL) {
+		RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
+		return -1;
+	}
+
+	/* Each buffer is a chain of up to max_sgl_segs segments */
+	test_data->total_bufs = DIV_CEIL(total_segs, test_data->max_sgl_segs);
+
+	test_data->op_pool = rte_comp_op_pool_create("op_pool",
+				  test_data->total_bufs,
+				  0, 0, rte_socket_id());
+	if (test_data->op_pool == NULL) {
+		RTE_LOG(ERR, USER1, "Comp op mempool could not be created\n");
+		return -1;
+	}
+
+	/*
+	 * Compressed data might be a bit larger than input data,
+	 * if data cannot be compressed
+	 */
+	test_data->compressed_data = rte_zmalloc_socket(NULL,
+				test_data->input_data_sz * EXPANSE_RATIO
+							+ MIN_ISAL_SIZE, 0,
+				rte_socket_id());
+	if (test_data->compressed_data == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the compressed data "
+				"could not be allocated\n");
+		return -1;
+	}
+
+	test_data->decompressed_data = rte_zmalloc_socket(NULL,
+				test_data->input_data_sz, 0,
+				rte_socket_id());
+	if (test_data->decompressed_data == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the decompressed data "
+				"could not be allocated\n");
+		return -1;
+	}
+
+	test_data->comp_bufs = rte_zmalloc_socket(NULL,
+			test_data->total_bufs * sizeof(struct rte_mbuf *),
+			0, rte_socket_id());
+	if (test_data->comp_bufs == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the compression mbufs"
+				" could not be allocated\n");
+		return -1;
+	}
+
+	test_data->decomp_bufs = rte_zmalloc_socket(NULL,
+			test_data->total_bufs * sizeof(struct rte_mbuf *),
+			0, rte_socket_id());
+	if (test_data->decomp_bufs == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the decompression mbufs"
+				" could not be allocated\n");
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Load the input file into a newly allocated test_data->input_data.
+ *
+ * When test_data->input_data_sz is 0 it is set to the file size.
+ * Otherwise exactly input_data_sz bytes are produced: the file is read
+ * repeatedly from its start when it is shorter, and only its beginning
+ * is used when it is longer.
+ * Returns 0 on success. Returns -1 on failure, in which case input_data
+ * is freed and reset to NULL.
+ */
+static int
+comp_perf_dump_input_data(struct comp_test_data *test_data)
+{
+	/* Binary mode: the file is treated as raw bytes, not text */
+	FILE *f = fopen(test_data->input_file, "rb");
+
+	if (f == NULL) {
+		RTE_LOG(ERR, USER1, "Input file could not be opened\n");
+		return -1;
+	}
+
+	if (fseek(f, 0, SEEK_END) != 0) {
+		RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
+		goto err;
+	}
+
+	/* ftell() reports failure with -1; check before converting */
+	long file_end = ftell(f);
+
+	if (file_end < 0) {
+		RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
+		goto err;
+	}
+
+	/* An empty file can neither be read nor replicated */
+	if (file_end == 0) {
+		RTE_LOG(ERR, USER1, "Input file is empty\n");
+		goto err;
+	}
+
+	size_t actual_file_sz = (size_t)file_end;
+	/* If extended input data size has not been set,
+	 * input data size = file size
+	 */
+
+	if (test_data->input_data_sz == 0)
+		test_data->input_data_sz = actual_file_sz;
+
+	if (fseek(f, 0, SEEK_SET) != 0) {
+		RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
+		goto err;
+	}
+
+	test_data->input_data = rte_zmalloc_socket(NULL,
+				test_data->input_data_sz, 0, rte_socket_id());
+
+	if (test_data->input_data == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
+				"file could not be allocated\n");
+		goto err;
+	}
+
+	size_t remaining_data = test_data->input_data_sz;
+	uint8_t *data = test_data->input_data;
+
+	/* Each pass copies at most one whole file, then rewinds */
+	while (remaining_data > 0) {
+		size_t data_to_read = RTE_MIN(remaining_data, actual_file_sz);
+
+		if (fread(data, data_to_read, 1, f) != 1) {
+			RTE_LOG(ERR, USER1, "Input file could not be read\n");
+			goto err;
+		}
+		if (fseek(f, 0, SEEK_SET) != 0) {
+			RTE_LOG(ERR, USER1,
+				"Size of input could not be calculated\n");
+			goto err;
+		}
+		remaining_data -= data_to_read;
+		data += data_to_read;
+	}
+
+	if (test_data->input_data_sz > actual_file_sz)
+		RTE_LOG(INFO, USER1,
+		  "%zu bytes read from file %s, extending the file %.2f times\n",
+			test_data->input_data_sz, test_data->input_file,
+			(double)test_data->input_data_sz/actual_file_sz);
+	else
+		RTE_LOG(INFO, USER1,
+			"%zu bytes read from file %s\n",
+			test_data->input_data_sz, test_data->input_file);
+
+	fclose(f);
+
+	return 0;
+
+err:
+	fclose(f);
+	rte_free(test_data->input_data);
+	test_data->input_data = NULL;
+
+	return -1;
+}
+
+/*
+ * Select, configure and start the compress device used by the test.
+ *
+ * The devices of driver test_data->driver_name are looked up and the
+ * first one is stored in test_data->cdev_id. Its capabilities are
+ * checked, then it is configured with a single queue pair and started.
+ * Returns 0 on success, -EINVAL when no device of that driver exists and
+ * -1 when the capability check, configuration, queue pair setup or start
+ * fails.
+ */
+static int
+comp_perf_initialize_compressdev(struct comp_test_data *test_data)
+{
+	uint8_t enabled_cdev_count;
+	uint8_t enabled_cdevs[RTE_COMPRESS_MAX_DEVS];
+
+	enabled_cdev_count = rte_compressdev_devices_get(test_data->driver_name,
+			enabled_cdevs, RTE_COMPRESS_MAX_DEVS);
+	if (enabled_cdev_count == 0) {
+		RTE_LOG(ERR, USER1, "No compress devices type %s available\n",
+				test_data->driver_name);
+		return -EINVAL;
+	}
+
+	if (enabled_cdev_count > 1)
+		RTE_LOG(INFO, USER1,
+			"Only the first compress device will be used\n");
+
+	test_data->cdev_id = enabled_cdevs[0];
+
+	/* May also adjust window_sz and max_sgl_segs to what the device supports */
+	if (comp_perf_check_capabilities(test_data) < 0)
+		return -1;
+
+	/* Configure compressdev (one device, one queue pair) */
+	struct rte_compressdev_config config = {
+		.socket_id = rte_socket_id(),
+		.nb_queue_pairs = 1,
+		.max_nb_priv_xforms = NUM_MAX_XFORMS,
+		.max_nb_streams = 0
+	};
+
+	if (rte_compressdev_configure(test_data->cdev_id, &config) < 0) {
+		RTE_LOG(ERR, USER1, "Device configuration failed\n");
+		return -1;
+	}
+
+	/* Queue pair 0 is the only one configured above */
+	if (rte_compressdev_queue_pair_setup(test_data->cdev_id, 0,
+			NUM_MAX_INFLIGHT_OPS, rte_socket_id()) < 0) {
+		RTE_LOG(ERR, USER1, "Queue pair setup failed\n");
+		return -1;
+	}
+
+	if (rte_compressdev_start(test_data->cdev_id) < 0) {
+		RTE_LOG(ERR, USER1, "Device could not be started\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Build the source and destination mbuf chains used by the test.
+ *
+ * For each of the total_bufs entries, an input chain is allocated from
+ * decomp_buf_pool and filled with consecutive pieces of input_data, each at
+ * most seg_sz bytes, using up to max_sgl_segs segments per chain. An output
+ * chain with the same number of seg_sz-sized segments is then allocated from
+ * comp_buf_pool.
+ *
+ * Returns 0 on success, -1 on the first allocation, append or chain failure.
+ * Chains already stored in decomp_bufs/comp_bufs are left for the caller to
+ * release; a segment that has not been chained yet is freed here, since
+ * nothing else references it.
+ */
+static int
+prepare_bufs(struct comp_test_data *test_data)
+{
+	/* size_t, so that large inputs are not silently truncated */
+	size_t remaining_data = test_data->input_data_sz;
+	uint8_t *input_data_ptr = test_data->input_data;
+	size_t data_sz;
+	uint8_t *data_addr;
+	uint32_t i, j;
+
+	for (i = 0; i < test_data->total_bufs; i++) {
+		/* Allocate data in input mbuf and copy data from input file */
+		test_data->decomp_bufs[i] =
+			rte_pktmbuf_alloc(test_data->decomp_buf_pool);
+		if (test_data->decomp_bufs[i] == NULL) {
+			RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
+			return -1;
+		}
+
+		data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
+		data_addr = (uint8_t *) rte_pktmbuf_append(
+					test_data->decomp_bufs[i], data_sz);
+		if (data_addr == NULL) {
+			RTE_LOG(ERR, USER1, "Could not append data\n");
+			return -1;
+		}
+		rte_memcpy(data_addr, input_data_ptr, data_sz);
+
+		input_data_ptr += data_sz;
+		remaining_data -= data_sz;
+
+		/* Already one segment in the mbuf */
+		uint16_t segs_per_mbuf = 1;
+
+		/* Chain mbufs if needed for input mbufs */
+		while (segs_per_mbuf < test_data->max_sgl_segs
+				&& remaining_data > 0) {
+			struct rte_mbuf *next_seg =
+				rte_pktmbuf_alloc(test_data->decomp_buf_pool);
+
+			if (next_seg == NULL) {
+				RTE_LOG(ERR, USER1,
+					"Could not allocate mbuf\n");
+				return -1;
+			}
+
+			data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
+			data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
+				data_sz);
+
+			if (data_addr == NULL) {
+				RTE_LOG(ERR, USER1, "Could not append data\n");
+				/* Not chained yet: nobody else will free it */
+				rte_pktmbuf_free(next_seg);
+				return -1;
+			}
+
+			rte_memcpy(data_addr, input_data_ptr, data_sz);
+			input_data_ptr += data_sz;
+			remaining_data -= data_sz;
+
+			if (rte_pktmbuf_chain(test_data->decomp_bufs[i],
+					next_seg) < 0) {
+				RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
+				rte_pktmbuf_free(next_seg);
+				return -1;
+			}
+			segs_per_mbuf++;
+		}
+
+		/* Allocate data in output mbuf */
+		test_data->comp_bufs[i] =
+			rte_pktmbuf_alloc(test_data->comp_buf_pool);
+		if (test_data->comp_bufs[i] == NULL) {
+			RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
+			return -1;
+		}
+		data_addr = (uint8_t *) rte_pktmbuf_append(
+					test_data->comp_bufs[i],
+					test_data->seg_sz);
+		if (data_addr == NULL) {
+			RTE_LOG(ERR, USER1, "Could not append data\n");
+			return -1;
+		}
+
+		/* Chain mbufs if needed for output mbufs */
+		for (j = 1; j < segs_per_mbuf; j++) {
+			struct rte_mbuf *next_seg =
+				rte_pktmbuf_alloc(test_data->comp_buf_pool);
+
+			if (next_seg == NULL) {
+				RTE_LOG(ERR, USER1,
+					"Could not allocate mbuf\n");
+				return -1;
+			}
+
+			data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
+				test_data->seg_sz);
+
+			if (data_addr == NULL) {
+				RTE_LOG(ERR, USER1, "Could not append data\n");
+				/* Not chained yet: nobody else will free it */
+				rte_pktmbuf_free(next_seg);
+				return -1;
+			}
+
+			if (rte_pktmbuf_chain(test_data->comp_bufs[i],
+					next_seg) < 0) {
+				RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
+				rte_pktmbuf_free(next_seg);
+				return -1;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Release every mbuf chain referenced by comp_bufs and decomp_bufs, then the
+ * two pointer arrays themselves.
+ *
+ * This runs on the error path of main(), where either array may still be
+ * NULL even though total_bufs is already non-zero, so each array is checked
+ * before it is indexed. rte_pktmbuf_free() and rte_free() both accept NULL,
+ * so unused slots and unallocated arrays need no further handling.
+ */
+static void
+free_bufs(struct comp_test_data *test_data)
+{
+	uint32_t i;
+
+	if (test_data->comp_bufs != NULL)
+		for (i = 0; i < test_data->total_bufs; i++)
+			rte_pktmbuf_free(test_data->comp_bufs[i]);
+
+	if (test_data->decomp_bufs != NULL)
+		for (i = 0; i < test_data->total_bufs; i++)
+			rte_pktmbuf_free(test_data->decomp_bufs[i]);
+
+	rte_free(test_data->comp_bufs);
+	rte_free(test_data->decomp_bufs);
+}
+
+/*
+ * Run one compression or decompression pass over all test buffers.
+ *
+ * A private xform for 'type' is created on the device. The buffers are then
+ * enqueued in bursts of at most burst_sz operations and dequeued until every
+ * operation has been returned.
+ *
+ * With benchmarking set, the pass is repeated num_iter times and the average
+ * number of TSC cycles per iteration is stored in comp_tsc_duration[level] or
+ * decomp_tsc_duration[level]. With benchmarking clear, a single pass is run,
+ * the bytes produced by every dequeued operation are copied to
+ * output_data_ptr, and the total is written to *output_data_sz when that
+ * pointer is not NULL.
+ *
+ * On the last iteration, pkt_len and data_len of each destination chain are
+ * set to the number of bytes the operation produced.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+main_loop(struct comp_test_data *test_data, uint8_t level,
+			enum rte_comp_xform_type type,
+			uint8_t *output_data_ptr,
+			size_t *output_data_sz,
+			unsigned int benchmarking)
+{
+	uint8_t dev_id;
+	uint32_t i, iter, num_iter;
+	struct rte_comp_op **ops, **deq_ops;
+	void *priv_xform = NULL;
+	struct rte_comp_xform xform;
+	size_t output_size = 0;
+	struct rte_mbuf **input_bufs, **output_bufs;
+	int res = 0;
+	int allocated = 0;
+
+	if (test_data == NULL || !test_data->burst_sz) {
+		RTE_LOG(ERR, USER1,
+			"Unknow burst size\n");
+		return -1;
+	}
+
+	/* Only dereference test_data once it is known to be valid */
+	dev_id = test_data->cdev_id;
+
+	/* One allocation: first half for enqueue, second half for dequeue */
+	ops = rte_zmalloc_socket(NULL,
+		2 * test_data->total_bufs * sizeof(struct rte_comp_op *),
+		0, rte_socket_id());
+
+	if (ops == NULL) {
+		RTE_LOG(ERR, USER1,
+			"Can't allocate memory for ops strucures\n");
+		return -1;
+	}
+
+	deq_ops = &ops[test_data->total_bufs];
+
+	if (type == RTE_COMP_COMPRESS) {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_COMPRESS,
+			.compress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.deflate.huffman = test_data->huffman_enc,
+				.level = level,
+				.window_size = test_data->window_sz,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = test_data->decomp_bufs;
+		output_bufs = test_data->comp_bufs;
+	} else {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_DECOMPRESS,
+			.decompress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.window_size = test_data->window_sz,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = test_data->comp_bufs;
+		output_bufs = test_data->decomp_bufs;
+	}
+
+	/* Create private xform */
+	if (rte_compressdev_private_xform_create(dev_id, &xform,
+			&priv_xform) < 0) {
+		RTE_LOG(ERR, USER1, "Private xform could not be created\n");
+		res = -1;
+		goto end;
+	}
+
+	uint64_t tsc_start, tsc_end, tsc_duration;
+
+	tsc_start = tsc_end = tsc_duration = 0;
+	if (benchmarking) {
+		tsc_start = rte_rdtsc();
+		num_iter = test_data->num_iter;
+	} else
+		num_iter = 1;
+
+	for (iter = 0; iter < num_iter; iter++) {
+		uint32_t total_ops = test_data->total_bufs;
+		uint32_t remaining_ops = test_data->total_bufs;
+		uint32_t total_deq_ops = 0;
+		uint32_t total_enq_ops = 0;
+		uint16_t ops_unused = 0;
+		uint16_t num_enq = 0;
+		uint16_t num_deq = 0;
+
+		output_size = 0;
+
+		while (remaining_ops > 0) {
+			uint16_t num_ops = RTE_MIN(remaining_ops,
+						   test_data->burst_sz);
+			uint16_t ops_needed = num_ops - ops_unused;
+
+			/*
+			 * Move the unused operations from the previous
+			 * enqueue_burst call to the front, to maintain order
+			 */
+			if ((ops_unused > 0) && (num_enq > 0)) {
+				size_t nb_b_to_mov =
+				      ops_unused * sizeof(struct rte_comp_op *);
+
+				memmove(ops, &ops[num_enq], nb_b_to_mov);
+			}
+
+			/* Allocate compression operations */
+			if (ops_needed && !rte_comp_op_bulk_alloc(
+						test_data->op_pool,
+						&ops[ops_unused],
+						ops_needed)) {
+				RTE_LOG(ERR, USER1,
+				      "Could not allocate enough operations\n");
+				res = -1;
+				goto end;
+			}
+			allocated += ops_needed;
+
+			for (i = 0; i < ops_needed; i++) {
+				/*
+				 * Calculate next buffer to attach to operation
+				 */
+				uint32_t buf_id = total_enq_ops + i +
+						ops_unused;
+				uint16_t op_id = ops_unused + i;
+				/* Reset all data in output buffers */
+				struct rte_mbuf *m = output_bufs[buf_id];
+
+				m->pkt_len = test_data->seg_sz * m->nb_segs;
+				while (m) {
+					m->data_len = m->buf_len - m->data_off;
+					m = m->next;
+				}
+				ops[op_id]->m_src = input_bufs[buf_id];
+				ops[op_id]->m_dst = output_bufs[buf_id];
+				ops[op_id]->src.offset = 0;
+				ops[op_id]->src.length =
+					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
+				ops[op_id]->dst.offset = 0;
+				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
+				ops[op_id]->input_chksum = buf_id;
+				ops[op_id]->private_xform = priv_xform;
+			}
+
+			num_enq = rte_compressdev_enqueue_burst(dev_id, 0, ops,
+								num_ops);
+			ops_unused = num_ops - num_enq;
+			remaining_ops -= num_enq;
+			total_enq_ops += num_enq;
+
+			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
+							   deq_ops,
+							   test_data->burst_sz);
+			total_deq_ops += num_deq;
+			if (benchmarking == 0) {
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+					const void *read_data_addr =
+						rte_pktmbuf_read(op->m_dst, 0,
+						op->produced, output_data_ptr);
+					if (read_data_addr == NULL) {
+						RTE_LOG(ERR, USER1,
+				      "Could not copy buffer in destination\n");
+						res = -1;
+						goto end;
+					}
+
+					if (read_data_addr != output_data_ptr)
+						rte_memcpy(output_data_ptr,
+							rte_pktmbuf_mtod(
+							  op->m_dst, uint8_t *),
+							op->produced);
+					output_data_ptr += op->produced;
+					output_size += op->produced;
+
+				}
+			}
+
+			if (iter == num_iter - 1) {
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+					struct rte_mbuf *m = op->m_dst;
+
+					m->pkt_len = op->produced;
+					uint32_t remaining_data = op->produced;
+					uint16_t data_to_append;
+
+					/* Stop if the chain ends early */
+					while (remaining_data > 0 && m != NULL) {
+						data_to_append =
+							RTE_MIN(remaining_data,
+							     test_data->seg_sz);
+						m->data_len = data_to_append;
+						remaining_data -=
+								data_to_append;
+						m = m->next;
+					}
+				}
+			}
+			rte_mempool_put_bulk(test_data->op_pool,
+					     (void **)deq_ops, num_deq);
+			allocated -= num_deq;
+		}
+
+		/* Dequeue the last operations */
+		while (total_deq_ops < total_ops) {
+			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
+						deq_ops, test_data->burst_sz);
+			total_deq_ops += num_deq;
+			if (benchmarking == 0) {
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+					const void *read_data_addr =
+						rte_pktmbuf_read(op->m_dst, 0,
+						op->produced, output_data_ptr);
+					if (read_data_addr == NULL) {
+						RTE_LOG(ERR, USER1,
+				      "Could not copy buffer in destination\n");
+						res = -1;
+						goto end;
+					}
+
+					if (read_data_addr != output_data_ptr)
+						rte_memcpy(output_data_ptr,
+							rte_pktmbuf_mtod(
+							op->m_dst, uint8_t *),
+							op->produced);
+					output_data_ptr += op->produced;
+					output_size += op->produced;
+
+				}
+			}
+
+			if (iter == num_iter - 1) {
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+					struct rte_mbuf *m = op->m_dst;
+
+					m->pkt_len = op->produced;
+					uint32_t remaining_data = op->produced;
+					uint16_t data_to_append;
+
+					/* Stop if the chain ends early */
+					while (remaining_data > 0 && m != NULL) {
+						data_to_append =
+						RTE_MIN(remaining_data,
+							test_data->seg_sz);
+						m->data_len = data_to_append;
+						remaining_data -=
+								data_to_append;
+						m = m->next;
+					}
+				}
+			}
+			rte_mempool_put_bulk(test_data->op_pool,
+					     (void **)deq_ops, num_deq);
+			allocated -= num_deq;
+		}
+	}
+
+	if (benchmarking) {
+		tsc_end = rte_rdtsc();
+		tsc_duration = tsc_end - tsc_start;
+
+		/* Store the average cycle count of one iteration */
+		if (type == RTE_COMP_COMPRESS)
+			test_data->comp_tsc_duration[level] =
+					tsc_duration / num_iter;
+		else
+			test_data->decomp_tsc_duration[level] =
+					tsc_duration / num_iter;
+	}
+
+	if (benchmarking == 0 && output_data_sz)
+		*output_data_sz = output_size;
+end:
+	rte_mempool_put_bulk(test_data->op_pool, (void **)ops, allocated);
+	rte_compressdev_private_xform_free(dev_id, priv_xform);
+	rte_free(ops);
+	return res;
+}
+
 int
 main(int argc, char **argv)
 {
+	uint8_t level, level_idx = 0;
+	uint8_t i;
 	int ret;
 	struct comp_test_data *test_data;
 
@@ -43,9 +751,145 @@ main(int argc, char **argv)
 		goto err;
 	}
 
+	if (comp_perf_initialize_compressdev(test_data) < 0) {
+		ret = EXIT_FAILURE;
+		goto err;
+	}
+
+	if (comp_perf_dump_input_data(test_data) < 0) {
+		ret = EXIT_FAILURE;
+		goto err;
+	}
+
+	if (comp_perf_allocate_memory(test_data) < 0) {
+		ret = EXIT_FAILURE;
+		goto err;
+	}
+
+	if (prepare_bufs(test_data) < 0) {
+		ret = EXIT_FAILURE;
+		goto err;
+	}
+
+	if (test_data->level.inc != 0)
+		level = test_data->level.min;
+	else
+		level = test_data->level.list[0];
+
+	size_t comp_data_sz;
+	size_t decomp_data_sz;
+
+	printf("Burst size = %u\n", test_data->burst_sz);
+	printf("File size = %zu\n", test_data->input_data_sz);
+
+	printf("%6s%12s%17s%19s%21s%15s%21s%23s%16s\n",
+		"Level", "Comp size", "Comp ratio [%]",
+		"Comp [Cycles/it]", "Comp [Cycles/Byte]", "Comp [Gbps]",
+		"Decomp [Cycles/it]", "Decomp [Cycles/Byte]", "Decomp [Gbps]");
+
+	while (level <= test_data->level.max) {
+		/*
+		 * Run a first iteration, to verify compression and
+		 * get the compression ratio for the level
+		 */
+		if (main_loop(test_data, level, RTE_COMP_COMPRESS,
+			      test_data->compressed_data,
+			      &comp_data_sz, 0) < 0) {
+			ret = EXIT_FAILURE;
+			goto err;
+		}
+
+		if (main_loop(test_data, level, RTE_COMP_DECOMPRESS,
+			      test_data->decompressed_data,
+			      &decomp_data_sz, 0) < 0) {
+			ret = EXIT_FAILURE;
+			goto err;
+		}
+
+		if (decomp_data_sz != test_data->input_data_sz) {
+			RTE_LOG(ERR, USER1,
+		   "Decompressed data length not equal to input data length\n");
+			RTE_LOG(ERR, USER1,
+				"Decompressed size = %zu, expected = %zu\n",
+				decomp_data_sz, test_data->input_data_sz);
+			ret = EXIT_FAILURE;
+			goto err;
+		} else {
+			if (memcmp(test_data->decompressed_data,
+					test_data->input_data,
+					test_data->input_data_sz) != 0) {
+				RTE_LOG(ERR, USER1,
+			    "Decompressed data is not the same as file data\n");
+				ret = EXIT_FAILURE;
+				goto err;
+			}
+		}
+
+		double ratio = (double) comp_data_sz /
+						test_data->input_data_sz * 100;
+
+		/*
+		 * Run the tests twice, discarding the first performance
+		 * results, before the cache is warmed up
+		 */
+		for (i = 0; i < 2; i++) {
+			if (main_loop(test_data, level, RTE_COMP_COMPRESS,
+					NULL, NULL, 1) < 0) {
+				ret = EXIT_FAILURE;
+				goto err;
+			}
+		}
+
+		for (i = 0; i < 2; i++) {
+			if (main_loop(test_data, level, RTE_COMP_DECOMPRESS,
+					NULL, NULL, 1) < 0) {
+				ret = EXIT_FAILURE;
+				goto err;
+			}
+		}
+
+		uint64_t comp_tsc_duration =
+				test_data->comp_tsc_duration[level];
+		double comp_tsc_byte = (double)comp_tsc_duration /
+						test_data->input_data_sz;
+		double comp_gbps = rte_get_tsc_hz() / comp_tsc_byte * 8 /
+				1000000000;
+		uint64_t decomp_tsc_duration =
+				test_data->decomp_tsc_duration[level];
+		double decomp_tsc_byte = (double)decomp_tsc_duration /
+						test_data->input_data_sz;
+		double decomp_gbps = rte_get_tsc_hz() / decomp_tsc_byte * 8 /
+				1000000000;
+
+		printf("%6u%12zu%17.2f%19"PRIu64"%21.2f"
+					"%15.2f%21"PRIu64"%23.2f%16.2f\n",
+		       level, comp_data_sz, ratio, comp_tsc_duration,
+		       comp_tsc_byte, comp_gbps, decomp_tsc_duration,
+		       decomp_tsc_byte, decomp_gbps);
+
+		if (test_data->level.inc != 0)
+			level += test_data->level.inc;
+		else {
+			if (++level_idx == test_data->level.count)
+				break;
+			level = test_data->level.list[level_idx];
+		}
+	}
+
 	ret = EXIT_SUCCESS;
 
 err:
+	if (test_data->cdev_id != -1)
+		rte_compressdev_stop(test_data->cdev_id);
+
+	free_bufs(test_data);
+	rte_free(test_data->compressed_data);
+	rte_free(test_data->decompressed_data);
+	rte_free(test_data->input_data);
+	rte_mempool_free(test_data->comp_buf_pool);
+	rte_mempool_free(test_data->decomp_buf_pool);
+	rte_mempool_free(test_data->op_pool);
+
 	rte_free(test_data);
 
 	return ret;
-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH 3/3] doc/guides/tools: add doc file
  2018-10-01 13:27 [dpdk-dev] [PATCH 0/3] app: add initial version of compress-perf Tomasz Jozwiak
  2018-10-01 13:27 ` [dpdk-dev] [PATCH 1/3] app/compress-perf: add parser Tomasz Jozwiak
  2018-10-01 13:27 ` [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement Tomasz Jozwiak
@ 2018-10-01 13:27 ` Tomasz Jozwiak
  2018-10-16  9:26   ` Kovacevic, Marko
  2018-11-02  9:43 ` [dpdk-dev] [PATCH v2 0/3] add initial version of compress-perf Tomasz Jozwiak
  3 siblings, 1 reply; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-10-01 13:27 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, akhil.goyal, pablo.de.lara.guarch
  Cc: De, Lara, Guarch

Added initial version of compression performance test
description file.

Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
---
 MAINTAINERS                    |  5 +++
 doc/guides/tools/comp_perf.rst | 73 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+)
 create mode 100644 doc/guides/tools/comp_perf.rst

diff --git a/MAINTAINERS b/MAINTAINERS
index 5967c1d..f8c3820 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1184,6 +1184,11 @@ M: Bernard Iremonger <bernard.iremonger@intel.com>
 F: app/test-pmd/
 F: doc/guides/testpmd_app_ug/
 
+Compression performance test application
+M: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
+F: app/test-compress-perf/
+F: doc/guides/tools/comp_perf.rst
+
 Crypto performance test application
 M: Declan Doherty <declan.doherty@intel.com>
 F: app/test-crypto-perf/
diff --git a/doc/guides/tools/comp_perf.rst b/doc/guides/tools/comp_perf.rst
new file mode 100644
index 0000000..14abe8b
--- /dev/null
+++ b/doc/guides/tools/comp_perf.rst
@@ -0,0 +1,73 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright(c) 2018 Intel Corporation.
+
+dpdk-test-compress-perf Application
+===================================
+
+The ``dpdk-test-compress-perf`` tool is a Data Plane Development Kit (DPDK)
+utility that allows measuring performance parameters of PMDs available in the
+compress tree. The application reads the data from a file (--input-file),
+loads the whole file into a buffer and copies that data into the input mbufs,
+which are passed to the compress device with compression operations.
+Then, the output buffers are fed into the decompression stage, and the resulting
+data is compared against the original data (verification phase). After that,
+a number of iterations are performed, compressing first and decompressing later,
+to check the throughput rate
+(showing cycles/iteration, cycles/Byte and Gbps, for compression and decompression).
+
+.. Note::
+
+    This is the initial version of this document
+
+Command line options
+--------------------
+
+ ``--driver-name NAME``: compress driver to use
+
+ ``--input-file NAME``: file to compress and decompress
+
+ ``--extended-input-sz N``: extend file data up to this size (default: no extension)
+
+ ``--seg-sz N``: size of segment to store the data (default: 2048)
+
+ ``--burst-sz N``: compress operation burst size
+
+ ``--pool-sz N``: mempool size for compress operations/mbufs (default: 8192)
+
+ ``--max-num-sgl-segs N``: maximum number of segments for each mbuf (default: 65535)
+
+ ``--num-iter N``: number of times the file will be compressed/decompressed (default: 10000)
+
+ ``--operation [comp/decomp/comp_and_decomp]``: perform test on compression, decompression or both operations
+
+ ``--huffman-enc [fixed/dynamic/default]``: Huffman encoding (default: dynamic)
+
+ ``--compress-level N``: compression level, which could be a single value, list or range (default: range between 1 and 9)
+
+ ``--window-sz N``: base two log value of compression window size (default: max supported by PMD)
+
+ ``-h``: prints this help
+
+
+Compiling the Application
+-------------------------
+
+**Step 1: PMD setting**
+
+The ``dpdk-test-compress-perf`` tool depends on compression device PMDs, which
+may be disabled by default in the build configuration file ``common_base``.
+The compression device PMD to be tested can be enabled by setting, for example::
+
+   CONFIG_RTE_LIBRTE_PMD_ISAL=y
+
+
+Running the Application
+-----------------------
+
+The application has a number of command line options. Here is a sample command line:
+
+.. code-block:: console
+
+   ./build/app/dpdk-test-compress-perf  -l 4 -- --driver-name compress_qat --input-file test.txt --seg-sz 8192
+    --compress-level 1:1:9 --num-iter 10 --extended-input-sz 1048576  --max-num-sgl-segs 16 --huffman-enc fixed
+
-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
  2018-10-01 13:27 ` [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement Tomasz Jozwiak
@ 2018-10-12 10:15   ` Verma, Shally
  2018-10-15 15:10     ` Daly, Lee
  2018-11-02  9:59     ` Jozwiak, TomaszX
  0 siblings, 2 replies; 76+ messages in thread
From: Verma, Shally @ 2018-10-12 10:15 UTC (permalink / raw)
  To: Tomasz Jozwiak, dev, fiona.trahe, akhil.goyal, pablo.de.lara.guarch
  Cc: De, Lara, Guarch

HI TomaszX

Sorry for delay in response. Comments inline.

>-----Original Message-----
>From: dev <dev-bounces@dpdk.org> On Behalf Of Tomasz Jozwiak
>Sent: 01 October 2018 18:57
>To: dev@dpdk.org; fiona.trahe@intel.com; tomaszx.jozwiak@intel.com; akhil.goyal@nxp.com; pablo.de.lara.guarch@intel.com
>Cc: De@dpdk.org; Lara@dpdk.org; Guarch@dpdk.org
>Subject: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
>
>External Email
>
>Added performance measurement part into compression perf. test.
>
>Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
>Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
>---
> app/test-compress-perf/main.c | 844 ++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 844 insertions(+)
>
>diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c
>index f52b98d..093dfaf 100644
>--- a/app/test-compress-perf/main.c
>+++ b/app/test-compress-perf/main.c
>@@ -5,13 +5,721 @@
> #include <rte_malloc.h>
> #include <rte_eal.h>
> #include <rte_log.h>
>+#include <rte_cycles.h>
> #include <rte_compressdev.h>
>
> #include "comp_perf_options.h"
>
>+#define NUM_MAX_XFORMS 16
>+#define NUM_MAX_INFLIGHT_OPS 512
>+#define EXPANSE_RATIO 1.05
>+#define MIN_ISAL_SIZE 8
>+
>+#define DIV_CEIL(a, b)  ((a) / (b) + ((a) % (b) != 0))
>+
>+static int
>+param_range_check(uint16_t size, const struct rte_param_log2_range *range)
>+{
>+       unsigned int next_size;
>+
>+       /* Check lower/upper bounds */
>+       if (size < range->min)
>+               return -1;
>+
>+       if (size > range->max)
>+               return -1;
>+
>+       /* If range is actually only one value, size is correct */
>+       if (range->increment == 0)
>+               return 0;
>+
>+       /* Check if value is one of the supported sizes */
>+       for (next_size = range->min; next_size <= range->max;
>+                       next_size += range->increment)
>+               if (size == next_size)
>+                       return 0;
>+
>+       return -1;
>+}
>+
>+static int
>+comp_perf_check_capabilities(struct comp_test_data *test_data)
>+{
>+       const struct rte_compressdev_capabilities *cap;
>+
>+       cap = rte_compressdev_capability_get(test_data->cdev_id,
>+                                            RTE_COMP_ALGO_DEFLATE);
>+
>+       if (cap == NULL) {
>+               RTE_LOG(ERR, USER1,
>+                       "Compress device does not support DEFLATE\n");
>+               return -1;
>+       }
>+
>+       uint64_t comp_flags = cap->comp_feature_flags;
>+
>+       /* Huffman enconding */
>+       if (test_data->huffman_enc == RTE_COMP_HUFFMAN_FIXED &&
>+                       (comp_flags & RTE_COMP_FF_HUFFMAN_FIXED) == 0) {
>+               RTE_LOG(ERR, USER1,
>+                       "Compress device does not supported Fixed Huffman\n");
>+               return -1;
>+       }
>+
>+       if (test_data->huffman_enc == RTE_COMP_HUFFMAN_DYNAMIC &&
>+                       (comp_flags & RTE_COMP_FF_HUFFMAN_DYNAMIC) == 0) {
>+               RTE_LOG(ERR, USER1,
>+                       "Compress device does not supported Dynamic Huffman\n");
>+               return -1;
>+       }
>+
>+       /* Window size */
>+       if (test_data->window_sz != -1) {
>+               if (param_range_check(test_data->window_sz, &cap->window_size)
What if cap->window_size is 0 i.e. implementation default?

>+                               < 0) {
>+                       RTE_LOG(ERR, USER1,
>+                               "Compress device does not support "
>+                               "this window size\n");
>+                       return -1;
>+               }
>+       } else
>+               /* Set window size to PMD maximum if none was specified */
>+               test_data->window_sz = cap->window_size.max;
>+
>+       /* Check if chained mbufs is supported */
>+       if (test_data->max_sgl_segs > 1  &&
>+                       (comp_flags & RTE_COMP_FF_OOP_SGL_IN_SGL_OUT) == 0) {
>+               RTE_LOG(INFO, USER1, "Compress device does not support "
>+                               "chained mbufs. Max SGL segments set to 1\n");
>+               test_data->max_sgl_segs = 1;
>+       }
>+
>+       /* Level 0 support */
>+       if (test_data->level.min == 0 &&
>+                       (comp_flags & RTE_COMP_FF_NONCOMPRESSED_BLOCKS) == 0) {
>+               RTE_LOG(ERR, USER1, "Compress device does not support "
>+                               "level 0 (no compression)\n");
>+               return -1;
>+       }
>+
>+       return 0;
>+}
>+
>+static int
>+comp_perf_allocate_memory(struct comp_test_data *test_data)
>+{
>+       /* Number of segments for input and output
>+        * (compression and decompression)
>+        */
>+       uint32_t total_segs = DIV_CEIL(test_data->input_data_sz,
>+                       test_data->seg_sz);
>+       test_data->comp_buf_pool = rte_pktmbuf_pool_create("comp_buf_pool",
>+                               total_segs,
>+                               0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
>+                               rte_socket_id());
>+       if (test_data->comp_buf_pool == NULL) {
>+               RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
>+               return -1;
>+       }
>+
>+       test_data->decomp_buf_pool = rte_pktmbuf_pool_create("decomp_buf_pool",
>+                               total_segs,
>+                               0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
>+                               rte_socket_id());
>+       if (test_data->decomp_buf_pool == NULL) {
>+               RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
>+               return -1;
>+       }
>+
>+       test_data->total_bufs = DIV_CEIL(total_segs, test_data->max_sgl_segs);
>+
>+       test_data->op_pool = rte_comp_op_pool_create("op_pool",
>+                                 test_data->total_bufs,
>+                                 0, 0, rte_socket_id());
>+       if (test_data->op_pool == NULL) {
>+               RTE_LOG(ERR, USER1, "Comp op mempool could not be created\n");
>+               return -1;
>+       }
>+
>+       /*
>+        * Compressed data might be a bit larger than input data,
>+        * if data cannot be compressed
Possible only if it's zlib format right? Or deflate as well?

>+        */
>+       test_data->compressed_data = rte_zmalloc_socket(NULL,
>+                               test_data->input_data_sz * EXPANSE_RATIO
>+                                                       + MIN_ISAL_SIZE, 0,
>+                               rte_socket_id());
>+       if (test_data->compressed_data == NULL) {
>+               RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
>+                               "file could not be allocated\n");
>+               return -1;
>+       }
>+
>+       test_data->decompressed_data = rte_zmalloc_socket(NULL,
>+                               test_data->input_data_sz, 0,
>+                               rte_socket_id());
>+       if (test_data->decompressed_data == NULL) {
>+               RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
>+                               "file could not be allocated\n");
>+               return -1;
>+       }
>+
>+       test_data->comp_bufs = rte_zmalloc_socket(NULL,
>+                       test_data->total_bufs * sizeof(struct rte_mbuf *),
>+                       0, rte_socket_id());
>+       if (test_data->comp_bufs == NULL) {
>+               RTE_LOG(ERR, USER1, "Memory to hold the compression mbufs"
>+                               " could not be allocated\n");
>+               return -1;
>+       }
>+
>+       test_data->decomp_bufs = rte_zmalloc_socket(NULL,
>+                       test_data->total_bufs * sizeof(struct rte_mbuf *),
>+                       0, rte_socket_id());
>+       if (test_data->decomp_bufs == NULL) {
>+               RTE_LOG(ERR, USER1, "Memory to hold the decompression mbufs"
>+                               " could not be allocated\n");
>+               return -1;
>+       }
>+       return 0;
>+}
>+
>+static int
>+comp_perf_dump_input_data(struct comp_test_data *test_data)
>+{
>+       FILE *f = fopen(test_data->input_file, "r");
>+
>+       if (f == NULL) {
>+               RTE_LOG(ERR, USER1, "Input file could not be opened\n");
>+               return -1;
>+       }
>+
>+       if (fseek(f, 0, SEEK_END) != 0) {
>+               RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
>+               goto err;
>+       }
>+       size_t actual_file_sz = ftell(f);
>+       /* If extended input data size has not been set,
>+        * input data size = file size
>+        */
>+
>+       if (test_data->input_data_sz == 0)
>+               test_data->input_data_sz = actual_file_sz;
>+
>+       if (fseek(f, 0, SEEK_SET) != 0) {
>+               RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
>+               goto err;
>+       }
>+
>+       test_data->input_data = rte_zmalloc_socket(NULL,
>+                               test_data->input_data_sz, 0, rte_socket_id());
>+
>+       if (test_data->input_data == NULL) {
>+               RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
>+                               "file could not be allocated\n");
>+               goto err;
>+       }
>+
>+       size_t remaining_data = test_data->input_data_sz;
>+       uint8_t *data = test_data->input_data;
>+
>+       while (remaining_data > 0) {
>+               size_t data_to_read = RTE_MIN(remaining_data, actual_file_sz);
>+
>+               if (fread(data, data_to_read, 1, f) != 1) {
>+                       RTE_LOG(ERR, USER1, "Input file could not be read\n");
>+                       goto err;
>+               }
>+               if (fseek(f, 0, SEEK_SET) != 0) {
>+                       RTE_LOG(ERR, USER1,
>+                               "Size of input could not be calculated\n");
>+                       goto err;
>+               }
>+               remaining_data -= data_to_read;
>+               data += data_to_read;
It looks like it will run 2nd time only if input file size < input data size in which case it will just keep filling input buffer with repeated data. 
Is that the intention here?

>+       }
>+
>+       if (test_data->input_data_sz > actual_file_sz)
>+               RTE_LOG(INFO, USER1,
>+                 "%zu bytes read from file %s, extending the file %.2f times\n",
>+                       test_data->input_data_sz, test_data->input_file,
>+                       (double)test_data->input_data_sz/actual_file_sz);
>+       else
>+               RTE_LOG(INFO, USER1,
>+                       "%zu bytes read from file %s\n",
>+                       test_data->input_data_sz, test_data->input_file);
>+
>+       fclose(f);
>+
>+       return 0;
>+
>+err:
>+       fclose(f);
>+       rte_free(test_data->input_data);
>+       test_data->input_data = NULL;
>+
>+       return -1;
>+}
>+
>+static int
>+comp_perf_initialize_compressdev(struct comp_test_data *test_data)
>+{
>+       uint8_t enabled_cdev_count;
>+       uint8_t enabled_cdevs[RTE_COMPRESS_MAX_DEVS];
>+
>+       enabled_cdev_count = rte_compressdev_devices_get(test_data->driver_name,
>+                       enabled_cdevs, RTE_COMPRESS_MAX_DEVS);
>+       if (enabled_cdev_count == 0) {
>+               RTE_LOG(ERR, USER1, "No compress devices type %s available\n",
>+                               test_data->driver_name);
>+               return -EINVAL;
>+       }
>+
>+       if (enabled_cdev_count > 1)
>+               RTE_LOG(INFO, USER1,
>+                       "Only the first compress device will be used\n");
>+
>+       test_data->cdev_id = enabled_cdevs[0];
>+
>+       if (comp_perf_check_capabilities(test_data) < 0)
>+               return -1;
>+
>+       /* Configure compressdev (one device, one queue pair) */
>+       struct rte_compressdev_config config = {
>+               .socket_id = rte_socket_id(),
>+               .nb_queue_pairs = 1,
>+               .max_nb_priv_xforms = NUM_MAX_XFORMS,
>+               .max_nb_streams = 0
>+       };
>+
>+       if (rte_compressdev_configure(test_data->cdev_id, &config) < 0) {
>+               RTE_LOG(ERR, USER1, "Device configuration failed\n");
>+               return -1;
>+       }
>+
>+       if (rte_compressdev_queue_pair_setup(test_data->cdev_id, 0,
>+                       NUM_MAX_INFLIGHT_OPS, rte_socket_id()) < 0) {
>+               RTE_LOG(ERR, USER1, "Queue pair setup failed\n");
>+               return -1;
>+       }
>+
>+       if (rte_compressdev_start(test_data->cdev_id) < 0) {
>+               RTE_LOG(ERR, USER1, "Device could not be started\n");
>+               return -1;
>+       }
>+
>+       return 0;
>+}
>+
>+static int
>+prepare_bufs(struct comp_test_data *test_data)
>+{
>+       uint32_t remaining_data = test_data->input_data_sz;
>+       uint8_t *input_data_ptr = test_data->input_data;
>+       size_t data_sz;
>+       uint8_t *data_addr;
>+       uint32_t i, j;
>+
>+       for (i = 0; i < test_data->total_bufs; i++) {
>+               /* Allocate data in input mbuf and copy data from input file */
>+               test_data->decomp_bufs[i] =
>+                       rte_pktmbuf_alloc(test_data->decomp_buf_pool);
>+               if (test_data->decomp_bufs[i] == NULL) {
>+                       RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
>+                       return -1;
>+               }
>+
>+               data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
>+               data_addr = (uint8_t *) rte_pktmbuf_append(
>+                                       test_data->decomp_bufs[i], data_sz);
>+               if (data_addr == NULL) {
>+                       RTE_LOG(ERR, USER1, "Could not append data\n");
>+                       return -1;
>+               }
>+               rte_memcpy(data_addr, input_data_ptr, data_sz);
>+
>+               input_data_ptr += data_sz;
>+               remaining_data -= data_sz;
>+
>+               /* Already one segment in the mbuf */
>+               uint16_t segs_per_mbuf = 1;
>+
>+               /* Chain mbufs if needed for input mbufs */
>+               while (segs_per_mbuf < test_data->max_sgl_segs
>+                               && remaining_data > 0) {
>+                       struct rte_mbuf *next_seg =
>+                               rte_pktmbuf_alloc(test_data->decomp_buf_pool);
>+
>+                       if (next_seg == NULL) {
>+                               RTE_LOG(ERR, USER1,
>+                                       "Could not allocate mbuf\n");
>+                               return -1;
>+                       }
>+
>+                       data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
>+                       data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
>+                               data_sz);
>+
>+                       if (data_addr == NULL) {
>+                               RTE_LOG(ERR, USER1, "Could not append data\n");
Since a new buffer is allocated per segment, is it possible for append to fail? I think this check is redundant here.
>+                               return -1;
>+                       }
>+
>+                       rte_memcpy(data_addr, input_data_ptr, data_sz);
>+                       input_data_ptr += data_sz;
>+                       remaining_data -= data_sz;
>+
>+                       if (rte_pktmbuf_chain(test_data->decomp_bufs[i],
>+                                       next_seg) < 0) {
>+                               RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
>+                               return -1;
>+                       }
>+                       segs_per_mbuf++;
>+               }
>+
>+               /* Allocate data in output mbuf */
>+               test_data->comp_bufs[i] =
>+                       rte_pktmbuf_alloc(test_data->comp_buf_pool);
>+               if (test_data->comp_bufs[i] == NULL) {
>+                       RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
>+                       return -1;
>+               }
>+               data_addr = (uint8_t *) rte_pktmbuf_append(
>+                                       test_data->comp_bufs[i],
>+                                       test_data->seg_sz);
>+               if (data_addr == NULL) {
>+                       RTE_LOG(ERR, USER1, "Could not append data\n");
>+                       return -1;
>+               }
>+
>+               /* Chain mbufs if needed for output mbufs */
>+               for (j = 1; j < segs_per_mbuf; j++) {
>+                       struct rte_mbuf *next_seg =
>+                               rte_pktmbuf_alloc(test_data->comp_buf_pool);
>+
>+                       if (next_seg == NULL) {
>+                               RTE_LOG(ERR, USER1,
>+                                       "Could not allocate mbuf\n");
>+                               return -1;
>+                       }
>+
>+                       data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
>+                               test_data->seg_sz);
>+
>+                       if (data_addr == NULL) {
>+                               RTE_LOG(ERR, USER1, "Could not append data\n");
>+                               return -1;
>+                       }
>+
>+                       if (rte_pktmbuf_chain(test_data->comp_bufs[i],
>+                                       next_seg) < 0) {
>+                               RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
>+                               return -1;
>+                       }
>+               }
>+       }
>+
>+       return 0;
>+}
>+
>+static void
>+free_bufs(struct comp_test_data *test_data)
>+{
>+       uint32_t i;
>+
>+       for (i = 0; i < test_data->total_bufs; i++) {
>+               rte_pktmbuf_free(test_data->comp_bufs[i]);
>+               rte_pktmbuf_free(test_data->decomp_bufs[i]);
>+       }
>+       rte_free(test_data->comp_bufs);
>+       rte_free(test_data->decomp_bufs);
>+}
>+
>+static int
>+main_loop(struct comp_test_data *test_data, uint8_t level,
>+                       enum rte_comp_xform_type type,
>+                       uint8_t *output_data_ptr,
>+                       size_t *output_data_sz,
>+                       unsigned int benchmarking)
>+{
>+       uint8_t dev_id = test_data->cdev_id;
>+       uint32_t i, iter, num_iter;
>+       struct rte_comp_op **ops, **deq_ops;
>+       void *priv_xform = NULL;
>+       struct rte_comp_xform xform;
>+       size_t output_size = 0;
>+       struct rte_mbuf **input_bufs, **output_bufs;
>+       int res = 0;
>+       int allocated = 0;
>+
>+       if (test_data == NULL || !test_data->burst_sz) {
>+               RTE_LOG(ERR, USER1,
>+                       "Unknow burst size\n");
>+               return -1;
>+       }
>+
>+       ops = rte_zmalloc_socket(NULL,
>+               2 * test_data->total_bufs * sizeof(struct rte_comp_op *),
>+               0, rte_socket_id());
>+
>+       if (ops == NULL) {
>+               RTE_LOG(ERR, USER1,
>+                       "Can't allocate memory for ops strucures\n");
>+               return -1;
>+       }
>+
>+       deq_ops = &ops[test_data->total_bufs];
>+
>+       if (type == RTE_COMP_COMPRESS) {
>+               xform = (struct rte_comp_xform) {
>+                       .type = RTE_COMP_COMPRESS,
>+                       .compress = {
>+                               .algo = RTE_COMP_ALGO_DEFLATE,
>+                               .deflate.huffman = test_data->huffman_enc,
>+                               .level = level,
>+                               .window_size = test_data->window_sz,
>+                               .chksum = RTE_COMP_CHECKSUM_NONE,
>+                               .hash_algo = RTE_COMP_HASH_ALGO_NONE
>+                       }
>+               };
>+               input_bufs = test_data->decomp_bufs;
>+               output_bufs = test_data->comp_bufs;
>+       } else {
>+               xform = (struct rte_comp_xform) {
>+                       .type = RTE_COMP_DECOMPRESS,
>+                       .decompress = {
>+                               .algo = RTE_COMP_ALGO_DEFLATE,
>+                               .chksum = RTE_COMP_CHECKSUM_NONE,
>+                               .window_size = test_data->window_sz,
>+                               .hash_algo = RTE_COMP_HASH_ALGO_NONE
>+                       }
>+               };
>+               input_bufs = test_data->comp_bufs;
>+               output_bufs = test_data->decomp_bufs;
>+       }
>+
>+       /* Create private xform */
>+       if (rte_compressdev_private_xform_create(dev_id, &xform,
>+                       &priv_xform) < 0) {
>+               RTE_LOG(ERR, USER1, "Private xform could not be created\n");
>+               res = -1;
>+               goto end;
>+       }
>+
>+       uint64_t tsc_start, tsc_end, tsc_duration;
>+
>+       tsc_start = tsc_end = tsc_duration = 0;
>+       if (benchmarking) {
>+               tsc_start = rte_rdtsc();
>+               num_iter = test_data->num_iter;
>+       } else
>+               num_iter = 1;
It looks like the same code is doing both benchmarking and functional validation. It could be reorganised to keep the validation test separate, as is done in crypto_perf.

>+
>+       for (iter = 0; iter < num_iter; iter++) {
>+               uint32_t total_ops = test_data->total_bufs;
>+               uint32_t remaining_ops = test_data->total_bufs;
>+               uint32_t total_deq_ops = 0;
>+               uint32_t total_enq_ops = 0;
>+               uint16_t ops_unused = 0;
>+               uint16_t num_enq = 0;
>+               uint16_t num_deq = 0;
>+
>+               output_size = 0;
>+
>+               while (remaining_ops > 0) {
>+                       uint16_t num_ops = RTE_MIN(remaining_ops,
>+                                                  test_data->burst_sz);
>+                       uint16_t ops_needed = num_ops - ops_unused;
>+
>+                       /*
>+                        * Move the unused operations from the previous
>+                        * enqueue_burst call to the front, to maintain order
>+                        */
>+                       if ((ops_unused > 0) && (num_enq > 0)) {
>+                               size_t nb_b_to_mov =
>+                                     ops_unused * sizeof(struct rte_comp_op *);
>+
>+                               memmove(ops, &ops[num_enq], nb_b_to_mov);
>+                       }
>+
>+                       /* Allocate compression operations */
>+                       if (ops_needed && !rte_comp_op_bulk_alloc(
>+                                               test_data->op_pool,
>+                                               &ops[ops_unused],
>+                                               ops_needed)) {
>+                               RTE_LOG(ERR, USER1,
>+                                     "Could not allocate enough operations\n");
>+                               res = -1;
>+                               goto end;
>+                       }
>+                       allocated += ops_needed;
>+
>+                       for (i = 0; i < ops_needed; i++) {
>+                               /*
>+                                * Calculate next buffer to attach to operation
>+                                */
>+                               uint32_t buf_id = total_enq_ops + i +
>+                                               ops_unused;
>+                               uint16_t op_id = ops_unused + i;
>+                               /* Reset all data in output buffers */
>+                               struct rte_mbuf *m = output_bufs[buf_id];
>+
>+                               m->pkt_len = test_data->seg_sz * m->nb_segs;
Isn't pkt_len set already when we call rte_pktmbuf_append() and chain()?

>+                               while (m) {
>+                                       m->data_len = m->buf_len - m->data_off;
Same question: shouldn't rte_pktmbuf_append() adjust data_len as well for each mbuf?

>+                                       m = m->next;
>+                               }
>+                               ops[op_id]->m_src = input_bufs[buf_id];
>+                               ops[op_id]->m_dst = output_bufs[buf_id];
>+                               ops[op_id]->src.offset = 0;
>+                               ops[op_id]->src.length =
>+                                       rte_pktmbuf_pkt_len(input_bufs[buf_id]);
>+                               ops[op_id]->dst.offset = 0;
>+                               ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
>+                               ops[op_id]->input_chksum = buf_id;
>+                               ops[op_id]->private_xform = priv_xform;
>+                       }
>+
>+                       num_enq = rte_compressdev_enqueue_burst(dev_id, 0, ops,
>+                                                               num_ops);
>+                       ops_unused = num_ops - num_enq;
>+                       remaining_ops -= num_enq;
>+                       total_enq_ops += num_enq;
>+
>+                       num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
>+                                                          deq_ops,
>+                                                          test_data->burst_sz);
>+                       total_deq_ops += num_deq;
>+                       if (benchmarking == 0) {
>+                               for (i = 0; i < num_deq; i++) {
>+                                       struct rte_comp_op *op = deq_ops[i];
>+                                       const void *read_data_addr =
>+                                               rte_pktmbuf_read(op->m_dst, 0,
>+                                               op->produced, output_data_ptr);
>+                                       if (read_data_addr == NULL) {
>+                                               RTE_LOG(ERR, USER1,
>+                                     "Could not copy buffer in destination\n");
>+                                               res = -1;
>+                                               goto end;
>+                                       }
>+
>+                                       if (read_data_addr != output_data_ptr)
>+                                               rte_memcpy(output_data_ptr,
>+                                                       rte_pktmbuf_mtod(
>+                                                         op->m_dst, uint8_t *),
>+                                                       op->produced);
>+                                       output_data_ptr += op->produced;
>+                                       output_size += op->produced;
>+
>+                               }
>+                       }
>+
>+                       if (iter == num_iter - 1) {
>+                               for (i = 0; i < num_deq; i++) {
Why are we adjusting the dst mbuf data_len only for the last iteration?
Shouldn't it be done for each dequeued op?
And, for benchmarking, do we even need to set data_len and pkt_len on the dst mbuf?

>+                                       struct rte_comp_op *op = deq_ops[i];
>+                                       struct rte_mbuf *m = op->m_dst;
>+
>+                                       m->pkt_len = op->produced;
>+                                       uint32_t remaining_data = op->produced;
>+                                       uint16_t data_to_append;
>+
>+                                       while (remaining_data > 0) {
>+                                               data_to_append =
>+                                                       RTE_MIN(remaining_data,
>+                                                            test_data->seg_sz);
>+                                               m->data_len = data_to_append;
>+                                               remaining_data -=
>+                                                               data_to_append;
>+                                               m = m->next;
Should break if m->next == NULL
>+                                       }
>+                               }
>+                       }
>+                       rte_mempool_put_bulk(test_data->op_pool,
>+                                            (void **)deq_ops, num_deq);
>+                       allocated -= num_deq;
>+               }
>+
>+               /* Dequeue the last operations */
>+               while (total_deq_ops < total_ops) {
>+                       num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
>+                                               deq_ops, test_data->burst_sz);
>+                       total_deq_ops += num_deq;
>+                       if (benchmarking == 0) {
>+                               for (i = 0; i < num_deq; i++) {
>+                                       struct rte_comp_op *op = deq_ops[i];
>+                                       const void *read_data_addr =
>+                                               rte_pktmbuf_read(op->m_dst, 0,
>+                                               op->produced, output_data_ptr);
>+                                       if (read_data_addr == NULL) {
>+                                               RTE_LOG(ERR, USER1,
>+                                     "Could not copy buffer in destination\n");
>+                                               res = -1;
>+                                               goto end;
>+                                       }
>+
>+                                       if (read_data_addr != output_data_ptr)
>+                                               rte_memcpy(output_data_ptr,
>+                                                       rte_pktmbuf_mtod(
>+                                                       op->m_dst, uint8_t *),
>+                                                       op->produced);
>+                                       output_data_ptr += op->produced;
>+                                       output_size += op->produced;
>+
>+                               }
>+                       }
>+
>+                       if (iter == num_iter - 1) {
>+                               for (i = 0; i < num_deq; i++) {
>+                                       struct rte_comp_op *op = deq_ops[i];
>+                                       struct rte_mbuf *m = op->m_dst;
>+
>+                                       m->pkt_len = op->produced;
>+                                       uint32_t remaining_data = op->produced;
>+                                       uint16_t data_to_append;
>+
>+                                       while (remaining_data > 0) {
>+                                               data_to_append =
>+                                               RTE_MIN(remaining_data,
>+                                                       test_data->seg_sz);
>+                                               m->data_len = data_to_append;
>+                                               remaining_data -=
>+                                                               data_to_append;
>+                                               m = m->next;
>+                                       }
>+                               }
>+                       }
>+                       rte_mempool_put_bulk(test_data->op_pool,
>+                                            (void **)deq_ops, num_deq);
>+                       allocated -= num_deq;
>+               }
>+       }
>+
>+       if (benchmarking) {
>+               tsc_end = rte_rdtsc();
>+               tsc_duration = tsc_end - tsc_start;
>+
>+               if (type == RTE_COMP_COMPRESS)
The test appears to cover stateless operations only, so can we add a perf test type, like: test type perf, op type: STATELESS/STATEFUL?
Also, why do we need --max-num-sgl-segs as an input option from the user? Shouldn't input_sz and seg_sz internally decide the number of segments?
Or was it added to serve some other purpose?

Thanks
Shally

>+                       test_data->comp_tsc_duration[level] =
>+                                       tsc_duration / num_iter;
>+               else
>+                       test_data->decomp_tsc_duration[level] =
>+                                       tsc_duration / num_iter;
>+       }
>+
>+       if (benchmarking == 0 && output_data_sz)
>+               *output_data_sz = output_size;
>+end:
>+       rte_mempool_put_bulk(test_data->op_pool, (void **)ops, allocated);
>+       rte_compressdev_private_xform_free(dev_id, priv_xform);
>+       rte_free(ops);
>+       return res;
>+}
>+
> int
> main(int argc, char **argv)
> {
>+       uint8_t level, level_idx = 0;
>+       uint8_t i;
>        int ret;
>        struct comp_test_data *test_data;
>
>@@ -43,9 +751,145 @@ main(int argc, char **argv)
>                goto err;
>        }
>
>+       if (comp_perf_initialize_compressdev(test_data) < 0) {
>+               ret = EXIT_FAILURE;
>+               goto err;
>+       }
>+
>+       if (comp_perf_dump_input_data(test_data) < 0) {
>+               ret = EXIT_FAILURE;
>+               goto err;
>+       }
>+
>+       if (comp_perf_allocate_memory(test_data) < 0) {
>+               ret = EXIT_FAILURE;
>+               goto err;
>+       }
>+
>+       if (prepare_bufs(test_data) < 0) {
>+               ret = EXIT_FAILURE;
>+               goto err;
>+       }
>+
>+       if (test_data->level.inc != 0)
>+               level = test_data->level.min;
>+       else
>+               level = test_data->level.list[0];
>+
>+       size_t comp_data_sz;
>+       size_t decomp_data_sz;
>+
>+       printf("Burst size = %u\n", test_data->burst_sz);
>+       printf("File size = %zu\n", test_data->input_data_sz);
>+
>+       printf("%6s%12s%17s%19s%21s%15s%21s%23s%16s\n",
>+               "Level", "Comp size", "Comp ratio [%]",
>+               "Comp [Cycles/it]", "Comp [Cycles/Byte]", "Comp [Gbps]",
>+               "Decomp [Cycles/it]", "Decomp [Cycles/Byte]", "Decomp [Gbps]");
>+
>+       while (level <= test_data->level.max) {
>+               /*
>+                * Run a first iteration, to verify compression and
>+                * get the compression ratio for the level
>+                */
>+               if (main_loop(test_data, level, RTE_COMP_COMPRESS,
>+                             test_data->compressed_data,
>+                             &comp_data_sz, 0) < 0) {
>+                       ret = EXIT_FAILURE;
>+                       goto err;
>+               }
>+
>+               if (main_loop(test_data, level, RTE_COMP_DECOMPRESS,
>+                             test_data->decompressed_data,
>+                             &decomp_data_sz, 0) < 0) {
>+                       ret = EXIT_FAILURE;
>+                       goto err;
>+               }
>+
>+               if (decomp_data_sz != test_data->input_data_sz) {
>+                       RTE_LOG(ERR, USER1,
>+                  "Decompressed data length not equal to input data length\n");
>+                       RTE_LOG(ERR, USER1,
>+                               "Decompressed size = %zu, expected = %zu\n",
>+                               decomp_data_sz, test_data->input_data_sz);
>+                       ret = EXIT_FAILURE;
>+                       goto err;
>+               } else {
>+                       if (memcmp(test_data->decompressed_data,
>+                                       test_data->input_data,
>+                                       test_data->input_data_sz) != 0) {
>+                               RTE_LOG(ERR, USER1,
>+                           "Decompressed data is not the same as file data\n");
>+                               ret = EXIT_FAILURE;
>+                               goto err;
>+                       }
>+               }
>+
>+               double ratio = (double) comp_data_sz /
>+                                               test_data->input_data_sz * 100;
>+
>+               /*
>+                * Run the tests twice, discarding the first performance
>+                * results, before the cache is warmed up
>+                */
>+               for (i = 0; i < 2; i++) {
>+                       if (main_loop(test_data, level, RTE_COMP_COMPRESS,
>+                                       NULL, NULL, 1) < 0) {
>+                               ret = EXIT_FAILURE;
>+                               goto err;
>+                       }
>+               }
>+
>+               for (i = 0; i < 2; i++) {
>+                       if (main_loop(test_data, level, RTE_COMP_DECOMPRESS,
>+                                       NULL, NULL, 1) < 0) {
>+                               ret = EXIT_FAILURE;
>+                               goto err;
>+                       }
>+               }
>+
>+               uint64_t comp_tsc_duration =
>+                               test_data->comp_tsc_duration[level];
>+               double comp_tsc_byte = (double)comp_tsc_duration /
>+                                               test_data->input_data_sz;
>+               double comp_gbps = rte_get_tsc_hz() / comp_tsc_byte * 8 /
>+                               1000000000;
>+               uint64_t decomp_tsc_duration =
>+                               test_data->decomp_tsc_duration[level];
>+               double decomp_tsc_byte = (double)decomp_tsc_duration /
>+                                               test_data->input_data_sz;
>+               double decomp_gbps = rte_get_tsc_hz() / decomp_tsc_byte * 8 /
>+                               1000000000;
>+
>+               printf("%6u%12zu%17.2f%19"PRIu64"%21.2f"
>+                                       "%15.2f%21"PRIu64"%23.2f%16.2f\n",
>+                      level, comp_data_sz, ratio, comp_tsc_duration,
>+                      comp_tsc_byte, comp_gbps, decomp_tsc_duration,
>+                      decomp_tsc_byte, decomp_gbps);
>+
>+               if (test_data->level.inc != 0)
>+                       level += test_data->level.inc;
>+               else {
>+                       if (++level_idx == test_data->level.count)
>+                               break;
>+                       level = test_data->level.list[level_idx];
>+               }
>+       }
>+
>        ret = EXIT_SUCCESS;
>
> err:
>+       if (test_data->cdev_id != -1)
>+               rte_compressdev_stop(test_data->cdev_id);
>+
>+       free_bufs(test_data);
>+       rte_free(test_data->compressed_data);
>+       rte_free(test_data->decompressed_data);
>+       rte_free(test_data->input_data);
>+       rte_mempool_free(test_data->comp_buf_pool);
>+       rte_mempool_free(test_data->decomp_buf_pool);
>+       rte_mempool_free(test_data->op_pool);
>+
>        rte_free(test_data);
>
>        return ret;
>--
>2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
  2018-10-12 10:15   ` Verma, Shally
@ 2018-10-15 15:10     ` Daly, Lee
  2018-10-16  5:18       ` Verma, Shally
  2018-10-17 14:33       ` Trahe, Fiona
  2018-11-02  9:59     ` Jozwiak, TomaszX
  1 sibling, 2 replies; 76+ messages in thread
From: Daly, Lee @ 2018-10-15 15:10 UTC (permalink / raw)
  To: Verma, Shally; +Cc: Jozwiak, TomaszX, dev, Trahe, Fiona, akhil.goyal

Thanks for your input, Shally; see comments below.


I will be reviewing these changes while Tomasz is out this week.

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Verma, Shally
> Sent: Friday, October 12, 2018 11:16 AM
> To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; Trahe,
> Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com; De Lara Guarch, Pablo
> <pablo.de.lara.guarch@intel.com>
> Cc: De@dpdk.org; Lara@dpdk.org; Guarch@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
> measurement
> 
> HI TomaszX
> 
> Sorry for delay in response. Comments inline.
> 

<...>
> >+static int
> >+comp_perf_check_capabilities(struct comp_test_data *test_data) {
> >+       const struct rte_compressdev_capabilities *cap;
> >+
> >+       cap = rte_compressdev_capability_get(test_data->cdev_id,
> >+                                            RTE_COMP_ALGO_DEFLATE);
> >+
> >+       if (cap == NULL) {
> >+               RTE_LOG(ERR, USER1,
> >+                       "Compress device does not support DEFLATE\n");
> >+               return -1;
> >+       }
> >+
> >+       uint64_t comp_flags = cap->comp_feature_flags;
> >+
> >+       /* Huffman enconding */
> >+       if (test_data->huffman_enc == RTE_COMP_HUFFMAN_FIXED &&
> >+                       (comp_flags & RTE_COMP_FF_HUFFMAN_FIXED) == 0) {
> >+               RTE_LOG(ERR, USER1,
> >+                       "Compress device does not supported Fixed Huffman\n");
> >+               return -1;
> >+       }
> >+
> >+       if (test_data->huffman_enc == RTE_COMP_HUFFMAN_DYNAMIC &&
> >+                       (comp_flags & RTE_COMP_FF_HUFFMAN_DYNAMIC) == 0) {
> >+               RTE_LOG(ERR, USER1,
> >+                       "Compress device does not supported Dynamic Huffman\n");
> >+               return -1;
> >+       }
> >+
> >+       /* Window size */
> >+       if (test_data->window_sz != -1) {
> >+               if (param_range_check(test_data->window_sz,
> >+ &cap->window_size)
> What if cap->window_size is 0 i.e. implementation default?
What do you mean when you say cap->window_size = 0?
cap->window_size is the range structure here (min, max and increment), which is filled out by the driver.
The default in the perf tool is to set the window size to the maximum the driver can support.

> 
> >+                               < 0) {
> >+                       RTE_LOG(ERR, USER1,
> >+                               "Compress device does not support "
> >+                               "this window size\n");
> >+                       return -1;
> >+               }
> >+       } else
> >+               /* Set window size to PMD maximum if none was specified */
> >+               test_data->window_sz = cap->window_size.max;
> >+

<...>
> >+
> >+static int
> >+comp_perf_dump_input_data(struct comp_test_data *test_data) {
> >+       FILE *f = fopen(test_data->input_file, "r");
> >+
> >+       if (f == NULL) {
> >+               RTE_LOG(ERR, USER1, "Input file could not be opened\n");
> >+               return -1;
> >+       }
> >+
> >+       if (fseek(f, 0, SEEK_END) != 0) {
> >+               RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
> >+               goto err;
> >+       }
> >+       size_t actual_file_sz = ftell(f);
> >+       /* If extended input data size has not been set,
> >+        * input data size = file size
> >+        */
> >+
> >+       if (test_data->input_data_sz == 0)
> >+               test_data->input_data_sz = actual_file_sz;
> >+
> >+       if (fseek(f, 0, SEEK_SET) != 0) {
> >+               RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
> >+               goto err;
> >+       }
> >+
> >+       test_data->input_data = rte_zmalloc_socket(NULL,
> >+                               test_data->input_data_sz, 0,
> >+ rte_socket_id());
> >+
> >+       if (test_data->input_data == NULL) {
> >+               RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
> >+                               "file could not be allocated\n");
> >+               goto err;
> >+       }
> >+
> >+       size_t remaining_data = test_data->input_data_sz;
> >+       uint8_t *data = test_data->input_data;
> >+
> >+       while (remaining_data > 0) {
> >+               size_t data_to_read = RTE_MIN(remaining_data,
> >+ actual_file_sz);
> >+
> >+               if (fread(data, data_to_read, 1, f) != 1) {
> >+                       RTE_LOG(ERR, USER1, "Input file could not be read\n");
> >+                       goto err;
> >+               }
> >+               if (fseek(f, 0, SEEK_SET) != 0) {
> >+                       RTE_LOG(ERR, USER1,
> >+                               "Size of input could not be calculated\n");
> >+                       goto err;
> >+               }
> >+               remaining_data -= data_to_read;
> >+               data += data_to_read;
> It looks like it will run 2nd time only if input file size < input data size in which
> case it will just keep filling input buffer with repeated data.
> Is that the intention here?
Yes, from what I can see, this while loop will only be entered a second time if the file is smaller than the requested data size,
in which case the data from the input file is repeated as many times as needed to fill the requested size.
If we were to pad with 0's or random data, it would skew the ratio a lot.
I do understand, though, that the ratio may also be better in this case, due to the repetition of data.

> 
> >+       }
> >+
> >+       if (test_data->input_data_sz > actual_file_sz)
> >+               RTE_LOG(INFO, USER1,
> >+                 "%zu bytes read from file %s, extending the file %.2f times\n",
> >+                       test_data->input_data_sz, test_data->input_file,
> >+                       (double)test_data->input_data_sz/actual_file_sz);
> >+       else
> >+               RTE_LOG(INFO, USER1,
> >+                       "%zu bytes read from file %s\n",
> >+                       test_data->input_data_sz,
> >+ test_data->input_file);
> >+
> >+       fclose(f);
> >+
> >+       return 0;
> >+
> >+err:
> >+       fclose(f);
> >+       rte_free(test_data->input_data);
> >+       test_data->input_data = NULL;
> >+
> >+       return -1;
> >+}
> >+
> >+static int
> >+comp_perf_initialize_compressdev(struct comp_test_data *test_data) {
> >+       uint8_t enabled_cdev_count;
> >+       uint8_t enabled_cdevs[RTE_COMPRESS_MAX_DEVS];
> >+
> >+       enabled_cdev_count = rte_compressdev_devices_get(test_data-
> >driver_name,
> >+                       enabled_cdevs, RTE_COMPRESS_MAX_DEVS);
> >+       if (enabled_cdev_count == 0) {
> >+               RTE_LOG(ERR, USER1, "No compress devices type %s available\n",
> >+                               test_data->driver_name);
> >+               return -EINVAL;
> >+       }
> >+
> >+       if (enabled_cdev_count > 1)
> >+               RTE_LOG(INFO, USER1,
> >+                       "Only the first compress device will be
> >+ used\n");
> >+
> >+       test_data->cdev_id = enabled_cdevs[0];
> >+
> >+       if (comp_perf_check_capabilities(test_data) < 0)
> >+               return -1;
> >+
> >+       /* Configure compressdev (one device, one queue pair) */
> >+       struct rte_compressdev_config config = {
> >+               .socket_id = rte_socket_id(),
> >+               .nb_queue_pairs = 1,
> >+               .max_nb_priv_xforms = NUM_MAX_XFORMS,
> >+               .max_nb_streams = 0
> >+       };
> >+
> >+       if (rte_compressdev_configure(test_data->cdev_id, &config) < 0) {
> >+               RTE_LOG(ERR, USER1, "Device configuration failed\n");
> >+               return -1;
> >+       }
> >+
> >+       if (rte_compressdev_queue_pair_setup(test_data->cdev_id, 0,
> >+                       NUM_MAX_INFLIGHT_OPS, rte_socket_id()) < 0) {
> >+               RTE_LOG(ERR, USER1, "Queue pair setup failed\n");
> >+               return -1;
> >+       }
> >+
> >+       if (rte_compressdev_start(test_data->cdev_id) < 0) {
> >+               RTE_LOG(ERR, USER1, "Device could not be started\n");
> >+               return -1;
> >+       }
> >+
> >+       return 0;
> >+}
> >+
> >+static int
> >+prepare_bufs(struct comp_test_data *test_data) {
> >+       uint32_t remaining_data = test_data->input_data_sz;
> >+       uint8_t *input_data_ptr = test_data->input_data;
> >+       size_t data_sz;
> >+       uint8_t *data_addr;
> >+       uint32_t i, j;
> >+
> >+       for (i = 0; i < test_data->total_bufs; i++) {
> >+               /* Allocate data in input mbuf and copy data from input file */
> >+               test_data->decomp_bufs[i] =
> >+                       rte_pktmbuf_alloc(test_data->decomp_buf_pool);
> >+               if (test_data->decomp_bufs[i] == NULL) {
> >+                       RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
> >+                       return -1;
> >+               }
> >+
> >+               data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
> >+               data_addr = (uint8_t *) rte_pktmbuf_append(
> >+                                       test_data->decomp_bufs[i], data_sz);
> >+               if (data_addr == NULL) {
> >+                       RTE_LOG(ERR, USER1, "Could not append data\n");
> >+                       return -1;
> >+               }
> >+               rte_memcpy(data_addr, input_data_ptr, data_sz);
> >+
> >+               input_data_ptr += data_sz;
> >+               remaining_data -= data_sz;
> >+
> >+               /* Already one segment in the mbuf */
> >+               uint16_t segs_per_mbuf = 1;
> >+
> >+               /* Chain mbufs if needed for input mbufs */
> >+               while (segs_per_mbuf < test_data->max_sgl_segs
> >+                               && remaining_data > 0) {
> >+                       struct rte_mbuf *next_seg =
> >+
> >+ rte_pktmbuf_alloc(test_data->decomp_buf_pool);
> >+
> >+                       if (next_seg == NULL) {
> >+                               RTE_LOG(ERR, USER1,
> >+                                       "Could not allocate mbuf\n");
> >+                               return -1;
> >+                       }
> >+
> >+                       data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
> >+                       data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
> >+                               data_sz);
> >+
> >+                       if (data_addr == NULL) {
> >+                               RTE_LOG(ERR, USER1, "Could not append
> >+ data\n");
> Since a new buffer per segment is allocated, so is it possible for append to
> fail? think, this check is redundant here.

True.

> >+                               return -1;
> >+                       }
> >+
> >+                       rte_memcpy(data_addr, input_data_ptr, data_sz);
> >+                       input_data_ptr += data_sz;
> >+                       remaining_data -= data_sz;
> >+
> >+                       if (rte_pktmbuf_chain(test_data->decomp_bufs[i],
> >+                                       next_seg) < 0) {
> >+                               RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
> >+                               return -1;
> >+                       }
> >+                       segs_per_mbuf++;
> >+               }
> >+

<...>
> >+
> >+       /* Create private xform */
> >+       if (rte_compressdev_private_xform_create(dev_id, &xform,
> >+                       &priv_xform) < 0) {
> >+               RTE_LOG(ERR, USER1, "Private xform could not be created\n");
> >+               res = -1;
> >+               goto end;
> >+       }
> >+
> >+       uint64_t tsc_start, tsc_end, tsc_duration;
> >+
> >+       tsc_start = tsc_end = tsc_duration = 0;
> >+       if (benchmarking) {
> >+               tsc_start = rte_rdtsc();
> >+               num_iter = test_data->num_iter;
> >+       } else
> >+               num_iter = 1;
> Looks like in same code we're doing benchmarking and functional validation.
> It can be reorganised to keep validation test separately like done in
> crypto_perf.
Good point, will keep that in mind when doing these changes.

> 
> >+
> >+       for (iter = 0; iter < num_iter; iter++) {
> >+               uint32_t total_ops = test_data->total_bufs;
> >+               uint32_t remaining_ops = test_data->total_bufs;
> >+               uint32_t total_deq_ops = 0;
> >+               uint32_t total_enq_ops = 0;
> >+               uint16_t ops_unused = 0;
> >+               uint16_t num_enq = 0;
> >+               uint16_t num_deq = 0;
> >+
> >+               output_size = 0;
> >+
> >+               while (remaining_ops > 0) {
> >+                       uint16_t num_ops = RTE_MIN(remaining_ops,
> >+                                                  test_data->burst_sz);
> >+                       uint16_t ops_needed = num_ops - ops_unused;
> >+
> >+                       /*
> >+                        * Move the unused operations from the previous
> >+                        * enqueue_burst call to the front, to maintain order
> >+                        */
> >+                       if ((ops_unused > 0) && (num_enq > 0)) {
> >+                               size_t nb_b_to_mov =
> >+                                     ops_unused * sizeof(struct
> >+ rte_comp_op *);
> >+
> >+                               memmove(ops, &ops[num_enq], nb_b_to_mov);
> >+                       }
> >+
> >+                       /* Allocate compression operations */
> >+                       if (ops_needed && !rte_comp_op_bulk_alloc(
> >+                                               test_data->op_pool,
> >+                                               &ops[ops_unused],
> >+                                               ops_needed)) {
> >+                               RTE_LOG(ERR, USER1,
> >+                                     "Could not allocate enough operations\n");
> >+                               res = -1;
> >+                               goto end;
> >+                       }
> >+                       allocated += ops_needed;
> >+
> >+                       for (i = 0; i < ops_needed; i++) {
> >+                               /*
> >+                                * Calculate next buffer to attach to operation
> >+                                */
> >+                               uint32_t buf_id = total_enq_ops + i +
> >+                                               ops_unused;
> >+                               uint16_t op_id = ops_unused + i;
> >+                               /* Reset all data in output buffers */
> >+                               struct rte_mbuf *m =
> >+ output_bufs[buf_id];
> >+
> >+                               m->pkt_len = test_data->seg_sz *
> >+ m->nb_segs;
> Isn't pkt_len set already when we call rte_pktmbuf_append() and chain()?
> 
> >+                               while (m) {
> >+                                       m->data_len = m->buf_len -
> >+ m->data_off;
> Same question, shouldn't rte_pktmbuf_append() adjust data_len as well per
> each mbuf?
Yes, you are correct.
As far as I can see, the *m mbuf pointer is redundant.

> 
> >+                                       m = m->next;
> >+                               }
> >+                               ops[op_id]->m_src = input_bufs[buf_id];
> >+                               ops[op_id]->m_dst = output_bufs[buf_id];
> >+                               ops[op_id]->src.offset = 0;
> >+                               ops[op_id]->src.length =
> >+                                       rte_pktmbuf_pkt_len(input_bufs[buf_id]);
> >+                               ops[op_id]->dst.offset = 0;
> >+                               ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
> >+                               ops[op_id]->input_chksum = buf_id;
> >+                               ops[op_id]->private_xform = priv_xform;
> >+                       }
> >+
> >+                       num_enq = rte_compressdev_enqueue_burst(dev_id, 0, ops,
> >+                                                               num_ops);
> >+                       ops_unused = num_ops - num_enq;
> >+                       remaining_ops -= num_enq;
> >+                       total_enq_ops += num_enq;
> >+
> >+                       num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
> >+                                                          deq_ops,
> >+                                                          test_data->burst_sz);
> >+                       total_deq_ops += num_deq;
> >+                       if (benchmarking == 0) {
> >+                               for (i = 0; i < num_deq; i++) {
> >+                                       struct rte_comp_op *op = deq_ops[i];
> >+                                       const void *read_data_addr =
> >+                                               rte_pktmbuf_read(op->m_dst, 0,
> >+                                               op->produced, output_data_ptr);
> >+                                       if (read_data_addr == NULL) {
> >+                                               RTE_LOG(ERR, USER1,
> >+                                     "Could not copy buffer in destination\n");
> >+                                               res = -1;
> >+                                               goto end;
> >+                                       }
> >+
> >+                                       if (read_data_addr != output_data_ptr)
> >+                                               rte_memcpy(output_data_ptr,
> >+                                                       rte_pktmbuf_mtod(
> >+                                                         op->m_dst, uint8_t *),
> >+                                                       op->produced);
> >+                                       output_data_ptr += op->produced;
> >+                                       output_size += op->produced;
> >+
> >+                               }
> >+                       }
> >+
> >+                       if (iter == num_iter - 1) {
> >+                               for (i = 0; i < num_deq; i++) {
> Why is it only for last iteration, we are adjusting dst mbuf data_len.?
> Shouldn't it be done for each dequeued op?
> And, for benchmarking, do we even need to set data and pkt len on dst
> mbuf?
I assume the data_len is only changed on the last iteration for the reason you gave: to save cycles when benchmarking.
Does it need to be set at all? Possibly not.
> 
> >+                                       struct rte_comp_op *op = deq_ops[i];
> >+                                       struct rte_mbuf *m = op->m_dst;
> >+
> >+                                       m->pkt_len = op->produced;
> >+                                       uint32_t remaining_data = op->produced;
> >+                                       uint16_t data_to_append;
> >+
> >+                                       while (remaining_data > 0) {
> >+                                               data_to_append =
> >+                                                       RTE_MIN(remaining_data,
> >+                                                            test_data->seg_sz);
> >+                                               m->data_len = data_to_append;
> >+                                               remaining_data -=
> >+                                                               data_to_append;
> >+                                               m = m->next;
> Should break if m->next == NULL
Yup, you are correct, should be a check here.
> >+                                       }
> >+                               }
> >+                       }
> >+                       rte_mempool_put_bulk(test_data->op_pool,
> >+                                            (void **)deq_ops, num_deq);
> >+                       allocated -= num_deq;
> >+               }
> >+
> >+               /* Dequeue the last operations */
> >+               while (total_deq_ops < total_ops) {
> >+                       num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
> >+                                               deq_ops, test_data->burst_sz);
> >+                       total_deq_ops += num_deq;
> >+                       if (benchmarking == 0) {
> >+                               for (i = 0; i < num_deq; i++) {
> >+                                       struct rte_comp_op *op = deq_ops[i];
> >+                                       const void *read_data_addr =
> >+                                               rte_pktmbuf_read(op->m_dst, 0,
> >+                                               op->produced, output_data_ptr);
> >+                                       if (read_data_addr == NULL) {
> >+                                               RTE_LOG(ERR, USER1,
> >+                                     "Could not copy buffer in destination\n");
> >+                                               res = -1;
> >+                                               goto end;
> >+                                       }
> >+
> >+                                       if (read_data_addr != output_data_ptr)
> >+                                               rte_memcpy(output_data_ptr,
> >+                                                       rte_pktmbuf_mtod(
> >+                                                       op->m_dst, uint8_t *),
> >+                                                       op->produced);
> >+                                       output_data_ptr += op->produced;
> >+                                       output_size += op->produced;
> >+
> >+                               }
> >+                       }
> >+
> >+                       if (iter == num_iter - 1) {
> >+                               for (i = 0; i < num_deq; i++) {
> >+                                       struct rte_comp_op *op = deq_ops[i];
> >+                                       struct rte_mbuf *m = op->m_dst;
> >+
> >+                                       m->pkt_len = op->produced;
> >+                                       uint32_t remaining_data = op->produced;
> >+                                       uint16_t data_to_append;
> >+
> >+                                       while (remaining_data > 0) {
> >+                                               data_to_append =
> >+                                               RTE_MIN(remaining_data,
> >+                                                       test_data->seg_sz);
> >+                                               m->data_len = data_to_append;
> >+                                               remaining_data -=
> >+                                                               data_to_append;
> >+                                               m = m->next;
> >+                                       }
> >+                               }
> >+                       }
> >+                       rte_mempool_put_bulk(test_data->op_pool,
> >+                                            (void **)deq_ops, num_deq);
> >+                       allocated -= num_deq;
> >+               }
> >+       }
> >+
> >+       if (benchmarking) {
> >+               tsc_end = rte_rdtsc();
> >+               tsc_duration = tsc_end - tsc_start;
> >+
> >+               if (type == RTE_COMP_COMPRESS)
> test looks for stateless operations only, so can we add perf test type like: test
> type perf, op type:STATELESS/STATEFUL 
Are you asking for the tool to support stateful ops? Since no drivers support stateful yet,
we just wanted to ensure current driver functionality was covered with this first version.

>Also, why do we need --max-num-
> sgl-segs as an input option from user? Shouldn't input_sz and seg_sz
> internally decide on num-segs?
> Or is it added to serve some other different purpose?
I will have to get back to you on this one; it does seem illogical to get this input from the user,
but I will need to investigate further to find out whether there was a different purpose.
> 
> Thanks
> Shally
> 
Thanks for the feedback, 
We hope to get V2 sent asap.

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
  2018-10-15 15:10     ` Daly, Lee
@ 2018-10-16  5:18       ` Verma, Shally
  2018-10-17 14:33       ` Trahe, Fiona
  1 sibling, 0 replies; 76+ messages in thread
From: Verma, Shally @ 2018-10-16  5:18 UTC (permalink / raw)
  To: Daly, Lee; +Cc: Jozwiak, TomaszX, dev, Trahe, Fiona, akhil.goyal



>-----Original Message-----
>From: Daly, Lee <lee.daly@intel.com>
>Sent: 15 October 2018 20:40
>To: Verma, Shally <Shally.Verma@cavium.com>
>Cc: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; Trahe, Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com
>Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
>
>External Email
>
>Thanks for your input Shally see comments below.
>
>
>I will be reviewing these changes while Tomasz is out this week.
>
>> -----Original Message-----
>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Verma, Shally
>> Sent: Friday, October 12, 2018 11:16 AM
>> To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; Trahe,
>> Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com; De Lara Guarch, Pablo
>> <pablo.de.lara.guarch@intel.com>
>> Cc: De@dpdk.org; Lara@dpdk.org; Guarch@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
>> measurement
>>
>> HI TomaszX
>>
>> Sorry for delay in response. Comments inline.
>>
>
><...>
>> >+static int
>> >+comp_perf_check_capabilities(struct comp_test_data *test_data) {
>> >+       const struct rte_compressdev_capabilities *cap;
>> >+
>> >+       cap = rte_compressdev_capability_get(test_data->cdev_id,
>> >+                                            RTE_COMP_ALGO_DEFLATE);
>> >+
>> >+       if (cap == NULL) {
>> >+               RTE_LOG(ERR, USER1,
>> >+                       "Compress device does not support DEFLATE\n");
>> >+               return -1;
>> >+       }
>> >+
>> >+       uint64_t comp_flags = cap->comp_feature_flags;
>> >+
>> >+       /* Huffman enconding */
>> >+       if (test_data->huffman_enc == RTE_COMP_HUFFMAN_FIXED &&
>> >+                       (comp_flags & RTE_COMP_FF_HUFFMAN_FIXED) == 0) {
>> >+               RTE_LOG(ERR, USER1,
>> >+                       "Compress device does not supported Fixed Huffman\n");
>> >+               return -1;
>> >+       }
>> >+
>> >+       if (test_data->huffman_enc == RTE_COMP_HUFFMAN_DYNAMIC &&
>> >+                       (comp_flags & RTE_COMP_FF_HUFFMAN_DYNAMIC) == 0) {
>> >+               RTE_LOG(ERR, USER1,
>> >+                       "Compress device does not supported Dynamic Huffman\n");
>> >+               return -1;
>> >+       }
>> >+
>> >+       /* Window size */
>> >+       if (test_data->window_sz != -1) {
>> >+               if (param_range_check(test_data->window_sz,
>> >+ &cap->window_size)
>> What if cap->window_size is 0 i.e. implementation default?
>What do you mean when you say cap->window_size = 0?
>Cap->window_size is the range structure here, min, max and increment, which are filled out by the driver.
>Our implementation default in the perf tool will set the window size to max the driver can support.
If I recall correctly, and I am not mixing up my memories, I believe we added a condition in the lib where a driver can set window size min = 0 or max = 0 to mark the implementation default. If that case is not yet supported in the lib, then you can ignore this comment.
>
...

>> It looks like it will run 2nd time only if input file size < input data size in which
>> case it will just keep filling input buffer with repeated data.
>> Is that the intention here?
>From what I can see, yes, this will only enter this while loop a second time if the file is smaller than the data_size requested.
>Repeating the data from your input file as much as requested.
>If we were to pad with 0's or random data it would skew the ratio a lot.
>Even though I do understand the ratio may be better here in this case as well, due to the repetition of data.
>
Yes. So, in order not to influence the benchmark data here, I think we should stick to the input file size the user is giving. As performance
at a particular level will vary by content type, let the app choose and find out the performance for a given content type.

>>
...

>> >+       if (benchmarking) {
>> >+               tsc_end = rte_rdtsc();
>> >+               tsc_duration = tsc_end - tsc_start;
>> >+
>> >+               if (type == RTE_COMP_COMPRESS)
>> test looks for stateless operations only, so can we add perf test type like: test
>> type perf, op type:STATELESS/STATEFUL
>Are you asking for the tool to support stateful ops? Since no drivers support stateful yet
>We just wanted to ensure current driver functionality was covered with this first version.
Since it's an app, it should be generic enough to be extensible for stateful benchmarking.
So my suggestion would be either to name the app test_comp_benchmark_stateless, or to make it generic enough to handle both.

Thanks
Shally
>
>>Also, why do we need --max-num-
>> sgl-segs as an input option from user? Shouldn't input_sz and seg_sz
>> internally decide on num-segs?
>> Or is it added to serve some other different purpose?
>Will have to get back to you on this one, seems illogical to get this input from user,
>But I will have to do further investigation to find if there was a different purpose.
>>
>> Thanks
>> Shally
>>
>Thanks for the feedback,
>We hope to get V2 sent asap.
>

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH 3/3] doc/guides/tools: add doc file
  2018-10-01 13:27 ` [dpdk-dev] [PATCH 3/3] doc/guides/tools: add doc file Tomasz Jozwiak
@ 2018-10-16  9:26   ` Kovacevic, Marko
  0 siblings, 0 replies; 76+ messages in thread
From: Kovacevic, Marko @ 2018-10-16  9:26 UTC (permalink / raw)
  To: Jozwiak, TomaszX, dev, Trahe, Fiona, Jozwiak, TomaszX,
	akhil.goyal, De Lara Guarch, Pablo
  Cc: De, Lara, Guarch

> Added initial version of compression performance test description file.
> 
> Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
> Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
> ---
>  MAINTAINERS                    |  5 +++
>  doc/guides/tools/comp_perf.rst | 73


You forgot the index file. :)

dpdk/doc/guides/tools/comp_perf.rst: WARNING: document isn't included in any toctree

Marko K

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
  2018-10-15 15:10     ` Daly, Lee
  2018-10-16  5:18       ` Verma, Shally
@ 2018-10-17 14:33       ` Trahe, Fiona
  2018-10-17 15:42         ` Verma, Shally
  1 sibling, 1 reply; 76+ messages in thread
From: Trahe, Fiona @ 2018-10-17 14:33 UTC (permalink / raw)
  To: Daly, Lee, Verma, Shally; +Cc: Jozwiak, TomaszX, dev, akhil.goyal, Trahe, Fiona

Hi Shally, Lee,

> -----Original Message-----
> From: Daly, Lee
> Sent: Monday, October 15, 2018 8:10 AM
> To: Verma, Shally <Shally.Verma@cavium.com>
> Cc: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; Trahe, Fiona
> <fiona.trahe@intel.com>; akhil.goyal@nxp.com
> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
> 
> Thanks for your input Shally see comments below.
> 
> 
> I will be reviewing these changes while Tomasz is out this week.
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Verma, Shally
> > Sent: Friday, October 12, 2018 11:16 AM
> > To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; Trahe,
> > Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com; De Lara Guarch, Pablo
> > <pablo.de.lara.guarch@intel.com>
> > Cc: De@dpdk.org; Lara@dpdk.org; Guarch@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
> > measurement
> >
///

> >Also, why do we need --max-num-
> > sgl-segs as an input option from user? Shouldn't input_sz and seg_sz
> > internally decide on num-segs?
> > Or is it added to serve some other different purpose?
> Will have to get back to you on this one, seems illogical to get this input from user,
> But I will have to do further investigation to find if there was a different purpose.

[Fiona] Some PMDs have a limit on how many links can be in an sgl chain, e.g. in QAT case the 
PMD allocates a pool of internal structures of a suitable size during device initialisation, this is not 
a hard limit but can be configured in .config to give the user control over the memory resources allocated.
This perf-tool max-num-sgl-segs is so the user can pick a value <= whatever the PMD's max is.

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
  2018-10-17 14:33       ` Trahe, Fiona
@ 2018-10-17 15:42         ` Verma, Shally
  2018-10-17 16:45           ` Trahe, Fiona
  0 siblings, 1 reply; 76+ messages in thread
From: Verma, Shally @ 2018-10-17 15:42 UTC (permalink / raw)
  To: Trahe, Fiona, Daly, Lee; +Cc: Jozwiak, TomaszX, dev, akhil.goyal



>-----Original Message-----
>From: Trahe, Fiona <fiona.trahe@intel.com>
>Sent: 17 October 2018 20:04
>To: Daly, Lee <lee.daly@intel.com>; Verma, Shally <Shally.Verma@cavium.com>
>Cc: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; akhil.goyal@nxp.com; Trahe, Fiona <fiona.trahe@intel.com>
>Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
>
>External Email
>
>Hi Shally, Lee,
>
>> -----Original Message-----
>> From: Daly, Lee
>> Sent: Monday, October 15, 2018 8:10 AM
>> To: Verma, Shally <Shally.Verma@cavium.com>
>> Cc: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; Trahe, Fiona
>> <fiona.trahe@intel.com>; akhil.goyal@nxp.com
>> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
>>
>> Thanks for your input Shally see comments below.
>>
>>
>> I will be reviewing these changes while Tomasz is out this week.
>>
>> > -----Original Message-----
>> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Verma, Shally
>> > Sent: Friday, October 12, 2018 11:16 AM
>> > To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; Trahe,
>> > Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com; De Lara Guarch, Pablo
>> > <pablo.de.lara.guarch@intel.com>
>> > Cc: De@dpdk.org; Lara@dpdk.org; Guarch@dpdk.org
>> > Subject: Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
>> > measurement
>> >
>///
>
>> >Also, why do we need --max-num-
>> > sgl-segs as an input option from user? Shouldn't input_sz and seg_sz
>> > internally decide on num-segs?
>> > Or is it added to serve some other different purpose?
>> Will have to get back to you on this one, seems illogical to get this input from user,
>> But I will have to do further investigation to find if there was a different purpose.
>
>[Fiona] Some PMDs have a limit on how many links can be in an sgl chain, e.g. in QAT case the
>PMD allocates a pool of internal structures of a suitable size during device initialisation, this is not
>a hard limit but can be configured in .config to give the user control over the memory resources allocated.
>This perf-tool max-num-sgl-segs is so the user can pick a value <= whatever the PMD's max is.

Then also, I believe this could be taken care internally by an app.
App can choose convenient number of sgl segs as per PMD capability and input sz and chunk sz selected by user.
Just my thoughts.

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
  2018-10-17 15:42         ` Verma, Shally
@ 2018-10-17 16:45           ` Trahe, Fiona
  2018-10-17 16:47             ` Verma, Shally
  0 siblings, 1 reply; 76+ messages in thread
From: Trahe, Fiona @ 2018-10-17 16:45 UTC (permalink / raw)
  To: Verma, Shally, Daly, Lee; +Cc: Jozwiak, TomaszX, dev, akhil.goyal, Trahe, Fiona



> -----Original Message-----
> From: Verma, Shally [mailto:Shally.Verma@cavium.com]
> Sent: Wednesday, October 17, 2018 8:43 AM
> To: Trahe, Fiona <fiona.trahe@intel.com>; Daly, Lee <lee.daly@intel.com>
> Cc: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; akhil.goyal@nxp.com
> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
> 
> 
> 
> >-----Original Message-----
> >From: Trahe, Fiona <fiona.trahe@intel.com>
> >Sent: 17 October 2018 20:04
> >To: Daly, Lee <lee.daly@intel.com>; Verma, Shally <Shally.Verma@cavium.com>
> >Cc: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; akhil.goyal@nxp.com; Trahe, Fiona
> <fiona.trahe@intel.com>
> >Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
> >
> >External Email
> >
> >Hi Shally, Lee,
> >
> >> -----Original Message-----
> >> From: Daly, Lee
> >> Sent: Monday, October 15, 2018 8:10 AM
> >> To: Verma, Shally <Shally.Verma@cavium.com>
> >> Cc: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; Trahe, Fiona
> >> <fiona.trahe@intel.com>; akhil.goyal@nxp.com
> >> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
> >>
> >> Thanks for your input Shally see comments below.
> >>
> >>
> >> I will be reviewing these changes while Tomasz is out this week.
> >>
> >> > -----Original Message-----
> >> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Verma, Shally
> >> > Sent: Friday, October 12, 2018 11:16 AM
> >> > To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; Trahe,
> >> > Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com; De Lara Guarch, Pablo
> >> > <pablo.de.lara.guarch@intel.com>
> >> > Cc: De@dpdk.org; Lara@dpdk.org; Guarch@dpdk.org
> >> > Subject: Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
> >> > measurement
> >> >
> >///
> >
> >> >Also, why do we need --max-num-
> >> > sgl-segs as an input option from user? Shouldn't input_sz and seg_sz
> >> > internally decide on num-segs?
> >> > Or is it added to serve some other different purpose?
> >> Will have to get back to you on this one, seems illogical to get this input from user,
> >> But I will have to do further investigation to find if there was a different purpose.
> >
> >[Fiona] Some PMDs have a limit on how many links can be in an sgl chain, e.g. in QAT case the
> >PMD allocates a pool of internal structures of a suitable size during device initialisation, this is not
> >a hard limit but can be configured in .config to give the user control over the memory resources allocated.
> >This perf-tool max-num-sgl-segs is so the user can pick a value <= whatever the PMD's max is.
> 
> Then also, I believe this could be taken care internally by an app.
> App can choose convenient number of sgl segs as per PMD capability and input sz and chunk sz selected by
> user.
> Just my thoughts.
[Fiona] Then we'd need to add this capability to the API, e.g. add uint16_t max_nb_segments_per_sgl
into the rte_compressdev_info struct.
Special case 0 means no limit.
We did consider this before, I can't remember why we didn't do it, I think it's needed.
I'll push an API patch for this in 19.02 and we can remove the --max-num-sgl-segs param from 
the performance tool and hardcode it in the tool in the meantime.
Ok?

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
  2018-10-17 16:45           ` Trahe, Fiona
@ 2018-10-17 16:47             ` Verma, Shally
  2018-11-30 14:43               ` Jozwiak, TomaszX
  0 siblings, 1 reply; 76+ messages in thread
From: Verma, Shally @ 2018-10-17 16:47 UTC (permalink / raw)
  To: Trahe, Fiona, Daly, Lee; +Cc: Jozwiak, TomaszX, dev, akhil.goyal



>-----Original Message-----
>From: Trahe, Fiona <fiona.trahe@intel.com>
>Sent: 17 October 2018 22:15
>To: Verma, Shally <Shally.Verma@cavium.com>; Daly, Lee <lee.daly@intel.com>
>Cc: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; akhil.goyal@nxp.com; Trahe, Fiona <fiona.trahe@intel.com>
>Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
>
>External Email
>
>> -----Original Message-----
>> From: Verma, Shally [mailto:Shally.Verma@cavium.com]
>> Sent: Wednesday, October 17, 2018 8:43 AM
>> To: Trahe, Fiona <fiona.trahe@intel.com>; Daly, Lee <lee.daly@intel.com>
>> Cc: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; akhil.goyal@nxp.com
>> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
>>
>>
>>
>> >-----Original Message-----
>> >From: Trahe, Fiona <fiona.trahe@intel.com>
>> >Sent: 17 October 2018 20:04
>> >To: Daly, Lee <lee.daly@intel.com>; Verma, Shally <Shally.Verma@cavium.com>
>> >Cc: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; akhil.goyal@nxp.com; Trahe, Fiona
>> <fiona.trahe@intel.com>
>> >Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
>> >
>> >External Email
>> >
>> >Hi Shally, Lee,
>> >
>> >> -----Original Message-----
>> >> From: Daly, Lee
>> >> Sent: Monday, October 15, 2018 8:10 AM
>> >> To: Verma, Shally <Shally.Verma@cavium.com>
>> >> Cc: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; Trahe, Fiona
>> >> <fiona.trahe@intel.com>; akhil.goyal@nxp.com
>> >> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
>> >>
>> >> Thanks for your input Shally see comments below.
>> >>
>> >>
>> >> I will be reviewing these changes while Tomasz is out this week.
>> >>
>> >> > -----Original Message-----
>> >> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Verma, Shally
>> >> > Sent: Friday, October 12, 2018 11:16 AM
>> >> > To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; Trahe,
>> >> > Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com; De Lara Guarch, Pablo
>> >> > <pablo.de.lara.guarch@intel.com>
>> >> > Cc: De@dpdk.org; Lara@dpdk.org; Guarch@dpdk.org
>> >> > Subject: Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
>> >> > measurement
>> >> >
>> >///
>> >
>> >> >Also, why do we need --max-num-
>> >> > sgl-segs as an input option from user? Shouldn't input_sz and seg_sz
>> >> > internally decide on num-segs?
>> >> > Or is it added to serve some other different purpose?
>> >> Will have to get back to you on this one, seems illogical to get this input from user,
>> >> But I will have to do further investigation to find if there was a different purpose.
>> >
>> >[Fiona] Some PMDs have a limit on how many links can be in an sgl chain, e.g. in QAT case the
>> >PMD allocates a pool of internal structures of a suitable size during device initialisation, this is not
>> >a hard limit but can be configured in .config to give the user control over the memory resources allocated.
>> >This perf-tool max-num-sgl-segs is so the user can pick a value <= whatever the PMD's max is.
>>
>> Then also, I believe this could be taken care internally by an app.
>> App can choose convenient number of sgl segs as per PMD capability and input sz and chunk sz selected by
>> user.
>> Just my thoughts.
>[Fiona] Then we'd need to add this capability to the API, e.g. add uint16_t max_nb_segments_per_sgl
>into the rte_compressdev_info struct.
>Special case 0 means no limit.
>We did consider this before, I can't remember why we didn't do it, I think it's needed.
>I'll push an API patch for this in 19.02 and we can remove the --max-num-sgl-segs param from
>the performance tool and hardcode it in the tool in the meantime.
>Ok?
Yea. Sounds better.

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH v2 0/3] add initial version of compress-perf
  2018-10-01 13:27 [dpdk-dev] [PATCH 0/3] app: add initial version of compress-perf Tomasz Jozwiak
                   ` (2 preceding siblings ...)
  2018-10-01 13:27 ` [dpdk-dev] [PATCH 3/3] doc/guides/tools: add doc file Tomasz Jozwiak
@ 2018-11-02  9:43 ` Tomasz Jozwiak
  2018-11-02  9:44   ` [dpdk-dev] [PATCH v2 1/3] app/compress-perf: add parser Tomasz Jozwiak
                     ` (4 more replies)
  3 siblings, 5 replies; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-11-02  9:43 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, Shally.Verma, akhil.goyal

This patchset adds initial version of compression performance
test.

v2 changes:

  -  Added release note
  -  Added new cleanup flow into main function
  -  Blocked dynamic compression test because it hasn't been
     tested enough
  -  Changed `--max-num-sgl-segs' default value to 16
  -  Updated documentation

Opens:  comment from Shally Verma re separating validation from
        benchmarking will be investigated in a later release.
        Support for dynamic Huffman encoding will be added
        in a later release.

Tomasz Jozwiak (3):
  app/compress-perf: add parser
  app/compress-perf: add performance measurement
  doc/guides/tools: add doc files

 MAINTAINERS                                      |   5 +
 app/Makefile                                     |   4 +
 app/meson.build                                  |   1 +
 app/test-compress-perf/Makefile                  |  16 +
 app/test-compress-perf/comp_perf_options.h       |  59 ++
 app/test-compress-perf/comp_perf_options_parse.c | 592 +++++++++++++++
 app/test-compress-perf/main.c                    | 928 +++++++++++++++++++++++
 app/test-compress-perf/meson.build               |   7 +
 config/common_base                               |   5 +
 doc/guides/rel_notes/release_18_11.rst           |   6 +
 doc/guides/tools/comp_perf.rst                   |  75 ++
 11 files changed, 1698 insertions(+)
 create mode 100644 app/test-compress-perf/Makefile
 create mode 100644 app/test-compress-perf/comp_perf_options.h
 create mode 100644 app/test-compress-perf/comp_perf_options_parse.c
 create mode 100644 app/test-compress-perf/main.c
 create mode 100644 app/test-compress-perf/meson.build
 create mode 100644 doc/guides/tools/comp_perf.rst

-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH v2 1/3] app/compress-perf: add parser
  2018-11-02  9:43 ` [dpdk-dev] [PATCH v2 0/3] add initial version of compress-perf Tomasz Jozwiak
@ 2018-11-02  9:44   ` Tomasz Jozwiak
  2018-11-05  8:40     ` Verma, Shally
  2018-11-02  9:44   ` [dpdk-dev] [PATCH v2 2/3] app/compress-perf: add performance measurement Tomasz Jozwiak
                     ` (3 subsequent siblings)
  4 siblings, 1 reply; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-11-02  9:44 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, Shally.Verma, akhil.goyal

Added parser part into compression perf. test.

Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
---
 app/Makefile                                     |   4 +
 app/meson.build                                  |   1 +
 app/test-compress-perf/Makefile                  |  16 +
 app/test-compress-perf/comp_perf_options.h       |  59 +++
 app/test-compress-perf/comp_perf_options_parse.c | 596 +++++++++++++++++++++++
 app/test-compress-perf/main.c                    |  52 ++
 app/test-compress-perf/meson.build               |   7 +
 config/common_base                               |   5 +
 8 files changed, 740 insertions(+)
 create mode 100644 app/test-compress-perf/Makefile
 create mode 100644 app/test-compress-perf/comp_perf_options.h
 create mode 100644 app/test-compress-perf/comp_perf_options_parse.c
 create mode 100644 app/test-compress-perf/main.c
 create mode 100644 app/test-compress-perf/meson.build

diff --git a/app/Makefile b/app/Makefile
index 069fa98..d6641ef 100644
--- a/app/Makefile
+++ b/app/Makefile
@@ -11,6 +11,10 @@ ifeq ($(CONFIG_RTE_LIBRTE_BBDEV),y)
 DIRS-$(CONFIG_RTE_TEST_BBDEV) += test-bbdev
 endif
 
+ifeq ($(CONFIG_RTE_LIBRTE_COMPRESSDEV),y)
+DIRS-$(CONFIG_RTE_APP_COMPRESS_PERF) += test-compress-perf
+endif
+
 ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
 DIRS-$(CONFIG_RTE_APP_CRYPTO_PERF) += test-crypto-perf
 endif
diff --git a/app/meson.build b/app/meson.build
index a9a026b..47a2a86 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -4,6 +4,7 @@
 apps = ['pdump',
 	'proc-info',
 	'test-bbdev',
+	'test-compress-perf',
 	'test-crypto-perf',
 	'test-eventdev',
 	'test-pmd']
diff --git a/app/test-compress-perf/Makefile b/app/test-compress-perf/Makefile
new file mode 100644
index 0000000..8aa7a22
--- /dev/null
+++ b/app/test-compress-perf/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+APP = dpdk-test-compress-perf
+
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+CFLAGS += -O3
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+SRCS-y += comp_perf_options_parse.c
+
+include $(RTE_SDK)/mk/rte.app.mk
diff --git a/app/test-compress-perf/comp_perf_options.h b/app/test-compress-perf/comp_perf_options.h
new file mode 100644
index 0000000..7516ea0
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_options.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef COMP_PERF_OPTIONS_H
+#define COMP_PERF_OPTIONS_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+/* provides enum rte_comp_huffman and RTE_COMP_LEVEL_MAX */
+#include <rte_comp.h>
+
+/* Sizes, including the terminating NUL, of the option string buffers */
+#define MAX_DRIVER_NAME		64
+#define MAX_INPUT_FILE_NAME	64
+/* Maximum number of values kept from a comma-separated list option */
+#define MAX_LIST		32
+
+/* Operation(s) selected with the --operation option */
+enum comp_operation {
+	COMPRESS_ONLY,
+	DECOMPRESS_ONLY,
+	COMPRESS_DECOMPRESS
+};
+
+/*
+ * Values given either as a "min:inc:max" range or as an explicit list.
+ * For a list, count is the number of valid entries in list[] and
+ * min/max are the smallest/largest of them.
+ */
+struct range_list {
+	uint8_t min;
+	uint8_t max;
+	uint8_t inc;
+	uint8_t count;
+	uint8_t list[MAX_LIST];
+};
+
+/* Options and working state of one compression performance test */
+struct comp_test_data {
+	/* --driver-name and --input-file, NUL-terminated */
+	char driver_name[MAX_DRIVER_NAME];
+	char input_file[MAX_INPUT_FILE_NAME];
+	struct rte_mbuf **comp_bufs;
+	struct rte_mbuf **decomp_bufs;
+	uint32_t total_bufs;
+	uint8_t *input_data;
+	/* set by --extended-input-sz */
+	size_t input_data_sz;
+	uint8_t *compressed_data;
+	uint8_t *decompressed_data;
+	struct rte_mempool *comp_buf_pool;
+	struct rte_mempool *decomp_buf_pool;
+	struct rte_mempool *op_pool;
+	/* defaults to -1 */
+	int8_t cdev_id;
+	uint16_t seg_sz;
+	uint16_t burst_sz;
+	uint32_t pool_sz;
+	uint32_t num_iter;
+	uint16_t max_sgl_segs;
+	enum rte_comp_huffman huffman_enc;
+	enum comp_operation test_op;
+	/* base-two log of the window size; defaults to -1 */
+	int window_sz;
+	struct range_list level;
+	/* Store TSC duration for all levels (including level 0) */
+	uint64_t comp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
+	uint64_t decomp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
+};
+
+/*
+ * Parse the application's own command-line options into test_data.
+ * Returns 0 on success and a negative value when an option is unknown
+ * or its argument is rejected.
+ */
+int
+comp_perf_options_parse(struct comp_test_data *test_data, int argc,
+			char **argv);
+
+/* Load test_data with the default value of every optional setting. */
+void
+comp_perf_options_default(struct comp_test_data *test_data);
+
+/*
+ * Check that the mandatory options were given.
+ * Returns 0 when both the driver name and the input file are set,
+ * -1 otherwise.
+ */
+int
+comp_perf_options_check(struct comp_test_data *test_data);
+
+#endif /* COMP_PERF_OPTIONS_H */
diff --git a/app/test-compress-perf/comp_perf_options_parse.c b/app/test-compress-perf/comp_perf_options_parse.c
new file mode 100644
index 0000000..bef4d2f
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_options_parse.c
@@ -0,0 +1,596 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <getopt.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include <rte_string_fns.h>
+#include <rte_comp.h>
+
+#include "comp_perf_options.h"
+
+#define CPERF_DRIVER_NAME	("driver-name")	/* long option names, without the leading "--" */
+#define CPERF_TEST_FILE		("input-file")
+#define CPERF_SEG_SIZE		("seg-sz")
+#define CPERF_BURST_SIZE	("burst-sz")
+#define CPERF_EXTENDED_SIZE	("extended-input-sz")
+#define CPERF_POOL_SIZE		("pool-sz")
+#define CPERF_MAX_SGL_SEGS	("max-num-sgl-segs")
+#define CPERF_NUM_ITER		("num-iter")
+#define CPERF_OPTYPE		("operation")
+#define CPERF_HUFFMAN_ENC	("huffman-enc")
+#define CPERF_LEVEL		("compress-level")
+#define CPERF_WINDOW_SIZE	("window-sz")
+
+struct name_id_map {	/* one entry of a string-to-identifier lookup table */
+	const char *name;	/* key compared against the user's string */
+	uint32_t id;		/* value returned for a matching name */
+};
+
+/*
+ * Print the command-line help of the application to stdout.
+ *
+ * progname is the program name shown at the start of the synopsis.
+ * The defaults quoted here mirror comp_perf_options_default().
+ */
+static void
+usage(char *progname)
+{
+	printf("%s [EAL options] --\n"
+		" --driver-name NAME: compress driver to use\n"
+		" --input-file NAME: file to compress and decompress\n"
+		" --extended-input-sz N: extend file data up to this size (default: no extension)\n"
+		" --seg-sz N: size of segment to store the data (default: 2048)\n"
+		" --burst-sz N: compress operation burst size (default: 32)\n"
+		" --pool-sz N: mempool size for compress operations/mbufs\n"
+		"		(default: 8192)\n"
+		" --max-num-sgl-segs N: maximum number of segments for each mbuf\n"
+		"		(default: 65535)\n"
+		" --num-iter N: number of times the file will be\n"
+		"		compressed/decompressed (default: 10000)\n"
+		" --operation [comp/decomp/comp_and_decomp]: perform test on\n"
+		"		compression, decompression or both operations\n"
+		" --huffman-enc [fixed/dynamic/default]: Huffman encoding\n"
+		"		(default: dynamic)\n"
+		" --compress-level N: compression level, which could be a single value, list or range\n"
+		"		(default: range between 1 and 9)\n"
+		" --window-sz N: base two log value of compression window size\n"
+		"		(e.g.: 15 => 32k, default: max supported by PMD)\n"
+		" -h: prints this help\n",
+		progname);
+}
+
+/*
+ * Look up str_key among the names of the first map_len entries of map.
+ *
+ * Returns the id of the first entry whose name is equal to str_key,
+ * or -1 when no entry matches.
+ */
+static int
+get_str_key_id_mapping(struct name_id_map *map, unsigned int map_len,
+		const char *str_key)
+{
+	const struct name_id_map *entry = map;
+	const struct name_id_map *const last = map + map_len;
+
+	while (entry != last) {
+		if (!strcmp(str_key, entry->name))
+			return entry->id;
+		entry++;
+	}
+
+	return -1;
+}
+
+/*
+ * Parse arg as an unsigned base-10 number that fits in 32 bits.
+ *
+ * On success the number is stored in *value and 0 is returned. Returns -1
+ * when arg is NULL or empty, holds no digits or has trailing characters,
+ * and -ERANGE when the number does not fit in a uint32_t. *value is left
+ * untouched on failure.
+ */
+static int
+parse_uint32_t(uint32_t *value, const char *arg)
+{
+	char *end = NULL;
+	unsigned long n;
+
+	/*
+	 * Validate the string that was passed in rather than the getopt
+	 * global optarg, which need not be the string being parsed.
+	 */
+	if (arg == NULL || arg[0] == '\0')
+		return -1;
+
+	errno = 0;
+	n = strtoul(arg, &end, 10);
+
+	/* end == arg means that no digits were consumed at all */
+	if (end == arg || *end != '\0')
+		return -1;
+
+	/* strtoul() reports an unsigned long overflow through ERANGE */
+	if (errno == ERANGE || n > UINT32_MAX)
+		return -ERANGE;
+
+	*value = (uint32_t) n;
+
+	return 0;
+}
+
+/*
+ * 16-bit variant of parse_uint32_t(): parse arg as an unsigned decimal
+ * number and store it in *value.
+ *
+ * Returns 0 on success, the negative value reported by parse_uint32_t()
+ * when parsing fails, or -ERANGE when the number exceeds UINT16_MAX.
+ */
+static int
+parse_uint16_t(uint16_t *value, const char *arg)
+{
+	uint32_t wide = 0;
+	int status;
+
+	status = parse_uint32_t(&wide, arg);
+	if (status < 0)
+		return status;
+
+	if (wide > UINT16_MAX)
+		return -ERANGE;
+
+	*value = (uint16_t) wide;
+	return 0;
+}
+
+/*
+ * Parse one unsigned decimal field of a range specification.
+ *
+ * The field starts at *cursor and must be followed by the character delim
+ * ('\0' for the last field). On success the number is stored in *out,
+ * *cursor is moved past the delimiter and 0 is returned. Returns -1 when
+ * the field does not start with a digit, is not followed by delim or does
+ * not fit in a uint8_t.
+ */
+static int
+parse_range_field(const char **cursor, char delim, uint8_t *out)
+{
+	char *end = NULL;
+	unsigned long n;
+
+	/* strtoul() alone would accept leading blanks, a sign or no digits */
+	if (**cursor < '0' || **cursor > '9')
+		return -1;
+
+	errno = 0;
+	n = strtoul(*cursor, &end, 10);
+	if (errno == ERANGE || n > UINT8_MAX || *end != delim)
+		return -1;
+
+	*out = (uint8_t) n;
+	*cursor = (delim == '\0') ? end : end + 1;
+
+	return 0;
+}
+
+/*
+ * Parse arg as a "min:inc:max" range of 8-bit unsigned values.
+ *
+ * On success *min, *inc and *max receive the three fields and 0 is
+ * returned. Returns -1, leaving the outputs untouched, when arg does not
+ * consist of exactly three valid fields, when the increment is 0 or when
+ * the maximum is lower than the minimum.
+ */
+static int
+parse_range(const char *arg, uint8_t *min, uint8_t *max, uint8_t *inc)
+{
+	const char *cursor = arg;
+	uint8_t lo = 0;
+	uint8_t step = 0;
+	uint8_t hi = 0;
+
+	if (parse_range_field(&cursor, ':', &lo) < 0 ||
+			parse_range_field(&cursor, ':', &step) < 0 ||
+			parse_range_field(&cursor, '\0', &hi) < 0)
+		return -1;
+
+	if (step == 0 || hi < lo)
+		return -1;
+
+	/* commit only once the whole specification is known to be valid */
+	*min = lo;
+	*inc = step;
+	*max = hi;
+
+	return 0;
+}
+
+/*
+ * Parse arg as a comma-separated list of unsigned decimal values.
+ *
+ * Up to MAX_LIST values are stored in list in the order given; further
+ * values are dropped after a warning. When min/max are not NULL they
+ * receive the smallest/largest stored value.
+ *
+ * Returns the number of stored values, or -1 when arg holds no value,
+ * when a value is not a plain decimal number or does not fit in a
+ * uint8_t, or when the working copy of arg cannot be allocated.
+ */
+static int
+parse_list(const char *arg, uint8_t *list, uint8_t *min, uint8_t *max)
+{
+	char *token;
+	char *end;
+	unsigned long number;
+	int count = 0;
+	uint8_t temp_min = UINT8_MAX;
+	uint8_t temp_max = 0;
+
+	/* strtok() modifies its input, so work on a private copy */
+	char *copy_arg = strdup(arg);
+
+	if (copy_arg == NULL)
+		return -1;
+
+	for (token = strtok(copy_arg, ","); token != NULL;
+			token = strtok(NULL, ",")) {
+		if (count == MAX_LIST) {
+			RTE_LOG(WARNING, USER1,
+				"Using only the first %u sizes\n",
+					MAX_LIST);
+			break;
+		}
+
+		/* strtoul() alone would accept blanks, a sign or no digits */
+		if (token[0] < '0' || token[0] > '9')
+			goto err_list;
+
+		errno = 0;
+		end = NULL;
+		number = strtoul(token, &end, 10);
+
+		/*
+		 * Reject trailing characters and values that would be
+		 * silently truncated when stored in the uint8_t list.
+		 */
+		if (errno == ERANGE || *end != '\0' || number > UINT8_MAX)
+			goto err_list;
+
+		list[count++] = (uint8_t) number;
+
+		if (number < temp_min)
+			temp_min = (uint8_t) number;
+		if (number > temp_max)
+			temp_max = (uint8_t) number;
+	}
+
+	/* an argument that is empty or made only of commas has no token */
+	if (count == 0)
+		goto err_list;
+
+	if (min)
+		*min = temp_min;
+	if (max)
+		*max = temp_max;
+
+	free(copy_arg);
+	return count;
+
+err_list:
+	free(copy_arg);
+	return -1;
+}
+
+/*
+ * Handler of the --num-iter option: store the iteration count given in
+ * arg in test_data->num_iter.
+ *
+ * Returns 0 on success, -1 when arg cannot be parsed or is 0.
+ */
+static int
+parse_num_iter(struct comp_test_data *test_data, const char *arg)
+{
+	if (parse_uint32_t(&test_data->num_iter, arg) != 0) {
+		RTE_LOG(ERR, USER1, "Failed to parse total iteration count\n");
+		return -1;
+	}
+
+	if (!test_data->num_iter) {
+		RTE_LOG(ERR, USER1,
+				"Total number of iterations must be higher than 0\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Handler of the --pool-sz option: store the pool size given in arg in
+ * test_data->pool_sz.
+ *
+ * Returns 0 on success, -1 when arg cannot be parsed or is 0.
+ */
+static int
+parse_pool_sz(struct comp_test_data *test_data, const char *arg)
+{
+	int ret = parse_uint32_t(&test_data->pool_sz, arg);
+
+	if (ret) {
+		/* terminate the message with a newline like every other log */
+		RTE_LOG(ERR, USER1, "Failed to parse pool size\n");
+		return -1;
+	}
+
+	if (test_data->pool_sz == 0) {
+		RTE_LOG(ERR, USER1, "Pool size must be higher than 0\n");
+		return -1;
+	}
+
+	return ret;
+}
+
+/*
+ * Handler of the --burst-sz option: store the burst size given in arg in
+ * test_data->burst_sz.
+ *
+ * Returns 0 on success, -1 when arg cannot be parsed or is 0.
+ */
+static int
+parse_burst_sz(struct comp_test_data *test_data, const char *arg)
+{
+	if (parse_uint16_t(&test_data->burst_sz, arg) != 0) {
+		RTE_LOG(ERR, USER1, "Failed to parse burst size/s\n");
+		return -1;
+	}
+
+	if (!test_data->burst_sz) {
+		RTE_LOG(ERR, USER1, "Burst size must be higher than 0\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Handler of the --extended-input-sz option: store the size given in arg
+ * in test_data->input_data_sz.
+ *
+ * Returns 0 on success, -1 when arg cannot be parsed or is 0.
+ */
+static int
+parse_extended_input_sz(struct comp_test_data *test_data, const char *arg)
+{
+	uint32_t requested;
+
+	if (parse_uint32_t(&requested, arg) != 0) {
+		RTE_LOG(ERR, USER1, "Failed to parse extended input size\n");
+		return -1;
+	}
+
+	/* the field is updated before the zero check is made */
+	test_data->input_data_sz = requested;
+
+	if (requested != 0)
+		return 0;
+
+	RTE_LOG(ERR, USER1,
+		"Extended file size must be higher than 0\n");
+	return -1;
+}
+
+/*
+ * Handler of the --seg-sz option: store the segment size given in arg in
+ * test_data->seg_sz.
+ *
+ * Returns 0 on success, -1 when arg cannot be parsed or is 0.
+ */
+static int
+parse_seg_sz(struct comp_test_data *test_data, const char *arg)
+{
+	if (parse_uint16_t(&test_data->seg_sz, arg) != 0) {
+		RTE_LOG(ERR, USER1, "Failed to parse segment size\n");
+		return -1;
+	}
+
+	if (!test_data->seg_sz) {
+		RTE_LOG(ERR, USER1, "Segment size must be higher than 0\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Handler of the --max-num-sgl-segs option: store the segment limit given
+ * in arg in test_data->max_sgl_segs.
+ *
+ * Returns 0 on success, -1 when arg cannot be parsed or is 0.
+ */
+static int
+parse_max_num_sgl_segs(struct comp_test_data *test_data, const char *arg)
+{
+	if (parse_uint16_t(&test_data->max_sgl_segs, arg) != 0) {
+		RTE_LOG(ERR, USER1,
+			"Failed to parse max number of segments per mbuf chain\n");
+		return -1;
+	}
+
+	if (!test_data->max_sgl_segs) {
+		RTE_LOG(ERR, USER1, "Max number of segments per mbuf chain "
+			"must be higher than 0\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Handler of the --window-sz option: store the window size given in arg
+ * (documented by usage() as a base-two logarithm) in test_data->window_sz.
+ *
+ * Returns 0 on success, -1 when arg is not a number that fits in 16 bits.
+ */
+static int
+parse_window_sz(struct comp_test_data *test_data, const char *arg)
+{
+	uint16_t window_sz = 0;
+
+	/*
+	 * window_sz is an int. Parse into a real uint16_t and assign it:
+	 * storing through a uint16_t pointer to the field would rewrite
+	 * only part of the int and leave the rest of its previous value
+	 * (-1 by default) in place.
+	 */
+	if (parse_uint16_t(&window_sz, arg) != 0) {
+		RTE_LOG(ERR, USER1, "Failed to parse window size\n");
+		return -1;
+	}
+
+	test_data->window_sz = window_sz;
+
+	return 0;
+}
+
+/*
+ * Handler of the --driver-name option: copy arg into
+ * test_data->driver_name.
+ *
+ * Returns 0 on success, -1 when arg does not fit in the buffer together
+ * with its terminating NUL.
+ */
+static int
+parse_driver_name(struct comp_test_data *test_data, const char *arg)
+{
+	const size_t capacity = sizeof(test_data->driver_name);
+
+	if (strlen(arg) >= capacity)
+		return -1;
+
+	rte_strlcpy(test_data->driver_name, arg, capacity);
+
+	return 0;
+}
+
+/*
+ * Handler of the --input-file option: copy arg into
+ * test_data->input_file.
+ *
+ * Returns 0 on success, -1 when arg does not fit in the buffer together
+ * with its terminating NUL.
+ */
+static int
+parse_test_file(struct comp_test_data *test_data, const char *arg)
+{
+	const size_t capacity = sizeof(test_data->input_file);
+
+	if (strlen(arg) >= capacity)
+		return -1;
+
+	rte_strlcpy(test_data->input_file, arg, capacity);
+
+	return 0;
+}
+
+/*
+ * Handler of the --operation option: translate the keyword in arg into
+ * an enum comp_operation stored in test_data->test_op.
+ *
+ * Returns 0 on success, -1 when arg is not a known keyword.
+ */
+static int
+parse_op_type(struct comp_test_data *test_data, const char *arg)
+{
+	struct name_id_map optype_namemap[] = {
+		{ .name = "comp", .id = COMPRESS_ONLY },
+		{ .name = "decomp", .id = DECOMPRESS_ONLY },
+		{ .name = "comp_and_decomp", .id = COMPRESS_DECOMPRESS },
+	};
+	const int id = get_str_key_id_mapping(optype_namemap,
+			RTE_DIM(optype_namemap), arg);
+
+	if (id >= 0) {
+		test_data->test_op = (enum comp_operation)id;
+		return 0;
+	}
+
+	RTE_LOG(ERR, USER1, "Invalid operation type specified\n");
+	return -1;
+}
+
+/*
+ * Handler of the --huffman-enc option: translate the keyword in arg into
+ * an enum rte_comp_huffman stored in test_data->huffman_enc.
+ *
+ * Returns 0 on success, -1 when arg is not a known keyword.
+ */
+static int
+parse_huffman_enc(struct comp_test_data *test_data, const char *arg)
+{
+	struct name_id_map huffman_namemap[] = {
+		{
+			"default",
+			RTE_COMP_HUFFMAN_DEFAULT
+		},
+		{
+			"fixed",
+			RTE_COMP_HUFFMAN_FIXED
+		},
+		{
+			"dynamic",
+			RTE_COMP_HUFFMAN_DYNAMIC
+		}
+	};
+
+	int id = get_str_key_id_mapping(huffman_namemap,
+			RTE_DIM(huffman_namemap), arg);
+	if (id < 0) {
+		RTE_LOG(ERR, USER1, "Invalid Huffman encoding specified\n");
+		return -1;
+	}
+
+	test_data->huffman_enc = (enum rte_comp_huffman)id;
+
+	return 0;
+}
+
+/*
+ * Handler of the --compress-level option: fill test_data->level from arg,
+ * which is either a "min:inc:max" range or a comma-separated list (a
+ * single value being a list of one).
+ *
+ * Returns 0 on success, -1 when arg is neither a valid range nor a valid
+ * list, or when it asks for a level above RTE_COMP_LEVEL_MAX.
+ */
+static int
+parse_level(struct comp_test_data *test_data, const char *arg)
+{
+	int ret;
+
+	/*
+	 * Try parsing the argument as a range, if it fails,
+	 * parse it as a list
+	 */
+	if (parse_range(arg, &test_data->level.min, &test_data->level.max,
+			&test_data->level.inc) < 0) {
+		ret = parse_list(arg, test_data->level.list,
+					&test_data->level.min,
+					&test_data->level.max);
+		if (ret < 0) {
+			RTE_LOG(ERR, USER1,
+				"Failed to parse compression level/s\n");
+			return -1;
+		}
+		test_data->level.count = ret;
+	}
+
+	/*
+	 * The limit applies to both forms: the per-level duration arrays
+	 * of struct comp_test_data hold RTE_COMP_LEVEL_MAX + 1 entries.
+	 */
+	if (test_data->level.max > RTE_COMP_LEVEL_MAX) {
+		RTE_LOG(ERR, USER1, "Level cannot be higher than %u\n",
+				RTE_COMP_LEVEL_MAX);
+		return -1;
+	}
+
+	return 0;
+}
+
+typedef int (*option_parser_t)(struct comp_test_data *test_data,
+		const char *arg);	/* handler for one option's argument */
+
+struct long_opt_parser {	/* binds a long option name to its handler */
+	const char *lgopt_name;	/* option name, as listed in lgopts */
+	option_parser_t parser_fn;	/* called with the option's argument */
+
+};
+
+static struct option lgopts[] = {	/* long options handed to getopt_long() */
+
+	{ CPERF_DRIVER_NAME, required_argument, 0, 0 },
+	{ CPERF_TEST_FILE, required_argument, 0, 0 },
+	{ CPERF_SEG_SIZE, required_argument, 0, 0 },
+	{ CPERF_BURST_SIZE, required_argument, 0, 0 },
+	{ CPERF_EXTENDED_SIZE, required_argument, 0, 0 },
+	{ CPERF_POOL_SIZE, required_argument, 0, 0 },
+	{ CPERF_MAX_SGL_SEGS, required_argument, 0, 0},
+	{ CPERF_NUM_ITER, required_argument, 0, 0 },
+	{ CPERF_OPTYPE,	required_argument, 0, 0 },
+	{ CPERF_HUFFMAN_ENC, required_argument, 0, 0 },
+	{ CPERF_LEVEL, required_argument, 0, 0 },
+	{ CPERF_WINDOW_SIZE, required_argument, 0, 0 },
+	{ NULL, 0, 0, 0 }	/* end-of-table marker */
+};
+
+/*
+ * Dispatch the long option found at lgopts[opt_idx] to its handler,
+ * passing it the option argument currently held in getopt's optarg.
+ *
+ * Returns the handler's result, or -EINVAL when no handler is registered
+ * for the option.
+ */
+static int
+comp_perf_opts_parse_long(int opt_idx, struct comp_test_data *test_data)
+{
+	static const struct long_opt_parser parsermap[] = {
+		{ CPERF_DRIVER_NAME,	parse_driver_name },
+		{ CPERF_TEST_FILE,	parse_test_file },
+		{ CPERF_SEG_SIZE,	parse_seg_sz },
+		{ CPERF_BURST_SIZE,	parse_burst_sz },
+		{ CPERF_EXTENDED_SIZE,	parse_extended_input_sz },
+		{ CPERF_POOL_SIZE,	parse_pool_sz },
+		{ CPERF_MAX_SGL_SEGS,	parse_max_num_sgl_segs },
+		{ CPERF_NUM_ITER,	parse_num_iter },
+		{ CPERF_OPTYPE,		parse_op_type },
+		{ CPERF_HUFFMAN_ENC,	parse_huffman_enc },
+		{ CPERF_LEVEL,		parse_level },
+		{ CPERF_WINDOW_SIZE,	parse_window_sz },
+	};
+	const char *name = lgopts[opt_idx].name;
+	unsigned int i;
+
+	for (i = 0; i < RTE_DIM(parsermap); i++) {
+		/*
+		 * Compare whole names: a comparison limited to the length
+		 * of this option's name would also match any handler whose
+		 * name merely starts with it.
+		 */
+		if (strcmp(name, parsermap[i].lgopt_name) == 0)
+			return parsermap[i].parser_fn(test_data, optarg);
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Parse the application's command-line options into test_data.
+ *
+ * -h prints the help and exits the application. Returns 0 once every
+ * option has been handled, the negative value of the first handler that
+ * fails, or -EINVAL (after printing the help) for an unknown option.
+ */
+int
+comp_perf_options_parse(struct comp_test_data *test_data, int argc, char **argv)
+{
+	int opt, retval;
+	int opt_idx = 0;
+
+	/* getopt_long() is documented to return -1, not EOF, when done */
+	while ((opt = getopt_long(argc, argv, "h", lgopts, &opt_idx)) != -1) {
+		switch (opt) {
+		case 'h':
+			usage(argv[0]);
+			rte_exit(EXIT_SUCCESS, "Displayed help\n");
+			break;
+		/* long options */
+		case 0:
+			retval = comp_perf_opts_parse_long(opt_idx, test_data);
+			if (retval != 0)
+				return retval;
+
+			break;
+
+		default:
+			usage(argv[0]);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Load test_data with the value each optional setting takes when the
+ * user does not give the matching option.
+ */
+void
+comp_perf_options_default(struct comp_test_data *test_data)
+{
+	struct range_list *const lvl = &test_data->level;
+
+	/* compression levels 1 to 9, in steps of 1 */
+	lvl->min = 1;
+	lvl->max = 9;
+	lvl->inc = 1;
+
+	/* operation under test */
+	test_data->test_op = COMPRESS_DECOMPRESS;
+	test_data->huffman_enc = RTE_COMP_HUFFMAN_DYNAMIC;
+	test_data->window_sz = -1;
+	test_data->num_iter = 10000;
+
+	/* buffer and pool sizing */
+	test_data->seg_sz = 2048;
+	test_data->max_sgl_segs = UINT16_MAX;
+	test_data->burst_sz = 32;
+	test_data->pool_sz = 8192;
+
+	test_data->cdev_id = -1;
+}
+
+/*
+ * Check that the mandatory options were given.
+ *
+ * Returns 0 when both the driver name and the input file name are
+ * non-empty; otherwise logs which one is missing and returns -1.
+ */
+int
+comp_perf_options_check(struct comp_test_data *test_data)
+{
+	if (test_data->driver_name[0] == '\0') {
+		RTE_LOG(ERR, USER1, "Driver name has to be set\n");
+		return -1;
+	}
+
+	if (test_data->input_file[0] == '\0') {
+		RTE_LOG(ERR, USER1, "Input file name has to be set\n");
+		return -1;
+	}
+
+	return 0;
+}
diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c
new file mode 100644
index 0000000..f52b98d
--- /dev/null
+++ b/app/test-compress-perf/main.c
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_compressdev.h>
+
+#include "comp_perf_options.h"
+
+/**
+ * Application entry point: initialise the EAL, then build and validate the
+ * test configuration from the remaining command-line arguments.
+ *
+ * @return
+ *   EXIT_SUCCESS when the options parse and validate, EXIT_FAILURE otherwise.
+ */
+int
+main(int argc, char **argv)
+{
+	struct comp_test_data *test_data;
+	int status = EXIT_FAILURE;
+	int nb_eal_args;
+
+	/* Initialise DPDK EAL */
+	nb_eal_args = rte_eal_init(argc, argv);
+	if (nb_eal_args < 0)
+		rte_exit(EXIT_FAILURE, "Invalid EAL arguments!\n");
+
+	/* Skip the arguments already consumed by the EAL */
+	argc -= nb_eal_args;
+	argv += nb_eal_args;
+
+	test_data = rte_zmalloc_socket(NULL, sizeof(struct comp_test_data),
+					0, rte_socket_id());
+	if (test_data == NULL)
+		rte_exit(EXIT_FAILURE, "Cannot reserve memory in socket %d\n",
+				rte_socket_id());
+
+	/* Defaults first, so that user-supplied options override them */
+	comp_perf_options_default(test_data);
+
+	if (comp_perf_options_parse(test_data, argc, argv) < 0) {
+		RTE_LOG(ERR, USER1,
+			"Parsing one or more user options failed\n");
+		goto out;
+	}
+
+	if (comp_perf_options_check(test_data) < 0)
+		goto out;
+
+	status = EXIT_SUCCESS;
+
+out:
+	rte_free(test_data);
+
+	return status;
+}
diff --git a/app/test-compress-perf/meson.build b/app/test-compress-perf/meson.build
new file mode 100644
index 0000000..ba6d64d
--- /dev/null
+++ b/app/test-compress-perf/meson.build
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+allow_experimental_apis = true
+sources = files('comp_perf_options_parse.c',
+		'main.c')
+deps = ['compressdev']
diff --git a/config/common_base b/config/common_base
index d12ae98..2ab4b7b 100644
--- a/config/common_base
+++ b/config/common_base
@@ -949,6 +949,11 @@ CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
 CONFIG_RTE_TEST_BBDEV=y
 
 #
+# Compile the compression performance application
+#
+CONFIG_RTE_APP_COMPRESS_PERF=y
+
+#
 # Compile the crypto performance application
 #
 CONFIG_RTE_APP_CRYPTO_PERF=y
-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH v2 2/3] app/compress-perf: add performance measurement
  2018-11-02  9:43 ` [dpdk-dev] [PATCH v2 0/3] add initial version of compress-perf Tomasz Jozwiak
  2018-11-02  9:44   ` [dpdk-dev] [PATCH v2 1/3] app/compress-perf: add parser Tomasz Jozwiak
@ 2018-11-02  9:44   ` Tomasz Jozwiak
  2018-11-05  8:56     ` Verma, Shally
  2018-11-02  9:44   ` [dpdk-dev] [PATCH v2 3/3] doc/guides/tools: add doc files Tomasz Jozwiak
                     ` (2 subsequent siblings)
  4 siblings, 1 reply; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-11-02  9:44 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, Shally.Verma, akhil.goyal

Added performance measurement part into compression perf. test.

Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
---
 app/test-compress-perf/comp_perf_options_parse.c |   8 +-
 app/test-compress-perf/main.c                    | 886 ++++++++++++++++++++++-
 2 files changed, 883 insertions(+), 11 deletions(-)

diff --git a/app/test-compress-perf/comp_perf_options_parse.c b/app/test-compress-perf/comp_perf_options_parse.c
index bef4d2f..e5da3ad 100644
--- a/app/test-compress-perf/comp_perf_options_parse.c
+++ b/app/test-compress-perf/comp_perf_options_parse.c
@@ -431,10 +431,6 @@ parse_huffman_enc(struct comp_test_data *test_data, const char *arg)
 		{
 			"fixed",
 			RTE_COMP_HUFFMAN_FIXED
-		},
-		{
-			"dynamic",
-			RTE_COMP_HUFFMAN_DYNAMIC
 		}
 	};
 
@@ -569,9 +565,9 @@ comp_perf_options_default(struct comp_test_data *test_data)
 	test_data->seg_sz = 2048;
 	test_data->burst_sz = 32;
 	test_data->pool_sz = 8192;
-	test_data->max_sgl_segs = UINT16_MAX;
+	test_data->max_sgl_segs = 16;
 	test_data->num_iter = 10000;
-	test_data->huffman_enc = RTE_COMP_HUFFMAN_DYNAMIC;
+	test_data->huffman_enc = RTE_COMP_HUFFMAN_FIXED;
 	test_data->test_op = COMPRESS_DECOMPRESS;
 	test_data->window_sz = -1;
 	test_data->level.min = 1;
diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c
index f52b98d..e3f4bf6 100644
--- a/app/test-compress-perf/main.c
+++ b/app/test-compress-perf/main.c
@@ -5,14 +5,728 @@
 #include <rte_malloc.h>
 #include <rte_eal.h>
 #include <rte_log.h>
+#include <rte_cycles.h>
 #include <rte_compressdev.h>
 
 #include "comp_perf_options.h"
 
+#define NUM_MAX_XFORMS 16 /* max_nb_priv_xforms requested from the device */
+#define NUM_MAX_INFLIGHT_OPS 512 /* descriptors requested for the queue pair */
+#define EXPANSE_RATIO 1.05 /* growth factor used to size compressed_data */
+#define MIN_ISAL_SIZE 8 /* extra bytes added when sizing compressed_data */
+
+#define DIV_CEIL(a, b)  ((a) / (b) + ((a) % (b) != 0)) /* a / b rounded up */
+
+/* Cleanup state machine: records how far set-up has progressed, so that the
+ * exit path in main() releases only what was actually acquired.
+ */
+static enum cleanup_st {
+	ST_CLEAR = 0, /* nothing to release yet */
+	ST_TEST_DATA, /* test_data has been allocated */
+	ST_COMPDEV, /* compress device has been initialised */
+	ST_INPUT_DATA, /* input file has been loaded */
+	ST_MEMORY_ALLOC, /* first mbuf pool created, later ones may follow */
+	ST_PREPARE_BUF, /* at least one mbuf has been allocated */
+	ST_DURING_TEST /* buffers are ready, measurements are running */
+} cleanup = ST_CLEAR;
+
+/**
+ * Check whether size is one of the values described by range.
+ *
+ * @param size
+ *   Value to validate.
+ * @param range
+ *   Supported range: minimum, maximum and step between supported values.
+ * @return
+ *   0 if size is supported, -1 otherwise.
+ */
+static int
+param_range_check(uint16_t size, const struct rte_param_log2_range *range)
+{
+	/* Reject anything outside [min, max] */
+	if (size < range->min || size > range->max)
+		return -1;
+
+	/* No increment: the bounds check above is the only constraint */
+	if (range->increment == 0)
+		return 0;
+
+	/* Supported sizes are min, min + increment, ... up to max */
+	return ((size - range->min) % range->increment == 0) ? 0 : -1;
+}
+
+/**
+ * Verify that the selected compress device can run the configured test.
+ *
+ * Besides validating, this adjusts test_data: an unspecified window size
+ * (-1) becomes the device maximum, and max_sgl_segs is forced to 1 when the
+ * device cannot handle chained mbufs.
+ *
+ * @param test_data
+ *   Test configuration; cdev_id must already identify the device.
+ * @return
+ *   0 if the device is usable, -1 (after logging the reason) otherwise.
+ */
+static int
+comp_perf_check_capabilities(struct comp_test_data *test_data)
+{
+	const struct rte_compressdev_capabilities *cap =
+		rte_compressdev_capability_get(test_data->cdev_id,
+					       RTE_COMP_ALGO_DEFLATE);
+	uint64_t features;
+
+	if (cap == NULL) {
+		RTE_LOG(ERR, USER1,
+			"Compress device does not support DEFLATE\n");
+		return -1;
+	}
+
+	features = cap->comp_feature_flags;
+
+	/* Huffman encoding */
+	switch (test_data->huffman_enc) {
+	case RTE_COMP_HUFFMAN_FIXED:
+		if (!(features & RTE_COMP_FF_HUFFMAN_FIXED)) {
+			RTE_LOG(ERR, USER1,
+				"Compress device does not supported Fixed Huffman\n");
+			return -1;
+		}
+		break;
+	case RTE_COMP_HUFFMAN_DYNAMIC:
+		if (!(features & RTE_COMP_FF_HUFFMAN_DYNAMIC)) {
+			RTE_LOG(ERR, USER1,
+				"Compress device does not supported Dynamic Huffman\n");
+			return -1;
+		}
+		break;
+	default:
+		break;
+	}
+
+	/* Window size */
+	if (test_data->window_sz == -1) {
+		/* Set window size to PMD maximum if none was specified */
+		test_data->window_sz = cap->window_size.max;
+	} else if (param_range_check(test_data->window_sz,
+			&cap->window_size) < 0) {
+		RTE_LOG(ERR, USER1,
+			"Compress device does not support "
+			"this window size\n");
+		return -1;
+	}
+
+	/* Fall back to single-segment mbufs if chaining is not supported */
+	if (test_data->max_sgl_segs > 1 &&
+			!(features & RTE_COMP_FF_OOP_SGL_IN_SGL_OUT)) {
+		RTE_LOG(INFO, USER1, "Compress device does not support "
+				"chained mbufs. Max SGL segments set to 1\n");
+		test_data->max_sgl_segs = 1;
+	}
+
+	/* Level 0 support */
+	if (test_data->level.min == 0 &&
+			!(features & RTE_COMP_FF_NONCOMPRESSED_BLOCKS)) {
+		RTE_LOG(ERR, USER1, "Compress device does not support "
+				"level 0 (no compression)\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/**
+ * Create the mbuf and operation pools and allocate the flat buffers used
+ * by the test.
+ *
+ * The global cleanup state moves to ST_MEMORY_ALLOC as soon as the first
+ * pool exists; on a later failure the caller releases whatever was
+ * allocated so far.
+ *
+ * @param test_data
+ *   Test configuration; input_data_sz, seg_sz and max_sgl_segs must be set.
+ *   total_bufs and all pool/buffer pointers are filled in.
+ * @return
+ *   0 on success, -1 (after logging the reason) on failure.
+ */
+static int
+comp_perf_allocate_memory(struct comp_test_data *test_data)
+{
+	uint32_t total_segs;
+
+	/* Both values are used as divisors below */
+	if (test_data->seg_sz == 0 || test_data->max_sgl_segs == 0) {
+		RTE_LOG(ERR, USER1, "Segment size and maximum number of "
+				"SGL segments must not be 0\n");
+		return -1;
+	}
+
+	/* Number of segments for input and output
+	 * (compression and decompression)
+	 */
+	total_segs = DIV_CEIL(test_data->input_data_sz, test_data->seg_sz);
+	test_data->comp_buf_pool = rte_pktmbuf_pool_create("comp_buf_pool",
+				total_segs,
+				0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
+				rte_socket_id());
+	if (test_data->comp_buf_pool == NULL) {
+		RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
+		return -1;
+	}
+
+	/* From here on there is something for the cleanup path to release */
+	cleanup = ST_MEMORY_ALLOC;
+	test_data->decomp_buf_pool = rte_pktmbuf_pool_create("decomp_buf_pool",
+				total_segs,
+				0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
+				rte_socket_id());
+	if (test_data->decomp_buf_pool == NULL) {
+		RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
+		return -1;
+	}
+
+	/* One operation per (possibly chained) mbuf */
+	test_data->total_bufs = DIV_CEIL(total_segs, test_data->max_sgl_segs);
+
+	test_data->op_pool = rte_comp_op_pool_create("op_pool",
+				  test_data->total_bufs,
+				  0, 0, rte_socket_id());
+	if (test_data->op_pool == NULL) {
+		RTE_LOG(ERR, USER1, "Comp op mempool could not be created\n");
+		return -1;
+	}
+
+	/*
+	 * Compressed data might be a bit larger than input data,
+	 * if data cannot be compressed
+	 */
+	test_data->compressed_data = rte_zmalloc_socket(NULL,
+				test_data->input_data_sz * EXPANSE_RATIO
+							+ MIN_ISAL_SIZE, 0,
+				rte_socket_id());
+	if (test_data->compressed_data == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the compressed data "
+				"could not be allocated\n");
+		return -1;
+	}
+
+	test_data->decompressed_data = rte_zmalloc_socket(NULL,
+				test_data->input_data_sz, 0,
+				rte_socket_id());
+	if (test_data->decompressed_data == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the decompressed data "
+				"could not be allocated\n");
+		return -1;
+	}
+
+	test_data->comp_bufs = rte_zmalloc_socket(NULL,
+			test_data->total_bufs * sizeof(struct rte_mbuf *),
+			0, rte_socket_id());
+	if (test_data->comp_bufs == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the compression mbufs"
+				" could not be allocated\n");
+		return -1;
+	}
+
+	test_data->decomp_bufs = rte_zmalloc_socket(NULL,
+			test_data->total_bufs * sizeof(struct rte_mbuf *),
+			0, rte_socket_id());
+	if (test_data->decomp_bufs == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the decompression mbufs"
+				" could not be allocated\n");
+		return -1;
+	}
+	return 0;
+}
+
+/**
+ * Load the input file into a newly allocated buffer.
+ *
+ * If input_data_sz is 0 it is set to the file size. If it is larger than
+ * the file, the file content is repeated until the buffer is full.
+ *
+ * @param test_data
+ *   Test configuration; input_file names the file to read. input_data and,
+ *   if it was 0, input_data_sz are filled in.
+ * @return
+ *   0 on success, -1 (after logging the reason) on failure. The file is
+ *   closed in both cases; input_data is not freed here on failure.
+ */
+static int
+comp_perf_dump_input_data(struct comp_test_data *test_data)
+{
+	/* Binary mode: the content must be read back byte for byte */
+	FILE *f = fopen(test_data->input_file, "rb");
+	int ret = -1;
+
+	if (f == NULL) {
+		RTE_LOG(ERR, USER1, "Input file could not be opened\n");
+		return -1;
+	}
+
+	if (fseek(f, 0, SEEK_END) != 0) {
+		RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
+		goto end;
+	}
+
+	/* ftell() reports failure as -1, so check before widening to size_t */
+	long file_end = ftell(f);
+
+	if (file_end < 0) {
+		RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
+		goto end;
+	}
+	if (file_end == 0) {
+		RTE_LOG(ERR, USER1, "Input file is empty\n");
+		goto end;
+	}
+
+	size_t actual_file_sz = file_end;
+	/* If extended input data size has not been set,
+	 * input data size = file size
+	 */
+
+	if (test_data->input_data_sz == 0)
+		test_data->input_data_sz = actual_file_sz;
+
+	if (fseek(f, 0, SEEK_SET) != 0) {
+		RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
+		goto end;
+	}
+
+	test_data->input_data = rte_zmalloc_socket(NULL,
+				test_data->input_data_sz, 0, rte_socket_id());
+
+	if (test_data->input_data == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
+				"file could not be allocated\n");
+		goto end;
+	}
+
+	size_t remaining_data = test_data->input_data_sz;
+	uint8_t *data = test_data->input_data;
+
+	/* Read the file as many times as needed to fill the buffer */
+	while (remaining_data > 0) {
+		size_t data_to_read = RTE_MIN(remaining_data, actual_file_sz);
+
+		if (fread(data, data_to_read, 1, f) != 1) {
+			RTE_LOG(ERR, USER1, "Input file could not be read\n");
+			goto end;
+		}
+		/* Rewind so that the next pass starts from the beginning */
+		if (fseek(f, 0, SEEK_SET) != 0) {
+			RTE_LOG(ERR, USER1,
+				"Size of input could not be calculated\n");
+			goto end;
+		}
+		remaining_data -= data_to_read;
+		data += data_to_read;
+	}
+
+	if (test_data->input_data_sz > actual_file_sz)
+		RTE_LOG(INFO, USER1,
+		  "%zu bytes read from file %s, extending the file %.2f times\n",
+			test_data->input_data_sz, test_data->input_file,
+			(double)test_data->input_data_sz/actual_file_sz);
+	else
+		RTE_LOG(INFO, USER1,
+			"%zu bytes read from file %s\n",
+			test_data->input_data_sz, test_data->input_file);
+
+	ret = 0;
+
+end:
+	fclose(f);
+	return ret;
+}
+
+/**
+ * Select, configure and start the compress device used for the test.
+ *
+ * The first device reported for test_data->driver_name is used; it is
+ * configured with a single queue pair and then started.
+ *
+ * @param test_data
+ *   Test configuration; cdev_id is set to the selected device.
+ * @return
+ *   0 on success, -EINVAL if no matching device exists, -1 on any other
+ *   failure.
+ */
+static int
+comp_perf_initialize_compressdev(struct comp_test_data *test_data)
+{
+	uint8_t cdev_ids[RTE_COMPRESS_MAX_DEVS];
+	uint8_t nb_cdevs;
+
+	nb_cdevs = rte_compressdev_devices_get(test_data->driver_name,
+			cdev_ids, RTE_COMPRESS_MAX_DEVS);
+
+	if (nb_cdevs == 0) {
+		RTE_LOG(ERR, USER1, "No compress devices type %s available\n",
+				test_data->driver_name);
+		return -EINVAL;
+	}
+	if (nb_cdevs > 1)
+		RTE_LOG(INFO, USER1,
+			"Only the first compress device will be used\n");
+
+	test_data->cdev_id = cdev_ids[0];
+
+	/* Make sure the device supports what the test is about to request */
+	if (comp_perf_check_capabilities(test_data) < 0)
+		return -1;
+
+	/* Configure compressdev (one device, one queue pair) */
+	struct rte_compressdev_config dev_conf = {
+		.socket_id = rte_socket_id(),
+		.nb_queue_pairs = 1,
+		.max_nb_priv_xforms = NUM_MAX_XFORMS,
+		.max_nb_streams = 0
+	};
+
+	if (rte_compressdev_configure(test_data->cdev_id, &dev_conf) < 0) {
+		RTE_LOG(ERR, USER1, "Device configuration failed\n");
+		return -1;
+	}
+
+	if (rte_compressdev_queue_pair_setup(test_data->cdev_id, 0,
+			NUM_MAX_INFLIGHT_OPS, rte_socket_id()) < 0) {
+		RTE_LOG(ERR, USER1, "Queue pair setup failed\n");
+		return -1;
+	}
+
+	if (rte_compressdev_start(test_data->cdev_id) < 0) {
+		RTE_LOG(ERR, USER1, "Device could not be started\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/**
+ * Build the input and output mbuf chains for every operation.
+ *
+ * For each of the total_bufs entries, a chain from decomp_buf_pool is
+ * filled with the next part of the input data (at most max_sgl_segs
+ * segments of seg_sz bytes), and a chain from comp_buf_pool with the same
+ * number of segments is reserved as the output.
+ *
+ * The global cleanup state moves to ST_PREPARE_BUF once the first mbuf has
+ * been allocated; on failure the caller frees the chains stored so far.
+ *
+ * @param test_data
+ *   Test configuration with the pools, buffer arrays and input data set up.
+ * @return
+ *   0 on success, -1 (after logging the reason) on failure.
+ */
+static int
+prepare_bufs(struct comp_test_data *test_data)
+{
+	/* size_t, so that large inputs are not truncated */
+	size_t remaining_data = test_data->input_data_sz;
+	uint8_t *input_data_ptr = test_data->input_data;
+	size_t data_sz;
+	uint8_t *data_addr;
+	uint32_t i, j;
+
+	for (i = 0; i < test_data->total_bufs; i++) {
+		/* Allocate data in input mbuf and copy data from input file */
+		test_data->decomp_bufs[i] =
+			rte_pktmbuf_alloc(test_data->decomp_buf_pool);
+		if (test_data->decomp_bufs[i] == NULL) {
+			RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
+			return -1;
+		}
+
+		cleanup = ST_PREPARE_BUF;
+		data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
+		data_addr = (uint8_t *) rte_pktmbuf_append(
+					test_data->decomp_bufs[i], data_sz);
+		if (data_addr == NULL) {
+			RTE_LOG(ERR, USER1, "Could not append data\n");
+			return -1;
+		}
+		rte_memcpy(data_addr, input_data_ptr, data_sz);
+
+		input_data_ptr += data_sz;
+		remaining_data -= data_sz;
+
+		/* Already one segment in the mbuf */
+		uint16_t segs_per_mbuf = 1;
+
+		/* Chain mbufs if needed for input mbufs */
+		while (segs_per_mbuf < test_data->max_sgl_segs
+				&& remaining_data > 0) {
+			struct rte_mbuf *next_seg =
+				rte_pktmbuf_alloc(test_data->decomp_buf_pool);
+
+			if (next_seg == NULL) {
+				RTE_LOG(ERR, USER1,
+					"Could not allocate mbuf\n");
+				return -1;
+			}
+
+			data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
+			data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
+				data_sz);
+
+			if (data_addr == NULL) {
+				RTE_LOG(ERR, USER1, "Could not append data\n");
+				/* Not chained yet: nobody else would free it */
+				rte_pktmbuf_free(next_seg);
+				return -1;
+			}
+
+			rte_memcpy(data_addr, input_data_ptr, data_sz);
+			input_data_ptr += data_sz;
+			remaining_data -= data_sz;
+
+			if (rte_pktmbuf_chain(test_data->decomp_bufs[i],
+					next_seg) < 0) {
+				RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
+				rte_pktmbuf_free(next_seg);
+				return -1;
+			}
+			segs_per_mbuf++;
+		}
+
+		/* Allocate data in output mbuf */
+		test_data->comp_bufs[i] =
+			rte_pktmbuf_alloc(test_data->comp_buf_pool);
+		if (test_data->comp_bufs[i] == NULL) {
+			RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
+			return -1;
+		}
+		data_addr = (uint8_t *) rte_pktmbuf_append(
+					test_data->comp_bufs[i],
+					test_data->seg_sz);
+		if (data_addr == NULL) {
+			RTE_LOG(ERR, USER1, "Could not append data\n");
+			return -1;
+		}
+
+		/* Chain mbufs if needed for output mbufs */
+		for (j = 1; j < segs_per_mbuf; j++) {
+			struct rte_mbuf *next_seg =
+				rte_pktmbuf_alloc(test_data->comp_buf_pool);
+
+			if (next_seg == NULL) {
+				RTE_LOG(ERR, USER1,
+					"Could not allocate mbuf\n");
+				return -1;
+			}
+
+			data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
+				test_data->seg_sz);
+
+			if (data_addr == NULL) {
+				RTE_LOG(ERR, USER1, "Could not append data\n");
+				/* Not chained yet: nobody else would free it */
+				rte_pktmbuf_free(next_seg);
+				return -1;
+			}
+
+			if (rte_pktmbuf_chain(test_data->comp_bufs[i],
+					next_seg) < 0) {
+				RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
+				rte_pktmbuf_free(next_seg);
+				return -1;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * Free the compression and decompression mbuf stored in each of the
+ * total_bufs slots.
+ *
+ * @param test_data
+ *   Test configuration owning the comp_bufs and decomp_bufs arrays.
+ */
+static void
+free_bufs(struct comp_test_data *test_data)
+{
+	uint32_t idx = 0;
+
+	while (idx < test_data->total_bufs) {
+		rte_pktmbuf_free(test_data->comp_bufs[idx]);
+		rte_pktmbuf_free(test_data->decomp_bufs[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Post-process one burst of dequeued operations.
+ *
+ * - Fails if any operation did not complete successfully.
+ * - On a verification run (benchmarking == 0), appends the data produced by
+ *   every operation to *output_data_ptr and advances that pointer and
+ *   *output_size accordingly.
+ * - On the last iteration, sets the destination mbuf lengths to the size
+ *   actually produced, so that the chain can serve as the source of the
+ *   following run.
+ *
+ * Returns 0 on success, -1 (after logging the reason) on failure.
+ */
+static int
+process_deq_ops(struct comp_test_data *test_data,
+		struct rte_comp_op **deq_ops, uint16_t num_deq,
+		unsigned int benchmarking, int last_iter,
+		uint8_t **output_data_ptr, size_t *output_size)
+{
+	uint16_t i;
+
+	/* Treat any unsuccessful operation as a failed run */
+	for (i = 0; i < num_deq; i++) {
+		if (deq_ops[i]->status != RTE_COMP_OP_STATUS_SUCCESS) {
+			RTE_LOG(ERR, USER1,
+				"Some operations were not successful\n");
+			return -1;
+		}
+	}
+
+	if (benchmarking == 0) {
+		for (i = 0; i < num_deq; i++) {
+			struct rte_comp_op *op = deq_ops[i];
+			const void *read_data_addr =
+				rte_pktmbuf_read(op->m_dst, 0,
+					op->produced, *output_data_ptr);
+
+			if (read_data_addr == NULL) {
+				RTE_LOG(ERR, USER1,
+				      "Could not copy buffer in destination\n");
+				return -1;
+			}
+
+			/*
+			 * rte_pktmbuf_read() only fills the caller's buffer
+			 * when the data is not contiguous; otherwise it
+			 * returns a pointer into the mbuf, which still has
+			 * to be copied out.
+			 */
+			if (read_data_addr != *output_data_ptr)
+				rte_memcpy(*output_data_ptr, read_data_addr,
+					op->produced);
+			*output_data_ptr += op->produced;
+			*output_size += op->produced;
+		}
+	}
+
+	if (last_iter) {
+		for (i = 0; i < num_deq; i++) {
+			struct rte_comp_op *op = deq_ops[i];
+			struct rte_mbuf *m = op->m_dst;
+			uint32_t remaining_data = op->produced;
+			uint16_t data_to_append;
+
+			m->pkt_len = op->produced;
+			/* Stop at the end of the chain, never walk past it */
+			while (remaining_data > 0 && m != NULL) {
+				data_to_append = RTE_MIN(remaining_data,
+						test_data->seg_sz);
+				m->data_len = data_to_append;
+				remaining_data -= data_to_append;
+				m = m->next;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * Run all buffers through the compress device, once or num_iter times.
+ *
+ * @param test_data
+ *   Test configuration with device, pools and buffers prepared.
+ * @param level
+ *   Compression level; also the index under which the measured duration
+ *   is stored.
+ * @param type
+ *   RTE_COMP_COMPRESS to go from decomp_bufs to comp_bufs, anything else
+ *   to decompress from comp_bufs to decomp_bufs.
+ * @param output_data_ptr
+ *   Verification run only: flat buffer receiving the produced data.
+ * @param output_data_sz
+ *   Verification run only: if not NULL, receives the number of bytes
+ *   produced.
+ * @param benchmarking
+ *   0 for a single verification pass that copies the output; non-zero to
+ *   run test_data->num_iter passes and store the average TSC cycles per
+ *   pass in comp_tsc_duration[level] or decomp_tsc_duration[level].
+ * @return
+ *   0 on success, -1 (after logging the reason) on failure.
+ */
+static int
+main_loop(struct comp_test_data *test_data, uint8_t level,
+			enum rte_comp_xform_type type,
+			uint8_t *output_data_ptr,
+			size_t *output_data_sz,
+			unsigned int benchmarking)
+{
+	uint8_t dev_id;
+	uint32_t i, iter, num_iter;
+	struct rte_comp_op **ops, **deq_ops;
+	void *priv_xform = NULL;
+	struct rte_comp_xform xform;
+	size_t output_size = 0;
+	struct rte_mbuf **input_bufs, **output_bufs;
+	int res = 0;
+	int allocated = 0;
+
+	/* Validate test_data before it is dereferenced for the first time */
+	if (test_data == NULL || !test_data->burst_sz) {
+		RTE_LOG(ERR, USER1,
+			"Unknown burst size\n");
+		return -1;
+	}
+
+	dev_id = test_data->cdev_id;
+
+	/* One array, split in two halves: ops to enqueue and dequeued ops */
+	ops = rte_zmalloc_socket(NULL,
+		2 * test_data->total_bufs * sizeof(struct rte_comp_op *),
+		0, rte_socket_id());
+
+	if (ops == NULL) {
+		RTE_LOG(ERR, USER1,
+			"Can't allocate memory for ops strucures\n");
+		return -1;
+	}
+
+	deq_ops = &ops[test_data->total_bufs];
+
+	if (type == RTE_COMP_COMPRESS) {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_COMPRESS,
+			.compress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.deflate.huffman = test_data->huffman_enc,
+				.level = level,
+				.window_size = test_data->window_sz,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = test_data->decomp_bufs;
+		output_bufs = test_data->comp_bufs;
+	} else {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_DECOMPRESS,
+			.decompress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.window_size = test_data->window_sz,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = test_data->comp_bufs;
+		output_bufs = test_data->decomp_bufs;
+	}
+
+	/* Create private xform */
+	if (rte_compressdev_private_xform_create(dev_id, &xform,
+			&priv_xform) < 0) {
+		RTE_LOG(ERR, USER1, "Private xform could not be created\n");
+		res = -1;
+		goto end;
+	}
+
+	uint64_t tsc_start, tsc_end, tsc_duration;
+
+	tsc_start = tsc_end = tsc_duration = 0;
+	if (benchmarking) {
+		tsc_start = rte_rdtsc();
+		num_iter = test_data->num_iter;
+	} else
+		num_iter = 1;
+
+	for (iter = 0; iter < num_iter; iter++) {
+		uint32_t total_ops = test_data->total_bufs;
+		uint32_t remaining_ops = test_data->total_bufs;
+		uint32_t total_deq_ops = 0;
+		uint32_t total_enq_ops = 0;
+		uint16_t ops_unused = 0;
+		uint16_t num_enq = 0;
+		uint16_t num_deq = 0;
+
+		output_size = 0;
+
+		while (remaining_ops > 0) {
+			uint16_t num_ops = RTE_MIN(remaining_ops,
+						   test_data->burst_sz);
+			uint16_t ops_needed = num_ops - ops_unused;
+
+			/*
+			 * Move the unused operations from the previous
+			 * enqueue_burst call to the front, to maintain order
+			 */
+			if ((ops_unused > 0) && (num_enq > 0)) {
+				size_t nb_b_to_mov =
+				      ops_unused * sizeof(struct rte_comp_op *);
+
+				memmove(ops, &ops[num_enq], nb_b_to_mov);
+			}
+
+			/* Allocate compression operations */
+			if (ops_needed && !rte_comp_op_bulk_alloc(
+						test_data->op_pool,
+						&ops[ops_unused],
+						ops_needed)) {
+				RTE_LOG(ERR, USER1,
+				      "Could not allocate enough operations\n");
+				res = -1;
+				goto end;
+			}
+			allocated += ops_needed;
+
+			for (i = 0; i < ops_needed; i++) {
+				/*
+				 * Calculate next buffer to attach to operation
+				 */
+				uint32_t buf_id = total_enq_ops + i +
+						ops_unused;
+				uint16_t op_id = ops_unused + i;
+				/* Reset all data in output buffers */
+				struct rte_mbuf *m = output_bufs[buf_id];
+
+				m->pkt_len = test_data->seg_sz * m->nb_segs;
+				while (m) {
+					m->data_len = m->buf_len - m->data_off;
+					m = m->next;
+				}
+				ops[op_id]->m_src = input_bufs[buf_id];
+				ops[op_id]->m_dst = output_bufs[buf_id];
+				ops[op_id]->src.offset = 0;
+				ops[op_id]->src.length =
+					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
+				ops[op_id]->dst.offset = 0;
+				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
+				ops[op_id]->input_chksum = buf_id;
+				ops[op_id]->private_xform = priv_xform;
+			}
+
+			num_enq = rte_compressdev_enqueue_burst(dev_id, 0, ops,
+								num_ops);
+			ops_unused = num_ops - num_enq;
+			remaining_ops -= num_enq;
+			total_enq_ops += num_enq;
+
+			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
+							   deq_ops,
+							   test_data->burst_sz);
+			total_deq_ops += num_deq;
+
+			res = process_deq_ops(test_data, deq_ops, num_deq,
+					benchmarking, iter == num_iter - 1,
+					&output_data_ptr, &output_size);
+			/* The dequeued ops go back to the pool in any case */
+			rte_mempool_put_bulk(test_data->op_pool,
+					     (void **)deq_ops, num_deq);
+			allocated -= num_deq;
+			if (res < 0)
+				goto end;
+		}
+
+		/* Dequeue the last operations */
+		while (total_deq_ops < total_ops) {
+			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
+						deq_ops, test_data->burst_sz);
+			total_deq_ops += num_deq;
+
+			res = process_deq_ops(test_data, deq_ops, num_deq,
+					benchmarking, iter == num_iter - 1,
+					&output_data_ptr, &output_size);
+			rte_mempool_put_bulk(test_data->op_pool,
+					     (void **)deq_ops, num_deq);
+			allocated -= num_deq;
+			if (res < 0)
+				goto end;
+		}
+	}
+
+	if (benchmarking) {
+		tsc_end = rte_rdtsc();
+		tsc_duration = tsc_end - tsc_start;
+
+		/* Store the average number of cycles per iteration */
+		if (type == RTE_COMP_COMPRESS)
+			test_data->comp_tsc_duration[level] =
+					tsc_duration / num_iter;
+		else
+			test_data->decomp_tsc_duration[level] =
+					tsc_duration / num_iter;
+	}
+
+	if (benchmarking == 0 && output_data_sz)
+		*output_data_sz = output_size;
+end:
+	/*
+	 * NOTE(review): on an error path "allocated" may count operations
+	 * that are still held by the device, and the first entries of ops[]
+	 * are not necessarily the outstanding ones - confirm that returning
+	 * them to the pool here is safe.
+	 */
+	rte_mempool_put_bulk(test_data->op_pool, (void **)ops, allocated);
+	rte_compressdev_private_xform_free(dev_id, priv_xform);
+	rte_free(ops);
+	return res;
+}
+
 int
 main(int argc, char **argv)
 {
-	int ret;
+	uint8_t level, level_idx = 0;
+	int ret, i;
 	struct comp_test_data *test_data;
 
 	/* Initialise DPDK EAL */
@@ -29,24 +743,186 @@ main(int argc, char **argv)
 		rte_exit(EXIT_FAILURE, "Cannot reserve memory in socket %d\n",
 				rte_socket_id());
 
+	cleanup = ST_TEST_DATA;
 	comp_perf_options_default(test_data);
 
 	if (comp_perf_options_parse(test_data, argc, argv) < 0) {
 		RTE_LOG(ERR, USER1,
 			"Parsing one or more user options failed\n");
 		ret = EXIT_FAILURE;
-		goto err;
+		goto end;
 	}
 
 	if (comp_perf_options_check(test_data) < 0) {
 		ret = EXIT_FAILURE;
-		goto err;
+		goto end;
+	}
+
+	if (comp_perf_initialize_compressdev(test_data) < 0) {
+		ret = EXIT_FAILURE;
+		goto end;
+	}
+
+	cleanup = ST_COMPDEV;
+	if (comp_perf_dump_input_data(test_data) < 0) {
+		ret = EXIT_FAILURE;
+		goto end;
+	}
+
+	cleanup = ST_INPUT_DATA;
+	if (comp_perf_allocate_memory(test_data) < 0) {
+		ret = EXIT_FAILURE;
+		goto end;
+	}
+
+	if (prepare_bufs(test_data) < 0) {
+		ret = EXIT_FAILURE;
+		goto end;
+	}
+
+	if (test_data->level.inc != 0)
+		level = test_data->level.min;
+	else
+		level = test_data->level.list[0];
+
+	size_t comp_data_sz;
+	size_t decomp_data_sz;
+
+	printf("Burst size = %u\n", test_data->burst_sz);
+	printf("File size = %zu\n", test_data->input_data_sz);
+
+	printf("%6s%12s%17s%19s%21s%15s%21s%23s%16s\n",
+		"Level", "Comp size", "Comp ratio [%]",
+		"Comp [Cycles/it]", "Comp [Cycles/Byte]", "Comp [Gbps]",
+		"Decomp [Cycles/it]", "Decomp [Cycles/Byte]", "Decomp [Gbps]");
+
+	cleanup = ST_DURING_TEST;
+	while (level <= test_data->level.max) {
+		/*
+		 * Run a first iteration, to verify compression and
+		 * get the compression ratio for the level
+		 */
+		if (main_loop(test_data, level, RTE_COMP_COMPRESS,
+			      test_data->compressed_data,
+			      &comp_data_sz, 0) < 0) {
+			ret = EXIT_FAILURE;
+			goto end;
+		}
+
+		if (main_loop(test_data, level, RTE_COMP_DECOMPRESS,
+			      test_data->decompressed_data,
+			      &decomp_data_sz, 0) < 0) {
+			ret = EXIT_FAILURE;
+			goto end;
+		}
+
+		if (decomp_data_sz != test_data->input_data_sz) {
+			RTE_LOG(ERR, USER1,
+		   "Decompressed data length not equal to input data length\n");
+			RTE_LOG(ERR, USER1,
+				"Decompressed size = %zu, expected = %zu\n",
+				decomp_data_sz, test_data->input_data_sz);
+			ret = EXIT_FAILURE;
+			goto end;
+		} else {
+			if (memcmp(test_data->decompressed_data,
+					test_data->input_data,
+					test_data->input_data_sz) != 0) {
+				RTE_LOG(ERR, USER1,
+			    "Decompressed data is not the same as file data\n");
+				ret = EXIT_FAILURE;
+				goto end;
+			}
+		}
+
+		double ratio = (double) comp_data_sz /
+						test_data->input_data_sz * 100;
+
+		/*
+		 * Run the tests twice, discarding the first performance
+		 * results, before the cache is warmed up
+		 */
+		for (i = 0; i < 2; i++) {
+			if (main_loop(test_data, level, RTE_COMP_COMPRESS,
+					NULL, NULL, 1) < 0) {
+				ret = EXIT_FAILURE;
+				goto end;
+			}
+		}
+
+		for (i = 0; i < 2; i++) {
+			if (main_loop(test_data, level, RTE_COMP_DECOMPRESS,
+					NULL, NULL, 1) < 0) {
+				ret = EXIT_FAILURE;
+				goto end;
+			}
+		}
+
+		uint64_t comp_tsc_duration =
+				test_data->comp_tsc_duration[level];
+		double comp_tsc_byte = (double)comp_tsc_duration /
+						test_data->input_data_sz;
+		double comp_gbps = rte_get_tsc_hz() / comp_tsc_byte * 8 /
+				1000000000;
+		uint64_t decomp_tsc_duration =
+				test_data->decomp_tsc_duration[level];
+		double decomp_tsc_byte = (double)decomp_tsc_duration /
+						test_data->input_data_sz;
+		double decomp_gbps = rte_get_tsc_hz() / decomp_tsc_byte * 8 /
+				1000000000;
+
+		printf("%6u%12zu%17.2f%19"PRIu64"%21.2f"
+					"%15.2f%21"PRIu64"%23.2f%16.2f\n",
+		       level, comp_data_sz, ratio, comp_tsc_duration,
+		       comp_tsc_byte, comp_gbps, decomp_tsc_duration,
+		       decomp_tsc_byte, decomp_gbps);
+
+		if (test_data->level.inc != 0)
+			level += test_data->level.inc;
+		else {
+			if (++level_idx == test_data->level.count)
+				break;
+			level = test_data->level.list[level_idx];
+		}
 	}
 
 	ret = EXIT_SUCCESS;
 
-err:
-	rte_free(test_data);
+end:
+	switch (cleanup) {
 
+	case ST_DURING_TEST:
+	case ST_PREPARE_BUF:
+		free_bufs(test_data);
+		/* fallthrough */
+	case ST_MEMORY_ALLOC:
+		rte_free(test_data->decomp_bufs);
+		rte_free(test_data->comp_bufs);
+		rte_free(test_data->decompressed_data);
+		rte_free(test_data->compressed_data);
+		rte_mempool_free(test_data->op_pool);
+		rte_mempool_free(test_data->decomp_buf_pool);
+		rte_mempool_free(test_data->comp_buf_pool);
+		/* fallthrough */
+	case ST_INPUT_DATA:
+		rte_free(test_data->input_data);
+		/* fallthrough */
+	case ST_COMPDEV:
+		if (test_data->cdev_id != -1)
+			rte_compressdev_stop(test_data->cdev_id);
+		/* fallthrough */
+	case ST_TEST_DATA:
+		rte_free(test_data);
+		/* fallthrough */
+	case ST_CLEAR:
+	default:
+		i = rte_eal_cleanup();
+		if (i) {
+			RTE_LOG(ERR, USER1,
+				"Error from rte_eal_cleanup(), %d\n", i);
+			ret = i;
+		}
+		break;
+	}
 	return ret;
 }
-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH v2 3/3] doc/guides/tools: add doc files
  2018-11-02  9:43 ` [dpdk-dev] [PATCH v2 0/3] add initial version of compress-perf Tomasz Jozwiak
  2018-11-02  9:44   ` [dpdk-dev] [PATCH v2 1/3] app/compress-perf: add parser Tomasz Jozwiak
  2018-11-02  9:44   ` [dpdk-dev] [PATCH v2 2/3] app/compress-perf: add performance measurement Tomasz Jozwiak
@ 2018-11-02  9:44   ` Tomasz Jozwiak
  2018-11-05  8:57     ` Verma, Shally
  2018-11-02 11:04   ` [dpdk-dev] [PATCH v2 0/3] add initial version of compress-perf Bruce Richardson
  2018-11-23 13:06   ` [dpdk-dev] [PATCH v3 0/5] " Tomasz Jozwiak
  4 siblings, 1 reply; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-11-02  9:44 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, Shally.Verma, akhil.goyal

Added:
 -  initial version of compression performance test
    description file.
 -  release note in release_18_11.rst

Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
---
 MAINTAINERS                            |  5 +++
 doc/guides/rel_notes/release_18_11.rst |  6 +++
 doc/guides/tools/comp_perf.rst         | 75 ++++++++++++++++++++++++++++++++++
 3 files changed, 86 insertions(+)
 create mode 100644 doc/guides/tools/comp_perf.rst

diff --git a/MAINTAINERS b/MAINTAINERS
index e60379d..cfda6dd 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1242,6 +1242,11 @@ M: Bernard Iremonger <bernard.iremonger@intel.com>
 F: app/test-pmd/
 F: doc/guides/testpmd_app_ug/
 
+Compression performance test application
+M: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
+F: app/test-compress-perf/
+F: doc/guides/tools/comp_perf.rst
+
 Crypto performance test application
 M: Declan Doherty <declan.doherty@intel.com>
 F: app/test-crypto-perf/
diff --git a/doc/guides/rel_notes/release_18_11.rst b/doc/guides/rel_notes/release_18_11.rst
index 376128f..8bc7d05 100644
--- a/doc/guides/rel_notes/release_18_11.rst
+++ b/doc/guides/rel_notes/release_18_11.rst
@@ -285,6 +285,12 @@ New Features
   this application doesn't need to launch dedicated worker threads for vhost
   enqueue/dequeue operations.
 
+* **Added a compression performance test tool.**
+
+   Added a new performance test tool to test the compressdev PMD. The tool tests
+   compression ratio and compression throughput. Dynamic compression test is not
+   supported yet.
+
 
 API Changes
 -----------
diff --git a/doc/guides/tools/comp_perf.rst b/doc/guides/tools/comp_perf.rst
new file mode 100644
index 0000000..2f43412
--- /dev/null
+++ b/doc/guides/tools/comp_perf.rst
@@ -0,0 +1,75 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright(c) 2018 Intel Corporation.
+
+dpdk-test-compress-perf Application
+===================================
+
+The ``dpdk-test-compress-perf`` tool is a Data Plane Development Kit (DPDK)
+utility that allows measuring performance parameters of PMDs available in the
+compress tree. The application reads the data from a file (--input-file),
+dumps all the file into a buffer and fills out the data of input mbufs,
+which are passed to compress device with compression operations.
+Then, the output buffers are fed into the decompression stage, and the resulting
+data is compared against the original data (verification phase). After that,
+a number of iterations are performed, compressing first and decompressing later,
+to check the throughput rate
+(showing cycles/iteration, cycles/Byte and Gbps, for compression and decompression).
+
+
+Limitations
+~~~~~~~~~~~
+
+* Only supports the fixed compression.
+
+Command line options
+--------------------
+
+ ``--driver-name NAME``: compress driver to use
+
+ ``--input-file NAME``: file to compress and decompress
+
+ ``--extended-input-sz N``: extend file data up to this size (default: no extension)
+
+ ``--seg-sz N``: size of segment to store the data (default: 2048)
+
+ ``--burst-sz N``: compress operation burst size
+
+ ``--pool-sz N``: mempool size for compress operations/mbufs (default: 8192)
+
+ ``--max-num-sgl-segs N``: maximum number of segments for each mbuf (default: 16)
+
+ ``--num-iter N``: number of times the file will be compressed/decompressed (default: 10000)
+
+ ``--operation [comp/decomp/comp_and_decomp]``: perform test on compression, decompression or both operations
+
+ ``--huffman-enc [fixed/default]``: Huffman encoding (default: fixed)
+
+ ``--compress-level N``: compression level, which could be a single value, list or range (default: range between 1 and 9)
+
+ ``--window-sz N``: base two log value of compression window size (default: max supported by PMD)
+
+ ``-h``: prints this help
+
+
+Compiling the Application
+-------------------------
+
+**Step 1: PMD setting**
+
+The ``dpdk-test-compress-perf`` tool depends on compression device drivers PMD which
+can be disabled by default in the build configuration file ``common_base``.
+The compression device drivers PMD which should be tested can be enabled by setting::
+
+   CONFIG_RTE_LIBRTE_PMD_ISAL=y
+
+
+Running the Application
+-----------------------
+
+The tool application has a number of command line options. Here is the sample command line:
+
+.. code-block:: console
+
+   ./build/app/dpdk-test-compress-perf  -l 4 -- --driver-name compress_qat --input-file test.txt --seg-sz 8192
+    --compress-level 1:1:9 --num-iter 10 --extended-input-sz 1048576  --max-num-sgl-segs 16 --huffman-enc fixed
+
-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
  2018-10-12 10:15   ` Verma, Shally
  2018-10-15 15:10     ` Daly, Lee
@ 2018-11-02  9:59     ` Jozwiak, TomaszX
  2018-11-05  8:34       ` Verma, Shally
  1 sibling, 1 reply; 76+ messages in thread
From: Jozwiak, TomaszX @ 2018-11-02  9:59 UTC (permalink / raw)
  To: dev, Trahe, Fiona, akhil.goyal, Verma, Shally, De Lara Guarch, Pablo
  Cc: De, Lara, Guarch

Hi Shally,

Sorry for delay - I was on sick leave.
We had some issues with the dynamic compression test, so I blocked this test in V2. Maybe it's too late to add this into this release, but we've decided to send this V2 to DPDK.

My comments are inline (not all have answers so far; I'm still working on that).

> -----Original Message-----
> From: Verma, Shally [mailto:Shally.Verma@cavium.com]
> Sent: Friday, October 12, 2018 12:16 PM
> To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; Trahe,
> Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com; De Lara Guarch, Pablo
> <pablo.de.lara.guarch@intel.com>
> Cc: De@dpdk.org; Lara@dpdk.org; Guarch@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
> measurement
> 
> HI TomaszX
> 
> Sorry for delay in response. Comments inline.
> 
> >-----Original Message-----
> >From: dev <dev-bounces@dpdk.org> On Behalf Of Tomasz Jozwiak
> >Sent: 01 October 2018 18:57
> >To: dev@dpdk.org; fiona.trahe@intel.com; tomaszx.jozwiak@intel.com;
> >akhil.goyal@nxp.com; pablo.de.lara.guarch@intel.com
> >Cc: De@dpdk.org; Lara@dpdk.org; Guarch@dpdk.org
> >Subject: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
> >measurement
> >
> >External Email
> >
> >Added performance measurement part into compression perf. test.
> >
> >Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
> >Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
> >---
> > app/test-compress-perf/main.c | 844
> >++++++++++++++++++++++++++++++++++++++++++
> > 1 file changed, 844 insertions(+)
> >
> >diff --git a/app/test-compress-perf/main.c
> >b/app/test-compress-perf/main.c index f52b98d..093dfaf 100644
> >--- a/app/test-compress-perf/main.c
> >+++ b/app/test-compress-perf/main.c
> >@@ -5,13 +5,721 @@
> > #include <rte_malloc.h>
> > #include <rte_eal.h>
> > #include <rte_log.h>
> >+#include <rte_cycles.h>
> > #include <rte_compressdev.h>
> >
> > #include "comp_perf_options.h"
> >
> >+#define NUM_MAX_XFORMS 16
> >+#define NUM_MAX_INFLIGHT_OPS 512
> >+#define EXPANSE_RATIO 1.05
> >+#define MIN_ISAL_SIZE 8
> >+
> >+#define DIV_CEIL(a, b)  ((a) / (b) + ((a) % (b) != 0))
> >+
> >+static int
> >+param_range_check(uint16_t size, const struct rte_param_log2_range
> >+*range) {
> >+       unsigned int next_size;
> >+
> >+       /* Check lower/upper bounds */
> >+       if (size < range->min)
> >+               return -1;
> >+
> >+       if (size > range->max)
> >+               return -1;
> >+
> >+       /* If range is actually only one value, size is correct */
> >+       if (range->increment == 0)
> >+               return 0;
> >+
> >+       /* Check if value is one of the supported sizes */
> >+       for (next_size = range->min; next_size <= range->max;
> >+                       next_size += range->increment)
> >+               if (size == next_size)
> >+                       return 0;
> >+
> >+       return -1;
> >+}
> >+
> >+static int
> >+comp_perf_check_capabilities(struct comp_test_data *test_data) {
> >+       const struct rte_compressdev_capabilities *cap;
> >+
> >+       cap = rte_compressdev_capability_get(test_data->cdev_id,
> >+                                            RTE_COMP_ALGO_DEFLATE);
> >+
> >+       if (cap == NULL) {
> >+               RTE_LOG(ERR, USER1,
> >+                       "Compress device does not support DEFLATE\n");
> >+               return -1;
> >+       }
> >+
> >+       uint64_t comp_flags = cap->comp_feature_flags;
> >+
> >+       /* Huffman enconding */
> >+       if (test_data->huffman_enc == RTE_COMP_HUFFMAN_FIXED &&
> >+                       (comp_flags & RTE_COMP_FF_HUFFMAN_FIXED) == 0) {
> >+               RTE_LOG(ERR, USER1,
> >+                       "Compress device does not supported Fixed Huffman\n");
> >+               return -1;
> >+       }
> >+
> >+       if (test_data->huffman_enc == RTE_COMP_HUFFMAN_DYNAMIC &&
> >+                       (comp_flags & RTE_COMP_FF_HUFFMAN_DYNAMIC) == 0) {
> >+               RTE_LOG(ERR, USER1,
> >+                       "Compress device does not supported Dynamic Huffman\n");
> >+               return -1;
> >+       }
> >+
> >+       /* Window size */
> >+       if (test_data->window_sz != -1) {
> >+               if (param_range_check(test_data->window_sz,
> >+ &cap->window_size)
> What if cap->window_size is 0 i.e. implementation default?

TJ: You probably mean cap->window_size.increment = 0 (because cap->window_size is a structure). In that case we only check that test_data->window_sz >= min and test_data->window_sz <= max, because increment = 0 means (based on the compression API) that there is only one value of window_size (no range is supported).



> 
> >+                               < 0) {
> >+                       RTE_LOG(ERR, USER1,
> >+                               "Compress device does not support "
> >+                               "this window size\n");
> >+                       return -1;
> >+               }
> >+       } else
> >+               /* Set window size to PMD maximum if none was specified */
> >+               test_data->window_sz = cap->window_size.max;
> >+
> >+       /* Check if chained mbufs is supported */
> >+       if (test_data->max_sgl_segs > 1  &&
> >+                       (comp_flags & RTE_COMP_FF_OOP_SGL_IN_SGL_OUT) == 0) {
> >+               RTE_LOG(INFO, USER1, "Compress device does not support "
> >+                               "chained mbufs. Max SGL segments set to 1\n");
> >+               test_data->max_sgl_segs = 1;
> >+       }
> >+
> >+       /* Level 0 support */
> >+       if (test_data->level.min == 0 &&
> >+                       (comp_flags & RTE_COMP_FF_NONCOMPRESSED_BLOCKS) ==
> 0) {
> >+               RTE_LOG(ERR, USER1, "Compress device does not support "
> >+                               "level 0 (no compression)\n");
> >+               return -1;
> >+       }
> >+
> >+       return 0;
> >+}
> >+
> >+static int
> >+comp_perf_allocate_memory(struct comp_test_data *test_data) {
> >+       /* Number of segments for input and output
> >+        * (compression and decompression)
> >+        */
> >+       uint32_t total_segs = DIV_CEIL(test_data->input_data_sz,
> >+                       test_data->seg_sz);
> >+       test_data->comp_buf_pool =
> rte_pktmbuf_pool_create("comp_buf_pool",
> >+                               total_segs,
> >+                               0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
> >+                               rte_socket_id());
> >+       if (test_data->comp_buf_pool == NULL) {
> >+               RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
> >+               return -1;
> >+       }
> >+
> >+       test_data->decomp_buf_pool =
> rte_pktmbuf_pool_create("decomp_buf_pool",
> >+                               total_segs,
> >+                               0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
> >+                               rte_socket_id());
> >+       if (test_data->decomp_buf_pool == NULL) {
> >+               RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
> >+               return -1;
> >+       }
> >+
> >+       test_data->total_bufs = DIV_CEIL(total_segs,
> >+ test_data->max_sgl_segs);
> >+
> >+       test_data->op_pool = rte_comp_op_pool_create("op_pool",
> >+                                 test_data->total_bufs,
> >+                                 0, 0, rte_socket_id());
> >+       if (test_data->op_pool == NULL) {
> >+               RTE_LOG(ERR, USER1, "Comp op mempool could not be
> created\n");
> >+               return -1;
> >+       }
> >+
> >+       /*
> >+        * Compressed data might be a bit larger than input data,
> >+        * if data cannot be compressed
> Possible only if it's zlib format right? Or deflate as well?

TJ: This is due to the possibility of incompressible data. In that case the compressed data can be bigger than the input, because of the framing which has to be added to the data. Yes, it is related to zlib and deflate as well.

> 
> >+        */
> >+       test_data->compressed_data = rte_zmalloc_socket(NULL,
> >+                               test_data->input_data_sz * EXPANSE_RATIO
> >+                                                       + MIN_ISAL_SIZE, 0,
> >+                               rte_socket_id());
> >+       if (test_data->compressed_data == NULL) {
> >+               RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
> >+                               "file could not be allocated\n");
> >+               return -1;
> >+       }
> >+
> >+       test_data->decompressed_data = rte_zmalloc_socket(NULL,
> >+                               test_data->input_data_sz, 0,
> >+                               rte_socket_id());
> >+       if (test_data->decompressed_data == NULL) {
> >+               RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
> >+                               "file could not be allocated\n");
> >+               return -1;
> >+       }
> >+
> >+       test_data->comp_bufs = rte_zmalloc_socket(NULL,
> >+                       test_data->total_bufs * sizeof(struct rte_mbuf *),
> >+                       0, rte_socket_id());
> >+       if (test_data->comp_bufs == NULL) {
> >+               RTE_LOG(ERR, USER1, "Memory to hold the compression mbufs"
> >+                               " could not be allocated\n");
> >+               return -1;
> >+       }
> >+
> >+       test_data->decomp_bufs = rte_zmalloc_socket(NULL,
> >+                       test_data->total_bufs * sizeof(struct rte_mbuf *),
> >+                       0, rte_socket_id());
> >+       if (test_data->decomp_bufs == NULL) {
> >+               RTE_LOG(ERR, USER1, "Memory to hold the decompression
> mbufs"
> >+                               " could not be allocated\n");
> >+               return -1;
> >+       }
> >+       return 0;
> >+}
> >+
> >+static int
> >+comp_perf_dump_input_data(struct comp_test_data *test_data) {
> >+       FILE *f = fopen(test_data->input_file, "r");
> >+
> >+       if (f == NULL) {
> >+               RTE_LOG(ERR, USER1, "Input file could not be opened\n");
> >+               return -1;
> >+       }
> >+
> >+       if (fseek(f, 0, SEEK_END) != 0) {
> >+               RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
> >+               goto err;
> >+       }
> >+       size_t actual_file_sz = ftell(f);
> >+       /* If extended input data size has not been set,
> >+        * input data size = file size
> >+        */
> >+
> >+       if (test_data->input_data_sz == 0)
> >+               test_data->input_data_sz = actual_file_sz;
> >+
> >+       if (fseek(f, 0, SEEK_SET) != 0) {
> >+               RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
> >+               goto err;
> >+       }
> >+
> >+       test_data->input_data = rte_zmalloc_socket(NULL,
> >+                               test_data->input_data_sz, 0,
> >+ rte_socket_id());
> >+
> >+       if (test_data->input_data == NULL) {
> >+               RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
> >+                               "file could not be allocated\n");
> >+               goto err;
> >+       }
> >+
> >+       size_t remaining_data = test_data->input_data_sz;
> >+       uint8_t *data = test_data->input_data;
> >+
> >+       while (remaining_data > 0) {
> >+               size_t data_to_read = RTE_MIN(remaining_data,
> >+ actual_file_sz);
> >+
> >+               if (fread(data, data_to_read, 1, f) != 1) {
> >+                       RTE_LOG(ERR, USER1, "Input file could not be read\n");
> >+                       goto err;
> >+               }
> >+               if (fseek(f, 0, SEEK_SET) != 0) {
> >+                       RTE_LOG(ERR, USER1,
> >+                               "Size of input could not be calculated\n");
> >+                       goto err;
> >+               }
> >+               remaining_data -= data_to_read;
> >+               data += data_to_read;
> It looks like it will run 2nd time only if input file size < input data size in which
> case it will just keep filling input buffer with repeated data.
> Is that the intention here?

TJ: Yes, exactly. If test_data->input_data_sz is bigger than actual_file_sz, then we fill the whole buffer with repeated data from the file.

> 
> >+       }
> >+
> >+       if (test_data->input_data_sz > actual_file_sz)
> >+               RTE_LOG(INFO, USER1,
> >+                 "%zu bytes read from file %s, extending the file %.2f times\n",
> >+                       test_data->input_data_sz, test_data->input_file,
> >+                       (double)test_data->input_data_sz/actual_file_sz);
> >+       else
> >+               RTE_LOG(INFO, USER1,
> >+                       "%zu bytes read from file %s\n",
> >+                       test_data->input_data_sz,
> >+ test_data->input_file);
> >+
> >+       fclose(f);
> >+
> >+       return 0;
> >+
> >+err:
> >+       fclose(f);
> >+       rte_free(test_data->input_data);
> >+       test_data->input_data = NULL;
> >+
> >+       return -1;
> >+}
> >+
> >+static int
> >+comp_perf_initialize_compressdev(struct comp_test_data *test_data) {
> >+       uint8_t enabled_cdev_count;
> >+       uint8_t enabled_cdevs[RTE_COMPRESS_MAX_DEVS];
> >+
> >+       enabled_cdev_count = rte_compressdev_devices_get(test_data-
> >driver_name,
> >+                       enabled_cdevs, RTE_COMPRESS_MAX_DEVS);
> >+       if (enabled_cdev_count == 0) {
> >+               RTE_LOG(ERR, USER1, "No compress devices type %s available\n",
> >+                               test_data->driver_name);
> >+               return -EINVAL;
> >+       }
> >+
> >+       if (enabled_cdev_count > 1)
> >+               RTE_LOG(INFO, USER1,
> >+                       "Only the first compress device will be
> >+ used\n");
> >+
> >+       test_data->cdev_id = enabled_cdevs[0];
> >+
> >+       if (comp_perf_check_capabilities(test_data) < 0)
> >+               return -1;
> >+
> >+       /* Configure compressdev (one device, one queue pair) */
> >+       struct rte_compressdev_config config = {
> >+               .socket_id = rte_socket_id(),
> >+               .nb_queue_pairs = 1,
> >+               .max_nb_priv_xforms = NUM_MAX_XFORMS,
> >+               .max_nb_streams = 0
> >+       };
> >+
> >+       if (rte_compressdev_configure(test_data->cdev_id, &config) < 0) {
> >+               RTE_LOG(ERR, USER1, "Device configuration failed\n");
> >+               return -1;
> >+       }
> >+
> >+       if (rte_compressdev_queue_pair_setup(test_data->cdev_id, 0,
> >+                       NUM_MAX_INFLIGHT_OPS, rte_socket_id()) < 0) {
> >+               RTE_LOG(ERR, USER1, "Queue pair setup failed\n");
> >+               return -1;
> >+       }
> >+
> >+       if (rte_compressdev_start(test_data->cdev_id) < 0) {
> >+               RTE_LOG(ERR, USER1, "Device could not be started\n");
> >+               return -1;
> >+       }
> >+
> >+       return 0;
> >+}
> >+
> >+static int
> >+prepare_bufs(struct comp_test_data *test_data) {
> >+       uint32_t remaining_data = test_data->input_data_sz;
> >+       uint8_t *input_data_ptr = test_data->input_data;
> >+       size_t data_sz;
> >+       uint8_t *data_addr;
> >+       uint32_t i, j;
> >+
> >+       for (i = 0; i < test_data->total_bufs; i++) {
> >+               /* Allocate data in input mbuf and copy data from input file */
> >+               test_data->decomp_bufs[i] =
> >+                       rte_pktmbuf_alloc(test_data->decomp_buf_pool);
> >+               if (test_data->decomp_bufs[i] == NULL) {
> >+                       RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
> >+                       return -1;
> >+               }
> >+
> >+               data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
> >+               data_addr = (uint8_t *) rte_pktmbuf_append(
> >+                                       test_data->decomp_bufs[i], data_sz);
> >+               if (data_addr == NULL) {
> >+                       RTE_LOG(ERR, USER1, "Could not append data\n");
> >+                       return -1;
> >+               }
> >+               rte_memcpy(data_addr, input_data_ptr, data_sz);
> >+
> >+               input_data_ptr += data_sz;
> >+               remaining_data -= data_sz;
> >+
> >+               /* Already one segment in the mbuf */
> >+               uint16_t segs_per_mbuf = 1;
> >+
> >+               /* Chain mbufs if needed for input mbufs */
> >+               while (segs_per_mbuf < test_data->max_sgl_segs
> >+                               && remaining_data > 0) {
> >+                       struct rte_mbuf *next_seg =
> >+
> >+ rte_pktmbuf_alloc(test_data->decomp_buf_pool);
> >+
> >+                       if (next_seg == NULL) {
> >+                               RTE_LOG(ERR, USER1,
> >+                                       "Could not allocate mbuf\n");
> >+                               return -1;
> >+                       }
> >+
> >+                       data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
> >+                       data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
> >+                               data_sz);
> >+
> >+                       if (data_addr == NULL) {
> >+                               RTE_LOG(ERR, USER1, "Could not append
> >+ data\n");
> Since a new buffer per segment is allocated, so is it possible for append to
> fail? think, this check is redundant here.

TJ: Yes, you're right, it should never fail. But I think it's good coding practice to add the check just in case.

> >+                               return -1;
> >+                       }
> >+
> >+                       rte_memcpy(data_addr, input_data_ptr, data_sz);
> >+                       input_data_ptr += data_sz;
> >+                       remaining_data -= data_sz;
> >+
> >+                       if (rte_pktmbuf_chain(test_data->decomp_bufs[i],
> >+                                       next_seg) < 0) {
> >+                               RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
> >+                               return -1;
> >+                       }
> >+                       segs_per_mbuf++;
> >+               }
> >+
> >+               /* Allocate data in output mbuf */
> >+               test_data->comp_bufs[i] =
> >+                       rte_pktmbuf_alloc(test_data->comp_buf_pool);
> >+               if (test_data->comp_bufs[i] == NULL) {
> >+                       RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
> >+                       return -1;
> >+               }
> >+               data_addr = (uint8_t *) rte_pktmbuf_append(
> >+                                       test_data->comp_bufs[i],
> >+                                       test_data->seg_sz);
> >+               if (data_addr == NULL) {
> >+                       RTE_LOG(ERR, USER1, "Could not append data\n");
> >+                       return -1;
> >+               }
> >+
> >+               /* Chain mbufs if needed for output mbufs */
> >+               for (j = 1; j < segs_per_mbuf; j++) {
> >+                       struct rte_mbuf *next_seg =
> >+
> >+ rte_pktmbuf_alloc(test_data->comp_buf_pool);
> >+
> >+                       if (next_seg == NULL) {
> >+                               RTE_LOG(ERR, USER1,
> >+                                       "Could not allocate mbuf\n");
> >+                               return -1;
> >+                       }
> >+
> >+                       data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
> >+                               test_data->seg_sz);
> >+
> >+                       if (data_addr == NULL) {
> >+                               RTE_LOG(ERR, USER1, "Could not append data\n");
> >+                               return -1;
> >+                       }
> >+
> >+                       if (rte_pktmbuf_chain(test_data->comp_bufs[i],
> >+                                       next_seg) < 0) {
> >+                               RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
> >+                               return -1;
> >+                       }
> >+               }
> >+       }
> >+
> >+       return 0;
> >+}
> >+
> >+static void
> >+free_bufs(struct comp_test_data *test_data) {
> >+       uint32_t i;
> >+
> >+       for (i = 0; i < test_data->total_bufs; i++) {
> >+               rte_pktmbuf_free(test_data->comp_bufs[i]);
> >+               rte_pktmbuf_free(test_data->decomp_bufs[i]);
> >+       }
> >+       rte_free(test_data->comp_bufs);
> >+       rte_free(test_data->decomp_bufs); }
> >+
> >+static int
> >+main_loop(struct comp_test_data *test_data, uint8_t level,
> >+                       enum rte_comp_xform_type type,
> >+                       uint8_t *output_data_ptr,
> >+                       size_t *output_data_sz,
> >+                       unsigned int benchmarking) {
> >+       uint8_t dev_id = test_data->cdev_id;
> >+       uint32_t i, iter, num_iter;
> >+       struct rte_comp_op **ops, **deq_ops;
> >+       void *priv_xform = NULL;
> >+       struct rte_comp_xform xform;
> >+       size_t output_size = 0;
> >+       struct rte_mbuf **input_bufs, **output_bufs;
> >+       int res = 0;
> >+       int allocated = 0;
> >+
> >+       if (test_data == NULL || !test_data->burst_sz) {
> >+               RTE_LOG(ERR, USER1,
> >+                       "Unknow burst size\n");
> >+               return -1;
> >+       }
> >+
> >+       ops = rte_zmalloc_socket(NULL,
> >+               2 * test_data->total_bufs * sizeof(struct rte_comp_op *),
> >+               0, rte_socket_id());
> >+
> >+       if (ops == NULL) {
> >+               RTE_LOG(ERR, USER1,
> >+                       "Can't allocate memory for ops strucures\n");
> >+               return -1;
> >+       }
> >+
> >+       deq_ops = &ops[test_data->total_bufs];
> >+
> >+       if (type == RTE_COMP_COMPRESS) {
> >+               xform = (struct rte_comp_xform) {
> >+                       .type = RTE_COMP_COMPRESS,
> >+                       .compress = {
> >+                               .algo = RTE_COMP_ALGO_DEFLATE,
> >+                               .deflate.huffman = test_data->huffman_enc,
> >+                               .level = level,
> >+                               .window_size = test_data->window_sz,
> >+                               .chksum = RTE_COMP_CHECKSUM_NONE,
> >+                               .hash_algo = RTE_COMP_HASH_ALGO_NONE
> >+                       }
> >+               };
> >+               input_bufs = test_data->decomp_bufs;
> >+               output_bufs = test_data->comp_bufs;
> >+       } else {
> >+               xform = (struct rte_comp_xform) {
> >+                       .type = RTE_COMP_DECOMPRESS,
> >+                       .decompress = {
> >+                               .algo = RTE_COMP_ALGO_DEFLATE,
> >+                               .chksum = RTE_COMP_CHECKSUM_NONE,
> >+                               .window_size = test_data->window_sz,
> >+                               .hash_algo = RTE_COMP_HASH_ALGO_NONE
> >+                       }
> >+               };
> >+               input_bufs = test_data->comp_bufs;
> >+               output_bufs = test_data->decomp_bufs;
> >+       }
> >+
> >+       /* Create private xform */
> >+       if (rte_compressdev_private_xform_create(dev_id, &xform,
> >+                       &priv_xform) < 0) {
> >+               RTE_LOG(ERR, USER1, "Private xform could not be created\n");
> >+               res = -1;
> >+               goto end;
> >+       }
> >+
> >+       uint64_t tsc_start, tsc_end, tsc_duration;
> >+
> >+       tsc_start = tsc_end = tsc_duration = 0;
> >+       if (benchmarking) {
> >+               tsc_start = rte_rdtsc();
> >+               num_iter = test_data->num_iter;
> >+       } else
> >+               num_iter = 1;
> Looks like in same code we're doing benchmarking and functional validation.
> It can be reorganised to keep validation test separately like done in
> crypto_perf.

TJ: Ok, makes sense. However in the interests of getting this into the 18.11 release I'd like to
defer this refactoring and the remainder of your comments below to the next release.


Next comments - WIP


Br, Tomek

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH v2 0/3] add initial version of compress-perf
  2018-11-02  9:43 ` [dpdk-dev] [PATCH v2 0/3] add initial version of compress-perf Tomasz Jozwiak
                     ` (2 preceding siblings ...)
  2018-11-02  9:44   ` [dpdk-dev] [PATCH v2 3/3] doc/guides/tools: add doc files Tomasz Jozwiak
@ 2018-11-02 11:04   ` Bruce Richardson
  2018-11-02 11:12     ` Jozwiak, TomaszX
  2018-11-23 13:06   ` [dpdk-dev] [PATCH v3 0/5] " Tomasz Jozwiak
  4 siblings, 1 reply; 76+ messages in thread
From: Bruce Richardson @ 2018-11-02 11:04 UTC (permalink / raw)
  To: Tomasz Jozwiak; +Cc: dev, fiona.trahe, Shally.Verma, akhil.goyal

On Fri, Nov 02, 2018 at 10:43:59AM +0100, Tomasz Jozwiak wrote:
> This patchset adds initial version of compression performance
> test.
> 
> v2 changes:
> 
>   -  Added release note
>   -  Added new cleanup flow into main function
>   -  Blocked dynamic compression test because it hasn't been
>      tested enough
>   -  Changed `--max-num-sgl-segs' default value to 16
>   -  Updated documentation
> 
> Opens:  comment from Shally Verma re separating validation from
>         benchmarking will be investigated in a later release.
>         Support for dynamic Huffman encoding will be added
>         in a later release.
> 
> Tomasz Jozwiak (3):
>   app/compress-perf: add parser
>   app/compress-perf: add performance measurement
>   doc/guides/tools: add doc files
> 
If this is a performance test app, should it not go in "test" rather than
"app" folder?

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH v2 0/3] add initial version of compress-perf
  2018-11-02 11:04   ` [dpdk-dev] [PATCH v2 0/3] add initial version of compress-perf Bruce Richardson
@ 2018-11-02 11:12     ` Jozwiak, TomaszX
  0 siblings, 0 replies; 76+ messages in thread
From: Jozwiak, TomaszX @ 2018-11-02 11:12 UTC (permalink / raw)
  To: Richardson, Bruce; +Cc: dev, Trahe, Fiona, Shally.Verma, akhil.goyal

Hi Bruce,

It's similar to test-crypto-perf and others like test-eventdev, test-pmd, etc. which are in 'app' folder, as well.

-- 
Tomek

> -----Original Message-----
> From: Richardson, Bruce
> Sent: Friday, November 2, 2018 12:04 PM
> To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>
> Cc: dev@dpdk.org; Trahe, Fiona <fiona.trahe@intel.com>;
> Shally.Verma@cavium.com; akhil.goyal@nxp.com
> Subject: Re: [dpdk-dev] [PATCH v2 0/3] add initial version of compress-perf
> 
> On Fri, Nov 02, 2018 at 10:43:59AM +0100, Tomasz Jozwiak wrote:
> > This patchset adds initial version of compression performance test.
> >
> > v2 changes:
> >
> >   -  Added release note
> >   -  Added new cleanup flow into main function
> >   -  Blocked dynamic compression test because it hasn't been
> >      tested enough
> >   -  Changed `--max-num-sgl-segs' default value to 16
> >   -  Updated documentation
> >
> > Opens:  comment from Shally Verma re separating validation from
> >         benchmarking will be investigated in a later release.
> >         Support for dynamic Huffman encoding will be added
> >         in a later release.
> >
> > Tomasz Jozwiak (3):
> >   app/compress-perf: add parser
> >   app/compress-perf: add performance measurement
> >   doc/guides/tools: add doc files
> >
> If this is a performance test app, should it not go in "test" rather than "app"
> folder?

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
  2018-11-02  9:59     ` Jozwiak, TomaszX
@ 2018-11-05  8:34       ` Verma, Shally
  2018-11-06  8:04         ` Jozwiak, TomaszX
  0 siblings, 1 reply; 76+ messages in thread
From: Verma, Shally @ 2018-11-05  8:34 UTC (permalink / raw)
  To: Jozwiak, TomaszX, dev, Trahe, Fiona, akhil.goyal, De Lara Guarch, Pablo
  Cc: De, Lara, Guarch



>-----Original Message-----
>From: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>
>Sent: 02 November 2018 15:29
>To: dev@dpdk.org; Trahe, Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com; Verma, Shally <Shally.Verma@cavium.com>; De Lara
>Guarch, Pablo <pablo.de.lara.guarch@intel.com>
>Cc: De@dpdk.org; Lara@dpdk.org; Guarch@dpdk.org
>Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
>
>External Email
>
>Hi Shally,
>
>Sorry for delay - I was on sick leave.
>We had some issues with dynamic compression test so I block this test in V2. May be there's too late to add this into this release but
>we've decided to send this V2 to DPDK.
>
>My comment inline (not all have answer so far, still working on that)
>
>> -----Original Message-----
>> From: Verma, Shally [mailto:Shally.Verma@cavium.com]
>> Sent: Friday, October 12, 2018 12:16 PM
>> To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; Trahe,
>> Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com; De Lara Guarch, Pablo
>> <pablo.de.lara.guarch@intel.com>
>> Cc: De@dpdk.org; Lara@dpdk.org; Guarch@dpdk.org
>> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
>> measurement
>>
>> HI TomaszX
>>
>> Sorry for delay in response. Comments inline.
>>
>> >-----Original Message-----
>> >From: dev <dev-bounces@dpdk.org> On Behalf Of Tomasz Jozwiak
>> >Sent: 01 October 2018 18:57
>> >To: dev@dpdk.org; fiona.trahe@intel.com; tomaszx.jozwiak@intel.com;
>> >akhil.goyal@nxp.com; pablo.de.lara.guarch@intel.com
>> >Cc: De@dpdk.org; Lara@dpdk.org; Guarch@dpdk.org
>> >Subject: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
>> >measurement
>> >
>> >External Email
>> >
>> >Added performance measurement part into compression perf. test.
>> >
>> >Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
>> >Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
>> >---
>> > app/test-compress-perf/main.c | 844
>> >++++++++++++++++++++++++++++++++++++++++++
>> > 1 file changed, 844 insertions(+)
>> >
>> >diff --git a/app/test-compress-perf/main.c
>> >b/app/test-compress-perf/main.c index f52b98d..093dfaf 100644
>> >--- a/app/test-compress-perf/main.c
>> >+++ b/app/test-compress-perf/main.c
>> >@@ -5,13 +5,721 @@
>> > #include <rte_malloc.h>
>> > #include <rte_eal.h>
>> > #include <rte_log.h>
>> >+#include <rte_cycles.h>
>> > #include <rte_compressdev.h>
>> >
>> > #include "comp_perf_options.h"
>> >
>> >+#define NUM_MAX_XFORMS 16
>> >+#define NUM_MAX_INFLIGHT_OPS 512
>> >+#define EXPANSE_RATIO 1.05
>> >+#define MIN_ISAL_SIZE 8
>> >+
>> >+#define DIV_CEIL(a, b)  ((a) / (b) + ((a) % (b) != 0))
>> >+
>> >+static int
>> >+param_range_check(uint16_t size, const struct rte_param_log2_range
>> >+*range) {
>> >+       unsigned int next_size;
>> >+
>> >+       /* Check lower/upper bounds */
>> >+       if (size < range->min)
>> >+               return -1;
>> >+
>> >+       if (size > range->max)
>> >+               return -1;
>> >+
>> >+       /* If range is actually only one value, size is correct */
>> >+       if (range->increment == 0)
>> >+               return 0;
>> >+
>> >+       /* Check if value is one of the supported sizes */
>> >+       for (next_size = range->min; next_size <= range->max;
>> >+                       next_size += range->increment)
>> >+               if (size == next_size)
>> >+                       return 0;
>> >+
>> >+       return -1;
>> >+}
>> >+
>> >+static int
>> >+comp_perf_check_capabilities(struct comp_test_data *test_data) {
>> >+       const struct rte_compressdev_capabilities *cap;
>> >+
>> >+       cap = rte_compressdev_capability_get(test_data->cdev_id,
>> >+                                            RTE_COMP_ALGO_DEFLATE);
>> >+
>> >+       if (cap == NULL) {
>> >+               RTE_LOG(ERR, USER1,
>> >+                       "Compress device does not support DEFLATE\n");
>> >+               return -1;
>> >+       }
>> >+
>> >+       uint64_t comp_flags = cap->comp_feature_flags;
>> >+
>> >+       /* Huffman enconding */
>> >+       if (test_data->huffman_enc == RTE_COMP_HUFFMAN_FIXED &&
>> >+                       (comp_flags & RTE_COMP_FF_HUFFMAN_FIXED) == 0) {
>> >+               RTE_LOG(ERR, USER1,
>> >+                       "Compress device does not supported Fixed Huffman\n");
>> >+               return -1;
>> >+       }
>> >+
>> >+       if (test_data->huffman_enc == RTE_COMP_HUFFMAN_DYNAMIC &&
>> >+                       (comp_flags & RTE_COMP_FF_HUFFMAN_DYNAMIC) == 0) {
>> >+               RTE_LOG(ERR, USER1,
>> >+                       "Compress device does not supported Dynamic Huffman\n");
>> >+               return -1;
>> >+       }
>> >+
>> >+       /* Window size */
>> >+       if (test_data->window_sz != -1) {
>> >+               if (param_range_check(test_data->window_sz,
>> >+ &cap->window_size)
>> What if cap->window_size is 0 i.e. implementation default?
>
>TJ: You probably mean cap->window_size.increment = 0 (because cap->window_size is a structure). In that case we check if
>test_data->window_sz >=min and test_data->window_sz <= max only, because increment = 0 means (base on compression API) we
>have only one value of windows_size (no range is supported).
But a PMD can set min and max to 0 too in such a case.

>
>
>
....

>> >+
>> >+               if (fread(data, data_to_read, 1, f) != 1) {
>> >+                       RTE_LOG(ERR, USER1, "Input file could not be read\n");
>> >+                       goto err;
>> >+               }
>> >+               if (fseek(f, 0, SEEK_SET) != 0) {
>> >+                       RTE_LOG(ERR, USER1,
>> >+                               "Size of input could not be calculated\n");
>> >+                       goto err;
>> >+               }
>> >+               remaining_data -= data_to_read;
>> >+               data += data_to_read;
>> It looks like it will run 2nd time only if input file size < input data size in which
>> case it will just keep filling input buffer with repeated data.
>> Is that the intention here?
>
>TJ: Yes exactly. If test_data->input_data_sz is bigger than  actual_file_sz then we fill the buffer with repeated data from file to fill
>whole buffer.
As I mentioned in one of my earlier replies, won't that influence the compression behaviour and output? My suggestion was to run on the actual user-provided input, so that the measured performance reflects the given content.

>
>>
...

>> >+                       if (data_addr == NULL) {
>> >+                               RTE_LOG(ERR, USER1, "Could not append
>> >+ data\n");
>> Since a new buffer per segment is allocated, so is it possible for append to
>> fail? think, this check is redundant here.
>
>TJ: Yes, you're right, it should never fail. But I think it's good coding practice to add the check just in case.
>
Unless it is called in the data path, where it might cost a bit of performance.

Thanks
Shally

>> >+                               return -1;
>> >+                       }
>> >+
>> >+                       rte_memcpy(data_addr, input_data_ptr, data_sz);
>> >+                       input_data_ptr += data_sz;
>> >+                       remaining_data -= data_sz;
>> >+
>> >+                       if (rte_pktmbuf_chain(test_data->decomp_bufs[i],
>> >+                                       next_seg) < 0) {
>> >+                               RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
>> >+                               return -1;
>> >+                       }
>> >+                       segs_per_mbuf++;
>> >+               }
>> >+
>> >+               /* Allocate data in output mbuf */
>> >+               test_data->comp_bufs[i] =
>> >+                       rte_pktmbuf_alloc(test_data->comp_buf_pool);
>> >+               if (test_data->comp_bufs[i] == NULL) {
>> >+                       RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
>> >+                       return -1;
>> >+               }
>> >+               data_addr = (uint8_t *) rte_pktmbuf_append(
>> >+                                       test_data->comp_bufs[i],
>> >+                                       test_data->seg_sz);
>> >+               if (data_addr == NULL) {
>> >+                       RTE_LOG(ERR, USER1, "Could not append data\n");
>> >+                       return -1;
>> >+               }
>> >+
>> >+               /* Chain mbufs if needed for output mbufs */
>> >+               for (j = 1; j < segs_per_mbuf; j++) {
>> >+                       struct rte_mbuf *next_seg =
>> >+
>> >+ rte_pktmbuf_alloc(test_data->comp_buf_pool);
>> >+
>> >+                       if (next_seg == NULL) {
>> >+                               RTE_LOG(ERR, USER1,
>> >+                                       "Could not allocate mbuf\n");
>> >+                               return -1;
>> >+                       }
>> >+
>> >+                       data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
>> >+                               test_data->seg_sz);
>> >+
>> >+                       if (data_addr == NULL) {
>> >+                               RTE_LOG(ERR, USER1, "Could not append data\n");
>> >+                               return -1;
>> >+                       }
>> >+
>> >+                       if (rte_pktmbuf_chain(test_data->comp_bufs[i],
>> >+                                       next_seg) < 0) {
>> >+                               RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
>> >+                               return -1;
>> >+                       }
>> >+               }
>> >+       }
>> >+
>> >+       return 0;
>> >+}
>> >+
>> >+static void
>> >+free_bufs(struct comp_test_data *test_data) {
>> >+       uint32_t i;
>> >+
>> >+       for (i = 0; i < test_data->total_bufs; i++) {
>> >+               rte_pktmbuf_free(test_data->comp_bufs[i]);
>> >+               rte_pktmbuf_free(test_data->decomp_bufs[i]);
>> >+       }
>> >+       rte_free(test_data->comp_bufs);
>> >+       rte_free(test_data->decomp_bufs); }
>> >+
>> >+static int
>> >+main_loop(struct comp_test_data *test_data, uint8_t level,
>> >+                       enum rte_comp_xform_type type,
>> >+                       uint8_t *output_data_ptr,
>> >+                       size_t *output_data_sz,
>> >+                       unsigned int benchmarking) {
>> >+       uint8_t dev_id = test_data->cdev_id;
>> >+       uint32_t i, iter, num_iter;
>> >+       struct rte_comp_op **ops, **deq_ops;
>> >+       void *priv_xform = NULL;
>> >+       struct rte_comp_xform xform;
>> >+       size_t output_size = 0;
>> >+       struct rte_mbuf **input_bufs, **output_bufs;
>> >+       int res = 0;
>> >+       int allocated = 0;
>> >+
>> >+       if (test_data == NULL || !test_data->burst_sz) {
>> >+               RTE_LOG(ERR, USER1,
>> >+                       "Unknow burst size\n");
>> >+               return -1;
>> >+       }
>> >+
>> >+       ops = rte_zmalloc_socket(NULL,
>> >+               2 * test_data->total_bufs * sizeof(struct rte_comp_op *),
>> >+               0, rte_socket_id());
>> >+
>> >+       if (ops == NULL) {
>> >+               RTE_LOG(ERR, USER1,
>> >+                       "Can't allocate memory for ops strucures\n");
>> >+               return -1;
>> >+       }
>> >+
>> >+       deq_ops = &ops[test_data->total_bufs];
>> >+
>> >+       if (type == RTE_COMP_COMPRESS) {
>> >+               xform = (struct rte_comp_xform) {
>> >+                       .type = RTE_COMP_COMPRESS,
>> >+                       .compress = {
>> >+                               .algo = RTE_COMP_ALGO_DEFLATE,
>> >+                               .deflate.huffman = test_data->huffman_enc,
>> >+                               .level = level,
>> >+                               .window_size = test_data->window_sz,
>> >+                               .chksum = RTE_COMP_CHECKSUM_NONE,
>> >+                               .hash_algo = RTE_COMP_HASH_ALGO_NONE
>> >+                       }
>> >+               };
>> >+               input_bufs = test_data->decomp_bufs;
>> >+               output_bufs = test_data->comp_bufs;
>> >+       } else {
>> >+               xform = (struct rte_comp_xform) {
>> >+                       .type = RTE_COMP_DECOMPRESS,
>> >+                       .decompress = {
>> >+                               .algo = RTE_COMP_ALGO_DEFLATE,
>> >+                               .chksum = RTE_COMP_CHECKSUM_NONE,
>> >+                               .window_size = test_data->window_sz,
>> >+                               .hash_algo = RTE_COMP_HASH_ALGO_NONE
>> >+                       }
>> >+               };
>> >+               input_bufs = test_data->comp_bufs;
>> >+               output_bufs = test_data->decomp_bufs;
>> >+       }
>> >+
>> >+       /* Create private xform */
>> >+       if (rte_compressdev_private_xform_create(dev_id, &xform,
>> >+                       &priv_xform) < 0) {
>> >+               RTE_LOG(ERR, USER1, "Private xform could not be created\n");
>> >+               res = -1;
>> >+               goto end;
>> >+       }
>> >+
>> >+       uint64_t tsc_start, tsc_end, tsc_duration;
>> >+
>> >+       tsc_start = tsc_end = tsc_duration = 0;
>> >+       if (benchmarking) {
>> >+               tsc_start = rte_rdtsc();
>> >+               num_iter = test_data->num_iter;
>> >+       } else
>> >+               num_iter = 1;
>> Looks like in same code we're doing benchmarking and functional validation.
>> It can be reorganised to keep validation test separately like done in
>> crypto_perf.
>
>TJ: Ok, makes sense. However in the interests of getting this into the 18.11 release I'd like to
>defer this refactoring and the remainder of your comments below to the next release.
>
>
>Next comments - WIP
>
>
>Br, Tomek

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/3] app/compress-perf: add parser
  2018-11-02  9:44   ` [dpdk-dev] [PATCH v2 1/3] app/compress-perf: add parser Tomasz Jozwiak
@ 2018-11-05  8:40     ` Verma, Shally
  2018-11-06  8:30       ` Jozwiak, TomaszX
  0 siblings, 1 reply; 76+ messages in thread
From: Verma, Shally @ 2018-11-05  8:40 UTC (permalink / raw)
  To: Tomasz Jozwiak, dev, fiona.trahe, akhil.goyal



>-----Original Message-----
>From: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
>Sent: 02 November 2018 15:14
>To: dev@dpdk.org; fiona.trahe@intel.com; tomaszx.jozwiak@intel.com; Verma, Shally <Shally.Verma@cavium.com>;
>akhil.goyal@nxp.com
>Subject: [PATCH v2 1/3] app/compress-perf: add parser
>
>External Email
>
>Added parser part into compression perf. test.
>
>Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
>Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
>---
> app/Makefile                                     |   4 +
> app/meson.build                                  |   1 +
> app/test-compress-perf/Makefile                  |  16 +
> app/test-compress-perf/comp_perf_options.h       |  59 +++
> app/test-compress-perf/comp_perf_options_parse.c | 596 +++++++++++++++++++++++
> app/test-compress-perf/main.c                    |  52 ++
> app/test-compress-perf/meson.build               |   7 +
> config/common_base                               |   5 +
> 8 files changed, 740 insertions(+)
> create mode 100644 app/test-compress-perf/Makefile
> create mode 100644 app/test-compress-perf/comp_perf_options.h
> create mode 100644 app/test-compress-perf/comp_perf_options_parse.c
> create mode 100644 app/test-compress-perf/main.c
> create mode 100644 app/test-compress-perf/meson.build
>
>diff --git a/app/Makefile b/app/Makefile
>index 069fa98..d6641ef 100644
>--- a/app/Makefile
>+++ b/app/Makefile
>@@ -11,6 +11,10 @@ ifeq ($(CONFIG_RTE_LIBRTE_BBDEV),y)
> DIRS-$(CONFIG_RTE_TEST_BBDEV) += test-bbdev
> endif
>
>+ifeq ($(CONFIG_RTE_LIBRTE_COMPRESSDEV),y)
>+DIRS-$(CONFIG_RTE_APP_COMPRESS_PERF) += test-compress-perf
>+endif
>+
> ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
> DIRS-$(CONFIG_RTE_APP_CRYPTO_PERF) += test-crypto-perf
> endif
>diff --git a/app/meson.build b/app/meson.build
>index a9a026b..47a2a86 100644
>--- a/app/meson.build
>+++ b/app/meson.build
>@@ -4,6 +4,7 @@
> apps = ['pdump',
>        'proc-info',
>        'test-bbdev',
>+       'test-compress-perf',
>        'test-crypto-perf',
>        'test-eventdev',
>        'test-pmd']
>diff --git a/app/test-compress-perf/Makefile b/app/test-compress-perf/Makefile
>new file mode 100644
>index 0000000..8aa7a22
>--- /dev/null
>+++ b/app/test-compress-perf/Makefile
>@@ -0,0 +1,16 @@
>+# SPDX-License-Identifier: BSD-3-Clause
>+# Copyright(c) 2018 Intel Corporation
>+
>+include $(RTE_SDK)/mk/rte.vars.mk
>+
>+APP = dpdk-test-compress-perf
>+
>+CFLAGS += $(WERROR_FLAGS)
>+CFLAGS += -DALLOW_EXPERIMENTAL_API
>+CFLAGS += -O3
>+
>+# all source are stored in SRCS-y
>+SRCS-y := main.c
>+SRCS-y += comp_perf_options_parse.c
>+
>+include $(RTE_SDK)/mk/rte.app.mk
>diff --git a/app/test-compress-perf/comp_perf_options.h b/app/test-compress-perf/comp_perf_options.h
>new file mode 100644
>index 0000000..7516ea0
>--- /dev/null
>+++ b/app/test-compress-perf/comp_perf_options.h
>@@ -0,0 +1,59 @@
>+/* SPDX-License-Identifier: BSD-3-Clause
>+ * Copyright(c) 2018 Intel Corporation
>+ */
>+
>+#define MAX_DRIVER_NAME                64
>+#define MAX_INPUT_FILE_NAME    64
>+#define MAX_LIST               32
>+
>+enum comp_operation {
>+       COMPRESS_ONLY,
>+       DECOMPRESS_ONLY,
>+       COMPRESS_DECOMPRESS
>+};
>+
>+struct range_list {
>+       uint8_t min;
>+       uint8_t max;
>+       uint8_t inc;
>+       uint8_t count;
>+       uint8_t list[MAX_LIST];
>+};
>+
>+struct comp_test_data {
>+       char driver_name[64];
>+       char input_file[64];
>+       struct rte_mbuf **comp_bufs;
>+       struct rte_mbuf **decomp_bufs;
>+       uint32_t total_bufs;
>+       uint8_t *input_data;
>+       size_t input_data_sz;
>+       uint8_t *compressed_data;
>+       uint8_t *decompressed_data;
>+       struct rte_mempool *comp_buf_pool;
>+       struct rte_mempool *decomp_buf_pool;
>+       struct rte_mempool *op_pool;
>+       int8_t cdev_id;
>+       uint16_t seg_sz;
>+       uint16_t burst_sz;
>+       uint32_t pool_sz;
>+       uint32_t num_iter;
>+       uint16_t max_sgl_segs;
>+       enum rte_comp_huffman huffman_enc;
>+       enum comp_operation test_op;
>+       int window_sz;
>+       struct range_list level;
>+       /* Store TSC duration for all levels (including level 0) */
>+       uint64_t comp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
>+       uint64_t decomp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
>+};
>+
>+int
>+comp_perf_options_parse(struct comp_test_data *test_data, int argc,
>+                       char **argv);
>+
>+void
>+comp_perf_options_default(struct comp_test_data *test_data);
>+
>+int
>+comp_perf_options_check(struct comp_test_data *test_data);
>diff --git a/app/test-compress-perf/comp_perf_options_parse.c b/app/test-compress-perf/comp_perf_options_parse.c
>new file mode 100644
>index 0000000..bef4d2f
>--- /dev/null
>+++ b/app/test-compress-perf/comp_perf_options_parse.c
>@@ -0,0 +1,596 @@
>+/* SPDX-License-Identifier: BSD-3-Clause
>+ * Copyright(c) 2018 Intel Corporation
>+ */
>+
>+#include <getopt.h>
>+#include <stdint.h>
>+#include <stdio.h>
>+#include <string.h>
>+#include <inttypes.h>
>+#include <stdlib.h>
>+#include <errno.h>
>+
>+#include <rte_string_fns.h>
>+#include <rte_comp.h>
>+
>+#include "comp_perf_options.h"
>+
>+#define CPERF_DRIVER_NAME      ("driver-name")
>+#define CPERF_TEST_FILE                ("input-file")
>+#define CPERF_SEG_SIZE         ("seg-sz")
>+#define CPERF_BURST_SIZE       ("burst-sz")
>+#define CPERF_EXTENDED_SIZE    ("extended-input-sz")
>+#define CPERF_POOL_SIZE                ("pool-sz")
>+#define CPERF_MAX_SGL_SEGS     ("max-num-sgl-segs")
>+#define CPERF_NUM_ITER         ("num-iter")
>+#define CPERF_OPTYPE           ("operation")
>+#define CPERF_HUFFMAN_ENC      ("huffman-enc")
>+#define CPERF_LEVEL            ("compress-level")
>+#define CPERF_WINDOW_SIZE      ("window-sz")
>+
>+struct name_id_map {
>+       const char *name;
>+       uint32_t id;
>+};
>+
>+static void
>+usage(char *progname)
>+{
>+       printf("%s [EAL options] --\n"
>+               " --driver-name NAME: compress driver to use\n"
>+               " --input-file NAME: file to compress and decompress\n"
>+               " --extended-input-sz N: extend file data up to this size (default: no extension)\n"
>+               " --seg-sz N: size of segment to store the data (default: 2048)\n"
>+               " --burst-sz N: compress operation burst size\n"
>+               " --pool-sz N: mempool size for compress operations/mbufs\n"
>+               "               (default: 8192)\n"
>+               " --max-num-sgl-segs N: maximum number of segments for each mbuf\n"
Are we still taking it as input?

>+               "               (default: 65535)\n"
>+               " --num-iter N: number of times the file will be\n"
>+               "               compressed/decompressed (default: 10000)\n"
>+               " --operation [comp/decomp/comp_and_decomp]: perform test on\n"
>+               "               compression, decompression or both operations\n"
>+               " --huffman-enc [fixed/dynamic/default]: Huffman encoding\n"
>+               "               (default: dynamic)\n"
>+               " --compress-level N: compression level, which could be a single value, list or range\n"
>+               "               (default: range between 1 and 9)\n"
>+               " --window-sz N: base two log value of compression window size\n"
>+               "               (e.g.: 15 => 32k, default: max supported by PMD)\n"
>+               " -h: prints this help\n",
>+               progname);
>+}
>+
>+static int
>+get_str_key_id_mapping(struct name_id_map *map, unsigned int map_len,
>+               const char *str_key)
>+{
>+       unsigned int i;
>+
>+       for (i = 0; i < map_len; i++) {
>+
>+               if (strcmp(str_key, map[i].name) == 0)
>+                       return map[i].id;
>+       }
>+
>+       return -1;
>+}
>+
>+static int
>+parse_uint32_t(uint32_t *value, const char *arg)
>+{
>+       char *end = NULL;
>+       unsigned long n = strtoul(arg, &end, 10);
>+
>+       if ((optarg[0] == '\0') || (end == NULL) || (*end != '\0'))
>+               return -1;
>+
>+       if (n > UINT32_MAX)
>+               return -ERANGE;
>+
>+       *value = (uint32_t) n;
>+
>+       return 0;
>+}
>+
>+static int
>+parse_uint16_t(uint16_t *value, const char *arg)
>+{
>+       uint32_t val = 0;
>+       int ret = parse_uint32_t(&val, arg);
>+
>+       if (ret < 0)
>+               return ret;
>+
>+       if (val > UINT16_MAX)
>+               return -ERANGE;
>+
>+       *value = (uint16_t) val;
>+
>+       return 0;
>+}
>+
>+static int
>+parse_range(const char *arg, uint8_t *min, uint8_t *max, uint8_t *inc)
>+{
>+       char *token;
>+       uint8_t number;
>+
>+       char *copy_arg = strdup(arg);
>+
>+       if (copy_arg == NULL)
>+               return -1;
>+
>+       errno = 0;
>+       token = strtok(copy_arg, ":");
>+
>+       /* Parse minimum value */
>+       if (token != NULL) {
>+               number = strtoul(token, NULL, 10);
>+
>+               if (errno == EINVAL || errno == ERANGE)
>+                       goto err_range;
>+
>+               *min = number;
>+       } else
>+               goto err_range;
>+
>+       token = strtok(NULL, ":");
>+
>+       /* Parse increment value */
>+       if (token != NULL) {
>+               number = strtoul(token, NULL, 10);
>+
>+               if (errno == EINVAL || errno == ERANGE ||
>+                               number == 0)
>+                       goto err_range;
>+
>+               *inc = number;
>+       } else
>+               goto err_range;
>+
>+       token = strtok(NULL, ":");
>+
>+       /* Parse maximum value */
>+       if (token != NULL) {
>+               number = strtoul(token, NULL, 10);
>+
>+               if (errno == EINVAL || errno == ERANGE ||
>+                               number < *min)
>+                       goto err_range;
>+
>+               *max = number;
>+       } else
>+               goto err_range;
>+
>+       if (strtok(NULL, ":") != NULL)
>+               goto err_range;
>+
>+       free(copy_arg);
>+       return 0;
>+
>+err_range:
>+       free(copy_arg);
>+       return -1;
>+}
>+
>+static int
>+parse_list(const char *arg, uint8_t *list, uint8_t *min, uint8_t *max)
>+{
>+       char *token;
>+       uint32_t number;
>+       uint8_t count = 0;
>+       uint32_t temp_min;
>+       uint32_t temp_max;
>+
>+       char *copy_arg = strdup(arg);
>+
>+       if (copy_arg == NULL)
>+               return -1;
>+
>+       errno = 0;
>+       token = strtok(copy_arg, ",");
>+
>+       /* Parse first value */
>+       if (token != NULL) {
>+               number = strtoul(token, NULL, 10);
>+
>+               if (errno == EINVAL || errno == ERANGE)
>+                       goto err_list;
>+
>+               list[count++] = number;
>+               temp_min = number;
>+               temp_max = number;
>+       } else
>+               goto err_list;
>+
>+       token = strtok(NULL, ",");
>+
>+       while (token != NULL) {
>+               if (count == MAX_LIST) {
>+                       RTE_LOG(WARNING, USER1,
>+                               "Using only the first %u sizes\n",
>+                                       MAX_LIST);
>+                       break;
>+               }
>+
>+               number = strtoul(token, NULL, 10);
>+
>+               if (errno == EINVAL || errno == ERANGE)
>+                       goto err_list;
>+
>+               list[count++] = number;
>+
>+               if (number < temp_min)
>+                       temp_min = number;
>+               if (number > temp_max)
>+                       temp_max = number;
>+
>+               token = strtok(NULL, ",");
>+       }
>+
>+       if (min)
>+               *min = temp_min;
>+       if (max)
>+               *max = temp_max;
>+
>+       free(copy_arg);
>+       return count;
>+
>+err_list:
>+       free(copy_arg);
>+       return -1;
>+}
>+
>+static int
>+parse_num_iter(struct comp_test_data *test_data, const char *arg)
>+{
>+       int ret = parse_uint32_t(&test_data->num_iter, arg);
>+
>+       if (ret) {
>+               RTE_LOG(ERR, USER1, "Failed to parse total iteration count\n");
>+               return -1;
>+       }
>+
>+       if (test_data->num_iter == 0) {
>+               RTE_LOG(ERR, USER1,
>+                               "Total number of iterations must be higher than 0\n");
>+               return -1;
>+       }
>+
>+       return ret;
>+}
>+
>+static int
>+parse_pool_sz(struct comp_test_data *test_data, const char *arg)
>+{
>+       int ret = parse_uint32_t(&test_data->pool_sz, arg);
>+
>+       if (ret) {
>+               RTE_LOG(ERR, USER1, "Failed to parse pool size");
>+               return -1;
>+       }
>+
>+       if (test_data->pool_sz == 0) {
>+               RTE_LOG(ERR, USER1, "Pool size must be higher than 0\n");
>+               return -1;
>+       }
>+
>+       return ret;
>+}
>+
>+static int
>+parse_burst_sz(struct comp_test_data *test_data, const char *arg)
>+{
>+       int ret = parse_uint16_t(&test_data->burst_sz, arg);
>+
>+       if (ret) {
>+               RTE_LOG(ERR, USER1, "Failed to parse burst size/s\n");
>+               return -1;
>+       }
>+
>+       if (test_data->burst_sz == 0) {
>+               RTE_LOG(ERR, USER1, "Burst size must be higher than 0\n");
>+               return -1;
>+       }
>+
>+       return 0;
>+}
>+
>+static int
>+parse_extended_input_sz(struct comp_test_data *test_data, const char *arg)
>+{
>+       uint32_t tmp;
>+       int ret = parse_uint32_t(&tmp, arg);
>+
>+       if (ret) {
>+               RTE_LOG(ERR, USER1, "Failed to parse extended input size\n");
>+               return -1;
>+       }
>+       test_data->input_data_sz = tmp;
>+
>+       if (tmp == 0) {
>+               RTE_LOG(ERR, USER1,
>+                       "Extended file size must be higher than 0\n");
>+               return -1;
>+       }
>+       return 0;
>+}
>+
>+static int
>+parse_seg_sz(struct comp_test_data *test_data, const char *arg)
>+{
>+       int ret = parse_uint16_t(&test_data->seg_sz, arg);
>+
>+       if (ret) {
>+               RTE_LOG(ERR, USER1, "Failed to parse segment size\n");
>+               return -1;
>+       }
>+
>+       if (test_data->seg_sz == 0) {
>+               RTE_LOG(ERR, USER1, "Segment size must be higher than 0\n");
>+               return -1;
>+       }
>+
>+       return 0;
>+}
>+
>+static int
>+parse_max_num_sgl_segs(struct comp_test_data *test_data, const char *arg)
>+{
>+       int ret = parse_uint16_t(&test_data->max_sgl_segs, arg);
>+
>+       if (ret) {
>+               RTE_LOG(ERR, USER1,
>+                       "Failed to parse max number of segments per mbuf chain\n");
>+               return -1;
>+       }
>+
>+       if (test_data->max_sgl_segs == 0) {
>+               RTE_LOG(ERR, USER1, "Max number of segments per mbuf chain "
>+                       "must be higher than 0\n");
>+               return -1;
>+       }
>+
>+       return 0;
>+}
>+
>+static int
>+parse_window_sz(struct comp_test_data *test_data, const char *arg)
>+{
>+       int ret = parse_uint16_t((uint16_t *)&test_data->window_sz, arg);
>+
>+       if (ret) {
>+               RTE_LOG(ERR, USER1, "Failed to parse window size\n");
>+               return -1;
>+       }
>+
>+       return 0;
>+}
>+
>+static int
>+parse_driver_name(struct comp_test_data *test_data, const char *arg)
>+{
>+       if (strlen(arg) > (sizeof(test_data->driver_name) - 1))
>+               return -1;
>+
>+       rte_strlcpy(test_data->driver_name, arg,
>+                       sizeof(test_data->driver_name));
>+
>+       return 0;
>+}
>+
>+static int
>+parse_test_file(struct comp_test_data *test_data, const char *arg)
>+{
>+       if (strlen(arg) > (sizeof(test_data->input_file) - 1))
>+               return -1;
>+
>+       rte_strlcpy(test_data->input_file, arg, sizeof(test_data->input_file));
>+
>+       return 0;
>+}
>+
>+static int
>+parse_op_type(struct comp_test_data *test_data, const char *arg)
>+{
>+       struct name_id_map optype_namemap[] = {
>+               {
>+                       "comp",
>+                       COMPRESS_ONLY
>+               },
>+               {
>+                       "decomp",
>+                       DECOMPRESS_ONLY
>+               },
>+               {
>+                       "comp_and_decomp",
>+                       COMPRESS_DECOMPRESS
>+               }
>+       };
>+
>+       int id = get_str_key_id_mapping(optype_namemap,
>+                       RTE_DIM(optype_namemap), arg);
>+       if (id < 0) {
>+               RTE_LOG(ERR, USER1, "Invalid operation type specified\n");
>+               return -1;
>+       }
>+
>+       test_data->test_op = (enum comp_operation)id;
>+
>+       return 0;
>+}
>+
>+static int
>+parse_huffman_enc(struct comp_test_data *test_data, const char *arg)
>+{
>+       struct name_id_map huffman_namemap[] = {
>+               {
>+                       "default",
>+                       RTE_COMP_HUFFMAN_DEFAULT
>+               },
>+               {
>+                       "fixed",
>+                       RTE_COMP_HUFFMAN_FIXED
>+               },
>+               {
>+                       "dynamic",
>+                       RTE_COMP_HUFFMAN_DYNAMIC
>+               }
>+       };
>+
>+       int id = get_str_key_id_mapping(huffman_namemap,
>+                       RTE_DIM(huffman_namemap), arg);
>+       if (id < 0) {
>+               RTE_LOG(ERR, USER1, "Invalid Huffmane encoding specified\n");
>+               return -1;
>+       }
>+
>+       test_data->huffman_enc = (enum rte_comp_huffman)id;
>+
>+       return 0;
>+}
>+
>+static int
>+parse_level(struct comp_test_data *test_data, const char *arg)
>+{
>+       int ret;
>+
>+       /*
>+        * Try parsing the argument as a range, if it fails,
>+        * parse it as a list
>+        */
>+       if (parse_range(arg, &test_data->level.min, &test_data->level.max,
>+                       &test_data->level.inc) < 0) {
>+               ret = parse_list(arg, test_data->level.list,
>+                                       &test_data->level.min,
>+                                       &test_data->level.max);
>+               if (ret < 0) {
>+                       RTE_LOG(ERR, USER1,
>+                               "Failed to parse compression level/s\n");
>+                       return -1;
>+               }
>+               test_data->level.count = ret;
>+
>+               if (test_data->level.max > RTE_COMP_LEVEL_MAX) {
>+                       RTE_LOG(ERR, USER1, "Level cannot be higher than %u\n",
>+                                       RTE_COMP_LEVEL_MAX);
>+                       return -1;
>+               }
>+       }
>+
>+       return 0;
>+}
>+
>+typedef int (*option_parser_t)(struct comp_test_data *test_data,
>+               const char *arg);
>+
>+struct long_opt_parser {
>+       const char *lgopt_name;
>+       option_parser_t parser_fn;
>+
>+};
>+
>+static struct option lgopts[] = {
>+
>+       { CPERF_DRIVER_NAME, required_argument, 0, 0 },
>+       { CPERF_TEST_FILE, required_argument, 0, 0 },
>+       { CPERF_SEG_SIZE, required_argument, 0, 0 },
>+       { CPERF_BURST_SIZE, required_argument, 0, 0 },
>+       { CPERF_EXTENDED_SIZE, required_argument, 0, 0 },
>+       { CPERF_POOL_SIZE, required_argument, 0, 0 },
>+       { CPERF_MAX_SGL_SEGS, required_argument, 0, 0},
>+       { CPERF_NUM_ITER, required_argument, 0, 0 },
>+       { CPERF_OPTYPE, required_argument, 0, 0 },
>+       { CPERF_HUFFMAN_ENC, required_argument, 0, 0 },
>+       { CPERF_LEVEL, required_argument, 0, 0 },
>+       { CPERF_WINDOW_SIZE, required_argument, 0, 0 },
>+       { NULL, 0, 0, 0 }
>+};
>+static int
>+comp_perf_opts_parse_long(int opt_idx, struct comp_test_data *test_data)
>+{
>+       struct long_opt_parser parsermap[] = {
>+               { CPERF_DRIVER_NAME,    parse_driver_name },
>+               { CPERF_TEST_FILE,      parse_test_file },
>+               { CPERF_SEG_SIZE,       parse_seg_sz },
>+               { CPERF_BURST_SIZE,     parse_burst_sz },
>+               { CPERF_EXTENDED_SIZE,  parse_extended_input_sz },
>+               { CPERF_POOL_SIZE,      parse_pool_sz },
>+               { CPERF_MAX_SGL_SEGS,   parse_max_num_sgl_segs },
>+               { CPERF_NUM_ITER,       parse_num_iter },
>+               { CPERF_OPTYPE,         parse_op_type },
>+               { CPERF_HUFFMAN_ENC,    parse_huffman_enc },
>+               { CPERF_LEVEL,          parse_level },
>+               { CPERF_WINDOW_SIZE,    parse_window_sz },
>+       };
>+       unsigned int i;
>+
>+       for (i = 0; i < RTE_DIM(parsermap); i++) {
>+               if (strncmp(lgopts[opt_idx].name, parsermap[i].lgopt_name,
>+                               strlen(lgopts[opt_idx].name)) == 0)
>+                       return parsermap[i].parser_fn(test_data, optarg);
>+       }
>+
>+       return -EINVAL;
>+}
>+
>+int
>+comp_perf_options_parse(struct comp_test_data *test_data, int argc, char **argv)
>+{
>+       int opt, retval, opt_idx;
>+
>+       while ((opt = getopt_long(argc, argv, "h", lgopts, &opt_idx)) != EOF) {
>+               switch (opt) {
>+               case 'h':
>+                       usage(argv[0]);
>+                       rte_exit(EXIT_SUCCESS, "Displayed help\n");
>+                       break;
>+               /* long options */
>+               case 0:
>+                       retval = comp_perf_opts_parse_long(opt_idx, test_data);
>+                       if (retval != 0)
>+                               return retval;
>+
>+                       break;
>+
>+               default:
>+                       usage(argv[0]);
>+                       return -EINVAL;
>+               }
>+       }
>+
>+       return 0;
>+}
>+
>+void
>+comp_perf_options_default(struct comp_test_data *test_data)
>+{
>+       test_data->cdev_id = -1;
>+       test_data->seg_sz = 2048;
>+       test_data->burst_sz = 32;
>+       test_data->pool_sz = 8192;
>+       test_data->max_sgl_segs = UINT16_MAX;
>+       test_data->num_iter = 10000;
>+       test_data->huffman_enc = RTE_COMP_HUFFMAN_DYNAMIC;
>+       test_data->test_op = COMPRESS_DECOMPRESS;
>+       test_data->window_sz = -1;
>+       test_data->level.min = 1;
>+       test_data->level.max = 9;
>+       test_data->level.inc = 1;
>+}
>+
>+int
>+comp_perf_options_check(struct comp_test_data *test_data)
>+{
>+       if (strcmp(test_data->driver_name, "") == 0) {
>+               RTE_LOG(ERR, USER1, "Driver name has to be set\n");
>+               return -1;
>+       }
>+
>+       if (strcmp(test_data->input_file, "") == 0) {
>+               RTE_LOG(ERR, USER1, "Input file name has to be set\n");
>+               return -1;
>+       }
I think other parameters, such as window size, Huffman coding and level, should also be tested and adjusted according to the driver's capabilities.

Thanks
Shally
>+
>+       return 0;
>+}
>diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c
>new file mode 100644
>index 0000000..f52b98d
>--- /dev/null
>+++ b/app/test-compress-perf/main.c
>@@ -0,0 +1,52 @@
>+/* SPDX-License-Identifier: BSD-3-Clause
>+ * Copyright(c) 2018 Intel Corporation
>+ */
>+
>+#include <rte_malloc.h>
>+#include <rte_eal.h>
>+#include <rte_log.h>
>+#include <rte_compressdev.h>
>+
>+#include "comp_perf_options.h"
>+
>+int
>+main(int argc, char **argv)
>+{
>+       int ret;
>+       struct comp_test_data *test_data;
>+
>+       /* Initialise DPDK EAL */
>+       ret = rte_eal_init(argc, argv);
>+       if (ret < 0)
>+               rte_exit(EXIT_FAILURE, "Invalid EAL arguments!\n");
>+       argc -= ret;
>+       argv += ret;
>+
>+       test_data = rte_zmalloc_socket(NULL, sizeof(struct comp_test_data),
>+                                       0, rte_socket_id());
>+
>+       if (test_data == NULL)
>+               rte_exit(EXIT_FAILURE, "Cannot reserve memory in socket %d\n",
>+                               rte_socket_id());
>+
>+       comp_perf_options_default(test_data);
>+
>+       if (comp_perf_options_parse(test_data, argc, argv) < 0) {
>+               RTE_LOG(ERR, USER1,
>+                       "Parsing one or more user options failed\n");
>+               ret = EXIT_FAILURE;
>+               goto err;
>+       }
>+
>+       if (comp_perf_options_check(test_data) < 0) {
>+               ret = EXIT_FAILURE;
>+               goto err;
>+       }
>+
>+       ret = EXIT_SUCCESS;
>+
>+err:
>+       rte_free(test_data);
>+
>+       return ret;
>+}
>diff --git a/app/test-compress-perf/meson.build b/app/test-compress-perf/meson.build
>new file mode 100644
>index 0000000..ba6d64d
>--- /dev/null
>+++ b/app/test-compress-perf/meson.build
>@@ -0,0 +1,7 @@
>+# SPDX-License-Identifier: BSD-3-Clause
>+# Copyright(c) 2018 Intel Corporation
>+
>+allow_experimental_apis = true
>+sources = files('comp_perf_options_parse.c',
>+               'main.c')
>+deps = ['compressdev']
>diff --git a/config/common_base b/config/common_base
>index d12ae98..2ab4b7b 100644
>--- a/config/common_base
>+++ b/config/common_base
>@@ -949,6 +949,11 @@ CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
> CONFIG_RTE_TEST_BBDEV=y
>
> #
>+# Compile the compression performance application
>+#
>+CONFIG_RTE_APP_COMPRESS_PERF=y
>+
>+#
> # Compile the crypto performance application
> #
> CONFIG_RTE_APP_CRYPTO_PERF=y
>--
>2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/3] app/compress-perf: add performance measurement
  2018-11-02  9:44   ` [dpdk-dev] [PATCH v2 2/3] app/compress-perf: add performance measurement Tomasz Jozwiak
@ 2018-11-05  8:56     ` Verma, Shally
  2018-11-06  8:49       ` Jozwiak, TomaszX
  0 siblings, 1 reply; 76+ messages in thread
From: Verma, Shally @ 2018-11-05  8:56 UTC (permalink / raw)
  To: Tomasz Jozwiak, dev, fiona.trahe, akhil.goyal



>-----Original Message-----
>From: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
>Sent: 02 November 2018 15:14
>To: dev@dpdk.org; fiona.trahe@intel.com; tomaszx.jozwiak@intel.com; Verma, Shally <Shally.Verma@cavium.com>;
>akhil.goyal@nxp.com
>Subject: [PATCH v2 2/3] app/compress-perf: add performance measurement
>
>External Email
>
>Added performance measurement part into compression perf. test.
>
>Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
>Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
>---
> app/test-compress-perf/comp_perf_options_parse.c |   8 +-
> app/test-compress-perf/main.c                    | 886 ++++++++++++++++++++++-
> 2 files changed, 883 insertions(+), 11 deletions(-)
>
>diff --git a/app/test-compress-perf/comp_perf_options_parse.c b/app/test-compress-perf/comp_perf_options_parse.c
>index bef4d2f..e5da3ad 100644
>--- a/app/test-compress-perf/comp_perf_options_parse.c
>+++ b/app/test-compress-perf/comp_perf_options_parse.c
>@@ -431,10 +431,6 @@ parse_huffman_enc(struct comp_test_data *test_data, const char *arg)
>                {
>                        "fixed",
>                        RTE_COMP_HUFFMAN_FIXED
>-               },
>-               {
>-                       "dynamic",
>-                       RTE_COMP_HUFFMAN_DYNAMIC
>                }
>        };
>
>@@ -569,9 +565,9 @@ comp_perf_options_default(struct comp_test_data *test_data)
>        test_data->seg_sz = 2048;
>        test_data->burst_sz = 32;
>        test_data->pool_sz = 8192;
>-       test_data->max_sgl_segs = UINT16_MAX;
>+       test_data->max_sgl_segs = 16;
>        test_data->num_iter = 10000;
>-       test_data->huffman_enc = RTE_COMP_HUFFMAN_DYNAMIC;
>+       test_data->huffman_enc = RTE_COMP_HUFFMAN_FIXED;
>        test_data->test_op = COMPRESS_DECOMPRESS;
>        test_data->window_sz = -1;
>        test_data->level.min = 1;
>diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c
>index f52b98d..e3f4bf6 100644
>--- a/app/test-compress-perf/main.c
>+++ b/app/test-compress-perf/main.c
>@@ -5,14 +5,728 @@
> #include <rte_malloc.h>
> #include <rte_eal.h>
> #include <rte_log.h>
>+#include <rte_cycles.h>
> #include <rte_compressdev.h>
>
> #include "comp_perf_options.h"
>
>+#define NUM_MAX_XFORMS 16
>+#define NUM_MAX_INFLIGHT_OPS 512
>+#define EXPANSE_RATIO 1.05
>+#define MIN_ISAL_SIZE 8
Can we avoid ISAL-specific naming?
>+
>+#define DIV_CEIL(a, b)  ((a) / (b) + ((a) % (b) != 0))
>+
>+/* Cleanup state machine */
>+static enum cleanup_st {
>+       ST_CLEAR = 0,
>+       ST_TEST_DATA,
>+       ST_COMPDEV,
>+       ST_INPUT_DATA,
>+       ST_MEMORY_ALLOC,
>+       ST_PREPARE_BUF,
>+       ST_DURING_TEST
>+} cleanup = ST_CLEAR;
>+
>+static int
>+param_range_check(uint16_t size, const struct rte_param_log2_range *range)
>+{
>+       unsigned int next_size;
>+
>+       /* Check lower/upper bounds */
>+       if (size < range->min)
>+               return -1;
>+
>+       if (size > range->max)
>+               return -1;
>+
>+       /* If range is actually only one value, size is correct */
>+       if (range->increment == 0)
>+               return 0;
>+
>+       /* Check if value is one of the supported sizes */
>+       for (next_size = range->min; next_size <= range->max;
>+                       next_size += range->increment)
>+               if (size == next_size)
>+                       return 0;
>+
>+       return -1;
>+}
>+
>+static int
>+comp_perf_check_capabilities(struct comp_test_data *test_data)
>+{
>+       const struct rte_compressdev_capabilities *cap;
>+
>+       cap = rte_compressdev_capability_get(test_data->cdev_id,
>+                                            RTE_COMP_ALGO_DEFLATE);
>+
>+       if (cap == NULL) {
>+               RTE_LOG(ERR, USER1,
>+                       "Compress device does not support DEFLATE\n");
>+               return -1;
>+       }
>+
>+       uint64_t comp_flags = cap->comp_feature_flags;
>+
>+       /* Huffman encoding */
>+       if (test_data->huffman_enc == RTE_COMP_HUFFMAN_FIXED &&
>+                       (comp_flags & RTE_COMP_FF_HUFFMAN_FIXED) == 0) {
>+               RTE_LOG(ERR, USER1,
>+                       "Compress device does not supported Fixed Huffman\n");
>+               return -1;
>+       }
>+
>+       if (test_data->huffman_enc == RTE_COMP_HUFFMAN_DYNAMIC &&
>+                       (comp_flags & RTE_COMP_FF_HUFFMAN_DYNAMIC) == 0) {
>+               RTE_LOG(ERR, USER1,
>+                       "Compress device does not supported Dynamic Huffman\n");
>+               return -1;
>+       }
>+
>+       /* Window size */
>+       if (test_data->window_sz != -1) {
>+               if (param_range_check(test_data->window_sz, &cap->window_size)
>+                               < 0) {
>+                       RTE_LOG(ERR, USER1,
>+                               "Compress device does not support "
>+                               "this window size\n");
>+                       return -1;
>+               }
>+       } else
>+               /* Set window size to PMD maximum if none was specified */
>+               test_data->window_sz = cap->window_size.max;
>+
>+       /* Check if chained mbufs is supported */
>+       if (test_data->max_sgl_segs > 1  &&
>+                       (comp_flags & RTE_COMP_FF_OOP_SGL_IN_SGL_OUT) == 0) {
>+               RTE_LOG(INFO, USER1, "Compress device does not support "
>+                               "chained mbufs. Max SGL segments set to 1\n");
>+               test_data->max_sgl_segs = 1;
>+       }
>+
>+       /* Level 0 support */
>+       if (test_data->level.min == 0 &&
>+                       (comp_flags & RTE_COMP_FF_NONCOMPRESSED_BLOCKS) == 0) {
>+               RTE_LOG(ERR, USER1, "Compress device does not support "
>+                               "level 0 (no compression)\n");
>+               return -1;
>+       }
>+
>+       return 0;
>+}
>+
>+static int
>+comp_perf_allocate_memory(struct comp_test_data *test_data)
>+{
>+       /* Number of segments for input and output
>+        * (compression and decompression)
>+        */
>+       uint32_t total_segs = DIV_CEIL(test_data->input_data_sz,
>+                       test_data->seg_sz);
>+       test_data->comp_buf_pool = rte_pktmbuf_pool_create("comp_buf_pool",
>+                               total_segs,
>+                               0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
>+                               rte_socket_id());
>+       if (test_data->comp_buf_pool == NULL) {
>+               RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
>+               return -1;
>+       }
>+
>+       cleanup = ST_MEMORY_ALLOC;
>+       test_data->decomp_buf_pool = rte_pktmbuf_pool_create("decomp_buf_pool",
>+                               total_segs,
>+                               0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
>+                               rte_socket_id());
>+       if (test_data->decomp_buf_pool == NULL) {
>+               RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
>+               return -1;
>+       }
Unless I am missing it, you need to free the previously allocated memory here before returning, in all of the failure cases.

>+
>+       test_data->total_bufs = DIV_CEIL(total_segs, test_data->max_sgl_segs);
>+
>+       test_data->op_pool = rte_comp_op_pool_create("op_pool",
>+                                 test_data->total_bufs,
>+                                 0, 0, rte_socket_id());
>+       if (test_data->op_pool == NULL) {
>+               RTE_LOG(ERR, USER1, "Comp op mempool could not be created\n");
>+               return -1;
>+       }
>+
>+       /*
>+        * Compressed data might be a bit larger than input data,
>+        * if data cannot be compressed
>+        */
>+       test_data->compressed_data = rte_zmalloc_socket(NULL,
>+                               test_data->input_data_sz * EXPANSE_RATIO
>+                                                       + MIN_ISAL_SIZE, 0,
MIN_ISAL_SIZE looks specific to the ISAL driver. If so, is this perf app specific to that PMD? Or can we make it somewhat generic?

>+                               rte_socket_id());
>+       if (test_data->compressed_data == NULL) {
>+               RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
>+                               "file could not be allocated\n");
>+               return -1;
>+       }
>+
>+       test_data->decompressed_data = rte_zmalloc_socket(NULL,
>+                               test_data->input_data_sz, 0,
>+                               rte_socket_id());
>+       if (test_data->decompressed_data == NULL) {
>+               RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
>+                               "file could not be allocated\n");
>+               return -1;
>+       }
>+
>+       test_data->comp_bufs = rte_zmalloc_socket(NULL,
>+                       test_data->total_bufs * sizeof(struct rte_mbuf *),
>+                       0, rte_socket_id());
>+       if (test_data->comp_bufs == NULL) {
>+               RTE_LOG(ERR, USER1, "Memory to hold the compression mbufs"
>+                               " could not be allocated\n");
>+               return -1;
>+       }
>+
>+       test_data->decomp_bufs = rte_zmalloc_socket(NULL,
>+                       test_data->total_bufs * sizeof(struct rte_mbuf *),
>+                       0, rte_socket_id());
>+       if (test_data->decomp_bufs == NULL) {
>+               RTE_LOG(ERR, USER1, "Memory to hold the decompression mbufs"
>+                               " could not be allocated\n");
>+               return -1;
>+       }
>+       return 0;
>+}
>+
>+static int
>+comp_perf_dump_input_data(struct comp_test_data *test_data)
>+{
>+       FILE *f = fopen(test_data->input_file, "r");
>+       int ret = -1;
>+
>+       if (f == NULL) {
>+               RTE_LOG(ERR, USER1, "Input file could not be opened\n");
>+               return -1;
>+       }
>+
>+       if (fseek(f, 0, SEEK_END) != 0) {
>+               RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
>+               goto end;
>+       }
>+       size_t actual_file_sz = ftell(f);
>+       /* If extended input data size has not been set,
>+        * input data size = file size
>+        */
>+
>+       if (test_data->input_data_sz == 0)
>+               test_data->input_data_sz = actual_file_sz;
>+
>+       if (fseek(f, 0, SEEK_SET) != 0) {
>+               RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
>+               goto end;
>+       }
>+
>+       test_data->input_data = rte_zmalloc_socket(NULL,
>+                               test_data->input_data_sz, 0, rte_socket_id());
>+
>+       if (test_data->input_data == NULL) {
>+               RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
>+                               "file could not be allocated\n");
>+               goto end;
>+       }
>+
>+       size_t remaining_data = test_data->input_data_sz;
>+       uint8_t *data = test_data->input_data;
>+
>+       while (remaining_data > 0) {
>+               size_t data_to_read = RTE_MIN(remaining_data, actual_file_sz);
>+
>+               if (fread(data, data_to_read, 1, f) != 1) {
>+                       RTE_LOG(ERR, USER1, "Input file could not be read\n");
>+                       goto end;
>+               }
>+               if (fseek(f, 0, SEEK_SET) != 0) {
>+                       RTE_LOG(ERR, USER1,
>+                               "Size of input could not be calculated\n");
>+                       goto end;
>+               }
>+               remaining_data -= data_to_read;
>+               data += data_to_read;
>+       }
>+
>+       if (test_data->input_data_sz > actual_file_sz)
>+               RTE_LOG(INFO, USER1,
>+                 "%zu bytes read from file %s, extending the file %.2f times\n",
>+                       test_data->input_data_sz, test_data->input_file,
>+                       (double)test_data->input_data_sz/actual_file_sz);
>+       else
>+               RTE_LOG(INFO, USER1,
>+                       "%zu bytes read from file %s\n",
>+                       test_data->input_data_sz, test_data->input_file);
>+
>+       ret = 0;
>+
>+end:
>+       fclose(f);
>+       return ret;
>+}
>+
>+static int
>+comp_perf_initialize_compressdev(struct comp_test_data *test_data)
>+{
>+       uint8_t enabled_cdev_count;
>+       uint8_t enabled_cdevs[RTE_COMPRESS_MAX_DEVS];
>+
>+       enabled_cdev_count = rte_compressdev_devices_get(test_data->driver_name,
>+                       enabled_cdevs, RTE_COMPRESS_MAX_DEVS);
>+       if (enabled_cdev_count == 0) {
>+               RTE_LOG(ERR, USER1, "No compress devices type %s available\n",
>+                               test_data->driver_name);
>+               return -EINVAL;
>+       }
>+
>+       if (enabled_cdev_count > 1)
>+               RTE_LOG(INFO, USER1,
>+                       "Only the first compress device will be used\n");
>+
>+       test_data->cdev_id = enabled_cdevs[0];
>+
>+       if (comp_perf_check_capabilities(test_data) < 0)
>+               return -1;
>+
>+       /* Configure compressdev (one device, one queue pair) */
>+       struct rte_compressdev_config config = {
>+               .socket_id = rte_socket_id(),
>+               .nb_queue_pairs = 1,
>+               .max_nb_priv_xforms = NUM_MAX_XFORMS,
>+               .max_nb_streams = 0
>+       };
>+
>+       if (rte_compressdev_configure(test_data->cdev_id, &config) < 0) {
>+               RTE_LOG(ERR, USER1, "Device configuration failed\n");
>+               return -1;
>+       }
>+
>+       if (rte_compressdev_queue_pair_setup(test_data->cdev_id, 0,
>+                       NUM_MAX_INFLIGHT_OPS, rte_socket_id()) < 0) {
>+               RTE_LOG(ERR, USER1, "Queue pair setup failed\n");
>+               return -1;
>+       }
>+
>+       if (rte_compressdev_start(test_data->cdev_id) < 0) {
>+               RTE_LOG(ERR, USER1, "Device could not be started\n");
>+               return -1;
>+       }
>+
>+       return 0;
>+}
>+
>+static int
>+prepare_bufs(struct comp_test_data *test_data)
>+{
>+       uint32_t remaining_data = test_data->input_data_sz;
>+       uint8_t *input_data_ptr = test_data->input_data;
>+       size_t data_sz;
>+       uint8_t *data_addr;
>+       uint32_t i, j;
>+
>+       for (i = 0; i < test_data->total_bufs; i++) {
>+               /* Allocate data in input mbuf and copy data from input file */
>+               test_data->decomp_bufs[i] =
>+                       rte_pktmbuf_alloc(test_data->decomp_buf_pool);
>+               if (test_data->decomp_bufs[i] == NULL) {
>+                       RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
>+                       return -1;
>+               }
>+
>+               cleanup = ST_PREPARE_BUF;
>+               data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
>+               data_addr = (uint8_t *) rte_pktmbuf_append(
>+                                       test_data->decomp_bufs[i], data_sz);
>+               if (data_addr == NULL) {
>+                       RTE_LOG(ERR, USER1, "Could not append data\n");
The same applies here: free the allocated buffers before returning in the failure cases.

Thanks
Shally
>+                       return -1;
>+               }
>+               rte_memcpy(data_addr, input_data_ptr, data_sz);
>+
>+               input_data_ptr += data_sz;
>+               remaining_data -= data_sz;
>+
>+               /* Already one segment in the mbuf */
>+               uint16_t segs_per_mbuf = 1;
>+
>+               /* Chain mbufs if needed for input mbufs */
>+               while (segs_per_mbuf < test_data->max_sgl_segs
>+                               && remaining_data > 0) {
>+                       struct rte_mbuf *next_seg =
>+                               rte_pktmbuf_alloc(test_data->decomp_buf_pool);
>+
>+                       if (next_seg == NULL) {
>+                               RTE_LOG(ERR, USER1,
>+                                       "Could not allocate mbuf\n");
>+                               return -1;
>+                       }
>+
>+                       data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
>+                       data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
>+                               data_sz);
>+
>+                       if (data_addr == NULL) {
>+                               RTE_LOG(ERR, USER1, "Could not append data\n");
>+                               return -1;
>+                       }
>+
>+                       rte_memcpy(data_addr, input_data_ptr, data_sz);
>+                       input_data_ptr += data_sz;
>+                       remaining_data -= data_sz;
>+
>+                       if (rte_pktmbuf_chain(test_data->decomp_bufs[i],
>+                                       next_seg) < 0) {
>+                               RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
>+                               return -1;
>+                       }
>+                       segs_per_mbuf++;
>+               }
>+
>+               /* Allocate data in output mbuf */
>+               test_data->comp_bufs[i] =
>+                       rte_pktmbuf_alloc(test_data->comp_buf_pool);
>+               if (test_data->comp_bufs[i] == NULL) {
>+                       RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
>+                       return -1;
>+               }
>+               data_addr = (uint8_t *) rte_pktmbuf_append(
>+                                       test_data->comp_bufs[i],
>+                                       test_data->seg_sz);
>+               if (data_addr == NULL) {
>+                       RTE_LOG(ERR, USER1, "Could not append data\n");
>+                       return -1;
>+               }
>+
>+               /* Chain mbufs if needed for output mbufs */
>+               for (j = 1; j < segs_per_mbuf; j++) {
>+                       struct rte_mbuf *next_seg =
>+                               rte_pktmbuf_alloc(test_data->comp_buf_pool);
>+
>+                       if (next_seg == NULL) {
>+                               RTE_LOG(ERR, USER1,
>+                                       "Could not allocate mbuf\n");
>+                               return -1;
>+                       }
>+
>+                       data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
>+                               test_data->seg_sz);
>+
>+                       if (data_addr == NULL) {
>+                               RTE_LOG(ERR, USER1, "Could not append data\n");
>+                               return -1;
>+                       }
>+
>+                       if (rte_pktmbuf_chain(test_data->comp_bufs[i],
>+                                       next_seg) < 0) {
>+                               RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
>+                               return -1;
>+                       }
>+               }
>+       }
>+
>+       return 0;
>+}
>+
>+static void
>+free_bufs(struct comp_test_data *test_data)
>+{
>+       uint32_t i;
>+
>+       for (i = 0; i < test_data->total_bufs; i++) {
>+               rte_pktmbuf_free(test_data->comp_bufs[i]);
>+               rte_pktmbuf_free(test_data->decomp_bufs[i]);
>+       }
>+}
>+
>+static int
>+main_loop(struct comp_test_data *test_data, uint8_t level,
>+                       enum rte_comp_xform_type type,
>+                       uint8_t *output_data_ptr,
>+                       size_t *output_data_sz,
>+                       unsigned int benchmarking)
>+{
>+       uint8_t dev_id = test_data->cdev_id;
>+       uint32_t i, iter, num_iter;
>+       struct rte_comp_op **ops, **deq_ops;
>+       void *priv_xform = NULL;
>+       struct rte_comp_xform xform;
>+       size_t output_size = 0;
>+       struct rte_mbuf **input_bufs, **output_bufs;
>+       int res = 0;
>+       int allocated = 0;
>+
>+       if (test_data == NULL || !test_data->burst_sz) {
>+               RTE_LOG(ERR, USER1,
>+                       "Unknown burst size\n");
>+               return -1;
>+       }
>+
>+       ops = rte_zmalloc_socket(NULL,
>+               2 * test_data->total_bufs * sizeof(struct rte_comp_op *),
>+               0, rte_socket_id());
>+
>+       if (ops == NULL) {
>+               RTE_LOG(ERR, USER1,
>+                       "Can't allocate memory for ops strucures\n");
>+               return -1;
>+       }
>+
>+       deq_ops = &ops[test_data->total_bufs];
>+
>+       if (type == RTE_COMP_COMPRESS) {
>+               xform = (struct rte_comp_xform) {
>+                       .type = RTE_COMP_COMPRESS,
>+                       .compress = {
>+                               .algo = RTE_COMP_ALGO_DEFLATE,
>+                               .deflate.huffman = test_data->huffman_enc,
>+                               .level = level,
>+                               .window_size = test_data->window_sz,
>+                               .chksum = RTE_COMP_CHECKSUM_NONE,
>+                               .hash_algo = RTE_COMP_HASH_ALGO_NONE
>+                       }
>+               };
>+               input_bufs = test_data->decomp_bufs;
>+               output_bufs = test_data->comp_bufs;
>+       } else {
>+               xform = (struct rte_comp_xform) {
>+                       .type = RTE_COMP_DECOMPRESS,
>+                       .decompress = {
>+                               .algo = RTE_COMP_ALGO_DEFLATE,
>+                               .chksum = RTE_COMP_CHECKSUM_NONE,
>+                               .window_size = test_data->window_sz,
>+                               .hash_algo = RTE_COMP_HASH_ALGO_NONE
>+                       }
>+               };
>+               input_bufs = test_data->comp_bufs;
>+               output_bufs = test_data->decomp_bufs;
>+       }
>+
>+       /* Create private xform */
>+       if (rte_compressdev_private_xform_create(dev_id, &xform,
>+                       &priv_xform) < 0) {
>+               RTE_LOG(ERR, USER1, "Private xform could not be created\n");
>+               res = -1;
>+               goto end;
>+       }
>+
>+       uint64_t tsc_start, tsc_end, tsc_duration;
>+
>+       tsc_start = tsc_end = tsc_duration = 0;
>+       if (benchmarking) {
>+               tsc_start = rte_rdtsc();
>+               num_iter = test_data->num_iter;
>+       } else
>+               num_iter = 1;
>+
>+       for (iter = 0; iter < num_iter; iter++) {
>+               uint32_t total_ops = test_data->total_bufs;
>+               uint32_t remaining_ops = test_data->total_bufs;
>+               uint32_t total_deq_ops = 0;
>+               uint32_t total_enq_ops = 0;
>+               uint16_t ops_unused = 0;
>+               uint16_t num_enq = 0;
>+               uint16_t num_deq = 0;
>+
>+               output_size = 0;
>+
>+               while (remaining_ops > 0) {
>+                       uint16_t num_ops = RTE_MIN(remaining_ops,
>+                                                  test_data->burst_sz);
>+                       uint16_t ops_needed = num_ops - ops_unused;
>+
>+                       /*
>+                        * Move the unused operations from the previous
>+                        * enqueue_burst call to the front, to maintain order
>+                        */
>+                       if ((ops_unused > 0) && (num_enq > 0)) {
>+                               size_t nb_b_to_mov =
>+                                     ops_unused * sizeof(struct rte_comp_op *);
>+
>+                               memmove(ops, &ops[num_enq], nb_b_to_mov);
>+                       }
>+
>+                       /* Allocate compression operations */
>+                       if (ops_needed && !rte_comp_op_bulk_alloc(
>+                                               test_data->op_pool,
>+                                               &ops[ops_unused],
>+                                               ops_needed)) {
>+                               RTE_LOG(ERR, USER1,
>+                                     "Could not allocate enough operations\n");
>+                               res = -1;
>+                               goto end;
>+                       }
>+                       allocated += ops_needed;
>+
>+                       for (i = 0; i < ops_needed; i++) {
>+                               /*
>+                                * Calculate next buffer to attach to operation
>+                                */
>+                               uint32_t buf_id = total_enq_ops + i +
>+                                               ops_unused;
>+                               uint16_t op_id = ops_unused + i;
>+                               /* Reset all data in output buffers */
>+                               struct rte_mbuf *m = output_bufs[buf_id];
>+
>+                               m->pkt_len = test_data->seg_sz * m->nb_segs;
>+                               while (m) {
>+                                       m->data_len = m->buf_len - m->data_off;
>+                                       m = m->next;
>+                               }
>+                               ops[op_id]->m_src = input_bufs[buf_id];
>+                               ops[op_id]->m_dst = output_bufs[buf_id];
>+                               ops[op_id]->src.offset = 0;
>+                               ops[op_id]->src.length =
>+                                       rte_pktmbuf_pkt_len(input_bufs[buf_id]);
>+                               ops[op_id]->dst.offset = 0;
>+                               ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
>+                               ops[op_id]->input_chksum = buf_id;
>+                               ops[op_id]->private_xform = priv_xform;
>+                       }
>+
>+                       num_enq = rte_compressdev_enqueue_burst(dev_id, 0, ops,
>+                                                               num_ops);
>+                       ops_unused = num_ops - num_enq;
>+                       remaining_ops -= num_enq;
>+                       total_enq_ops += num_enq;
>+
>+                       num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
>+                                                          deq_ops,
>+                                                          test_data->burst_sz);
>+                       total_deq_ops += num_deq;
>+                       if (benchmarking == 0) {
>+                               for (i = 0; i < num_deq; i++) {
>+                                       struct rte_comp_op *op = deq_ops[i];
>+                                       const void *read_data_addr =
>+                                               rte_pktmbuf_read(op->m_dst, 0,
>+                                               op->produced, output_data_ptr);
>+                                       if (read_data_addr == NULL) {
>+                                               RTE_LOG(ERR, USER1,
>+                                     "Could not copy buffer in destination\n");
>+                                               res = -1;
>+                                               goto end;
>+                                       }
>+
>+                                       if (read_data_addr != output_data_ptr)
>+                                               rte_memcpy(output_data_ptr,
>+                                                       rte_pktmbuf_mtod(
>+                                                         op->m_dst, uint8_t *),
>+                                                       op->produced);
>+                                       output_data_ptr += op->produced;
>+                                       output_size += op->produced;
>+
>+                               }
>+                       }
>+
>+                       if (iter == num_iter - 1) {
>+                               for (i = 0; i < num_deq; i++) {
>+                                       struct rte_comp_op *op = deq_ops[i];
>+                                       struct rte_mbuf *m = op->m_dst;
>+
>+                                       m->pkt_len = op->produced;
>+                                       uint32_t remaining_data = op->produced;
>+                                       uint16_t data_to_append;
>+
>+                                       while (remaining_data > 0) {
>+                                               data_to_append =
>+                                                       RTE_MIN(remaining_data,
>+                                                            test_data->seg_sz);
>+                                               m->data_len = data_to_append;
>+                                               remaining_data -=
>+                                                               data_to_append;
>+                                               m = m->next;
>+                                       }
>+                               }
>+                       }
>+                       rte_mempool_put_bulk(test_data->op_pool,
>+                                            (void **)deq_ops, num_deq);
>+                       allocated -= num_deq;
>+               }
>+
>+               /* Dequeue the last operations */
>+               while (total_deq_ops < total_ops) {
>+                       num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
>+                                               deq_ops, test_data->burst_sz);
>+                       total_deq_ops += num_deq;
>+                       if (benchmarking == 0) {
>+                               for (i = 0; i < num_deq; i++) {
>+                                       struct rte_comp_op *op = deq_ops[i];
>+                                       const void *read_data_addr =
>+                                               rte_pktmbuf_read(op->m_dst, 0,
>+                                               op->produced, output_data_ptr);
>+                                       if (read_data_addr == NULL) {
>+                                               RTE_LOG(ERR, USER1,
>+                                     "Could not copy buffer in destination\n");
>+                                               res = -1;
>+                                               goto end;
>+                                       }
>+
>+                                       if (read_data_addr != output_data_ptr)
>+                                               rte_memcpy(output_data_ptr,
>+                                                       rte_pktmbuf_mtod(
>+                                                       op->m_dst, uint8_t *),
>+                                                       op->produced);
>+                                       output_data_ptr += op->produced;
>+                                       output_size += op->produced;
>+
>+                               }
>+                       }
>+
>+                       if (iter == num_iter - 1) {
>+                               for (i = 0; i < num_deq; i++) {
>+                                       struct rte_comp_op *op = deq_ops[i];
>+                                       struct rte_mbuf *m = op->m_dst;
>+
>+                                       m->pkt_len = op->produced;
>+                                       uint32_t remaining_data = op->produced;
>+                                       uint16_t data_to_append;
>+
>+                                       while (remaining_data > 0) {
>+                                               data_to_append =
>+                                               RTE_MIN(remaining_data,
>+                                                       test_data->seg_sz);
>+                                               m->data_len = data_to_append;
>+                                               remaining_data -=
>+                                                               data_to_append;
>+                                               m = m->next;
>+                                       }
>+                               }
>+                       }
>+                       rte_mempool_put_bulk(test_data->op_pool,
>+                                            (void **)deq_ops, num_deq);
>+                       allocated -= num_deq;
>+               }
>+       }
>+
>+       if (benchmarking) {
>+               tsc_end = rte_rdtsc();
>+               tsc_duration = tsc_end - tsc_start;
>+
>+               if (type == RTE_COMP_COMPRESS)
>+                       test_data->comp_tsc_duration[level] =
>+                                       tsc_duration / num_iter;
>+               else
>+                       test_data->decomp_tsc_duration[level] =
>+                                       tsc_duration / num_iter;
>+       }
>+
>+       if (benchmarking == 0 && output_data_sz)
>+               *output_data_sz = output_size;
>+end:
>+       rte_mempool_put_bulk(test_data->op_pool, (void **)ops, allocated);
>+       rte_compressdev_private_xform_free(dev_id, priv_xform);
>+       rte_free(ops);
>+       return res;
>+}
>+
> int
> main(int argc, char **argv)
> {
>-       int ret;
>+       uint8_t level, level_idx = 0;
>+       int ret, i;
>        struct comp_test_data *test_data;
>
>        /* Initialise DPDK EAL */
>@@ -29,24 +743,186 @@ main(int argc, char **argv)
>                rte_exit(EXIT_FAILURE, "Cannot reserve memory in socket %d\n",
>                                rte_socket_id());
>
>+       cleanup = ST_TEST_DATA;
>        comp_perf_options_default(test_data);
>
>        if (comp_perf_options_parse(test_data, argc, argv) < 0) {
>                RTE_LOG(ERR, USER1,
>                        "Parsing one or more user options failed\n");
>                ret = EXIT_FAILURE;
>-               goto err;
>+               goto end;
>        }
>
>        if (comp_perf_options_check(test_data) < 0) {
>                ret = EXIT_FAILURE;
>-               goto err;
>+               goto end;
>+       }
>+
>+       if (comp_perf_initialize_compressdev(test_data) < 0) {
>+               ret = EXIT_FAILURE;
>+               goto end;
>+       }
>+
>+       cleanup = ST_COMPDEV;
>+       if (comp_perf_dump_input_data(test_data) < 0) {
>+               ret = EXIT_FAILURE;
>+               goto end;
>+       }
>+
>+       cleanup = ST_INPUT_DATA;
>+       if (comp_perf_allocate_memory(test_data) < 0) {
>+               ret = EXIT_FAILURE;
>+               goto end;
>+       }
>+
>+       if (prepare_bufs(test_data) < 0) {
>+               ret = EXIT_FAILURE;
>+               goto end;
>+       }
>+
>+       if (test_data->level.inc != 0)
>+               level = test_data->level.min;
>+       else
>+               level = test_data->level.list[0];
>+
>+       size_t comp_data_sz;
>+       size_t decomp_data_sz;
>+
>+       printf("Burst size = %u\n", test_data->burst_sz);
>+       printf("File size = %zu\n", test_data->input_data_sz);
>+
>+       printf("%6s%12s%17s%19s%21s%15s%21s%23s%16s\n",
>+               "Level", "Comp size", "Comp ratio [%]",
>+               "Comp [Cycles/it]", "Comp [Cycles/Byte]", "Comp [Gbps]",
>+               "Decomp [Cycles/it]", "Decomp [Cycles/Byte]", "Decomp [Gbps]");
>+
>+       cleanup = ST_DURING_TEST;
>+       while (level <= test_data->level.max) {
>+               /*
>+                * Run a first iteration, to verify compression and
>+                * get the compression ratio for the level
>+                */
>+               if (main_loop(test_data, level, RTE_COMP_COMPRESS,
>+                             test_data->compressed_data,
>+                             &comp_data_sz, 0) < 0) {
>+                       ret = EXIT_FAILURE;
>+                       goto end;
>+               }
>+
>+               if (main_loop(test_data, level, RTE_COMP_DECOMPRESS,
>+                             test_data->decompressed_data,
>+                             &decomp_data_sz, 0) < 0) {
>+                       ret = EXIT_FAILURE;
>+                       goto end;
>+               }
>+
>+               if (decomp_data_sz != test_data->input_data_sz) {
>+                       RTE_LOG(ERR, USER1,
>+                  "Decompressed data length not equal to input data length\n");
>+                       RTE_LOG(ERR, USER1,
>+                               "Decompressed size = %zu, expected = %zu\n",
>+                               decomp_data_sz, test_data->input_data_sz);
>+                       ret = EXIT_FAILURE;
>+                       goto end;
>+               } else {
>+                       if (memcmp(test_data->decompressed_data,
>+                                       test_data->input_data,
>+                                       test_data->input_data_sz) != 0) {
>+                               RTE_LOG(ERR, USER1,
>+                           "Decompressed data is not the same as file data\n");
>+                               ret = EXIT_FAILURE;
>+                               goto end;
>+                       }
>+               }
>+
>+               double ratio = (double) comp_data_sz /
>+                                               test_data->input_data_sz * 100;
>+
>+               /*
>+                * Run the tests twice, discarding the first performance
>+                * results, before the cache is warmed up
>+                */
>+               for (i = 0; i < 2; i++) {
>+                       if (main_loop(test_data, level, RTE_COMP_COMPRESS,
>+                                       NULL, NULL, 1) < 0) {
>+                               ret = EXIT_FAILURE;
>+                               goto end;
>+                       }
>+               }
>+
>+               for (i = 0; i < 2; i++) {
>+                       if (main_loop(test_data, level, RTE_COMP_DECOMPRESS,
>+                                       NULL, NULL, 1) < 0) {
>+                               ret = EXIT_FAILURE;
>+                               goto end;
>+                       }
>+               }
>+
>+               uint64_t comp_tsc_duration =
>+                               test_data->comp_tsc_duration[level];
>+               double comp_tsc_byte = (double)comp_tsc_duration /
>+                                               test_data->input_data_sz;
>+               double comp_gbps = rte_get_tsc_hz() / comp_tsc_byte * 8 /
>+                               1000000000;
>+               uint64_t decomp_tsc_duration =
>+                               test_data->decomp_tsc_duration[level];
>+               double decomp_tsc_byte = (double)decomp_tsc_duration /
>+                                               test_data->input_data_sz;
>+               double decomp_gbps = rte_get_tsc_hz() / decomp_tsc_byte * 8 /
>+                               1000000000;
>+
>+               printf("%6u%12zu%17.2f%19"PRIu64"%21.2f"
>+                                       "%15.2f%21"PRIu64"%23.2f%16.2f\n",
>+                      level, comp_data_sz, ratio, comp_tsc_duration,
>+                      comp_tsc_byte, comp_gbps, decomp_tsc_duration,
>+                      decomp_tsc_byte, decomp_gbps);
>+
>+               if (test_data->level.inc != 0)
>+                       level += test_data->level.inc;
>+               else {
>+                       if (++level_idx == test_data->level.count)
>+                               break;
>+                       level = test_data->level.list[level_idx];
>+               }
>        }
>
>        ret = EXIT_SUCCESS;
>
>-err:
>-       rte_free(test_data);
>+end:
>+       switch (cleanup) {
>
>+       case ST_DURING_TEST:
>+       case ST_PREPARE_BUF:
>+               free_bufs(test_data);
>+               /* fallthrough */
>+       case ST_MEMORY_ALLOC:
>+               rte_free(test_data->decomp_bufs);
>+               rte_free(test_data->comp_bufs);
>+               rte_free(test_data->decompressed_data);
>+               rte_free(test_data->compressed_data);
>+               rte_mempool_free(test_data->op_pool);
>+               rte_mempool_free(test_data->decomp_buf_pool);
>+               rte_mempool_free(test_data->comp_buf_pool);
>+               /* fallthrough */
>+       case ST_INPUT_DATA:
>+               rte_free(test_data->input_data);
>+               /* fallthrough */
>+       case ST_COMPDEV:
>+               if (test_data->cdev_id != -1)
>+                       rte_compressdev_stop(test_data->cdev_id);
>+               /* fallthrough */
>+       case ST_TEST_DATA:
>+               rte_free(test_data);
>+               /* fallthrough */
>+       case ST_CLEAR:
>+       default:
>+               i = rte_eal_cleanup();
>+               if (i) {
>+                       RTE_LOG(ERR, USER1,
>+                               "Error from rte_eal_cleanup(), %d\n", i);
>+                       ret = i;
>+               }
>+               break;
>+       }
>        return ret;
> }
>--
>2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH v2 3/3] doc/guides/tools: add doc files
  2018-11-02  9:44   ` [dpdk-dev] [PATCH v2 3/3] doc/guides/tools: add doc files Tomasz Jozwiak
@ 2018-11-05  8:57     ` Verma, Shally
  2018-11-06  8:51       ` Jozwiak, TomaszX
  0 siblings, 1 reply; 76+ messages in thread
From: Verma, Shally @ 2018-11-05  8:57 UTC (permalink / raw)
  To: Tomasz Jozwiak, dev, fiona.trahe, akhil.goyal



>-----Original Message-----
>From: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
>Sent: 02 November 2018 15:14
>To: dev@dpdk.org; fiona.trahe@intel.com; tomaszx.jozwiak@intel.com; Verma, Shally <Shally.Verma@cavium.com>;
>akhil.goyal@nxp.com
>Subject: [PATCH v2 3/3] doc/guides/tools: add doc files
>
>External Email
>
>Added:
> -  initial version of compression performance test
>    description file.
> -  release note in release_18_11.rst
>
>Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
>Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
>---
> MAINTAINERS                            |  5 +++
> doc/guides/rel_notes/release_18_11.rst |  6 +++
> doc/guides/tools/comp_perf.rst         | 75 ++++++++++++++++++++++++++++++++++
> 3 files changed, 86 insertions(+)
> create mode 100644 doc/guides/tools/comp_perf.rst
>
>diff --git a/MAINTAINERS b/MAINTAINERS
>index e60379d..cfda6dd 100644
>--- a/MAINTAINERS
>+++ b/MAINTAINERS
>@@ -1242,6 +1242,11 @@ M: Bernard Iremonger <bernard.iremonger@intel.com>
> F: app/test-pmd/
> F: doc/guides/testpmd_app_ug/
>
>+Compression performance test application
>+M: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
>+F: app/test-compress-perf/
>+F: doc/guides/tools/comp_perf.rst
>+
> Crypto performance test application
> M: Declan Doherty <declan.doherty@intel.com>
> F: app/test-crypto-perf/
>diff --git a/doc/guides/rel_notes/release_18_11.rst b/doc/guides/rel_notes/release_18_11.rst
>index 376128f..8bc7d05 100644
>--- a/doc/guides/rel_notes/release_18_11.rst
>+++ b/doc/guides/rel_notes/release_18_11.rst
>@@ -285,6 +285,12 @@ New Features
>   this application doesn't need to launch dedicated worker threads for vhost
>   enqueue/dequeue operations.
>
>+* **Added a compression performance test tool.**
>+
>+   Added a new performance test tool to test the compressdev PMD. The tool tests
>+   compression ratio and compression throughput. Dynamic compression test is not
>+   supported yet.
>+
>
> API Changes
> -----------
>diff --git a/doc/guides/tools/comp_perf.rst b/doc/guides/tools/comp_perf.rst
>new file mode 100644
>index 0000000..2f43412
>--- /dev/null
>+++ b/doc/guides/tools/comp_perf.rst
>@@ -0,0 +1,75 @@
>+..  SPDX-License-Identifier: BSD-3-Clause
>+    Copyright(c) 2018 Intel Corporation.
>+
>+dpdk-test-compress-perf Application
>+===================================
>+
>+The ``dpdk-test-compress-perf`` tool is a Data Plane Development Kit (DPDK)
>+utility that allows measuring performance parameters of PMDs available in the
>+compress tree. The application reads the data from a file (--input-file),
>+dumps all the file into a buffer and fills out the data of input mbufs,
>+which are passed to compress device with compression operations.
>+Then, the output buffers are fed into the decompression stage, and the resulting
>+data is compared against the original data (verification phase). After that,
>+a number of iterations are performed, compressing first and decompressing later,
>+to check the throughput rate
>+(showing cycles/iteration, cycles/Byte and Gbps, for compression and decompression).
>+
>+
>+Limitations
>+~~~~~~~~~~~
>+
>+* Only supports the fixed compression.
The perf app seems to support only stateless operation for now, so this should be stated as "Supports fixed compression and stateless operation only."
Thanks
Shally

>+
>+Command line options
>+--------------------
>+
>+ ``--driver-name NAME``: compress driver to use
>+
>+ ``--input-file NAME``: file to compress and decompress
>+
>+ ``--extended-input-sz N``: extend file data up to this size (default: no extension)
>+
>+ ``--seg-sz N``: size of segment to store the data (default: 2048)
>+
>+ ``--burst-sz N``: compress operation burst size
>+
>+ ``--pool-sz N``: mempool size for compress operations/mbufs (default: 8192)
>+
>+ ``--max-num-sgl-segs N``: maximum number of segments for each mbuf (default: 16)
>+
>+ ``--num-iter N``: number of times the file will be compressed/decompressed (default: 10000)
>+
>+ ``--operation [comp/decomp/comp_and_decomp]``: perform test on compression, decompression or both operations
>+
>+ ``--huffman-enc [fixed/default]``: Huffman encoding (default: fixed)
>+
>+ ``--compress-level N``: compression level, which could be a single value, list or range (default: range between 1 and 9)
>+
>+ ``--window-sz N``: base two log value of compression window size (default: max supported by PMD)
>+
>+ ``-h``: prints this help
>+
>+
>+Compiling the Application
>+-------------------------
>+
>+**Step 1: PMD setting**
>+
>+The ``dpdk-test-compress-perf`` tool depends on compression device drivers PMD which
>+can be disabled by default in the build configuration file ``common_base``.
>+The compression device drivers PMD which should be tested can be enabled by setting::
>+
>+   CONFIG_RTE_LIBRTE_PMD_ISAL=y
>+
>+
>+Running the Application
>+-----------------------
>+
>+The tool application has a number of command line options. Here is the sample command line:
>+
>+.. code-block:: console
>+
>+   ./build/app/dpdk-test-compress-perf  -l 4 -- --driver-name compress_qat --input-file test.txt --seg-sz 8192
>+    --compress-level 1:1:9 --num-iter 10 --extended-input-sz 1048576  --max-num-sgl-segs 16 --huffman-enc fixed
>+
>--
>2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
  2018-11-05  8:34       ` Verma, Shally
@ 2018-11-06  8:04         ` Jozwiak, TomaszX
  2018-11-06  8:15           ` Verma, Shally
  0 siblings, 1 reply; 76+ messages in thread
From: Jozwiak, TomaszX @ 2018-11-06  8:04 UTC (permalink / raw)
  To: Verma, Shally, dev, Trahe, Fiona, akhil.goyal

Hi Shally,

Please see my comment inline.

> -----Original Message-----
> From: Verma, Shally [mailto:Shally.Verma@cavium.com]
> Sent: Monday, November 5, 2018 9:34 AM
> To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; Trahe,
> Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com; De Lara Guarch, Pablo
> <pablo.de.lara.guarch@intel.com>
> Cc: De@dpdk.org; Lara@dpdk.org; Guarch@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
> measurement
> 
> 
> 
> >-----Original Message-----
> >From: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>
> >Sent: 02 November 2018 15:29
> >To: dev@dpdk.org; Trahe, Fiona <fiona.trahe@intel.com>;
> >akhil.goyal@nxp.com; Verma, Shally <Shally.Verma@cavium.com>; De Lara
> >Guarch, Pablo <pablo.de.lara.guarch@intel.com>
> >Cc: De@dpdk.org; Lara@dpdk.org; Guarch@dpdk.org
> >Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
> >measurement
> >
> >External Email
> >
> >Hi Shally,
> >
> >Sorry for delay - I was on sick leave.
> >We had some issues with dynamic compression test so I block this test
> >in V2. May be there's too late to add this into this release but we've decided
> to send this V2 to DPDK.
> >
> >My comment inline (not all have answer so far, still working on that)
> >
> >> -----Original Message-----
> >> From: Verma, Shally [mailto:Shally.Verma@cavium.com]
> >> Sent: Friday, October 12, 2018 12:16 PM
> >> To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org;
> >> Trahe, Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com; De Lara
> >> Guarch, Pablo <pablo.de.lara.guarch@intel.com>
> >> Cc: De@dpdk.org; Lara@dpdk.org; Guarch@dpdk.org
> >> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add
> >> performance measurement
> >>
> >> HI TomaszX
> >>
> >> Sorry for delay in response. Comments inline.
> >>
> >> >-----Original Message-----
> >> >From: dev <dev-bounces@dpdk.org> On Behalf Of Tomasz Jozwiak
> >> >Sent: 01 October 2018 18:57
> >> >To: dev@dpdk.org; fiona.trahe@intel.com; tomaszx.jozwiak@intel.com;
> >> >akhil.goyal@nxp.com; pablo.de.lara.guarch@intel.com
> >> >Cc: De@dpdk.org; Lara@dpdk.org; Guarch@dpdk.org
> >> >Subject: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
> >> >measurement
> >> >
> >> >External Email
> >> >
> >> >Added performance measurement part into compression perf. test.
> >> >
> >> >Signed-off-by: De Lara Guarch, Pablo
> >> ><pablo.de.lara.guarch@intel.com>
> >> >Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
> >> >---
> >> > app/test-compress-perf/main.c | 844
> >> >++++++++++++++++++++++++++++++++++++++++++
> >> > 1 file changed, 844 insertions(+)
> >> >
> >> >diff --git a/app/test-compress-perf/main.c
> >> >b/app/test-compress-perf/main.c index f52b98d..093dfaf 100644
> >> >--- a/app/test-compress-perf/main.c
> >> >+++ b/app/test-compress-perf/main.c
> >> >@@ -5,13 +5,721 @@
> >> > #include <rte_malloc.h>
> >> > #include <rte_eal.h>
> >> > #include <rte_log.h>
> >> >+#include <rte_cycles.h>
> >> > #include <rte_compressdev.h>
> >> >
> >> > #include "comp_perf_options.h"
> >> >
> >> >+#define NUM_MAX_XFORMS 16
> >> >+#define NUM_MAX_INFLIGHT_OPS 512
> >> >+#define EXPANSE_RATIO 1.05
> >> >+#define MIN_ISAL_SIZE 8
> >> >+
> >> >+#define DIV_CEIL(a, b)  ((a) / (b) + ((a) % (b) != 0))
> >> >+
> >> >+static int
> >> >+param_range_check(uint16_t size, const struct rte_param_log2_range
> >> >+*range) {
> >> >+       unsigned int next_size;
> >> >+
> >> >+       /* Check lower/upper bounds */
> >> >+       if (size < range->min)
> >> >+               return -1;
> >> >+
> >> >+       if (size > range->max)
> >> >+               return -1;
> >> >+
> >> >+       /* If range is actually only one value, size is correct */
> >> >+       if (range->increment == 0)
> >> >+               return 0;
> >> >+
> >> >+       /* Check if value is one of the supported sizes */
> >> >+       for (next_size = range->min; next_size <= range->max;
> >> >+                       next_size += range->increment)
> >> >+               if (size == next_size)
> >> >+                       return 0;
> >> >+
> >> >+       return -1;
> >> >+}
> >> >+
> >> >+static int
> >> >+comp_perf_check_capabilities(struct comp_test_data *test_data) {
> >> >+       const struct rte_compressdev_capabilities *cap;
> >> >+
> >> >+       cap = rte_compressdev_capability_get(test_data->cdev_id,
> >> >+                                            RTE_COMP_ALGO_DEFLATE);
> >> >+
> >> >+       if (cap == NULL) {
> >> >+               RTE_LOG(ERR, USER1,
> >> >+                       "Compress device does not support DEFLATE\n");
> >> >+               return -1;
> >> >+       }
> >> >+
> >> >+       uint64_t comp_flags = cap->comp_feature_flags;
> >> >+
> >> >+       /* Huffman enconding */
> >> >+       if (test_data->huffman_enc == RTE_COMP_HUFFMAN_FIXED &&
> >> >+                       (comp_flags & RTE_COMP_FF_HUFFMAN_FIXED) == 0) {
> >> >+               RTE_LOG(ERR, USER1,
> >> >+                       "Compress device does not supported Fixed Huffman\n");
> >> >+               return -1;
> >> >+       }
> >> >+
> >> >+       if (test_data->huffman_enc == RTE_COMP_HUFFMAN_DYNAMIC
> &&
> >> >+                       (comp_flags & RTE_COMP_FF_HUFFMAN_DYNAMIC) == 0)
> {
> >> >+               RTE_LOG(ERR, USER1,
> >> >+                       "Compress device does not supported Dynamic
> Huffman\n");
> >> >+               return -1;
> >> >+       }
> >> >+
> >> >+       /* Window size */
> >> >+       if (test_data->window_sz != -1) {
> >> >+               if (param_range_check(test_data->window_sz,
> >> >+ &cap->window_size)
> >> What if cap->window_size is 0 i.e. implementation default?
> >
> >TJ: You probably mean cap->window_size.increment = 0 (because
> >cap->window_size is a structure). In that case we check if
> >test_data->window_sz >=min and test_data->window_sz <= max only,
> because increment = 0 means (base on compression API) we have only one
> value of windows_size (no range is supported).
> But PMD can set min and max too 0 for such case.

TJ: I can't see any issue in that case either. Maybe I don't understand what you mean, but the logic is as follows:
1) If you pass the '--window-sz ...' parameter on the command line, your intention is to force that window size during the test. We check whether this value is allowed (using the param_range_check() function).
2) If you plan to use the default value, just don't pass the '--window-sz' parameter on the command line at all. In that case we take the window size from the window_size.max field, so if window_size.min = window_size.max = 0,
test_data->window_sz will be zero as well.
If you mean that this behavior is not good, I will be grateful for other suggestions.

> 
> >
> >
> >
> ....
> 
> >> >+
> >> >+               if (fread(data, data_to_read, 1, f) != 1) {
> >> >+                       RTE_LOG(ERR, USER1, "Input file could not be read\n");
> >> >+                       goto err;
> >> >+               }
> >> >+               if (fseek(f, 0, SEEK_SET) != 0) {
> >> >+                       RTE_LOG(ERR, USER1,
> >> >+                               "Size of input could not be calculated\n");
> >> >+                       goto err;
> >> >+               }
> >> >+               remaining_data -= data_to_read;
> >> >+               data += data_to_read;
> >> It looks like it will run 2nd time only if input file size < input
> >> data size in which case it will just keep filling input buffer with repeated
> data.
> >> Is that the intention here?
> >
> >TJ: Yes exactly. If test_data->input_data_sz is bigger than
> >actual_file_sz then we fill the buffer with repeated data from file to fill
> whole buffer.
> I mentioned in one of the earlier reply, wont that then influence the
> compression behaviour and o/p? my suggestion was to work on actual user
> provided input to take perf to get actual perf for given content.

TJ: You're right, but this solution is flexible. You can pass '--extended-input-sz' or not, so you can either use the original input data or extend it if you want.

> 
> >
> >>
> ...
> 
> >> >+                       if (data_addr == NULL) {
> >> >+                               RTE_LOG(ERR, USER1, "Could not
> >> >+ append data\n");
> >> Since a new buffer per segment is allocated, so is it possible for
> >> append to fail? think, this check is redundant here.
> >
> >TJ: Yes, you're right, it should never fail. But I think it's good coding practice
> to add the check just in case.
> >
> Unless it is called in data path which might cost perf a bit.

TJ: prepare_bufs() is outside the performance measurement, so it shouldn't impact the measurements. The performance measurement is done inside main_loop() only.


Br, Tomek

> 
> Thanks
> Shally
> 
> >> >+                               return -1;
> >> >+                       }
> >> >+
> >> >+                       rte_memcpy(data_addr, input_data_ptr, data_sz);
> >> >+                       input_data_ptr += data_sz;
> >> >+                       remaining_data -= data_sz;
> >> >+
> >> >+                       if (rte_pktmbuf_chain(test_data->decomp_bufs[i],
> >> >+                                       next_seg) < 0) {
> >> >+                               RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
> >> >+                               return -1;
> >> >+                       }
> >> >+                       segs_per_mbuf++;
> >> >+               }
> >> >+
> >> >+               /* Allocate data in output mbuf */
> >> >+               test_data->comp_bufs[i] =
> >> >+                       rte_pktmbuf_alloc(test_data->comp_buf_pool);
> >> >+               if (test_data->comp_bufs[i] == NULL) {
> >> >+                       RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
> >> >+                       return -1;
> >> >+               }
> >> >+               data_addr = (uint8_t *) rte_pktmbuf_append(
> >> >+                                       test_data->comp_bufs[i],
> >> >+                                       test_data->seg_sz);
> >> >+               if (data_addr == NULL) {
> >> >+                       RTE_LOG(ERR, USER1, "Could not append data\n");
> >> >+                       return -1;
> >> >+               }
> >> >+
> >> >+               /* Chain mbufs if needed for output mbufs */
> >> >+               for (j = 1; j < segs_per_mbuf; j++) {
> >> >+                       struct rte_mbuf *next_seg =
> >> >+
> >> >+ rte_pktmbuf_alloc(test_data->comp_buf_pool);
> >> >+
> >> >+                       if (next_seg == NULL) {
> >> >+                               RTE_LOG(ERR, USER1,
> >> >+                                       "Could not allocate mbuf\n");
> >> >+                               return -1;
> >> >+                       }
> >> >+
> >> >+                       data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
> >> >+                               test_data->seg_sz);
> >> >+
> >> >+                       if (data_addr == NULL) {
> >> >+                               RTE_LOG(ERR, USER1, "Could not append data\n");
> >> >+                               return -1;
> >> >+                       }
> >> >+
> >> >+                       if (rte_pktmbuf_chain(test_data->comp_bufs[i],
> >> >+                                       next_seg) < 0) {
> >> >+                               RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
> >> >+                               return -1;
> >> >+                       }
> >> >+               }
> >> >+       }
> >> >+
> >> >+       return 0;
> >> >+}
> >> >+
> >> >+static void
> >> >+free_bufs(struct comp_test_data *test_data) {
> >> >+       uint32_t i;
> >> >+
> >> >+       for (i = 0; i < test_data->total_bufs; i++) {
> >> >+               rte_pktmbuf_free(test_data->comp_bufs[i]);
> >> >+               rte_pktmbuf_free(test_data->decomp_bufs[i]);
> >> >+       }
> >> >+       rte_free(test_data->comp_bufs);
> >> >+       rte_free(test_data->decomp_bufs); }
> >> >+
> >> >+static int
> >> >+main_loop(struct comp_test_data *test_data, uint8_t level,
> >> >+                       enum rte_comp_xform_type type,
> >> >+                       uint8_t *output_data_ptr,
> >> >+                       size_t *output_data_sz,
> >> >+                       unsigned int benchmarking) {
> >> >+       uint8_t dev_id = test_data->cdev_id;
> >> >+       uint32_t i, iter, num_iter;
> >> >+       struct rte_comp_op **ops, **deq_ops;
> >> >+       void *priv_xform = NULL;
> >> >+       struct rte_comp_xform xform;
> >> >+       size_t output_size = 0;
> >> >+       struct rte_mbuf **input_bufs, **output_bufs;
> >> >+       int res = 0;
> >> >+       int allocated = 0;
> >> >+
> >> >+       if (test_data == NULL || !test_data->burst_sz) {
> >> >+               RTE_LOG(ERR, USER1,
> >> >+                       "Unknow burst size\n");
> >> >+               return -1;
> >> >+       }
> >> >+
> >> >+       ops = rte_zmalloc_socket(NULL,
> >> >+               2 * test_data->total_bufs * sizeof(struct rte_comp_op *),
> >> >+               0, rte_socket_id());
> >> >+
> >> >+       if (ops == NULL) {
> >> >+               RTE_LOG(ERR, USER1,
> >> >+                       "Can't allocate memory for ops strucures\n");
> >> >+               return -1;
> >> >+       }
> >> >+
> >> >+       deq_ops = &ops[test_data->total_bufs];
> >> >+
> >> >+       if (type == RTE_COMP_COMPRESS) {
> >> >+               xform = (struct rte_comp_xform) {
> >> >+                       .type = RTE_COMP_COMPRESS,
> >> >+                       .compress = {
> >> >+                               .algo = RTE_COMP_ALGO_DEFLATE,
> >> >+                               .deflate.huffman = test_data->huffman_enc,
> >> >+                               .level = level,
> >> >+                               .window_size = test_data->window_sz,
> >> >+                               .chksum = RTE_COMP_CHECKSUM_NONE,
> >> >+                               .hash_algo = RTE_COMP_HASH_ALGO_NONE
> >> >+                       }
> >> >+               };
> >> >+               input_bufs = test_data->decomp_bufs;
> >> >+               output_bufs = test_data->comp_bufs;
> >> >+       } else {
> >> >+               xform = (struct rte_comp_xform) {
> >> >+                       .type = RTE_COMP_DECOMPRESS,
> >> >+                       .decompress = {
> >> >+                               .algo = RTE_COMP_ALGO_DEFLATE,
> >> >+                               .chksum = RTE_COMP_CHECKSUM_NONE,
> >> >+                               .window_size = test_data->window_sz,
> >> >+                               .hash_algo = RTE_COMP_HASH_ALGO_NONE
> >> >+                       }
> >> >+               };
> >> >+               input_bufs = test_data->comp_bufs;
> >> >+               output_bufs = test_data->decomp_bufs;
> >> >+       }
> >> >+
> >> >+       /* Create private xform */
> >> >+       if (rte_compressdev_private_xform_create(dev_id, &xform,
> >> >+                       &priv_xform) < 0) {
> >> >+               RTE_LOG(ERR, USER1, "Private xform could not be created\n");
> >> >+               res = -1;
> >> >+               goto end;
> >> >+       }
> >> >+
> >> >+       uint64_t tsc_start, tsc_end, tsc_duration;
> >> >+
> >> >+       tsc_start = tsc_end = tsc_duration = 0;
> >> >+       if (benchmarking) {
> >> >+               tsc_start = rte_rdtsc();
> >> >+               num_iter = test_data->num_iter;
> >> >+       } else
> >> >+               num_iter = 1;
> >> Looks like in same code we're doing benchmarking and functional
> validation.
> >> It can be reorganised to keep validation test separately like done in
> >> crypto_perf.
> >
> >TJ: Ok, makes sense. However in the interests of getting this into the
> >18.11 release I'd like to defer this refactoring and the remainder of your
> comments below to the next release.
> >
> >
> >Next comments - WIP
> >
> >
> >Br, Tomek

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
  2018-11-06  8:04         ` Jozwiak, TomaszX
@ 2018-11-06  8:15           ` Verma, Shally
  2018-11-06  9:05             ` Jozwiak, TomaszX
  0 siblings, 1 reply; 76+ messages in thread
From: Verma, Shally @ 2018-11-06  8:15 UTC (permalink / raw)
  To: Jozwiak, TomaszX, dev, Trahe, Fiona, akhil.goyal



>-----Original Message-----
>From: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>
>Sent: 06 November 2018 13:34
>To: Verma, Shally <Shally.Verma@cavium.com>; dev@dpdk.org; Trahe, Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com
>Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
>
>External Email
>
>Hi Shally,
>
>Please see my comment inline.
>
>> -----Original Message-----
>> From: Verma, Shally [mailto:Shally.Verma@cavium.com]
>> Sent: Monday, November 5, 2018 9:34 AM
>> To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; Trahe,
>> Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com; De Lara Guarch, Pablo
>> <pablo.de.lara.guarch@intel.com>
>> Cc: De@dpdk.org; Lara@dpdk.org; Guarch@dpdk.org
>> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
>> measurement
>>
>>
>>
>> >-----Original Message-----
>> >From: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>
>> >Sent: 02 November 2018 15:29
>> >To: dev@dpdk.org; Trahe, Fiona <fiona.trahe@intel.com>;
>> >akhil.goyal@nxp.com; Verma, Shally <Shally.Verma@cavium.com>; De Lara
>> >Guarch, Pablo <pablo.de.lara.guarch@intel.com>
>> >Cc: De@dpdk.org; Lara@dpdk.org; Guarch@dpdk.org
>> >Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
>> >measurement
>> >
>> >External Email
>> >
>> >Hi Shally,
>> >
>> >Sorry for delay - I was on sick leave.
>> >We had some issues with dynamic compression test so I block this test
>> >in V2. May be there's too late to add this into this release but we've decided
>> to send this V2 to DPDK.
>> >
>> >My comment inline (not all have answer so far, still working on that)
>> >
>> >> -----Original Message-----
>> >> From: Verma, Shally [mailto:Shally.Verma@cavium.com]
>> >> Sent: Friday, October 12, 2018 12:16 PM
>> >> To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org;
>> >> Trahe, Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com; De Lara
>> >> Guarch, Pablo <pablo.de.lara.guarch@intel.com>
>> >> Cc: De@dpdk.org; Lara@dpdk.org; Guarch@dpdk.org
>> >> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add
>> >> performance measurement
>> >>
>> >> HI TomaszX
>> >>
>> >> Sorry for delay in response. Comments inline.
>> >>
>> >> >-----Original Message-----
>> >> >From: dev <dev-bounces@dpdk.org> On Behalf Of Tomasz Jozwiak
>> >> >Sent: 01 October 2018 18:57
>> >> >To: dev@dpdk.org; fiona.trahe@intel.com; tomaszx.jozwiak@intel.com;
>> >> >akhil.goyal@nxp.com; pablo.de.lara.guarch@intel.com
>> >> >Cc: De@dpdk.org; Lara@dpdk.org; Guarch@dpdk.org
>> >> >Subject: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
>> >> >measurement
>> >> >
>> >> >External Email
>> >> >
>> >> >Added performance measurement part into compression perf. test.
>> >> >
>> >> >Signed-off-by: De Lara Guarch, Pablo
>> >> ><pablo.de.lara.guarch@intel.com>
>> >> >Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
>> >> >---
>> >> > app/test-compress-perf/main.c | 844
>> >> >++++++++++++++++++++++++++++++++++++++++++
>> >> > 1 file changed, 844 insertions(+)
>> >> >
>> >> >diff --git a/app/test-compress-perf/main.c
>> >> >b/app/test-compress-perf/main.c index f52b98d..093dfaf 100644
>> >> >--- a/app/test-compress-perf/main.c
>> >> >+++ b/app/test-compress-perf/main.c
>> >> >@@ -5,13 +5,721 @@
>> >> > #include <rte_malloc.h>
>> >> > #include <rte_eal.h>
>> >> > #include <rte_log.h>
>> >> >+#include <rte_cycles.h>
>> >> > #include <rte_compressdev.h>
>> >> >
>> >> > #include "comp_perf_options.h"
>> >> >
>> >> >+#define NUM_MAX_XFORMS 16
>> >> >+#define NUM_MAX_INFLIGHT_OPS 512
>> >> >+#define EXPANSE_RATIO 1.05
>> >> >+#define MIN_ISAL_SIZE 8
>> >> >+
>> >> >+#define DIV_CEIL(a, b)  ((a) / (b) + ((a) % (b) != 0))
>> >> >+
>> >> >+static int
>> >> >+param_range_check(uint16_t size, const struct rte_param_log2_range
>> >> >+*range) {
>> >> >+       unsigned int next_size;
>> >> >+
>> >> >+       /* Check lower/upper bounds */
>> >> >+       if (size < range->min)
>> >> >+               return -1;
>> >> >+
>> >> >+       if (size > range->max)
>> >> >+               return -1;
>> >> >+
>> >> >+       /* If range is actually only one value, size is correct */
>> >> >+       if (range->increment == 0)
>> >> >+               return 0;
>> >> >+
>> >> >+       /* Check if value is one of the supported sizes */
>> >> >+       for (next_size = range->min; next_size <= range->max;
>> >> >+                       next_size += range->increment)
>> >> >+               if (size == next_size)
>> >> >+                       return 0;
>> >> >+
>> >> >+       return -1;
>> >> >+}
>> >> >+
>> >> >+static int
>> >> >+comp_perf_check_capabilities(struct comp_test_data *test_data) {
>> >> >+       const struct rte_compressdev_capabilities *cap;
>> >> >+
>> >> >+       cap = rte_compressdev_capability_get(test_data->cdev_id,
>> >> >+                                            RTE_COMP_ALGO_DEFLATE);
>> >> >+
>> >> >+       if (cap == NULL) {
>> >> >+               RTE_LOG(ERR, USER1,
>> >> >+                       "Compress device does not support DEFLATE\n");
>> >> >+               return -1;
>> >> >+       }
>> >> >+
>> >> >+       uint64_t comp_flags = cap->comp_feature_flags;
>> >> >+
>> >> >+       /* Huffman enconding */
>> >> >+       if (test_data->huffman_enc == RTE_COMP_HUFFMAN_FIXED &&
>> >> >+                       (comp_flags & RTE_COMP_FF_HUFFMAN_FIXED) == 0) {
>> >> >+               RTE_LOG(ERR, USER1,
>> >> >+                       "Compress device does not supported Fixed Huffman\n");
>> >> >+               return -1;
>> >> >+       }
>> >> >+
>> >> >+       if (test_data->huffman_enc == RTE_COMP_HUFFMAN_DYNAMIC
>> &&
>> >> >+                       (comp_flags & RTE_COMP_FF_HUFFMAN_DYNAMIC) == 0)
>> {
>> >> >+               RTE_LOG(ERR, USER1,
>> >> >+                       "Compress device does not supported Dynamic
>> Huffman\n");
>> >> >+               return -1;
>> >> >+       }
>> >> >+
>> >> >+       /* Window size */
>> >> >+       if (test_data->window_sz != -1) {
>> >> >+               if (param_range_check(test_data->window_sz,
>> >> >+ &cap->window_size)
>> >> What if cap->window_size is 0 i.e. implementation default?
>> >
>> >TJ: You probably mean cap->window_size.increment = 0 (because
>> >cap->window_size is a structure). In that case we check if
>> >test_data->window_sz >=min and test_data->window_sz <= max only,
>> because increment = 0 means (base on compression API) we have only one
>> value of windows_size (no range is supported).
>> But PMD can set min and max too 0 for such case.
>
>TJ: I can't see any issue in that case either. Maybe I don't understand what you mean, but the logic is as follows:
>1) If you pass the '--window-sz ...' param on the command line, your intention is to force that window size during the test. We check whether
>this value is allowed (via the param_range_check() function).
>2) If you plan to use the default value, just don't pass the '--window-sz' param on the command line at all. In that case we take the window size from
>the window_size.max field, so if window_size.min = window_size.max = 0,
>test_data->window_sz will be zero as well.
>If you think that behavior is not good, I will be grateful for other suggestions.

This is fine, but I am thinking of a 3rd case here:
3) the user passes a window size but the PMD's window_size.min = max = 0; in that case the user-requested window size is not applicable, right?

>
>>
>> >
>> >
>> >
>> ....
>>
>> >> >+
>> >> >+               if (fread(data, data_to_read, 1, f) != 1) {
>> >> >+                       RTE_LOG(ERR, USER1, "Input file could not be read\n");
>> >> >+                       goto err;
>> >> >+               }
>> >> >+               if (fseek(f, 0, SEEK_SET) != 0) {
>> >> >+                       RTE_LOG(ERR, USER1,
>> >> >+                               "Size of input could not be calculated\n");
>> >> >+                       goto err;
>> >> >+               }
>> >> >+               remaining_data -= data_to_read;
>> >> >+               data += data_to_read;
>> >> It looks like it will run 2nd time only if input file size < input
>> >> data size in which case it will just keep filling input buffer with repeated
>> data.
>> >> Is that the intention here?
>> >
>> >TJ: Yes exactly. If test_data->input_data_sz is bigger than
>> >actual_file_sz then we fill the buffer with repeated data from file to fill
>> whole buffer.
>> I mentioned in one of the earlier reply, wont that then influence the
>> compression behaviour and o/p? my suggestion was to work on actual user
>> provided input to take perf to get actual perf for given content.
>
>TJ: You right, but this solution is flexible. You can pass ' --extended-input-sz" or not, so you can use original input data or extend it if
>you want.
OK, but I'm still not sure it's really needed. In practice it probably won't be exercised most of the time. No strong opinion on this, though.

Thanks
Shally
>
>>
>> >
>> >>
>> ...
>>
>> >> >+                       if (data_addr == NULL) {
>> >> >+                               RTE_LOG(ERR, USER1, "Could not
>> >> >+ append data\n");
>> >> Since a new buffer per segment is allocated, so is it possible for
>> >> append to fail? think, this check is redundant here.
>> >
>> >TJ: Yes, you're right, it should never fail. But I think it's good coding practice
>> to add the check just in case.
>> >
>> Unless it is called in data path which might cost perf a bit.
>
>TJ:  prepare_bufs() is out of perf measurement, so shouldn't impact to measurements. The performance measurement is inside
>main_loop() only.
>
>
>Br, Tomek
>
>>
>> Thanks
>> Shally
>>
>> >> >+                               return -1;
>> >> >+                       }
>> >> >+
>> >> >+                       rte_memcpy(data_addr, input_data_ptr, data_sz);
>> >> >+                       input_data_ptr += data_sz;
>> >> >+                       remaining_data -= data_sz;
>> >> >+
>> >> >+                       if (rte_pktmbuf_chain(test_data->decomp_bufs[i],
>> >> >+                                       next_seg) < 0) {
>> >> >+                               RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
>> >> >+                               return -1;
>> >> >+                       }
>> >> >+                       segs_per_mbuf++;
>> >> >+               }
>> >> >+
>> >> >+               /* Allocate data in output mbuf */
>> >> >+               test_data->comp_bufs[i] =
>> >> >+                       rte_pktmbuf_alloc(test_data->comp_buf_pool);
>> >> >+               if (test_data->comp_bufs[i] == NULL) {
>> >> >+                       RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
>> >> >+                       return -1;
>> >> >+               }
>> >> >+               data_addr = (uint8_t *) rte_pktmbuf_append(
>> >> >+                                       test_data->comp_bufs[i],
>> >> >+                                       test_data->seg_sz);
>> >> >+               if (data_addr == NULL) {
>> >> >+                       RTE_LOG(ERR, USER1, "Could not append data\n");
>> >> >+                       return -1;
>> >> >+               }
>> >> >+
>> >> >+               /* Chain mbufs if needed for output mbufs */
>> >> >+               for (j = 1; j < segs_per_mbuf; j++) {
>> >> >+                       struct rte_mbuf *next_seg =
>> >> >+
>> >> >+ rte_pktmbuf_alloc(test_data->comp_buf_pool);
>> >> >+
>> >> >+                       if (next_seg == NULL) {
>> >> >+                               RTE_LOG(ERR, USER1,
>> >> >+                                       "Could not allocate mbuf\n");
>> >> >+                               return -1;
>> >> >+                       }
>> >> >+
>> >> >+                       data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
>> >> >+                               test_data->seg_sz);
>> >> >+
>> >> >+                       if (data_addr == NULL) {
>> >> >+                               RTE_LOG(ERR, USER1, "Could not append data\n");
>> >> >+                               return -1;
>> >> >+                       }
>> >> >+
>> >> >+                       if (rte_pktmbuf_chain(test_data->comp_bufs[i],
>> >> >+                                       next_seg) < 0) {
>> >> >+                               RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
>> >> >+                               return -1;
>> >> >+                       }
>> >> >+               }
>> >> >+       }
>> >> >+
>> >> >+       return 0;
>> >> >+}
>> >> >+
>> >> >+static void
>> >> >+free_bufs(struct comp_test_data *test_data) {
>> >> >+       uint32_t i;
>> >> >+
>> >> >+       for (i = 0; i < test_data->total_bufs; i++) {
>> >> >+               rte_pktmbuf_free(test_data->comp_bufs[i]);
>> >> >+               rte_pktmbuf_free(test_data->decomp_bufs[i]);
>> >> >+       }
>> >> >+       rte_free(test_data->comp_bufs);
>> >> >+       rte_free(test_data->decomp_bufs); }
>> >> >+
>> >> >+static int
>> >> >+main_loop(struct comp_test_data *test_data, uint8_t level,
>> >> >+                       enum rte_comp_xform_type type,
>> >> >+                       uint8_t *output_data_ptr,
>> >> >+                       size_t *output_data_sz,
>> >> >+                       unsigned int benchmarking) {
>> >> >+       uint8_t dev_id = test_data->cdev_id;
>> >> >+       uint32_t i, iter, num_iter;
>> >> >+       struct rte_comp_op **ops, **deq_ops;
>> >> >+       void *priv_xform = NULL;
>> >> >+       struct rte_comp_xform xform;
>> >> >+       size_t output_size = 0;
>> >> >+       struct rte_mbuf **input_bufs, **output_bufs;
>> >> >+       int res = 0;
>> >> >+       int allocated = 0;
>> >> >+
>> >> >+       if (test_data == NULL || !test_data->burst_sz) {
>> >> >+               RTE_LOG(ERR, USER1,
>> >> >+                       "Unknow burst size\n");
>> >> >+               return -1;
>> >> >+       }
>> >> >+
>> >> >+       ops = rte_zmalloc_socket(NULL,
>> >> >+               2 * test_data->total_bufs * sizeof(struct rte_comp_op *),
>> >> >+               0, rte_socket_id());
>> >> >+
>> >> >+       if (ops == NULL) {
>> >> >+               RTE_LOG(ERR, USER1,
>> >> >+                       "Can't allocate memory for ops strucures\n");
>> >> >+               return -1;
>> >> >+       }
>> >> >+
>> >> >+       deq_ops = &ops[test_data->total_bufs];
>> >> >+
>> >> >+       if (type == RTE_COMP_COMPRESS) {
>> >> >+               xform = (struct rte_comp_xform) {
>> >> >+                       .type = RTE_COMP_COMPRESS,
>> >> >+                       .compress = {
>> >> >+                               .algo = RTE_COMP_ALGO_DEFLATE,
>> >> >+                               .deflate.huffman = test_data->huffman_enc,
>> >> >+                               .level = level,
>> >> >+                               .window_size = test_data->window_sz,
>> >> >+                               .chksum = RTE_COMP_CHECKSUM_NONE,
>> >> >+                               .hash_algo = RTE_COMP_HASH_ALGO_NONE
>> >> >+                       }
>> >> >+               };
>> >> >+               input_bufs = test_data->decomp_bufs;
>> >> >+               output_bufs = test_data->comp_bufs;
>> >> >+       } else {
>> >> >+               xform = (struct rte_comp_xform) {
>> >> >+                       .type = RTE_COMP_DECOMPRESS,
>> >> >+                       .decompress = {
>> >> >+                               .algo = RTE_COMP_ALGO_DEFLATE,
>> >> >+                               .chksum = RTE_COMP_CHECKSUM_NONE,
>> >> >+                               .window_size = test_data->window_sz,
>> >> >+                               .hash_algo = RTE_COMP_HASH_ALGO_NONE
>> >> >+                       }
>> >> >+               };
>> >> >+               input_bufs = test_data->comp_bufs;
>> >> >+               output_bufs = test_data->decomp_bufs;
>> >> >+       }
>> >> >+
>> >> >+       /* Create private xform */
>> >> >+       if (rte_compressdev_private_xform_create(dev_id, &xform,
>> >> >+                       &priv_xform) < 0) {
>> >> >+               RTE_LOG(ERR, USER1, "Private xform could not be created\n");
>> >> >+               res = -1;
>> >> >+               goto end;
>> >> >+       }
>> >> >+
>> >> >+       uint64_t tsc_start, tsc_end, tsc_duration;
>> >> >+
>> >> >+       tsc_start = tsc_end = tsc_duration = 0;
>> >> >+       if (benchmarking) {
>> >> >+               tsc_start = rte_rdtsc();
>> >> >+               num_iter = test_data->num_iter;
>> >> >+       } else
>> >> >+               num_iter = 1;
>> >> Looks like in same code we're doing benchmarking and functional
>> validation.
>> >> It can be reorganised to keep validation test separately like done in
>> >> crypto_perf.
>> >
>> >TJ: Ok, makes sense. However in the interests of getting this into the
>> >18.11 release I'd like to defer this refactoring and the remainder of your
>> comments below to the next release.
>> >
>> >
>> >Next comments - WIP
>> >
>> >
>> >Br, Tomek

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/3] app/compress-perf: add parser
  2018-11-05  8:40     ` Verma, Shally
@ 2018-11-06  8:30       ` Jozwiak, TomaszX
  2018-11-06  8:32         ` Verma, Shally
  0 siblings, 1 reply; 76+ messages in thread
From: Jozwiak, TomaszX @ 2018-11-06  8:30 UTC (permalink / raw)
  To: Verma, Shally, dev, Trahe, Fiona, akhil.goyal



> -----Original Message-----
> From: Verma, Shally [mailto:Shally.Verma@cavium.com]
> Sent: Monday, November 5, 2018 9:40 AM
> To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; Trahe,
> Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com
> Subject: RE: [PATCH v2 1/3] app/compress-perf: add parser
> 
> 
> 
> >-----Original Message-----
> >From: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
> >Sent: 02 November 2018 15:14
> >To: dev@dpdk.org; fiona.trahe@intel.com; tomaszx.jozwiak@intel.com;
> >Verma, Shally <Shally.Verma@cavium.com>; akhil.goyal@nxp.com
> >Subject: [PATCH v2 1/3] app/compress-perf: add parser
> >
> >External Email
> >
> >Added parser part into compression perf. test.
> >
> >Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
> >Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
> >---
> > app/Makefile                                     |   4 +
> > app/meson.build                                  |   1 +
> > app/test-compress-perf/Makefile                  |  16 +
> > app/test-compress-perf/comp_perf_options.h       |  59 +++
> > app/test-compress-perf/comp_perf_options_parse.c | 596
> +++++++++++++++++++++++
> > app/test-compress-perf/main.c                    |  52 ++
> > app/test-compress-perf/meson.build               |   7 +
> > config/common_base                               |   5 +
> > 8 files changed, 740 insertions(+)
> > create mode 100644 app/test-compress-perf/Makefile  create mode
> 100644
> >app/test-compress-perf/comp_perf_options.h
> > create mode 100644 app/test-compress-perf/comp_perf_options_parse.c
> > create mode 100644 app/test-compress-perf/main.c  create mode 100644
> >app/test-compress-perf/meson.build
> >
> >diff --git a/app/Makefile b/app/Makefile index 069fa98..d6641ef 100644
> >--- a/app/Makefile
> >+++ b/app/Makefile
> >@@ -11,6 +11,10 @@ ifeq ($(CONFIG_RTE_LIBRTE_BBDEV),y)
> > DIRS-$(CONFIG_RTE_TEST_BBDEV) += test-bbdev  endif
> >
> >+ifeq ($(CONFIG_RTE_LIBRTE_COMPRESSDEV),y)
> >+DIRS-$(CONFIG_RTE_APP_COMPRESS_PERF) += test-compress-perf endif
> >+
> > ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
> > DIRS-$(CONFIG_RTE_APP_CRYPTO_PERF) += test-crypto-perf  endif diff
> >--git a/app/meson.build b/app/meson.build index a9a026b..47a2a86 100644
> >--- a/app/meson.build
> >+++ b/app/meson.build
> >@@ -4,6 +4,7 @@
> > apps = ['pdump',
> >        'proc-info',
> >        'test-bbdev',
> >+       'test-compress-perf',
> >        'test-crypto-perf',
> >        'test-eventdev',
> >        'test-pmd']
> >diff --git a/app/test-compress-perf/Makefile
> >b/app/test-compress-perf/Makefile new file mode 100644 index
> >0000000..8aa7a22
> >--- /dev/null
> >+++ b/app/test-compress-perf/Makefile
> >@@ -0,0 +1,16 @@
> >+# SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2018 Intel
> >+Corporation
> >+
> >+include $(RTE_SDK)/mk/rte.vars.mk
> >+
> >+APP = dpdk-test-compress-perf
> >+
> >+CFLAGS += $(WERROR_FLAGS)
> >+CFLAGS += -DALLOW_EXPERIMENTAL_API
> >+CFLAGS += -O3
> >+
> >+# all source are stored in SRCS-y
> >+SRCS-y := main.c
> >+SRCS-y += comp_perf_options_parse.c
> >+
> >+include $(RTE_SDK)/mk/rte.app.mk
> >diff --git a/app/test-compress-perf/comp_perf_options.h
> >b/app/test-compress-perf/comp_perf_options.h
> >new file mode 100644
> >index 0000000..7516ea0
> >--- /dev/null
> >+++ b/app/test-compress-perf/comp_perf_options.h
> >@@ -0,0 +1,59 @@
> >+/* SPDX-License-Identifier: BSD-3-Clause
> >+ * Copyright(c) 2018 Intel Corporation  */
> >+
> >+#define MAX_DRIVER_NAME                64
> >+#define MAX_INPUT_FILE_NAME    64
> >+#define MAX_LIST               32
> >+
> >+enum comp_operation {
> >+       COMPRESS_ONLY,
> >+       DECOMPRESS_ONLY,
> >+       COMPRESS_DECOMPRESS
> >+};
> >+
> >+struct range_list {
> >+       uint8_t min;
> >+       uint8_t max;
> >+       uint8_t inc;
> >+       uint8_t count;
> >+       uint8_t list[MAX_LIST];
> >+};
> >+
> >+struct comp_test_data {
> >+       char driver_name[64];
> >+       char input_file[64];
> >+       struct rte_mbuf **comp_bufs;
> >+       struct rte_mbuf **decomp_bufs;
> >+       uint32_t total_bufs;
> >+       uint8_t *input_data;
> >+       size_t input_data_sz;
> >+       uint8_t *compressed_data;
> >+       uint8_t *decompressed_data;
> >+       struct rte_mempool *comp_buf_pool;
> >+       struct rte_mempool *decomp_buf_pool;
> >+       struct rte_mempool *op_pool;
> >+       int8_t cdev_id;
> >+       uint16_t seg_sz;
> >+       uint16_t burst_sz;
> >+       uint32_t pool_sz;
> >+       uint32_t num_iter;
> >+       uint16_t max_sgl_segs;
> >+       enum rte_comp_huffman huffman_enc;
> >+       enum comp_operation test_op;
> >+       int window_sz;
> >+       struct range_list level;
> >+       /* Store TSC duration for all levels (including level 0) */
> >+       uint64_t comp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
> >+       uint64_t decomp_tsc_duration[RTE_COMP_LEVEL_MAX + 1]; };
> >+
> >+int
> >+comp_perf_options_parse(struct comp_test_data *test_data, int argc,
> >+                       char **argv);
> >+
> >+void
> >+comp_perf_options_default(struct comp_test_data *test_data);
> >+
> >+int
> >+comp_perf_options_check(struct comp_test_data *test_data);
> >diff --git a/app/test-compress-perf/comp_perf_options_parse.c
> >b/app/test-compress-perf/comp_perf_options_parse.c
> >new file mode 100644
> >index 0000000..bef4d2f
> >--- /dev/null
> >+++ b/app/test-compress-perf/comp_perf_options_parse.c
> >@@ -0,0 +1,596 @@
> >+/* SPDX-License-Identifier: BSD-3-Clause
> >+ * Copyright(c) 2018 Intel Corporation  */
> >+
> >+#include <getopt.h>
> >+#include <stdint.h>
> >+#include <stdio.h>
> >+#include <string.h>
> >+#include <inttypes.h>
> >+#include <stdlib.h>
> >+#include <errno.h>
> >+
> >+#include <rte_string_fns.h>
> >+#include <rte_comp.h>
> >+
> >+#include "comp_perf_options.h"
> >+
> >+#define CPERF_DRIVER_NAME      ("driver-name")
> >+#define CPERF_TEST_FILE                ("input-file")
> >+#define CPERF_SEG_SIZE         ("seg-sz")
> >+#define CPERF_BURST_SIZE       ("burst-sz")
> >+#define CPERF_EXTENDED_SIZE    ("extended-input-sz")
> >+#define CPERF_POOL_SIZE                ("pool-sz")
> >+#define CPERF_MAX_SGL_SEGS     ("max-num-sgl-segs")
> >+#define CPERF_NUM_ITER         ("num-iter")
> >+#define CPERF_OPTYPE           ("operation")
> >+#define CPERF_HUFFMAN_ENC      ("huffman-enc")
> >+#define CPERF_LEVEL            ("compress-level")
> >+#define CPERF_WINDOW_SIZE      ("window-sz")
> >+
> >+struct name_id_map {
> >+       const char *name;
> >+       uint32_t id;
> >+};
> >+
> >+static void
> >+usage(char *progname)
> >+{
> >+       printf("%s [EAL options] --\n"
> >+               " --driver-name NAME: compress driver to use\n"
> >+               " --input-file NAME: file to compress and decompress\n"
> >+               " --extended-input-sz N: extend file data up to this size (default:
> no extension)\n"
> >+               " --seg-sz N: size of segment to store the data (default: 2048)\n"
> >+               " --burst-sz N: compress operation burst size\n"
> >+               " --pool-sz N: mempool size for compress operations/mbufs\n"
> >+               "               (default: 8192)\n"
> >+               " --max-num-sgl-segs N: maximum number of segments for each
> mbuf\n"
> We still taking it as input?

TJ: In this version, yes.



> 
> >+               "               (default: 65535)\n"
> >+               " --num-iter N: number of times the file will be\n"
> >+               "               compressed/decompressed (default: 10000)\n"
> >+               " --operation [comp/decomp/comp_and_decomp]: perform test
> on\n"
> >+               "               compression, decompression or both operations\n"
> >+               " --huffman-enc [fixed/dynamic/default]: Huffman encoding\n"
> >+               "               (default: dynamic)\n"
> >+               " --compress-level N: compression level, which could be a single
> value, list or range\n"
> >+               "               (default: range between 1 and 9)\n"
> >+               " --window-sz N: base two log value of compression window
> size\n"
> >+               "               (e.g.: 15 => 32k, default: max supported by PMD)\n"
> >+               " -h: prints this help\n",
> >+               progname);
> >+}
> >+
> >+static int
> >+get_str_key_id_mapping(struct name_id_map *map, unsigned int
> map_len,
> >+               const char *str_key)
> >+{
> >+       unsigned int i;
> >+
> >+       for (i = 0; i < map_len; i++) {
> >+
> >+               if (strcmp(str_key, map[i].name) == 0)
> >+                       return map[i].id;
> >+       }
> >+
> >+       return -1;
> >+}
> >+
> >+static int
> >+parse_uint32_t(uint32_t *value, const char *arg) {
> >+       char *end = NULL;
> >+       unsigned long n = strtoul(arg, &end, 10);
> >+
> >+       if ((optarg[0] == '\0') || (end == NULL) || (*end != '\0'))
> >+               return -1;
> >+
> >+       if (n > UINT32_MAX)
> >+               return -ERANGE;
> >+
> >+       *value = (uint32_t) n;
> >+
> >+       return 0;
> >+}
> >+
> >+static int
> >+parse_uint16_t(uint16_t *value, const char *arg) {
> >+       uint32_t val = 0;
> >+       int ret = parse_uint32_t(&val, arg);
> >+
> >+       if (ret < 0)
> >+               return ret;
> >+
> >+       if (val > UINT16_MAX)
> >+               return -ERANGE;
> >+
> >+       *value = (uint16_t) val;
> >+
> >+       return 0;
> >+}
> >+
> >+static int
> >+parse_range(const char *arg, uint8_t *min, uint8_t *max, uint8_t *inc)
> >+{
> >+       char *token;
> >+       uint8_t number;
> >+
> >+       char *copy_arg = strdup(arg);
> >+
> >+       if (copy_arg == NULL)
> >+               return -1;
> >+
> >+       errno = 0;
> >+       token = strtok(copy_arg, ":");
> >+
> >+       /* Parse minimum value */
> >+       if (token != NULL) {
> >+               number = strtoul(token, NULL, 10);
> >+
> >+               if (errno == EINVAL || errno == ERANGE)
> >+                       goto err_range;
> >+
> >+               *min = number;
> >+       } else
> >+               goto err_range;
> >+
> >+       token = strtok(NULL, ":");
> >+
> >+       /* Parse increment value */
> >+       if (token != NULL) {
> >+               number = strtoul(token, NULL, 10);
> >+
> >+               if (errno == EINVAL || errno == ERANGE ||
> >+                               number == 0)
> >+                       goto err_range;
> >+
> >+               *inc = number;
> >+       } else
> >+               goto err_range;
> >+
> >+       token = strtok(NULL, ":");
> >+
> >+       /* Parse maximum value */
> >+       if (token != NULL) {
> >+               number = strtoul(token, NULL, 10);
> >+
> >+               if (errno == EINVAL || errno == ERANGE ||
> >+                               number < *min)
> >+                       goto err_range;
> >+
> >+               *max = number;
> >+       } else
> >+               goto err_range;
> >+
> >+       if (strtok(NULL, ":") != NULL)
> >+               goto err_range;
> >+
> >+       free(copy_arg);
> >+       return 0;
> >+
> >+err_range:
> >+       free(copy_arg);
> >+       return -1;
> >+}
> >+
> >+static int
> >+parse_list(const char *arg, uint8_t *list, uint8_t *min, uint8_t *max)
> >+{
> >+       char *token;
> >+       uint32_t number;
> >+       uint8_t count = 0;
> >+       uint32_t temp_min;
> >+       uint32_t temp_max;
> >+
> >+       char *copy_arg = strdup(arg);
> >+
> >+       if (copy_arg == NULL)
> >+               return -1;
> >+
> >+       errno = 0;
> >+       token = strtok(copy_arg, ",");
> >+
> >+       /* Parse first value */
> >+       if (token != NULL) {
> >+               number = strtoul(token, NULL, 10);
> >+
> >+               if (errno == EINVAL || errno == ERANGE)
> >+                       goto err_list;
> >+
> >+               list[count++] = number;
> >+               temp_min = number;
> >+               temp_max = number;
> >+       } else
> >+               goto err_list;
> >+
> >+       token = strtok(NULL, ",");
> >+
> >+       while (token != NULL) {
> >+               if (count == MAX_LIST) {
> >+                       RTE_LOG(WARNING, USER1,
> >+                               "Using only the first %u sizes\n",
> >+                                       MAX_LIST);
> >+                       break;
> >+               }
> >+
> >+               number = strtoul(token, NULL, 10);
> >+
> >+               if (errno == EINVAL || errno == ERANGE)
> >+                       goto err_list;
> >+
> >+               list[count++] = number;
> >+
> >+               if (number < temp_min)
> >+                       temp_min = number;
> >+               if (number > temp_max)
> >+                       temp_max = number;
> >+
> >+               token = strtok(NULL, ",");
> >+       }
> >+
> >+       if (min)
> >+               *min = temp_min;
> >+       if (max)
> >+               *max = temp_max;
> >+
> >+       free(copy_arg);
> >+       return count;
> >+
> >+err_list:
> >+       free(copy_arg);
> >+       return -1;
> >+}
> >+
> >+static int
> >+parse_num_iter(struct comp_test_data *test_data, const char *arg) {
> >+       int ret = parse_uint32_t(&test_data->num_iter, arg);
> >+
> >+       if (ret) {
> >+               RTE_LOG(ERR, USER1, "Failed to parse total iteration count\n");
> >+               return -1;
> >+       }
> >+
> >+       if (test_data->num_iter == 0) {
> >+               RTE_LOG(ERR, USER1,
> >+                               "Total number of iterations must be higher than 0\n");
> >+               return -1;
> >+       }
> >+
> >+       return ret;
> >+}
> >+
> >+static int
> >+parse_pool_sz(struct comp_test_data *test_data, const char *arg) {
> >+       int ret = parse_uint32_t(&test_data->pool_sz, arg);
> >+
> >+       if (ret) {
> >+               RTE_LOG(ERR, USER1, "Failed to parse pool size");
> >+               return -1;
> >+       }
> >+
> >+       if (test_data->pool_sz == 0) {
> >+               RTE_LOG(ERR, USER1, "Pool size must be higher than 0\n");
> >+               return -1;
> >+       }
> >+
> >+       return ret;
> >+}
> >+
> >+static int
> >+parse_burst_sz(struct comp_test_data *test_data, const char *arg) {
> >+       int ret = parse_uint16_t(&test_data->burst_sz, arg);
> >+
> >+       if (ret) {
> >+               RTE_LOG(ERR, USER1, "Failed to parse burst size/s\n");
> >+               return -1;
> >+       }
> >+
> >+       if (test_data->burst_sz == 0) {
> >+               RTE_LOG(ERR, USER1, "Burst size must be higher than 0\n");
> >+               return -1;
> >+       }
> >+
> >+       return 0;
> >+}
> >+
> >+static int
> >+parse_extended_input_sz(struct comp_test_data *test_data, const char
> >+*arg) {
> >+       uint32_t tmp;
> >+       int ret = parse_uint32_t(&tmp, arg);
> >+
> >+       if (ret) {
> >+               RTE_LOG(ERR, USER1, "Failed to parse extended input size\n");
> >+               return -1;
> >+       }
> >+       test_data->input_data_sz = tmp;
> >+
> >+       if (tmp == 0) {
> >+               RTE_LOG(ERR, USER1,
> >+                       "Extended file size must be higher than 0\n");
> >+               return -1;
> >+       }
> >+       return 0;
> >+}
> >+
> >+static int
> >+parse_seg_sz(struct comp_test_data *test_data, const char *arg) {
> >+       int ret = parse_uint16_t(&test_data->seg_sz, arg);
> >+
> >+       if (ret) {
> >+               RTE_LOG(ERR, USER1, "Failed to parse segment size\n");
> >+               return -1;
> >+       }
> >+
> >+       if (test_data->seg_sz == 0) {
> >+               RTE_LOG(ERR, USER1, "Segment size must be higher than 0\n");
> >+               return -1;
> >+       }
> >+
> >+       return 0;
> >+}
> >+
> >+static int
> >+parse_max_num_sgl_segs(struct comp_test_data *test_data, const char
> >+*arg) {
> >+       int ret = parse_uint16_t(&test_data->max_sgl_segs, arg);
> >+
> >+       if (ret) {
> >+               RTE_LOG(ERR, USER1,
> >+                       "Failed to parse max number of segments per mbuf chain\n");
> >+               return -1;
> >+       }
> >+
> >+       if (test_data->max_sgl_segs == 0) {
> >+               RTE_LOG(ERR, USER1, "Max number of segments per mbuf chain "
> >+                       "must be higher than 0\n");
> >+               return -1;
> >+       }
> >+
> >+       return 0;
> >+}
> >+
> >+static int
> >+parse_window_sz(struct comp_test_data *test_data, const char *arg) {
> >+       int ret = parse_uint16_t((uint16_t *)&test_data->window_sz,
> >+arg);
> >+
> >+       if (ret) {
> >+               RTE_LOG(ERR, USER1, "Failed to parse window size\n");
> >+               return -1;
> >+       }
> >+
> >+       return 0;
> >+}
> >+
> >+static int
> >+parse_driver_name(struct comp_test_data *test_data, const char *arg) {
> >+       if (strlen(arg) > (sizeof(test_data->driver_name) - 1))
> >+               return -1;
> >+
> >+       rte_strlcpy(test_data->driver_name, arg,
> >+                       sizeof(test_data->driver_name));
> >+
> >+       return 0;
> >+}
> >+
> >+static int
> >+parse_test_file(struct comp_test_data *test_data, const char *arg) {
> >+       if (strlen(arg) > (sizeof(test_data->input_file) - 1))
> >+               return -1;
> >+
> >+       rte_strlcpy(test_data->input_file, arg,
> >+ sizeof(test_data->input_file));
> >+
> >+       return 0;
> >+}
> >+
> >+static int
> >+parse_op_type(struct comp_test_data *test_data, const char *arg) {
> >+       struct name_id_map optype_namemap[] = {
> >+               {
> >+                       "comp",
> >+                       COMPRESS_ONLY
> >+               },
> >+               {
> >+                       "decomp",
> >+                       DECOMPRESS_ONLY
> >+               },
> >+               {
> >+                       "comp_and_decomp",
> >+                       COMPRESS_DECOMPRESS
> >+               }
> >+       };
> >+
> >+       int id = get_str_key_id_mapping(optype_namemap,
> >+                       RTE_DIM(optype_namemap), arg);
> >+       if (id < 0) {
> >+               RTE_LOG(ERR, USER1, "Invalid operation type specified\n");
> >+               return -1;
> >+       }
> >+
> >+       test_data->test_op = (enum comp_operation)id;
> >+
> >+       return 0;
> >+}
> >+
> >+static int
> >+parse_huffman_enc(struct comp_test_data *test_data, const char *arg) {
> >+       struct name_id_map huffman_namemap[] = {
> >+               {
> >+                       "default",
> >+                       RTE_COMP_HUFFMAN_DEFAULT
> >+               },
> >+               {
> >+                       "fixed",
> >+                       RTE_COMP_HUFFMAN_FIXED
> >+               },
> >+               {
> >+                       "dynamic",
> >+                       RTE_COMP_HUFFMAN_DYNAMIC
> >+               }
> >+       };
> >+
> >+       int id = get_str_key_id_mapping(huffman_namemap,
> >+                       RTE_DIM(huffman_namemap), arg);
> >+       if (id < 0) {
> >+               RTE_LOG(ERR, USER1, "Invalid Huffmane encoding specified\n");
> >+               return -1;
> >+       }
> >+
> >+       test_data->huffman_enc = (enum rte_comp_huffman)id;
> >+
> >+       return 0;
> >+}
> >+
> >+static int
> >+parse_level(struct comp_test_data *test_data, const char *arg) {
> >+       int ret;
> >+
> >+       /*
> >+        * Try parsing the argument as a range, if it fails,
> >+        * arse it as a list
> >+        */
> >+       if (parse_range(arg, &test_data->level.min, &test_data->level.max,
> >+                       &test_data->level.inc) < 0) {
> >+               ret = parse_list(arg, test_data->level.list,
> >+                                       &test_data->level.min,
> >+                                       &test_data->level.max);
> >+               if (ret < 0) {
> >+                       RTE_LOG(ERR, USER1,
> >+                               "Failed to parse compression level/s\n");
> >+                       return -1;
> >+               }
> >+               test_data->level.count = ret;
> >+
> >+               if (test_data->level.max > RTE_COMP_LEVEL_MAX) {
> >+                       RTE_LOG(ERR, USER1, "Level cannot be higher than %u\n",
> >+                                       RTE_COMP_LEVEL_MAX);
> >+                       return -1;
> >+               }
> >+       }
> >+
> >+       return 0;
> >+}
> >+
> >+typedef int (*option_parser_t)(struct comp_test_data *test_data,
> >+               const char *arg);
> >+
> >+struct long_opt_parser {
> >+       const char *lgopt_name;
> >+       option_parser_t parser_fn;
> >+
> >+};
> >+
> >+static struct option lgopts[] = {
> >+
> >+       { CPERF_DRIVER_NAME, required_argument, 0, 0 },
> >+       { CPERF_TEST_FILE, required_argument, 0, 0 },
> >+       { CPERF_SEG_SIZE, required_argument, 0, 0 },
> >+       { CPERF_BURST_SIZE, required_argument, 0, 0 },
> >+       { CPERF_EXTENDED_SIZE, required_argument, 0, 0 },
> >+       { CPERF_POOL_SIZE, required_argument, 0, 0 },
> >+       { CPERF_MAX_SGL_SEGS, required_argument, 0, 0},
> >+       { CPERF_NUM_ITER, required_argument, 0, 0 },
> >+       { CPERF_OPTYPE, required_argument, 0, 0 },
> >+       { CPERF_HUFFMAN_ENC, required_argument, 0, 0 },
> >+       { CPERF_LEVEL, required_argument, 0, 0 },
> >+       { CPERF_WINDOW_SIZE, required_argument, 0, 0 },
> >+       { NULL, 0, 0, 0 }
> >+};
> >+static int
> >+comp_perf_opts_parse_long(int opt_idx, struct comp_test_data
> >+*test_data) {
> >+       struct long_opt_parser parsermap[] = {
> >+               { CPERF_DRIVER_NAME,    parse_driver_name },
> >+               { CPERF_TEST_FILE,      parse_test_file },
> >+               { CPERF_SEG_SIZE,       parse_seg_sz },
> >+               { CPERF_BURST_SIZE,     parse_burst_sz },
> >+               { CPERF_EXTENDED_SIZE,  parse_extended_input_sz },
> >+               { CPERF_POOL_SIZE,      parse_pool_sz },
> >+               { CPERF_MAX_SGL_SEGS,   parse_max_num_sgl_segs },
> >+               { CPERF_NUM_ITER,       parse_num_iter },
> >+               { CPERF_OPTYPE,         parse_op_type },
> >+               { CPERF_HUFFMAN_ENC,    parse_huffman_enc },
> >+               { CPERF_LEVEL,          parse_level },
> >+               { CPERF_WINDOW_SIZE,    parse_window_sz },
> >+       };
> >+       unsigned int i;
> >+
> >+       for (i = 0; i < RTE_DIM(parsermap); i++) {
> >+               if (strncmp(lgopts[opt_idx].name, parsermap[i].lgopt_name,
> >+                               strlen(lgopts[opt_idx].name)) == 0)
> >+                       return parsermap[i].parser_fn(test_data, optarg);
> >+       }
> >+
> >+       return -EINVAL;
> >+}
> >+
> >+int
> >+comp_perf_options_parse(struct comp_test_data *test_data, int argc,
> >+char **argv) {
> >+       int opt, retval, opt_idx;
> >+
> >+       while ((opt = getopt_long(argc, argv, "h", lgopts, &opt_idx)) != EOF) {
> >+               switch (opt) {
> >+               case 'h':
> >+                       usage(argv[0]);
> >+                       rte_exit(EXIT_SUCCESS, "Displayed help\n");
> >+                       break;
> >+               /* long options */
> >+               case 0:
> >+                       retval = comp_perf_opts_parse_long(opt_idx, test_data);
> >+                       if (retval != 0)
> >+                               return retval;
> >+
> >+                       break;
> >+
> >+               default:
> >+                       usage(argv[0]);
> >+                       return -EINVAL;
> >+               }
> >+       }
> >+
> >+       return 0;
> >+}
> >+
> >+void
> >+comp_perf_options_default(struct comp_test_data *test_data) {
> >+       test_data->cdev_id = -1;
> >+       test_data->seg_sz = 2048;
> >+       test_data->burst_sz = 32;
> >+       test_data->pool_sz = 8192;
> >+       test_data->max_sgl_segs = UINT16_MAX;
> >+       test_data->num_iter = 10000;
> >+       test_data->huffman_enc = RTE_COMP_HUFFMAN_DYNAMIC;
> >+       test_data->test_op = COMPRESS_DECOMPRESS;
> >+       test_data->window_sz = -1;
> >+       test_data->level.min = 1;
> >+       test_data->level.max = 9;
> >+       test_data->level.inc = 1;
> >+}
> >+
> >+int
> >+comp_perf_options_check(struct comp_test_data *test_data) {
> >+       if (strcmp(test_data->driver_name, "") == 0) {
> >+               RTE_LOG(ERR, USER1, "Driver name has to be set\n");
> >+               return -1;
> >+       }
> >+
> >+       if (strcmp(test_data->input_file, "") == 0) {
> >+               RTE_LOG(ERR, USER1, "Input file name has to be set\n");
> >+               return -1;
> >+       }
> I think other params such as window sz, Huffman coding, level too should be
> tested and adjusted according to driver capability

TJ: We can try to do this in the next version, if there is any info about these driver capabilities in struct rte_compressdev_capabilities.
In this version we test all possible coding levels if there is no '--compress-level' parameter on the command line; otherwise we parse the list passed in '--compress-level'.

> Thanks
> Shally


Br, Tomek

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/3] app/compress-perf: add parser
  2018-11-06  8:30       ` Jozwiak, TomaszX
@ 2018-11-06  8:32         ` Verma, Shally
  0 siblings, 0 replies; 76+ messages in thread
From: Verma, Shally @ 2018-11-06  8:32 UTC (permalink / raw)
  To: Jozwiak, TomaszX, dev, Trahe, Fiona, akhil.goyal



>-----Original Message-----
>From: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>
>Sent: 06 November 2018 14:01
>To: Verma, Shally <Shally.Verma@cavium.com>; dev@dpdk.org; Trahe, Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com
>Subject: RE: [PATCH v2 1/3] app/compress-perf: add parser
>
>External Email
>
...
>> >+       if (strcmp(test_data->input_file, "") == 0) {
>> >+               RTE_LOG(ERR, USER1, "Input file name has to be set\n");
>> >+               return -1;
>> >+       }
>> Think other params such as window sz , Huffman coding level too should be
>> test and adjusted according to driver capability
>
>TJ:  We can try to do this in next version if there's any info about these driver's  capability in struct rte_compressdev_capabilities.
>In this version we test all possible coding level - if there's no '--compress-level' param. in the command line or we parse the list passed
>in '--compress-level' in other case.
>
Ok.

>> Thanks
>> Shally
>
>
>Br, Tomek

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/3] app/compress-perf: add performance measurement
  2018-11-05  8:56     ` Verma, Shally
@ 2018-11-06  8:49       ` Jozwiak, TomaszX
  2018-11-06 15:37         ` Verma, Shally
  0 siblings, 1 reply; 76+ messages in thread
From: Jozwiak, TomaszX @ 2018-11-06  8:49 UTC (permalink / raw)
  To: Verma, Shally, dev, Trahe, Fiona, akhil.goyal



> -----Original Message-----
> From: Verma, Shally [mailto:Shally.Verma@cavium.com]
> Sent: Monday, November 5, 2018 9:57 AM
> To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; Trahe,
> Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com
> Subject: RE: [PATCH v2 2/3] app/compress-perf: add performance
> measurement
> 
> 
> 
> >-----Original Message-----
> >From: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
> >Sent: 02 November 2018 15:14
> >To: dev@dpdk.org; fiona.trahe@intel.com; tomaszx.jozwiak@intel.com;
> >Verma, Shally <Shally.Verma@cavium.com>; akhil.goyal@nxp.com
> >Subject: [PATCH v2 2/3] app/compress-perf: add performance
> measurement
> >
> >External Email
> >
> >Added performance measurement part into compression perf. test.
> >
> >Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
> >Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
> >---
> > app/test-compress-perf/comp_perf_options_parse.c |   8 +-
> > app/test-compress-perf/main.c                    | 886
> ++++++++++++++++++++++-
> > 2 files changed, 883 insertions(+), 11 deletions(-)
> >
> >diff --git a/app/test-compress-perf/comp_perf_options_parse.c
> >b/app/test-compress-perf/comp_perf_options_parse.c
> >index bef4d2f..e5da3ad 100644
> >--- a/app/test-compress-perf/comp_perf_options_parse.c
> >+++ b/app/test-compress-perf/comp_perf_options_parse.c
> >@@ -431,10 +431,6 @@ parse_huffman_enc(struct comp_test_data
> *test_data, const char *arg)
> >                {
> >                        "fixed",
> >                        RTE_COMP_HUFFMAN_FIXED
> >-               },
> >-               {
> >-                       "dynamic",
> >-                       RTE_COMP_HUFFMAN_DYNAMIC
> >                }
> >        };
> >
> >@@ -569,9 +565,9 @@ comp_perf_options_default(struct comp_test_data
> *test_data)
> >        test_data->seg_sz = 2048;
> >        test_data->burst_sz = 32;
> >        test_data->pool_sz = 8192;
> >-       test_data->max_sgl_segs = UINT16_MAX;
> >+       test_data->max_sgl_segs = 16;
> >        test_data->num_iter = 10000;
> >-       test_data->huffman_enc = RTE_COMP_HUFFMAN_DYNAMIC;
> >+       test_data->huffman_enc = RTE_COMP_HUFFMAN_FIXED;
> >        test_data->test_op = COMPRESS_DECOMPRESS;
> >        test_data->window_sz = -1;
> >        test_data->level.min = 1;
> >diff --git a/app/test-compress-perf/main.c
> >b/app/test-compress-perf/main.c index f52b98d..e3f4bf6 100644
> >--- a/app/test-compress-perf/main.c
> >+++ b/app/test-compress-perf/main.c
> >@@ -5,14 +5,728 @@
> > #include <rte_malloc.h>
> > #include <rte_eal.h>
> > #include <rte_log.h>
> >+#include <rte_cycles.h>
> > #include <rte_compressdev.h>
> >
> > #include "comp_perf_options.h"
> >
> >+#define NUM_MAX_XFORMS 16
> >+#define NUM_MAX_INFLIGHT_OPS 512
> >+#define EXPANSE_RATIO 1.05
> >+#define MIN_ISAL_SIZE 8
> Can we avoid ISAL specific naming ?

TJ: yes true :) will be fixed in V3




> >+
> >+#define DIV_CEIL(a, b)  ((a) / (b) + ((a) % (b) != 0))
> >+
> >+/* Cleanup state machine */
> >+static enum cleanup_st {
> >+       ST_CLEAR = 0,
> >+       ST_TEST_DATA,
> >+       ST_COMPDEV,
> >+       ST_INPUT_DATA,
> >+       ST_MEMORY_ALLOC,
> >+       ST_PREPARE_BUF,
> >+       ST_DURING_TEST
> >+} cleanup = ST_CLEAR;
> >+
> >+static int
> >+param_range_check(uint16_t size, const struct rte_param_log2_range
> >+*range) {
> >+       unsigned int next_size;
> >+
> >+       /* Check lower/upper bounds */
> >+       if (size < range->min)
> >+               return -1;
> >+
> >+       if (size > range->max)
> >+               return -1;
> >+
> >+       /* If range is actually only one value, size is correct */
> >+       if (range->increment == 0)
> >+               return 0;
> >+
> >+       /* Check if value is one of the supported sizes */
> >+       for (next_size = range->min; next_size <= range->max;
> >+                       next_size += range->increment)
> >+               if (size == next_size)
> >+                       return 0;
> >+
> >+       return -1;
> >+}
> >+
> >+static int
> >+comp_perf_check_capabilities(struct comp_test_data *test_data) {
> >+       const struct rte_compressdev_capabilities *cap;
> >+
> >+       cap = rte_compressdev_capability_get(test_data->cdev_id,
> >+                                            RTE_COMP_ALGO_DEFLATE);
> >+
> >+       if (cap == NULL) {
> >+               RTE_LOG(ERR, USER1,
> >+                       "Compress device does not support DEFLATE\n");
> >+               return -1;
> >+       }
> >+
> >+       uint64_t comp_flags = cap->comp_feature_flags;
> >+
> >+       /* Huffman enconding */
> >+       if (test_data->huffman_enc == RTE_COMP_HUFFMAN_FIXED &&
> >+                       (comp_flags & RTE_COMP_FF_HUFFMAN_FIXED) == 0) {
> >+               RTE_LOG(ERR, USER1,
> >+                       "Compress device does not supported Fixed Huffman\n");
> >+               return -1;
> >+       }
> >+
> >+       if (test_data->huffman_enc == RTE_COMP_HUFFMAN_DYNAMIC &&
> >+                       (comp_flags & RTE_COMP_FF_HUFFMAN_DYNAMIC) == 0) {
> >+               RTE_LOG(ERR, USER1,
> >+                       "Compress device does not supported Dynamic Huffman\n");
> >+               return -1;
> >+       }
> >+
> >+       /* Window size */
> >+       if (test_data->window_sz != -1) {
> >+               if (param_range_check(test_data->window_sz, &cap-
> >window_size)
> >+                               < 0) {
> >+                       RTE_LOG(ERR, USER1,
> >+                               "Compress device does not support "
> >+                               "this window size\n");
> >+                       return -1;
> >+               }
> >+       } else
> >+               /* Set window size to PMD maximum if none was specified */
> >+               test_data->window_sz = cap->window_size.max;
> >+
> >+       /* Check if chained mbufs is supported */
> >+       if (test_data->max_sgl_segs > 1  &&
> >+                       (comp_flags & RTE_COMP_FF_OOP_SGL_IN_SGL_OUT) == 0) {
> >+               RTE_LOG(INFO, USER1, "Compress device does not support "
> >+                               "chained mbufs. Max SGL segments set to 1\n");
> >+               test_data->max_sgl_segs = 1;
> >+       }
> >+
> >+       /* Level 0 support */
> >+       if (test_data->level.min == 0 &&
> >+                       (comp_flags & RTE_COMP_FF_NONCOMPRESSED_BLOCKS) ==
> 0) {
> >+               RTE_LOG(ERR, USER1, "Compress device does not support "
> >+                               "level 0 (no compression)\n");
> >+               return -1;
> >+       }
> >+
> >+       return 0;
> >+}
> >+
> >+static int
> >+comp_perf_allocate_memory(struct comp_test_data *test_data) {
> >+       /* Number of segments for input and output
> >+        * (compression and decompression)
> >+        */
> >+       uint32_t total_segs = DIV_CEIL(test_data->input_data_sz,
> >+                       test_data->seg_sz);
> >+       test_data->comp_buf_pool =
> rte_pktmbuf_pool_create("comp_buf_pool",
> >+                               total_segs,
> >+                               0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
> >+                               rte_socket_id());
> >+       if (test_data->comp_buf_pool == NULL) {
> >+               RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
> >+               return -1;
> >+       }
> >+
> >+       cleanup = ST_MEMORY_ALLOC;
> >+       test_data->decomp_buf_pool =
> rte_pktmbuf_pool_create("decomp_buf_pool",
> >+                               total_segs,
> >+                               0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
> >+                               rte_socket_id());
> >+       if (test_data->decomp_buf_pool == NULL) {
> >+               RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
> >+               return -1;
> >+       }
> Unless I am missing it, you need to free the previously allocated memory here
> before the return call in all failure cases.

TJ: There's only one 'freeing stack', at the end of the main application function, to avoid double freeing of resources (which happened previously in V1).
We have a state machine for that (static enum cleanup_st) to know what should be freed and what has already been allocated.
In the case you mean, the state machine is set just after the first allocation, at line 136:

cleanup = ST_MEMORY_ALLOC;

so we know what should be freed when the application finishes, at line 891:

end:
	switch (cleanup) {

	case ST_DURING_TEST:
	case ST_PREPARE_BUF:
		free_bufs(test_data);
		/* fallthrough */
	case ST_MEMORY_ALLOC:
		rte_free(test_data->decomp_bufs);
		rte_free(test_data->comp_bufs);
		rte_free(test_data->decompressed_data);
		rte_free(test_data->compressed_data);
		rte_mempool_free(test_data->op_pool);
		rte_mempool_free(test_data->decomp_buf_pool);
		rte_mempool_free(test_data->comp_buf_pool);
		/* fallthrough */
	case ST_INPUT_DATA:
		rte_free(test_data->input_data);
		/* fallthrough */
	case ST_COMPDEV:
		if (test_data->cdev_id != -1)
			rte_compressdev_stop(test_data->cdev_id);
		/* fallthrough */
	case ST_TEST_DATA:
		rte_free(test_data);
		/* fallthrough */
	case ST_CLEAR:
	default:
		i = rte_eal_cleanup();
		if (i) {
			RTE_LOG(ERR, USER1,
				"Error from rte_eal_cleanup(), %d\n", i);
			ret = i;
		}
		break;
	}
	return ret;



> 
> >+
> >+       test_data->total_bufs = DIV_CEIL(total_segs,
> >+ test_data->max_sgl_segs);
> >+
> >+       test_data->op_pool = rte_comp_op_pool_create("op_pool",
> >+                                 test_data->total_bufs,
> >+                                 0, 0, rte_socket_id());
> >+       if (test_data->op_pool == NULL) {
> >+               RTE_LOG(ERR, USER1, "Comp op mempool could not be
> created\n");
> >+               return -1;
> >+       }
> >+
> >+       /*
> >+        * Compressed data might be a bit larger than input data,
> >+        * if data cannot be compressed
> >+        */
> >+       test_data->compressed_data = rte_zmalloc_socket(NULL,
> >+                               test_data->input_data_sz * EXPANSE_RATIO
> >+                                                       +
> >+ MIN_ISAL_SIZE, 0,
> MIN_ISAL_SIZE looks specific to the ISAL driver. If so, is this perf app specific
> to that PMD? Or can we make it somewhat generic?

TJ: True will be fixed


> 
> >+                               rte_socket_id());
> >+       if (test_data->compressed_data == NULL) {
> >+               RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
> >+                               "file could not be allocated\n");
> >+               return -1;
> >+       }
> >+
> >+       test_data->decompressed_data = rte_zmalloc_socket(NULL,
> >+                               test_data->input_data_sz, 0,
> >+                               rte_socket_id());
> >+       if (test_data->decompressed_data == NULL) {
> >+               RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
> >+                               "file could not be allocated\n");
> >+               return -1;
> >+       }
> >+
> >+       test_data->comp_bufs = rte_zmalloc_socket(NULL,
> >+                       test_data->total_bufs * sizeof(struct rte_mbuf *),
> >+                       0, rte_socket_id());
> >+       if (test_data->comp_bufs == NULL) {
> >+               RTE_LOG(ERR, USER1, "Memory to hold the compression mbufs"
> >+                               " could not be allocated\n");
> >+               return -1;
> >+       }
> >+
> >+       test_data->decomp_bufs = rte_zmalloc_socket(NULL,
> >+                       test_data->total_bufs * sizeof(struct rte_mbuf *),
> >+                       0, rte_socket_id());
> >+       if (test_data->decomp_bufs == NULL) {
> >+               RTE_LOG(ERR, USER1, "Memory to hold the decompression
> mbufs"
> >+                               " could not be allocated\n");
> >+               return -1;
> >+       }
> >+       return 0;
> >+}
> >+
> >+static int
> >+comp_perf_dump_input_data(struct comp_test_data *test_data) {
> >+       FILE *f = fopen(test_data->input_file, "r");
> >+       int ret = -1;
> >+
> >+       if (f == NULL) {
> >+               RTE_LOG(ERR, USER1, "Input file could not be opened\n");
> >+               return -1;
> >+       }
> >+
> >+       if (fseek(f, 0, SEEK_END) != 0) {
> >+               RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
> >+               goto end;
> >+       }
> >+       size_t actual_file_sz = ftell(f);
> >+       /* If extended input data size has not been set,
> >+        * input data size = file size
> >+        */
> >+
> >+       if (test_data->input_data_sz == 0)
> >+               test_data->input_data_sz = actual_file_sz;
> >+
> >+       if (fseek(f, 0, SEEK_SET) != 0) {
> >+               RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
> >+               goto end;
> >+       }
> >+
> >+       test_data->input_data = rte_zmalloc_socket(NULL,
> >+                               test_data->input_data_sz, 0,
> >+ rte_socket_id());
> >+
> >+       if (test_data->input_data == NULL) {
> >+               RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
> >+                               "file could not be allocated\n");
> >+               goto end;
> >+       }
> >+
> >+       size_t remaining_data = test_data->input_data_sz;
> >+       uint8_t *data = test_data->input_data;
> >+
> >+       while (remaining_data > 0) {
> >+               size_t data_to_read = RTE_MIN(remaining_data,
> >+ actual_file_sz);
> >+
> >+               if (fread(data, data_to_read, 1, f) != 1) {
> >+                       RTE_LOG(ERR, USER1, "Input file could not be read\n");
> >+                       goto end;
> >+               }
> >+               if (fseek(f, 0, SEEK_SET) != 0) {
> >+                       RTE_LOG(ERR, USER1,
> >+                               "Size of input could not be calculated\n");
> >+                       goto end;
> >+               }
> >+               remaining_data -= data_to_read;
> >+               data += data_to_read;
> >+       }
> >+
> >+       if (test_data->input_data_sz > actual_file_sz)
> >+               RTE_LOG(INFO, USER1,
> >+                 "%zu bytes read from file %s, extending the file %.2f times\n",
> >+                       test_data->input_data_sz, test_data->input_file,
> >+                       (double)test_data->input_data_sz/actual_file_sz);
> >+       else
> >+               RTE_LOG(INFO, USER1,
> >+                       "%zu bytes read from file %s\n",
> >+                       test_data->input_data_sz,
> >+ test_data->input_file);
> >+
> >+       ret = 0;
> >+
> >+end:
> >+       fclose(f);
> >+       return ret;
> >+}
> >+
> >+static int
> >+comp_perf_initialize_compressdev(struct comp_test_data *test_data) {
> >+       uint8_t enabled_cdev_count;
> >+       uint8_t enabled_cdevs[RTE_COMPRESS_MAX_DEVS];
> >+
> >+       enabled_cdev_count = rte_compressdev_devices_get(test_data-
> >driver_name,
> >+                       enabled_cdevs, RTE_COMPRESS_MAX_DEVS);
> >+       if (enabled_cdev_count == 0) {
> >+               RTE_LOG(ERR, USER1, "No compress devices type %s available\n",
> >+                               test_data->driver_name);
> >+               return -EINVAL;
> >+       }
> >+
> >+       if (enabled_cdev_count > 1)
> >+               RTE_LOG(INFO, USER1,
> >+                       "Only the first compress device will be
> >+ used\n");
> >+
> >+       test_data->cdev_id = enabled_cdevs[0];
> >+
> >+       if (comp_perf_check_capabilities(test_data) < 0)
> >+               return -1;
> >+
> >+       /* Configure compressdev (one device, one queue pair) */
> >+       struct rte_compressdev_config config = {
> >+               .socket_id = rte_socket_id(),
> >+               .nb_queue_pairs = 1,
> >+               .max_nb_priv_xforms = NUM_MAX_XFORMS,
> >+               .max_nb_streams = 0
> >+       };
> >+
> >+       if (rte_compressdev_configure(test_data->cdev_id, &config) < 0) {
> >+               RTE_LOG(ERR, USER1, "Device configuration failed\n");
> >+               return -1;
> >+       }
> >+
> >+       if (rte_compressdev_queue_pair_setup(test_data->cdev_id, 0,
> >+                       NUM_MAX_INFLIGHT_OPS, rte_socket_id()) < 0) {
> >+               RTE_LOG(ERR, USER1, "Queue pair setup failed\n");
> >+               return -1;
> >+       }
> >+
> >+       if (rte_compressdev_start(test_data->cdev_id) < 0) {
> >+               RTE_LOG(ERR, USER1, "Device could not be started\n");
> >+               return -1;
> >+       }
> >+
> >+       return 0;
> >+}
> >+
> >+static int
> >+prepare_bufs(struct comp_test_data *test_data) {
> >+       uint32_t remaining_data = test_data->input_data_sz;
> >+       uint8_t *input_data_ptr = test_data->input_data;
> >+       size_t data_sz;
> >+       uint8_t *data_addr;
> >+       uint32_t i, j;
> >+
> >+       for (i = 0; i < test_data->total_bufs; i++) {
> >+               /* Allocate data in input mbuf and copy data from input file */
> >+               test_data->decomp_bufs[i] =
> >+                       rte_pktmbuf_alloc(test_data->decomp_buf_pool);
> >+               if (test_data->decomp_bufs[i] == NULL) {
> >+                       RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
> >+                       return -1;
> >+               }
> >+
> >+               cleanup = ST_PREPARE_BUF;
> >+               data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
> >+               data_addr = (uint8_t *) rte_pktmbuf_append(
> >+                                       test_data->decomp_bufs[i], data_sz);
> >+               if (data_addr == NULL) {
> >+                       RTE_LOG(ERR, USER1, "Could not append data\n");
> Same here: free the allocated buffers before returning from the failure cases.
> 
> Thanks
> Shally

Thx, Tomek

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH v2 3/3] doc/guides/tools: add doc files
  2018-11-05  8:57     ` Verma, Shally
@ 2018-11-06  8:51       ` Jozwiak, TomaszX
  0 siblings, 0 replies; 76+ messages in thread
From: Jozwiak, TomaszX @ 2018-11-06  8:51 UTC (permalink / raw)
  To: Verma, Shally, dev, Trahe, Fiona, akhil.goyal



> -----Original Message-----
> From: Verma, Shally [mailto:Shally.Verma@cavium.com]
> Sent: Monday, November 5, 2018 9:58 AM
> To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; Trahe,
> Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com
> Subject: RE: [PATCH v2 3/3] doc/guides/tools: add doc files
> 
> 
> 
> >-----Original Message-----
> >From: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
> >Sent: 02 November 2018 15:14
> >To: dev@dpdk.org; fiona.trahe@intel.com; tomaszx.jozwiak@intel.com;
> >Verma, Shally <Shally.Verma@cavium.com>; akhil.goyal@nxp.com
> >Subject: [PATCH v2 3/3] doc/guides/tools: add doc files
> >
> >External Email
> >
> >Added:
> > -  initial version of compression performance test
> >    description file.
> > -  release note in release_18_11.rst
> >
> >Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
> >Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
> >---
> > MAINTAINERS                            |  5 +++
> > doc/guides/rel_notes/release_18_11.rst |  6 +++
> > doc/guides/tools/comp_perf.rst         | 75
> ++++++++++++++++++++++++++++++++++
> > 3 files changed, 86 insertions(+)
> > create mode 100644 doc/guides/tools/comp_perf.rst
> >
> >diff --git a/MAINTAINERS b/MAINTAINERS
> >index e60379d..cfda6dd 100644
> >--- a/MAINTAINERS
> >+++ b/MAINTAINERS
> >@@ -1242,6 +1242,11 @@ M: Bernard Iremonger
> ><bernard.iremonger@intel.com>
> > F: app/test-pmd/
> > F: doc/guides/testpmd_app_ug/
> >
> >+Compression performance test application
> >+M: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
> >+F: app/test-compress-perf/
> >+F: doc/guides/tools/comp_perf.rst
> >+
> > Crypto performance test application
> > M: Declan Doherty <declan.doherty@intel.com>
> > F: app/test-crypto-perf/
> >diff --git a/doc/guides/rel_notes/release_18_11.rst
> >b/doc/guides/rel_notes/release_18_11.rst
> >index 376128f..8bc7d05 100644
> >--- a/doc/guides/rel_notes/release_18_11.rst
> >+++ b/doc/guides/rel_notes/release_18_11.rst
> >@@ -285,6 +285,12 @@ New Features
> >   this application doesn't need to launch dedicated worker threads for vhost
> >   enqueue/dequeue operations.
> >
> >+* **Added a compression performance test tool.**
> >+
> >+   Added a new performance test tool to test the compressdev PMD. The
> tool tests
> >+   compression ratio and compression throughput. Dynamic compression
> test is not
> >+   supported yet.
> >+
> >
> > API Changes
> > -----------
> >diff --git a/doc/guides/tools/comp_perf.rst
> >b/doc/guides/tools/comp_perf.rst new file mode 100644 index
> >0000000..2f43412
> >--- /dev/null
> >+++ b/doc/guides/tools/comp_perf.rst
> >@@ -0,0 +1,75 @@
> >+..  SPDX-License-Identifier: BSD-3-Clause
> >+    Copyright(c) 2018 Intel Corporation.
> >+
> >+dpdk-test-compress-perf Application
> >+===================================
> >+
> >+The ``dpdk-test-compress-perf`` tool is a Data Plane Development Kit
> >+(DPDK) utility that allows measuring performance parameters of PMDs
> >+available in the compress tree. The application reads the data from a
> >+file (--input-file), dumps all the file into a buffer and fills out
> >+the data of input mbufs, which are passed to compress device with
> compression operations.
> >+Then, the output buffers are fed into the decompression stage, and the
> >+resulting data is compared against the original data (verification
> >+phase). After that, a number of iterations are performed, compressing
> >+first and decompressing later, to check the throughput rate (showing
> >+cycles/iteration, cycles/Byte and Gbps, for compression and
> decompression).
> >+
> >+
> >+Limitations
> >+~~~~~~~~~~~
> >+
> >+* Only supports the fixed compression.
> The perf app seems to support stateless operation only for now, so this should
> say "supported for fixed and stateless operation only."
> Thanks
> Shally

TJ: True, thanks for this. Will be fixed.

Thx, Tomek

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
  2018-11-06  8:15           ` Verma, Shally
@ 2018-11-06  9:05             ` Jozwiak, TomaszX
  2018-11-06 15:39               ` Verma, Shally
  0 siblings, 1 reply; 76+ messages in thread
From: Jozwiak, TomaszX @ 2018-11-06  9:05 UTC (permalink / raw)
  To: Verma, Shally, dev, Trahe, Fiona, akhil.goyal



> -----Original Message-----
> From: Verma, Shally [mailto:Shally.Verma@cavium.com]
> Sent: Tuesday, November 6, 2018 9:16 AM
> To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; Trahe,
> Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com
> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
> measurement
> 
> 
> 
> >-----Original Message-----
> >From: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>
> >Sent: 06 November 2018 13:34
> >To: Verma, Shally <Shally.Verma@cavium.com>; dev@dpdk.org; Trahe,
> Fiona
> ><fiona.trahe@intel.com>; akhil.goyal@nxp.com
> >Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
> >measurement
> >
> >External Email
> >
> >Hi Shally,
> >
> >Please see my comment inline.
> >
> >> -----Original Message-----
> >> From: Verma, Shally [mailto:Shally.Verma@cavium.com]
> >> Sent: Monday, November 5, 2018 9:34 AM
> >> To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org;
> >> Trahe, Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com; De Lara
> >> Guarch, Pablo <pablo.de.lara.guarch@intel.com>
> >> Cc: De@dpdk.org; Lara@dpdk.org; Guarch@dpdk.org
> >> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add
> >> performance measurement
> >>
> >>
> >>
> >> >-----Original Message-----
> >> >From: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>
> >> >Sent: 02 November 2018 15:29
> >> >To: dev@dpdk.org; Trahe, Fiona <fiona.trahe@intel.com>;
> >> >akhil.goyal@nxp.com; Verma, Shally <Shally.Verma@cavium.com>; De
> >> >Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
> >> >Cc: De@dpdk.org; Lara@dpdk.org; Guarch@dpdk.org
> >> >Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add
> >> >performance measurement
> >> >
> >> >External Email
> >> >
> >> >Hi Shally,
> >> >
> >> >Sorry for delay - I was on sick leave.
> >> >We had some issues with dynamic compression test so I block this
> >> >test in V2. May be there's too late to add this into this release
> >> >but we've decided
> >> to send this V2 to DPDK.
> >> >
> >> >My comment inline (not all have answer so far, still working on
> >> >that)
> >> >
> >> >> -----Original Message-----
> >> >> From: Verma, Shally [mailto:Shally.Verma@cavium.com]
> >> >> Sent: Friday, October 12, 2018 12:16 PM
> >> >> To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org;
> >> >> Trahe, Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com; De Lara
> >> >> Guarch, Pablo <pablo.de.lara.guarch@intel.com>
> >> >> Cc: De@dpdk.org; Lara@dpdk.org; Guarch@dpdk.org
> >> >> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add
> >> >> performance measurement
> >> >>
> >> >> HI TomaszX
> >> >>
> >> >> Sorry for delay in response. Comments inline.
> >> >>
> >> >> >-----Original Message-----
> >> >> >From: dev <dev-bounces@dpdk.org> On Behalf Of Tomasz Jozwiak
> >> >> >Sent: 01 October 2018 18:57
> >> >> >To: dev@dpdk.org; fiona.trahe@intel.com;
> >> >> >tomaszx.jozwiak@intel.com; akhil.goyal@nxp.com;
> >> >> >pablo.de.lara.guarch@intel.com
> >> >> >Cc: De@dpdk.org; Lara@dpdk.org; Guarch@dpdk.org
> >> >> >Subject: [dpdk-dev] [PATCH 2/3] app/compress-perf: add
> >> >> >performance measurement
> >> >> >
> >> >> >External Email
> >> >> >
> >> >> >Added performance measurement part into compression perf. test.
> >> >> >
> >> >> >Signed-off-by: De Lara Guarch, Pablo
> >> >> ><pablo.de.lara.guarch@intel.com>
> >> >> >Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
> >> >> >---
> >> >> > app/test-compress-perf/main.c | 844
> >> >> >++++++++++++++++++++++++++++++++++++++++++
> >> >> > 1 file changed, 844 insertions(+)
> >> >> >
> >> >> >diff --git a/app/test-compress-perf/main.c
> >> >> >b/app/test-compress-perf/main.c index f52b98d..093dfaf 100644
> >> >> >--- a/app/test-compress-perf/main.c
> >> >> >+++ b/app/test-compress-perf/main.c
> >> >> >@@ -5,13 +5,721 @@
> >> >> > #include <rte_malloc.h>
> >> >> > #include <rte_eal.h>
> >> >> > #include <rte_log.h>
> >> >> >+#include <rte_cycles.h>
> >> >> > #include <rte_compressdev.h>
> >> >> >
> >> >> > #include "comp_perf_options.h"
> >> >> >
> >> >> >+#define NUM_MAX_XFORMS 16
> >> >> >+#define NUM_MAX_INFLIGHT_OPS 512 #define EXPANSE_RATIO
> 1.05
> >> >> >+#define MIN_ISAL_SIZE 8
> >> >> >+
> >> >> >+#define DIV_CEIL(a, b)  ((a) / (b) + ((a) % (b) != 0))
> >> >> >+
> >> >> >+static int
> >> >> >+param_range_check(uint16_t size, const struct
> >> >> >+rte_param_log2_range
> >> >> >+*range) {
> >> >> >+       unsigned int next_size;
> >> >> >+
> >> >> >+       /* Check lower/upper bounds */
> >> >> >+       if (size < range->min)
> >> >> >+               return -1;
> >> >> >+
> >> >> >+       if (size > range->max)
> >> >> >+               return -1;
> >> >> >+
> >> >> >+       /* If range is actually only one value, size is correct */
> >> >> >+       if (range->increment == 0)
> >> >> >+               return 0;
> >> >> >+
> >> >> >+       /* Check if value is one of the supported sizes */
> >> >> >+       for (next_size = range->min; next_size <= range->max;
> >> >> >+                       next_size += range->increment)
> >> >> >+               if (size == next_size)
> >> >> >+                       return 0;
> >> >> >+
> >> >> >+       return -1;
> >> >> >+}
> >> >> >+
> >> >> >+static int
> >> >> >+comp_perf_check_capabilities(struct comp_test_data *test_data) {
> >> >> >+       const struct rte_compressdev_capabilities *cap;
> >> >> >+
> >> >> >+       cap = rte_compressdev_capability_get(test_data->cdev_id,
> >> >> >+
> >> >> >+ RTE_COMP_ALGO_DEFLATE);
> >> >> >+
> >> >> >+       if (cap == NULL) {
> >> >> >+               RTE_LOG(ERR, USER1,
> >> >> >+                       "Compress device does not support DEFLATE\n");
> >> >> >+               return -1;
> >> >> >+       }
> >> >> >+
> >> >> >+       uint64_t comp_flags = cap->comp_feature_flags;
> >> >> >+
> >> >> >+       /* Huffman enconding */
> >> >> >+       if (test_data->huffman_enc == RTE_COMP_HUFFMAN_FIXED
> &&
> >> >> >+                       (comp_flags & RTE_COMP_FF_HUFFMAN_FIXED) == 0) {
> >> >> >+               RTE_LOG(ERR, USER1,
> >> >> >+                       "Compress device does not supported Fixed
> Huffman\n");
> >> >> >+               return -1;
> >> >> >+       }
> >> >> >+
> >> >> >+       if (test_data->huffman_enc ==
> RTE_COMP_HUFFMAN_DYNAMIC
> >> &&
> >> >> >+                       (comp_flags &
> >> >> >+ RTE_COMP_FF_HUFFMAN_DYNAMIC) == 0)
> >> {
> >> >> >+               RTE_LOG(ERR, USER1,
> >> >> >+                       "Compress device does not supported
> >> >> >+ Dynamic
> >> Huffman\n");
> >> >> >+               return -1;
> >> >> >+       }
> >> >> >+
> >> >> >+       /* Window size */
> >> >> >+       if (test_data->window_sz != -1) {
> >> >> >+               if (param_range_check(test_data->window_sz,
> >> >> >+ &cap->window_size)
> >> >> What if cap->window_size is 0 i.e. implementation default?
> >> >
> >> >TJ: You probably mean cap->window_size.increment = 0 (because
> >> >cap->window_size is a structure). In that case we check if
> >> >test_data->window_sz >=min and test_data->window_sz <= max only,
> >> because increment = 0 means (base on compression API) we have only
> >> one value of windows_size (no range is supported).
> >> But PMD can set min and max too 0 for such case.
> >
> >TJ: I can't see any issue in that case too. Maybe I don't understand what you
> mean but the logic is as follow:
> >1)  if you pass '--window-sz  ...' param. into command line your
> >intention is to force that value of window size during test. We check is this
> value is allow (by param_range_check() function).
> >2) if you plan to use default value - just don't pass '--window-sz'
> >param. in command line at all. In that case we get windows size from
> >window_size.max field, so if window_size.min= window_size.max=0
> test_data->window_sz will be zero, as well.
> >If you mean that behavior is not good - I will be grateful for other
> suggestions.
> 
> This is fine, but I am thinking of a 3rd case here:
> 3) the user passes a window size but the PMD reports window_size.min = max = 0;
> then the user-requested window size is not applicable, right?

In that case - true. The test will fail with:
"Compress device does not support this window size\n");
So what is your proposal for that case?





> 
> >
> >>
> >> >
> >> >
> >> >
> >> ....
> >>
> >> >> >+
> >> >> >+               if (fread(data, data_to_read, 1, f) != 1) {
> >> >> >+                       RTE_LOG(ERR, USER1, "Input file could not be read\n");
> >> >> >+                       goto err;
> >> >> >+               }
> >> >> >+               if (fseek(f, 0, SEEK_SET) != 0) {
> >> >> >+                       RTE_LOG(ERR, USER1,
> >> >> >+                               "Size of input could not be calculated\n");
> >> >> >+                       goto err;
> >> >> >+               }
> >> >> >+               remaining_data -= data_to_read;
> >> >> >+               data += data_to_read;
> >> >> It looks like it will run 2nd time only if input file size < input
> >> >> data size in which case it will just keep filling input buffer
> >> >> with repeated
> >> data.
> >> >> Is that the intention here?
> >> >
> >> >TJ: Yes exactly. If test_data->input_data_sz is bigger than
> >> >actual_file_sz then we fill the buffer with repeated data from file
> >> >to fill
> >> whole buffer.
> >> I mentioned in one of the earlier reply, wont that then influence the
> >> compression behaviour and o/p? my suggestion was to work on actual
> >> user provided input to take perf to get actual perf for given content.
> >
> >TJ: You right, but this solution is flexible. You can pass '
> >--extended-input-sz" or not, so you can use original input data or extend it
> if you want.
> OK, but I'm still not sure it's really needed. In practice it probably won't be
> exercised most of the time. No strong opinion on this, though.
> 
> Thanks
> Shally
> >
> >>
> >> >
> >> >>
> >> ...
> >>
> >> >> >+                       if (data_addr == NULL) {
> >> >> >+                               RTE_LOG(ERR, USER1, "Could not
> >> >> >+ append data\n");
> >> >> Since a new buffer per segment is allocated, so is it possible for
> >> >> append to fail? think, this check is redundant here.
> >> >
> >> >TJ: Yes, you're right, it should never fail. But I think it's good
> >> >coding practice
> >> to add the check just in case.
> >> >
> >> Unless it is called in data path which might cost perf a bit.
> >
> >TJ:  prepare_bufs() is out of perf measurement, so shouldn't impact to
> >measurements. The performance measurement is inside
> >main_loop() only.
> >
> >
> >Br, Tomek
> >
> >>
> >> Thanks
> >> Shally
> >>
> >> >> >+                               return -1;
> >> >> >+                       }
> >> >> >+
> >> >> >+                       rte_memcpy(data_addr, input_data_ptr, data_sz);
> >> >> >+                       input_data_ptr += data_sz;
> >> >> >+                       remaining_data -= data_sz;
> >> >> >+
> >> >> >+                       if (rte_pktmbuf_chain(test_data->decomp_bufs[i],
> >> >> >+                                       next_seg) < 0) {
> >> >> >+                               RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
> >> >> >+                               return -1;
> >> >> >+                       }
> >> >> >+                       segs_per_mbuf++;
> >> >> >+               }
> >> >> >+
> >> >> >+               /* Allocate data in output mbuf */
> >> >> >+               test_data->comp_bufs[i] =
> >> >> >+                       rte_pktmbuf_alloc(test_data->comp_buf_pool);
> >> >> >+               if (test_data->comp_bufs[i] == NULL) {
> >> >> >+                       RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
> >> >> >+                       return -1;
> >> >> >+               }
> >> >> >+               data_addr = (uint8_t *) rte_pktmbuf_append(
> >> >> >+                                       test_data->comp_bufs[i],
> >> >> >+                                       test_data->seg_sz);
> >> >> >+               if (data_addr == NULL) {
> >> >> >+                       RTE_LOG(ERR, USER1, "Could not append data\n");
> >> >> >+                       return -1;
> >> >> >+               }
> >> >> >+
> >> >> >+               /* Chain mbufs if needed for output mbufs */
> >> >> >+               for (j = 1; j < segs_per_mbuf; j++) {
> >> >> >+                       struct rte_mbuf *next_seg =
> >> >> >+
> >> >> >+ rte_pktmbuf_alloc(test_data->comp_buf_pool);
> >> >> >+
> >> >> >+                       if (next_seg == NULL) {
> >> >> >+                               RTE_LOG(ERR, USER1,
> >> >> >+                                       "Could not allocate mbuf\n");
> >> >> >+                               return -1;
> >> >> >+                       }
> >> >> >+
> >> >> >+                       data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
> >> >> >+                               test_data->seg_sz);
> >> >> >+
> >> >> >+                       if (data_addr == NULL) {
> >> >> >+                               RTE_LOG(ERR, USER1, "Could not append data\n");
> >> >> >+                               return -1;
> >> >> >+                       }
> >> >> >+
> >> >> >+                       if (rte_pktmbuf_chain(test_data->comp_bufs[i],
> >> >> >+                                       next_seg) < 0) {
> >> >> >+                               RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
> >> >> >+                               return -1;
> >> >> >+                       }
> >> >> >+               }
> >> >> >+       }
> >> >> >+
> >> >> >+       return 0;
> >> >> >+}
> >> >> >+
> >> >> >+static void
> >> >> >+free_bufs(struct comp_test_data *test_data) {
> >> >> >+       uint32_t i;
> >> >> >+
> >> >> >+       for (i = 0; i < test_data->total_bufs; i++) {
> >> >> >+               rte_pktmbuf_free(test_data->comp_bufs[i]);
> >> >> >+               rte_pktmbuf_free(test_data->decomp_bufs[i]);
> >> >> >+       }
> >> >> >+       rte_free(test_data->comp_bufs);
> >> >> >+       rte_free(test_data->decomp_bufs); }
> >> >> >+
> >> >> >+static int
> >> >> >+main_loop(struct comp_test_data *test_data, uint8_t level,
> >> >> >+                       enum rte_comp_xform_type type,
> >> >> >+                       uint8_t *output_data_ptr,
> >> >> >+                       size_t *output_data_sz,
> >> >> >+                       unsigned int benchmarking) {
> >> >> >+       uint8_t dev_id = test_data->cdev_id;
> >> >> >+       uint32_t i, iter, num_iter;
> >> >> >+       struct rte_comp_op **ops, **deq_ops;
> >> >> >+       void *priv_xform = NULL;
> >> >> >+       struct rte_comp_xform xform;
> >> >> >+       size_t output_size = 0;
> >> >> >+       struct rte_mbuf **input_bufs, **output_bufs;
> >> >> >+       int res = 0;
> >> >> >+       int allocated = 0;
> >> >> >+
> >> >> >+       if (test_data == NULL || !test_data->burst_sz) {
> >> >> >+               RTE_LOG(ERR, USER1,
> >> >> >+                       "Unknow burst size\n");
> >> >> >+               return -1;
> >> >> >+       }
> >> >> >+
> >> >> >+       ops = rte_zmalloc_socket(NULL,
> >> >> >+               2 * test_data->total_bufs * sizeof(struct rte_comp_op *),
> >> >> >+               0, rte_socket_id());
> >> >> >+
> >> >> >+       if (ops == NULL) {
> >> >> >+               RTE_LOG(ERR, USER1,
> >> >> >+                       "Can't allocate memory for ops strucures\n");
> >> >> >+               return -1;
> >> >> >+       }
> >> >> >+
> >> >> >+       deq_ops = &ops[test_data->total_bufs];
> >> >> >+
> >> >> >+       if (type == RTE_COMP_COMPRESS) {
> >> >> >+               xform = (struct rte_comp_xform) {
> >> >> >+                       .type = RTE_COMP_COMPRESS,
> >> >> >+                       .compress = {
> >> >> >+                               .algo = RTE_COMP_ALGO_DEFLATE,
> >> >> >+                               .deflate.huffman = test_data->huffman_enc,
> >> >> >+                               .level = level,
> >> >> >+                               .window_size = test_data->window_sz,
> >> >> >+                               .chksum = RTE_COMP_CHECKSUM_NONE,
> >> >> >+                               .hash_algo = RTE_COMP_HASH_ALGO_NONE
> >> >> >+                       }
> >> >> >+               };
> >> >> >+               input_bufs = test_data->decomp_bufs;
> >> >> >+               output_bufs = test_data->comp_bufs;
> >> >> >+       } else {
> >> >> >+               xform = (struct rte_comp_xform) {
> >> >> >+                       .type = RTE_COMP_DECOMPRESS,
> >> >> >+                       .decompress = {
> >> >> >+                               .algo = RTE_COMP_ALGO_DEFLATE,
> >> >> >+                               .chksum = RTE_COMP_CHECKSUM_NONE,
> >> >> >+                               .window_size = test_data->window_sz,
> >> >> >+                               .hash_algo = RTE_COMP_HASH_ALGO_NONE
> >> >> >+                       }
> >> >> >+               };
> >> >> >+               input_bufs = test_data->comp_bufs;
> >> >> >+               output_bufs = test_data->decomp_bufs;
> >> >> >+       }
> >> >> >+
> >> >> >+       /* Create private xform */
> >> >> >+       if (rte_compressdev_private_xform_create(dev_id, &xform,
> >> >> >+                       &priv_xform) < 0) {
> >> >> >+               RTE_LOG(ERR, USER1, "Private xform could not be
> created\n");
> >> >> >+               res = -1;
> >> >> >+               goto end;
> >> >> >+       }
> >> >> >+
> >> >> >+       uint64_t tsc_start, tsc_end, tsc_duration;
> >> >> >+
> >> >> >+       tsc_start = tsc_end = tsc_duration = 0;
> >> >> >+       if (benchmarking) {
> >> >> >+               tsc_start = rte_rdtsc();
> >> >> >+               num_iter = test_data->num_iter;
> >> >> >+       } else
> >> >> >+               num_iter = 1;
> >> >> Looks like in same code we're doing benchmarking and functional
> >> validation.
> >> >> It can be reorganised to keep validation test separately like done
> >> >> in crypto_perf.
> >> >
> >> >TJ: Ok, makes sense. However in the interests of getting this into
> >> >the
> >> >18.11 release I'd like to defer this refactoring and the remainder
> >> >of your
> >> comments below to the next release.
> >> >
> >> >
> >> >Next comments - WIP
> >> >
> >> >
> >> >Br, Tomek

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/3] app/compress-perf: add performance measurement
  2018-11-06  8:49       ` Jozwiak, TomaszX
@ 2018-11-06 15:37         ` Verma, Shally
  2018-11-07 10:14           ` Jozwiak, TomaszX
  0 siblings, 1 reply; 76+ messages in thread
From: Verma, Shally @ 2018-11-06 15:37 UTC (permalink / raw)
  To: Jozwiak, TomaszX, dev, Trahe, Fiona, akhil.goyal



>-----Original Message-----
>From: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>
>Sent: 06 November 2018 14:19
>To: Verma, Shally <Shally.Verma@cavium.com>; dev@dpdk.org; Trahe, Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com
>Subject: RE: [PATCH v2 2/3] app/compress-perf: add performance measurement
>
>External Email
>
>> -----Original Message-----
>> From: Verma, Shally [mailto:Shally.Verma@cavium.com]
>> Sent: Monday, November 5, 2018 9:57 AM
>> To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; Trahe,
>> Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com
>> Subject: RE: [PATCH v2 2/3] app/compress-perf: add performance
>> measurement
>>
>>
>>
>> >-----Original Message-----
>> >From: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
>> >Sent: 02 November 2018 15:14
>> >To: dev@dpdk.org; fiona.trahe@intel.com; tomaszx.jozwiak@intel.com;
>> >Verma, Shally <Shally.Verma@cavium.com>; akhil.goyal@nxp.com
>> >Subject: [PATCH v2 2/3] app/compress-perf: add performance
>> measurement
>> >
>> >External Email
>> >
>> >Added performance measurement part into compression perf. test.
>> >
>> >Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
>> >Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
>> >---
>> > app/test-compress-perf/comp_perf_options_parse.c |   8 +-
>> > app/test-compress-perf/main.c                    | 886
>> ++++++++++++++++++++++-
>> > 2 files changed, 883 insertions(+), 11 deletions(-)
>> >
>> >diff --git a/app/test-compress-perf/comp_perf_options_parse.c
>> >b/app/test-compress-perf/comp_perf_options_parse.c
>> >index bef4d2f..e5da3ad 100644
>> >--- a/app/test-compress-perf/comp_perf_options_parse.c
>> >+++ b/app/test-compress-perf/comp_perf_options_parse.c
>> >@@ -431,10 +431,6 @@ parse_huffman_enc(struct comp_test_data
>> *test_data, const char *arg)
>> >                {
>> >                        "fixed",
>> >                        RTE_COMP_HUFFMAN_FIXED
>> >-               },
>> >-               {
>> >-                       "dynamic",
>> >-                       RTE_COMP_HUFFMAN_DYNAMIC
>> >                }
>> >        };
>> >
>> >@@ -569,9 +565,9 @@ comp_perf_options_default(struct comp_test_data
>> *test_data)
>> >        test_data->seg_sz = 2048;
>> >        test_data->burst_sz = 32;
>> >        test_data->pool_sz = 8192;
>> >-       test_data->max_sgl_segs = UINT16_MAX;
>> >+       test_data->max_sgl_segs = 16;
>> >        test_data->num_iter = 10000;
>> >-       test_data->huffman_enc = RTE_COMP_HUFFMAN_DYNAMIC;
>> >+       test_data->huffman_enc = RTE_COMP_HUFFMAN_FIXED;
>> >        test_data->test_op = COMPRESS_DECOMPRESS;
>> >        test_data->window_sz = -1;
>> >        test_data->level.min = 1;
>> >diff --git a/app/test-compress-perf/main.c
>> >b/app/test-compress-perf/main.c index f52b98d..e3f4bf6 100644
>> >--- a/app/test-compress-perf/main.c
>> >+++ b/app/test-compress-perf/main.c
>> >@@ -5,14 +5,728 @@
>> > #include <rte_malloc.h>
>> > #include <rte_eal.h>
>> > #include <rte_log.h>
>> >+#include <rte_cycles.h>
>> > #include <rte_compressdev.h>
>> >
>> > #include "comp_perf_options.h"
>> >
>> >+#define NUM_MAX_XFORMS 16
>> >+#define NUM_MAX_INFLIGHT_OPS 512
>> >+#define EXPANSE_RATIO 1.05
>> >+#define MIN_ISAL_SIZE 8
>> Can we avoid ISAL specific naming ?
>
>TJ: yes true :) will be fixed in V3
>
>
>
>
>> >+
>> >+#define DIV_CEIL(a, b)  ((a) / (b) + ((a) % (b) != 0))
>> >+
>> >+/* Cleanup state machine */
>> >+static enum cleanup_st {
>> >+       ST_CLEAR = 0,
>> >+       ST_TEST_DATA,
>> >+       ST_COMPDEV,
>> >+       ST_INPUT_DATA,
>> >+       ST_MEMORY_ALLOC,
>> >+       ST_PREPARE_BUF,
>> >+       ST_DURING_TEST
>> >+} cleanup = ST_CLEAR;
>> >+
>> >+static int
>> >+param_range_check(uint16_t size, const struct rte_param_log2_range
>> >+*range) {
>> >+       unsigned int next_size;
>> >+
>> >+       /* Check lower/upper bounds */
>> >+       if (size < range->min)
>> >+               return -1;
>> >+
>> >+       if (size > range->max)
>> >+               return -1;
>> >+
>> >+       /* If range is actually only one value, size is correct */
>> >+       if (range->increment == 0)
>> >+               return 0;
>> >+
>> >+       /* Check if value is one of the supported sizes */
>> >+       for (next_size = range->min; next_size <= range->max;
>> >+                       next_size += range->increment)
>> >+               if (size == next_size)
>> >+                       return 0;
>> >+
>> >+       return -1;
>> >+}
>> >+
>> >+static int
>> >+comp_perf_check_capabilities(struct comp_test_data *test_data) {
>> >+       const struct rte_compressdev_capabilities *cap;
>> >+
>> >+       cap = rte_compressdev_capability_get(test_data->cdev_id,
>> >+                                            RTE_COMP_ALGO_DEFLATE);
>> >+
>> >+       if (cap == NULL) {
>> >+               RTE_LOG(ERR, USER1,
>> >+                       "Compress device does not support DEFLATE\n");
>> >+               return -1;
>> >+       }
>> >+
>> >+       uint64_t comp_flags = cap->comp_feature_flags;
>> >+
>> >+       /* Huffman enconding */
>> >+       if (test_data->huffman_enc == RTE_COMP_HUFFMAN_FIXED &&
>> >+                       (comp_flags & RTE_COMP_FF_HUFFMAN_FIXED) == 0) {
>> >+               RTE_LOG(ERR, USER1,
>> >+                       "Compress device does not supported Fixed Huffman\n");
>> >+               return -1;
>> >+       }
>> >+
>> >+       if (test_data->huffman_enc == RTE_COMP_HUFFMAN_DYNAMIC &&
>> >+                       (comp_flags & RTE_COMP_FF_HUFFMAN_DYNAMIC) == 0) {
>> >+               RTE_LOG(ERR, USER1,
>> >+                       "Compress device does not supported Dynamic Huffman\n");
>> >+               return -1;
>> >+       }
>> >+
>> >+       /* Window size */
>> >+       if (test_data->window_sz != -1) {
>> >+               if (param_range_check(test_data->window_sz, &cap-
>> >window_size)
>> >+                               < 0) {
>> >+                       RTE_LOG(ERR, USER1,
>> >+                               "Compress device does not support "
>> >+                               "this window size\n");
>> >+                       return -1;
>> >+               }
>> >+       } else
>> >+               /* Set window size to PMD maximum if none was specified */
>> >+               test_data->window_sz = cap->window_size.max;
>> >+
>> >+       /* Check if chained mbufs is supported */
>> >+       if (test_data->max_sgl_segs > 1  &&
>> >+                       (comp_flags & RTE_COMP_FF_OOP_SGL_IN_SGL_OUT) == 0) {
>> >+               RTE_LOG(INFO, USER1, "Compress device does not support "
>> >+                               "chained mbufs. Max SGL segments set to 1\n");
>> >+               test_data->max_sgl_segs = 1;
>> >+       }
>> >+
>> >+       /* Level 0 support */
>> >+       if (test_data->level.min == 0 &&
>> >+                       (comp_flags & RTE_COMP_FF_NONCOMPRESSED_BLOCKS) ==
>> 0) {
>> >+               RTE_LOG(ERR, USER1, "Compress device does not support "
>> >+                               "level 0 (no compression)\n");
>> >+               return -1;
>> >+       }
>> >+
>> >+       return 0;
>> >+}
>> >+
>> >+static int
>> >+comp_perf_allocate_memory(struct comp_test_data *test_data) {
>> >+       /* Number of segments for input and output
>> >+        * (compression and decompression)
>> >+        */
>> >+       uint32_t total_segs = DIV_CEIL(test_data->input_data_sz,
>> >+                       test_data->seg_sz);
>> >+       test_data->comp_buf_pool =
>> rte_pktmbuf_pool_create("comp_buf_pool",
>> >+                               total_segs,
>> >+                               0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
>> >+                               rte_socket_id());
>> >+       if (test_data->comp_buf_pool == NULL) {
>> >+               RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
>> >+               return -1;
>> >+       }
>> >+
>> >+       cleanup = ST_MEMORY_ALLOC;
>> >+       test_data->decomp_buf_pool =
>> rte_pktmbuf_pool_create("decomp_buf_pool",
>> >+                               total_segs,
>> >+                               0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
>> >+                               rte_socket_id());
>> >+       if (test_data->decomp_buf_pool == NULL) {
>> >+               RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
>> >+               return -1;
>> >+       }
>> Unless am missing to see it, you need to free pre-allocated memories here
>> before return call for all failed cases.
>
>TJ: There's only one 'freeing stack' at the end of the main application function to avoid double freeing resources (which was previously in
>V1).
>We have a state machine for that (static enum cleanup_st) to know what should be freed and what has already been allocated.
>In case you mean the state machine is set just after first alloc in line 136:
>
>cleanup = ST_MEMORY_ALLOC;
>
>so we know what should be free at the end of application running in line 891:
>
>end:
>        switch (cleanup) {
>
>        case ST_DURING_TEST:
>        case ST_PREPARE_BUF:
>                free_bufs(test_data);
>                /* fallthrough */
>        case ST_MEMORY_ALLOC:
>                rte_free(test_data->decomp_bufs);
Even if we are in this state, it doesn't guarantee that all of the buffers in this state are allocated. So shouldn't every pointer be null-checked before it is freed?
Thanks
Shally
>                rte_free(test_data->comp_bufs);
>                rte_free(test_data->decompressed_data);
>                rte_free(test_data->compressed_data);
>                rte_mempool_free(test_data->op_pool);
>                rte_mempool_free(test_data->decomp_buf_pool);
>                rte_mempool_free(test_data->comp_buf_pool);
>                /* fallthrough */
>        case ST_INPUT_DATA:
>                rte_free(test_data->input_data);
>                /* fallthrough */
>        case ST_COMPDEV:
>                if (test_data->cdev_id != -1)
>                        rte_compressdev_stop(test_data->cdev_id);
>                /* fallthrough */
>        case ST_TEST_DATA:
>                rte_free(test_data);
>                /* fallthrough */
>        case ST_CLEAR:
>        default:
>                i = rte_eal_cleanup();
>                if (i) {
>                        RTE_LOG(ERR, USER1,
>                                "Error from rte_eal_cleanup(), %d\n", i);
>                        ret = i;
>                }
>                break;
>        }
>        return ret;
>
>
>
>>
>> >+
>> >+       test_data->total_bufs = DIV_CEIL(total_segs,
>> >+ test_data->max_sgl_segs);
>> >+
>> >+       test_data->op_pool = rte_comp_op_pool_create("op_pool",
>> >+                                 test_data->total_bufs,
>> >+                                 0, 0, rte_socket_id());
>> >+       if (test_data->op_pool == NULL) {
>> >+               RTE_LOG(ERR, USER1, "Comp op mempool could not be
>> created\n");
>> >+               return -1;
>> >+       }
>> >+
>> >+       /*
>> >+        * Compressed data might be a bit larger than input data,
>> >+        * if data cannot be compressed
>> >+        */
>> >+       test_data->compressed_data = rte_zmalloc_socket(NULL,
>> >+                               test_data->input_data_sz * EXPANSE_RATIO
>> >+                                                       +
>> >+ MIN_ISAL_SIZE, 0,
>> MIN_ISAL_SIZE looks specific to ISAL driver. if so, then is this perf app specific
>> to that PMD? or Can we make it somewhat generic?
>
>TJ: True will be fixed
>
>
>>
>> >+                               rte_socket_id());
>> >+       if (test_data->compressed_data == NULL) {
>> >+               RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
>> >+                               "file could not be allocated\n");
>> >+               return -1;
>> >+       }
>> >+
>> >+       test_data->decompressed_data = rte_zmalloc_socket(NULL,
>> >+                               test_data->input_data_sz, 0,
>> >+                               rte_socket_id());
>> >+       if (test_data->decompressed_data == NULL) {
>> >+               RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
>> >+                               "file could not be allocated\n");
>> >+               return -1;
>> >+       }
>> >+
>> >+       test_data->comp_bufs = rte_zmalloc_socket(NULL,
>> >+                       test_data->total_bufs * sizeof(struct rte_mbuf *),
>> >+                       0, rte_socket_id());
>> >+       if (test_data->comp_bufs == NULL) {
>> >+               RTE_LOG(ERR, USER1, "Memory to hold the compression mbufs"
>> >+                               " could not be allocated\n");
>> >+               return -1;
>> >+       }
>> >+
>> >+       test_data->decomp_bufs = rte_zmalloc_socket(NULL,
>> >+                       test_data->total_bufs * sizeof(struct rte_mbuf *),
>> >+                       0, rte_socket_id());
>> >+       if (test_data->decomp_bufs == NULL) {
>> >+               RTE_LOG(ERR, USER1, "Memory to hold the decompression
>> mbufs"
>> >+                               " could not be allocated\n");
>> >+               return -1;
>> >+       }
>> >+       return 0;
>> >+}
>> >+
>> >+static int
>> >+comp_perf_dump_input_data(struct comp_test_data *test_data) {
>> >+       FILE *f = fopen(test_data->input_file, "r");
>> >+       int ret = -1;
>> >+
>> >+       if (f == NULL) {
>> >+               RTE_LOG(ERR, USER1, "Input file could not be opened\n");
>> >+               return -1;
>> >+       }
>> >+
>> >+       if (fseek(f, 0, SEEK_END) != 0) {
>> >+               RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
>> >+               goto end;
>> >+       }
>> >+       size_t actual_file_sz = ftell(f);
>> >+       /* If extended input data size has not been set,
>> >+        * input data size = file size
>> >+        */
>> >+
>> >+       if (test_data->input_data_sz == 0)
>> >+               test_data->input_data_sz = actual_file_sz;
>> >+
>> >+       if (fseek(f, 0, SEEK_SET) != 0) {
>> >+               RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
>> >+               goto end;
>> >+       }
>> >+
>> >+       test_data->input_data = rte_zmalloc_socket(NULL,
>> >+                               test_data->input_data_sz, 0,
>> >+ rte_socket_id());
>> >+
>> >+       if (test_data->input_data == NULL) {
>> >+               RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
>> >+                               "file could not be allocated\n");
>> >+               goto end;
>> >+       }
>> >+
>> >+       size_t remaining_data = test_data->input_data_sz;
>> >+       uint8_t *data = test_data->input_data;
>> >+
>> >+       while (remaining_data > 0) {
>> >+               size_t data_to_read = RTE_MIN(remaining_data,
>> >+ actual_file_sz);
>> >+
>> >+               if (fread(data, data_to_read, 1, f) != 1) {
>> >+                       RTE_LOG(ERR, USER1, "Input file could not be read\n");
>> >+                       goto end;
>> >+               }
>> >+               if (fseek(f, 0, SEEK_SET) != 0) {
>> >+                       RTE_LOG(ERR, USER1,
>> >+                               "Size of input could not be calculated\n");
>> >+                       goto end;
>> >+               }
>> >+               remaining_data -= data_to_read;
>> >+               data += data_to_read;
>> >+       }
>> >+
>> >+       if (test_data->input_data_sz > actual_file_sz)
>> >+               RTE_LOG(INFO, USER1,
>> >+                 "%zu bytes read from file %s, extending the file %.2f times\n",
>> >+                       test_data->input_data_sz, test_data->input_file,
>> >+                       (double)test_data->input_data_sz/actual_file_sz);
>> >+       else
>> >+               RTE_LOG(INFO, USER1,
>> >+                       "%zu bytes read from file %s\n",
>> >+                       test_data->input_data_sz,
>> >+ test_data->input_file);
>> >+
>> >+       ret = 0;
>> >+
>> >+end:
>> >+       fclose(f);
>> >+       return ret;
>> >+}
>> >+
>> >+static int
>> >+comp_perf_initialize_compressdev(struct comp_test_data *test_data) {
>> >+       uint8_t enabled_cdev_count;
>> >+       uint8_t enabled_cdevs[RTE_COMPRESS_MAX_DEVS];
>> >+
>> >+       enabled_cdev_count = rte_compressdev_devices_get(test_data-
>> >driver_name,
>> >+                       enabled_cdevs, RTE_COMPRESS_MAX_DEVS);
>> >+       if (enabled_cdev_count == 0) {
>> >+               RTE_LOG(ERR, USER1, "No compress devices type %s available\n",
>> >+                               test_data->driver_name);
>> >+               return -EINVAL;
>> >+       }
>> >+
>> >+       if (enabled_cdev_count > 1)
>> >+               RTE_LOG(INFO, USER1,
>> >+                       "Only the first compress device will be
>> >+ used\n");
>> >+
>> >+       test_data->cdev_id = enabled_cdevs[0];
>> >+
>> >+       if (comp_perf_check_capabilities(test_data) < 0)
>> >+               return -1;
>> >+
>> >+       /* Configure compressdev (one device, one queue pair) */
>> >+       struct rte_compressdev_config config = {
>> >+               .socket_id = rte_socket_id(),
>> >+               .nb_queue_pairs = 1,
>> >+               .max_nb_priv_xforms = NUM_MAX_XFORMS,
>> >+               .max_nb_streams = 0
>> >+       };
>> >+
>> >+       if (rte_compressdev_configure(test_data->cdev_id, &config) < 0) {
>> >+               RTE_LOG(ERR, USER1, "Device configuration failed\n");
>> >+               return -1;
>> >+       }
>> >+
>> >+       if (rte_compressdev_queue_pair_setup(test_data->cdev_id, 0,
>> >+                       NUM_MAX_INFLIGHT_OPS, rte_socket_id()) < 0) {
>> >+               RTE_LOG(ERR, USER1, "Queue pair setup failed\n");
>> >+               return -1;
>> >+       }
>> >+
>> >+       if (rte_compressdev_start(test_data->cdev_id) < 0) {
>> >+               RTE_LOG(ERR, USER1, "Device could not be started\n");
>> >+               return -1;
>> >+       }
>> >+
>> >+       return 0;
>> >+}
>> >+
>> >+static int
>> >+prepare_bufs(struct comp_test_data *test_data) {
>> >+       uint32_t remaining_data = test_data->input_data_sz;
>> >+       uint8_t *input_data_ptr = test_data->input_data;
>> >+       size_t data_sz;
>> >+       uint8_t *data_addr;
>> >+       uint32_t i, j;
>> >+
>> >+       for (i = 0; i < test_data->total_bufs; i++) {
>> >+               /* Allocate data in input mbuf and copy data from input file */
>> >+               test_data->decomp_bufs[i] =
>> >+                       rte_pktmbuf_alloc(test_data->decomp_buf_pool);
>> >+               if (test_data->decomp_bufs[i] == NULL) {
>> >+                       RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
>> >+                       return -1;
>> >+               }
>> >+
>> >+               cleanup = ST_PREPARE_BUF;
>> >+               data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
>> >+               data_addr = (uint8_t *) rte_pktmbuf_append(
>> >+                                       test_data->decomp_bufs[i], data_sz);
>> >+               if (data_addr == NULL) {
>> >+                       RTE_LOG(ERR, USER1, "Could not append data\n");
>> Same here: free the allocated buffers before returning from the failed cases.
>>
>> Thanks
>> Shally
>
>Thx, Tomek

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
  2018-11-06  9:05             ` Jozwiak, TomaszX
@ 2018-11-06 15:39               ` Verma, Shally
  2018-11-07 10:18                 ` Jozwiak, TomaszX
  0 siblings, 1 reply; 76+ messages in thread
From: Verma, Shally @ 2018-11-06 15:39 UTC (permalink / raw)
  To: Jozwiak, TomaszX, dev, Trahe, Fiona, akhil.goyal



>-----Original Message-----
>From: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>
>Sent: 06 November 2018 14:36
>To: Verma, Shally <Shally.Verma@cavium.com>; dev@dpdk.org; Trahe, Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com
>Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
>
>External Email
>
>> -----Original Message-----
>> From: Verma, Shally [mailto:Shally.Verma@cavium.com]
>> Sent: Tuesday, November 6, 2018 9:16 AM
>> To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; Trahe,
>> Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com
>> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
>> measurement
...

>> >> >> >+
>> >> >> >+       /* Window size */
>> >> >> >+       if (test_data->window_sz != -1) {
>> >> >> >+               if (param_range_check(test_data->window_sz,
>> >> >> >+ &cap->window_size)
>> >> >> What if cap->window_size is 0 i.e. implementation default?
>> >> >
>> >> >TJ: You probably mean cap->window_size.increment = 0 (because
>> >> >cap->window_size is a structure). In that case we check if
>> >> >test_data->window_sz >=min and test_data->window_sz <= max only,
>> >> because increment = 0 means (base on compression API) we have only
>> >> one value of windows_size (no range is supported).
>> >> But the PMD can set min and max to 0 for such a case.
>> >
>> >TJ: I can't see any issue in that case too. Maybe I don't understand what you
>> mean but the logic is as follow:
>> >1)  if you pass '--window-sz  ...' param. into command line your
>> >intention is to force that value of window size during the test. We check if this
>> value is allowed (by the param_range_check() function).
>> >2) if you plan to use default value - just don't pass '--window-sz'
>> >param. in command line at all. In that case we get windows size from
>> >window_size.max field, so if window_size.min= window_size.max=0
>> test_data->window_sz will be zero, as well.
>> >If you mean that behavior is not good - I will be grateful for other
>> suggestions.
>>
>> This is fine. but I am thinking of 3rd case here:
>> c) user pass window sz but PMD window_sz.min = max = 0, then user
>> requested windowsz is not applicable right?!
>
>In that case - true. There'll be fail :
>"Compress device does not support this window size\n");
>So what is your proposal for  that case?
>
We can set the window size to the implementation default and add a diagnostic reporting the window size used for the test run.
No need to fail here, I believe.

Thanks
Shally

>
>
>
>
>>
>> >
>> >>
>> >> >
>> >> >
>> >> >
>> >> ....
>> >>
>> >> >> >+
>> >> >> >+               if (fread(data, data_to_read, 1, f) != 1) {
>> >> >> >+                       RTE_LOG(ERR, USER1, "Input file could not be read\n");
>> >> >> >+                       goto err;
>> >> >> >+               }
>> >> >> >+               if (fseek(f, 0, SEEK_SET) != 0) {
>> >> >> >+                       RTE_LOG(ERR, USER1,
>> >> >> >+                               "Size of input could not be calculated\n");
>> >> >> >+                       goto err;
>> >> >> >+               }
>> >> >> >+               remaining_data -= data_to_read;
>> >> >> >+               data += data_to_read;
>> >> >> It looks like it will run 2nd time only if input file size < input
>> >> >> data size in which case it will just keep filling input buffer
>> >> >> with repeated
>> >> data.
>> >> >> Is that the intention here?
>> >> >
>> >> >TJ: Yes exactly. If test_data->input_data_sz is bigger than
>> >> >actual_file_sz then we fill the buffer with repeated data from file
>> >> >to fill
>> >> whole buffer.
>> >> I mentioned in one of the earlier reply, wont that then influence the
>> >> compression behaviour and o/p? my suggestion was to work on actual
>> >> user provided input to take perf to get actual perf for given content.
>> >
>> >TJ: You're right, but this solution is flexible. You can pass
>> >"--extended-input-sz" or not, so you can use the original input data or extend it
>> if you want.
>> OK, but I'm still not sure it's really needed. In practice, most of the time
>> it won't be exercised. No strong opinion on this, though.
>>
>> Thanks
>> Shally
>> >
>> >>
>> >> >
>> >> >>
>> >> ...
>> >>
>> >> >> >+                       if (data_addr == NULL) {
>> >> >> >+                               RTE_LOG(ERR, USER1, "Could not
>> >> >> >+ append data\n");
>> >> >> Since a new buffer per segment is allocated, so is it possible for
>> >> >> append to fail? think, this check is redundant here.
>> >> >
>> >> >TJ: Yes, you're right, it should never fail. But I think it's good
>> >> >coding practice
>> >> to add the check just in case.
>> >> >
>> >> Unless it is called in data path which might cost perf a bit.
>> >
>> >TJ:  prepare_bufs() is out of perf measurement, so shouldn't impact to
>> >measurements. The performance measurement is inside
>> >main_loop() only.
>> >
>> >
>> >Br, Tomek
>> >
>> >>
>> >> Thanks
>> >> Shally
>> >>
>> >> >> >+                               return -1;
>> >> >> >+                       }
>> >> >> >+
>> >> >> >+                       rte_memcpy(data_addr, input_data_ptr, data_sz);
>> >> >> >+                       input_data_ptr += data_sz;
>> >> >> >+                       remaining_data -= data_sz;
>> >> >> >+
>> >> >> >+                       if (rte_pktmbuf_chain(test_data->decomp_bufs[i],
>> >> >> >+                                       next_seg) < 0) {
>> >> >> >+                               RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
>> >> >> >+                               return -1;
>> >> >> >+                       }
>> >> >> >+                       segs_per_mbuf++;
>> >> >> >+               }
>> >> >> >+
>> >> >> >+               /* Allocate data in output mbuf */
>> >> >> >+               test_data->comp_bufs[i] =
>> >> >> >+                       rte_pktmbuf_alloc(test_data->comp_buf_pool);
>> >> >> >+               if (test_data->comp_bufs[i] == NULL) {
>> >> >> >+                       RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
>> >> >> >+                       return -1;
>> >> >> >+               }
>> >> >> >+               data_addr = (uint8_t *) rte_pktmbuf_append(
>> >> >> >+                                       test_data->comp_bufs[i],
>> >> >> >+                                       test_data->seg_sz);
>> >> >> >+               if (data_addr == NULL) {
>> >> >> >+                       RTE_LOG(ERR, USER1, "Could not append data\n");
>> >> >> >+                       return -1;
>> >> >> >+               }
>> >> >> >+
>> >> >> >+               /* Chain mbufs if needed for output mbufs */
>> >> >> >+               for (j = 1; j < segs_per_mbuf; j++) {
>> >> >> >+                       struct rte_mbuf *next_seg =
>> >> >> >+
>> >> >> >+ rte_pktmbuf_alloc(test_data->comp_buf_pool);
>> >> >> >+
>> >> >> >+                       if (next_seg == NULL) {
>> >> >> >+                               RTE_LOG(ERR, USER1,
>> >> >> >+                                       "Could not allocate mbuf\n");
>> >> >> >+                               return -1;
>> >> >> >+                       }
>> >> >> >+
>> >> >> >+                       data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
>> >> >> >+                               test_data->seg_sz);
>> >> >> >+
>> >> >> >+                       if (data_addr == NULL) {
>> >> >> >+                               RTE_LOG(ERR, USER1, "Could not append data\n");
>> >> >> >+                               return -1;
>> >> >> >+                       }
>> >> >> >+
>> >> >> >+                       if (rte_pktmbuf_chain(test_data->comp_bufs[i],
>> >> >> >+                                       next_seg) < 0) {
>> >> >> >+                               RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
>> >> >> >+                               return -1;
>> >> >> >+                       }
>> >> >> >+               }
>> >> >> >+       }
>> >> >> >+
>> >> >> >+       return 0;
>> >> >> >+}
>> >> >> >+
>> >> >> >+static void
>> >> >> >+free_bufs(struct comp_test_data *test_data) {
>> >> >> >+       uint32_t i;
>> >> >> >+
>> >> >> >+       for (i = 0; i < test_data->total_bufs; i++) {
>> >> >> >+               rte_pktmbuf_free(test_data->comp_bufs[i]);
>> >> >> >+               rte_pktmbuf_free(test_data->decomp_bufs[i]);
>> >> >> >+       }
>> >> >> >+       rte_free(test_data->comp_bufs);
>> >> >> >+       rte_free(test_data->decomp_bufs); }
>> >> >> >+
>> >> >> >+static int
>> >> >> >+main_loop(struct comp_test_data *test_data, uint8_t level,
>> >> >> >+                       enum rte_comp_xform_type type,
>> >> >> >+                       uint8_t *output_data_ptr,
>> >> >> >+                       size_t *output_data_sz,
>> >> >> >+                       unsigned int benchmarking) {
>> >> >> >+       uint8_t dev_id = test_data->cdev_id;
>> >> >> >+       uint32_t i, iter, num_iter;
>> >> >> >+       struct rte_comp_op **ops, **deq_ops;
>> >> >> >+       void *priv_xform = NULL;
>> >> >> >+       struct rte_comp_xform xform;
>> >> >> >+       size_t output_size = 0;
>> >> >> >+       struct rte_mbuf **input_bufs, **output_bufs;
>> >> >> >+       int res = 0;
>> >> >> >+       int allocated = 0;
>> >> >> >+
>> >> >> >+       if (test_data == NULL || !test_data->burst_sz) {
>> >> >> >+               RTE_LOG(ERR, USER1,
>> >> >> >+                       "Unknow burst size\n");
>> >> >> >+               return -1;
>> >> >> >+       }
>> >> >> >+
>> >> >> >+       ops = rte_zmalloc_socket(NULL,
>> >> >> >+               2 * test_data->total_bufs * sizeof(struct rte_comp_op *),
>> >> >> >+               0, rte_socket_id());
>> >> >> >+
>> >> >> >+       if (ops == NULL) {
>> >> >> >+               RTE_LOG(ERR, USER1,
>> >> >> >+                       "Can't allocate memory for ops strucures\n");
>> >> >> >+               return -1;
>> >> >> >+       }
>> >> >> >+
>> >> >> >+       deq_ops = &ops[test_data->total_bufs];
>> >> >> >+
>> >> >> >+       if (type == RTE_COMP_COMPRESS) {
>> >> >> >+               xform = (struct rte_comp_xform) {
>> >> >> >+                       .type = RTE_COMP_COMPRESS,
>> >> >> >+                       .compress = {
>> >> >> >+                               .algo = RTE_COMP_ALGO_DEFLATE,
>> >> >> >+                               .deflate.huffman = test_data->huffman_enc,
>> >> >> >+                               .level = level,
>> >> >> >+                               .window_size = test_data->window_sz,
>> >> >> >+                               .chksum = RTE_COMP_CHECKSUM_NONE,
>> >> >> >+                               .hash_algo = RTE_COMP_HASH_ALGO_NONE
>> >> >> >+                       }
>> >> >> >+               };
>> >> >> >+               input_bufs = test_data->decomp_bufs;
>> >> >> >+               output_bufs = test_data->comp_bufs;
>> >> >> >+       } else {
>> >> >> >+               xform = (struct rte_comp_xform) {
>> >> >> >+                       .type = RTE_COMP_DECOMPRESS,
>> >> >> >+                       .decompress = {
>> >> >> >+                               .algo = RTE_COMP_ALGO_DEFLATE,
>> >> >> >+                               .chksum = RTE_COMP_CHECKSUM_NONE,
>> >> >> >+                               .window_size = test_data->window_sz,
>> >> >> >+                               .hash_algo = RTE_COMP_HASH_ALGO_NONE
>> >> >> >+                       }
>> >> >> >+               };
>> >> >> >+               input_bufs = test_data->comp_bufs;
>> >> >> >+               output_bufs = test_data->decomp_bufs;
>> >> >> >+       }
>> >> >> >+
>> >> >> >+       /* Create private xform */
>> >> >> >+       if (rte_compressdev_private_xform_create(dev_id, &xform,
>> >> >> >+                       &priv_xform) < 0) {
>> >> >> >+               RTE_LOG(ERR, USER1, "Private xform could not be
>> created\n");
>> >> >> >+               res = -1;
>> >> >> >+               goto end;
>> >> >> >+       }
>> >> >> >+
>> >> >> >+       uint64_t tsc_start, tsc_end, tsc_duration;
>> >> >> >+
>> >> >> >+       tsc_start = tsc_end = tsc_duration = 0;
>> >> >> >+       if (benchmarking) {
>> >> >> >+               tsc_start = rte_rdtsc();
>> >> >> >+               num_iter = test_data->num_iter;
>> >> >> >+       } else
>> >> >> >+               num_iter = 1;
>> >> >> Looks like in same code we're doing benchmarking and functional
>> >> validation.
>> >> >> It can be reorganised to keep validation test separately like done
>> >> >> in crypto_perf.
>> >> >
>> >> >TJ: Ok, makes sense. However in the interests of getting this into
>> >> >the
>> >> >18.11 release I'd like to defer this refactoring and the remainder
>> >> >of your
>> >> comments below to the next release.
>> >> >
>> >> >
>> >> >Next comments - WIP
>> >> >
>> >> >
>> >> >Br, Tomek

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/3] app/compress-perf: add performance measurement
  2018-11-06 15:37         ` Verma, Shally
@ 2018-11-07 10:14           ` Jozwiak, TomaszX
  0 siblings, 0 replies; 76+ messages in thread
From: Jozwiak, TomaszX @ 2018-11-07 10:14 UTC (permalink / raw)
  To: Verma, Shally, dev, Trahe, Fiona, akhil.goyal



> -----Original Message-----
> From: Verma, Shally [mailto:Shally.Verma@cavium.com]
> Sent: Tuesday, November 6, 2018 4:37 PM
> To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; Trahe,
> Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com
> Subject: RE: [PATCH v2 2/3] app/compress-perf: add performance
> measurement
> 
> 
> 
> >-----Original Message-----
> >From: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>
> >Sent: 06 November 2018 14:19
> >To: Verma, Shally <Shally.Verma@cavium.com>; dev@dpdk.org; Trahe,
> Fiona
> ><fiona.trahe@intel.com>; akhil.goyal@nxp.com
> >Subject: RE: [PATCH v2 2/3] app/compress-perf: add performance
> >measurement
> >
> >External Email
> >
> >> -----Original Message-----
> >> From: Verma, Shally [mailto:Shally.Verma@cavium.com]
> >> Sent: Monday, November 5, 2018 9:57 AM
> >> To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org;
> >> Trahe, Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com
> >> Subject: RE: [PATCH v2 2/3] app/compress-perf: add performance
> >> measurement
> >>
> >>
> >>
> >> >-----Original Message-----
> >> >From: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
> >> >Sent: 02 November 2018 15:14
> >> >To: dev@dpdk.org; fiona.trahe@intel.com; tomaszx.jozwiak@intel.com;
> >> >Verma, Shally <Shally.Verma@cavium.com>; akhil.goyal@nxp.com
> >> >Subject: [PATCH v2 2/3] app/compress-perf: add performance
> >> measurement
> >> >
> >> >External Email
> >> >
> >> >Added performance measurement part into compression perf. test.
> >> >
> >> >Signed-off-by: De Lara Guarch, Pablo
> >> ><pablo.de.lara.guarch@intel.com>
> >> >Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
> >> >---
> >> > app/test-compress-perf/comp_perf_options_parse.c |   8 +-
> >> > app/test-compress-perf/main.c                    | 886
> >> ++++++++++++++++++++++-
> >> > 2 files changed, 883 insertions(+), 11 deletions(-)
> >> >
> >> >diff --git a/app/test-compress-perf/comp_perf_options_parse.c
> >> >b/app/test-compress-perf/comp_perf_options_parse.c
> >> >index bef4d2f..e5da3ad 100644
> >> >--- a/app/test-compress-perf/comp_perf_options_parse.c
> >> >+++ b/app/test-compress-perf/comp_perf_options_parse.c
> >> >@@ -431,10 +431,6 @@ parse_huffman_enc(struct comp_test_data
> >> *test_data, const char *arg)
> >> >                {
> >> >                        "fixed",
> >> >                        RTE_COMP_HUFFMAN_FIXED
> >> >-               },
> >> >-               {
> >> >-                       "dynamic",
> >> >-                       RTE_COMP_HUFFMAN_DYNAMIC
> >> >                }
> >> >        };
> >> >
> >> >@@ -569,9 +565,9 @@ comp_perf_options_default(struct
> comp_test_data
> >> *test_data)
> >> >        test_data->seg_sz = 2048;
> >> >        test_data->burst_sz = 32;
> >> >        test_data->pool_sz = 8192;
> >> >-       test_data->max_sgl_segs = UINT16_MAX;
> >> >+       test_data->max_sgl_segs = 16;
> >> >        test_data->num_iter = 10000;
> >> >-       test_data->huffman_enc = RTE_COMP_HUFFMAN_DYNAMIC;
> >> >+       test_data->huffman_enc = RTE_COMP_HUFFMAN_FIXED;
> >> >        test_data->test_op = COMPRESS_DECOMPRESS;
> >> >        test_data->window_sz = -1;
> >> >        test_data->level.min = 1;
> >> >diff --git a/app/test-compress-perf/main.c
> >> >b/app/test-compress-perf/main.c index f52b98d..e3f4bf6 100644
> >> >--- a/app/test-compress-perf/main.c
> >> >+++ b/app/test-compress-perf/main.c
> >> >@@ -5,14 +5,728 @@
> >> > #include <rte_malloc.h>
> >> > #include <rte_eal.h>
> >> > #include <rte_log.h>
> >> >+#include <rte_cycles.h>
> >> > #include <rte_compressdev.h>
> >> >
> >> > #include "comp_perf_options.h"
> >> >
> >> >+#define NUM_MAX_XFORMS 16
> >> >+#define NUM_MAX_INFLIGHT_OPS 512
> >> >+#define EXPANSE_RATIO 1.05
> >> >+#define MIN_ISAL_SIZE 8
> >> Can we avoid ISAL specific naming ?
> >
> >TJ: yes true :) will be fixed in V3
> >
> >
> >
> >
> >> >+
> >> >+#define DIV_CEIL(a, b)  ((a) / (b) + ((a) % (b) != 0))
> >> >+
> >> >+/* Cleanup state machine */
> >> >+static enum cleanup_st {
> >> >+       ST_CLEAR = 0,
> >> >+       ST_TEST_DATA,
> >> >+       ST_COMPDEV,
> >> >+       ST_INPUT_DATA,
> >> >+       ST_MEMORY_ALLOC,
> >> >+       ST_PREPARE_BUF,
> >> >+       ST_DURING_TEST
> >> >+} cleanup = ST_CLEAR;
> >> >+
> >> >+static int
> >> >+param_range_check(uint16_t size, const struct rte_param_log2_range
> >> >+*range) {
> >> >+       unsigned int next_size;
> >> >+
> >> >+       /* Check lower/upper bounds */
> >> >+       if (size < range->min)
> >> >+               return -1;
> >> >+
> >> >+       if (size > range->max)
> >> >+               return -1;
> >> >+
> >> >+       /* If range is actually only one value, size is correct */
> >> >+       if (range->increment == 0)
> >> >+               return 0;
> >> >+
> >> >+       /* Check if value is one of the supported sizes */
> >> >+       for (next_size = range->min; next_size <= range->max;
> >> >+                       next_size += range->increment)
> >> >+               if (size == next_size)
> >> >+                       return 0;
> >> >+
> >> >+       return -1;
> >> >+}
> >> >+
> >> >+static int
> >> >+comp_perf_check_capabilities(struct comp_test_data *test_data) {
> >> >+       const struct rte_compressdev_capabilities *cap;
> >> >+
> >> >+       cap = rte_compressdev_capability_get(test_data->cdev_id,
> >> >+                                            RTE_COMP_ALGO_DEFLATE);
> >> >+
> >> >+       if (cap == NULL) {
> >> >+               RTE_LOG(ERR, USER1,
> >> >+                       "Compress device does not support DEFLATE\n");
> >> >+               return -1;
> >> >+       }
> >> >+
> >> >+       uint64_t comp_flags = cap->comp_feature_flags;
> >> >+
> >> >+       /* Huffman enconding */
> >> >+       if (test_data->huffman_enc == RTE_COMP_HUFFMAN_FIXED &&
> >> >+                       (comp_flags & RTE_COMP_FF_HUFFMAN_FIXED) == 0) {
> >> >+               RTE_LOG(ERR, USER1,
> >> >+                       "Compress device does not supported Fixed Huffman\n");
> >> >+               return -1;
> >> >+       }
> >> >+
> >> >+       if (test_data->huffman_enc == RTE_COMP_HUFFMAN_DYNAMIC
> &&
> >> >+                       (comp_flags & RTE_COMP_FF_HUFFMAN_DYNAMIC) == 0)
> {
> >> >+               RTE_LOG(ERR, USER1,
> >> >+                       "Compress device does not supported Dynamic
> Huffman\n");
> >> >+               return -1;
> >> >+       }
> >> >+
> >> >+       /* Window size */
> >> >+       if (test_data->window_sz != -1) {
> >> >+               if (param_range_check(test_data->window_sz, &cap-
> >> >window_size)
> >> >+                               < 0) {
> >> >+                       RTE_LOG(ERR, USER1,
> >> >+                               "Compress device does not support "
> >> >+                               "this window size\n");
> >> >+                       return -1;
> >> >+               }
> >> >+       } else
> >> >+               /* Set window size to PMD maximum if none was specified */
> >> >+               test_data->window_sz = cap->window_size.max;
> >> >+
> >> >+       /* Check if chained mbufs is supported */
> >> >+       if (test_data->max_sgl_segs > 1  &&
> >> >+                       (comp_flags & RTE_COMP_FF_OOP_SGL_IN_SGL_OUT) ==
> 0) {
> >> >+               RTE_LOG(INFO, USER1, "Compress device does not support "
> >> >+                               "chained mbufs. Max SGL segments set to 1\n");
> >> >+               test_data->max_sgl_segs = 1;
> >> >+       }
> >> >+
> >> >+       /* Level 0 support */
> >> >+       if (test_data->level.min == 0 &&
> >> >+                       (comp_flags &
> >> >+ RTE_COMP_FF_NONCOMPRESSED_BLOCKS) ==
> >> 0) {
> >> >+               RTE_LOG(ERR, USER1, "Compress device does not support "
> >> >+                               "level 0 (no compression)\n");
> >> >+               return -1;
> >> >+       }
> >> >+
> >> >+       return 0;
> >> >+}
> >> >+
> >> >+static int
> >> >+comp_perf_allocate_memory(struct comp_test_data *test_data) {
> >> >+       /* Number of segments for input and output
> >> >+        * (compression and decompression)
> >> >+        */
> >> >+       uint32_t total_segs = DIV_CEIL(test_data->input_data_sz,
> >> >+                       test_data->seg_sz);
> >> >+       test_data->comp_buf_pool =
> >> rte_pktmbuf_pool_create("comp_buf_pool",
> >> >+                               total_segs,
> >> >+                               0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
> >> >+                               rte_socket_id());
> >> >+       if (test_data->comp_buf_pool == NULL) {
> >> >+               RTE_LOG(ERR, USER1, "Mbuf mempool could not be
> created\n");
> >> >+               return -1;
> >> >+       }
> >> >+
> >> >+       cleanup = ST_MEMORY_ALLOC;
> >> >+       test_data->decomp_buf_pool =
> >> rte_pktmbuf_pool_create("decomp_buf_pool",
> >> >+                               total_segs,
> >> >+                               0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
> >> >+                               rte_socket_id());
> >> >+       if (test_data->decomp_buf_pool == NULL) {
> >> >+               RTE_LOG(ERR, USER1, "Mbuf mempool could not be
> created\n");
> >> >+               return -1;
> >> >+       }
> >> Unless am missing to see it, you need to free pre-allocated memories
> >> here before return call for all failed cases.
> >
> >TJ: There's only one 'freeing stack' at the end of main application
> >function to avoid double freeing resources (which was previously n V1).
> >We have state machine for that stuff (static enum cleanup_st) to know
> what should be free and what has been allocated already.
> >In case you mean the state machine is set just after first alloc in line 136:
> >
> >cleanup = ST_MEMORY_ALLOC;
> >
> >so we know what should be free at the end of application running in line
> 891:
> >
> >end:
> >        switch (cleanup) {
> >
> >        case ST_DURING_TEST:
> >        case ST_PREPARE_BUF:
> >                free_bufs(test_data);
> >                /* fallthrough */
> >        case ST_MEMORY_ALLOC:
> >                rte_free(test_data->decomp_bufs);
> Even if we are in this state but it doesn't guarantee all of the buffers in this
> state are allocated. So shouldn't every pointer be null-checked before?
> Thanks
> Shally



This check is already inside the free function - no need to add a double check.

Thx, Tomek

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
  2018-11-06 15:39               ` Verma, Shally
@ 2018-11-07 10:18                 ` Jozwiak, TomaszX
  2018-11-10  0:54                   ` Trahe, Fiona
  0 siblings, 1 reply; 76+ messages in thread
From: Jozwiak, TomaszX @ 2018-11-07 10:18 UTC (permalink / raw)
  To: Verma, Shally, dev, Trahe, Fiona, akhil.goyal



> -----Original Message-----
> From: Verma, Shally [mailto:Shally.Verma@cavium.com]
> Sent: Tuesday, November 6, 2018 4:40 PM
> To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; Trahe,
> Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com
> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
> measurement
> 
> 
> 
> >-----Original Message-----
> >From: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>
> >Sent: 06 November 2018 14:36
> >To: Verma, Shally <Shally.Verma@cavium.com>; dev@dpdk.org; Trahe,
> Fiona
> ><fiona.trahe@intel.com>; akhil.goyal@nxp.com
> >Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
> >measurement
> >
> >External Email
> >
> >> -----Original Message-----
> >> From: Verma, Shally [mailto:Shally.Verma@cavium.com]
> >> Sent: Tuesday, November 6, 2018 9:16 AM
> >> To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org;
> >> Trahe, Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com
> >> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add
> >> performance measurement
> ...
> 
> >> >> >> >+
> >> >> >> >+       /* Window size */
> >> >> >> >+       if (test_data->window_sz != -1) {
> >> >> >> >+               if (param_range_check(test_data->window_sz,
> >> >> >> >+ &cap->window_size)
> >> >> >> What if cap->window_size is 0 i.e. implementation default?
> >> >> >
> >> >> >TJ: You probably mean cap->window_size.increment = 0 (because
> >> >> >cap->window_size is a structure). In that case we check if
> >> >> >test_data->window_sz >=min and test_data->window_sz <= max
> only,
> >> >> because increment = 0 means (base on compression API) we have only
> >> >> one value of windows_size (no range is supported).
> >> >> But PMD can set min and max too 0 for such case.
> >> >
> >> >TJ: I can't see any issue in that case too. Maybe I don't understand
> >> >what you
> >> mean but the logic is as follow:
> >> >1)  if you pass '--window-sz  ...' param. into command line your
> >> >intention is to force that value of window size during test. We
> >> >check is this
> >> value is allow (by param_range_check() function).
> >> >2) if you plan to use default value - just don't pass '--window-sz'
> >> >param. in command line at all. In that case we get windows size from
> >> >window_size.max field, so if window_size.min= window_size.max=0
> >> test_data->window_sz will be zero, as well.
> >> >If you mean that behavior is not good - I will be grateful for other
> >> suggestions.
> >>
> >> This is fine. but I am thinking of 3rd case here:
> >> c) user pass window sz but PMD window_sz.min = max = 0, then user
> >> requested windowsz is not applicable right?!
> >
> >In that case - true. There'll be fail :
> >"Compress device does not support this window size\n"); So what is your
> >proposal for  that case?
> >
> We can set to window size to implementation default and add in diagnostic
> of used window sz for test run.
> No need to fail here I believe.
> 
> Thanks
> Shally

Ok, I'll try to implement that feature in V3


Br, Tomek

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
  2018-11-07 10:18                 ` Jozwiak, TomaszX
@ 2018-11-10  0:54                   ` Trahe, Fiona
  2018-11-12  4:45                     ` Verma, Shally
  0 siblings, 1 reply; 76+ messages in thread
From: Trahe, Fiona @ 2018-11-10  0:54 UTC (permalink / raw)
  To: Jozwiak, TomaszX, Verma, Shally, dev, akhil.goyal; +Cc: Trahe, Fiona

Hi Shally, Tomasz,

> > >> >> >> >+       /* Window size */
> > >> >> >> >+       if (test_data->window_sz != -1) {
> > >> >> >> >+               if (param_range_check(test_data->window_sz,
> > >> >> >> >+ &cap->window_size)
> > >> >> >> What if cap->window_size is 0 i.e. implementation default?
> > >> >> >
> > >> >> >TJ: You probably mean cap->window_size.increment = 0 (because
> > >> >> >cap->window_size is a structure). In that case we check if
> > >> >> >test_data->window_sz >=min and test_data->window_sz <= max
> > only,
> > >> >> because increment = 0 means (base on compression API) we have only
> > >> >> one value of windows_size (no range is supported).
> > >> >> But PMD can set min and max too 0 for such case.
> > >> >
> > >> >TJ: I can't see any issue in that case too. Maybe I don't understand
> > >> >what you
> > >> mean but the logic is as follow:
> > >> >1)  if you pass '--window-sz  ...' param. into command line your
> > >> >intention is to force that value of window size during test. We
> > >> >check is this
> > >> value is allow (by param_range_check() function).
> > >> >2) if you plan to use default value - just don't pass '--window-sz'
> > >> >param. in command line at all. In that case we get windows size from
> > >> >window_size.max field, so if window_size.min= window_size.max=0
> > >> test_data->window_sz will be zero, as well.
> > >> >If you mean that behavior is not good - I will be grateful for other
> > >> suggestions.
> > >>
> > >> This is fine. but I am thinking of 3rd case here:
> > >> c) user pass window sz but PMD window_sz.min = max = 0, then user
> > >> requested windowsz is not applicable right?!
> > >
> > >In that case - true. There'll be fail :
> > >"Compress device does not support this window size\n"); So what is your
> > >proposal for  that case?
> > >
> > We can set to window size to implementation default and add in diagnostic
> > of used window sz for test run.
> > No need to fail here I believe.

[Fiona] For Window size capability reported by the PMD in the info struct 
it is not valid to report min=0, max=0. The PMD must report the range it can
handle - the API doesn't suggest otherwise. 
On the xform a specific window size is requested of the PMD, if it doesn't support
this it's allowed to fall back to a lower size according to the API.
However that doesn't mean the PMD can pick any size if it doesn't support the
requested size, i.e. it can't pick a bigger size, just a smaller one. 
If an application requests a smaller window size
than a PMD supports, it can be that the decompression engine
will be unable to decompress if a larger window is used, so the PMD 
should only fall back to a smaller size.
Based on above, I think the perf tool behaviour is ok. 
It should pass the user requested value to the PMD if the PMD capabilities support it.
If not it should fail. If the user wants to measure with a different window size they can
pass in that parameter.
The functional test suite can be used to validate the case where the PMD
falls back - this is not what the perf tool is for.
Does this make sense?

@Shally, do you think we need an API change to support an unlimited set of window sizes?
If so can you explain why?
 

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
  2018-11-10  0:54                   ` Trahe, Fiona
@ 2018-11-12  4:45                     ` Verma, Shally
  0 siblings, 0 replies; 76+ messages in thread
From: Verma, Shally @ 2018-11-12  4:45 UTC (permalink / raw)
  To: Trahe, Fiona, Jozwiak, TomaszX, dev, akhil.goyal



>-----Original Message-----
>From: Trahe, Fiona <fiona.trahe@intel.com>
>Sent: 10 November 2018 06:24
>To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; Verma, Shally <Shally.Verma@cavium.com>; dev@dpdk.org;
>akhil.goyal@nxp.com
>Cc: Trahe, Fiona <fiona.trahe@intel.com>
>Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
>
>External Email
>
>Hi Shally, Tomasz,
>
>> > >> >> >> >+       /* Window size */
>> > >> >> >> >+       if (test_data->window_sz != -1) {
>> > >> >> >> >+               if (param_range_check(test_data->window_sz,
>> > >> >> >> >+ &cap->window_size)
>> > >> >> >> What if cap->window_size is 0 i.e. implementation default?
>> > >> >> >
>> > >> >> >TJ: You probably mean cap->window_size.increment = 0 (because
>> > >> >> >cap->window_size is a structure). In that case we check if
>> > >> >> >test_data->window_sz >=min and test_data->window_sz <= max
>> > only,
>> > >> >> because increment = 0 means (base on compression API) we have only
>> > >> >> one value of windows_size (no range is supported).
>> > >> >> But PMD can set min and max too 0 for such case.
>> > >> >
>> > >> >TJ: I can't see any issue in that case too. Maybe I don't understand
>> > >> >what you
>> > >> mean but the logic is as follow:
>> > >> >1)  if you pass '--window-sz  ...' param. into command line your
>> > >> >intention is to force that value of window size during test. We
>> > >> >check is this
>> > >> value is allow (by param_range_check() function).
>> > >> >2) if you plan to use default value - just don't pass '--window-sz'
>> > >> >param. in command line at all. In that case we get windows size from
>> > >> >window_size.max field, so if window_size.min= window_size.max=0
>> > >> test_data->window_sz will be zero, as well.
>> > >> >If you mean that behavior is not good - I will be grateful for other
>> > >> suggestions.
>> > >>
>> > >> This is fine. but I am thinking of 3rd case here:
>> > >> c) user pass window sz but PMD window_sz.min = max = 0, then user
>> > >> requested windowsz is not applicable right?!
>> > >
>> > >In that case - true. There'll be fail :
>> > >"Compress device does not support this window size\n"); So what is your
>> > >proposal for  that case?
>> > >
>> > We can set to window size to implementation default and add in diagnostic
>> > of used window sz for test run.
>> > No need to fail here I believe.
>
>[Fiona] For Window size capability reported by the PMD in the info struct
>it is not valid to report min=0, max=0. The PMD must report the range it can
>handle - the API doesn't suggest otherwise.
>On the xform a specific window size is requested of the PMD, if it doesn't support
>this it's allowed to fall back to a lower size according to the API.
>However that doesn't mean the PMD can pick any size if it doesn't support the
>requested size, i.e. it can't pick a bigger size, just a smaller one.
>If an application requests a smaller window size
>than a PMD supports, it can be that the decompression engine
>will be unable to decompress if a larger window is used, so the PMD
>should only fall back to a smaller size.
>Based on above, I think the perf tool behaviour is ok.
>It should pass the user requested value to the PMD if the PMD capabilities support it.
Agreed. However, my point is: what if a PMD just leaves these window sz values as 0, meaning implementation default, i.e.
an internal fixed value used by the PMD for lookup in both compression and decompression? If we are not supporting window sz = 0 in the API then it's fine; no need to handle this special case. Given that, however, we need to add a comment on the capability field saying that the PMD must set it to some non-zero value and that 0 is not a valid case to handle.

>If not it should fail. If the user wants to measure with a different window size they can
>pass in that parameter.
>The functional test suite can be used to validate the case where the PMD
>falls back - this is not what the perf tool is for.
>Does this make sense?
>
>@Shally, do you think we need an API change to support an unlimited set of window sizes?
>If so can you explain why?
No. I don't intend to add support for something like an unlimited window sz, as that isn't a known use-case. Also, I didn't mean window sz = 0 to be interpreted as an unlimited window sz. I just meant 0 = implementation default window sz, if that's supported by the compression spec.

Thanks
Shally
>

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH v3 0/5] add initial version of compress-perf
  2018-11-02  9:43 ` [dpdk-dev] [PATCH v2 0/3] add initial version of compress-perf Tomasz Jozwiak
                     ` (3 preceding siblings ...)
  2018-11-02 11:04   ` [dpdk-dev] [PATCH v2 0/3] add initial version of compress-perf Bruce Richardson
@ 2018-11-23 13:06   ` Tomasz Jozwiak
  2018-11-23 13:06     ` [dpdk-dev] [PATCH v3 1/5] app/compress-perf: add parser Tomasz Jozwiak
                       ` (5 more replies)
  4 siblings, 6 replies; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-11-23 13:06 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, Shally.Verma, akhil.goyal

This patchset adds initial version of compression performance
test.

v3 changes:
  - Added dynamic compression
  - Code refactoring to separate validation
    from benchmarking part
  - Updated documentation
  - Added fail detection from rte_compressdev_enqueue_burst
    and rte_compressdev_dequeue_burst functions
  - Code cleanup

  Note: The 19.02 release notes will be updated once the file has been created

v2 changes:

  -  Added release note
  -  Added new cleanup flow into main function
  -  Blocked dynamic compression test because it hasn't been
     tested enough
  -  Changed `--max-num-sgl-segs' default value to 16
  -  Updated documentation

Opens:
  Comment from Shally Verma re removing `--max-num-sgl-segs'
  option from command line (it can be done after a compression API
  change, e.g. adding a new capability field into the
  rte_compressdev_info struct)

Tomasz Jozwiak (5):
  app/compress-perf: add parser
  app/compress-perf: add performance measurement
  doc/guides/tools: add doc files
  app/compress-perf: add dynamic compression test
  app/compress-perf: code refactoring

 MAINTAINERS                                       |   5 +
 app/Makefile                                      |   4 +
 app/meson.build                                   |   1 +
 app/test-compress-perf/Makefile                   |  18 +
 app/test-compress-perf/comp_perf_options.h        |  71 +++
 app/test-compress-perf/comp_perf_options_parse.c  | 596 ++++++++++++++++++++++
 app/test-compress-perf/comp_perf_test_benchmark.c | 291 +++++++++++
 app/test-compress-perf/comp_perf_test_benchmark.h |  13 +
 app/test-compress-perf/comp_perf_test_verify.c    | 337 ++++++++++++
 app/test-compress-perf/comp_perf_test_verify.h    |  13 +
 app/test-compress-perf/main.c                     | 590 +++++++++++++++++++++
 app/test-compress-perf/meson.build                |   9 +
 config/common_base                                |   5 +
 doc/guides/rel_notes/release_18_11.rst            |   4 +
 doc/guides/tools/comp_perf.rst                    |  75 +++
 doc/guides/tools/index.rst                        |   1 +
 16 files changed, 2033 insertions(+)
 create mode 100644 app/test-compress-perf/Makefile
 create mode 100644 app/test-compress-perf/comp_perf_options.h
 create mode 100644 app/test-compress-perf/comp_perf_options_parse.c
 create mode 100644 app/test-compress-perf/comp_perf_test_benchmark.c
 create mode 100644 app/test-compress-perf/comp_perf_test_benchmark.h
 create mode 100644 app/test-compress-perf/comp_perf_test_verify.c
 create mode 100644 app/test-compress-perf/comp_perf_test_verify.h
 create mode 100644 app/test-compress-perf/main.c
 create mode 100644 app/test-compress-perf/meson.build
 create mode 100644 doc/guides/tools/comp_perf.rst

-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH v3 1/5] app/compress-perf: add parser
  2018-11-23 13:06   ` [dpdk-dev] [PATCH v3 0/5] " Tomasz Jozwiak
@ 2018-11-23 13:06     ` Tomasz Jozwiak
  2018-11-23 13:06     ` [dpdk-dev] [PATCH v3 2/5] app/compress-perf: add performance measurement Tomasz Jozwiak
                       ` (4 subsequent siblings)
  5 siblings, 0 replies; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-11-23 13:06 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, Shally.Verma, akhil.goyal

Added parser part into compression perf. test.

Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
---
 app/Makefile                                     |   4 +
 app/meson.build                                  |   1 +
 app/test-compress-perf/Makefile                  |  16 +
 app/test-compress-perf/comp_perf_options.h       |  59 +++
 app/test-compress-perf/comp_perf_options_parse.c | 596 +++++++++++++++++++++++
 app/test-compress-perf/main.c                    |  52 ++
 app/test-compress-perf/meson.build               |   7 +
 config/common_base                               |   5 +
 8 files changed, 740 insertions(+)
 create mode 100644 app/test-compress-perf/Makefile
 create mode 100644 app/test-compress-perf/comp_perf_options.h
 create mode 100644 app/test-compress-perf/comp_perf_options_parse.c
 create mode 100644 app/test-compress-perf/main.c
 create mode 100644 app/test-compress-perf/meson.build

diff --git a/app/Makefile b/app/Makefile
index 069fa98..d6641ef 100644
--- a/app/Makefile
+++ b/app/Makefile
@@ -11,6 +11,10 @@ ifeq ($(CONFIG_RTE_LIBRTE_BBDEV),y)
 DIRS-$(CONFIG_RTE_TEST_BBDEV) += test-bbdev
 endif
 
+ifeq ($(CONFIG_RTE_LIBRTE_COMPRESSDEV),y)
+DIRS-$(CONFIG_RTE_APP_COMPRESS_PERF) += test-compress-perf
+endif
+
 ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
 DIRS-$(CONFIG_RTE_APP_CRYPTO_PERF) += test-crypto-perf
 endif
diff --git a/app/meson.build b/app/meson.build
index a9a026b..47a2a86 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -4,6 +4,7 @@
 apps = ['pdump',
 	'proc-info',
 	'test-bbdev',
+	'test-compress-perf',
 	'test-crypto-perf',
 	'test-eventdev',
 	'test-pmd']
diff --git a/app/test-compress-perf/Makefile b/app/test-compress-perf/Makefile
new file mode 100644
index 0000000..8aa7a22
--- /dev/null
+++ b/app/test-compress-perf/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+APP = dpdk-test-compress-perf
+
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+CFLAGS += -O3
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+SRCS-y += comp_perf_options_parse.c
+
+include $(RTE_SDK)/mk/rte.app.mk
diff --git a/app/test-compress-perf/comp_perf_options.h b/app/test-compress-perf/comp_perf_options.h
new file mode 100644
index 0000000..7516ea0
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_options.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#define MAX_DRIVER_NAME		64
+#define MAX_INPUT_FILE_NAME	64
+#define MAX_LIST		32
+
+enum comp_operation {
+	COMPRESS_ONLY,
+	DECOMPRESS_ONLY,
+	COMPRESS_DECOMPRESS
+};
+
+struct range_list {
+	uint8_t min;
+	uint8_t max;
+	uint8_t inc;
+	uint8_t count;
+	uint8_t list[MAX_LIST];
+};
+
+struct comp_test_data {
+	char driver_name[64];
+	char input_file[64];
+	struct rte_mbuf **comp_bufs;
+	struct rte_mbuf **decomp_bufs;
+	uint32_t total_bufs;
+	uint8_t *input_data;
+	size_t input_data_sz;
+	uint8_t *compressed_data;
+	uint8_t *decompressed_data;
+	struct rte_mempool *comp_buf_pool;
+	struct rte_mempool *decomp_buf_pool;
+	struct rte_mempool *op_pool;
+	int8_t cdev_id;
+	uint16_t seg_sz;
+	uint16_t burst_sz;
+	uint32_t pool_sz;
+	uint32_t num_iter;
+	uint16_t max_sgl_segs;
+	enum rte_comp_huffman huffman_enc;
+	enum comp_operation test_op;
+	int window_sz;
+	struct range_list level;
+	/* Store TSC duration for all levels (including level 0) */
+	uint64_t comp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
+	uint64_t decomp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
+};
+
+int
+comp_perf_options_parse(struct comp_test_data *test_data, int argc,
+			char **argv);
+
+void
+comp_perf_options_default(struct comp_test_data *test_data);
+
+int
+comp_perf_options_check(struct comp_test_data *test_data);
diff --git a/app/test-compress-perf/comp_perf_options_parse.c b/app/test-compress-perf/comp_perf_options_parse.c
new file mode 100644
index 0000000..5b9ea26
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_options_parse.c
@@ -0,0 +1,596 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <getopt.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include <rte_string_fns.h>
+#include <rte_comp.h>
+
+#include "comp_perf_options.h"
+
+#define CPERF_DRIVER_NAME	("driver-name")
+#define CPERF_TEST_FILE		("input-file")
+#define CPERF_SEG_SIZE		("seg-sz")
+#define CPERF_BURST_SIZE	("burst-sz")
+#define CPERF_EXTENDED_SIZE	("extended-input-sz")
+#define CPERF_POOL_SIZE		("pool-sz")
+#define CPERF_MAX_SGL_SEGS	("max-num-sgl-segs")
+#define CPERF_NUM_ITER		("num-iter")
+#define CPERF_OPTYPE		("operation")
+#define CPERF_HUFFMAN_ENC	("huffman-enc")
+#define CPERF_LEVEL		("compress-level")
+#define CPERF_WINDOW_SIZE	("window-sz")
+
+/* One entry of a table translating an option keyword into a numeric id. */
+struct name_id_map {
+	const char *name;	/* keyword as typed on the command line */
+	uint32_t id;		/* value returned by get_str_key_id_mapping() */
+};
+
+/*
+ * Print the application specific command line help to stdout.
+ * progname is printed as the program name in the first line.
+ */
+static void
+usage(char *progname)
+{
+	printf("%s [EAL options] --\n"
+		" --driver-name NAME: compress driver to use\n"
+		" --input-file NAME: file to compress and decompress\n"
+		" --extended-input-sz N: extend file data up to this size (default: no extension)\n"
+		" --seg-sz N: size of segment to store the data (default: 2048)\n"
+		" --burst-sz N: compress operation burst size\n"
+		" --pool-sz N: mempool size for compress operations/mbufs\n"
+		"		(default: 8192)\n"
+		" --max-num-sgl-segs N: maximum number of segments for each mbuf\n"
+		"		(default: 16)\n"
+		" --num-iter N: number of times the file will be\n"
+		"		compressed/decompressed (default: 10000)\n"
+		" --operation [comp/decomp/comp_and_decomp]: perform test on\n"
+		"		compression, decompression or both operations\n"
+		" --huffman-enc [fixed/dynamic/default]: Huffman encoding\n"
+		"		(default: dynamic)\n"
+		" --compress-level N: compression level, which could be a single value, list or range\n"
+		"		(default: range between 1 and 9)\n"
+		" --window-sz N: base two log value of compression window size\n"
+		"		(e.g.: 15 => 32k, default: max supported by PMD)\n"
+		" -h: prints this help\n",
+		progname);
+}
+
+/*
+ * Look up str_key in a table of map_len name/id pairs.
+ * Returns the id of the first entry whose name equals str_key,
+ * or -1 when no entry matches.
+ */
+static int
+get_str_key_id_mapping(struct name_id_map *map, unsigned int map_len,
+		const char *str_key)
+{
+	const struct name_id_map *entry = map;
+	const struct name_id_map *last = map + map_len;
+
+	while (entry != last) {
+		if (!strcmp(entry->name, str_key))
+			return entry->id;
+		entry++;
+	}
+
+	return -1;
+}
+
+/*
+ * Parse a base-10 unsigned integer that must fit in 32 bits.
+ *
+ * The conversion has to consume the whole of arg: an empty string, a
+ * string without digits or one with trailing characters is rejected.
+ *
+ * Returns 0 and stores the result in *value on success, -1 on malformed
+ * input and -ERANGE when the number does not fit in uint32_t.
+ */
+static int
+parse_uint32_t(uint32_t *value, const char *arg)
+{
+	char *end = NULL;
+	unsigned long n;
+
+	/* Validate the string being parsed, not the getopt global optarg */
+	if (arg == NULL || arg[0] == '\0')
+		return -1;
+
+	errno = 0;
+	n = strtoul(arg, &end, 10);
+
+	if (end == arg || *end != '\0')
+		return -1;
+
+	/* strtoul() reports overflow of unsigned long through errno */
+	if (errno == ERANGE || n > UINT32_MAX)
+		return -ERANGE;
+
+	*value = (uint32_t) n;
+
+	return 0;
+}
+
+/*
+ * Parse a base-10 unsigned integer that must fit in 16 bits.
+ * Returns 0 and stores the result in *value on success, the negative
+ * error of parse_uint32_t() if that fails, or -ERANGE when the number
+ * is larger than UINT16_MAX.
+ */
+static int
+parse_uint16_t(uint16_t *value, const char *arg)
+{
+	uint32_t wide = 0;
+	const int status = parse_uint32_t(&wide, arg);
+
+	if (status < 0)
+		return status;
+	if (wide > UINT16_MAX)
+		return -ERANGE;
+
+	*value = (uint16_t) wide;
+	return 0;
+}
+
+/*
+ * Parse a range written as "min:inc:max".
+ *
+ * Each field must be a plain decimal number no larger than UINT8_MAX,
+ * inc must not be 0 and max must not be lower than min. Values that do
+ * not fit in uint8_t are rejected instead of being truncated.
+ *
+ * Returns 0 and fills *min, *inc and *max on success. Returns -1 on any
+ * other input, in which case the output arguments are left untouched.
+ */
+static int
+parse_range(const char *arg, uint8_t *min, uint8_t *max, uint8_t *inc)
+{
+	uint8_t vals[3];	/* min, inc, max in input order */
+	const char *p = arg;
+	unsigned int i;
+
+	for (i = 0; i < 3; i++) {
+		char *end = NULL;
+		unsigned long n;
+
+		/* Every field starts with a digit: no sign, blank or gap */
+		if (*p < '0' || *p > '9')
+			return -1;
+
+		errno = 0;
+		n = strtoul(p, &end, 10);
+		if (errno != 0 || n > UINT8_MAX)
+			return -1;
+
+		/* ':' separates the fields, the last one ends the string */
+		if (*end != (i < 2 ? ':' : '\0'))
+			return -1;
+
+		vals[i] = (uint8_t) n;
+		if (i < 2)
+			p = end + 1;
+	}
+
+	if (vals[1] == 0 || vals[2] < vals[0])
+		return -1;
+
+	*min = vals[0];
+	*inc = vals[1];
+	*max = vals[2];
+
+	return 0;
+}
+
+/*
+ * Parse a comma separated list of decimal values, e.g. "1,3,9".
+ *
+ * Each value must be a plain decimal number no larger than UINT8_MAX;
+ * values that do not fit in uint8_t are rejected instead of being
+ * truncated. At most MAX_LIST values are stored in list[]; when more are
+ * given a warning is logged and the remainder is ignored.
+ *
+ * min and max may be NULL; when set they receive the lowest and highest
+ * stored value.
+ *
+ * Returns the number of values stored in list[], or -1 on malformed
+ * input.
+ */
+static int
+parse_list(const char *arg, uint8_t *list, uint8_t *min, uint8_t *max)
+{
+	const char *p = arg;
+	uint8_t count = 0;
+	unsigned long temp_min = UINT8_MAX;
+	unsigned long temp_max = 0;
+
+	for (;;) {
+		char *end = NULL;
+		unsigned long number;
+
+		/* Only reached with input left over after MAX_LIST values */
+		if (count == MAX_LIST) {
+			RTE_LOG(WARNING, USER1,
+				"Using only the first %u sizes\n",
+					MAX_LIST);
+			break;
+		}
+
+		/* Every value starts with a digit: no sign, blank or gap */
+		if (*p < '0' || *p > '9')
+			return -1;
+
+		errno = 0;
+		number = strtoul(p, &end, 10);
+		if (errno != 0 || number > UINT8_MAX)
+			return -1;
+
+		if (*end != ',' && *end != '\0')
+			return -1;
+
+		list[count++] = (uint8_t) number;
+
+		if (number < temp_min)
+			temp_min = number;
+		if (number > temp_max)
+			temp_max = number;
+
+		if (*end == '\0')
+			break;
+
+		p = end + 1;
+	}
+
+	if (min)
+		*min = (uint8_t) temp_min;
+	if (max)
+		*max = (uint8_t) temp_max;
+
+	return count;
+}
+
+/*
+ * Handler for --num-iter: stores the iteration count in
+ * test_data->num_iter. Returns 0 on success, -1 when arg is not a
+ * valid 32-bit number or is 0.
+ */
+static int
+parse_num_iter(struct comp_test_data *test_data, const char *arg)
+{
+	if (parse_uint32_t(&test_data->num_iter, arg) != 0) {
+		RTE_LOG(ERR, USER1, "Failed to parse total iteration count\n");
+		return -1;
+	}
+
+	if (!test_data->num_iter) {
+		RTE_LOG(ERR, USER1,
+				"Total number of iterations must be higher than 0\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Handler for --pool-sz: stores the mempool size in test_data->pool_sz.
+ * Returns 0 on success, -1 when arg is not a valid 32-bit number or
+ * is 0.
+ */
+static int
+parse_pool_sz(struct comp_test_data *test_data, const char *arg)
+{
+	int ret = parse_uint32_t(&test_data->pool_sz, arg);
+
+	if (ret) {
+		/* Terminate the line like every other message in this file */
+		RTE_LOG(ERR, USER1, "Failed to parse pool size\n");
+		return -1;
+	}
+
+	if (test_data->pool_sz == 0) {
+		RTE_LOG(ERR, USER1, "Pool size must be higher than 0\n");
+		return -1;
+	}
+
+	return ret;
+}
+
+/*
+ * Handler for --burst-sz: stores the burst size in test_data->burst_sz.
+ * Returns 0 on success, -1 when arg is not a valid 16-bit number or
+ * is 0.
+ */
+static int
+parse_burst_sz(struct comp_test_data *test_data, const char *arg)
+{
+	if (parse_uint16_t(&test_data->burst_sz, arg) != 0) {
+		RTE_LOG(ERR, USER1, "Failed to parse burst size/s\n");
+		return -1;
+	}
+
+	if (!test_data->burst_sz) {
+		RTE_LOG(ERR, USER1, "Burst size must be higher than 0\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Handler for --extended-input-sz: stores the requested input size in
+ * test_data->input_data_sz. Returns 0 on success, -1 when arg is not a
+ * valid 32-bit number or is 0.
+ */
+static int
+parse_extended_input_sz(struct comp_test_data *test_data, const char *arg)
+{
+	uint32_t requested;
+
+	if (parse_uint32_t(&requested, arg) != 0) {
+		RTE_LOG(ERR, USER1, "Failed to parse extended input size\n");
+		return -1;
+	}
+
+	/* The size is stored before the zero check */
+	test_data->input_data_sz = requested;
+	if (requested != 0)
+		return 0;
+
+	RTE_LOG(ERR, USER1,
+		"Extended file size must be higher than 0\n");
+	return -1;
+}
+
+/*
+ * Handler for --seg-sz: stores the segment size in test_data->seg_sz.
+ * Returns 0 on success, -1 when arg is not a valid 16-bit number or
+ * is 0.
+ */
+static int
+parse_seg_sz(struct comp_test_data *test_data, const char *arg)
+{
+	if (parse_uint16_t(&test_data->seg_sz, arg) != 0) {
+		RTE_LOG(ERR, USER1, "Failed to parse segment size\n");
+		return -1;
+	}
+
+	if (!test_data->seg_sz) {
+		RTE_LOG(ERR, USER1, "Segment size must be higher than 0\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Handler for --max-num-sgl-segs: stores the segment limit in
+ * test_data->max_sgl_segs. Returns 0 on success, -1 when arg is not a
+ * valid 16-bit number or is 0.
+ */
+static int
+parse_max_num_sgl_segs(struct comp_test_data *test_data, const char *arg)
+{
+	if (parse_uint16_t(&test_data->max_sgl_segs, arg) != 0) {
+		RTE_LOG(ERR, USER1,
+			"Failed to parse max number of segments per mbuf chain\n");
+		return -1;
+	}
+
+	if (!test_data->max_sgl_segs) {
+		RTE_LOG(ERR, USER1, "Max number of segments per mbuf chain "
+			"must be higher than 0\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Handler for --window-sz: stores the base two log of the window size
+ * in test_data->window_sz. Returns 0 on success, -1 when arg is not a
+ * valid 16-bit number.
+ *
+ * window_sz is an int, so the value is parsed into a local uint16_t
+ * first: writing through a uint16_t pointer to the int would update
+ * only part of it and leave the remaining bytes of the previous value.
+ */
+static int
+parse_window_sz(struct comp_test_data *test_data, const char *arg)
+{
+	uint16_t window_sz = 0;
+	int ret = parse_uint16_t(&window_sz, arg);
+
+	if (ret) {
+		RTE_LOG(ERR, USER1, "Failed to parse window size\n");
+		return -1;
+	}
+
+	test_data->window_sz = window_sz;
+
+	return 0;
+}
+
+/*
+ * Handler for --driver-name: copies arg into test_data->driver_name.
+ * Returns 0 on success, -1 when arg does not fit in the destination
+ * (terminating NUL included); nothing is copied in that case.
+ */
+static int
+parse_driver_name(struct comp_test_data *test_data, const char *arg)
+{
+	if (strlen(arg) > (sizeof(test_data->driver_name) - 1)) {
+		/* Name the offending option instead of failing silently */
+		RTE_LOG(ERR, USER1,
+			"Driver name is too long (max %zu characters)\n",
+			sizeof(test_data->driver_name) - 1);
+		return -1;
+	}
+
+	rte_strlcpy(test_data->driver_name, arg,
+			sizeof(test_data->driver_name));
+
+	return 0;
+}
+
+/*
+ * Handler for --input-file: copies arg into test_data->input_file.
+ * Returns 0 on success, -1 when arg does not fit in the destination
+ * (terminating NUL included); nothing is copied in that case.
+ */
+static int
+parse_test_file(struct comp_test_data *test_data, const char *arg)
+{
+	if (strlen(arg) > (sizeof(test_data->input_file) - 1)) {
+		/* Name the offending option instead of failing silently */
+		RTE_LOG(ERR, USER1,
+			"Input file name is too long (max %zu characters)\n",
+			sizeof(test_data->input_file) - 1);
+		return -1;
+	}
+
+	rte_strlcpy(test_data->input_file, arg, sizeof(test_data->input_file));
+
+	return 0;
+}
+
+/*
+ * Handler for --operation: translates "comp", "decomp" or
+ * "comp_and_decomp" into test_data->test_op.
+ * Returns 0 on success, -1 for any other keyword.
+ */
+static int
+parse_op_type(struct comp_test_data *test_data, const char *arg)
+{
+	struct name_id_map keywords[] = {
+		{ "comp",		COMPRESS_ONLY },
+		{ "decomp",		DECOMPRESS_ONLY },
+		{ "comp_and_decomp",	COMPRESS_DECOMPRESS }
+	};
+	const int op = get_str_key_id_mapping(keywords, RTE_DIM(keywords),
+			arg);
+
+	if (op >= 0) {
+		test_data->test_op = (enum comp_operation)op;
+		return 0;
+	}
+
+	RTE_LOG(ERR, USER1, "Invalid operation type specified\n");
+	return -1;
+}
+
+/*
+ * Handler for --huffman-enc: translates the keyword in arg into
+ * test_data->huffman_enc.
+ * Returns 0 on success, -1 when the keyword is not in the table below.
+ */
+static int
+parse_huffman_enc(struct comp_test_data *test_data, const char *arg)
+{
+	struct name_id_map huffman_namemap[] = {
+		{
+			"default",
+			RTE_COMP_HUFFMAN_DEFAULT
+		},
+		{
+			"fixed",
+			RTE_COMP_HUFFMAN_FIXED
+		},
+		{
+			"dynamic",
+			RTE_COMP_HUFFMAN_DYNAMIC
+		}
+	};
+
+	int id = get_str_key_id_mapping(huffman_namemap,
+			RTE_DIM(huffman_namemap), arg);
+	if (id < 0) {
+		RTE_LOG(ERR, USER1, "Invalid Huffman encoding specified\n");
+		return -1;
+	}
+
+	test_data->huffman_enc = (enum rte_comp_huffman)id;
+
+	return 0;
+}
+
+/*
+ * Handler for --compress-level: accepts a range or a list of levels
+ * and stores the result in test_data->level.
+ *
+ * Returns 0 on success, -1 when arg is neither a valid range nor a
+ * valid list, or when the highest level exceeds RTE_COMP_LEVEL_MAX.
+ */
+static int
+parse_level(struct comp_test_data *test_data, const char *arg)
+{
+	int ret;
+
+	/*
+	 * Try parsing the argument as a range, if it fails,
+	 * parse it as a list
+	 */
+	if (parse_range(arg, &test_data->level.min, &test_data->level.max,
+			&test_data->level.inc) < 0) {
+		ret = parse_list(arg, test_data->level.list,
+					&test_data->level.min,
+					&test_data->level.max);
+		if (ret < 0) {
+			RTE_LOG(ERR, USER1,
+				"Failed to parse compression level/s\n");
+			return -1;
+		}
+		test_data->level.count = ret;
+	}
+
+	/*
+	 * The upper bound applies to ranges and lists alike: the per-level
+	 * arrays in struct comp_test_data hold RTE_COMP_LEVEL_MAX + 1 entries.
+	 */
+	if (test_data->level.max > RTE_COMP_LEVEL_MAX) {
+		RTE_LOG(ERR, USER1, "Level cannot be higher than %u\n",
+				RTE_COMP_LEVEL_MAX);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Signature shared by all option handlers: parse arg and store the
+ * result in test_data. The handlers in this file return 0 on success
+ * and a negative value on failure.
+ */
+typedef int (*option_parser_t)(struct comp_test_data *test_data,
+		const char *arg);
+
+/* Associates a long option name with the handler that parses its value. */
+struct long_opt_parser {
+	const char *lgopt_name;
+	option_parser_t parser_fn;
+
+};
+
+/*
+ * Long options handed to getopt_long(). Every option takes a mandatory
+ * argument and has flag and val set to 0, so getopt_long() returns 0
+ * for all of them and the option is identified by its index in this
+ * table (see comp_perf_opts_parse_long()).
+ */
+static struct option lgopts[] = {
+
+	{ CPERF_DRIVER_NAME, required_argument, 0, 0 },
+	{ CPERF_TEST_FILE, required_argument, 0, 0 },
+	{ CPERF_SEG_SIZE, required_argument, 0, 0 },
+	{ CPERF_BURST_SIZE, required_argument, 0, 0 },
+	{ CPERF_EXTENDED_SIZE, required_argument, 0, 0 },
+	{ CPERF_POOL_SIZE, required_argument, 0, 0 },
+	{ CPERF_MAX_SGL_SEGS, required_argument, 0, 0},
+	{ CPERF_NUM_ITER, required_argument, 0, 0 },
+	{ CPERF_OPTYPE,	required_argument, 0, 0 },
+	{ CPERF_HUFFMAN_ENC, required_argument, 0, 0 },
+	{ CPERF_LEVEL, required_argument, 0, 0 },
+	{ CPERF_WINDOW_SIZE, required_argument, 0, 0 },
+	/* Terminating entry required by getopt_long() */
+	{ NULL, 0, 0, 0 }
+};
+
+/*
+ * Run the handler registered for the long option at index opt_idx of
+ * lgopts, passing it the current optarg.
+ *
+ * Returns the handler's result, or -EINVAL when no handler is
+ * registered for that option name.
+ */
+static int
+comp_perf_opts_parse_long(int opt_idx, struct comp_test_data *test_data)
+{
+	/* Built once: the table never changes between calls */
+	static const struct long_opt_parser parsermap[] = {
+		{ CPERF_DRIVER_NAME,	parse_driver_name },
+		{ CPERF_TEST_FILE,	parse_test_file },
+		{ CPERF_SEG_SIZE,	parse_seg_sz },
+		{ CPERF_BURST_SIZE,	parse_burst_sz },
+		{ CPERF_EXTENDED_SIZE,	parse_extended_input_sz },
+		{ CPERF_POOL_SIZE,	parse_pool_sz },
+		{ CPERF_MAX_SGL_SEGS,	parse_max_num_sgl_segs },
+		{ CPERF_NUM_ITER,	parse_num_iter },
+		{ CPERF_OPTYPE,		parse_op_type },
+		{ CPERF_HUFFMAN_ENC,	parse_huffman_enc },
+		{ CPERF_LEVEL,		parse_level },
+		{ CPERF_WINDOW_SIZE,	parse_window_sz },
+	};
+	const char *name = lgopts[opt_idx].name;
+	unsigned int i;
+
+	for (i = 0; i < RTE_DIM(parsermap); i++) {
+		/*
+		 * Compare whole names: a prefix match would pick the wrong
+		 * handler if one option name ever started with another.
+		 */
+		if (strcmp(name, parsermap[i].lgopt_name) == 0)
+			return parsermap[i].parser_fn(test_data, optarg);
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Parse the application command line into test_data.
+ *
+ * -h prints the help and exits the application. Long options are
+ * dispatched to their handlers; the first handler failure is returned
+ * as is. Any other option prints the help and yields -EINVAL.
+ * Returns 0 once all options have been consumed.
+ */
+int
+comp_perf_options_parse(struct comp_test_data *test_data, int argc, char **argv)
+{
+	int opt_idx;
+
+	for (;;) {
+		const int opt = getopt_long(argc, argv, "h", lgopts,
+				&opt_idx);
+
+		if (opt == EOF)
+			return 0;
+
+		if (opt == 0) {
+			/* long options */
+			const int status =
+				comp_perf_opts_parse_long(opt_idx, test_data);
+
+			if (status != 0)
+				return status;
+			continue;
+		}
+
+		usage(argv[0]);
+		if (opt != 'h')
+			return -EINVAL;
+
+		rte_exit(EXIT_SUCCESS, "Displayed help\n");
+	}
+}
+
+/*
+ * Load the default value of every option into test_data. Meant to be
+ * called before comp_perf_options_parse(), which overrides the options
+ * given on the command line.
+ *
+ * NOTE(review): usage() advertises a default of 16 for
+ * --max-num-sgl-segs while UINT16_MAX is set here.
+ */
+void
+comp_perf_options_default(struct comp_test_data *test_data)
+{
+	test_data->cdev_id = -1;
+	test_data->seg_sz = 2048;
+	test_data->burst_sz = 32;
+	test_data->pool_sz = 8192;
+	test_data->max_sgl_segs = UINT16_MAX;
+	test_data->num_iter = 10000;
+	test_data->huffman_enc = RTE_COMP_HUFFMAN_DYNAMIC;
+	test_data->test_op = COMPRESS_DECOMPRESS;
+	test_data->window_sz = -1;
+	test_data->level.min = 1;
+	test_data->level.max = 9;
+	test_data->level.inc = 1;
+}
+
+/*
+ * Verify that the mandatory options were given.
+ * Returns 0 when both the driver name and the input file name are set,
+ * -1 (after logging which one is missing) otherwise.
+ */
+int
+comp_perf_options_check(struct comp_test_data *test_data)
+{
+	if (test_data->driver_name[0] == '\0') {
+		RTE_LOG(ERR, USER1, "Driver name has to be set\n");
+		return -1;
+	}
+
+	if (test_data->input_file[0] == '\0') {
+		RTE_LOG(ERR, USER1, "Input file name has to be set\n");
+		return -1;
+	}
+
+	return 0;
+}
diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c
new file mode 100644
index 0000000..f52b98d
--- /dev/null
+++ b/app/test-compress-perf/main.c
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_compressdev.h>
+
+#include "comp_perf_options.h"
+
+/*
+ * Application entry point: initialise the EAL, skip the arguments it
+ * consumed, allocate the test data, load the default options, then
+ * parse and check the application options.
+ * Returns EXIT_SUCCESS when the options are valid, EXIT_FAILURE
+ * otherwise; EAL or allocation failures end the application through
+ * rte_exit().
+ */
+int
+main(int argc, char **argv)
+{
+	int ret;
+	struct comp_test_data *test_data;
+
+	/* Initialise DPDK EAL */
+	ret = rte_eal_init(argc, argv);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "Invalid EAL arguments!\n");
+	argc -= ret;
+	argv += ret;
+
+	test_data = rte_zmalloc_socket(NULL, sizeof(struct comp_test_data),
+					0, rte_socket_id());
+
+	if (test_data == NULL)
+		rte_exit(EXIT_FAILURE, "Cannot reserve memory in socket %d\n",
+				rte_socket_id());
+
+	comp_perf_options_default(test_data);
+
+	if (comp_perf_options_parse(test_data, argc, argv) < 0) {
+		RTE_LOG(ERR, USER1,
+			"Parsing one or more user options failed\n");
+		ret = EXIT_FAILURE;
+		goto err;
+	}
+
+	if (comp_perf_options_check(test_data) < 0) {
+		ret = EXIT_FAILURE;
+		goto err;
+	}
+
+	ret = EXIT_SUCCESS;
+
+err:
+	rte_free(test_data);
+
+	return ret;
+}
diff --git a/app/test-compress-perf/meson.build b/app/test-compress-perf/meson.build
new file mode 100644
index 0000000..ba6d64d
--- /dev/null
+++ b/app/test-compress-perf/meson.build
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+allow_experimental_apis = true
+sources = files('comp_perf_options_parse.c',
+		'main.c')
+deps = ['compressdev']
diff --git a/config/common_base b/config/common_base
index d12ae98..2ab4b7b 100644
--- a/config/common_base
+++ b/config/common_base
@@ -949,6 +949,11 @@ CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
 CONFIG_RTE_TEST_BBDEV=y
 
 #
+# Compile the compression performance application
+#
+CONFIG_RTE_APP_COMPRESS_PERF=y
+
+#
 # Compile the crypto performance application
 #
 CONFIG_RTE_APP_CRYPTO_PERF=y
-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH v3 2/5] app/compress-perf: add performance measurement
  2018-11-23 13:06   ` [dpdk-dev] [PATCH v3 0/5] " Tomasz Jozwiak
  2018-11-23 13:06     ` [dpdk-dev] [PATCH v3 1/5] app/compress-perf: add parser Tomasz Jozwiak
@ 2018-11-23 13:06     ` Tomasz Jozwiak
  2018-11-23 13:06     ` [dpdk-dev] [PATCH v3 3/5] doc/guides/tools: add doc files Tomasz Jozwiak
                       ` (3 subsequent siblings)
  5 siblings, 0 replies; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-11-23 13:06 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, Shally.Verma, akhil.goyal

Added the performance measurement part to the compression performance test.

Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
---
 app/test-compress-perf/comp_perf_options_parse.c |   8 +-
 app/test-compress-perf/main.c                    | 886 ++++++++++++++++++++++-
 2 files changed, 883 insertions(+), 11 deletions(-)

diff --git a/app/test-compress-perf/comp_perf_options_parse.c b/app/test-compress-perf/comp_perf_options_parse.c
index 5b9ea26..fd5d31c 100644
--- a/app/test-compress-perf/comp_perf_options_parse.c
+++ b/app/test-compress-perf/comp_perf_options_parse.c
@@ -431,10 +431,6 @@ parse_huffman_enc(struct comp_test_data *test_data, const char *arg)
 		{
 			"fixed",
 			RTE_COMP_HUFFMAN_FIXED
-		},
-		{
-			"dynamic",
-			RTE_COMP_HUFFMAN_DYNAMIC
 		}
 	};
 
@@ -569,9 +565,9 @@ comp_perf_options_default(struct comp_test_data *test_data)
 	test_data->seg_sz = 2048;
 	test_data->burst_sz = 32;
 	test_data->pool_sz = 8192;
-	test_data->max_sgl_segs = UINT16_MAX;
+	test_data->max_sgl_segs = 16;
 	test_data->num_iter = 10000;
-	test_data->huffman_enc = RTE_COMP_HUFFMAN_DYNAMIC;
+	test_data->huffman_enc = RTE_COMP_HUFFMAN_FIXED;
 	test_data->test_op = COMPRESS_DECOMPRESS;
 	test_data->window_sz = -1;
 	test_data->level.min = 1;
diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c
index f52b98d..5950c96 100644
--- a/app/test-compress-perf/main.c
+++ b/app/test-compress-perf/main.c
@@ -5,14 +5,728 @@
 #include <rte_malloc.h>
 #include <rte_eal.h>
 #include <rte_log.h>
+#include <rte_cycles.h>
 #include <rte_compressdev.h>
 
 #include "comp_perf_options.h"
 
+#define NUM_MAX_XFORMS 16
+#define NUM_MAX_INFLIGHT_OPS 512
+#define EXPANSE_RATIO 1.05
+#define MIN_COMPRESSED_BUF_SIZE 8
+
+#define DIV_CEIL(a, b)  ((a) / (b) + ((a) % (b) != 0))
+
+/*
+ * Cleanup state machine.
+ * The file-scope variable 'cleanup' records how far the set-up has
+ * progressed; it is advanced by the set-up functions, e.g. to
+ * ST_MEMORY_ALLOC in comp_perf_allocate_memory() and to ST_PREPARE_BUF
+ * in prepare_bufs().
+ */
+static enum cleanup_st {
+	ST_CLEAR = 0,
+	ST_TEST_DATA,
+	ST_COMPDEV,
+	ST_INPUT_DATA,
+	ST_MEMORY_ALLOC,
+	ST_PREPARE_BUF,
+	ST_DURING_TEST
+} cleanup = ST_CLEAR;
+
+/*
+ * Check whether size is one of the values described by range, that is
+ * min, min + increment, ... up to max.
+ * Returns 0 when size is supported and -1 otherwise.
+ */
+static int
+param_range_check(uint16_t size, const struct rte_param_log2_range *range)
+{
+	unsigned int candidate;
+
+	/* Check lower/upper bounds */
+	if (size < range->min || size > range->max)
+		return -1;
+
+	/* If range is actually only one value, size is correct */
+	if (!range->increment)
+		return 0;
+
+	/* Check if value is one of the supported sizes */
+	candidate = range->min;
+	while (candidate <= range->max) {
+		if (candidate == size)
+			return 0;
+		candidate += range->increment;
+	}
+
+	return -1;
+}
+
+/*
+ * Check the requested options against the DEFLATE capabilities of the
+ * selected compress device.
+ *
+ * Besides checking, two options may be adjusted: window_sz is set to the
+ * device maximum when it was left at -1, and max_sgl_segs is lowered to
+ * 1 when the device lacks RTE_COMP_FF_OOP_SGL_IN_SGL_OUT.
+ *
+ * Returns 0 when the device can run the test, -1 otherwise.
+ */
+static int
+comp_perf_check_capabilities(struct comp_test_data *test_data)
+{
+	const struct rte_compressdev_capabilities *cap;
+
+	cap = rte_compressdev_capability_get(test_data->cdev_id,
+					     RTE_COMP_ALGO_DEFLATE);
+
+	if (cap == NULL) {
+		RTE_LOG(ERR, USER1,
+			"Compress device does not support DEFLATE\n");
+		return -1;
+	}
+
+	uint64_t comp_flags = cap->comp_feature_flags;
+
+	/* Huffman encoding */
+	if (test_data->huffman_enc == RTE_COMP_HUFFMAN_FIXED &&
+			(comp_flags & RTE_COMP_FF_HUFFMAN_FIXED) == 0) {
+		RTE_LOG(ERR, USER1,
+			"Compress device does not support Fixed Huffman\n");
+		return -1;
+	}
+
+	if (test_data->huffman_enc == RTE_COMP_HUFFMAN_DYNAMIC &&
+			(comp_flags & RTE_COMP_FF_HUFFMAN_DYNAMIC) == 0) {
+		RTE_LOG(ERR, USER1,
+			"Compress device does not support Dynamic Huffman\n");
+		return -1;
+	}
+
+	/* Window size */
+	if (test_data->window_sz != -1) {
+		if (param_range_check(test_data->window_sz, &cap->window_size)
+				< 0) {
+			RTE_LOG(ERR, USER1,
+				"Compress device does not support "
+				"this window size\n");
+			return -1;
+		}
+	} else
+		/* Set window size to PMD maximum if none was specified */
+		test_data->window_sz = cap->window_size.max;
+
+	/* Check if chained mbufs is supported */
+	if (test_data->max_sgl_segs > 1  &&
+			(comp_flags & RTE_COMP_FF_OOP_SGL_IN_SGL_OUT) == 0) {
+		RTE_LOG(INFO, USER1, "Compress device does not support "
+				"chained mbufs. Max SGL segments set to 1\n");
+		test_data->max_sgl_segs = 1;
+	}
+
+	/* Level 0 support */
+	if (test_data->level.min == 0 &&
+			(comp_flags & RTE_COMP_FF_NONCOMPRESSED_BLOCKS) == 0) {
+		RTE_LOG(ERR, USER1, "Compress device does not support "
+				"level 0 (no compression)\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Create the mbuf and operation pools and allocate the linear buffers
+ * and mbuf pointer arrays used by the test, all sized from
+ * input_data_sz, seg_sz and max_sgl_segs.
+ *
+ * 'cleanup' is advanced to ST_MEMORY_ALLOC as soon as the first pool
+ * exists. Nothing is released here on failure.
+ *
+ * Returns 0 on success, -1 when any pool or buffer cannot be created;
+ * the logged message names the allocation that failed.
+ */
+static int
+comp_perf_allocate_memory(struct comp_test_data *test_data)
+{
+	/* Number of segments for input and output
+	 * (compression and decompression)
+	 */
+	uint32_t total_segs = DIV_CEIL(test_data->input_data_sz,
+			test_data->seg_sz);
+	test_data->comp_buf_pool = rte_pktmbuf_pool_create("comp_buf_pool",
+				total_segs,
+				0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
+				rte_socket_id());
+	if (test_data->comp_buf_pool == NULL) {
+		RTE_LOG(ERR, USER1,
+			"Compression mbuf mempool could not be created\n");
+		return -1;
+	}
+
+	cleanup = ST_MEMORY_ALLOC;
+	test_data->decomp_buf_pool = rte_pktmbuf_pool_create("decomp_buf_pool",
+				total_segs,
+				0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
+				rte_socket_id());
+	if (test_data->decomp_buf_pool == NULL) {
+		RTE_LOG(ERR, USER1,
+			"Decompression mbuf mempool could not be created\n");
+		return -1;
+	}
+
+	/* One buffer (mbuf chain) holds up to max_sgl_segs segments */
+	test_data->total_bufs = DIV_CEIL(total_segs, test_data->max_sgl_segs);
+
+	test_data->op_pool = rte_comp_op_pool_create("op_pool",
+				  test_data->total_bufs,
+				  0, 0, rte_socket_id());
+	if (test_data->op_pool == NULL) {
+		RTE_LOG(ERR, USER1, "Comp op mempool could not be created\n");
+		return -1;
+	}
+
+	/*
+	 * Compressed data might be a bit larger than input data,
+	 * if data cannot be compressed
+	 */
+	test_data->compressed_data = rte_zmalloc_socket(NULL,
+				test_data->input_data_sz * EXPANSE_RATIO
+						+ MIN_COMPRESSED_BUF_SIZE, 0,
+				rte_socket_id());
+	if (test_data->compressed_data == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the compressed data "
+				"could not be allocated\n");
+		return -1;
+	}
+
+	test_data->decompressed_data = rte_zmalloc_socket(NULL,
+				test_data->input_data_sz, 0,
+				rte_socket_id());
+	if (test_data->decompressed_data == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the decompressed data "
+				"could not be allocated\n");
+		return -1;
+	}
+
+	test_data->comp_bufs = rte_zmalloc_socket(NULL,
+			test_data->total_bufs * sizeof(struct rte_mbuf *),
+			0, rte_socket_id());
+	if (test_data->comp_bufs == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the compression mbufs"
+				" could not be allocated\n");
+		return -1;
+	}
+
+	test_data->decomp_bufs = rte_zmalloc_socket(NULL,
+			test_data->total_bufs * sizeof(struct rte_mbuf *),
+			0, rte_socket_id());
+	if (test_data->decomp_bufs == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the decompression mbufs"
+				" could not be allocated\n");
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Load the input file into a newly allocated test_data->input_data.
+ *
+ * When input_data_sz is 0 it is set to the file size. When it is larger
+ * than the file, the file content is repeated until the buffer is full;
+ * when it is smaller, only the leading part of the file is read.
+ *
+ * Returns 0 on success and -1 on failure (file cannot be opened, sized
+ * or read, file is empty, or memory cannot be allocated). The file is
+ * always closed; input_data is not released here on failure.
+ */
+static int
+comp_perf_dump_input_data(struct comp_test_data *test_data)
+{
+	/* Binary mode: the content is handled as raw bytes */
+	FILE *f = fopen(test_data->input_file, "rb");
+	int ret = -1;
+	long file_end;
+	size_t actual_file_sz;
+
+	if (f == NULL) {
+		RTE_LOG(ERR, USER1, "Input file could not be opened\n");
+		return -1;
+	}
+
+	if (fseek(f, 0, SEEK_END) != 0) {
+		RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
+		goto end;
+	}
+
+	/* ftell() returns -1 on failure: check before converting to size_t */
+	file_end = ftell(f);
+	if (file_end < 0) {
+		RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
+		goto end;
+	}
+
+	/* An empty file offers nothing to read or to repeat */
+	if (file_end == 0) {
+		RTE_LOG(ERR, USER1, "Input file is empty\n");
+		goto end;
+	}
+	actual_file_sz = file_end;
+
+	/* If extended input data size has not been set,
+	 * input data size = file size
+	 */
+
+	if (test_data->input_data_sz == 0)
+		test_data->input_data_sz = actual_file_sz;
+
+	if (fseek(f, 0, SEEK_SET) != 0) {
+		RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
+		goto end;
+	}
+
+	test_data->input_data = rte_zmalloc_socket(NULL,
+				test_data->input_data_sz, 0, rte_socket_id());
+
+	if (test_data->input_data == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
+				"file could not be allocated\n");
+		goto end;
+	}
+
+	size_t remaining_data = test_data->input_data_sz;
+	uint8_t *data = test_data->input_data;
+
+	while (remaining_data > 0) {
+		size_t data_to_read = RTE_MIN(remaining_data, actual_file_sz);
+
+		if (fread(data, data_to_read, 1, f) != 1) {
+			RTE_LOG(ERR, USER1, "Input file could not be read\n");
+			goto end;
+		}
+		/* Rewind so that the next pass repeats the file content */
+		if (fseek(f, 0, SEEK_SET) != 0) {
+			RTE_LOG(ERR, USER1,
+				"Size of input could not be calculated\n");
+			goto end;
+		}
+		remaining_data -= data_to_read;
+		data += data_to_read;
+	}
+
+	if (test_data->input_data_sz > actual_file_sz)
+		RTE_LOG(INFO, USER1,
+		  "%zu bytes read from file %s, extending the file %.2f times\n",
+			test_data->input_data_sz, test_data->input_file,
+			(double)test_data->input_data_sz/actual_file_sz);
+	else
+		RTE_LOG(INFO, USER1,
+			"%zu bytes read from file %s\n",
+			test_data->input_data_sz, test_data->input_file);
+
+	ret = 0;
+
+end:
+	fclose(f);
+	return ret;
+}
+
+/*
+ * Select, configure and start the compress device used by the test.
+ *
+ * The first device reported for test_data->driver_name is stored in
+ * test_data->cdev_id, checked with comp_perf_check_capabilities(),
+ * configured with one queue pair and started.
+ *
+ * Returns 0 on success, -EINVAL when no device of that driver exists
+ * and -1 on any later failure.
+ */
+static int
+comp_perf_initialize_compressdev(struct comp_test_data *test_data)
+{
+	uint8_t dev_ids[RTE_COMPRESS_MAX_DEVS];
+	uint8_t nb_devs = rte_compressdev_devices_get(test_data->driver_name,
+			dev_ids, RTE_COMPRESS_MAX_DEVS);
+
+	if (!nb_devs) {
+		RTE_LOG(ERR, USER1, "No compress devices type %s available\n",
+				test_data->driver_name);
+		return -EINVAL;
+	}
+
+	if (nb_devs > 1)
+		RTE_LOG(INFO, USER1,
+			"Only the first compress device will be used\n");
+
+	test_data->cdev_id = dev_ids[0];
+
+	if (comp_perf_check_capabilities(test_data) < 0)
+		return -1;
+
+	/* Configure compressdev (one device, one queue pair) */
+	struct rte_compressdev_config dev_conf = {
+		.socket_id = rte_socket_id(),
+		.nb_queue_pairs = 1,
+		.max_nb_priv_xforms = NUM_MAX_XFORMS,
+		.max_nb_streams = 0
+	};
+
+	if (rte_compressdev_configure(test_data->cdev_id, &dev_conf) < 0) {
+		RTE_LOG(ERR, USER1, "Device configuration failed\n");
+		return -1;
+	}
+
+	if (rte_compressdev_queue_pair_setup(test_data->cdev_id, 0,
+			NUM_MAX_INFLIGHT_OPS, rte_socket_id()) < 0) {
+		RTE_LOG(ERR, USER1, "Queue pair setup failed\n");
+		return -1;
+	}
+
+	if (rte_compressdev_start(test_data->cdev_id) < 0) {
+		RTE_LOG(ERR, USER1, "Device could not be started\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Build the mbufs (or mbuf chains) used by the test.
+ *
+ * For each of the total_bufs entries, a chain of up to max_sgl_segs
+ * segments is taken from decomp_buf_pool and filled with the next part
+ * of input_data, seg_sz bytes per segment. A chain with the same number
+ * of segments, each seg_sz bytes long, is then taken from comp_buf_pool;
+ * no data is copied into it here.
+ *
+ * 'cleanup' is advanced to ST_PREPARE_BUF once the first mbuf has been
+ * allocated. Nothing is released here on failure.
+ *
+ * Returns 0 on success, -1 when an mbuf cannot be allocated, extended
+ * or chained.
+ */
+static int
+prepare_bufs(struct comp_test_data *test_data)
+{
+	uint32_t remaining_data = test_data->input_data_sz;
+	uint8_t *input_data_ptr = test_data->input_data;
+	size_t data_sz;
+	uint8_t *data_addr;
+	uint32_t i, j;
+
+	for (i = 0; i < test_data->total_bufs; i++) {
+		/* Allocate data in input mbuf and copy data from input file */
+		test_data->decomp_bufs[i] =
+			rte_pktmbuf_alloc(test_data->decomp_buf_pool);
+		if (test_data->decomp_bufs[i] == NULL) {
+			RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
+			return -1;
+		}
+
+		cleanup = ST_PREPARE_BUF;
+		/* The last segment may be shorter than seg_sz */
+		data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
+		data_addr = (uint8_t *) rte_pktmbuf_append(
+					test_data->decomp_bufs[i], data_sz);
+		if (data_addr == NULL) {
+			RTE_LOG(ERR, USER1, "Could not append data\n");
+			return -1;
+		}
+		rte_memcpy(data_addr, input_data_ptr, data_sz);
+
+		input_data_ptr += data_sz;
+		remaining_data -= data_sz;
+
+		/* Already one segment in the mbuf */
+		uint16_t segs_per_mbuf = 1;
+
+		/* Chain mbufs if needed for input mbufs */
+		while (segs_per_mbuf < test_data->max_sgl_segs
+				&& remaining_data > 0) {
+			struct rte_mbuf *next_seg =
+				rte_pktmbuf_alloc(test_data->decomp_buf_pool);
+
+			if (next_seg == NULL) {
+				RTE_LOG(ERR, USER1,
+					"Could not allocate mbuf\n");
+				return -1;
+			}
+
+			data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
+			data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
+				data_sz);
+
+			if (data_addr == NULL) {
+				RTE_LOG(ERR, USER1, "Could not append data\n");
+				return -1;
+			}
+
+			rte_memcpy(data_addr, input_data_ptr, data_sz);
+			input_data_ptr += data_sz;
+			remaining_data -= data_sz;
+
+			if (rte_pktmbuf_chain(test_data->decomp_bufs[i],
+					next_seg) < 0) {
+				RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
+				return -1;
+			}
+			segs_per_mbuf++;
+		}
+
+		/* Allocate data in output mbuf */
+		test_data->comp_bufs[i] =
+			rte_pktmbuf_alloc(test_data->comp_buf_pool);
+		if (test_data->comp_bufs[i] == NULL) {
+			RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
+			return -1;
+		}
+		data_addr = (uint8_t *) rte_pktmbuf_append(
+					test_data->comp_bufs[i],
+					test_data->seg_sz);
+		if (data_addr == NULL) {
+			RTE_LOG(ERR, USER1, "Could not append data\n");
+			return -1;
+		}
+
+		/* Chain mbufs if needed for output mbufs */
+		/* The output chain gets as many segments as the input chain */
+		for (j = 1; j < segs_per_mbuf; j++) {
+			struct rte_mbuf *next_seg =
+				rte_pktmbuf_alloc(test_data->comp_buf_pool);
+
+			if (next_seg == NULL) {
+				RTE_LOG(ERR, USER1,
+					"Could not allocate mbuf\n");
+				return -1;
+			}
+
+			data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
+				test_data->seg_sz);
+
+			if (data_addr == NULL) {
+				RTE_LOG(ERR, USER1, "Could not append data\n");
+				return -1;
+			}
+
+			if (rte_pktmbuf_chain(test_data->comp_bufs[i],
+					next_seg) < 0) {
+				RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
+				return -1;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Hand every mbuf referenced by comp_bufs[] and decomp_bufs[] to
+ * rte_pktmbuf_free(), for all total_bufs entries.
+ */
+static void
+free_bufs(struct comp_test_data *test_data)
+{
+	uint32_t idx = 0;
+
+	while (idx < test_data->total_bufs) {
+		rte_pktmbuf_free(test_data->comp_bufs[idx]);
+		rte_pktmbuf_free(test_data->decomp_bufs[idx]);
+		idx++;
+	}
+}
+
+static int
+main_loop(struct comp_test_data *test_data, uint8_t level,
+			enum rte_comp_xform_type type,
+			uint8_t *output_data_ptr,
+			size_t *output_data_sz,
+			unsigned int benchmarking)
+{
+	uint8_t dev_id = test_data->cdev_id;
+	uint32_t i, iter, num_iter;
+	struct rte_comp_op **ops, **deq_ops;
+	void *priv_xform = NULL;
+	struct rte_comp_xform xform;
+	size_t output_size = 0;
+	struct rte_mbuf **input_bufs, **output_bufs;
+	int res = 0;
+	int allocated = 0;
+
+	if (test_data == NULL || !test_data->burst_sz) {
+		RTE_LOG(ERR, USER1,
+			"Unknown burst size\n");
+		return -1;
+	}
+
+	ops = rte_zmalloc_socket(NULL,
+		2 * test_data->total_bufs * sizeof(struct rte_comp_op *),
+		0, rte_socket_id());
+
+	if (ops == NULL) {
+		RTE_LOG(ERR, USER1,
+			"Can't allocate memory for ops strucures\n");
+		return -1;
+	}
+
+	deq_ops = &ops[test_data->total_bufs];
+
+	if (type == RTE_COMP_COMPRESS) {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_COMPRESS,
+			.compress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.deflate.huffman = test_data->huffman_enc,
+				.level = level,
+				.window_size = test_data->window_sz,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = test_data->decomp_bufs;
+		output_bufs = test_data->comp_bufs;
+	} else {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_DECOMPRESS,
+			.decompress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.window_size = test_data->window_sz,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = test_data->comp_bufs;
+		output_bufs = test_data->decomp_bufs;
+	}
+
+	/* Create private xform */
+	if (rte_compressdev_private_xform_create(dev_id, &xform,
+			&priv_xform) < 0) {
+		RTE_LOG(ERR, USER1, "Private xform could not be created\n");
+		res = -1;
+		goto end;
+	}
+
+	uint64_t tsc_start, tsc_end, tsc_duration;
+
+	tsc_start = tsc_end = tsc_duration = 0;
+	if (benchmarking) {
+		tsc_start = rte_rdtsc();
+		num_iter = test_data->num_iter;
+	} else
+		num_iter = 1;
+
+	for (iter = 0; iter < num_iter; iter++) {
+		uint32_t total_ops = test_data->total_bufs;
+		uint32_t remaining_ops = test_data->total_bufs;
+		uint32_t total_deq_ops = 0;
+		uint32_t total_enq_ops = 0;
+		uint16_t ops_unused = 0;
+		uint16_t num_enq = 0;
+		uint16_t num_deq = 0;
+
+		output_size = 0;
+
+		while (remaining_ops > 0) {
+			uint16_t num_ops = RTE_MIN(remaining_ops,
+						   test_data->burst_sz);
+			uint16_t ops_needed = num_ops - ops_unused;
+
+			/*
+			 * Move the unused operations from the previous
+			 * enqueue_burst call to the front, to maintain order
+			 */
+			if ((ops_unused > 0) && (num_enq > 0)) {
+				size_t nb_b_to_mov =
+				      ops_unused * sizeof(struct rte_comp_op *);
+
+				memmove(ops, &ops[num_enq], nb_b_to_mov);
+			}
+
+			/* Allocate compression operations */
+			if (ops_needed && !rte_comp_op_bulk_alloc(
+						test_data->op_pool,
+						&ops[ops_unused],
+						ops_needed)) {
+				RTE_LOG(ERR, USER1,
+				      "Could not allocate enough operations\n");
+				res = -1;
+				goto end;
+			}
+			allocated += ops_needed;
+
+			for (i = 0; i < ops_needed; i++) {
+				/*
+				 * Calculate next buffer to attach to operation
+				 */
+				uint32_t buf_id = total_enq_ops + i +
+						ops_unused;
+				uint16_t op_id = ops_unused + i;
+				/* Reset all data in output buffers */
+				struct rte_mbuf *m = output_bufs[buf_id];
+
+				m->pkt_len = test_data->seg_sz * m->nb_segs;
+				while (m) {
+					m->data_len = m->buf_len - m->data_off;
+					m = m->next;
+				}
+				ops[op_id]->m_src = input_bufs[buf_id];
+				ops[op_id]->m_dst = output_bufs[buf_id];
+				ops[op_id]->src.offset = 0;
+				ops[op_id]->src.length =
+					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
+				ops[op_id]->dst.offset = 0;
+				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
+				ops[op_id]->input_chksum = buf_id;
+				ops[op_id]->private_xform = priv_xform;
+			}
+
+			num_enq = rte_compressdev_enqueue_burst(dev_id, 0, ops,
+								num_ops);
+			ops_unused = num_ops - num_enq;
+			remaining_ops -= num_enq;
+			total_enq_ops += num_enq;
+
+			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
+							   deq_ops,
+							   test_data->burst_sz);
+			total_deq_ops += num_deq;
+			if (benchmarking == 0) {
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+					const void *read_data_addr =
+						rte_pktmbuf_read(op->m_dst, 0,
+						op->produced, output_data_ptr);
+					if (read_data_addr == NULL) {
+						RTE_LOG(ERR, USER1,
+				      "Could not copy buffer in destination\n");
+						res = -1;
+						goto end;
+					}
+
+					if (read_data_addr != output_data_ptr)
+						rte_memcpy(output_data_ptr,
+							rte_pktmbuf_mtod(
+							  op->m_dst, uint8_t *),
+							op->produced);
+					output_data_ptr += op->produced;
+					output_size += op->produced;
+
+				}
+			}
+
+			if (iter == num_iter - 1) {
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+					struct rte_mbuf *m = op->m_dst;
+
+					m->pkt_len = op->produced;
+					uint32_t remaining_data = op->produced;
+					uint16_t data_to_append;
+
+					while (remaining_data > 0) {
+						data_to_append =
+							RTE_MIN(remaining_data,
+							     test_data->seg_sz);
+						m->data_len = data_to_append;
+						remaining_data -=
+								data_to_append;
+						m = m->next;
+					}
+				}
+			}
+			rte_mempool_put_bulk(test_data->op_pool,
+					     (void **)deq_ops, num_deq);
+			allocated -= num_deq;
+		}
+
+		/* Dequeue the last operations */
+		while (total_deq_ops < total_ops) {
+			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
+						deq_ops, test_data->burst_sz);
+			total_deq_ops += num_deq;
+			if (benchmarking == 0) {
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+					const void *read_data_addr =
+						rte_pktmbuf_read(op->m_dst, 0,
+						op->produced, output_data_ptr);
+					if (read_data_addr == NULL) {
+						RTE_LOG(ERR, USER1,
+				      "Could not copy buffer in destination\n");
+						res = -1;
+						goto end;
+					}
+
+					if (read_data_addr != output_data_ptr)
+						rte_memcpy(output_data_ptr,
+							rte_pktmbuf_mtod(
+							op->m_dst, uint8_t *),
+							op->produced);
+					output_data_ptr += op->produced;
+					output_size += op->produced;
+
+				}
+			}
+
+			if (iter == num_iter - 1) {
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+					struct rte_mbuf *m = op->m_dst;
+
+					m->pkt_len = op->produced;
+					uint32_t remaining_data = op->produced;
+					uint16_t data_to_append;
+
+					while (remaining_data > 0) {
+						data_to_append =
+						RTE_MIN(remaining_data,
+							test_data->seg_sz);
+						m->data_len = data_to_append;
+						remaining_data -=
+								data_to_append;
+						m = m->next;
+					}
+				}
+			}
+			rte_mempool_put_bulk(test_data->op_pool,
+					     (void **)deq_ops, num_deq);
+			allocated -= num_deq;
+		}
+	}
+
+	if (benchmarking) {
+		tsc_end = rte_rdtsc();
+		tsc_duration = tsc_end - tsc_start;
+
+		if (type == RTE_COMP_COMPRESS)
+			test_data->comp_tsc_duration[level] =
+					tsc_duration / num_iter;
+		else
+			test_data->decomp_tsc_duration[level] =
+					tsc_duration / num_iter;
+	}
+
+	if (benchmarking == 0 && output_data_sz)
+		*output_data_sz = output_size;
+end:
+	rte_mempool_put_bulk(test_data->op_pool, (void **)ops, allocated);
+	rte_compressdev_private_xform_free(dev_id, priv_xform);
+	rte_free(ops);
+	return res;
+}
+
 int
 main(int argc, char **argv)
 {
-	int ret;
+	uint8_t level, level_idx = 0;
+	int ret, i;
 	struct comp_test_data *test_data;
 
 	/* Initialise DPDK EAL */
@@ -29,24 +743,186 @@ main(int argc, char **argv)
 		rte_exit(EXIT_FAILURE, "Cannot reserve memory in socket %d\n",
 				rte_socket_id());
 
+	cleanup = ST_TEST_DATA;
 	comp_perf_options_default(test_data);
 
 	if (comp_perf_options_parse(test_data, argc, argv) < 0) {
 		RTE_LOG(ERR, USER1,
 			"Parsing one or more user options failed\n");
 		ret = EXIT_FAILURE;
-		goto err;
+		goto end;
 	}
 
 	if (comp_perf_options_check(test_data) < 0) {
 		ret = EXIT_FAILURE;
-		goto err;
+		goto end;
+	}
+
+	if (comp_perf_initialize_compressdev(test_data) < 0) {
+		ret = EXIT_FAILURE;
+		goto end;
+	}
+
+	cleanup = ST_COMPDEV;
+	if (comp_perf_dump_input_data(test_data) < 0) {
+		ret = EXIT_FAILURE;
+		goto end;
+	}
+
+	cleanup = ST_INPUT_DATA;
+	if (comp_perf_allocate_memory(test_data) < 0) {
+		ret = EXIT_FAILURE;
+		goto end;
+	}
+
+	if (prepare_bufs(test_data) < 0) {
+		ret = EXIT_FAILURE;
+		goto end;
+	}
+
+	if (test_data->level.inc != 0)
+		level = test_data->level.min;
+	else
+		level = test_data->level.list[0];
+
+	size_t comp_data_sz;
+	size_t decomp_data_sz;
+
+	printf("Burst size = %u\n", test_data->burst_sz);
+	printf("File size = %zu\n", test_data->input_data_sz);
+
+	printf("%6s%12s%17s%19s%21s%15s%21s%23s%16s\n",
+		"Level", "Comp size", "Comp ratio [%]",
+		"Comp [Cycles/it]", "Comp [Cycles/Byte]", "Comp [Gbps]",
+		"Decomp [Cycles/it]", "Decomp [Cycles/Byte]", "Decomp [Gbps]");
+
+	cleanup = ST_DURING_TEST;
+	while (level <= test_data->level.max) {
+		/*
+		 * Run a first iteration, to verify compression and
+		 * get the compression ratio for the level
+		 */
+		if (main_loop(test_data, level, RTE_COMP_COMPRESS,
+			      test_data->compressed_data,
+			      &comp_data_sz, 0) < 0) {
+			ret = EXIT_FAILURE;
+			goto end;
+		}
+
+		if (main_loop(test_data, level, RTE_COMP_DECOMPRESS,
+			      test_data->decompressed_data,
+			      &decomp_data_sz, 0) < 0) {
+			ret = EXIT_FAILURE;
+			goto end;
+		}
+
+		if (decomp_data_sz != test_data->input_data_sz) {
+			RTE_LOG(ERR, USER1,
+		   "Decompressed data length not equal to input data length\n");
+			RTE_LOG(ERR, USER1,
+				"Decompressed size = %zu, expected = %zu\n",
+				decomp_data_sz, test_data->input_data_sz);
+			ret = EXIT_FAILURE;
+			goto end;
+		} else {
+			if (memcmp(test_data->decompressed_data,
+					test_data->input_data,
+					test_data->input_data_sz) != 0) {
+				RTE_LOG(ERR, USER1,
+			    "Decompressed data is not the same as file data\n");
+				ret = EXIT_FAILURE;
+				goto end;
+			}
+		}
+
+		double ratio = (double) comp_data_sz /
+						test_data->input_data_sz * 100;
+
+		/*
+		 * Run the tests twice, discarding the first performance
+		 * results, before the cache is warmed up
+		 */
+		for (i = 0; i < 2; i++) {
+			if (main_loop(test_data, level, RTE_COMP_COMPRESS,
+					NULL, NULL, 1) < 0) {
+				ret = EXIT_FAILURE;
+				goto end;
+			}
+		}
+
+		for (i = 0; i < 2; i++) {
+			if (main_loop(test_data, level, RTE_COMP_DECOMPRESS,
+					NULL, NULL, 1) < 0) {
+				ret = EXIT_FAILURE;
+				goto end;
+			}
+		}
+
+		uint64_t comp_tsc_duration =
+				test_data->comp_tsc_duration[level];
+		double comp_tsc_byte = (double)comp_tsc_duration /
+						test_data->input_data_sz;
+		double comp_gbps = rte_get_tsc_hz() / comp_tsc_byte * 8 /
+				1000000000;
+		uint64_t decomp_tsc_duration =
+				test_data->decomp_tsc_duration[level];
+		double decomp_tsc_byte = (double)decomp_tsc_duration /
+						test_data->input_data_sz;
+		double decomp_gbps = rte_get_tsc_hz() / decomp_tsc_byte * 8 /
+				1000000000;
+
+		printf("%6u%12zu%17.2f%19"PRIu64"%21.2f"
+					"%15.2f%21"PRIu64"%23.2f%16.2f\n",
+		       level, comp_data_sz, ratio, comp_tsc_duration,
+		       comp_tsc_byte, comp_gbps, decomp_tsc_duration,
+		       decomp_tsc_byte, decomp_gbps);
+
+		if (test_data->level.inc != 0)
+			level += test_data->level.inc;
+		else {
+			if (++level_idx == test_data->level.count)
+				break;
+			level = test_data->level.list[level_idx];
+		}
 	}
 
 	ret = EXIT_SUCCESS;
 
-err:
-	rte_free(test_data);
+end:
+	switch (cleanup) {
 
+	case ST_DURING_TEST:
+	case ST_PREPARE_BUF:
+		free_bufs(test_data);
+		/* fallthrough */
+	case ST_MEMORY_ALLOC:
+		rte_free(test_data->decomp_bufs);
+		rte_free(test_data->comp_bufs);
+		rte_free(test_data->decompressed_data);
+		rte_free(test_data->compressed_data);
+		rte_mempool_free(test_data->op_pool);
+		rte_mempool_free(test_data->decomp_buf_pool);
+		rte_mempool_free(test_data->comp_buf_pool);
+		/* fallthrough */
+	case ST_INPUT_DATA:
+		rte_free(test_data->input_data);
+		/* fallthrough */
+	case ST_COMPDEV:
+		if (test_data->cdev_id != -1)
+			rte_compressdev_stop(test_data->cdev_id);
+		/* fallthrough */
+	case ST_TEST_DATA:
+		rte_free(test_data);
+		/* fallthrough */
+	case ST_CLEAR:
+	default:
+		i = rte_eal_cleanup();
+		if (i) {
+			RTE_LOG(ERR, USER1,
+				"Error from rte_eal_cleanup(), %d\n", i);
+			ret = i;
+		}
+		break;
+	}
 	return ret;
 }
-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH v3 3/5] doc/guides/tools: add doc files
  2018-11-23 13:06   ` [dpdk-dev] [PATCH v3 0/5] " Tomasz Jozwiak
  2018-11-23 13:06     ` [dpdk-dev] [PATCH v3 1/5] app/compress-perf: add parser Tomasz Jozwiak
  2018-11-23 13:06     ` [dpdk-dev] [PATCH v3 2/5] app/compress-perf: add performance measurement Tomasz Jozwiak
@ 2018-11-23 13:06     ` Tomasz Jozwiak
  2018-11-23 14:52       ` Varghese, Vipin
  2018-11-23 13:06     ` [dpdk-dev] [PATCH v3 4/5] app/compress-perf: add dynamic compression test Tomasz Jozwiak
                       ` (2 subsequent siblings)
  5 siblings, 1 reply; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-11-23 13:06 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, Shally.Verma, akhil.goyal

Added:
 -  initial version of compression performance test
    description file.
 -  release note in release_18_11.rst

Updated index.rst file

Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
---
 MAINTAINERS                            |  5 +++
 doc/guides/rel_notes/release_18_11.rst |  5 +++
 doc/guides/tools/comp_perf.rst         | 75 ++++++++++++++++++++++++++++++++++
 doc/guides/tools/index.rst             |  1 +
 4 files changed, 86 insertions(+)
 create mode 100644 doc/guides/tools/comp_perf.rst

diff --git a/MAINTAINERS b/MAINTAINERS
index 71ba312..dd0c131 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1242,6 +1242,11 @@ M: Bernard Iremonger <bernard.iremonger@intel.com>
 F: app/test-pmd/
 F: doc/guides/testpmd_app_ug/
 
+Compression performance test application
+M: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
+F: app/test-compress-perf/
+F: doc/guides/tools/comp_perf.rst
+
 Crypto performance test application
 M: Declan Doherty <declan.doherty@intel.com>
 F: app/test-crypto-perf/
diff --git a/doc/guides/rel_notes/release_18_11.rst b/doc/guides/rel_notes/release_18_11.rst
index 32ff0e5..d44cf30 100644
--- a/doc/guides/rel_notes/release_18_11.rst
+++ b/doc/guides/rel_notes/release_18_11.rst
@@ -328,6 +328,11 @@ New Features
   additional command-line parameter values from the "DPDK_TEST_PARAMS"
   environment variable to make this application easier to use.
 
+* **Added a compression performance test tool.**
+
+   Added a new performance test tool to test the compressdev PMD. The tool tests
+   compression ratio and compression throughput. Dynamic compression test is not
+   supported yet.
 
 API Changes
 -----------
diff --git a/doc/guides/tools/comp_perf.rst b/doc/guides/tools/comp_perf.rst
new file mode 100644
index 0000000..1428348
--- /dev/null
+++ b/doc/guides/tools/comp_perf.rst
@@ -0,0 +1,75 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright(c) 2018 Intel Corporation.
+
+dpdk-test-compress-perf Application
+===================================
+
+The ``dpdk-test-compress-perf`` tool is a Data Plane Development Kit (DPDK)
+utility that allows measuring performance parameters of PMDs available in the
+compress tree. The application reads the data from a file (--input-file),
+loads the whole file into a buffer and fills the input mbufs with that data;
+the mbufs are then passed to the compress device in compression operations.
+Then, the output buffers are fed into the decompression stage, and the resulting
+data is compared against the original data (verification phase). After that,
+a number of iterations are performed, compressing first and decompressing later,
+to check the throughput rate
+(showing cycles/iteration, cycles/Byte and Gbps, for compression and decompression).
+
+
+Limitations
+~~~~~~~~~~~
+
+* Only supports the fixed compression and stateless operation.
+
+Command line options
+--------------------
+
+ ``--driver-name NAME``: compress driver to use
+
+ ``--input-file NAME``: file to compress and decompress
+
+ ``--extended-input-sz N``: extend file data up to this size (default: no extension)
+
+ ``--seg-sz N``: size of segment to store the data (default: 2048)
+
+ ``--burst-sz N``: compress operation burst size
+
+ ``--pool-sz N``: mempool size for compress operations/mbufs (default: 8192)
+
+ ``--max-num-sgl-segs N``: maximum number of segments for each mbuf (default: 16)
+
+ ``--num-iter N``: number of times the file will be compressed/decompressed (default: 10000)
+
+ ``--operation [comp/decomp/comp_and_decomp]``: perform test on compression, decompression or both operations
+
+ ``--huffman-enc [fixed/default]``: Huffman encoding (default: fixed)
+
+ ``--compress-level N``: compression level, which could be a single value, list or range (default: range between 1 and 9)
+
+ ``--window-sz N``: base two log value of compression window size (default: max supported by PMD)
+
+ ``-h``: prints this help
+
+
+Compiling the Application
+-------------------------
+
+**Step 1: PMD setting**
+
+The ``dpdk-test-compress-perf`` tool depends on the compression device PMDs,
+which may be disabled by default in the build configuration file ``common_base``.
+The compression PMD to be tested can be enabled by setting, for example::
+
+   CONFIG_RTE_LIBRTE_PMD_ISAL=y
+
+
+Running the Application
+-----------------------
+
+The tool has a number of command line options. Here is a sample command line:
+
+.. code-block:: console
+
+   ./build/app/dpdk-test-compress-perf  -l 4 -- --driver-name compress_qat --input-file test.txt --seg-sz 8192
+    --compress-level 1:1:9 --num-iter 10 --extended-input-sz 1048576  --max-num-sgl-segs 16 --huffman-enc fixed
+
diff --git a/doc/guides/tools/index.rst b/doc/guides/tools/index.rst
index a6e2c4c..24235ba 100644
--- a/doc/guides/tools/index.rst
+++ b/doc/guides/tools/index.rst
@@ -42,3 +42,4 @@ DPDK Tools User Guides
     testbbdev
     cryptoperf
     testeventdev
+    comp_perf
-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH v3 4/5] app/compress-perf: add dynamic compression test
  2018-11-23 13:06   ` [dpdk-dev] [PATCH v3 0/5] " Tomasz Jozwiak
                       ` (2 preceding siblings ...)
  2018-11-23 13:06     ` [dpdk-dev] [PATCH v3 3/5] doc/guides/tools: add doc files Tomasz Jozwiak
@ 2018-11-23 13:06     ` Tomasz Jozwiak
  2018-11-23 13:06     ` [dpdk-dev] [PATCH v3 5/5] app/compress-perf: code refactoring Tomasz Jozwiak
  2018-11-23 14:27     ` [dpdk-dev] [PATCH v4 0/5] add initial version of compress-perf Tomasz Jozwiak
  5 siblings, 0 replies; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-11-23 13:06 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, Shally.Verma, akhil.goyal

    Added dynamic compression feature into compression perf. test.

Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
---
 app/test-compress-perf/comp_perf_options_parse.c | 6 +++++-
 doc/guides/rel_notes/release_18_11.rst           | 3 +--
 doc/guides/tools/comp_perf.rst                   | 4 ++--
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/app/test-compress-perf/comp_perf_options_parse.c b/app/test-compress-perf/comp_perf_options_parse.c
index fd5d31c..dbe8135 100644
--- a/app/test-compress-perf/comp_perf_options_parse.c
+++ b/app/test-compress-perf/comp_perf_options_parse.c
@@ -431,6 +431,10 @@ parse_huffman_enc(struct comp_test_data *test_data, const char *arg)
 		{
 			"fixed",
 			RTE_COMP_HUFFMAN_FIXED
+		},
+		{
+			"dynamic",
+			RTE_COMP_HUFFMAN_DYNAMIC
 		}
 	};
 
@@ -567,7 +571,7 @@ comp_perf_options_default(struct comp_test_data *test_data)
 	test_data->pool_sz = 8192;
 	test_data->max_sgl_segs = 16;
 	test_data->num_iter = 10000;
-	test_data->huffman_enc = RTE_COMP_HUFFMAN_FIXED;
+	test_data->huffman_enc = RTE_COMP_HUFFMAN_DYNAMIC;
 	test_data->test_op = COMPRESS_DECOMPRESS;
 	test_data->window_sz = -1;
 	test_data->level.min = 1;
diff --git a/doc/guides/rel_notes/release_18_11.rst b/doc/guides/rel_notes/release_18_11.rst
index d44cf30..848cc06 100644
--- a/doc/guides/rel_notes/release_18_11.rst
+++ b/doc/guides/rel_notes/release_18_11.rst
@@ -331,8 +331,7 @@ New Features
 * **Added a compression performance test tool.**
 
    Added a new performance test tool to test the compressdev PMD. The tool tests
-   compression ratio and compression throughput. Dynamic compression test is not
-   supported yet.
+   compression ratio and compression throughput.
 
 API Changes
 -----------
diff --git a/doc/guides/tools/comp_perf.rst b/doc/guides/tools/comp_perf.rst
index 1428348..abb727d 100644
--- a/doc/guides/tools/comp_perf.rst
+++ b/doc/guides/tools/comp_perf.rst
@@ -19,7 +19,7 @@ to check the throughput rate
 Limitations
 ~~~~~~~~~~~
 
-* Only supports the fixed compression and stateless operation.
+* Only supports the stateless operation.
 
 Command line options
 --------------------
@@ -42,7 +42,7 @@ Command line options
 
  ``--operation [comp/decomp/comp_and_decomp]``: perform test on compression, decompression or both operations
 
- ``--huffman-enc [fixed/default]``: Huffman encoding (default: fixed)
+ ``--huffman-enc [fixed/dynamic/default]``: Huffman encoding (default: dynamic)
 
  ``--compress-level N``: compression level, which could be a single value, list or range (default: range between 1 and 9)
 
-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH v3 5/5] app/compress-perf: code refactoring
  2018-11-23 13:06   ` [dpdk-dev] [PATCH v3 0/5] " Tomasz Jozwiak
                       ` (3 preceding siblings ...)
  2018-11-23 13:06     ` [dpdk-dev] [PATCH v3 4/5] app/compress-perf: add dynamic compression test Tomasz Jozwiak
@ 2018-11-23 13:06     ` Tomasz Jozwiak
  2018-11-23 14:27     ` [dpdk-dev] [PATCH v4 0/5] add initial version of compress-perf Tomasz Jozwiak
  5 siblings, 0 replies; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-11-23 13:06 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, Shally.Verma, akhil.goyal

Code refactoring to separate validation from benchmarking part.

Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
---
 app/test-compress-perf/Makefile                   |   2 +
 app/test-compress-perf/comp_perf_options.h        |  12 +
 app/test-compress-perf/comp_perf_test_benchmark.c | 291 +++++++++++++++++
 app/test-compress-perf/comp_perf_test_benchmark.h |  13 +
 app/test-compress-perf/comp_perf_test_verify.c    | 337 ++++++++++++++++++++
 app/test-compress-perf/comp_perf_test_verify.h    |  13 +
 app/test-compress-perf/main.c                     | 366 +---------------------
 app/test-compress-perf/meson.build                |   4 +-
 8 files changed, 685 insertions(+), 353 deletions(-)
 create mode 100644 app/test-compress-perf/comp_perf_test_benchmark.c
 create mode 100644 app/test-compress-perf/comp_perf_test_benchmark.h
 create mode 100644 app/test-compress-perf/comp_perf_test_verify.c
 create mode 100644 app/test-compress-perf/comp_perf_test_verify.h

diff --git a/app/test-compress-perf/Makefile b/app/test-compress-perf/Makefile
index 8aa7a22..d20e17e 100644
--- a/app/test-compress-perf/Makefile
+++ b/app/test-compress-perf/Makefile
@@ -12,5 +12,7 @@ CFLAGS += -O3
 # all source are stored in SRCS-y
 SRCS-y := main.c
 SRCS-y += comp_perf_options_parse.c
+SRCS-y += comp_perf_test_verify.c
+SRCS-y += comp_perf_test_benchmark.c
 
 include $(RTE_SDK)/mk/rte.app.mk
diff --git a/app/test-compress-perf/comp_perf_options.h b/app/test-compress-perf/comp_perf_options.h
index 7516ea0..ca96a3c 100644
--- a/app/test-compress-perf/comp_perf_options.h
+++ b/app/test-compress-perf/comp_perf_options.h
@@ -2,6 +2,9 @@
  * Copyright(c) 2018 Intel Corporation
  */
 
+#ifndef _COMP_PERF_OPS_
+#define _COMP_PERF_OPS_
+
 #define MAX_DRIVER_NAME		64
 #define MAX_INPUT_FILE_NAME	64
 #define MAX_LIST		32
@@ -46,6 +49,13 @@ struct comp_test_data {
 	/* Store TSC duration for all levels (including level 0) */
 	uint64_t comp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
 	uint64_t decomp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
+	size_t comp_data_sz;
+	size_t decomp_data_sz;
+	double ratio;
+	double comp_gbps;
+	double decomp_gbps;
+	double comp_tsc_byte;
+	double decomp_tsc_byte;
 };
 
 int
@@ -57,3 +67,5 @@ comp_perf_options_default(struct comp_test_data *test_data);
 
 int
 comp_perf_options_check(struct comp_test_data *test_data);
+
+#endif
diff --git a/app/test-compress-perf/comp_perf_test_benchmark.c b/app/test-compress-perf/comp_perf_test_benchmark.c
new file mode 100644
index 0000000..ae5f204
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_test_benchmark.c
@@ -0,0 +1,291 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_cycles.h>
+#include <rte_compressdev.h>
+
+#include "comp_perf_test_benchmark.h"
+
+static int /* Times num_iter passes of `type` over every buffer; returns 0, or -1 on failure */
+main_loop(struct comp_test_data *test_data, uint8_t level,
+			enum rte_comp_xform_type type)
+{
+	uint8_t dev_id = test_data->cdev_id; /* NOTE(review): test_data is dereferenced here, before the NULL check below */
+	uint32_t i, iter, num_iter;
+	struct rte_comp_op **ops, **deq_ops;
+	void *priv_xform = NULL;
+	struct rte_comp_xform xform;
+	struct rte_mbuf **input_bufs, **output_bufs;
+	int res = 0;
+	int allocated = 0; /* ops taken from op_pool and not yet put back */
+
+	if (test_data == NULL || !test_data->burst_sz) {
+		RTE_LOG(ERR, USER1,
+			"Unknown burst size\n");
+		return -1;
+	}
+
+	ops = rte_zmalloc_socket(NULL, /* one array of 2 * total_bufs op pointers */
+		2 * test_data->total_bufs * sizeof(struct rte_comp_op *),
+		0, rte_socket_id());
+
+	if (ops == NULL) {
+		RTE_LOG(ERR, USER1,
+			"Can't allocate memory for ops strucures\n");
+		return -1;
+	}
+
+	deq_ops = &ops[test_data->total_bufs]; /* second half of the array receives dequeued ops */
+
+	if (type == RTE_COMP_COMPRESS) {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_COMPRESS,
+			.compress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.deflate.huffman = test_data->huffman_enc,
+				.level = level,
+				.window_size = test_data->window_sz,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = test_data->decomp_bufs; /* compression reads decomp_bufs ... */
+		output_bufs = test_data->comp_bufs; /* ... and writes comp_bufs */
+	} else {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_DECOMPRESS,
+			.decompress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.window_size = test_data->window_sz,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = test_data->comp_bufs; /* decompression reads comp_bufs ... */
+		output_bufs = test_data->decomp_bufs; /* ... and writes decomp_bufs */
+	}
+
+	/* Create private xform */
+	if (rte_compressdev_private_xform_create(dev_id, &xform,
+			&priv_xform) < 0) {
+		RTE_LOG(ERR, USER1, "Private xform could not be created\n");
+		res = -1;
+		goto end;
+	}
+
+	uint64_t tsc_start, tsc_end, tsc_duration;
+
+	tsc_start = tsc_end = tsc_duration = 0;
+	tsc_start = rte_rdtsc(); /* the timed region spans all num_iter passes below */
+	num_iter = test_data->num_iter;
+
+	for (iter = 0; iter < num_iter; iter++) {
+		uint32_t total_ops = test_data->total_bufs;
+		uint32_t remaining_ops = test_data->total_bufs; /* ops still to be enqueued in this pass */
+		uint32_t total_deq_ops = 0;
+		uint32_t total_enq_ops = 0;
+		uint16_t ops_unused = 0; /* ops the last enqueue_burst call did not accept */
+		uint16_t num_enq = 0;
+		uint16_t num_deq = 0;
+
+		while (remaining_ops > 0) {
+			uint16_t num_ops = RTE_MIN(remaining_ops,
+						   test_data->burst_sz);
+			uint16_t ops_needed = num_ops - ops_unused; /* only the shortfall is newly allocated */
+
+			/*
+			 * Move the unused operations from the previous
+			 * enqueue_burst call to the front, to maintain order
+			 */
+			if ((ops_unused > 0) && (num_enq > 0)) {
+				size_t nb_b_to_mov =
+				      ops_unused * sizeof(struct rte_comp_op *);
+
+				memmove(ops, &ops[num_enq], nb_b_to_mov);
+			}
+
+			/* Allocate compression operations */
+			if (ops_needed && !rte_comp_op_bulk_alloc(
+						test_data->op_pool,
+						&ops[ops_unused],
+						ops_needed)) {
+				RTE_LOG(ERR, USER1,
+				      "Could not allocate enough operations\n");
+				res = -1;
+				goto end;
+			}
+			allocated += ops_needed;
+
+			for (i = 0; i < ops_needed; i++) {
+				/*
+				 * Calculate next buffer to attach to operation
+				 */
+				uint32_t buf_id = total_enq_ops + i +
+						ops_unused;
+				uint16_t op_id = ops_unused + i; /* new ops sit after the carried-over ones */
+				/* Reset all data in output buffers */
+				struct rte_mbuf *m = output_bufs[buf_id];
+
+				m->pkt_len = test_data->seg_sz * m->nb_segs;
+				while (m) {
+					m->data_len = m->buf_len - m->data_off;
+					m = m->next;
+				}
+				ops[op_id]->m_src = input_bufs[buf_id];
+				ops[op_id]->m_dst = output_bufs[buf_id];
+				ops[op_id]->src.offset = 0;
+				ops[op_id]->src.length =
+					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
+				ops[op_id]->dst.offset = 0;
+				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
+				ops[op_id]->input_chksum = buf_id; /* NOTE(review): looks like a buffer tag; not read back in this function */
+				ops[op_id]->private_xform = priv_xform;
+			}
+
+			num_enq = rte_compressdev_enqueue_burst(dev_id, 0, ops,
+								num_ops);
+			if (num_enq == 0) { /* nothing accepted: abort only if the device counted enqueue errors */
+				struct rte_compressdev_stats stats;
+
+				rte_compressdev_stats_get(dev_id, &stats);
+				if (stats.enqueue_err_count) {
+					res = -1;
+					goto end;
+				}
+			}
+
+			ops_unused = num_ops - num_enq;
+			remaining_ops -= num_enq;
+			total_enq_ops += num_enq;
+
+			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
+							   deq_ops,
+							   test_data->burst_sz);
+			total_deq_ops += num_deq;
+
+			if (iter == num_iter - 1) { /* final pass only: set dst mbuf lengths to op->produced */
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+					struct rte_mbuf *m = op->m_dst;
+
+					m->pkt_len = op->produced;
+					uint32_t remaining_data = op->produced;
+					uint16_t data_to_append;
+
+					while (remaining_data > 0) {
+						data_to_append =
+							RTE_MIN(remaining_data,
+							     test_data->seg_sz);
+						m->data_len = data_to_append;
+						remaining_data -=
+								data_to_append;
+						m = m->next;
+					}
+				}
+			}
+			rte_mempool_put_bulk(test_data->op_pool, /* hand the dequeued ops back to op_pool */
+					     (void **)deq_ops, num_deq);
+			allocated -= num_deq;
+		}
+
+		/* Dequeue the last operations */
+		while (total_deq_ops < total_ops) {
+			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
+						deq_ops, test_data->burst_sz);
+			if (num_deq == 0) { /* nothing returned: abort only if the device counted dequeue errors */
+				struct rte_compressdev_stats stats;
+
+				rte_compressdev_stats_get(dev_id, &stats);
+				if (stats.dequeue_err_count) {
+					res = -1;
+					goto end;
+				}
+			}
+
+			total_deq_ops += num_deq;
+
+			if (iter == num_iter - 1) { /* same final-pass length fix-up as in the enqueue loop */
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+					struct rte_mbuf *m = op->m_dst;
+
+					m->pkt_len = op->produced;
+					uint32_t remaining_data = op->produced;
+					uint16_t data_to_append;
+
+					while (remaining_data > 0) {
+						data_to_append =
+						RTE_MIN(remaining_data,
+							test_data->seg_sz);
+						m->data_len = data_to_append;
+						remaining_data -=
+								data_to_append;
+						m = m->next;
+					}
+				}
+			}
+			rte_mempool_put_bulk(test_data->op_pool,
+					     (void **)deq_ops, num_deq);
+			allocated -= num_deq;
+		}
+	}
+
+	tsc_end = rte_rdtsc();
+	tsc_duration = tsc_end - tsc_start;
+
+	if (type == RTE_COMP_COMPRESS)
+		test_data->comp_tsc_duration[level] =
+				tsc_duration / num_iter; /* mean cycles per pass */
+	else
+		test_data->decomp_tsc_duration[level] =
+				tsc_duration / num_iter; /* mean cycles per pass */
+
+end:
+	rte_mempool_put_bulk(test_data->op_pool, (void **)ops, allocated); /* NOTE(review): assumes the outstanding ops are ops[0..allocated) - confirm for the error paths */
+	rte_compressdev_private_xform_free(dev_id, priv_xform);
+	rte_free(ops);
+	return res;
+}
+
+int /* returns EXIT_SUCCESS, or EXIT_FAILURE as soon as any main_loop() run fails */
+cperf_benchmark(struct comp_test_data *test_data, uint8_t level) {
+	int i, ret = EXIT_SUCCESS;
+
+	/*
+	 * Benchmark compression, then decompression, at this level. Each runs
+	 * twice; the second (cache-warm) run overwrites the stored duration.
+	 */
+	for (i = 0; i < 2; i++) {
+		if (main_loop(test_data, level, RTE_COMP_COMPRESS) < 0) {
+			ret = EXIT_FAILURE;
+			goto end; /* derived results below are left untouched on failure */
+		}
+	}
+
+	for (i = 0; i < 2; i++) {
+		if (main_loop(test_data, level, RTE_COMP_DECOMPRESS) < 0) {
+			ret = EXIT_FAILURE;
+			goto end;
+		}
+	}
+
+	test_data->comp_tsc_byte = /* cycles per pass divided by input_data_sz */
+			(double)(test_data->comp_tsc_duration[level]) /
+					test_data->input_data_sz;
+
+	test_data->decomp_tsc_byte = /* also divided by input_data_sz, not by the compressed size */
+			(double)(test_data->decomp_tsc_duration[level]) /
+					test_data->input_data_sz;
+
+	test_data->comp_gbps = rte_get_tsc_hz() / test_data->comp_tsc_byte * 8 / /* TSC Hz / (cycles per byte) * 8 bits, scaled by 1e9 */
+			1000000000;
+
+	test_data->decomp_gbps = rte_get_tsc_hz() / test_data->decomp_tsc_byte /* same formula for decompression */
+			* 8 / 1000000000;
+end:
+	return ret;
+}
diff --git a/app/test-compress-perf/comp_perf_test_benchmark.h b/app/test-compress-perf/comp_perf_test_benchmark.h
new file mode 100644
index 0000000..b193445
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_test_benchmark.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _COMP_PERF_TEST_BENCHMARK_
+#define _COMP_PERF_TEST_BENCHMARK_
+
+#include "comp_perf_options.h"
+
+int
+cperf_benchmark(struct comp_test_data *test_data, uint8_t level);
+
+#endif
diff --git a/app/test-compress-perf/comp_perf_test_verify.c b/app/test-compress-perf/comp_perf_test_verify.c
new file mode 100644
index 0000000..2ce2c31
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_test_verify.c
@@ -0,0 +1,337 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_compressdev.h>
+
+#include "comp_perf_test_verify.h"
+
+/*
+ * Run one verification pass over every buffer, in one direction.
+ *
+ * A deflate xform is built for the requested direction, one operation per
+ * buffer is enqueued on queue pair 0 of the device in bursts of at most
+ * burst_sz, and the bytes produced by each dequeued operation are appended
+ * to output_data_ptr in dequeue order.
+ *
+ * test_data:       test configuration and buffers; must not be NULL and
+ *                  must have a non-zero burst_sz
+ * level:           compression level put in the compress xform (not used
+ *                  when decompressing)
+ * type:            RTE_COMP_COMPRESS reads decomp_bufs and writes comp_bufs;
+ *                  any other value is handled as decompression and goes
+ *                  the other way round
+ * output_data_ptr: flat buffer receiving the concatenated output; its
+ *                  capacity is not checked here
+ * output_data_sz:  if not NULL, set to the number of bytes produced, only
+ *                  when the pass succeeds
+ *
+ * Returns 0 on success, -1 on any failure.
+ */
+static int
+main_loop(struct comp_test_data *test_data, uint8_t level,
+			enum rte_comp_xform_type type,
+			uint8_t *output_data_ptr,
+			size_t *output_data_sz)
+{
+	uint8_t dev_id;
+	uint32_t i, iter, num_iter;
+	struct rte_comp_op **ops, **deq_ops;
+	void *priv_xform = NULL;
+	struct rte_comp_xform xform;
+	size_t output_size = 0;
+	struct rte_mbuf **input_bufs, **output_bufs;
+	int res = 0;
+	int allocated = 0;
+
+	if (test_data == NULL || !test_data->burst_sz) {
+		RTE_LOG(ERR, USER1,
+			"Unknown burst size\n");
+		return -1;
+	}
+
+	/* Only read from test_data once it is known to be non-NULL */
+	dev_id = test_data->cdev_id;
+
+	/*
+	 * One array, two halves: the first total_bufs entries stage the
+	 * operations to enqueue, the second half receives dequeued ones.
+	 */
+	ops = rte_zmalloc_socket(NULL,
+		2 * test_data->total_bufs * sizeof(struct rte_comp_op *),
+		0, rte_socket_id());
+
+	if (ops == NULL) {
+		RTE_LOG(ERR, USER1,
+			"Can't allocate memory for ops strucures\n");
+		return -1;
+	}
+
+	deq_ops = &ops[test_data->total_bufs];
+
+	if (type == RTE_COMP_COMPRESS) {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_COMPRESS,
+			.compress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.deflate.huffman = test_data->huffman_enc,
+				.level = level,
+				.window_size = test_data->window_sz,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = test_data->decomp_bufs;
+		output_bufs = test_data->comp_bufs;
+	} else {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_DECOMPRESS,
+			.decompress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.window_size = test_data->window_sz,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = test_data->comp_bufs;
+		output_bufs = test_data->decomp_bufs;
+	}
+
+	/* Create private xform */
+	if (rte_compressdev_private_xform_create(dev_id, &xform,
+			&priv_xform) < 0) {
+		RTE_LOG(ERR, USER1, "Private xform could not be created\n");
+		res = -1;
+		goto end;
+	}
+
+	/* Verification needs a single pass only */
+	num_iter = 1;
+
+	for (iter = 0; iter < num_iter; iter++) {
+		uint32_t total_ops = test_data->total_bufs;
+		uint32_t remaining_ops = test_data->total_bufs;
+		uint32_t total_deq_ops = 0;
+		uint32_t total_enq_ops = 0;
+		uint16_t ops_unused = 0;
+		uint16_t num_enq = 0;
+		uint16_t num_deq = 0;
+
+		output_size = 0;
+
+		while (remaining_ops > 0) {
+			uint16_t num_ops = RTE_MIN(remaining_ops,
+						   test_data->burst_sz);
+			uint16_t ops_needed = num_ops - ops_unused;
+
+			/*
+			 * Move the unused operations from the previous
+			 * enqueue_burst call to the front, to maintain order
+			 */
+			if ((ops_unused > 0) && (num_enq > 0)) {
+				size_t nb_b_to_mov =
+				      ops_unused * sizeof(struct rte_comp_op *);
+
+				memmove(ops, &ops[num_enq], nb_b_to_mov);
+			}
+
+			/* Allocate compression operations */
+			if (ops_needed && !rte_comp_op_bulk_alloc(
+						test_data->op_pool,
+						&ops[ops_unused],
+						ops_needed)) {
+				RTE_LOG(ERR, USER1,
+				      "Could not allocate enough operations\n");
+				res = -1;
+				goto end;
+			}
+			allocated += ops_needed;
+
+			/* Only the freshly allocated operations are set up */
+			for (i = 0; i < ops_needed; i++) {
+				/*
+				 * Calculate next buffer to attach to operation
+				 */
+				uint32_t buf_id = total_enq_ops + i +
+						ops_unused;
+				uint16_t op_id = ops_unused + i;
+				/* Reset all data in output buffers */
+				struct rte_mbuf *m = output_bufs[buf_id];
+
+				m->pkt_len = test_data->seg_sz * m->nb_segs;
+				while (m) {
+					m->data_len = m->buf_len - m->data_off;
+					m = m->next;
+				}
+				ops[op_id]->m_src = input_bufs[buf_id];
+				ops[op_id]->m_dst = output_bufs[buf_id];
+				ops[op_id]->src.offset = 0;
+				ops[op_id]->src.length =
+					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
+				ops[op_id]->dst.offset = 0;
+				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
+				/*
+				 * Checksums are disabled in the xform; the
+				 * field carries the buffer index instead
+				 * (presumably as a tag - TODO confirm).
+				 */
+				ops[op_id]->input_chksum = buf_id;
+				ops[op_id]->private_xform = priv_xform;
+			}
+
+			num_enq = rte_compressdev_enqueue_burst(dev_id, 0, ops,
+								num_ops);
+			/*
+			 * Nothing accepted: give up only if the device
+			 * reports enqueue errors, otherwise retry.
+			 */
+			if (num_enq == 0) {
+				struct rte_compressdev_stats stats;
+
+				rte_compressdev_stats_get(dev_id, &stats);
+				if (stats.enqueue_err_count) {
+					res = -1;
+					goto end;
+				}
+			}
+
+			ops_unused = num_ops - num_enq;
+			remaining_ops -= num_enq;
+			total_enq_ops += num_enq;
+
+			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
+							   deq_ops,
+							   test_data->burst_sz);
+			total_deq_ops += num_deq;
+
+			/* Append what each finished operation produced */
+			for (i = 0; i < num_deq; i++) {
+				struct rte_comp_op *op = deq_ops[i];
+				const void *read_data_addr =
+						rte_pktmbuf_read(op->m_dst, 0,
+						op->produced, output_data_ptr);
+				if (read_data_addr == NULL) {
+					RTE_LOG(ERR, USER1,
+						"Could not copy buffer in destination\n");
+					res = -1;
+					goto end;
+				}
+
+				/*
+				 * The returned address differs from the
+				 * supplied buffer when nothing was copied
+				 * into it, so copy explicitly in that case.
+				 */
+				if (read_data_addr != output_data_ptr)
+					rte_memcpy(output_data_ptr,
+						   rte_pktmbuf_mtod(op->m_dst,
+								    uint8_t *),
+						   op->produced);
+				output_data_ptr += op->produced;
+				output_size += op->produced;
+
+			}
+
+
+			/*
+			 * On the last pass, shrink the destination mbuf
+			 * lengths to the produced size, segment by segment.
+			 */
+			if (iter == num_iter - 1) {
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+					struct rte_mbuf *m = op->m_dst;
+
+					m->pkt_len = op->produced;
+					uint32_t remaining_data = op->produced;
+					uint16_t data_to_append;
+
+					while (remaining_data > 0) {
+						data_to_append =
+							RTE_MIN(remaining_data,
+							     test_data->seg_sz);
+						m->data_len = data_to_append;
+						remaining_data -=
+								data_to_append;
+						m = m->next;
+					}
+				}
+			}
+			rte_mempool_put_bulk(test_data->op_pool,
+					     (void **)deq_ops, num_deq);
+			allocated -= num_deq;
+		}
+
+		/* Dequeue the last operations */
+		while (total_deq_ops < total_ops) {
+			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
+						deq_ops, test_data->burst_sz);
+			if (num_deq == 0) {
+				struct rte_compressdev_stats stats;
+
+				rte_compressdev_stats_get(dev_id, &stats);
+				if (stats.dequeue_err_count) {
+					res = -1;
+					goto end;
+				}
+			}
+
+			total_deq_ops += num_deq;
+
+			/* Same output handling as in the enqueue loop above */
+			for (i = 0; i < num_deq; i++) {
+				struct rte_comp_op *op = deq_ops[i];
+				const void *read_data_addr =
+						rte_pktmbuf_read(op->m_dst, 0,
+						op->produced, output_data_ptr);
+				if (read_data_addr == NULL) {
+					RTE_LOG(ERR, USER1,
+						"Could not copy buffer in destination\n");
+					res = -1;
+					goto end;
+				}
+
+				if (read_data_addr != output_data_ptr)
+					rte_memcpy(output_data_ptr,
+						   rte_pktmbuf_mtod(
+							op->m_dst, uint8_t *),
+						   op->produced);
+				output_data_ptr += op->produced;
+				output_size += op->produced;
+
+			}
+
+			if (iter == num_iter - 1) {
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+					struct rte_mbuf *m = op->m_dst;
+
+					m->pkt_len = op->produced;
+					uint32_t remaining_data = op->produced;
+					uint16_t data_to_append;
+
+					while (remaining_data > 0) {
+						data_to_append =
+						RTE_MIN(remaining_data,
+							test_data->seg_sz);
+						m->data_len = data_to_append;
+						remaining_data -=
+								data_to_append;
+						m = m->next;
+					}
+				}
+			}
+			rte_mempool_put_bulk(test_data->op_pool,
+					     (void **)deq_ops, num_deq);
+			allocated -= num_deq;
+		}
+	}
+
+	if (output_data_sz)
+		*output_data_sz = output_size;
+end:
+	/*
+	 * NOTE(review): on the error paths 'allocated' also counts operations
+	 * that were enqueued but not dequeued yet, while only the operations
+	 * not yet enqueued sit at the front of ops[] - confirm that returning
+	 * the first 'allocated' entries is really what is intended here.
+	 */
+	rte_mempool_put_bulk(test_data->op_pool, (void **)ops, allocated);
+	rte_compressdev_private_xform_free(dev_id, priv_xform);
+	rte_free(ops);
+	return res;
+}
+
+
+
+/*
+ * Compress then decompress the test buffers for one level and check that
+ * the round trip gives back the input data.
+ *
+ * On success the compression ratio (compressed size as a percentage of the
+ * input size) is stored in test_data->ratio and EXIT_SUCCESS is returned.
+ * On any failure EXIT_FAILURE is returned and the ratio is left at 0.
+ */
+int
+cperf_verification(struct comp_test_data *test_data, uint8_t level) {
+	int rc;
+
+	test_data->ratio = 0;
+
+	/* Compression pass, output collected in compressed_data */
+	rc = main_loop(test_data, level, RTE_COMP_COMPRESS,
+		       test_data->compressed_data,
+		       &test_data->comp_data_sz);
+	if (rc < 0)
+		return EXIT_FAILURE;
+
+	/* Decompression pass, output collected in decompressed_data */
+	rc = main_loop(test_data, level, RTE_COMP_DECOMPRESS,
+		       test_data->decompressed_data,
+		       &test_data->decomp_data_sz);
+	if (rc < 0)
+		return EXIT_FAILURE;
+
+	/* The sizes have to match before the contents are compared */
+	if (test_data->decomp_data_sz != test_data->input_data_sz) {
+		RTE_LOG(ERR, USER1,
+	   "Decompressed data length not equal to input data length\n");
+		RTE_LOG(ERR, USER1,
+			"Decompressed size = %zu, expected = %zu\n",
+			test_data->decomp_data_sz, test_data->input_data_sz);
+		return EXIT_FAILURE;
+	}
+
+	if (memcmp(test_data->decompressed_data, test_data->input_data,
+		   test_data->input_data_sz) != 0) {
+		RTE_LOG(ERR, USER1,
+		    "Decompressed data is not the same as file data\n");
+		return EXIT_FAILURE;
+	}
+
+	test_data->ratio = (double) test_data->comp_data_sz /
+			test_data->input_data_sz * 100;
+
+	return EXIT_SUCCESS;
+}
diff --git a/app/test-compress-perf/comp_perf_test_verify.h b/app/test-compress-perf/comp_perf_test_verify.h
new file mode 100644
index 0000000..67c6b49
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_test_verify.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _COMP_PERF_TEST_VERIFY_
+#define _COMP_PERF_TEST_VERIFY_
+
+#include "comp_perf_options.h"
+
+int
+cperf_verification(struct comp_test_data *test_data, uint8_t level);
+
+#endif
diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c
index 5950c96..4de913e 100644
--- a/app/test-compress-perf/main.c
+++ b/app/test-compress-perf/main.c
@@ -5,10 +5,11 @@
 #include <rte_malloc.h>
 #include <rte_eal.h>
 #include <rte_log.h>
-#include <rte_cycles.h>
 #include <rte_compressdev.h>
 
 #include "comp_perf_options.h"
+#include "comp_perf_test_verify.h"
+#include "comp_perf_test_benchmark.h"
 
 #define NUM_MAX_XFORMS 16
 #define NUM_MAX_INFLIGHT_OPS 512
@@ -442,285 +443,7 @@ free_bufs(struct comp_test_data *test_data)
 	}
 }
 
-static int
-main_loop(struct comp_test_data *test_data, uint8_t level,
-			enum rte_comp_xform_type type,
-			uint8_t *output_data_ptr,
-			size_t *output_data_sz,
-			unsigned int benchmarking)
-{
-	uint8_t dev_id = test_data->cdev_id;
-	uint32_t i, iter, num_iter;
-	struct rte_comp_op **ops, **deq_ops;
-	void *priv_xform = NULL;
-	struct rte_comp_xform xform;
-	size_t output_size = 0;
-	struct rte_mbuf **input_bufs, **output_bufs;
-	int res = 0;
-	int allocated = 0;
-
-	if (test_data == NULL || !test_data->burst_sz) {
-		RTE_LOG(ERR, USER1,
-			"Unknown burst size\n");
-		return -1;
-	}
-
-	ops = rte_zmalloc_socket(NULL,
-		2 * test_data->total_bufs * sizeof(struct rte_comp_op *),
-		0, rte_socket_id());
-
-	if (ops == NULL) {
-		RTE_LOG(ERR, USER1,
-			"Can't allocate memory for ops strucures\n");
-		return -1;
-	}
-
-	deq_ops = &ops[test_data->total_bufs];
-
-	if (type == RTE_COMP_COMPRESS) {
-		xform = (struct rte_comp_xform) {
-			.type = RTE_COMP_COMPRESS,
-			.compress = {
-				.algo = RTE_COMP_ALGO_DEFLATE,
-				.deflate.huffman = test_data->huffman_enc,
-				.level = level,
-				.window_size = test_data->window_sz,
-				.chksum = RTE_COMP_CHECKSUM_NONE,
-				.hash_algo = RTE_COMP_HASH_ALGO_NONE
-			}
-		};
-		input_bufs = test_data->decomp_bufs;
-		output_bufs = test_data->comp_bufs;
-	} else {
-		xform = (struct rte_comp_xform) {
-			.type = RTE_COMP_DECOMPRESS,
-			.decompress = {
-				.algo = RTE_COMP_ALGO_DEFLATE,
-				.chksum = RTE_COMP_CHECKSUM_NONE,
-				.window_size = test_data->window_sz,
-				.hash_algo = RTE_COMP_HASH_ALGO_NONE
-			}
-		};
-		input_bufs = test_data->comp_bufs;
-		output_bufs = test_data->decomp_bufs;
-	}
-
-	/* Create private xform */
-	if (rte_compressdev_private_xform_create(dev_id, &xform,
-			&priv_xform) < 0) {
-		RTE_LOG(ERR, USER1, "Private xform could not be created\n");
-		res = -1;
-		goto end;
-	}
 
-	uint64_t tsc_start, tsc_end, tsc_duration;
-
-	tsc_start = tsc_end = tsc_duration = 0;
-	if (benchmarking) {
-		tsc_start = rte_rdtsc();
-		num_iter = test_data->num_iter;
-	} else
-		num_iter = 1;
-
-	for (iter = 0; iter < num_iter; iter++) {
-		uint32_t total_ops = test_data->total_bufs;
-		uint32_t remaining_ops = test_data->total_bufs;
-		uint32_t total_deq_ops = 0;
-		uint32_t total_enq_ops = 0;
-		uint16_t ops_unused = 0;
-		uint16_t num_enq = 0;
-		uint16_t num_deq = 0;
-
-		output_size = 0;
-
-		while (remaining_ops > 0) {
-			uint16_t num_ops = RTE_MIN(remaining_ops,
-						   test_data->burst_sz);
-			uint16_t ops_needed = num_ops - ops_unused;
-
-			/*
-			 * Move the unused operations from the previous
-			 * enqueue_burst call to the front, to maintain order
-			 */
-			if ((ops_unused > 0) && (num_enq > 0)) {
-				size_t nb_b_to_mov =
-				      ops_unused * sizeof(struct rte_comp_op *);
-
-				memmove(ops, &ops[num_enq], nb_b_to_mov);
-			}
-
-			/* Allocate compression operations */
-			if (ops_needed && !rte_comp_op_bulk_alloc(
-						test_data->op_pool,
-						&ops[ops_unused],
-						ops_needed)) {
-				RTE_LOG(ERR, USER1,
-				      "Could not allocate enough operations\n");
-				res = -1;
-				goto end;
-			}
-			allocated += ops_needed;
-
-			for (i = 0; i < ops_needed; i++) {
-				/*
-				 * Calculate next buffer to attach to operation
-				 */
-				uint32_t buf_id = total_enq_ops + i +
-						ops_unused;
-				uint16_t op_id = ops_unused + i;
-				/* Reset all data in output buffers */
-				struct rte_mbuf *m = output_bufs[buf_id];
-
-				m->pkt_len = test_data->seg_sz * m->nb_segs;
-				while (m) {
-					m->data_len = m->buf_len - m->data_off;
-					m = m->next;
-				}
-				ops[op_id]->m_src = input_bufs[buf_id];
-				ops[op_id]->m_dst = output_bufs[buf_id];
-				ops[op_id]->src.offset = 0;
-				ops[op_id]->src.length =
-					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
-				ops[op_id]->dst.offset = 0;
-				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
-				ops[op_id]->input_chksum = buf_id;
-				ops[op_id]->private_xform = priv_xform;
-			}
-
-			num_enq = rte_compressdev_enqueue_burst(dev_id, 0, ops,
-								num_ops);
-			ops_unused = num_ops - num_enq;
-			remaining_ops -= num_enq;
-			total_enq_ops += num_enq;
-
-			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
-							   deq_ops,
-							   test_data->burst_sz);
-			total_deq_ops += num_deq;
-			if (benchmarking == 0) {
-				for (i = 0; i < num_deq; i++) {
-					struct rte_comp_op *op = deq_ops[i];
-					const void *read_data_addr =
-						rte_pktmbuf_read(op->m_dst, 0,
-						op->produced, output_data_ptr);
-					if (read_data_addr == NULL) {
-						RTE_LOG(ERR, USER1,
-				      "Could not copy buffer in destination\n");
-						res = -1;
-						goto end;
-					}
-
-					if (read_data_addr != output_data_ptr)
-						rte_memcpy(output_data_ptr,
-							rte_pktmbuf_mtod(
-							  op->m_dst, uint8_t *),
-							op->produced);
-					output_data_ptr += op->produced;
-					output_size += op->produced;
-
-				}
-			}
-
-			if (iter == num_iter - 1) {
-				for (i = 0; i < num_deq; i++) {
-					struct rte_comp_op *op = deq_ops[i];
-					struct rte_mbuf *m = op->m_dst;
-
-					m->pkt_len = op->produced;
-					uint32_t remaining_data = op->produced;
-					uint16_t data_to_append;
-
-					while (remaining_data > 0) {
-						data_to_append =
-							RTE_MIN(remaining_data,
-							     test_data->seg_sz);
-						m->data_len = data_to_append;
-						remaining_data -=
-								data_to_append;
-						m = m->next;
-					}
-				}
-			}
-			rte_mempool_put_bulk(test_data->op_pool,
-					     (void **)deq_ops, num_deq);
-			allocated -= num_deq;
-		}
-
-		/* Dequeue the last operations */
-		while (total_deq_ops < total_ops) {
-			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
-						deq_ops, test_data->burst_sz);
-			total_deq_ops += num_deq;
-			if (benchmarking == 0) {
-				for (i = 0; i < num_deq; i++) {
-					struct rte_comp_op *op = deq_ops[i];
-					const void *read_data_addr =
-						rte_pktmbuf_read(op->m_dst, 0,
-						op->produced, output_data_ptr);
-					if (read_data_addr == NULL) {
-						RTE_LOG(ERR, USER1,
-				      "Could not copy buffer in destination\n");
-						res = -1;
-						goto end;
-					}
-
-					if (read_data_addr != output_data_ptr)
-						rte_memcpy(output_data_ptr,
-							rte_pktmbuf_mtod(
-							op->m_dst, uint8_t *),
-							op->produced);
-					output_data_ptr += op->produced;
-					output_size += op->produced;
-
-				}
-			}
-
-			if (iter == num_iter - 1) {
-				for (i = 0; i < num_deq; i++) {
-					struct rte_comp_op *op = deq_ops[i];
-					struct rte_mbuf *m = op->m_dst;
-
-					m->pkt_len = op->produced;
-					uint32_t remaining_data = op->produced;
-					uint16_t data_to_append;
-
-					while (remaining_data > 0) {
-						data_to_append =
-						RTE_MIN(remaining_data,
-							test_data->seg_sz);
-						m->data_len = data_to_append;
-						remaining_data -=
-								data_to_append;
-						m = m->next;
-					}
-				}
-			}
-			rte_mempool_put_bulk(test_data->op_pool,
-					     (void **)deq_ops, num_deq);
-			allocated -= num_deq;
-		}
-	}
-
-	if (benchmarking) {
-		tsc_end = rte_rdtsc();
-		tsc_duration = tsc_end - tsc_start;
-
-		if (type == RTE_COMP_COMPRESS)
-			test_data->comp_tsc_duration[level] =
-					tsc_duration / num_iter;
-		else
-			test_data->decomp_tsc_duration[level] =
-					tsc_duration / num_iter;
-	}
-
-	if (benchmarking == 0 && output_data_sz)
-		*output_data_sz = output_size;
-end:
-	rte_mempool_put_bulk(test_data->op_pool, (void **)ops, allocated);
-	rte_compressdev_private_xform_free(dev_id, priv_xform);
-	rte_free(ops);
-	return res;
-}
 
 int
 main(int argc, char **argv)
@@ -743,6 +466,7 @@ main(int argc, char **argv)
 		rte_exit(EXIT_FAILURE, "Cannot reserve memory in socket %d\n",
 				rte_socket_id());
 
+	ret = EXIT_SUCCESS;
 	cleanup = ST_TEST_DATA;
 	comp_perf_options_default(test_data);
 
@@ -785,9 +509,6 @@ main(int argc, char **argv)
 	else
 		level = test_data->level.list[0];
 
-	size_t comp_data_sz;
-	size_t decomp_data_sz;
-
 	printf("Burst size = %u\n", test_data->burst_sz);
 	printf("File size = %zu\n", test_data->input_data_sz);
 
@@ -798,84 +519,27 @@ main(int argc, char **argv)
 
 	cleanup = ST_DURING_TEST;
 	while (level <= test_data->level.max) {
+
 		/*
 		 * Run a first iteration, to verify compression and
 		 * get the compression ratio for the level
 		 */
-		if (main_loop(test_data, level, RTE_COMP_COMPRESS,
-			      test_data->compressed_data,
-			      &comp_data_sz, 0) < 0) {
-			ret = EXIT_FAILURE;
-			goto end;
-		}
-
-		if (main_loop(test_data, level, RTE_COMP_DECOMPRESS,
-			      test_data->decompressed_data,
-			      &decomp_data_sz, 0) < 0) {
-			ret = EXIT_FAILURE;
-			goto end;
-		}
-
-		if (decomp_data_sz != test_data->input_data_sz) {
-			RTE_LOG(ERR, USER1,
-		   "Decompressed data length not equal to input data length\n");
-			RTE_LOG(ERR, USER1,
-				"Decompressed size = %zu, expected = %zu\n",
-				decomp_data_sz, test_data->input_data_sz);
-			ret = EXIT_FAILURE;
-			goto end;
-		} else {
-			if (memcmp(test_data->decompressed_data,
-					test_data->input_data,
-					test_data->input_data_sz) != 0) {
-				RTE_LOG(ERR, USER1,
-			    "Decompressed data is not the same as file data\n");
-				ret = EXIT_FAILURE;
-				goto end;
-			}
-		}
-
-		double ratio = (double) comp_data_sz /
-						test_data->input_data_sz * 100;
+		if (cperf_verification(test_data, level) != EXIT_SUCCESS)
+			break;
 
 		/*
-		 * Run the tests twice, discarding the first performance
-		 * results, before the cache is warmed up
+		 * Run benchmarking test
 		 */
-		for (i = 0; i < 2; i++) {
-			if (main_loop(test_data, level, RTE_COMP_COMPRESS,
-					NULL, NULL, 1) < 0) {
-				ret = EXIT_FAILURE;
-				goto end;
-			}
-		}
-
-		for (i = 0; i < 2; i++) {
-			if (main_loop(test_data, level, RTE_COMP_DECOMPRESS,
-					NULL, NULL, 1) < 0) {
-				ret = EXIT_FAILURE;
-				goto end;
-			}
-		}
-
-		uint64_t comp_tsc_duration =
-				test_data->comp_tsc_duration[level];
-		double comp_tsc_byte = (double)comp_tsc_duration /
-						test_data->input_data_sz;
-		double comp_gbps = rte_get_tsc_hz() / comp_tsc_byte * 8 /
-				1000000000;
-		uint64_t decomp_tsc_duration =
-				test_data->decomp_tsc_duration[level];
-		double decomp_tsc_byte = (double)decomp_tsc_duration /
-						test_data->input_data_sz;
-		double decomp_gbps = rte_get_tsc_hz() / decomp_tsc_byte * 8 /
-				1000000000;
+		if (cperf_benchmark(test_data, level) != EXIT_SUCCESS)
+			break;
 
 		printf("%6u%12zu%17.2f%19"PRIu64"%21.2f"
 					"%15.2f%21"PRIu64"%23.2f%16.2f\n",
-		       level, comp_data_sz, ratio, comp_tsc_duration,
-		       comp_tsc_byte, comp_gbps, decomp_tsc_duration,
-		       decomp_tsc_byte, decomp_gbps);
+		       level, test_data->comp_data_sz, test_data->ratio,
+		       test_data->comp_tsc_duration[level],
+		       test_data->comp_tsc_byte, test_data->comp_gbps,
+		       test_data->decomp_tsc_duration[level],
+		       test_data->decomp_tsc_byte, test_data->decomp_gbps);
 
 		if (test_data->level.inc != 0)
 			level += test_data->level.inc;
@@ -886,8 +550,6 @@ main(int argc, char **argv)
 		}
 	}
 
-	ret = EXIT_SUCCESS;
-
 end:
 	switch (cleanup) {
 
diff --git a/app/test-compress-perf/meson.build b/app/test-compress-perf/meson.build
index ba6d64d..ec73e5e 100644
--- a/app/test-compress-perf/meson.build
+++ b/app/test-compress-perf/meson.build
@@ -3,5 +3,7 @@
 
 allow_experimental_apis = true
 sources = files('comp_perf_options_parse.c',
-		'main.c')
+		'main.c',
+		'comp_perf_test_verify.c',
+		'comp_perf_test_benchmark.c')
 deps = ['compressdev']
-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH v4 0/5] add initial version of compress-perf
  2018-11-23 13:06   ` [dpdk-dev] [PATCH v3 0/5] " Tomasz Jozwiak
                       ` (4 preceding siblings ...)
  2018-11-23 13:06     ` [dpdk-dev] [PATCH v3 5/5] app/compress-perf: code refactoring Tomasz Jozwiak
@ 2018-11-23 14:27     ` Tomasz Jozwiak
  2018-11-23 14:27       ` [dpdk-dev] [PATCH v4 1/5] app/compress-perf: add parser Tomasz Jozwiak
                         ` (5 more replies)
  5 siblings, 6 replies; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-11-23 14:27 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, Shally.Verma, akhil.goyal

This patchset adds initial version of compression performance
test.

v4 changes:
  - fixed checkpatch issues
  - code cleanup

  Note: The 19.02 release notes will be updated once the file has been created

v3 changes:
  - Added dynamic compression
  - Code refactoring to separate validation
    from benchmarking part
  - Updated documentation
  - Added fail detection from rte_compressdev_enqueue_burst
    and rte_compressdev_dequeue_burst functions
  - Code cleanup
  
v2 changes:

  -  Added release note
  -  Added new cleanup flow into main function
  -  Blocked dynamic compression test because it hasn't been
     tested enough
  -  Changed `--max-num-sgl-segs' default value to 16
  -  Updated documentation

Opens:
  Comment from Shally Verma re removing the `--max-num-sgl-segs'
  option from the command line (it can be done after a compression API
  change, e.g. adding a new capability field to the
  rte_compressdev_info struct)


Tomasz Jozwiak (5):
  app/compress-perf: add parser
  app/compress-perf: add performance measurement
  doc/guides/tools: add doc files
  app/compress-perf: add dynamic compression test
  app/compress-perf: code refactoring

 MAINTAINERS                                       |   5 +
 app/Makefile                                      |   4 +
 app/meson.build                                   |   1 +
 app/test-compress-perf/Makefile                   |  18 +
 app/test-compress-perf/comp_perf_options.h        |  71 +++
 app/test-compress-perf/comp_perf_options_parse.c  | 596 ++++++++++++++++++++++
 app/test-compress-perf/comp_perf_test_benchmark.c | 292 +++++++++++
 app/test-compress-perf/comp_perf_test_benchmark.h |  13 +
 app/test-compress-perf/comp_perf_test_verify.c    | 339 ++++++++++++
 app/test-compress-perf/comp_perf_test_verify.h    |  13 +
 app/test-compress-perf/main.c                     | 590 +++++++++++++++++++++
 app/test-compress-perf/meson.build                |   9 +
 config/common_base                                |   5 +
 doc/guides/rel_notes/release_18_11.rst            |   4 +
 doc/guides/tools/comp_perf.rst                    |  75 +++
 doc/guides/tools/index.rst                        |   1 +
 16 files changed, 2036 insertions(+)
 create mode 100644 app/test-compress-perf/Makefile
 create mode 100644 app/test-compress-perf/comp_perf_options.h
 create mode 100644 app/test-compress-perf/comp_perf_options_parse.c
 create mode 100644 app/test-compress-perf/comp_perf_test_benchmark.c
 create mode 100644 app/test-compress-perf/comp_perf_test_benchmark.h
 create mode 100644 app/test-compress-perf/comp_perf_test_verify.c
 create mode 100644 app/test-compress-perf/comp_perf_test_verify.h
 create mode 100644 app/test-compress-perf/main.c
 create mode 100644 app/test-compress-perf/meson.build
 create mode 100644 doc/guides/tools/comp_perf.rst

-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH v4 1/5] app/compress-perf: add parser
  2018-11-23 14:27     ` [dpdk-dev] [PATCH v4 0/5] add initial version of compress-perf Tomasz Jozwiak
@ 2018-11-23 14:27       ` Tomasz Jozwiak
  2018-11-23 15:10         ` Varghese, Vipin
  2018-11-23 14:27       ` [dpdk-dev] [PATCH v4 2/5] app/compress-perf: add performance measurement Tomasz Jozwiak
                         ` (4 subsequent siblings)
  5 siblings, 1 reply; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-11-23 14:27 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, Shally.Verma, akhil.goyal

Added parser part into compression perf. test.

Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
---
 app/Makefile                                     |   4 +
 app/meson.build                                  |   1 +
 app/test-compress-perf/Makefile                  |  16 +
 app/test-compress-perf/comp_perf_options.h       |  59 +++
 app/test-compress-perf/comp_perf_options_parse.c | 596 +++++++++++++++++++++++
 app/test-compress-perf/main.c                    |  52 ++
 app/test-compress-perf/meson.build               |   7 +
 config/common_base                               |   5 +
 8 files changed, 740 insertions(+)
 create mode 100644 app/test-compress-perf/Makefile
 create mode 100644 app/test-compress-perf/comp_perf_options.h
 create mode 100644 app/test-compress-perf/comp_perf_options_parse.c
 create mode 100644 app/test-compress-perf/main.c
 create mode 100644 app/test-compress-perf/meson.build

diff --git a/app/Makefile b/app/Makefile
index 069fa98..d6641ef 100644
--- a/app/Makefile
+++ b/app/Makefile
@@ -11,6 +11,10 @@ ifeq ($(CONFIG_RTE_LIBRTE_BBDEV),y)
 DIRS-$(CONFIG_RTE_TEST_BBDEV) += test-bbdev
 endif
 
+ifeq ($(CONFIG_RTE_LIBRTE_COMPRESSDEV),y)
+DIRS-$(CONFIG_RTE_APP_COMPRESS_PERF) += test-compress-perf
+endif
+
 ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
 DIRS-$(CONFIG_RTE_APP_CRYPTO_PERF) += test-crypto-perf
 endif
diff --git a/app/meson.build b/app/meson.build
index a9a026b..47a2a86 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -4,6 +4,7 @@
 apps = ['pdump',
 	'proc-info',
 	'test-bbdev',
+	'test-compress-perf',
 	'test-crypto-perf',
 	'test-eventdev',
 	'test-pmd']
diff --git a/app/test-compress-perf/Makefile b/app/test-compress-perf/Makefile
new file mode 100644
index 0000000..8aa7a22
--- /dev/null
+++ b/app/test-compress-perf/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+APP = dpdk-test-compress-perf
+
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+CFLAGS += -O3
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+SRCS-y += comp_perf_options_parse.c
+
+include $(RTE_SDK)/mk/rte.app.mk
diff --git a/app/test-compress-perf/comp_perf_options.h b/app/test-compress-perf/comp_perf_options.h
new file mode 100644
index 0000000..7516ea0
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_options.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#define MAX_DRIVER_NAME		64
+#define MAX_INPUT_FILE_NAME	64
+#define MAX_LIST		32
+
+/*
+ * Direction(s) a test run exercises; stored in comp_test_data.test_op.
+ * Presumably selected with --operation [comp/decomp/comp_and_decomp] -
+ * TODO confirm the mapping in the option parser.
+ */
+enum comp_operation {
+	COMPRESS_ONLY,
+	DECOMPRESS_ONLY,
+	COMPRESS_DECOMPRESS
+};
+
+/*
+ * A set of 8-bit option values, given either as a range (min, max, inc) or
+ * as an explicit list of at most MAX_LIST entries.
+ * NOTE(review): 'count' is presumably the number of valid entries in
+ * 'list', and inc == 0 presumably means the list form is in use - confirm
+ * against the option parser.
+ */
+struct range_list {
+	uint8_t min;
+	uint8_t max;
+	uint8_t inc;
+	uint8_t count;
+	uint8_t list[MAX_LIST];
+};
+
+/*
+ * Everything one test run needs: the command line settings, the buffers and
+ * mempools built from them, and the per-level timing results.
+ */
+struct comp_test_data {
+	/* --driver-name: compress driver to use */
+	char driver_name[MAX_DRIVER_NAME];
+	/* --input-file: file to compress and decompress */
+	char input_file[MAX_INPUT_FILE_NAME];
+	/* mbufs with compressed data: compression output, decompression input */
+	struct rte_mbuf **comp_bufs;
+	/* mbufs with plain data: compression input, decompression output */
+	struct rte_mbuf **decomp_bufs;
+	/* Number of entries in each of the two mbuf arrays */
+	uint32_t total_bufs;
+	/* Reference data that the decompressed output is compared against */
+	uint8_t *input_data;
+	size_t input_data_sz;
+	/* Flat buffers collecting the output of each direction */
+	uint8_t *compressed_data;
+	uint8_t *decompressed_data;
+	/* Presumably the pools backing comp_bufs/decomp_bufs - TODO confirm */
+	struct rte_mempool *comp_buf_pool;
+	struct rte_mempool *decomp_buf_pool;
+	/* Pool the compression operations are allocated from */
+	struct rte_mempool *op_pool;
+	/*
+	 * Compress device id.
+	 * NOTE(review): signed here but used as uint8_t by the callers of the
+	 * compressdev API - confirm whether a negative value is meaningful.
+	 */
+	int8_t cdev_id;
+	/* --seg-sz: size of segment to store the data */
+	uint16_t seg_sz;
+	/* --burst-sz: compress operation burst size */
+	uint16_t burst_sz;
+	/* --pool-sz: mempool size for compress operations/mbufs */
+	uint32_t pool_sz;
+	/* --num-iter: number of times the file is compressed/decompressed */
+	uint32_t num_iter;
+	/* --max-num-sgl-segs: maximum number of segments for each mbuf */
+	uint16_t max_sgl_segs;
+	/* --huffman-enc: Huffman encoding used for deflate compression */
+	enum rte_comp_huffman huffman_enc;
+	/* --operation: direction(s) to test */
+	enum comp_operation test_op;
+	/* --window-sz: base two log value of compression window size */
+	int window_sz;
+	/* --compress-level: single value, list or range of levels */
+	struct range_list level;
+	/* Store TSC duration for all levels (including level 0) */
+	uint64_t comp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
+	uint64_t decomp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
+};
+
+int
+comp_perf_options_parse(struct comp_test_data *test_data, int argc,
+			char **argv);
+
+void
+comp_perf_options_default(struct comp_test_data *test_data);
+
+int
+comp_perf_options_check(struct comp_test_data *test_data);
diff --git a/app/test-compress-perf/comp_perf_options_parse.c b/app/test-compress-perf/comp_perf_options_parse.c
new file mode 100644
index 0000000..5b9ea26
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_options_parse.c
@@ -0,0 +1,596 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <getopt.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include <rte_string_fns.h>
+#include <rte_comp.h>
+
+#include "comp_perf_options.h"
+
+#define CPERF_DRIVER_NAME	("driver-name")
+#define CPERF_TEST_FILE		("input-file")
+#define CPERF_SEG_SIZE		("seg-sz")
+#define CPERF_BURST_SIZE	("burst-sz")
+#define CPERF_EXTENDED_SIZE	("extended-input-sz")
+#define CPERF_POOL_SIZE		("pool-sz")
+#define CPERF_MAX_SGL_SEGS	("max-num-sgl-segs")
+#define CPERF_NUM_ITER		("num-iter")
+#define CPERF_OPTYPE		("operation")
+#define CPERF_HUFFMAN_ENC	("huffman-enc")
+#define CPERF_LEVEL		("compress-level")
+#define CPERF_WINDOW_SIZE	("window-sz")
+
+struct name_id_map {
+	const char *name;
+	uint32_t id;
+};
+
+static void
+usage(char *progname)
+{
+	printf("%s [EAL options] --\n"
+		" --driver-name NAME: compress driver to use\n"
+		" --input-file NAME: file to compress and decompress\n"
+		" --extended-input-sz N: extend file data up to this size (default: no extension)\n"
+		" --seg-sz N: size of segment to store the data (default: 2048)\n"
+		" --burst-sz N: compress operation burst size\n"
+		" --pool-sz N: mempool size for compress operations/mbufs\n"
+		"		(default: 8192)\n"
+		" --max-num-sgl-segs N: maximum number of segments for each mbuf\n"
+		"		(default: 16)\n"
+		" --num-iter N: number of times the file will be\n"
+		"		compressed/decompressed (default: 10000)\n"
+		" --operation [comp/decomp/comp_and_decomp]: perform test on\n"
+		"		compression, decompression or both operations\n"
+		" --huffman-enc [fixed/dynamic/default]: Huffman encoding\n"
+		"		(default: dynamic)\n"
+		" --compress-level N: compression level, which could be a single value, list or range\n"
+		"		(default: range between 1 and 9)\n"
+		" --window-sz N: base two log value of compression window size\n"
+		"		(e.g.: 15 => 32k, default: max supported by PMD)\n"
+		" -h: prints this help\n",
+		progname);
+}
+
+static int
+get_str_key_id_mapping(struct name_id_map *map, unsigned int map_len,
+		const char *str_key)
+{
+	unsigned int i;
+
+	for (i = 0; i < map_len; i++) {
+
+		if (strcmp(str_key, map[i].name) == 0)
+			return map[i].id;
+	}
+
+	return -1;
+}
+
+/*
+ * Parse a base-10 unsigned integer that must fit in 32 bits.
+ *
+ * Returns 0 and stores the result in *value on success, -1 when arg is
+ * empty or has trailing non-numeric characters, and -ERANGE when the
+ * number does not fit in a uint32_t. *value is left untouched on failure.
+ */
+static int
+parse_uint32_t(uint32_t *value, const char *arg)
+{
+	char *end = NULL;
+	unsigned long n;
+
+	errno = 0;
+	n = strtoul(arg, &end, 10);
+
+	/* Validate the string that was passed in, not the getopt global */
+	if ((arg[0] == '\0') || (end == NULL) || (*end != '\0'))
+		return -1;
+
+	/* strtoul() reports overflow of unsigned long through errno */
+	if (errno == ERANGE || n > UINT32_MAX)
+		return -ERANGE;
+
+	*value = (uint32_t) n;
+
+	return 0;
+}
+
+static int
+parse_uint16_t(uint16_t *value, const char *arg)
+{
+	uint32_t val = 0;
+	int ret = parse_uint32_t(&val, arg);
+
+	if (ret < 0)
+		return ret;
+
+	if (val > UINT16_MAX)
+		return -ERANGE;
+
+	*value = (uint16_t) val;
+
+	return 0;
+}
+
+/*
+ * Parse a "min:inc:max" range of 8-bit values.
+ *
+ * Returns 0 on success. Returns -1 if the string cannot be duplicated,
+ * does not split into exactly three ':'-separated tokens, if a value does
+ * not fit in a uint8_t, if the increment is 0 or if max is lower than min.
+ * Outputs parsed before the failure was detected may already have been
+ * written.
+ */
+static int
+parse_range(const char *arg, uint8_t *min, uint8_t *max, uint8_t *inc)
+{
+	char *token;
+	/*
+	 * Keep the full strtoul() result so that out-of-range input is
+	 * rejected instead of being silently truncated to 8 bits.
+	 */
+	unsigned long number;
+
+	char *copy_arg = strdup(arg);
+
+	if (copy_arg == NULL)
+		return -1;
+
+	errno = 0;
+	token = strtok(copy_arg, ":");
+
+	/* Parse minimum value */
+	if (token != NULL) {
+		number = strtoul(token, NULL, 10);
+
+		if (errno == EINVAL || errno == ERANGE ||
+				number > UINT8_MAX)
+			goto err_range;
+
+		*min = number;
+	} else
+		goto err_range;
+
+	token = strtok(NULL, ":");
+
+	/* Parse increment value */
+	if (token != NULL) {
+		number = strtoul(token, NULL, 10);
+
+		if (errno == EINVAL || errno == ERANGE ||
+				number == 0 || number > UINT8_MAX)
+			goto err_range;
+
+		*inc = number;
+	} else
+		goto err_range;
+
+	token = strtok(NULL, ":");
+
+	/* Parse maximum value */
+	if (token != NULL) {
+		number = strtoul(token, NULL, 10);
+
+		if (errno == EINVAL || errno == ERANGE ||
+				number < *min || number > UINT8_MAX)
+			goto err_range;
+
+		*max = number;
+	} else
+		goto err_range;
+
+	/* A fourth token means the argument is not a valid range */
+	if (strtok(NULL, ":") != NULL)
+		goto err_range;
+
+	free(copy_arg);
+	return 0;
+
+err_range:
+	free(copy_arg);
+	return -1;
+}
+
+/*
+ * Parse a ','-separated list of 8-bit values into list[].
+ *
+ * At most MAX_LIST entries are stored; further entries are dropped with a
+ * warning. When min/max are non-NULL they receive the smallest/largest
+ * stored value.
+ *
+ * Returns the number of stored entries, or -1 if the string cannot be
+ * duplicated, is empty, or holds a value that does not fit in a uint8_t.
+ */
+static int
+parse_list(const char *arg, uint8_t *list, uint8_t *min, uint8_t *max)
+{
+	char *token;
+	/*
+	 * Keep the full strtoul() result so that out-of-range input is
+	 * rejected instead of being silently truncated to 8 bits.
+	 */
+	unsigned long number;
+	uint8_t count = 0;
+	uint32_t temp_min;
+	uint32_t temp_max;
+
+	char *copy_arg = strdup(arg);
+
+	if (copy_arg == NULL)
+		return -1;
+
+	errno = 0;
+	token = strtok(copy_arg, ",");
+
+	/* Parse first value */
+	if (token != NULL) {
+		number = strtoul(token, NULL, 10);
+
+		if (errno == EINVAL || errno == ERANGE ||
+				number > UINT8_MAX)
+			goto err_list;
+
+		list[count++] = number;
+		temp_min = number;
+		temp_max = number;
+	} else
+		goto err_list;
+
+	token = strtok(NULL, ",");
+
+	while (token != NULL) {
+		if (count == MAX_LIST) {
+			RTE_LOG(WARNING, USER1,
+				"Using only the first %u sizes\n",
+					MAX_LIST);
+			break;
+		}
+
+		number = strtoul(token, NULL, 10);
+
+		if (errno == EINVAL || errno == ERANGE ||
+				number > UINT8_MAX)
+			goto err_list;
+
+		list[count++] = number;
+
+		if (number < temp_min)
+			temp_min = number;
+		if (number > temp_max)
+			temp_max = number;
+
+		token = strtok(NULL, ",");
+	}
+
+	if (min)
+		*min = temp_min;
+	if (max)
+		*max = temp_max;
+
+	free(copy_arg);
+	return count;
+
+err_list:
+	free(copy_arg);
+	return -1;
+}
+
+static int
+parse_num_iter(struct comp_test_data *test_data, const char *arg)
+{
+	int ret = parse_uint32_t(&test_data->num_iter, arg);
+
+	if (ret) {
+		RTE_LOG(ERR, USER1, "Failed to parse total iteration count\n");
+		return -1;
+	}
+
+	if (test_data->num_iter == 0) {
+		RTE_LOG(ERR, USER1,
+				"Total number of iterations must be higher than 0\n");
+		return -1;
+	}
+
+	return ret;
+}
+
+/*
+ * Parse the --pool-sz option into test_data->pool_sz.
+ *
+ * Returns 0 on success, -1 if arg is not a valid 32-bit unsigned number
+ * or is zero.
+ */
+static int
+parse_pool_sz(struct comp_test_data *test_data, const char *arg)
+{
+	int ret = parse_uint32_t(&test_data->pool_sz, arg);
+
+	if (ret) {
+		/* Terminate the message with a newline like the other parsers */
+		RTE_LOG(ERR, USER1, "Failed to parse pool size\n");
+		return -1;
+	}
+
+	if (test_data->pool_sz == 0) {
+		RTE_LOG(ERR, USER1, "Pool size must be higher than 0\n");
+		return -1;
+	}
+
+	return ret;
+}
+
+static int
+parse_burst_sz(struct comp_test_data *test_data, const char *arg)
+{
+	int ret = parse_uint16_t(&test_data->burst_sz, arg);
+
+	if (ret) {
+		RTE_LOG(ERR, USER1, "Failed to parse burst size/s\n");
+		return -1;
+	}
+
+	if (test_data->burst_sz == 0) {
+		RTE_LOG(ERR, USER1, "Burst size must be higher than 0\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+parse_extended_input_sz(struct comp_test_data *test_data, const char *arg)
+{
+	uint32_t tmp;
+	int ret = parse_uint32_t(&tmp, arg);
+
+	if (ret) {
+		RTE_LOG(ERR, USER1, "Failed to parse extended input size\n");
+		return -1;
+	}
+	test_data->input_data_sz = tmp;
+
+	if (tmp == 0) {
+		RTE_LOG(ERR, USER1,
+			"Extended file size must be higher than 0\n");
+		return -1;
+	}
+	return 0;
+}
+
+static int
+parse_seg_sz(struct comp_test_data *test_data, const char *arg)
+{
+	int ret = parse_uint16_t(&test_data->seg_sz, arg);
+
+	if (ret) {
+		RTE_LOG(ERR, USER1, "Failed to parse segment size\n");
+		return -1;
+	}
+
+	if (test_data->seg_sz == 0) {
+		RTE_LOG(ERR, USER1, "Segment size must be higher than 0\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+parse_max_num_sgl_segs(struct comp_test_data *test_data, const char *arg)
+{
+	int ret = parse_uint16_t(&test_data->max_sgl_segs, arg);
+
+	if (ret) {
+		RTE_LOG(ERR, USER1,
+			"Failed to parse max number of segments per mbuf chain\n");
+		return -1;
+	}
+
+	if (test_data->max_sgl_segs == 0) {
+		RTE_LOG(ERR, USER1, "Max number of segments per mbuf chain "
+			"must be higher than 0\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Parse the --window-sz option into test_data->window_sz.
+ *
+ * Returns 0 on success, -1 if arg is not a valid 16-bit unsigned number.
+ * test_data->window_sz is left untouched on failure.
+ */
+static int
+parse_window_sz(struct comp_test_data *test_data, const char *arg)
+{
+	/*
+	 * window_sz is an int: parse into a real uint16_t and assign,
+	 * rather than writing through a casted pointer, which would only
+	 * update part of the int.
+	 */
+	uint16_t window_sz = 0;
+	int ret = parse_uint16_t(&window_sz, arg);
+
+	if (ret) {
+		RTE_LOG(ERR, USER1, "Failed to parse window size\n");
+		return -1;
+	}
+
+	test_data->window_sz = window_sz;
+
+	return 0;
+}
+
+static int
+parse_driver_name(struct comp_test_data *test_data, const char *arg)
+{
+	if (strlen(arg) > (sizeof(test_data->driver_name) - 1))
+		return -1;
+
+	rte_strlcpy(test_data->driver_name, arg,
+			sizeof(test_data->driver_name));
+
+	return 0;
+}
+
+static int
+parse_test_file(struct comp_test_data *test_data, const char *arg)
+{
+	if (strlen(arg) > (sizeof(test_data->input_file) - 1))
+		return -1;
+
+	rte_strlcpy(test_data->input_file, arg, sizeof(test_data->input_file));
+
+	return 0;
+}
+
+static int
+parse_op_type(struct comp_test_data *test_data, const char *arg)
+{
+	struct name_id_map optype_namemap[] = {
+		{
+			"comp",
+			COMPRESS_ONLY
+		},
+		{
+			"decomp",
+			DECOMPRESS_ONLY
+		},
+		{
+			"comp_and_decomp",
+			COMPRESS_DECOMPRESS
+		}
+	};
+
+	int id = get_str_key_id_mapping(optype_namemap,
+			RTE_DIM(optype_namemap), arg);
+	if (id < 0) {
+		RTE_LOG(ERR, USER1, "Invalid operation type specified\n");
+		return -1;
+	}
+
+	test_data->test_op = (enum comp_operation)id;
+
+	return 0;
+}
+
+/*
+ * Parse the --huffman-enc option ("default", "fixed" or "dynamic") into
+ * test_data->huffman_enc.
+ *
+ * Returns 0 on success, -1 if arg is not one of the known names.
+ */
+static int
+parse_huffman_enc(struct comp_test_data *test_data, const char *arg)
+{
+	struct name_id_map huffman_namemap[] = {
+		{
+			"default",
+			RTE_COMP_HUFFMAN_DEFAULT
+		},
+		{
+			"fixed",
+			RTE_COMP_HUFFMAN_FIXED
+		},
+		{
+			"dynamic",
+			RTE_COMP_HUFFMAN_DYNAMIC
+		}
+	};
+
+	int id = get_str_key_id_mapping(huffman_namemap,
+			RTE_DIM(huffman_namemap), arg);
+	if (id < 0) {
+		RTE_LOG(ERR, USER1, "Invalid Huffman encoding specified\n");
+		return -1;
+	}
+
+	test_data->huffman_enc = (enum rte_comp_huffman)id;
+
+	return 0;
+}
+
+/*
+ * Parse the --compress-level option, given either as a "min:inc:max"
+ * range or as a ','-separated list (a single value is a one-entry list).
+ *
+ * Returns 0 on success, -1 if the argument cannot be parsed or if the
+ * highest requested level exceeds RTE_COMP_LEVEL_MAX.
+ */
+static int
+parse_level(struct comp_test_data *test_data, const char *arg)
+{
+	int ret;
+
+	/*
+	 * Try parsing the argument as a range, if it fails,
+	 * parse it as a list
+	 */
+	if (parse_range(arg, &test_data->level.min, &test_data->level.max,
+			&test_data->level.inc) < 0) {
+		ret = parse_list(arg, test_data->level.list,
+					&test_data->level.min,
+					&test_data->level.max);
+		if (ret < 0) {
+			RTE_LOG(ERR, USER1,
+				"Failed to parse compression level/s\n");
+			return -1;
+		}
+		test_data->level.count = ret;
+	}
+
+	/*
+	 * Bound the level for ranges as well as lists: the per-level
+	 * duration arrays only hold RTE_COMP_LEVEL_MAX + 1 entries.
+	 */
+	if (test_data->level.max > RTE_COMP_LEVEL_MAX) {
+		RTE_LOG(ERR, USER1, "Level cannot be higher than %u\n",
+				RTE_COMP_LEVEL_MAX);
+		return -1;
+	}
+
+	return 0;
+}
+
+typedef int (*option_parser_t)(struct comp_test_data *test_data,
+		const char *arg);
+
+struct long_opt_parser {
+	const char *lgopt_name;
+	option_parser_t parser_fn;
+
+};
+
+static struct option lgopts[] = {
+
+	{ CPERF_DRIVER_NAME, required_argument, 0, 0 },
+	{ CPERF_TEST_FILE, required_argument, 0, 0 },
+	{ CPERF_SEG_SIZE, required_argument, 0, 0 },
+	{ CPERF_BURST_SIZE, required_argument, 0, 0 },
+	{ CPERF_EXTENDED_SIZE, required_argument, 0, 0 },
+	{ CPERF_POOL_SIZE, required_argument, 0, 0 },
+	{ CPERF_MAX_SGL_SEGS, required_argument, 0, 0},
+	{ CPERF_NUM_ITER, required_argument, 0, 0 },
+	{ CPERF_OPTYPE,	required_argument, 0, 0 },
+	{ CPERF_HUFFMAN_ENC, required_argument, 0, 0 },
+	{ CPERF_LEVEL, required_argument, 0, 0 },
+	{ CPERF_WINDOW_SIZE, required_argument, 0, 0 },
+	{ NULL, 0, 0, 0 }
+};
+static int
+comp_perf_opts_parse_long(int opt_idx, struct comp_test_data *test_data)
+{
+	struct long_opt_parser parsermap[] = {
+		{ CPERF_DRIVER_NAME,	parse_driver_name },
+		{ CPERF_TEST_FILE,	parse_test_file },
+		{ CPERF_SEG_SIZE,	parse_seg_sz },
+		{ CPERF_BURST_SIZE,	parse_burst_sz },
+		{ CPERF_EXTENDED_SIZE,	parse_extended_input_sz },
+		{ CPERF_POOL_SIZE,	parse_pool_sz },
+		{ CPERF_MAX_SGL_SEGS,	parse_max_num_sgl_segs },
+		{ CPERF_NUM_ITER,	parse_num_iter },
+		{ CPERF_OPTYPE,		parse_op_type },
+		{ CPERF_HUFFMAN_ENC,	parse_huffman_enc },
+		{ CPERF_LEVEL,		parse_level },
+		{ CPERF_WINDOW_SIZE,	parse_window_sz },
+	};
+	unsigned int i;
+
+	for (i = 0; i < RTE_DIM(parsermap); i++) {
+		if (strncmp(lgopts[opt_idx].name, parsermap[i].lgopt_name,
+				strlen(lgopts[opt_idx].name)) == 0)
+			return parsermap[i].parser_fn(test_data, optarg);
+	}
+
+	return -EINVAL;
+}
+
+int
+comp_perf_options_parse(struct comp_test_data *test_data, int argc, char **argv)
+{
+	int opt, retval, opt_idx;
+
+	while ((opt = getopt_long(argc, argv, "h", lgopts, &opt_idx)) != EOF) {
+		switch (opt) {
+		case 'h':
+			usage(argv[0]);
+			rte_exit(EXIT_SUCCESS, "Displayed help\n");
+			break;
+		/* long options */
+		case 0:
+			retval = comp_perf_opts_parse_long(opt_idx, test_data);
+			if (retval != 0)
+				return retval;
+
+			break;
+
+		default:
+			usage(argv[0]);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+void
+comp_perf_options_default(struct comp_test_data *test_data)
+{
+	test_data->cdev_id = -1;
+	test_data->seg_sz = 2048;
+	test_data->burst_sz = 32;
+	test_data->pool_sz = 8192;
+	test_data->max_sgl_segs = UINT16_MAX;
+	test_data->num_iter = 10000;
+	test_data->huffman_enc = RTE_COMP_HUFFMAN_DYNAMIC;
+	test_data->test_op = COMPRESS_DECOMPRESS;
+	test_data->window_sz = -1;
+	test_data->level.min = 1;
+	test_data->level.max = 9;
+	test_data->level.inc = 1;
+}
+
+int
+comp_perf_options_check(struct comp_test_data *test_data)
+{
+	if (strcmp(test_data->driver_name, "") == 0) {
+		RTE_LOG(ERR, USER1, "Driver name has to be set\n");
+		return -1;
+	}
+
+	if (strcmp(test_data->input_file, "") == 0) {
+		RTE_LOG(ERR, USER1, "Input file name has to be set\n");
+		return -1;
+	}
+
+	return 0;
+}
diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c
new file mode 100644
index 0000000..f52b98d
--- /dev/null
+++ b/app/test-compress-perf/main.c
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_compressdev.h>
+
+#include "comp_perf_options.h"
+
+int
+main(int argc, char **argv)
+{
+	int ret;
+	struct comp_test_data *test_data;
+
+	/* Initialise DPDK EAL */
+	ret = rte_eal_init(argc, argv);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "Invalid EAL arguments!\n");
+	argc -= ret;
+	argv += ret;
+
+	test_data = rte_zmalloc_socket(NULL, sizeof(struct comp_test_data),
+					0, rte_socket_id());
+
+	if (test_data == NULL)
+		rte_exit(EXIT_FAILURE, "Cannot reserve memory in socket %d\n",
+				rte_socket_id());
+
+	comp_perf_options_default(test_data);
+
+	if (comp_perf_options_parse(test_data, argc, argv) < 0) {
+		RTE_LOG(ERR, USER1,
+			"Parsing one or more user options failed\n");
+		ret = EXIT_FAILURE;
+		goto err;
+	}
+
+	if (comp_perf_options_check(test_data) < 0) {
+		ret = EXIT_FAILURE;
+		goto err;
+	}
+
+	ret = EXIT_SUCCESS;
+
+err:
+	rte_free(test_data);
+
+	return ret;
+}
diff --git a/app/test-compress-perf/meson.build b/app/test-compress-perf/meson.build
new file mode 100644
index 0000000..ba6d64d
--- /dev/null
+++ b/app/test-compress-perf/meson.build
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+allow_experimental_apis = true
+sources = files('comp_perf_options_parse.c',
+		'main.c')
+deps = ['compressdev']
diff --git a/config/common_base b/config/common_base
index d12ae98..2ab4b7b 100644
--- a/config/common_base
+++ b/config/common_base
@@ -949,6 +949,11 @@ CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
 CONFIG_RTE_TEST_BBDEV=y
 
 #
+# Compile the compression performance application
+#
+CONFIG_RTE_APP_COMPRESS_PERF=y
+
+#
 # Compile the crypto performance application
 #
 CONFIG_RTE_APP_CRYPTO_PERF=y
-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH v4 2/5] app/compress-perf: add performance measurement
  2018-11-23 14:27     ` [dpdk-dev] [PATCH v4 0/5] add initial version of compress-perf Tomasz Jozwiak
  2018-11-23 14:27       ` [dpdk-dev] [PATCH v4 1/5] app/compress-perf: add parser Tomasz Jozwiak
@ 2018-11-23 14:27       ` Tomasz Jozwiak
  2018-11-23 14:27       ` [dpdk-dev] [PATCH v4 3/5] doc/guides/tools: add doc files Tomasz Jozwiak
                         ` (3 subsequent siblings)
  5 siblings, 0 replies; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-11-23 14:27 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, Shally.Verma, akhil.goyal

Added performance measurement part into compression perf. test.

Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
---
 app/test-compress-perf/comp_perf_options_parse.c |   8 +-
 app/test-compress-perf/main.c                    | 886 ++++++++++++++++++++++-
 2 files changed, 883 insertions(+), 11 deletions(-)

diff --git a/app/test-compress-perf/comp_perf_options_parse.c b/app/test-compress-perf/comp_perf_options_parse.c
index 5b9ea26..fd5d31c 100644
--- a/app/test-compress-perf/comp_perf_options_parse.c
+++ b/app/test-compress-perf/comp_perf_options_parse.c
@@ -431,10 +431,6 @@ parse_huffman_enc(struct comp_test_data *test_data, const char *arg)
 		{
 			"fixed",
 			RTE_COMP_HUFFMAN_FIXED
-		},
-		{
-			"dynamic",
-			RTE_COMP_HUFFMAN_DYNAMIC
 		}
 	};
 
@@ -569,9 +565,9 @@ comp_perf_options_default(struct comp_test_data *test_data)
 	test_data->seg_sz = 2048;
 	test_data->burst_sz = 32;
 	test_data->pool_sz = 8192;
-	test_data->max_sgl_segs = UINT16_MAX;
+	test_data->max_sgl_segs = 16;
 	test_data->num_iter = 10000;
-	test_data->huffman_enc = RTE_COMP_HUFFMAN_DYNAMIC;
+	test_data->huffman_enc = RTE_COMP_HUFFMAN_FIXED;
 	test_data->test_op = COMPRESS_DECOMPRESS;
 	test_data->window_sz = -1;
 	test_data->level.min = 1;
diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c
index f52b98d..5950c96 100644
--- a/app/test-compress-perf/main.c
+++ b/app/test-compress-perf/main.c
@@ -5,14 +5,728 @@
 #include <rte_malloc.h>
 #include <rte_eal.h>
 #include <rte_log.h>
+#include <rte_cycles.h>
 #include <rte_compressdev.h>
 
 #include "comp_perf_options.h"
 
+#define NUM_MAX_XFORMS 16
+#define NUM_MAX_INFLIGHT_OPS 512
+#define EXPANSE_RATIO 1.05
+#define MIN_COMPRESSED_BUF_SIZE 8
+
+#define DIV_CEIL(a, b)  ((a) / (b) + ((a) % (b) != 0))
+
+/* Cleanup state machine */
+static enum cleanup_st {
+	ST_CLEAR = 0,
+	ST_TEST_DATA,
+	ST_COMPDEV,
+	ST_INPUT_DATA,
+	ST_MEMORY_ALLOC,
+	ST_PREPARE_BUF,
+	ST_DURING_TEST
+} cleanup = ST_CLEAR;
+
+static int
+param_range_check(uint16_t size, const struct rte_param_log2_range *range)
+{
+	unsigned int next_size;
+
+	/* Check lower/upper bounds */
+	if (size < range->min)
+		return -1;
+
+	if (size > range->max)
+		return -1;
+
+	/* If range is actually only one value, size is correct */
+	if (range->increment == 0)
+		return 0;
+
+	/* Check if value is one of the supported sizes */
+	for (next_size = range->min; next_size <= range->max;
+			next_size += range->increment)
+		if (size == next_size)
+			return 0;
+
+	return -1;
+}
+
+static int
+comp_perf_check_capabilities(struct comp_test_data *test_data)
+{
+	const struct rte_compressdev_capabilities *cap;
+
+	cap = rte_compressdev_capability_get(test_data->cdev_id,
+					     RTE_COMP_ALGO_DEFLATE);
+
+	if (cap == NULL) {
+		RTE_LOG(ERR, USER1,
+			"Compress device does not support DEFLATE\n");
+		return -1;
+	}
+
+	uint64_t comp_flags = cap->comp_feature_flags;
+
+	/* Huffman encoding */
+	if (test_data->huffman_enc == RTE_COMP_HUFFMAN_FIXED &&
+			(comp_flags & RTE_COMP_FF_HUFFMAN_FIXED) == 0) {
+		RTE_LOG(ERR, USER1,
+			"Compress device does not supported Fixed Huffman\n");
+		return -1;
+	}
+
+	if (test_data->huffman_enc == RTE_COMP_HUFFMAN_DYNAMIC &&
+			(comp_flags & RTE_COMP_FF_HUFFMAN_DYNAMIC) == 0) {
+		RTE_LOG(ERR, USER1,
+			"Compress device does not supported Dynamic Huffman\n");
+		return -1;
+	}
+
+	/* Window size */
+	if (test_data->window_sz != -1) {
+		if (param_range_check(test_data->window_sz, &cap->window_size)
+				< 0) {
+			RTE_LOG(ERR, USER1,
+				"Compress device does not support "
+				"this window size\n");
+			return -1;
+		}
+	} else
+		/* Set window size to PMD maximum if none was specified */
+		test_data->window_sz = cap->window_size.max;
+
+	/* Check if chained mbufs is supported */
+	if (test_data->max_sgl_segs > 1  &&
+			(comp_flags & RTE_COMP_FF_OOP_SGL_IN_SGL_OUT) == 0) {
+		RTE_LOG(INFO, USER1, "Compress device does not support "
+				"chained mbufs. Max SGL segments set to 1\n");
+		test_data->max_sgl_segs = 1;
+	}
+
+	/* Level 0 support */
+	if (test_data->level.min == 0 &&
+			(comp_flags & RTE_COMP_FF_NONCOMPRESSED_BLOCKS) == 0) {
+		RTE_LOG(ERR, USER1, "Compress device does not support "
+				"level 0 (no compression)\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+comp_perf_allocate_memory(struct comp_test_data *test_data)
+{
+	/* Number of segments for input and output
+	 * (compression and decompression)
+	 */
+	uint32_t total_segs = DIV_CEIL(test_data->input_data_sz,
+			test_data->seg_sz);
+	test_data->comp_buf_pool = rte_pktmbuf_pool_create("comp_buf_pool",
+				total_segs,
+				0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
+				rte_socket_id());
+	if (test_data->comp_buf_pool == NULL) {
+		RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
+		return -1;
+	}
+
+	cleanup = ST_MEMORY_ALLOC;
+	test_data->decomp_buf_pool = rte_pktmbuf_pool_create("decomp_buf_pool",
+				total_segs,
+				0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
+				rte_socket_id());
+	if (test_data->decomp_buf_pool == NULL) {
+		RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
+		return -1;
+	}
+
+	test_data->total_bufs = DIV_CEIL(total_segs, test_data->max_sgl_segs);
+
+	test_data->op_pool = rte_comp_op_pool_create("op_pool",
+				  test_data->total_bufs,
+				  0, 0, rte_socket_id());
+	if (test_data->op_pool == NULL) {
+		RTE_LOG(ERR, USER1, "Comp op mempool could not be created\n");
+		return -1;
+	}
+
+	/*
+	 * Compressed data might be a bit larger than input data,
+	 * if data cannot be compressed
+	 */
+	test_data->compressed_data = rte_zmalloc_socket(NULL,
+				test_data->input_data_sz * EXPANSE_RATIO
+						+ MIN_COMPRESSED_BUF_SIZE, 0,
+				rte_socket_id());
+	if (test_data->compressed_data == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
+				"file could not be allocated\n");
+		return -1;
+	}
+
+	test_data->decompressed_data = rte_zmalloc_socket(NULL,
+				test_data->input_data_sz, 0,
+				rte_socket_id());
+	if (test_data->decompressed_data == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
+				"file could not be allocated\n");
+		return -1;
+	}
+
+	test_data->comp_bufs = rte_zmalloc_socket(NULL,
+			test_data->total_bufs * sizeof(struct rte_mbuf *),
+			0, rte_socket_id());
+	if (test_data->comp_bufs == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the compression mbufs"
+				" could not be allocated\n");
+		return -1;
+	}
+
+	test_data->decomp_bufs = rte_zmalloc_socket(NULL,
+			test_data->total_bufs * sizeof(struct rte_mbuf *),
+			0, rte_socket_id());
+	if (test_data->decomp_bufs == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the decompression mbufs"
+				" could not be allocated\n");
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Load the input file into a newly allocated test_data->input_data buffer.
+ *
+ * If test_data->input_data_sz is 0 it is set to the file size; otherwise
+ * the file content is read repeatedly (rewinding after each pass) until
+ * input_data_sz bytes have been filled.
+ *
+ * Returns 0 on success, -1 if the file cannot be opened, sized or read,
+ * is empty, or if the buffer cannot be allocated.
+ */
+static int
+comp_perf_dump_input_data(struct comp_test_data *test_data)
+{
+	/* The input is treated as raw bytes, so open it in binary mode */
+	FILE *f = fopen(test_data->input_file, "rb");
+	int ret = -1;
+
+	if (f == NULL) {
+		RTE_LOG(ERR, USER1, "Input file could not be opened\n");
+		return -1;
+	}
+
+	if (fseek(f, 0, SEEK_END) != 0) {
+		RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
+		goto end;
+	}
+
+	/* ftell() returns -1 on error; check before converting to size_t */
+	long file_pos = ftell(f);
+
+	if (file_pos < 0) {
+		RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
+		goto end;
+	}
+
+	/* An empty file cannot be used to fill the input buffer */
+	if (file_pos == 0) {
+		RTE_LOG(ERR, USER1, "Input file is empty\n");
+		goto end;
+	}
+
+	size_t actual_file_sz = file_pos;
+	/* If extended input data size has not been set,
+	 * input data size = file size
+	 */
+
+	if (test_data->input_data_sz == 0)
+		test_data->input_data_sz = actual_file_sz;
+
+	if (fseek(f, 0, SEEK_SET) != 0) {
+		RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
+		goto end;
+	}
+
+	test_data->input_data = rte_zmalloc_socket(NULL,
+				test_data->input_data_sz, 0, rte_socket_id());
+
+	if (test_data->input_data == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
+				"file could not be allocated\n");
+		goto end;
+	}
+
+	size_t remaining_data = test_data->input_data_sz;
+	uint8_t *data = test_data->input_data;
+
+	while (remaining_data > 0) {
+		size_t data_to_read = RTE_MIN(remaining_data, actual_file_sz);
+
+		if (fread(data, data_to_read, 1, f) != 1) {
+			RTE_LOG(ERR, USER1, "Input file could not be read\n");
+			goto end;
+		}
+		/* Rewind so the next pass re-reads the file from the start */
+		if (fseek(f, 0, SEEK_SET) != 0) {
+			RTE_LOG(ERR, USER1,
+				"Size of input could not be calculated\n");
+			goto end;
+		}
+		remaining_data -= data_to_read;
+		data += data_to_read;
+	}
+
+	if (test_data->input_data_sz > actual_file_sz)
+		RTE_LOG(INFO, USER1,
+		  "%zu bytes read from file %s, extending the file %.2f times\n",
+			test_data->input_data_sz, test_data->input_file,
+			(double)test_data->input_data_sz/actual_file_sz);
+	else
+		RTE_LOG(INFO, USER1,
+			"%zu bytes read from file %s\n",
+			test_data->input_data_sz, test_data->input_file);
+
+	ret = 0;
+
+end:
+	fclose(f);
+	return ret;
+}
+
+static int
+comp_perf_initialize_compressdev(struct comp_test_data *test_data)
+{
+	uint8_t enabled_cdev_count;
+	uint8_t enabled_cdevs[RTE_COMPRESS_MAX_DEVS];
+
+	enabled_cdev_count = rte_compressdev_devices_get(test_data->driver_name,
+			enabled_cdevs, RTE_COMPRESS_MAX_DEVS);
+	if (enabled_cdev_count == 0) {
+		RTE_LOG(ERR, USER1, "No compress devices type %s available\n",
+				test_data->driver_name);
+		return -EINVAL;
+	}
+
+	if (enabled_cdev_count > 1)
+		RTE_LOG(INFO, USER1,
+			"Only the first compress device will be used\n");
+
+	test_data->cdev_id = enabled_cdevs[0];
+
+	if (comp_perf_check_capabilities(test_data) < 0)
+		return -1;
+
+	/* Configure compressdev (one device, one queue pair) */
+	struct rte_compressdev_config config = {
+		.socket_id = rte_socket_id(),
+		.nb_queue_pairs = 1,
+		.max_nb_priv_xforms = NUM_MAX_XFORMS,
+		.max_nb_streams = 0
+	};
+
+	if (rte_compressdev_configure(test_data->cdev_id, &config) < 0) {
+		RTE_LOG(ERR, USER1, "Device configuration failed\n");
+		return -1;
+	}
+
+	if (rte_compressdev_queue_pair_setup(test_data->cdev_id, 0,
+			NUM_MAX_INFLIGHT_OPS, rte_socket_id()) < 0) {
+		RTE_LOG(ERR, USER1, "Queue pair setup failed\n");
+		return -1;
+	}
+
+	if (rte_compressdev_start(test_data->cdev_id) < 0) {
+		RTE_LOG(ERR, USER1, "Device could not be started\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+prepare_bufs(struct comp_test_data *test_data)
+{
+	uint32_t remaining_data = test_data->input_data_sz;
+	uint8_t *input_data_ptr = test_data->input_data;
+	size_t data_sz;
+	uint8_t *data_addr;
+	uint32_t i, j;
+
+	for (i = 0; i < test_data->total_bufs; i++) {
+		/* Allocate data in input mbuf and copy data from input file */
+		test_data->decomp_bufs[i] =
+			rte_pktmbuf_alloc(test_data->decomp_buf_pool);
+		if (test_data->decomp_bufs[i] == NULL) {
+			RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
+			return -1;
+		}
+
+		cleanup = ST_PREPARE_BUF;
+		data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
+		data_addr = (uint8_t *) rte_pktmbuf_append(
+					test_data->decomp_bufs[i], data_sz);
+		if (data_addr == NULL) {
+			RTE_LOG(ERR, USER1, "Could not append data\n");
+			return -1;
+		}
+		rte_memcpy(data_addr, input_data_ptr, data_sz);
+
+		input_data_ptr += data_sz;
+		remaining_data -= data_sz;
+
+		/* Already one segment in the mbuf */
+		uint16_t segs_per_mbuf = 1;
+
+		/* Chain mbufs if needed for input mbufs */
+		while (segs_per_mbuf < test_data->max_sgl_segs
+				&& remaining_data > 0) {
+			struct rte_mbuf *next_seg =
+				rte_pktmbuf_alloc(test_data->decomp_buf_pool);
+
+			if (next_seg == NULL) {
+				RTE_LOG(ERR, USER1,
+					"Could not allocate mbuf\n");
+				return -1;
+			}
+
+			data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
+			data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
+				data_sz);
+
+			if (data_addr == NULL) {
+				RTE_LOG(ERR, USER1, "Could not append data\n");
+				return -1;
+			}
+
+			rte_memcpy(data_addr, input_data_ptr, data_sz);
+			input_data_ptr += data_sz;
+			remaining_data -= data_sz;
+
+			if (rte_pktmbuf_chain(test_data->decomp_bufs[i],
+					next_seg) < 0) {
+				RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
+				return -1;
+			}
+			segs_per_mbuf++;
+		}
+
+		/* Allocate data in output mbuf */
+		test_data->comp_bufs[i] =
+			rte_pktmbuf_alloc(test_data->comp_buf_pool);
+		if (test_data->comp_bufs[i] == NULL) {
+			RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
+			return -1;
+		}
+		data_addr = (uint8_t *) rte_pktmbuf_append(
+					test_data->comp_bufs[i],
+					test_data->seg_sz);
+		if (data_addr == NULL) {
+			RTE_LOG(ERR, USER1, "Could not append data\n");
+			return -1;
+		}
+
+		/* Chain mbufs if needed for output mbufs */
+		for (j = 1; j < segs_per_mbuf; j++) {
+			struct rte_mbuf *next_seg =
+				rte_pktmbuf_alloc(test_data->comp_buf_pool);
+
+			if (next_seg == NULL) {
+				RTE_LOG(ERR, USER1,
+					"Could not allocate mbuf\n");
+				return -1;
+			}
+
+			data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
+				test_data->seg_sz);
+
+			if (data_addr == NULL) {
+				RTE_LOG(ERR, USER1, "Could not append data\n");
+				return -1;
+			}
+
+			if (rte_pktmbuf_chain(test_data->comp_bufs[i],
+					next_seg) < 0) {
+				RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
+				return -1;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static void
+free_bufs(struct comp_test_data *test_data)
+{
+	uint32_t i;
+
+	for (i = 0; i < test_data->total_bufs; i++) {
+		rte_pktmbuf_free(test_data->comp_bufs[i]);
+		rte_pktmbuf_free(test_data->decomp_bufs[i]);
+	}
+}
+
+static int
+main_loop(struct comp_test_data *test_data, uint8_t level,
+			enum rte_comp_xform_type type,
+			uint8_t *output_data_ptr,
+			size_t *output_data_sz,
+			unsigned int benchmarking)
+{
+	uint8_t dev_id = test_data->cdev_id;
+	uint32_t i, iter, num_iter;
+	struct rte_comp_op **ops, **deq_ops;
+	void *priv_xform = NULL;
+	struct rte_comp_xform xform;
+	size_t output_size = 0;
+	struct rte_mbuf **input_bufs, **output_bufs;
+	int res = 0;
+	int allocated = 0;
+
+	if (test_data == NULL || !test_data->burst_sz) {
+		RTE_LOG(ERR, USER1,
+			"Unknown burst size\n");
+		return -1;
+	}
+
+	ops = rte_zmalloc_socket(NULL,
+		2 * test_data->total_bufs * sizeof(struct rte_comp_op *),
+		0, rte_socket_id());
+
+	if (ops == NULL) {
+		RTE_LOG(ERR, USER1,
+			"Can't allocate memory for ops strucures\n");
+		return -1;
+	}
+
+	deq_ops = &ops[test_data->total_bufs];
+
+	if (type == RTE_COMP_COMPRESS) {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_COMPRESS,
+			.compress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.deflate.huffman = test_data->huffman_enc,
+				.level = level,
+				.window_size = test_data->window_sz,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = test_data->decomp_bufs;
+		output_bufs = test_data->comp_bufs;
+	} else {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_DECOMPRESS,
+			.decompress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.window_size = test_data->window_sz,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = test_data->comp_bufs;
+		output_bufs = test_data->decomp_bufs;
+	}
+
+	/* Create private xform */
+	if (rte_compressdev_private_xform_create(dev_id, &xform,
+			&priv_xform) < 0) {
+		RTE_LOG(ERR, USER1, "Private xform could not be created\n");
+		res = -1;
+		goto end;
+	}
+
+	uint64_t tsc_start, tsc_end, tsc_duration;
+
+	tsc_start = tsc_end = tsc_duration = 0;
+	if (benchmarking) {
+		tsc_start = rte_rdtsc();
+		num_iter = test_data->num_iter;
+	} else
+		num_iter = 1;
+
+	for (iter = 0; iter < num_iter; iter++) {
+		uint32_t total_ops = test_data->total_bufs;
+		uint32_t remaining_ops = test_data->total_bufs;
+		uint32_t total_deq_ops = 0;
+		uint32_t total_enq_ops = 0;
+		uint16_t ops_unused = 0;
+		uint16_t num_enq = 0;
+		uint16_t num_deq = 0;
+
+		output_size = 0;
+
+		while (remaining_ops > 0) {
+			uint16_t num_ops = RTE_MIN(remaining_ops,
+						   test_data->burst_sz);
+			uint16_t ops_needed = num_ops - ops_unused;
+
+			/*
+			 * Move the unused operations from the previous
+			 * enqueue_burst call to the front, to maintain order
+			 */
+			if ((ops_unused > 0) && (num_enq > 0)) {
+				size_t nb_b_to_mov =
+				      ops_unused * sizeof(struct rte_comp_op *);
+
+				memmove(ops, &ops[num_enq], nb_b_to_mov);
+			}
+
+			/* Allocate compression operations */
+			if (ops_needed && !rte_comp_op_bulk_alloc(
+						test_data->op_pool,
+						&ops[ops_unused],
+						ops_needed)) {
+				RTE_LOG(ERR, USER1,
+				      "Could not allocate enough operations\n");
+				res = -1;
+				goto end;
+			}
+			allocated += ops_needed;
+
+			for (i = 0; i < ops_needed; i++) {
+				/*
+				 * Calculate next buffer to attach to operation
+				 */
+				uint32_t buf_id = total_enq_ops + i +
+						ops_unused;
+				uint16_t op_id = ops_unused + i;
+				/* Reset all data in output buffers */
+				struct rte_mbuf *m = output_bufs[buf_id];
+
+				m->pkt_len = test_data->seg_sz * m->nb_segs;
+				while (m) {
+					m->data_len = m->buf_len - m->data_off;
+					m = m->next;
+				}
+				ops[op_id]->m_src = input_bufs[buf_id];
+				ops[op_id]->m_dst = output_bufs[buf_id];
+				ops[op_id]->src.offset = 0;
+				ops[op_id]->src.length =
+					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
+				ops[op_id]->dst.offset = 0;
+				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
+				ops[op_id]->input_chksum = buf_id;
+				ops[op_id]->private_xform = priv_xform;
+			}
+
+			num_enq = rte_compressdev_enqueue_burst(dev_id, 0, ops,
+								num_ops);
+			ops_unused = num_ops - num_enq;
+			remaining_ops -= num_enq;
+			total_enq_ops += num_enq;
+
+			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
+							   deq_ops,
+							   test_data->burst_sz);
+			total_deq_ops += num_deq;
+			if (benchmarking == 0) {
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+					const void *read_data_addr =
+						rte_pktmbuf_read(op->m_dst, 0,
+						op->produced, output_data_ptr);
+					if (read_data_addr == NULL) {
+						RTE_LOG(ERR, USER1,
+				      "Could not copy buffer in destination\n");
+						res = -1;
+						goto end;
+					}
+
+					if (read_data_addr != output_data_ptr)
+						rte_memcpy(output_data_ptr,
+							rte_pktmbuf_mtod(
+							  op->m_dst, uint8_t *),
+							op->produced);
+					output_data_ptr += op->produced;
+					output_size += op->produced;
+
+				}
+			}
+
+			if (iter == num_iter - 1) {
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+					struct rte_mbuf *m = op->m_dst;
+
+					m->pkt_len = op->produced;
+					uint32_t remaining_data = op->produced;
+					uint16_t data_to_append;
+
+					while (remaining_data > 0) {
+						data_to_append =
+							RTE_MIN(remaining_data,
+							     test_data->seg_sz);
+						m->data_len = data_to_append;
+						remaining_data -=
+								data_to_append;
+						m = m->next;
+					}
+				}
+			}
+			rte_mempool_put_bulk(test_data->op_pool,
+					     (void **)deq_ops, num_deq);
+			allocated -= num_deq;
+		}
+
+		/* Dequeue the last operations */
+		while (total_deq_ops < total_ops) {
+			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
+						deq_ops, test_data->burst_sz);
+			total_deq_ops += num_deq;
+			if (benchmarking == 0) {
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+					const void *read_data_addr =
+						rte_pktmbuf_read(op->m_dst, 0,
+						op->produced, output_data_ptr);
+					if (read_data_addr == NULL) {
+						RTE_LOG(ERR, USER1,
+				      "Could not copy buffer in destination\n");
+						res = -1;
+						goto end;
+					}
+
+					if (read_data_addr != output_data_ptr)
+						rte_memcpy(output_data_ptr,
+							rte_pktmbuf_mtod(
+							op->m_dst, uint8_t *),
+							op->produced);
+					output_data_ptr += op->produced;
+					output_size += op->produced;
+
+				}
+			}
+
+			if (iter == num_iter - 1) {
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+					struct rte_mbuf *m = op->m_dst;
+
+					m->pkt_len = op->produced;
+					uint32_t remaining_data = op->produced;
+					uint16_t data_to_append;
+
+					while (remaining_data > 0) {
+						data_to_append =
+						RTE_MIN(remaining_data,
+							test_data->seg_sz);
+						m->data_len = data_to_append;
+						remaining_data -=
+								data_to_append;
+						m = m->next;
+					}
+				}
+			}
+			rte_mempool_put_bulk(test_data->op_pool,
+					     (void **)deq_ops, num_deq);
+			allocated -= num_deq;
+		}
+	}
+
+	if (benchmarking) {
+		tsc_end = rte_rdtsc();
+		tsc_duration = tsc_end - tsc_start;
+
+		if (type == RTE_COMP_COMPRESS)
+			test_data->comp_tsc_duration[level] =
+					tsc_duration / num_iter;
+		else
+			test_data->decomp_tsc_duration[level] =
+					tsc_duration / num_iter;
+	}
+
+	if (benchmarking == 0 && output_data_sz)
+		*output_data_sz = output_size;
+end:
+	rte_mempool_put_bulk(test_data->op_pool, (void **)ops, allocated);
+	rte_compressdev_private_xform_free(dev_id, priv_xform);
+	rte_free(ops);
+	return res;
+}
+
 int
 main(int argc, char **argv)
 {
-	int ret;
+	uint8_t level, level_idx = 0;
+	int ret, i;
 	struct comp_test_data *test_data;
 
 	/* Initialise DPDK EAL */
@@ -29,24 +743,186 @@ main(int argc, char **argv)
 		rte_exit(EXIT_FAILURE, "Cannot reserve memory in socket %d\n",
 				rte_socket_id());
 
+	cleanup = ST_TEST_DATA;
 	comp_perf_options_default(test_data);
 
 	if (comp_perf_options_parse(test_data, argc, argv) < 0) {
 		RTE_LOG(ERR, USER1,
 			"Parsing one or more user options failed\n");
 		ret = EXIT_FAILURE;
-		goto err;
+		goto end;
 	}
 
 	if (comp_perf_options_check(test_data) < 0) {
 		ret = EXIT_FAILURE;
-		goto err;
+		goto end;
+	}
+
+	if (comp_perf_initialize_compressdev(test_data) < 0) {
+		ret = EXIT_FAILURE;
+		goto end;
+	}
+
+	cleanup = ST_COMPDEV;
+	if (comp_perf_dump_input_data(test_data) < 0) {
+		ret = EXIT_FAILURE;
+		goto end;
+	}
+
+	cleanup = ST_INPUT_DATA;
+	if (comp_perf_allocate_memory(test_data) < 0) {
+		ret = EXIT_FAILURE;
+		goto end;
+	}
+
+	if (prepare_bufs(test_data) < 0) {
+		ret = EXIT_FAILURE;
+		goto end;
+	}
+
+	if (test_data->level.inc != 0)
+		level = test_data->level.min;
+	else
+		level = test_data->level.list[0];
+
+	size_t comp_data_sz;
+	size_t decomp_data_sz;
+
+	printf("Burst size = %u\n", test_data->burst_sz);
+	printf("File size = %zu\n", test_data->input_data_sz);
+
+	printf("%6s%12s%17s%19s%21s%15s%21s%23s%16s\n",
+		"Level", "Comp size", "Comp ratio [%]",
+		"Comp [Cycles/it]", "Comp [Cycles/Byte]", "Comp [Gbps]",
+		"Decomp [Cycles/it]", "Decomp [Cycles/Byte]", "Decomp [Gbps]");
+
+	cleanup = ST_DURING_TEST;
+	while (level <= test_data->level.max) {
+		/*
+		 * Run a first iteration, to verify compression and
+		 * get the compression ratio for the level
+		 */
+		if (main_loop(test_data, level, RTE_COMP_COMPRESS,
+			      test_data->compressed_data,
+			      &comp_data_sz, 0) < 0) {
+			ret = EXIT_FAILURE;
+			goto end;
+		}
+
+		if (main_loop(test_data, level, RTE_COMP_DECOMPRESS,
+			      test_data->decompressed_data,
+			      &decomp_data_sz, 0) < 0) {
+			ret = EXIT_FAILURE;
+			goto end;
+		}
+
+		if (decomp_data_sz != test_data->input_data_sz) {
+			RTE_LOG(ERR, USER1,
+		   "Decompressed data length not equal to input data length\n");
+			RTE_LOG(ERR, USER1,
+				"Decompressed size = %zu, expected = %zu\n",
+				decomp_data_sz, test_data->input_data_sz);
+			ret = EXIT_FAILURE;
+			goto end;
+		} else {
+			if (memcmp(test_data->decompressed_data,
+					test_data->input_data,
+					test_data->input_data_sz) != 0) {
+				RTE_LOG(ERR, USER1,
+			    "Decompressed data is not the same as file data\n");
+				ret = EXIT_FAILURE;
+				goto end;
+			}
+		}
+
+		double ratio = (double) comp_data_sz /
+						test_data->input_data_sz * 100;
+
+		/*
+		 * Run the tests twice, discarding the first performance
+		 * results, before the cache is warmed up
+		 */
+		for (i = 0; i < 2; i++) {
+			if (main_loop(test_data, level, RTE_COMP_COMPRESS,
+					NULL, NULL, 1) < 0) {
+				ret = EXIT_FAILURE;
+				goto end;
+			}
+		}
+
+		for (i = 0; i < 2; i++) {
+			if (main_loop(test_data, level, RTE_COMP_DECOMPRESS,
+					NULL, NULL, 1) < 0) {
+				ret = EXIT_FAILURE;
+				goto end;
+			}
+		}
+
+		uint64_t comp_tsc_duration =
+				test_data->comp_tsc_duration[level];
+		double comp_tsc_byte = (double)comp_tsc_duration /
+						test_data->input_data_sz;
+		double comp_gbps = rte_get_tsc_hz() / comp_tsc_byte * 8 /
+				1000000000;
+		uint64_t decomp_tsc_duration =
+				test_data->decomp_tsc_duration[level];
+		double decomp_tsc_byte = (double)decomp_tsc_duration /
+						test_data->input_data_sz;
+		double decomp_gbps = rte_get_tsc_hz() / decomp_tsc_byte * 8 /
+				1000000000;
+
+		printf("%6u%12zu%17.2f%19"PRIu64"%21.2f"
+					"%15.2f%21"PRIu64"%23.2f%16.2f\n",
+		       level, comp_data_sz, ratio, comp_tsc_duration,
+		       comp_tsc_byte, comp_gbps, decomp_tsc_duration,
+		       decomp_tsc_byte, decomp_gbps);
+
+		if (test_data->level.inc != 0)
+			level += test_data->level.inc;
+		else {
+			if (++level_idx == test_data->level.count)
+				break;
+			level = test_data->level.list[level_idx];
+		}
 	}
 
 	ret = EXIT_SUCCESS;
 
-err:
-	rte_free(test_data);
+end:
+	switch (cleanup) {
 
+	case ST_DURING_TEST:
+	case ST_PREPARE_BUF:
+		free_bufs(test_data);
+		/* fallthrough */
+	case ST_MEMORY_ALLOC:
+		rte_free(test_data->decomp_bufs);
+		rte_free(test_data->comp_bufs);
+		rte_free(test_data->decompressed_data);
+		rte_free(test_data->compressed_data);
+		rte_mempool_free(test_data->op_pool);
+		rte_mempool_free(test_data->decomp_buf_pool);
+		rte_mempool_free(test_data->comp_buf_pool);
+		/* fallthrough */
+	case ST_INPUT_DATA:
+		rte_free(test_data->input_data);
+		/* fallthrough */
+	case ST_COMPDEV:
+		if (test_data->cdev_id != -1)
+			rte_compressdev_stop(test_data->cdev_id);
+		/* fallthrough */
+	case ST_TEST_DATA:
+		rte_free(test_data);
+		/* fallthrough */
+	case ST_CLEAR:
+	default:
+		i = rte_eal_cleanup();
+		if (i) {
+			RTE_LOG(ERR, USER1,
+				"Error from rte_eal_cleanup(), %d\n", i);
+			ret = i;
+		}
+		break;
+	}
 	return ret;
 }
-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH v4 3/5] doc/guides/tools: add doc files
  2018-11-23 14:27     ` [dpdk-dev] [PATCH v4 0/5] add initial version of compress-perf Tomasz Jozwiak
  2018-11-23 14:27       ` [dpdk-dev] [PATCH v4 1/5] app/compress-perf: add parser Tomasz Jozwiak
  2018-11-23 14:27       ` [dpdk-dev] [PATCH v4 2/5] app/compress-perf: add performance measurement Tomasz Jozwiak
@ 2018-11-23 14:27       ` Tomasz Jozwiak
  2018-11-23 15:00         ` Varghese, Vipin
  2018-11-23 14:27       ` [dpdk-dev] [PATCH v4 4/5] app/compress-perf: add dynamic compression test Tomasz Jozwiak
                         ` (2 subsequent siblings)
  5 siblings, 1 reply; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-11-23 14:27 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, Shally.Verma, akhil.goyal

Added:
 -  initial version of compression performance test
    description file.
 -  release note in release_18_11.rst

Updated index.rst file

Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
---
 MAINTAINERS                            |  5 +++
 doc/guides/rel_notes/release_18_11.rst |  5 +++
 doc/guides/tools/comp_perf.rst         | 75 ++++++++++++++++++++++++++++++++++
 doc/guides/tools/index.rst             |  1 +
 4 files changed, 86 insertions(+)
 create mode 100644 doc/guides/tools/comp_perf.rst

diff --git a/MAINTAINERS b/MAINTAINERS
index 71ba312..dd0c131 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1242,6 +1242,11 @@ M: Bernard Iremonger <bernard.iremonger@intel.com>
 F: app/test-pmd/
 F: doc/guides/testpmd_app_ug/
 
+Compression performance test application
+M: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
+F: app/test-compress-perf/
+F: doc/guides/tools/comp_perf.rst
+
 Crypto performance test application
 M: Declan Doherty <declan.doherty@intel.com>
 F: app/test-crypto-perf/
diff --git a/doc/guides/rel_notes/release_18_11.rst b/doc/guides/rel_notes/release_18_11.rst
index 32ff0e5..d44cf30 100644
--- a/doc/guides/rel_notes/release_18_11.rst
+++ b/doc/guides/rel_notes/release_18_11.rst
@@ -328,6 +328,11 @@ New Features
   additional command-line parameter values from the "DPDK_TEST_PARAMS"
   environment variable to make this application easier to use.
 
+* **Added a compression performance test tool.**
+
+   Added a new performance test tool to test the compressdev PMD. The tool tests
+   compression ratio and compression throughput. Dynamic compression test is not
+   supported yet.
 
 API Changes
 -----------
diff --git a/doc/guides/tools/comp_perf.rst b/doc/guides/tools/comp_perf.rst
new file mode 100644
index 0000000..1428348
--- /dev/null
+++ b/doc/guides/tools/comp_perf.rst
@@ -0,0 +1,75 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright(c) 2018 Intel Corporation.
+
+dpdk-test-compress-perf Application
+===================================
+
+The ``dpdk-test-compress-perf`` tool is a Data Plane Development Kit (DPDK)
+utility that allows measuring performance parameters of PMDs available in the
+compress tree. The application reads the data from a file (--input-file),
+loads the whole file into a buffer and fills the input mbufs with that data;
+the mbufs are then passed to the compress device in compression operations.
+Then, the output buffers are fed into the decompression stage, and the resulting
+data is compared against the original data (verification phase). After that,
+a number of iterations are performed, compressing first and decompressing later,
+to check the throughput rate
+(showing cycles/iteration, cycles/Byte and Gbps, for compression and decompression).
+
+
+Limitations
+~~~~~~~~~~~
+
+* Only supports the fixed compression and stateless operation.
+
+Command line options
+--------------------
+
+ ``--driver-name NAME``: compress driver to use
+
+ ``--input-file NAME``: file to compress and decompress
+
+ ``--extended-input-sz N``: extend file data up to this size (default: no extension)
+
+ ``--seg-sz N``: size of segment to store the data (default: 2048)
+
+ ``--burst-sz N``: compress operation burst size
+
+ ``--pool-sz N``: mempool size for compress operations/mbufs (default: 8192)
+
+ ``--max-num-sgl-segs N``: maximum number of segments for each mbuf (default: 16)
+
+ ``--num-iter N``: number of times the file will be compressed/decompressed (default: 10000)
+
+ ``--operation [comp/decomp/comp_and_decomp]``: perform test on compression, decompression or both operations
+
+ ``--huffman-enc [fixed/default]``: Huffman encoding (default: fixed)
+
+ ``--compress-level N``: compression level, which could be a single value, list or range (default: range between 1 and 9)
+
+ ``--window-sz N``: base two log value of compression window size (default: max supported by PMD)
+
+ ``-h``: prints this help
+
+
+Compiling the Application
+-------------------------
+
+**Step 1: PMD setting**
+
+The ``dpdk-test-compress-perf`` tool depends on the compression device PMDs, which
+may be disabled by default in the build configuration file ``common_base``.
+The compression PMD to be tested can be enabled by setting, for example::
+
+   CONFIG_RTE_LIBRTE_PMD_ISAL=y
+
+
+Running the Application
+-----------------------
+
+The application has a number of command line options. Here is a sample command line:
+
+.. code-block:: console
+
+   ./build/app/dpdk-test-compress-perf  -l 4 -- --driver-name compress_qat --input-file test.txt --seg-sz 8192 \
+    --compress-level 1:1:9 --num-iter 10 --extended-input-sz 1048576  --max-num-sgl-segs 16 --huffman-enc fixed
+
diff --git a/doc/guides/tools/index.rst b/doc/guides/tools/index.rst
index a6e2c4c..24235ba 100644
--- a/doc/guides/tools/index.rst
+++ b/doc/guides/tools/index.rst
@@ -42,3 +42,4 @@ DPDK Tools User Guides
     testbbdev
     cryptoperf
     testeventdev
+    comp_perf
-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH v4 4/5] app/compress-perf: add dynamic compression test
  2018-11-23 14:27     ` [dpdk-dev] [PATCH v4 0/5] add initial version of compress-perf Tomasz Jozwiak
                         ` (2 preceding siblings ...)
  2018-11-23 14:27       ` [dpdk-dev] [PATCH v4 3/5] doc/guides/tools: add doc files Tomasz Jozwiak
@ 2018-11-23 14:27       ` Tomasz Jozwiak
  2018-11-23 14:27       ` [dpdk-dev] [PATCH v4 5/5] app/compress-perf: code refactoring Tomasz Jozwiak
  2018-12-05  8:47       ` [dpdk-dev] [PATCH v5 0/5] add initial version of compress-perf Tomasz Jozwiak
  5 siblings, 0 replies; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-11-23 14:27 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, Shally.Verma, akhil.goyal

Added dynamic Huffman compression support to the compression performance test.

Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
---
 app/test-compress-perf/comp_perf_options_parse.c | 6 +++++-
 doc/guides/rel_notes/release_18_11.rst           | 3 +--
 doc/guides/tools/comp_perf.rst                   | 4 ++--
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/app/test-compress-perf/comp_perf_options_parse.c b/app/test-compress-perf/comp_perf_options_parse.c
index fd5d31c..dbe8135 100644
--- a/app/test-compress-perf/comp_perf_options_parse.c
+++ b/app/test-compress-perf/comp_perf_options_parse.c
@@ -431,6 +431,10 @@ parse_huffman_enc(struct comp_test_data *test_data, const char *arg)
 		{
 			"fixed",
 			RTE_COMP_HUFFMAN_FIXED
+		},
+		{
+			"dynamic",
+			RTE_COMP_HUFFMAN_DYNAMIC
 		}
 	};
 
@@ -567,7 +571,7 @@ comp_perf_options_default(struct comp_test_data *test_data)
 	test_data->pool_sz = 8192;
 	test_data->max_sgl_segs = 16;
 	test_data->num_iter = 10000;
-	test_data->huffman_enc = RTE_COMP_HUFFMAN_FIXED;
+	test_data->huffman_enc = RTE_COMP_HUFFMAN_DYNAMIC;
 	test_data->test_op = COMPRESS_DECOMPRESS;
 	test_data->window_sz = -1;
 	test_data->level.min = 1;
diff --git a/doc/guides/rel_notes/release_18_11.rst b/doc/guides/rel_notes/release_18_11.rst
index d44cf30..848cc06 100644
--- a/doc/guides/rel_notes/release_18_11.rst
+++ b/doc/guides/rel_notes/release_18_11.rst
@@ -331,8 +331,7 @@ New Features
 * **Added a compression performance test tool.**
 
    Added a new performance test tool to test the compressdev PMD. The tool tests
-   compression ratio and compression throughput. Dynamic compression test is not
-   supported yet.
+   compression ratio and compression throughput.
 
 API Changes
 -----------
diff --git a/doc/guides/tools/comp_perf.rst b/doc/guides/tools/comp_perf.rst
index 1428348..abb727d 100644
--- a/doc/guides/tools/comp_perf.rst
+++ b/doc/guides/tools/comp_perf.rst
@@ -19,7 +19,7 @@ to check the throughput rate
 Limitations
 ~~~~~~~~~~~
 
-* Only supports the fixed compression and stateless operation.
+* Only supports the stateless operation.
 
 Command line options
 --------------------
@@ -42,7 +42,7 @@ Command line options
 
  ``--operation [comp/decomp/comp_and_decomp]``: perform test on compression, decompression or both operations
 
- ``--huffman-enc [fixed/default]``: Huffman encoding (default: fixed)
+ ``--huffman-enc [fixed/dynamic/default]``: Huffman encoding (default: dynamic)
 
  ``--compress-level N``: compression level, which could be a single value, list or range (default: range between 1 and 9)
 
-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH v4 5/5] app/compress-perf: code refactoring
  2018-11-23 14:27     ` [dpdk-dev] [PATCH v4 0/5] add initial version of compress-perf Tomasz Jozwiak
                         ` (3 preceding siblings ...)
  2018-11-23 14:27       ` [dpdk-dev] [PATCH v4 4/5] app/compress-perf: add dynamic compression test Tomasz Jozwiak
@ 2018-11-23 14:27       ` Tomasz Jozwiak
  2018-12-05  8:47       ` [dpdk-dev] [PATCH v5 0/5] add initial version of compress-perf Tomasz Jozwiak
  5 siblings, 0 replies; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-11-23 14:27 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, Shally.Verma, akhil.goyal

Code refactoring to separate validation from benchmarking part.

Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
---
 app/test-compress-perf/Makefile                   |   2 +
 app/test-compress-perf/comp_perf_options.h        |  12 +
 app/test-compress-perf/comp_perf_test_benchmark.c | 292 +++++++++++++++++
 app/test-compress-perf/comp_perf_test_benchmark.h |  13 +
 app/test-compress-perf/comp_perf_test_verify.c    | 339 ++++++++++++++++++++
 app/test-compress-perf/comp_perf_test_verify.h    |  13 +
 app/test-compress-perf/main.c                     | 366 +---------------------
 app/test-compress-perf/meson.build                |   4 +-
 8 files changed, 688 insertions(+), 353 deletions(-)
 create mode 100644 app/test-compress-perf/comp_perf_test_benchmark.c
 create mode 100644 app/test-compress-perf/comp_perf_test_benchmark.h
 create mode 100644 app/test-compress-perf/comp_perf_test_verify.c
 create mode 100644 app/test-compress-perf/comp_perf_test_verify.h

diff --git a/app/test-compress-perf/Makefile b/app/test-compress-perf/Makefile
index 8aa7a22..d20e17e 100644
--- a/app/test-compress-perf/Makefile
+++ b/app/test-compress-perf/Makefile
@@ -12,5 +12,7 @@ CFLAGS += -O3
 # all source are stored in SRCS-y
 SRCS-y := main.c
 SRCS-y += comp_perf_options_parse.c
+SRCS-y += comp_perf_test_verify.c
+SRCS-y += comp_perf_test_benchmark.c
 
 include $(RTE_SDK)/mk/rte.app.mk
diff --git a/app/test-compress-perf/comp_perf_options.h b/app/test-compress-perf/comp_perf_options.h
index 7516ea0..ca96a3c 100644
--- a/app/test-compress-perf/comp_perf_options.h
+++ b/app/test-compress-perf/comp_perf_options.h
@@ -2,6 +2,9 @@
  * Copyright(c) 2018 Intel Corporation
  */
 
+#ifndef _COMP_PERF_OPS_
+#define _COMP_PERF_OPS_
+
 #define MAX_DRIVER_NAME		64
 #define MAX_INPUT_FILE_NAME	64
 #define MAX_LIST		32
@@ -46,6 +49,13 @@ struct comp_test_data {
 	/* Store TSC duration for all levels (including level 0) */
 	uint64_t comp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
 	uint64_t decomp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
+	size_t comp_data_sz;
+	size_t decomp_data_sz;
+	double ratio;
+	double comp_gbps;
+	double decomp_gbps;
+	double comp_tsc_byte;
+	double decomp_tsc_byte;
 };
 
 int
@@ -57,3 +67,5 @@ comp_perf_options_default(struct comp_test_data *test_data);
 
 int
 comp_perf_options_check(struct comp_test_data *test_data);
+
+#endif
diff --git a/app/test-compress-perf/comp_perf_test_benchmark.c b/app/test-compress-perf/comp_perf_test_benchmark.c
new file mode 100644
index 0000000..31ea1d6
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_test_benchmark.c
@@ -0,0 +1,292 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_cycles.h>
+#include <rte_compressdev.h>
+
+#include "comp_perf_test_benchmark.h"
+
+/*
+ * Set the lengths of each dequeued op's destination mbuf chain from
+ * op->produced; main_loop() reads pkt_len when the chain becomes input.
+ */
+static void
+trim_dst_mbufs(struct comp_test_data *test_data, struct rte_comp_op **deq_ops,
+		uint16_t num_deq)
+{
+	uint32_t i;
+
+	for (i = 0; i < num_deq; i++) {
+		struct rte_comp_op *op = deq_ops[i];
+		struct rte_mbuf *m = op->m_dst;
+		uint32_t remaining_data = op->produced;
+		uint16_t data_to_append;
+
+		m->pkt_len = op->produced;
+		/* Never walk past the end of the chain */
+		while (remaining_data > 0 && m != NULL) {
+			data_to_append = RTE_MIN(remaining_data,
+						 test_data->seg_sz);
+			m->data_len = data_to_append;
+			remaining_data -= data_to_append;
+			m = m->next;
+		}
+	}
+}
+
+/*
+ * Run test_data->num_iter timed passes of compression or decompression
+ * (selected by type) over all test_data->total_bufs buffers and store the
+ * mean TSC cycles per pass in comp_tsc_duration[level] or
+ * decomp_tsc_duration[level]. Returns 0 on success and -1 on failure.
+ */
+static int
+main_loop(struct comp_test_data *test_data, uint8_t level,
+			enum rte_comp_xform_type type)
+{
+	uint8_t dev_id;
+	uint32_t i, iter, num_iter;
+	struct rte_comp_op **ops, **deq_ops;
+	void *priv_xform = NULL;
+	struct rte_comp_xform xform;
+	struct rte_mbuf **input_bufs, **output_bufs;
+	uint64_t tsc_start, tsc_end, tsc_duration;
+	int res = 0;
+	int allocated = 0;
+
+	if (test_data == NULL || !test_data->burst_sz) {
+		RTE_LOG(ERR, USER1, "Unknown burst size\n");
+		return -1;
+	}
+
+	/* Dereference test_data only after the NULL check above */
+	dev_id = test_data->cdev_id;
+	num_iter = test_data->num_iter;
+	if (num_iter == 0) {
+		RTE_LOG(ERR, USER1, "Number of iterations is zero\n");
+		return -1;
+	}
+
+	ops = rte_zmalloc_socket(NULL,
+		2 * test_data->total_bufs * sizeof(struct rte_comp_op *),
+		0, rte_socket_id());
+
+	if (ops == NULL) {
+		RTE_LOG(ERR, USER1,
+			"Can't allocate memory for ops strucures\n");
+		return -1;
+	}
+
+	deq_ops = &ops[test_data->total_bufs];
+
+	if (type == RTE_COMP_COMPRESS) {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_COMPRESS,
+			.compress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.deflate.huffman = test_data->huffman_enc,
+				.level = level,
+				.window_size = test_data->window_sz,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = test_data->decomp_bufs;
+		output_bufs = test_data->comp_bufs;
+	} else {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_DECOMPRESS,
+			.decompress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.window_size = test_data->window_sz,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = test_data->comp_bufs;
+		output_bufs = test_data->decomp_bufs;
+	}
+
+	/* Create private xform */
+	if (rte_compressdev_private_xform_create(dev_id, &xform,
+			&priv_xform) < 0) {
+		RTE_LOG(ERR, USER1, "Private xform could not be created\n");
+		res = -1;
+		goto end;
+	}
+
+	tsc_start = rte_rdtsc();
+
+	for (iter = 0; iter < num_iter; iter++) {
+		uint32_t total_ops = test_data->total_bufs;
+		uint32_t remaining_ops = test_data->total_bufs;
+		uint32_t total_deq_ops = 0;
+		uint32_t total_enq_ops = 0;
+		uint16_t ops_unused = 0;
+		uint16_t num_enq = 0;
+		uint16_t num_deq = 0;
+
+		while (remaining_ops > 0) {
+			uint16_t num_ops = RTE_MIN(remaining_ops,
+						   test_data->burst_sz);
+			uint16_t ops_needed = num_ops - ops_unused;
+
+			/*
+			 * Move the unused operations from the previous
+			 * enqueue_burst call to the front, to maintain order
+			 */
+			if ((ops_unused > 0) && (num_enq > 0)) {
+				size_t nb_b_to_mov =
+				      ops_unused * sizeof(struct rte_comp_op *);
+
+				memmove(ops, &ops[num_enq], nb_b_to_mov);
+			}
+
+			/* Allocate compression operations */
+			if (ops_needed && !rte_comp_op_bulk_alloc(
+						test_data->op_pool,
+						&ops[ops_unused],
+						ops_needed)) {
+				RTE_LOG(ERR, USER1,
+				      "Could not allocate enough operations\n");
+				res = -1;
+				goto end;
+			}
+			allocated += ops_needed;
+
+			for (i = 0; i < ops_needed; i++) {
+				/*
+				 * Calculate next buffer to attach to operation
+				 */
+				uint32_t buf_id = total_enq_ops + i +
+						ops_unused;
+				uint16_t op_id = ops_unused + i;
+				/* Reset all data in output buffers */
+				struct rte_mbuf *m = output_bufs[buf_id];
+
+				m->pkt_len = test_data->seg_sz * m->nb_segs;
+				while (m) {
+					m->data_len = m->buf_len - m->data_off;
+					m = m->next;
+				}
+				ops[op_id]->m_src = input_bufs[buf_id];
+				ops[op_id]->m_dst = output_bufs[buf_id];
+				ops[op_id]->src.offset = 0;
+				ops[op_id]->src.length =
+					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
+				ops[op_id]->dst.offset = 0;
+				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
+				ops[op_id]->input_chksum = buf_id;
+				ops[op_id]->private_xform = priv_xform;
+			}
+
+			num_enq = rte_compressdev_enqueue_burst(dev_id, 0, ops,
+								num_ops);
+			if (num_enq == 0) {
+				struct rte_compressdev_stats stats;
+
+				rte_compressdev_stats_get(dev_id, &stats);
+				if (stats.enqueue_err_count) {
+					res = -1;
+					goto end;
+				}
+			}
+
+			ops_unused = num_ops - num_enq;
+			remaining_ops -= num_enq;
+			total_enq_ops += num_enq;
+
+			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
+							   deq_ops,
+							   test_data->burst_sz);
+			total_deq_ops += num_deq;
+
+			if (iter == num_iter - 1)
+				trim_dst_mbufs(test_data, deq_ops, num_deq);
+			rte_mempool_put_bulk(test_data->op_pool,
+					     (void **)deq_ops, num_deq);
+			allocated -= num_deq;
+		}
+
+		/* Dequeue the last operations */
+		while (total_deq_ops < total_ops) {
+			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
+						deq_ops, test_data->burst_sz);
+			if (num_deq == 0) {
+				struct rte_compressdev_stats stats;
+
+				rte_compressdev_stats_get(dev_id, &stats);
+				if (stats.dequeue_err_count) {
+					res = -1;
+					goto end;
+				}
+			}
+
+			total_deq_ops += num_deq;
+
+			if (iter == num_iter - 1)
+				trim_dst_mbufs(test_data, deq_ops, num_deq);
+			rte_mempool_put_bulk(test_data->op_pool,
+					     (void **)deq_ops, num_deq);
+			allocated -= num_deq;
+		}
+	}
+
+	tsc_end = rte_rdtsc();
+	tsc_duration = tsc_end - tsc_start;
+
+	if (type == RTE_COMP_COMPRESS)
+		test_data->comp_tsc_duration[level] = tsc_duration / num_iter;
+	else
+		test_data->decomp_tsc_duration[level] = tsc_duration / num_iter;
+
+end:
+	rte_mempool_put_bulk(test_data->op_pool, (void **)ops, allocated);
+	rte_compressdev_private_xform_free(dev_id, priv_xform);
+	rte_free(ops);
+	return res;
+}
+
+/*
+ * Benchmark compression, then decompression, at the given level and derive
+ * the cycles/byte and Gbps figures from the TSC durations main_loop() stored
+ * in test_data. Returns EXIT_SUCCESS, or EXIT_FAILURE as soon as a run fails.
+ */
+int
+cperf_benchmark(struct comp_test_data *test_data, uint8_t level)
+{
+	int run;
+
+	/*
+	 * Each direction is run twice: the first run warms up the cache and
+	 * its stored duration is overwritten by the second run.
+	 */
+	for (run = 0; run < 2; run++)
+		if (main_loop(test_data, level, RTE_COMP_COMPRESS) < 0)
+			return EXIT_FAILURE;
+
+	for (run = 0; run < 2; run++)
+		if (main_loop(test_data, level, RTE_COMP_DECOMPRESS) < 0)
+			return EXIT_FAILURE;
+
+	test_data->comp_tsc_byte =
+			(double)(test_data->comp_tsc_duration[level]) /
+					test_data->input_data_sz;
+
+	test_data->decomp_tsc_byte =
+			(double)(test_data->decomp_tsc_duration[level]) /
+					test_data->input_data_sz;
+
+	/* Bytes per second = TSC Hz / cycles per byte; scaled to Gbit/s */
+	test_data->comp_gbps = rte_get_tsc_hz() / test_data->comp_tsc_byte * 8 /
+			1000000000;
+
+	test_data->decomp_gbps = rte_get_tsc_hz() / test_data->decomp_tsc_byte
+			* 8 / 1000000000;
+
+	return EXIT_SUCCESS;
+}
diff --git a/app/test-compress-perf/comp_perf_test_benchmark.h b/app/test-compress-perf/comp_perf_test_benchmark.h
new file mode 100644
index 0000000..b193445
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_test_benchmark.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _COMP_PERF_TEST_BENCHMARK_
+#define _COMP_PERF_TEST_BENCHMARK_
+
+#include "comp_perf_options.h"
+
+int
+cperf_benchmark(struct comp_test_data *test_data, uint8_t level);
+
+#endif
diff --git a/app/test-compress-perf/comp_perf_test_verify.c b/app/test-compress-perf/comp_perf_test_verify.c
new file mode 100644
index 0000000..f5ea105
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_test_verify.c
@@ -0,0 +1,339 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_compressdev.h>
+
+#include "comp_perf_test_verify.h"
+
+static int /* 0 on success, -1 on any failure */
+main_loop(struct comp_test_data *test_data, uint8_t level,
+			enum rte_comp_xform_type type, /* anything other than RTE_COMP_COMPRESS selects decompress */
+			uint8_t *output_data_ptr, /* flat buffer receiving each op's produced bytes back to back */
+			size_t *output_data_sz) /* optional out: total bytes produced */
+{
+	uint8_t dev_id = test_data->cdev_id; /* NOTE(review): dereferences test_data before the NULL check below */
+	uint32_t i, iter, num_iter;
+	struct rte_comp_op **ops, **deq_ops; /* one allocation: first half enqueue staging, second half dequeue results */
+	void *priv_xform = NULL;
+	struct rte_comp_xform xform;
+	size_t output_size = 0;
+	struct rte_mbuf **input_bufs, **output_bufs;
+	int res = 0;
+	int allocated = 0; /* ops taken from op_pool and not yet put back */
+
+	if (test_data == NULL || !test_data->burst_sz) {
+		RTE_LOG(ERR, USER1,
+			"Unknown burst size\n");
+		return -1;
+	}
+
+	ops = rte_zmalloc_socket(NULL,
+		2 * test_data->total_bufs * sizeof(struct rte_comp_op *),
+		0, rte_socket_id());
+
+	if (ops == NULL) {
+		RTE_LOG(ERR, USER1,
+			"Can't allocate memory for ops strucures\n");
+		return -1;
+	}
+
+	deq_ops = &ops[test_data->total_bufs]; /* dequeue array starts right after the total_bufs enqueue slots */
+
+	if (type == RTE_COMP_COMPRESS) {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_COMPRESS,
+			.compress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.deflate.huffman = test_data->huffman_enc,
+				.level = level,
+				.window_size = test_data->window_sz,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = test_data->decomp_bufs; /* compress: source is decomp_bufs, destination is comp_bufs */
+		output_bufs = test_data->comp_bufs;
+	} else {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_DECOMPRESS,
+			.decompress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.window_size = test_data->window_sz,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = test_data->comp_bufs; /* decompress: the two buffer sets swap roles */
+		output_bufs = test_data->decomp_bufs;
+	}
+
+	/* Create private xform */
+	if (rte_compressdev_private_xform_create(dev_id, &xform,
+			&priv_xform) < 0) {
+		RTE_LOG(ERR, USER1, "Private xform could not be created\n");
+		res = -1;
+		goto end;
+	}
+
+	num_iter = 1; /* the verification pass runs exactly once */
+
+	for (iter = 0; iter < num_iter; iter++) {
+		uint32_t total_ops = test_data->total_bufs;
+		uint32_t remaining_ops = test_data->total_bufs; /* ops not yet accepted by the device */
+		uint32_t total_deq_ops = 0;
+		uint32_t total_enq_ops = 0;
+		uint16_t ops_unused = 0; /* ops prepared for the previous burst but not accepted */
+		uint16_t num_enq = 0;
+		uint16_t num_deq = 0;
+
+		output_size = 0;
+
+		while (remaining_ops > 0) {
+			uint16_t num_ops = RTE_MIN(remaining_ops,
+						   test_data->burst_sz);
+			uint16_t ops_needed = num_ops - ops_unused; /* leftover ops are reused, only the shortfall is allocated */
+
+			/*
+			 * Move the unused operations from the previous
+			 * enqueue_burst call to the front, to maintain order
+			 */
+			if ((ops_unused > 0) && (num_enq > 0)) {
+				size_t nb_b_to_mov =
+				      ops_unused * sizeof(struct rte_comp_op *);
+
+				memmove(ops, &ops[num_enq], nb_b_to_mov);
+			}
+
+			/* Allocate compression operations */
+			if (ops_needed && !rte_comp_op_bulk_alloc(
+						test_data->op_pool,
+						&ops[ops_unused],
+						ops_needed)) {
+				RTE_LOG(ERR, USER1,
+				      "Could not allocate enough operations\n");
+				res = -1;
+				goto end;
+			}
+			allocated += ops_needed;
+
+			for (i = 0; i < ops_needed; i++) {
+				/*
+				 * Calculate next buffer to attach to operation
+				 */
+				uint32_t buf_id = total_enq_ops + i +
+						ops_unused;
+				uint16_t op_id = ops_unused + i; /* new ops are placed after the leftover ones */
+				/* Reset all data in output buffers */
+				struct rte_mbuf *m = output_bufs[buf_id];
+
+				m->pkt_len = test_data->seg_sz * m->nb_segs;
+				while (m) {
+					m->data_len = m->buf_len - m->data_off;
+					m = m->next;
+				}
+				ops[op_id]->m_src = input_bufs[buf_id];
+				ops[op_id]->m_dst = output_bufs[buf_id];
+				ops[op_id]->src.offset = 0;
+				ops[op_id]->src.length =
+					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
+				ops[op_id]->dst.offset = 0;
+				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
+				ops[op_id]->input_chksum = buf_id; /* NOTE(review): stores the buffer index; not read back in this function */
+				ops[op_id]->private_xform = priv_xform;
+			}
+
+			num_enq = rte_compressdev_enqueue_burst(dev_id, 0, ops,
+								num_ops);
+			if (num_enq == 0) { /* nothing accepted: fail only if the device reports enqueue errors, else retry */
+				struct rte_compressdev_stats stats;
+
+				rte_compressdev_stats_get(dev_id, &stats);
+				if (stats.enqueue_err_count) {
+					res = -1;
+					goto end;
+				}
+			}
+
+			ops_unused = num_ops - num_enq;
+			remaining_ops -= num_enq;
+			total_enq_ops += num_enq;
+
+			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
+							   deq_ops,
+							   test_data->burst_sz);
+			total_deq_ops += num_deq;
+
+			for (i = 0; i < num_deq; i++) {
+				struct rte_comp_op *op = deq_ops[i];
+				const void *read_data_addr =
+						rte_pktmbuf_read(op->m_dst, 0,
+						op->produced, output_data_ptr);
+				if (read_data_addr == NULL) {
+					RTE_LOG(ERR, USER1,
+						"Could not copy buffer in destination\n");
+					res = -1;
+					goto end;
+				}
+
+				if (read_data_addr != output_data_ptr) /* result was not placed in our buffer, so copy it there */
+					rte_memcpy(output_data_ptr,
+						   rte_pktmbuf_mtod(op->m_dst,
+								    uint8_t *),
+						   op->produced);
+				output_data_ptr += op->produced; /* next op's output is appended right after this one */
+				output_size += op->produced;
+
+			}
+
+
+			if (iter == num_iter - 1) { /* last iteration only */
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+					struct rte_mbuf *m = op->m_dst;
+
+					m->pkt_len = op->produced; /* trim destination lengths to what was actually produced */
+					uint32_t remaining_data = op->produced;
+					uint16_t data_to_append;
+
+					while (remaining_data > 0) {
+						data_to_append =
+							RTE_MIN(remaining_data,
+							     test_data->seg_sz);
+						m->data_len = data_to_append;
+						remaining_data -=
+								data_to_append;
+						m = m->next; /* NOTE(review): no NULL check; relies on produced fitting in the chain */
+					}
+				}
+			}
+			rte_mempool_put_bulk(test_data->op_pool,
+					     (void **)deq_ops, num_deq); /* completed ops go back to the pool */
+			allocated -= num_deq;
+		}
+
+		/* Dequeue the last operations */
+		while (total_deq_ops < total_ops) {
+			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
+						deq_ops, test_data->burst_sz);
+			if (num_deq == 0) { /* nothing ready: fail only if the device reports dequeue errors, else poll again */
+				struct rte_compressdev_stats stats;
+
+				rte_compressdev_stats_get(dev_id, &stats);
+				if (stats.dequeue_err_count) {
+					res = -1;
+					goto end;
+				}
+			}
+
+			total_deq_ops += num_deq;
+
+			for (i = 0; i < num_deq; i++) {
+				struct rte_comp_op *op = deq_ops[i];
+				const void *read_data_addr =
+						rte_pktmbuf_read(op->m_dst,
+								 op->dst.offset, /* NOTE(review): the earlier loop passes a literal 0; dst.offset is set to 0 above */
+						op->produced, output_data_ptr);
+				if (read_data_addr == NULL) {
+					RTE_LOG(ERR, USER1,
+						"Could not copy buffer in destination\n");
+					res = -1;
+					goto end;
+				}
+
+				if (read_data_addr != output_data_ptr) /* same copy-out rule as in the enqueue loop */
+					rte_memcpy(output_data_ptr,
+						   rte_pktmbuf_mtod(
+							op->m_dst, uint8_t *),
+						   op->produced);
+				output_data_ptr += op->produced;
+				output_size += op->produced;
+
+			}
+
+			if (iter == num_iter - 1) { /* last iteration only: same length trimming as above */
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+					struct rte_mbuf *m = op->m_dst;
+
+					m->pkt_len = op->produced;
+					uint32_t remaining_data = op->produced;
+					uint16_t data_to_append;
+
+					while (remaining_data > 0) {
+						data_to_append =
+						RTE_MIN(remaining_data,
+							test_data->seg_sz);
+						m->data_len = data_to_append;
+						remaining_data -=
+								data_to_append;
+						m = m->next; /* NOTE(review): no NULL check here either */
+					}
+				}
+			}
+			rte_mempool_put_bulk(test_data->op_pool,
+					     (void **)deq_ops, num_deq);
+			allocated -= num_deq;
+		}
+	}
+
+	if (output_data_sz)
+		*output_data_sz = output_size; /* only written on the success path */
+end:
+	rte_mempool_put_bulk(test_data->op_pool, (void **)ops, allocated); /* NOTE(review): allocated also counts ops still in flight; confirm ops[0..allocated) are all valid on error paths */
+	rte_compressdev_private_xform_free(dev_id, priv_xform);
+	rte_free(ops);
+	return res;
+}
+
+
+
+int /* EXIT_SUCCESS, or EXIT_FAILURE if a pass fails or the round trip does not match */
+cperf_verification(struct comp_test_data *test_data, uint8_t level)
+{
+	int ret = EXIT_SUCCESS;
+
+	test_data->ratio = 0; /* stays 0 if any step below fails */
+
+	if (main_loop(test_data, level, RTE_COMP_COMPRESS, /* compress pass: output and its size land in test_data */
+		      test_data->compressed_data,
+		      &test_data->comp_data_sz) < 0) {
+		ret = EXIT_FAILURE;
+		goto end;
+	}
+
+	if (main_loop(test_data, level, RTE_COMP_DECOMPRESS, /* decompress pass over what the compress pass produced */
+		      test_data->decompressed_data,
+		      &test_data->decomp_data_sz) < 0) {
+		ret = EXIT_FAILURE;
+		goto end;
+	}
+
+	if (test_data->decomp_data_sz != test_data->input_data_sz) { /* cheap length check before comparing contents */
+		RTE_LOG(ERR, USER1,
+	   "Decompressed data length not equal to input data length\n");
+		RTE_LOG(ERR, USER1,
+			"Decompressed size = %zu, expected = %zu\n",
+			test_data->decomp_data_sz, test_data->input_data_sz);
+		ret = EXIT_FAILURE;
+		goto end;
+	} else {
+		if (memcmp(test_data->decompressed_data, /* byte-for-byte comparison against the original input */
+				test_data->input_data,
+				test_data->input_data_sz) != 0) {
+			RTE_LOG(ERR, USER1,
+		    "Decompressed data is not the same as file data\n");
+			ret = EXIT_FAILURE;
+			goto end;
+		}
+	}
+
+	test_data->ratio = (double) test_data->comp_data_sz / /* compressed size as a percentage of the input size */
+			test_data->input_data_sz * 100;
+
+end:
+	return ret;
+}
diff --git a/app/test-compress-perf/comp_perf_test_verify.h b/app/test-compress-perf/comp_perf_test_verify.h
new file mode 100644
index 0000000..67c6b49
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_test_verify.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _COMP_PERF_TEST_VERIFY_
+#define _COMP_PERF_TEST_VERIFY_
+
+#include "comp_perf_options.h"
+
+int
+cperf_verification(struct comp_test_data *test_data, uint8_t level);
+
+#endif
diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c
index 5950c96..4de913e 100644
--- a/app/test-compress-perf/main.c
+++ b/app/test-compress-perf/main.c
@@ -5,10 +5,11 @@
 #include <rte_malloc.h>
 #include <rte_eal.h>
 #include <rte_log.h>
-#include <rte_cycles.h>
 #include <rte_compressdev.h>
 
 #include "comp_perf_options.h"
+#include "comp_perf_test_verify.h"
+#include "comp_perf_test_benchmark.h"
 
 #define NUM_MAX_XFORMS 16
 #define NUM_MAX_INFLIGHT_OPS 512
@@ -442,285 +443,7 @@ free_bufs(struct comp_test_data *test_data)
 	}
 }
 
-static int
-main_loop(struct comp_test_data *test_data, uint8_t level,
-			enum rte_comp_xform_type type,
-			uint8_t *output_data_ptr,
-			size_t *output_data_sz,
-			unsigned int benchmarking)
-{
-	uint8_t dev_id = test_data->cdev_id;
-	uint32_t i, iter, num_iter;
-	struct rte_comp_op **ops, **deq_ops;
-	void *priv_xform = NULL;
-	struct rte_comp_xform xform;
-	size_t output_size = 0;
-	struct rte_mbuf **input_bufs, **output_bufs;
-	int res = 0;
-	int allocated = 0;
-
-	if (test_data == NULL || !test_data->burst_sz) {
-		RTE_LOG(ERR, USER1,
-			"Unknown burst size\n");
-		return -1;
-	}
-
-	ops = rte_zmalloc_socket(NULL,
-		2 * test_data->total_bufs * sizeof(struct rte_comp_op *),
-		0, rte_socket_id());
-
-	if (ops == NULL) {
-		RTE_LOG(ERR, USER1,
-			"Can't allocate memory for ops strucures\n");
-		return -1;
-	}
-
-	deq_ops = &ops[test_data->total_bufs];
-
-	if (type == RTE_COMP_COMPRESS) {
-		xform = (struct rte_comp_xform) {
-			.type = RTE_COMP_COMPRESS,
-			.compress = {
-				.algo = RTE_COMP_ALGO_DEFLATE,
-				.deflate.huffman = test_data->huffman_enc,
-				.level = level,
-				.window_size = test_data->window_sz,
-				.chksum = RTE_COMP_CHECKSUM_NONE,
-				.hash_algo = RTE_COMP_HASH_ALGO_NONE
-			}
-		};
-		input_bufs = test_data->decomp_bufs;
-		output_bufs = test_data->comp_bufs;
-	} else {
-		xform = (struct rte_comp_xform) {
-			.type = RTE_COMP_DECOMPRESS,
-			.decompress = {
-				.algo = RTE_COMP_ALGO_DEFLATE,
-				.chksum = RTE_COMP_CHECKSUM_NONE,
-				.window_size = test_data->window_sz,
-				.hash_algo = RTE_COMP_HASH_ALGO_NONE
-			}
-		};
-		input_bufs = test_data->comp_bufs;
-		output_bufs = test_data->decomp_bufs;
-	}
-
-	/* Create private xform */
-	if (rte_compressdev_private_xform_create(dev_id, &xform,
-			&priv_xform) < 0) {
-		RTE_LOG(ERR, USER1, "Private xform could not be created\n");
-		res = -1;
-		goto end;
-	}
 
-	uint64_t tsc_start, tsc_end, tsc_duration;
-
-	tsc_start = tsc_end = tsc_duration = 0;
-	if (benchmarking) {
-		tsc_start = rte_rdtsc();
-		num_iter = test_data->num_iter;
-	} else
-		num_iter = 1;
-
-	for (iter = 0; iter < num_iter; iter++) {
-		uint32_t total_ops = test_data->total_bufs;
-		uint32_t remaining_ops = test_data->total_bufs;
-		uint32_t total_deq_ops = 0;
-		uint32_t total_enq_ops = 0;
-		uint16_t ops_unused = 0;
-		uint16_t num_enq = 0;
-		uint16_t num_deq = 0;
-
-		output_size = 0;
-
-		while (remaining_ops > 0) {
-			uint16_t num_ops = RTE_MIN(remaining_ops,
-						   test_data->burst_sz);
-			uint16_t ops_needed = num_ops - ops_unused;
-
-			/*
-			 * Move the unused operations from the previous
-			 * enqueue_burst call to the front, to maintain order
-			 */
-			if ((ops_unused > 0) && (num_enq > 0)) {
-				size_t nb_b_to_mov =
-				      ops_unused * sizeof(struct rte_comp_op *);
-
-				memmove(ops, &ops[num_enq], nb_b_to_mov);
-			}
-
-			/* Allocate compression operations */
-			if (ops_needed && !rte_comp_op_bulk_alloc(
-						test_data->op_pool,
-						&ops[ops_unused],
-						ops_needed)) {
-				RTE_LOG(ERR, USER1,
-				      "Could not allocate enough operations\n");
-				res = -1;
-				goto end;
-			}
-			allocated += ops_needed;
-
-			for (i = 0; i < ops_needed; i++) {
-				/*
-				 * Calculate next buffer to attach to operation
-				 */
-				uint32_t buf_id = total_enq_ops + i +
-						ops_unused;
-				uint16_t op_id = ops_unused + i;
-				/* Reset all data in output buffers */
-				struct rte_mbuf *m = output_bufs[buf_id];
-
-				m->pkt_len = test_data->seg_sz * m->nb_segs;
-				while (m) {
-					m->data_len = m->buf_len - m->data_off;
-					m = m->next;
-				}
-				ops[op_id]->m_src = input_bufs[buf_id];
-				ops[op_id]->m_dst = output_bufs[buf_id];
-				ops[op_id]->src.offset = 0;
-				ops[op_id]->src.length =
-					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
-				ops[op_id]->dst.offset = 0;
-				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
-				ops[op_id]->input_chksum = buf_id;
-				ops[op_id]->private_xform = priv_xform;
-			}
-
-			num_enq = rte_compressdev_enqueue_burst(dev_id, 0, ops,
-								num_ops);
-			ops_unused = num_ops - num_enq;
-			remaining_ops -= num_enq;
-			total_enq_ops += num_enq;
-
-			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
-							   deq_ops,
-							   test_data->burst_sz);
-			total_deq_ops += num_deq;
-			if (benchmarking == 0) {
-				for (i = 0; i < num_deq; i++) {
-					struct rte_comp_op *op = deq_ops[i];
-					const void *read_data_addr =
-						rte_pktmbuf_read(op->m_dst, 0,
-						op->produced, output_data_ptr);
-					if (read_data_addr == NULL) {
-						RTE_LOG(ERR, USER1,
-				      "Could not copy buffer in destination\n");
-						res = -1;
-						goto end;
-					}
-
-					if (read_data_addr != output_data_ptr)
-						rte_memcpy(output_data_ptr,
-							rte_pktmbuf_mtod(
-							  op->m_dst, uint8_t *),
-							op->produced);
-					output_data_ptr += op->produced;
-					output_size += op->produced;
-
-				}
-			}
-
-			if (iter == num_iter - 1) {
-				for (i = 0; i < num_deq; i++) {
-					struct rte_comp_op *op = deq_ops[i];
-					struct rte_mbuf *m = op->m_dst;
-
-					m->pkt_len = op->produced;
-					uint32_t remaining_data = op->produced;
-					uint16_t data_to_append;
-
-					while (remaining_data > 0) {
-						data_to_append =
-							RTE_MIN(remaining_data,
-							     test_data->seg_sz);
-						m->data_len = data_to_append;
-						remaining_data -=
-								data_to_append;
-						m = m->next;
-					}
-				}
-			}
-			rte_mempool_put_bulk(test_data->op_pool,
-					     (void **)deq_ops, num_deq);
-			allocated -= num_deq;
-		}
-
-		/* Dequeue the last operations */
-		while (total_deq_ops < total_ops) {
-			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
-						deq_ops, test_data->burst_sz);
-			total_deq_ops += num_deq;
-			if (benchmarking == 0) {
-				for (i = 0; i < num_deq; i++) {
-					struct rte_comp_op *op = deq_ops[i];
-					const void *read_data_addr =
-						rte_pktmbuf_read(op->m_dst, 0,
-						op->produced, output_data_ptr);
-					if (read_data_addr == NULL) {
-						RTE_LOG(ERR, USER1,
-				      "Could not copy buffer in destination\n");
-						res = -1;
-						goto end;
-					}
-
-					if (read_data_addr != output_data_ptr)
-						rte_memcpy(output_data_ptr,
-							rte_pktmbuf_mtod(
-							op->m_dst, uint8_t *),
-							op->produced);
-					output_data_ptr += op->produced;
-					output_size += op->produced;
-
-				}
-			}
-
-			if (iter == num_iter - 1) {
-				for (i = 0; i < num_deq; i++) {
-					struct rte_comp_op *op = deq_ops[i];
-					struct rte_mbuf *m = op->m_dst;
-
-					m->pkt_len = op->produced;
-					uint32_t remaining_data = op->produced;
-					uint16_t data_to_append;
-
-					while (remaining_data > 0) {
-						data_to_append =
-						RTE_MIN(remaining_data,
-							test_data->seg_sz);
-						m->data_len = data_to_append;
-						remaining_data -=
-								data_to_append;
-						m = m->next;
-					}
-				}
-			}
-			rte_mempool_put_bulk(test_data->op_pool,
-					     (void **)deq_ops, num_deq);
-			allocated -= num_deq;
-		}
-	}
-
-	if (benchmarking) {
-		tsc_end = rte_rdtsc();
-		tsc_duration = tsc_end - tsc_start;
-
-		if (type == RTE_COMP_COMPRESS)
-			test_data->comp_tsc_duration[level] =
-					tsc_duration / num_iter;
-		else
-			test_data->decomp_tsc_duration[level] =
-					tsc_duration / num_iter;
-	}
-
-	if (benchmarking == 0 && output_data_sz)
-		*output_data_sz = output_size;
-end:
-	rte_mempool_put_bulk(test_data->op_pool, (void **)ops, allocated);
-	rte_compressdev_private_xform_free(dev_id, priv_xform);
-	rte_free(ops);
-	return res;
-}
 
 int
 main(int argc, char **argv)
@@ -743,6 +466,7 @@ main(int argc, char **argv)
 		rte_exit(EXIT_FAILURE, "Cannot reserve memory in socket %d\n",
 				rte_socket_id());
 
+	ret = EXIT_SUCCESS;
 	cleanup = ST_TEST_DATA;
 	comp_perf_options_default(test_data);
 
@@ -785,9 +509,6 @@ main(int argc, char **argv)
 	else
 		level = test_data->level.list[0];
 
-	size_t comp_data_sz;
-	size_t decomp_data_sz;
-
 	printf("Burst size = %u\n", test_data->burst_sz);
 	printf("File size = %zu\n", test_data->input_data_sz);
 
@@ -798,84 +519,27 @@ main(int argc, char **argv)
 
 	cleanup = ST_DURING_TEST;
 	while (level <= test_data->level.max) {
+
 		/*
 		 * Run a first iteration, to verify compression and
 		 * get the compression ratio for the level
 		 */
-		if (main_loop(test_data, level, RTE_COMP_COMPRESS,
-			      test_data->compressed_data,
-			      &comp_data_sz, 0) < 0) {
-			ret = EXIT_FAILURE;
-			goto end;
-		}
-
-		if (main_loop(test_data, level, RTE_COMP_DECOMPRESS,
-			      test_data->decompressed_data,
-			      &decomp_data_sz, 0) < 0) {
-			ret = EXIT_FAILURE;
-			goto end;
-		}
-
-		if (decomp_data_sz != test_data->input_data_sz) {
-			RTE_LOG(ERR, USER1,
-		   "Decompressed data length not equal to input data length\n");
-			RTE_LOG(ERR, USER1,
-				"Decompressed size = %zu, expected = %zu\n",
-				decomp_data_sz, test_data->input_data_sz);
-			ret = EXIT_FAILURE;
-			goto end;
-		} else {
-			if (memcmp(test_data->decompressed_data,
-					test_data->input_data,
-					test_data->input_data_sz) != 0) {
-				RTE_LOG(ERR, USER1,
-			    "Decompressed data is not the same as file data\n");
-				ret = EXIT_FAILURE;
-				goto end;
-			}
-		}
-
-		double ratio = (double) comp_data_sz /
-						test_data->input_data_sz * 100;
+		if (cperf_verification(test_data, level) != EXIT_SUCCESS)
+			break;
 
 		/*
-		 * Run the tests twice, discarding the first performance
-		 * results, before the cache is warmed up
+		 * Run benchmarking test
 		 */
-		for (i = 0; i < 2; i++) {
-			if (main_loop(test_data, level, RTE_COMP_COMPRESS,
-					NULL, NULL, 1) < 0) {
-				ret = EXIT_FAILURE;
-				goto end;
-			}
-		}
-
-		for (i = 0; i < 2; i++) {
-			if (main_loop(test_data, level, RTE_COMP_DECOMPRESS,
-					NULL, NULL, 1) < 0) {
-				ret = EXIT_FAILURE;
-				goto end;
-			}
-		}
-
-		uint64_t comp_tsc_duration =
-				test_data->comp_tsc_duration[level];
-		double comp_tsc_byte = (double)comp_tsc_duration /
-						test_data->input_data_sz;
-		double comp_gbps = rte_get_tsc_hz() / comp_tsc_byte * 8 /
-				1000000000;
-		uint64_t decomp_tsc_duration =
-				test_data->decomp_tsc_duration[level];
-		double decomp_tsc_byte = (double)decomp_tsc_duration /
-						test_data->input_data_sz;
-		double decomp_gbps = rte_get_tsc_hz() / decomp_tsc_byte * 8 /
-				1000000000;
+		if (cperf_benchmark(test_data, level) != EXIT_SUCCESS)
+			break;
 
 		printf("%6u%12zu%17.2f%19"PRIu64"%21.2f"
 					"%15.2f%21"PRIu64"%23.2f%16.2f\n",
-		       level, comp_data_sz, ratio, comp_tsc_duration,
-		       comp_tsc_byte, comp_gbps, decomp_tsc_duration,
-		       decomp_tsc_byte, decomp_gbps);
+		       level, test_data->comp_data_sz, test_data->ratio,
+		       test_data->comp_tsc_duration[level],
+		       test_data->comp_tsc_byte, test_data->comp_gbps,
+		       test_data->decomp_tsc_duration[level],
+		       test_data->decomp_tsc_byte, test_data->decomp_gbps);
 
 		if (test_data->level.inc != 0)
 			level += test_data->level.inc;
@@ -886,8 +550,6 @@ main(int argc, char **argv)
 		}
 	}
 
-	ret = EXIT_SUCCESS;
-
 end:
 	switch (cleanup) {
 
diff --git a/app/test-compress-perf/meson.build b/app/test-compress-perf/meson.build
index ba6d64d..ec73e5e 100644
--- a/app/test-compress-perf/meson.build
+++ b/app/test-compress-perf/meson.build
@@ -3,5 +3,7 @@
 
 allow_experimental_apis = true
 sources = files('comp_perf_options_parse.c',
-		'main.c')
+		'main.c',
+		'comp_perf_test_verify.c',
+		'comp_perf_test_benchmark.c')
 deps = ['compressdev']
-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH v3 3/5] doc/guides/tools: add doc files
  2018-11-23 13:06     ` [dpdk-dev] [PATCH v3 3/5] doc/guides/tools: add doc files Tomasz Jozwiak
@ 2018-11-23 14:52       ` Varghese, Vipin
  2018-11-23 14:59         ` Jozwiak, TomaszX
  0 siblings, 1 reply; 76+ messages in thread
From: Varghese, Vipin @ 2018-11-23 14:52 UTC (permalink / raw)
  To: Jozwiak, TomaszX, dev, Trahe, Fiona, Jozwiak, TomaszX,
	Shally.Verma, akhil.goyal

Hi Tomasz,

<snipped>

> +dpdk-test-compress-perf Application
> +===================================
> +

Suggestion to remove extra '='

> +The ``dpdk-test-compress-perf`` tool is a Data Plane Development Kit
> +(DPDK) utility that allows measuring performance parameters of PMDs

Is this a test application, or a utility like procinfo or pdump?

> +available in the compress tree. The application reads the data from a
> +file (--input-file), dumps all the file into a buffer and fills out the
> +data of input mbufs, which are passed to compress device with compression
> operations.
> +Then, the output buffers are fed into the decompression stage, and the
> +resulting data is compared against the original data (verification
> +phase). After that, a number of iterations are performed, compressing
> +first and decompressing later, to check the throughput rate (showing
> +cycles/iteration, cycles/Byte and Gbps, for compression and decompression).
> +
> +
> +Limitations
> +~~~~~~~~~~~
> +

Suggestion to remove extra '~'

> +* Only supports the fixed compression and stateless operation.
> +

Is it a limitation of the application that it does not cover stateful operation?

> +Command line options
> +--------------------
> +
> + ``--driver-name NAME``: compress driver to use
> +
> + ``--input-file NAME``: file to compress and decompress
> +
> + ``--extended-input-sz N``: extend file data up to this size (default:
> + no extension)
> +
> + ``--seg-sz N``: size of segment to store the data (default: 2048)
> +
> + ``--burst-sz N``: compress operation burst size
> +
> + ``--pool-sz N``: mempool size for compress operations/mbufs (default:
> + 8192)
> +
> + ``--max-num-sgl-segs N``: maximum number of segments for each mbuf
> + (default: 16)
> +
> + ``--num-iter N``: number of times the file will be
> + compressed/decompressed (default: 10000)
> +
> + ``--operation [comp/decomp/comp_and_decomp]``: perform test on
> + compression, decompression or both operations
> +
> + ``--huffman-enc [fixed/default]``: Huffman encoding (default: fixed)
> +
> + ``--compress-level N``: compression level, which could be a single
> + value, list or range (default: range between 1 and 9)
> +
> + ``--window-sz N``: base two log value of compression window size
> + (default: max supported by PMD)
> +
> + ``-h``: prints this help
> +
> +
> +Compiling the Application
> +-------------------------
> +
> +**Step 1: PMD setting**
> +
> +The ``dpdk-test-compress-perf`` tool depends on compression device
> +drivers PMD which can be disabled by default in the build configuration file
> ``common_base``.
> +The compression device drivers PMD which should be tested can be enabled
> by setting::
> +
> +   CONFIG_RTE_LIBRTE_PMD_ISAL=y
> +

Does this application only run with ISAL? If yes, would it be OK to mention this in the Limitations section?

> +
> +Running the Application
> +-----------------------
> +
> +The tool application has a number of command line options. Here is the
> sample command line:
> +

Here we are using 'tool application'. Would either 'application' or 'utility' on its own be the more accurate term?

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH v3 3/5] doc/guides/tools: add doc files
  2018-11-23 14:52       ` Varghese, Vipin
@ 2018-11-23 14:59         ` Jozwiak, TomaszX
  0 siblings, 0 replies; 76+ messages in thread
From: Jozwiak, TomaszX @ 2018-11-23 14:59 UTC (permalink / raw)
  To: Varghese, Vipin, dev, Trahe, Fiona, Shally.Verma, akhil.goyal



> -----Original Message-----
> From: Varghese, Vipin
> Sent: Friday, November 23, 2018 3:53 PM
> To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; Trahe,
> Fiona <fiona.trahe@intel.com>; Jozwiak, TomaszX
> <tomaszx.jozwiak@intel.com>; Shally.Verma@cavium.com;
> akhil.goyal@nxp.com
> Subject: RE: [dpdk-dev] [PATCH v3 3/5] doc/guides/tools: add doc files
> 
> Hi Tomasz,
> 
> <snipped>
> 
> > +dpdk-test-compress-perf Application
> > +===================================
> > +
> 
> Suggestion to remove extra '='
True , Thx



> 
> > +The ``dpdk-test-compress-perf`` tool is a Data Plane Development Kit
> > +(DPDK) utility that allows measuring performance parameters of PMDs
> 
> Is this test application or utility like procinfo or pdump?

'tool application' will change this.

> 
> > +available in the compress tree. The application reads the data from a
> > +file (--input-file), dumps all the file into a buffer and fills out
> > +the data of input mbufs, which are passed to compress device with
> > +compression
> > operations.
> > +Then, the output buffers are fed into the decompression stage, and
> > +the resulting data is compared against the original data
> > +(verification phase). After that, a number of iterations are
> > +performed, compressing first and decompressing later, to check the
> > +throughput rate (showing cycles/iteration, cycles/Byte and Gbps, for
> compression and decompression).
> > +
> > +
> > +Limitations
> > +~~~~~~~~~~~
> > +
> 
> Suggestion to remove extra '~'

True, will do

> 
> > +* Only supports the fixed compression and stateless operation.
> > +
> 
> Is the limitation of the application that it will not cover stateful?

In this initial version, yes. It will be updated in future versions.


> 
> > +Command line options
> > +--------------------
> > +
> > + ``--driver-name NAME``: compress driver to use
> > +
> > + ``--input-file NAME``: file to compress and decompress
> > +
> > + ``--extended-input-sz N``: extend file data up to this size (default:
> > + no extension)
> > +
> > + ``--seg-sz N``: size of segment to store the data (default: 2048)
> > +
> > + ``--burst-sz N``: compress operation burst size
> > +
> > + ``--pool-sz N``: mempool size for compress operations/mbufs (default:
> > + 8192)
> > +
> > + ``--max-num-sgl-segs N``: maximum number of segments for each mbuf
> > + (default: 16)
> > +
> > + ``--num-iter N``: number of times the file will be
> > + compressed/decompressed (default: 10000)
> > +
> > + ``--operation [comp/decomp/comp_and_decomp]``: perform test on
> > + compression, decompression or both operations
> > +
> > + ``--huffman-enc [fixed/default]``: Huffman encoding (default: fixed)
> > +
> > + ``--compress-level N``: compression level, which could be a single
> > + value, list or range (default: range between 1 and 9)
> > +
> > + ``--window-sz N``: base two log value of compression window size
> > + (default: max supported by PMD)
> > +
> > + ``-h``: prints this help
> > +
> > +
> > +Compiling the Application
> > +-------------------------
> > +
> > +**Step 1: PMD setting**
> > +
> > +The ``dpdk-test-compress-perf`` tool depends on compression device
> > +drivers PMD which can be disabled by default in the build
> > +configuration file
> > ``common_base``.
> > +The compression device drivers PMD which should be tested can be
> > +enabled
> > by setting::
> > +
> > +   CONFIG_RTE_LIBRTE_PMD_ISAL=y
> > +
> 
> Does this application only run with ISAL? If yes will it be ok to mention this in
> limitation?

No, this setting should be presented only as an example ("e.g.").
I will update this.

> 
> > +
> > +Running the Application
> > +-----------------------
> > +
> > +The tool application has a number of command line options. Here is
> > +the
> > sample command line:
> > +
> 
> Here we are using 'tool application'. Would either one of application or utility
> state the right usage?

True, will update this, Thx

--
Tomek

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH v4 3/5] doc/guides/tools: add doc files
  2018-11-23 14:27       ` [dpdk-dev] [PATCH v4 3/5] doc/guides/tools: add doc files Tomasz Jozwiak
@ 2018-11-23 15:00         ` Varghese, Vipin
  2018-11-23 15:12           ` Jozwiak, TomaszX
  0 siblings, 1 reply; 76+ messages in thread
From: Varghese, Vipin @ 2018-11-23 15:00 UTC (permalink / raw)
  To: Jozwiak, TomaszX, dev, Trahe, Fiona, Jozwiak, TomaszX,
	Shally.Verma, akhil.goyal

Apologies Tomasz, I shared my comments on v3. Sharing them again here for v4.

>  Crypto performance test application
>  M: Declan Doherty <declan.doherty@intel.com>
>  F: app/test-crypto-perf/
> diff --git a/doc/guides/rel_notes/release_18_11.rst
> b/doc/guides/rel_notes/release_18_11.rst
> index 32ff0e5..d44cf30 100644
> --- a/doc/guides/rel_notes/release_18_11.rst
> +++ b/doc/guides/rel_notes/release_18_11.rst
> @@ -328,6 +328,11 @@ New Features
>    additional command-line parameter values from the "DPDK_TEST_PARAMS"
>    environment variable to make this application easier to use.
> 
> +* **Added a compression performance test tool.**

It is called a 'test application' above, but a 'test tool' here. Should we make these consistent?

> 
>  API Changes
>  -----------
> diff --git a/doc/guides/tools/comp_perf.rst b/doc/guides/tools/comp_perf.rst
> new file mode 100644 index 0000000..1428348
> --- /dev/null
> +++ b/doc/guides/tools/comp_perf.rst
> @@ -0,0 +1,75 @@
> +..  SPDX-License-Identifier: BSD-3-Clause
> +    Copyright(c) 2018 Intel Corporation.
> +
> +dpdk-test-compress-perf Application
> +===================================

Should we remove extra '='?

> +
> +The ``dpdk-test-compress-perf`` tool is a Data Plane Development Kit
> +(DPDK) utility that allows measuring performance parameters of PMDs

Should we keep this as a test application? Or is this a utility like pdump or procinfo?

> +
> +
> +Limitations
> +~~~~~~~~~~~

Extra '~'?

> +
> +* Only supports the fixed compression and stateless operation.

Shouldn't the limitation be stated as 'stateful operation is not supported', rather than as 'supports stateless operation'?

> +
> +Command line options
> +--------------------
> +

Missing '-'

> +Compiling the Application
> +-------------------------
> +
> +**Step 1: PMD setting**
> +
> +The ``dpdk-test-compress-perf`` tool depends on compression device
> +drivers PMD which can be disabled by default in the build configuration file
> ``common_base``.
> +The compression device drivers PMD which should be tested can be enabled
> by setting::
> +
> +   CONFIG_RTE_LIBRTE_PMD_ISAL=y

Does this application test other PMDs? If not, can we cover this in the Limitations section?

> +
> +
> +Running the Application
> +-----------------------
> +
> +The tool application has a number of command line options. Here is the
> sample command line:

Do we need to reword 'tool application' to either 'test application' or 'test utility'?

> +
> +.. code-block:: console
> +
> +   ./build/app/dpdk-test-compress-perf  -l 4 -- --driver-name compress_qat --
> input-file test.txt --seg-sz 8192
> +    --compress-level 1:1:9 --num-iter 10 --extended-input-sz 1048576
> + --max-num-sgl-segs 16 --huffman-enc fixed
> +
> diff --git a/doc/guides/tools/index.rst b/doc/guides/tools/index.rst index
> a6e2c4c..24235ba 100644
> --- a/doc/guides/tools/index.rst
> +++ b/doc/guides/tools/index.rst
> @@ -42,3 +42,4 @@ DPDK Tools User Guides
>      testbbdev
>      cryptoperf
>      testeventdev
> +    comp_perf
> --
> 2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH v4 1/5] app/compress-perf: add parser
  2018-11-23 14:27       ` [dpdk-dev] [PATCH v4 1/5] app/compress-perf: add parser Tomasz Jozwiak
@ 2018-11-23 15:10         ` Varghese, Vipin
  2018-11-23 15:24           ` Bruce Richardson
  0 siblings, 1 reply; 76+ messages in thread
From: Varghese, Vipin @ 2018-11-23 15:10 UTC (permalink / raw)
  To: Jozwiak, TomaszX, dev, Trahe, Fiona, Jozwiak, TomaszX,
	Shally.Verma, akhil.goyal

<snipped>

> +#define MAX_DRIVER_NAME		64
> +#define MAX_INPUT_FILE_NAME	64

<snipped>

> +comp_perf_options_check(struct comp_test_data *test_data) {
> +	if (strcmp(test_data->driver_name, "") == 0) {

Should we use strncmp for boundary?

> +		RTE_LOG(ERR, USER1, "Driver name has to be set\n");
> +		return -1;
> +	}
> +
> +	if (strcmp(test_data->input_file, "") == 0) {

Should we use strncmp for boundary?

> +		RTE_LOG(ERR, USER1, "Input file name has to be set\n");
> +		return -1;
> +	}
> +

<snipped>

> +	test_data = rte_zmalloc_socket(NULL, sizeof(struct comp_test_data),
> +					0, rte_socket_id());

Suggestion: if the PMD is socket-specific, do we need to allocate on that specific socket too? (assuming a socket-specific HW or SW PMD can be used)

<snipped>

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH v4 3/5] doc/guides/tools: add doc files
  2018-11-23 15:00         ` Varghese, Vipin
@ 2018-11-23 15:12           ` Jozwiak, TomaszX
  2018-11-23 15:26             ` Varghese, Vipin
  0 siblings, 1 reply; 76+ messages in thread
From: Jozwiak, TomaszX @ 2018-11-23 15:12 UTC (permalink / raw)
  To: Varghese, Vipin, dev, Trahe, Fiona, Shally.Verma, akhil.goyal



> -----Original Message-----
> From: Varghese, Vipin
> Sent: Friday, November 23, 2018 4:00 PM
> To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; Trahe,
> Fiona <fiona.trahe@intel.com>; Jozwiak, TomaszX
> <tomaszx.jozwiak@intel.com>; Shally.Verma@cavium.com;
> akhil.goyal@nxp.com
> Subject: RE: [dpdk-dev] [PATCH v4 3/5] doc/guides/tools: add doc files
> 
> Apologies Tomasz, I shared my comments in v3. sharing for v4
> 
> >  Crypto performance test application
> >  M: Declan Doherty <declan.doherty@intel.com>
> >  F: app/test-crypto-perf/
> > diff --git a/doc/guides/rel_notes/release_18_11.rst
> > b/doc/guides/rel_notes/release_18_11.rst
> > index 32ff0e5..d44cf30 100644
> > --- a/doc/guides/rel_notes/release_18_11.rst
> > +++ b/doc/guides/rel_notes/release_18_11.rst
> > @@ -328,6 +328,11 @@ New Features
> >    additional command-line parameter values from the
> "DPDK_TEST_PARAMS"
> >    environment variable to make this application easier to use.
> >
> > +* **Added a compression performance test tool.**
> 
> It is mentioned as test application above, here test tool. Should we make this
> synced?

Will sync

> 
> >
> >  API Changes
> >  -----------
> > diff --git a/doc/guides/tools/comp_perf.rst
> > b/doc/guides/tools/comp_perf.rst new file mode 100644 index
> > 0000000..1428348
> > --- /dev/null
> > +++ b/doc/guides/tools/comp_perf.rst
> > @@ -0,0 +1,75 @@
> > +..  SPDX-License-Identifier: BSD-3-Clause
> > +    Copyright(c) 2018 Intel Corporation.
> > +
> > +dpdk-test-compress-perf Application
> > +===================================
> 
> Should we remove extra '='?

It's ok. Please take a look at console output or count the chars 



> 
> > +
> > +The ``dpdk-test-compress-perf`` tool is a Data Plane Development Kit
> > +(DPDK) utility that allows measuring performance parameters of PMDs
> 
> Should we keep this as test application? Or is this an utility like pdump or
> procinfo?
> 
> > +
> > +
> > +Limitations
> > +~~~~~~~~~~~
> 
> Extra '~'?

No. It's ok. Please take a look at console output or count the chars


> 
> > +
> > +* Only supports the fixed compression and stateless operation.
> 
> Is 'stateful' not supported a limitation rather than supporting stateless?

In this initial version we support only stateless operation. This will be updated in future versions.


> 
> > +
> > +Command line options
> > +--------------------
> > +
> 
> Missing '-'

No. It's ok. Please take a look at console output or count the chars

> 
> > +Compiling the Application
> > +-------------------------
> > +
> > +**Step 1: PMD setting**
> > +
> > +The ``dpdk-test-compress-perf`` tool depends on compression device
> > +drivers PMD which can be disabled by default in the build
> > +configuration file
> > ``common_base``.
> > +The compression device drivers PMD which should be tested can be
> > +enabled
> > by setting::
> > +
> > +   CONFIG_RTE_LIBRTE_PMD_ISAL=y
> 
> Does this test application other PMD? If not can we cover this in limitation?

Should be eg.:


> 
> > +
> > +
> > +Running the Application
> > +-----------------------
> > +
> > +The tool application has a number of command line options. Here is
> > +the
> > sample command line:
> 
> Do we need to reword 'tool application' to either 'test application' or 'test
> utility'?

Will sync this.

> 
> > +
> > +.. code-block:: console
> > +
> > +   ./build/app/dpdk-test-compress-perf  -l 4 -- --driver-name
> > + compress_qat --
> > input-file test.txt --seg-sz 8192
> > +    --compress-level 1:1:9 --num-iter 10 --extended-input-sz 1048576
> > + --max-num-sgl-segs 16 --huffman-enc fixed
> > +
> > diff --git a/doc/guides/tools/index.rst b/doc/guides/tools/index.rst
> > index a6e2c4c..24235ba 100644
> > --- a/doc/guides/tools/index.rst
> > +++ b/doc/guides/tools/index.rst
> > @@ -42,3 +42,4 @@ DPDK Tools User Guides
> >      testbbdev
> >      cryptoperf
> >      testeventdev
> > +    comp_perf
> > --
> > 2.7.4




Thx, Tomek

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH v4 1/5] app/compress-perf: add parser
  2018-11-23 15:10         ` Varghese, Vipin
@ 2018-11-23 15:24           ` Bruce Richardson
  2018-11-23 15:42             ` Jozwiak, TomaszX
  0 siblings, 1 reply; 76+ messages in thread
From: Bruce Richardson @ 2018-11-23 15:24 UTC (permalink / raw)
  To: Varghese, Vipin
  Cc: Jozwiak, TomaszX, dev, Trahe, Fiona, Shally.Verma, akhil.goyal

On Fri, Nov 23, 2018 at 03:10:51PM +0000, Varghese, Vipin wrote:
> <snipped>
> 
> > +#define MAX_DRIVER_NAME		64
> > +#define MAX_INPUT_FILE_NAME	64
> 
> <snipped>
> 
> > +comp_perf_options_check(struct comp_test_data *test_data) {
> > +	if (strcmp(test_data->driver_name, "") == 0) {
> 
> Should we use strncmp for boundary?
> 
Shouldn't be necessary here, but a better check might be just to check if
test_data->driver_name[0] == '\0'. No need to use a string function for
checking for an empty string.

/Bruce

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH v4 3/5] doc/guides/tools: add doc files
  2018-11-23 15:12           ` Jozwiak, TomaszX
@ 2018-11-23 15:26             ` Varghese, Vipin
  0 siblings, 0 replies; 76+ messages in thread
From: Varghese, Vipin @ 2018-11-23 15:26 UTC (permalink / raw)
  To: Jozwiak, TomaszX, dev, Trahe, Fiona, Shally.Verma, akhil.goyal

Thank you for the update

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH v4 1/5] app/compress-perf: add parser
  2018-11-23 15:24           ` Bruce Richardson
@ 2018-11-23 15:42             ` Jozwiak, TomaszX
  0 siblings, 0 replies; 76+ messages in thread
From: Jozwiak, TomaszX @ 2018-11-23 15:42 UTC (permalink / raw)
  To: Richardson, Bruce, Varghese, Vipin
  Cc: dev, Trahe, Fiona, Shally.Verma, akhil.goyal



> -----Original Message-----
> From: Richardson, Bruce
> Sent: Friday, November 23, 2018 4:25 PM
> To: Varghese, Vipin <vipin.varghese@intel.com>
> Cc: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org; Trahe,
> Fiona <fiona.trahe@intel.com>; Shally.Verma@cavium.com;
> akhil.goyal@nxp.com
> Subject: Re: [dpdk-dev] [PATCH v4 1/5] app/compress-perf: add parser
> 
> On Fri, Nov 23, 2018 at 03:10:51PM +0000, Varghese, Vipin wrote:
> > <snipped>
> >
> > > +#define MAX_DRIVER_NAME		64
> > > +#define MAX_INPUT_FILE_NAME	64
> >
> > <snipped>
> >
> > > +comp_perf_options_check(struct comp_test_data *test_data) {
> > > +	if (strcmp(test_data->driver_name, "") == 0) {
> >
> > Should we use strncmp for boundary?
> >
> Shouldn't be necessary here, but a better check might be just to check if
> test_data->driver_name[0] == '\0'. No need to use a string function for
> checking for an empty string.
> 
> /Bruce


True Bruce :D , thx, will update these lines.

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
  2018-10-17 16:47             ` Verma, Shally
@ 2018-11-30 14:43               ` Jozwiak, TomaszX
  2018-12-02  6:39                 ` Verma, Shally
  0 siblings, 1 reply; 76+ messages in thread
From: Jozwiak, TomaszX @ 2018-11-30 14:43 UTC (permalink / raw)
  To: Verma, Shally, Trahe, Fiona, Daly, Lee; +Cc: dev, akhil.goyal

Hi Shally,

I'm about to send V5 of the compression-perf tool.

Our performance testing shows that the number of sgls in a chain can be a factor in the performance.
So we want to keep this on the cmd line for the performance tool.
There are alternatives, like setting the input size and segment size to get the num segments desired, but I prefer
to have the option to specify the num segments explicitly.
We'll document that if max-num-sgl-segs x seg_sz > input size, then the number of segments in the chain will be lower (just enough to store all the data).
As regards adding the max_nb_segments_per_sgl into the rte_compressdev_info struct we're investigating
another workaround to this limitation in QAT, so will leave this off the API unless some other PMD needs it.
In the meantime we'll document the limitation in QAT.

Please let me know your thoughts.

--
Tomek

> -----Original Message-----
> From: Verma, Shally [mailto:Shally.Verma@cavium.com]
> Sent: Wednesday, October 17, 2018 6:48 PM
> To: Trahe, Fiona <fiona.trahe@intel.com>; Daly, Lee <lee.daly@intel.com>
> Cc: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org;
> akhil.goyal@nxp.com
> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
> measurement
> 
> 
> 
> >-----Original Message-----
> >From: Trahe, Fiona <fiona.trahe@intel.com>
> >Sent: 17 October 2018 22:15
> >To: Verma, Shally <Shally.Verma@cavium.com>; Daly, Lee
> ><lee.daly@intel.com>
> >Cc: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org;
> >akhil.goyal@nxp.com; Trahe, Fiona <fiona.trahe@intel.com>
> >Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
> >measurement
> >
> >External Email
> >
> >> -----Original Message-----
> >> From: Verma, Shally [mailto:Shally.Verma@cavium.com]
> >> Sent: Wednesday, October 17, 2018 8:43 AM
> >> To: Trahe, Fiona <fiona.trahe@intel.com>; Daly, Lee
> >> <lee.daly@intel.com>
> >> Cc: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org;
> >> akhil.goyal@nxp.com
> >> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add
> >> performance measurement
> >>
> >>
> >>
> >> >-----Original Message-----
> >> >From: Trahe, Fiona <fiona.trahe@intel.com>
> >> >Sent: 17 October 2018 20:04
> >> >To: Daly, Lee <lee.daly@intel.com>; Verma, Shally
> >> ><Shally.Verma@cavium.com>
> >> >Cc: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org;
> >> >akhil.goyal@nxp.com; Trahe, Fiona
> >> <fiona.trahe@intel.com>
> >> >Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add
> >> >performance measurement
> >> >
> >> >External Email
> >> >
> >> >Hi Shally, Lee,
> >> >
> >> >> -----Original Message-----
> >> >> From: Daly, Lee
> >> >> Sent: Monday, October 15, 2018 8:10 AM
> >> >> To: Verma, Shally <Shally.Verma@cavium.com>
> >> >> Cc: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org;
> >> >> Trahe, Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com
> >> >> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add
> >> >> performance measurement
> >> >>
> >> >> Thanks for your input Shally see comments below.
> >> >>
> >> >>
> >> >> I will be reviewing these changes while Tomasz is out this week.
> >> >>
> >> >> > -----Original Message-----
> >> >> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Verma,
> >> >> > Shally
> >> >> > Sent: Friday, October 12, 2018 11:16 AM
> >> >> > To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>;
> dev@dpdk.org;
> >> >> > Trahe, Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com; De
> >> >> > Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
> >> >> > Cc: De@dpdk.org; Lara@dpdk.org; Guarch@dpdk.org
> >> >> > Subject: Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add
> >> >> > performance measurement
> >> >> >
> >> >///
> >> >
> >> >> >Also, why do we need --max-num-
> >> >> > sgl-segs as an input option from user? Shouldn't input_sz and
> >> >> >seg_sz  internally decide on num-segs?
> >> >> > Or is it added to serve some other different purpose?
> >> >> Will have to get back to you on this one, seems illogical to get
> >> >> this input from user, But I will have to do further investigation to find if
> there was a different purpose.
> >> >
> >> >[Fiona] Some PMDs have a limit on how many links can be in an sgl
> >> >chain, e.g. in QAT case the PMD allocates a pool of internal
> >> >structures of a suitable size during device initialisation, this is not a hard
> limit but can be configured in .config to give the user control over the
> memory resources allocated.
> >> >This perf-tool max-num-sgl-segs is so the user can pick a value <=
> whatever the PMD's max is.
> >>
> >> Then also, I believe this could be taken care internally by an app.
> >> App can choose convenient number of sgl segs as per PMD capability
> >> and input sz and chunk sz selected by user.
> >> Just my thoughts.
> >[Fiona] Then we'd need to add this capability to the API, e.g. add
> >uint16_t max_nb_segments_per_sgl into the rte_compressdev_info struct.
> >Special case 0 means no limit.
> >We did consider this before, I can't remember why we didn't do it, I think
> it's needed.
> >I'll push an API patch for this in 19.02 and we can remove the
> >--max-num-sgl-segs param from the performance tool and hardcode it in
> the tool in the meantime.
> >Ok?
> Yea. Sounds better.

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
  2018-11-30 14:43               ` Jozwiak, TomaszX
@ 2018-12-02  6:39                 ` Verma, Shally
  2018-12-05  8:51                   ` Jozwiak, TomaszX
  0 siblings, 1 reply; 76+ messages in thread
From: Verma, Shally @ 2018-12-02  6:39 UTC (permalink / raw)
  To: Jozwiak, TomaszX, Trahe, Fiona, Daly, Lee; +Cc: dev, akhil.goyal

Ok. Then, to keep it simple, can we keep input sz and max-num-sgl-segs as command-line inputs? I don't think seg-sz is required as an input then?

Thanks
Shally 

>-----Original Message-----
>From: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>
>Sent: 30 November 2018 20:13
>To: Verma, Shally <Shally.Verma@cavium.com>; Trahe, Fiona <fiona.trahe@intel.com>; Daly, Lee <lee.daly@intel.com>
>Cc: dev@dpdk.org; akhil.goyal@nxp.com
>Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
>
>External Email
>
>Hi Shally,
>
>I'm about of sending V5 of compression-perf tool.
>
>Our performance testing shows that the number of sgls in a chain can be a factor in the performance.
>So we want to keep this on the cmd line for the performance tool.
>There are alternatives, like setting the input size and segment size to get the num segments desired, but I prefer
>to have the option to specify the num segments explicitly.
>We'll document that if the max-num-sgl-segs x seg_sz > input size then segments number in the chain will be lower ( to store all the
>data)
>As regards adding the max_nb_segments_per_sgl into the rte_compressdev_info struct we're investigating
>another workaround to this limitation in QAT, so will leave this off the API unless some other PMD needs it.
>In the meantime we'll document the limitation in QAT.
>
>Please let me know your thoughts.
>
>--
>Tomek
>
>> -----Original Message-----
>> From: Verma, Shally [mailto:Shally.Verma@cavium.com]
>> Sent: Wednesday, October 17, 2018 6:48 PM
>> To: Trahe, Fiona <fiona.trahe@intel.com>; Daly, Lee <lee.daly@intel.com>
>> Cc: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org;
>> akhil.goyal@nxp.com
>> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
>> measurement
>>
>>
>>
>> >-----Original Message-----
>> >From: Trahe, Fiona <fiona.trahe@intel.com>
>> >Sent: 17 October 2018 22:15
>> >To: Verma, Shally <Shally.Verma@cavium.com>; Daly, Lee
>> ><lee.daly@intel.com>
>> >Cc: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org;
>> >akhil.goyal@nxp.com; Trahe, Fiona <fiona.trahe@intel.com>
>> >Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
>> >measurement
>> >
>> >External Email
>> >
>> >> -----Original Message-----
>> >> From: Verma, Shally [mailto:Shally.Verma@cavium.com]
>> >> Sent: Wednesday, October 17, 2018 8:43 AM
>> >> To: Trahe, Fiona <fiona.trahe@intel.com>; Daly, Lee
>> >> <lee.daly@intel.com>
>> >> Cc: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org;
>> >> akhil.goyal@nxp.com
>> >> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add
>> >> performance measurement
>> >>
>> >>
>> >>
>> >> >-----Original Message-----
>> >> >From: Trahe, Fiona <fiona.trahe@intel.com>
>> >> >Sent: 17 October 2018 20:04
>> >> >To: Daly, Lee <lee.daly@intel.com>; Verma, Shally
>> >> ><Shally.Verma@cavium.com>
>> >> >Cc: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org;
>> >> >akhil.goyal@nxp.com; Trahe, Fiona
>> >> <fiona.trahe@intel.com>
>> >> >Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add
>> >> >performance measurement
>> >> >
>> >> >External Email
>> >> >
>> >> >Hi Shally, Lee,
>> >> >
>> >> >> -----Original Message-----
>> >> >> From: Daly, Lee
>> >> >> Sent: Monday, October 15, 2018 8:10 AM
>> >> >> To: Verma, Shally <Shally.Verma@cavium.com>
>> >> >> Cc: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org;
>> >> >> Trahe, Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com
>> >> >> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add
>> >> >> performance measurement
>> >> >>
>> >> >> Thanks for your input Shally see comments below.
>> >> >>
>> >> >>
>> >> >> I will be reviewing these changes while Tomasz is out this week.
>> >> >>
>> >> >> > -----Original Message-----
>> >> >> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Verma,
>> >> >> > Shally
>> >> >> > Sent: Friday, October 12, 2018 11:16 AM
>> >> >> > To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>;
>> dev@dpdk.org;
>> >> >> > Trahe, Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com; De
>> >> >> > Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
>> >> >> > Cc: De@dpdk.org; Lara@dpdk.org; Guarch@dpdk.org
>> >> >> > Subject: Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add
>> >> >> > performance measurement
>> >> >> >
>> >> >///
>> >> >
>> >> >> >Also, why do we need --max-num-
>> >> >> > sgl-segs as an input option from user? Shouldn't input_sz and
>> >> >> >seg_sz  internally decide on num-segs?
>> >> >> > Or is it added to serve some other different purpose?
>> >> >> Will have to get back to you on this one, seems illogical to get
>> >> >> this input from user, But I will have to do further investigation to find if
>> there was a different purpose.
>> >> >
>> >> >[Fiona] Some PMDs have a limit on how many links can be in an sgl
>> >> >chain, e.g. in QAT case the PMD allocates a pool of internal
>> >> >structures of a suitable size during device initialisation, this is not a hard
>> limit but can be configured in .config to give the user control over the
>> memory resources allocated.
>> >> >This perf-tool max-num-sgl-segs is so the user can pick a value <=
>> whatever the PMD's max is.
>> >>
>> >> Then also, I believe this could be taken care internally by an app.
>> >> App can choose convenient number of sgl segs as per PMD capability
>> >> and input sz and chunk sz selected by user.
>> >> Just my thoughts.
>> >[Fiona] Then we'd need to add this capability to the API, e.g. add
>> >uint16_t max_nb_segments_per_sgl into the rte_compressdev_info struct.
>> >Special case 0 means no limit.
>> >We did consider this before, I can't remember why we didn't do it, I think
>> it's needed.
>> >I'll push an API patch for this in 19.02 and we can remove the
>> >--max-num-sgl-segs param from the performance tool and hardcode it in
>> the tool in the meantime.
>> >Ok?
>> Yea. Sounds better.

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH v5 0/5] add initial version of compress-perf
  2018-11-23 14:27     ` [dpdk-dev] [PATCH v4 0/5] add initial version of compress-perf Tomasz Jozwiak
                         ` (4 preceding siblings ...)
  2018-11-23 14:27       ` [dpdk-dev] [PATCH v4 5/5] app/compress-perf: code refactoring Tomasz Jozwiak
@ 2018-12-05  8:47       ` Tomasz Jozwiak
  2018-12-05  8:47         ` [dpdk-dev] [PATCH v5 1/5] app/compress-perf: add parser Tomasz Jozwiak
                           ` (7 more replies)
  5 siblings, 8 replies; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-12-05  8:47 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, Shally.Verma, akhil.goyal

This patchset adds the initial version of the compression performance
test.

v5 changes:
  - Fixed documentation
  - Added op's status checking after rte_compressdev_dequeue_burst
  - code cleanup

  Note: The 19.02 release notes will be updated once the file has been created

v4 changes:
  - fixed checkpatch issues
  - code cleanup

v3 changes:
  - Added dynamic compression
  - Code refactoring to separate validation
    from benchmarking part
  - Updated documentation
  - Added fail detection from rte_compressdev_enqueue_burst
    and rte_compressdev_dequeue_burst functions
  - Code cleanup

v2 changes:

  -  Added release note
  -  Added new cleanup flow into main function
  -  Blocked dynamic compression test because it hasn't been
     tested enough
  -  Changed `--max-num-sgl-segs' default value to 16
  -  Updated documentation


Tomasz Jozwiak (5):
  app/compress-perf: add parser
  app/compress-perf: add performance measurement
  doc/guides/tools: add doc files
  app/compress-perf: add dynamic compression test
  app/compress-perf: code refactoring

 MAINTAINERS                                       |   5 +
 app/Makefile                                      |   4 +
 app/meson.build                                   |   1 +
 app/test-compress-perf/Makefile                   |  18 +
 app/test-compress-perf/comp_perf_options.h        |  71 +++
 app/test-compress-perf/comp_perf_options_parse.c  | 596 ++++++++++++++++++++++
 app/test-compress-perf/comp_perf_test_benchmark.c | 308 +++++++++++
 app/test-compress-perf/comp_perf_test_benchmark.h |  13 +
 app/test-compress-perf/comp_perf_test_verify.c    | 353 +++++++++++++
 app/test-compress-perf/comp_perf_test_verify.h    |  13 +
 app/test-compress-perf/main.c                     | 590 +++++++++++++++++++++
 app/test-compress-perf/meson.build                |   9 +
 config/common_base                                |   5 +
 doc/guides/rel_notes/release_18_11.rst            |   4 +
 doc/guides/tools/comp_perf.rst                    |  81 +++
 doc/guides/tools/index.rst                        |   1 +
 16 files changed, 2072 insertions(+)
 create mode 100644 app/test-compress-perf/Makefile
 create mode 100644 app/test-compress-perf/comp_perf_options.h
 create mode 100644 app/test-compress-perf/comp_perf_options_parse.c
 create mode 100644 app/test-compress-perf/comp_perf_test_benchmark.c
 create mode 100644 app/test-compress-perf/comp_perf_test_benchmark.h
 create mode 100644 app/test-compress-perf/comp_perf_test_verify.c
 create mode 100644 app/test-compress-perf/comp_perf_test_verify.h
 create mode 100644 app/test-compress-perf/main.c
 create mode 100644 app/test-compress-perf/meson.build
 create mode 100644 doc/guides/tools/comp_perf.rst

-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH v5 1/5] app/compress-perf: add parser
  2018-12-05  8:47       ` [dpdk-dev] [PATCH v5 0/5] add initial version of compress-perf Tomasz Jozwiak
@ 2018-12-05  8:47         ` Tomasz Jozwiak
  2018-12-05  8:47         ` [dpdk-dev] [PATCH v5 2/5] app/compress-perf: add performance measurement Tomasz Jozwiak
                           ` (6 subsequent siblings)
  7 siblings, 0 replies; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-12-05  8:47 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, Shally.Verma, akhil.goyal

Added the parser part of the compression performance test.

Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
---
 app/Makefile                                     |   4 +
 app/meson.build                                  |   1 +
 app/test-compress-perf/Makefile                  |  16 +
 app/test-compress-perf/comp_perf_options.h       |  59 +++
 app/test-compress-perf/comp_perf_options_parse.c | 592 +++++++++++++++++++++++
 app/test-compress-perf/main.c                    |  52 ++
 app/test-compress-perf/meson.build               |   7 +
 config/common_base                               |   5 +
 8 files changed, 736 insertions(+)
 create mode 100644 app/test-compress-perf/Makefile
 create mode 100644 app/test-compress-perf/comp_perf_options.h
 create mode 100644 app/test-compress-perf/comp_perf_options_parse.c
 create mode 100644 app/test-compress-perf/main.c
 create mode 100644 app/test-compress-perf/meson.build

diff --git a/app/Makefile b/app/Makefile
index 069fa98..d6641ef 100644
--- a/app/Makefile
+++ b/app/Makefile
@@ -11,6 +11,10 @@ ifeq ($(CONFIG_RTE_LIBRTE_BBDEV),y)
 DIRS-$(CONFIG_RTE_TEST_BBDEV) += test-bbdev
 endif
 
+ifeq ($(CONFIG_RTE_LIBRTE_COMPRESSDEV),y)
+DIRS-$(CONFIG_RTE_APP_COMPRESS_PERF) += test-compress-perf
+endif
+
 ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
 DIRS-$(CONFIG_RTE_APP_CRYPTO_PERF) += test-crypto-perf
 endif
diff --git a/app/meson.build b/app/meson.build
index a9a026b..47a2a86 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -4,6 +4,7 @@
 apps = ['pdump',
 	'proc-info',
 	'test-bbdev',
+	'test-compress-perf',
 	'test-crypto-perf',
 	'test-eventdev',
 	'test-pmd']
diff --git a/app/test-compress-perf/Makefile b/app/test-compress-perf/Makefile
new file mode 100644
index 0000000..8aa7a22
--- /dev/null
+++ b/app/test-compress-perf/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+APP = dpdk-test-compress-perf
+
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+CFLAGS += -O3
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+SRCS-y += comp_perf_options_parse.c
+
+include $(RTE_SDK)/mk/rte.app.mk
diff --git a/app/test-compress-perf/comp_perf_options.h b/app/test-compress-perf/comp_perf_options.h
new file mode 100644
index 0000000..7516ea0
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_options.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#define MAX_DRIVER_NAME		64
+#define MAX_INPUT_FILE_NAME	64
+#define MAX_LIST		32
+
+enum comp_operation {
+	COMPRESS_ONLY,
+	DECOMPRESS_ONLY,
+	COMPRESS_DECOMPRESS
+};
+
+struct range_list {
+	uint8_t min;
+	uint8_t max;
+	uint8_t inc;
+	uint8_t count;
+	uint8_t list[MAX_LIST];
+};
+
+struct comp_test_data {
+	char driver_name[64];
+	char input_file[64];
+	struct rte_mbuf **comp_bufs;
+	struct rte_mbuf **decomp_bufs;
+	uint32_t total_bufs;
+	uint8_t *input_data;
+	size_t input_data_sz;
+	uint8_t *compressed_data;
+	uint8_t *decompressed_data;
+	struct rte_mempool *comp_buf_pool;
+	struct rte_mempool *decomp_buf_pool;
+	struct rte_mempool *op_pool;
+	int8_t cdev_id;
+	uint16_t seg_sz;
+	uint16_t burst_sz;
+	uint32_t pool_sz;
+	uint32_t num_iter;
+	uint16_t max_sgl_segs;
+	enum rte_comp_huffman huffman_enc;
+	enum comp_operation test_op;
+	int window_sz;
+	struct range_list level;
+	/* Store TSC duration for all levels (including level 0) */
+	uint64_t comp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
+	uint64_t decomp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
+};
+
+int
+comp_perf_options_parse(struct comp_test_data *test_data, int argc,
+			char **argv);
+
+void
+comp_perf_options_default(struct comp_test_data *test_data);
+
+int
+comp_perf_options_check(struct comp_test_data *test_data);
diff --git a/app/test-compress-perf/comp_perf_options_parse.c b/app/test-compress-perf/comp_perf_options_parse.c
new file mode 100644
index 0000000..7f1a7ff
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_options_parse.c
@@ -0,0 +1,592 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <getopt.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include <rte_string_fns.h>
+#include <rte_comp.h>
+
+#include "comp_perf_options.h"
+
+#define CPERF_DRIVER_NAME	("driver-name")
+#define CPERF_TEST_FILE		("input-file")
+#define CPERF_SEG_SIZE		("seg-sz")
+#define CPERF_BURST_SIZE	("burst-sz")
+#define CPERF_EXTENDED_SIZE	("extended-input-sz")
+#define CPERF_POOL_SIZE		("pool-sz")
+#define CPERF_MAX_SGL_SEGS	("max-num-sgl-segs")
+#define CPERF_NUM_ITER		("num-iter")
+#define CPERF_OPTYPE		("operation")
+#define CPERF_HUFFMAN_ENC	("huffman-enc")
+#define CPERF_LEVEL		("compress-level")
+#define CPERF_WINDOW_SIZE	("window-sz")
+
+struct name_id_map {	/* one entry of a string-to-id lookup table */
+	const char *name;	/* key compared against the user-supplied string */
+	uint32_t id;		/* value returned when the key matches */
+};
+
+/*
+ * Print the command-line help for the application to stdout.
+ *
+ * progname is substituted for the leading %s of the help text.
+ */
+static void
+usage(char *progname)
+{
+	printf("%s [EAL options] --\n"
+		" --driver-name NAME: compress driver to use\n"
+		" --input-file NAME: file to compress and decompress\n"
+		" --extended-input-sz N: extend file data up to this size (default: no extension)\n"
+		" --seg-sz N: size of segment to store the data (default: 2048)\n"
+		" --burst-sz N: compress operation burst size (default: 32)\n"
+		" --pool-sz N: mempool size for compress operations/mbufs\n"
+		"		(default: 8192)\n"
+		" --max-num-sgl-segs N: maximum number of segments for each mbuf\n"
+		"		(default: 16)\n"
+		" --num-iter N: number of times the file will be\n"
+		"		compressed/decompressed (default: 10000)\n"
+		" --operation [comp/decomp/comp_and_decomp]: perform test on\n"
+		"		compression, decompression or both operations\n"
+		" --huffman-enc [fixed/default]: Huffman encoding\n"
+		"		(default: fixed)\n"
+		" --compress-level N: compression level, which could be a single value, list or range\n"
+		"		(default: range between 1 and 9)\n"
+		" --window-sz N: base two log value of compression window size\n"
+		"		(e.g.: 15 => 32k, default: max supported by PMD)\n"
+		" -h: prints this help\n",
+		progname);
+}
+
+/*
+ * Look up str_key in a name/id table of map_len entries.
+ *
+ * Returns the id of the first entry whose name equals str_key,
+ * or -1 when no entry matches.
+ */
+static int
+get_str_key_id_mapping(struct name_id_map *map, unsigned int map_len,
+		const char *str_key)
+{
+	const struct name_id_map *entry = map;
+	const struct name_id_map *const end = map + map_len;
+
+	while (entry != end) {
+		if (!strcmp(str_key, entry->name))
+			return entry->id;
+		entry++;
+	}
+
+	return -1;
+}
+
+/*
+ * Parse arg as an unsigned decimal number that fits in 32 bits.
+ *
+ * Returns 0 and stores the result in *value on success, -1 when arg is
+ * missing, empty or not entirely numeric, and -ERANGE when the number
+ * does not fit in a uint32_t. *value is left untouched on failure.
+ */
+static int
+parse_uint32_t(uint32_t *value, const char *arg)
+{
+	char *end = NULL;
+	unsigned long n;
+
+	/* Validate the string actually passed in, not the getopt global */
+	if (arg == NULL || arg[0] == '\0')
+		return -1;
+
+	errno = 0;
+	n = strtoul(arg, &end, 10);
+
+	/* No digits consumed, or trailing characters after the number */
+	if (end == arg || *end != '\0')
+		return -1;
+
+	/* ERANGE covers overflow of unsigned long itself */
+	if (errno == ERANGE || n > UINT32_MAX)
+		return -ERANGE;
+
+	*value = (uint32_t) n;
+
+	return 0;
+}
+
+/*
+ * Parse arg as an unsigned decimal number that fits in 16 bits.
+ *
+ * Returns 0 and stores the result in *value on success, the negative
+ * result of parse_uint32_t() when that fails, or -ERANGE when the number
+ * is larger than UINT16_MAX.
+ */
+static int
+parse_uint16_t(uint16_t *value, const char *arg)
+{
+	uint32_t wide = 0;
+	const int status = parse_uint32_t(&wide, arg);
+
+	if (status < 0)
+		return status;
+	if (wide > UINT16_MAX)
+		return -ERANGE;
+
+	*value = (uint16_t) wide;
+	return 0;
+}
+
+/*
+ * Convert one token of a range specification to an 8-bit value.
+ *
+ * Returns 0 and stores the value in *out on success, -1 when the token
+ * is missing, is not a plain decimal number or does not fit in a uint8_t.
+ */
+static int
+range_token_to_u8(const char *token, uint8_t *out)
+{
+	char *end = NULL;
+	unsigned long n;
+
+	if (token == NULL)
+		return -1;
+
+	errno = 0;
+	n = strtoul(token, &end, 10);
+
+	/* Reject overflow, empty/partial conversions and values above 255 */
+	if (errno != 0 || end == token || *end != '\0' || n > UINT8_MAX)
+		return -1;
+
+	*out = (uint8_t) n;
+	return 0;
+}
+
+/*
+ * Parse arg as a "min:inc:max" range of 8-bit values.
+ *
+ * Returns 0 and fills *min, *max and *inc on success. Returns -1, leaving
+ * the outputs untouched, when arg does not consist of exactly three valid
+ * numbers, when the increment is 0 or when max is lower than min.
+ */
+static int
+parse_range(const char *arg, uint8_t *min, uint8_t *max, uint8_t *inc)
+{
+	uint8_t lo, step, hi;
+	int ret = -1;
+
+	/* strtok() modifies its input, so work on a private copy */
+	char *copy_arg = strdup(arg);
+
+	if (copy_arg == NULL)
+		return -1;
+
+	/* Parse minimum value */
+	if (range_token_to_u8(strtok(copy_arg, ":"), &lo) < 0)
+		goto out;
+
+	/* Parse increment value */
+	if (range_token_to_u8(strtok(NULL, ":"), &step) < 0 || step == 0)
+		goto out;
+
+	/* Parse maximum value */
+	if (range_token_to_u8(strtok(NULL, ":"), &hi) < 0 || hi < lo)
+		goto out;
+
+	/* Anything after the third token makes the argument invalid */
+	if (strtok(NULL, ":") != NULL)
+		goto out;
+
+	*min = lo;
+	*max = hi;
+	*inc = step;
+	ret = 0;
+
+out:
+	free(copy_arg);
+	return ret;
+}
+
+/*
+ * Parse arg as a comma-separated list of 8-bit values.
+ *
+ * At most MAX_LIST values are stored in list; further values are ignored
+ * after a warning. The smallest and largest stored values are written to
+ * *min and *max when those pointers are non-NULL.
+ *
+ * Returns the number of values stored, or -1 when arg holds no value or
+ * holds a token that is not a decimal number in the range 0..255.
+ */
+static int
+parse_list(const char *arg, uint8_t *list, uint8_t *min, uint8_t *max)
+{
+	char *token;
+	char *end;
+	unsigned long number;
+	uint8_t count = 0;
+	uint8_t temp_min = UINT8_MAX;
+	uint8_t temp_max = 0;
+
+	/* strtok() modifies its input, so work on a private copy */
+	char *copy_arg = strdup(arg);
+
+	if (copy_arg == NULL)
+		return -1;
+
+	for (token = strtok(copy_arg, ","); token != NULL;
+			token = strtok(NULL, ",")) {
+		if (count == MAX_LIST) {
+			RTE_LOG(WARNING, USER1,
+				"Using only the first %u sizes\n",
+					MAX_LIST);
+			break;
+		}
+
+		errno = 0;
+		end = NULL;
+		number = strtoul(token, &end, 10);
+
+		/*
+		 * Reject instead of truncating: a value above 255 would
+		 * otherwise wrap around when stored in the uint8_t list.
+		 */
+		if (errno != 0 || end == token || *end != '\0' ||
+				number > UINT8_MAX)
+			goto err_list;
+
+		list[count++] = (uint8_t) number;
+
+		if (number < temp_min)
+			temp_min = (uint8_t) number;
+		if (number > temp_max)
+			temp_max = (uint8_t) number;
+	}
+
+	/* An argument without any value is an error */
+	if (count == 0)
+		goto err_list;
+
+	if (min)
+		*min = temp_min;
+	if (max)
+		*max = temp_max;
+
+	free(copy_arg);
+	return count;
+
+err_list:
+	free(copy_arg);
+	return -1;
+}
+
+/*
+ * Handler for --num-iter: stores the iteration count in test_data.
+ * Returns 0 on success, -1 when arg is not a valid non-zero number.
+ */
+static int
+parse_num_iter(struct comp_test_data *test_data, const char *arg)
+{
+	if (parse_uint32_t(&test_data->num_iter, arg) != 0) {
+		RTE_LOG(ERR, USER1, "Failed to parse total iteration count\n");
+		return -1;
+	}
+
+	if (!test_data->num_iter) {
+		RTE_LOG(ERR, USER1,
+				"Total number of iterations must be higher than 0\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Handler for --pool-sz: stores the mempool size in test_data.
+ * Returns 0 on success, -1 when arg is not a valid non-zero number.
+ */
+static int
+parse_pool_sz(struct comp_test_data *test_data, const char *arg)
+{
+	int ret = parse_uint32_t(&test_data->pool_sz, arg);
+
+	if (ret) {
+		/* Newline added so the message ends its log line */
+		RTE_LOG(ERR, USER1, "Failed to parse pool size\n");
+		return -1;
+	}
+
+	if (test_data->pool_sz == 0) {
+		RTE_LOG(ERR, USER1, "Pool size must be higher than 0\n");
+		return -1;
+	}
+
+	return ret;
+}
+
+/*
+ * Handler for --burst-sz: stores the burst size in test_data.
+ * Returns 0 on success, -1 when arg is not a valid non-zero 16-bit number.
+ */
+static int
+parse_burst_sz(struct comp_test_data *test_data, const char *arg)
+{
+	if (parse_uint16_t(&test_data->burst_sz, arg) != 0) {
+		RTE_LOG(ERR, USER1, "Failed to parse burst size/s\n");
+		return -1;
+	}
+
+	if (test_data->burst_sz != 0)
+		return 0;
+
+	RTE_LOG(ERR, USER1, "Burst size must be higher than 0\n");
+	return -1;
+}
+
+/*
+ * Handler for --extended-input-sz: stores the requested input size in
+ * test_data->input_data_sz.
+ * Returns 0 on success, -1 when arg is not a valid non-zero number.
+ */
+static int
+parse_extended_input_sz(struct comp_test_data *test_data, const char *arg)
+{
+	uint32_t requested_sz;
+
+	if (parse_uint32_t(&requested_sz, arg) != 0) {
+		RTE_LOG(ERR, USER1, "Failed to parse extended input size\n");
+		return -1;
+	}
+
+	/* The value is stored first; a zero size is rejected just below */
+	test_data->input_data_sz = requested_sz;
+	if (requested_sz != 0)
+		return 0;
+
+	RTE_LOG(ERR, USER1,
+		"Extended file size must be higher than 0\n");
+	return -1;
+}
+
+/*
+ * Handler for --seg-sz: stores the segment size in test_data.
+ * Returns 0 on success, -1 when arg is not a valid non-zero 16-bit number.
+ */
+static int
+parse_seg_sz(struct comp_test_data *test_data, const char *arg)
+{
+	const int status = parse_uint16_t(&test_data->seg_sz, arg);
+
+	if (status != 0) {
+		RTE_LOG(ERR, USER1, "Failed to parse segment size\n");
+		return -1;
+	}
+
+	if (!test_data->seg_sz) {
+		RTE_LOG(ERR, USER1, "Segment size must be higher than 0\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Handler for --max-num-sgl-segs: stores the maximum number of segments
+ * per mbuf chain in test_data.
+ * Returns 0 on success, -1 when arg is not a valid non-zero 16-bit number.
+ */
+static int
+parse_max_num_sgl_segs(struct comp_test_data *test_data, const char *arg)
+{
+	if (parse_uint16_t(&test_data->max_sgl_segs, arg) != 0) {
+		RTE_LOG(ERR, USER1,
+			"Failed to parse max number of segments per mbuf chain\n");
+		return -1;
+	}
+
+	if (test_data->max_sgl_segs != 0)
+		return 0;
+
+	RTE_LOG(ERR, USER1, "Max number of segments per mbuf chain "
+		"must be higher than 0\n");
+	return -1;
+}
+
+/*
+ * Handler for --window-sz: stores the base-two log of the window size
+ * in test_data.
+ * Returns 0 on success, -1 when arg is not a valid 16-bit number.
+ */
+static int
+parse_window_sz(struct comp_test_data *test_data, const char *arg)
+{
+	/*
+	 * window_sz is an int. Parse into a real uint16_t and assign the
+	 * result: writing through a casted pointer would update only part
+	 * of the int and leave the rest of its previous value in place.
+	 */
+	uint16_t window_sz = 0;
+	int ret = parse_uint16_t(&window_sz, arg);
+
+	if (ret) {
+		RTE_LOG(ERR, USER1, "Failed to parse window size\n");
+		return -1;
+	}
+
+	test_data->window_sz = window_sz;
+
+	return 0;
+}
+
+/*
+ * Handler for --driver-name: copies arg into test_data->driver_name.
+ * Returns 0 on success, -1 when arg does not fit in the buffer.
+ */
+static int
+parse_driver_name(struct comp_test_data *test_data, const char *arg)
+{
+	const size_t capacity = sizeof(test_data->driver_name);
+
+	/* One byte of the buffer is needed for the terminating NUL */
+	if (strlen(arg) >= capacity)
+		return -1;
+
+	rte_strlcpy(test_data->driver_name, arg, capacity);
+	return 0;
+}
+
+/*
+ * Handler for --input-file: copies arg into test_data->input_file.
+ * Returns 0 on success, -1 when arg does not fit in the buffer.
+ */
+static int
+parse_test_file(struct comp_test_data *test_data, const char *arg)
+{
+	const size_t capacity = sizeof(test_data->input_file);
+
+	/* One byte of the buffer is needed for the terminating NUL */
+	if (strlen(arg) >= capacity)
+		return -1;
+
+	rte_strlcpy(test_data->input_file, arg, capacity);
+	return 0;
+}
+
+/*
+ * Handler for --operation: maps "comp", "decomp" or "comp_and_decomp"
+ * to the matching enum comp_operation value in test_data->test_op.
+ * Returns 0 on success, -1 for any other string.
+ */
+static int
+parse_op_type(struct comp_test_data *test_data, const char *arg)
+{
+	struct name_id_map optype_namemap[] = {
+		{ .name = "comp", .id = COMPRESS_ONLY },
+		{ .name = "decomp", .id = DECOMPRESS_ONLY },
+		{ .name = "comp_and_decomp", .id = COMPRESS_DECOMPRESS }
+	};
+	const int op = get_str_key_id_mapping(optype_namemap,
+			RTE_DIM(optype_namemap), arg);
+
+	if (op >= 0) {
+		test_data->test_op = (enum comp_operation)op;
+		return 0;
+	}
+
+	RTE_LOG(ERR, USER1, "Invalid operation type specified\n");
+	return -1;
+}
+
+/*
+ * Handler for --huffman-enc: maps "default", "fixed" or "dynamic" to the
+ * matching enum rte_comp_huffman value in test_data->huffman_enc.
+ * Returns 0 on success, -1 for any other string.
+ */
+static int
+parse_huffman_enc(struct comp_test_data *test_data, const char *arg)
+{
+	struct name_id_map huffman_namemap[] = {
+		{
+			"default",
+			RTE_COMP_HUFFMAN_DEFAULT
+		},
+		{
+			"fixed",
+			RTE_COMP_HUFFMAN_FIXED
+		},
+		{
+			"dynamic",
+			RTE_COMP_HUFFMAN_DYNAMIC
+		}
+	};
+
+	int id = get_str_key_id_mapping(huffman_namemap,
+			RTE_DIM(huffman_namemap), arg);
+	if (id < 0) {
+		RTE_LOG(ERR, USER1, "Invalid Huffman encoding specified\n");
+		return -1;
+	}
+
+	test_data->huffman_enc = (enum rte_comp_huffman)id;
+
+	return 0;
+}
+
+/*
+ * Handler for --compress-level: accepts either a "min:inc:max" range or
+ * a comma-separated list (a single value is a one-entry list) and stores
+ * the result in test_data->level.
+ * Returns 0 on success, -1 when arg cannot be parsed or the highest
+ * level exceeds RTE_COMP_LEVEL_MAX.
+ */
+static int
+parse_level(struct comp_test_data *test_data, const char *arg)
+{
+	int ret;
+
+	/*
+	 * Try parsing the argument as a range, if it fails,
+	 * parse it as a list
+	 */
+	if (parse_range(arg, &test_data->level.min, &test_data->level.max,
+			&test_data->level.inc) < 0) {
+		ret = parse_list(arg, test_data->level.list,
+					&test_data->level.min,
+					&test_data->level.max);
+		if (ret < 0) {
+			RTE_LOG(ERR, USER1,
+				"Failed to parse compression level/s\n");
+			return -1;
+		}
+		test_data->level.count = ret;
+	}
+
+	/* The upper bound applies to the range form as well as the list */
+	if (test_data->level.max > RTE_COMP_LEVEL_MAX) {
+		RTE_LOG(ERR, USER1, "Level cannot be higher than %u\n",
+				RTE_COMP_LEVEL_MAX);
+		return -1;
+	}
+
+	return 0;
+}
+
+typedef int (*option_parser_t)(struct comp_test_data *test_data,
+		const char *arg);	/* handler for one long option's argument */
+
+struct long_opt_parser {	/* binds a long option name to its handler */
+	const char *lgopt_name;	/* name as listed in lgopts */
+	option_parser_t parser_fn;	/* called with the option's argument */
+
+};
+
+static struct option lgopts[] = {	/* long options handed to getopt_long() */
+
+	{ CPERF_DRIVER_NAME, required_argument, 0, 0 },
+	{ CPERF_TEST_FILE, required_argument, 0, 0 },
+	{ CPERF_SEG_SIZE, required_argument, 0, 0 },
+	{ CPERF_BURST_SIZE, required_argument, 0, 0 },
+	{ CPERF_EXTENDED_SIZE, required_argument, 0, 0 },
+	{ CPERF_POOL_SIZE, required_argument, 0, 0 },
+	{ CPERF_MAX_SGL_SEGS, required_argument, 0, 0},
+	{ CPERF_NUM_ITER, required_argument, 0, 0 },
+	{ CPERF_OPTYPE,	required_argument, 0, 0 },
+	{ CPERF_HUFFMAN_ENC, required_argument, 0, 0 },
+	{ CPERF_LEVEL, required_argument, 0, 0 },
+	{ CPERF_WINDOW_SIZE, required_argument, 0, 0 },
+	{ NULL, 0, 0, 0 }	/* all-zero entry terminates the table */
+};
+/*
+ * Dispatch the long option at index opt_idx of lgopts to its parser,
+ * handing it the current getopt argument (optarg).
+ *
+ * Returns the parser's result, or -EINVAL when no parser is registered
+ * for the option.
+ */
+static int
+comp_perf_opts_parse_long(int opt_idx, struct comp_test_data *test_data)
+{
+	/* Constant table: built once instead of on every call */
+	static const struct long_opt_parser parsermap[] = {
+		{ CPERF_DRIVER_NAME,	parse_driver_name },
+		{ CPERF_TEST_FILE,	parse_test_file },
+		{ CPERF_SEG_SIZE,	parse_seg_sz },
+		{ CPERF_BURST_SIZE,	parse_burst_sz },
+		{ CPERF_EXTENDED_SIZE,	parse_extended_input_sz },
+		{ CPERF_POOL_SIZE,	parse_pool_sz },
+		{ CPERF_MAX_SGL_SEGS,	parse_max_num_sgl_segs },
+		{ CPERF_NUM_ITER,	parse_num_iter },
+		{ CPERF_OPTYPE,		parse_op_type },
+		{ CPERF_HUFFMAN_ENC,	parse_huffman_enc },
+		{ CPERF_LEVEL,		parse_level },
+		{ CPERF_WINDOW_SIZE,	parse_window_sz },
+	};
+	const char *name = lgopts[opt_idx].name;
+	unsigned int i;
+
+	for (i = 0; i < RTE_DIM(parsermap); i++) {
+		/*
+		 * Exact match: a prefix compare would mis-dispatch any
+		 * option whose name is a prefix of an earlier table entry.
+		 */
+		if (strcmp(name, parsermap[i].lgopt_name) == 0)
+			return parsermap[i].parser_fn(test_data, optarg);
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Parse the application's command-line options into test_data.
+ *
+ * "-h" prints the help text and exits the process. Returns 0 once all
+ * options were consumed, the handler's non-zero result for a rejected
+ * long option, or -EINVAL (after printing the help) for an unknown one.
+ */
+int
+comp_perf_options_parse(struct comp_test_data *test_data, int argc, char **argv)
+{
+	int opt_idx;
+	int opt;
+
+	for (;;) {
+		opt = getopt_long(argc, argv, "h", lgopts, &opt_idx);
+		if (opt == EOF)
+			break;
+
+		if (opt == 'h') {
+			usage(argv[0]);
+			rte_exit(EXIT_SUCCESS, "Displayed help\n");
+		} else if (opt == 0) {
+			/* long options */
+			int status = comp_perf_opts_parse_long(opt_idx,
+					test_data);
+
+			if (status != 0)
+				return status;
+		} else {
+			usage(argv[0]);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+void
+comp_perf_options_default(struct comp_test_data *test_data)
+{	/* Load the built-in value of every option into test_data */
+	test_data->cdev_id = -1;
+	test_data->seg_sz = 2048;
+	test_data->burst_sz = 32;
+	test_data->pool_sz = 8192;
+	test_data->max_sgl_segs = UINT16_MAX;
+	test_data->num_iter = 10000;
+	test_data->huffman_enc = RTE_COMP_HUFFMAN_FIXED;
+	test_data->test_op = COMPRESS_DECOMPRESS;
+	test_data->window_sz = -1;	/* -1: no window size requested */
+	test_data->level.min = 1;	/* default levels: 1 to 9 in steps of 1 */
+	test_data->level.max = 9;
+	test_data->level.inc = 1;
+}
+
+/*
+ * Verify that the mandatory options were supplied.
+ * Returns 0 when both the driver name and the input file name are set,
+ * -1 (after logging which one is missing) otherwise.
+ */
+int
+comp_perf_options_check(struct comp_test_data *test_data)
+{
+	if (!*test_data->driver_name) {
+		RTE_LOG(ERR, USER1, "Driver name has to be set\n");
+		return -1;
+	}
+
+	if (!*test_data->input_file) {
+		RTE_LOG(ERR, USER1, "Input file name has to be set\n");
+		return -1;
+	}
+
+	return 0;
+}
diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c
new file mode 100644
index 0000000..f52b98d
--- /dev/null
+++ b/app/test-compress-perf/main.c
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_compressdev.h>
+
+#include "comp_perf_options.h"
+
+/*
+ * Entry point: initialise the EAL, then load, parse and validate the
+ * application options. Returns EXIT_SUCCESS when the options are valid,
+ * EXIT_FAILURE otherwise.
+ */
+int
+main(int argc, char **argv)
+{
+	struct comp_test_data *test_data;
+	int exit_code = EXIT_FAILURE;
+	int eal_args;
+
+	/* Initialise DPDK EAL */
+	eal_args = rte_eal_init(argc, argv);
+	if (eal_args < 0)
+		rte_exit(EXIT_FAILURE, "Invalid EAL arguments!\n");
+
+	/* Skip the arguments consumed by the EAL */
+	argc -= eal_args;
+	argv += eal_args;
+
+	test_data = rte_zmalloc_socket(NULL, sizeof(struct comp_test_data),
+					0, rte_socket_id());
+	if (test_data == NULL)
+		rte_exit(EXIT_FAILURE, "Cannot reserve memory in socket %d\n",
+				rte_socket_id());
+
+	comp_perf_options_default(test_data);
+
+	if (comp_perf_options_parse(test_data, argc, argv) < 0) {
+		RTE_LOG(ERR, USER1,
+			"Parsing one or more user options failed\n");
+		goto err;
+	}
+
+	if (comp_perf_options_check(test_data) < 0)
+		goto err;
+
+	exit_code = EXIT_SUCCESS;
+
+err:
+	rte_free(test_data);
+
+	return exit_code;
+}
diff --git a/app/test-compress-perf/meson.build b/app/test-compress-perf/meson.build
new file mode 100644
index 0000000..ba6d64d
--- /dev/null
+++ b/app/test-compress-perf/meson.build
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+allow_experimental_apis = true
+sources = files('comp_perf_options_parse.c',
+		'main.c')
+deps = ['compressdev']
diff --git a/config/common_base b/config/common_base
index d12ae98..2ab4b7b 100644
--- a/config/common_base
+++ b/config/common_base
@@ -949,6 +949,11 @@ CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
 CONFIG_RTE_TEST_BBDEV=y
 
 #
+# Compile the compression performance application
+#
+CONFIG_RTE_APP_COMPRESS_PERF=y
+
+#
 # Compile the crypto performance application
 #
 CONFIG_RTE_APP_CRYPTO_PERF=y
-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH v5 2/5] app/compress-perf: add performance measurement
  2018-12-05  8:47       ` [dpdk-dev] [PATCH v5 0/5] add initial version of compress-perf Tomasz Jozwiak
  2018-12-05  8:47         ` [dpdk-dev] [PATCH v5 1/5] app/compress-perf: add parser Tomasz Jozwiak
@ 2018-12-05  8:47         ` Tomasz Jozwiak
  2018-12-05  8:47         ` [dpdk-dev] [PATCH v5 3/5] doc/guides/tools: add doc files Tomasz Jozwiak
                           ` (5 subsequent siblings)
  7 siblings, 0 replies; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-12-05  8:47 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, Shally.Verma, akhil.goyal

Added performance measurement part into compression perf. test.

Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
---
 app/test-compress-perf/comp_perf_options_parse.c |   2 +-
 app/test-compress-perf/main.c                    | 888 ++++++++++++++++++++++-
 2 files changed, 884 insertions(+), 6 deletions(-)

diff --git a/app/test-compress-perf/comp_perf_options_parse.c b/app/test-compress-perf/comp_perf_options_parse.c
index 7f1a7ff..add5c8a 100644
--- a/app/test-compress-perf/comp_perf_options_parse.c
+++ b/app/test-compress-perf/comp_perf_options_parse.c
@@ -565,7 +565,7 @@ comp_perf_options_default(struct comp_test_data *test_data)
 	test_data->seg_sz = 2048;
 	test_data->burst_sz = 32;
 	test_data->pool_sz = 8192;
-	test_data->max_sgl_segs = UINT16_MAX;
+	test_data->max_sgl_segs = 16;
 	test_data->num_iter = 10000;
 	test_data->huffman_enc = RTE_COMP_HUFFMAN_FIXED;
 	test_data->test_op = COMPRESS_DECOMPRESS;
diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c
index f52b98d..4b183a8 100644
--- a/app/test-compress-perf/main.c
+++ b/app/test-compress-perf/main.c
@@ -5,14 +5,730 @@
 #include <rte_malloc.h>
 #include <rte_eal.h>
 #include <rte_log.h>
+#include <rte_cycles.h>
 #include <rte_compressdev.h>
 
 #include "comp_perf_options.h"
 
+#define NUM_MAX_XFORMS 16
+#define NUM_MAX_INFLIGHT_OPS 512
+#define EXPANSE_RATIO 1.05
+#define MIN_COMPRESSED_BUF_SIZE 8
+
+#define DIV_CEIL(a, b)  ((a) / (b) + ((a) % (b) != 0))
+
+/*
+ * Cleanup state machine: records how far initialisation has progressed.
+ * NOTE(review): presumably read on exit to release only what was set up;
+ * the code consuming it is outside this excerpt - confirm.
+ */
+static enum cleanup_st {
+	ST_CLEAR = 0,
+	ST_TEST_DATA,
+	ST_COMPDEV,
+	ST_INPUT_DATA,
+	ST_MEMORY_ALLOC,	/* set once the first mbuf pool has been created */
+	ST_PREPARE_BUF,		/* set once the first input mbuf has been allocated */
+	ST_DURING_TEST
+} cleanup = ST_CLEAR;
+
+/*
+ * Check whether size is one of the values described by range.
+ *
+ * Returns 0 when size lies within [min, max] and, for a non-zero
+ * increment, equals min plus a whole number of increments; -1 otherwise.
+ */
+static int
+param_range_check(uint16_t size, const struct rte_param_log2_range *range)
+{
+	unsigned int candidate;
+
+	/* Values outside the lower/upper bounds can never be valid */
+	if (size < range->min || size > range->max)
+		return -1;
+
+	/* Without an increment the bounds check above is all there is */
+	if (!range->increment)
+		return 0;
+
+	/* Walk the supported sizes looking for an exact match */
+	candidate = range->min;
+	while (candidate <= range->max) {
+		if (candidate == size)
+			return 0;
+		candidate += range->increment;
+	}
+
+	return -1;
+}
+
+/*
+ * Check the requested test parameters against the DEFLATE capabilities
+ * of the selected compress device.
+ *
+ * Side effects: an unset window size (-1) is replaced by the maximum the
+ * device supports, and max_sgl_segs is forced to 1 when the device does
+ * not advertise RTE_COMP_FF_OOP_SGL_IN_SGL_OUT.
+ *
+ * Returns 0 when the test can run on the device, -1 otherwise.
+ */
+static int
+comp_perf_check_capabilities(struct comp_test_data *test_data)
+{
+	const struct rte_compressdev_capabilities *cap;
+
+	cap = rte_compressdev_capability_get(test_data->cdev_id,
+					     RTE_COMP_ALGO_DEFLATE);
+
+	if (cap == NULL) {
+		RTE_LOG(ERR, USER1,
+			"Compress device does not support DEFLATE\n");
+		return -1;
+	}
+
+	uint64_t comp_flags = cap->comp_feature_flags;
+
+	/* Huffman encoding */
+	if (test_data->huffman_enc == RTE_COMP_HUFFMAN_FIXED &&
+			(comp_flags & RTE_COMP_FF_HUFFMAN_FIXED) == 0) {
+		RTE_LOG(ERR, USER1,
+			"Compress device does not support Fixed Huffman\n");
+		return -1;
+	}
+
+	if (test_data->huffman_enc == RTE_COMP_HUFFMAN_DYNAMIC &&
+			(comp_flags & RTE_COMP_FF_HUFFMAN_DYNAMIC) == 0) {
+		RTE_LOG(ERR, USER1,
+			"Compress device does not support Dynamic Huffman\n");
+		return -1;
+	}
+
+	/* Window size */
+	if (test_data->window_sz != -1) {
+		if (param_range_check(test_data->window_sz, &cap->window_size)
+				< 0) {
+			RTE_LOG(ERR, USER1,
+				"Compress device does not support "
+				"this window size\n");
+			return -1;
+		}
+	} else
+		/* Set window size to PMD maximum if none was specified */
+		test_data->window_sz = cap->window_size.max;
+
+	/* Check if chained mbufs is supported */
+	if (test_data->max_sgl_segs > 1  &&
+			(comp_flags & RTE_COMP_FF_OOP_SGL_IN_SGL_OUT) == 0) {
+		RTE_LOG(INFO, USER1, "Compress device does not support "
+				"chained mbufs. Max SGL segments set to 1\n");
+		test_data->max_sgl_segs = 1;
+	}
+
+	/* Level 0 support */
+	if (test_data->level.min == 0 &&
+			(comp_flags & RTE_COMP_FF_NONCOMPRESSED_BLOCKS) == 0) {
+		RTE_LOG(ERR, USER1, "Compress device does not support "
+				"level 0 (no compression)\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Create the mbuf pools, the operation pool and the data/pointer buffers
+ * used by the test, all sized from input_data_sz, seg_sz and max_sgl_segs.
+ *
+ * Also computes test_data->total_bufs and moves the cleanup state to
+ * ST_MEMORY_ALLOC once the first pool exists.
+ *
+ * Returns 0 on success, -1 as soon as one allocation fails; anything
+ * allocated before the failure is left in test_data.
+ */
+static int
+comp_perf_allocate_memory(struct comp_test_data *test_data)
+{
+	/* Number of segments for input and output
+	 * (compression and decompression)
+	 */
+	uint32_t total_segs = DIV_CEIL(test_data->input_data_sz,
+			test_data->seg_sz);
+	test_data->comp_buf_pool = rte_pktmbuf_pool_create("comp_buf_pool",
+				total_segs,
+				0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
+				rte_socket_id());
+	if (test_data->comp_buf_pool == NULL) {
+		RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
+		return -1;
+	}
+
+	cleanup = ST_MEMORY_ALLOC;
+	test_data->decomp_buf_pool = rte_pktmbuf_pool_create("decomp_buf_pool",
+				total_segs,
+				0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
+				rte_socket_id());
+	if (test_data->decomp_buf_pool == NULL) {
+		RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
+		return -1;
+	}
+
+	/* One buffer (mbuf chain) holds up to max_sgl_segs segments */
+	test_data->total_bufs = DIV_CEIL(total_segs, test_data->max_sgl_segs);
+
+	test_data->op_pool = rte_comp_op_pool_create("op_pool",
+				  test_data->total_bufs,
+				  0, 0, rte_socket_id());
+	if (test_data->op_pool == NULL) {
+		RTE_LOG(ERR, USER1, "Comp op mempool could not be created\n");
+		return -1;
+	}
+
+	/*
+	 * Compressed data might be a bit larger than input data,
+	 * if data cannot be compressed
+	 */
+	test_data->compressed_data = rte_zmalloc_socket(NULL,
+				test_data->input_data_sz * EXPANSE_RATIO
+						+ MIN_COMPRESSED_BUF_SIZE, 0,
+				rte_socket_id());
+	if (test_data->compressed_data == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the compressed data "
+				"could not be allocated\n");
+		return -1;
+	}
+
+	test_data->decompressed_data = rte_zmalloc_socket(NULL,
+				test_data->input_data_sz, 0,
+				rte_socket_id());
+	if (test_data->decompressed_data == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the decompressed data "
+				"could not be allocated\n");
+		return -1;
+	}
+
+	test_data->comp_bufs = rte_zmalloc_socket(NULL,
+			test_data->total_bufs * sizeof(struct rte_mbuf *),
+			0, rte_socket_id());
+	if (test_data->comp_bufs == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the compression mbufs"
+				" could not be allocated\n");
+		return -1;
+	}
+
+	test_data->decomp_bufs = rte_zmalloc_socket(NULL,
+			test_data->total_bufs * sizeof(struct rte_mbuf *),
+			0, rte_socket_id());
+	if (test_data->decomp_bufs == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the decompression mbufs"
+				" could not be allocated\n");
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Load the input file into a newly allocated test_data->input_data.
+ *
+ * When input_data_sz is 0 it is set to the file size. Otherwise that
+ * many bytes are loaded, re-reading the file from its start as often as
+ * needed to fill the buffer.
+ *
+ * Returns 0 on success, -1 on any I/O or allocation failure or when the
+ * file is empty. The file is closed on every path.
+ */
+static int
+comp_perf_dump_input_data(struct comp_test_data *test_data)
+{
+	/* Binary mode: the file content is handled as raw bytes */
+	FILE *f = fopen(test_data->input_file, "rb");
+	int ret = -1;
+
+	if (f == NULL) {
+		RTE_LOG(ERR, USER1, "Input file could not be opened\n");
+		return -1;
+	}
+
+	if (fseek(f, 0, SEEK_END) != 0) {
+		RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
+		goto end;
+	}
+
+	/* ftell() reports failure as -1, which must not be used as a size */
+	long file_pos = ftell(f);
+
+	if (file_pos < 0) {
+		RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
+		goto end;
+	}
+
+	/* Nothing to compress, and nothing to repeat when extending */
+	if (file_pos == 0) {
+		RTE_LOG(ERR, USER1, "Input file is empty\n");
+		goto end;
+	}
+
+	size_t actual_file_sz = file_pos;
+	/* If extended input data size has not been set,
+	 * input data size = file size
+	 */
+
+	if (test_data->input_data_sz == 0)
+		test_data->input_data_sz = actual_file_sz;
+
+	if (fseek(f, 0, SEEK_SET) != 0) {
+		RTE_LOG(ERR, USER1, "Size of input could not be calculated\n");
+		goto end;
+	}
+
+	test_data->input_data = rte_zmalloc_socket(NULL,
+				test_data->input_data_sz, 0, rte_socket_id());
+
+	if (test_data->input_data == NULL) {
+		RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
+				"file could not be allocated\n");
+		goto end;
+	}
+
+	size_t remaining_data = test_data->input_data_sz;
+	uint8_t *data = test_data->input_data;
+
+	while (remaining_data > 0) {
+		size_t data_to_read = RTE_MIN(remaining_data, actual_file_sz);
+
+		if (fread(data, data_to_read, 1, f) != 1) {
+			RTE_LOG(ERR, USER1, "Input file could not be read\n");
+			goto end;
+		}
+		/* Rewind so that the next pass repeats the file content */
+		if (fseek(f, 0, SEEK_SET) != 0) {
+			RTE_LOG(ERR, USER1,
+				"Size of input could not be calculated\n");
+			goto end;
+		}
+		remaining_data -= data_to_read;
+		data += data_to_read;
+	}
+
+	if (test_data->input_data_sz > actual_file_sz)
+		RTE_LOG(INFO, USER1,
+		  "%zu bytes read from file %s, extending the file %.2f times\n",
+			test_data->input_data_sz, test_data->input_file,
+			(double)test_data->input_data_sz/actual_file_sz);
+	else
+		RTE_LOG(INFO, USER1,
+			"%zu bytes read from file %s\n",
+			test_data->input_data_sz, test_data->input_file);
+
+	ret = 0;
+
+end:
+	fclose(f);
+	return ret;
+}
+
+/*
+ * Select the first compress device matching test_data->driver_name,
+ * check its capabilities, then configure it with one queue pair and
+ * start it.
+ *
+ * Returns 0 on success, -EINVAL when no matching device exists and -1
+ * when a capability check or a configuration step fails.
+ */
+static int
+comp_perf_initialize_compressdev(struct comp_test_data *test_data)
+{
+	uint8_t devs[RTE_COMPRESS_MAX_DEVS];
+	uint8_t nb_devs;
+
+	nb_devs = rte_compressdev_devices_get(test_data->driver_name,
+			devs, RTE_COMPRESS_MAX_DEVS);
+	if (!nb_devs) {
+		RTE_LOG(ERR, USER1, "No compress devices type %s available\n",
+				test_data->driver_name);
+		return -EINVAL;
+	}
+
+	if (nb_devs > 1)
+		RTE_LOG(INFO, USER1,
+			"Only the first compress device will be used\n");
+
+	test_data->cdev_id = devs[0];
+
+	if (comp_perf_check_capabilities(test_data) < 0)
+		return -1;
+
+	/* Configure compressdev (one device, one queue pair) */
+	struct rte_compressdev_config dev_conf = {
+		.socket_id = rte_socket_id(),
+		.nb_queue_pairs = 1,
+		.max_nb_priv_xforms = NUM_MAX_XFORMS,
+		.max_nb_streams = 0
+	};
+
+	if (rte_compressdev_configure(test_data->cdev_id, &dev_conf) < 0) {
+		RTE_LOG(ERR, USER1, "Device configuration failed\n");
+		return -1;
+	}
+
+	if (rte_compressdev_queue_pair_setup(test_data->cdev_id, 0,
+			NUM_MAX_INFLIGHT_OPS, rte_socket_id()) < 0) {
+		RTE_LOG(ERR, USER1, "Queue pair setup failed\n");
+		return -1;
+	}
+
+	if (rte_compressdev_start(test_data->cdev_id) < 0) {
+		RTE_LOG(ERR, USER1, "Device could not be started\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Take one mbuf from pool and reserve len bytes of data room in it.
+ *
+ * On success returns the mbuf and stores the start of the reserved area
+ * in *data_addr. On failure logs the reason, gives the mbuf back to the
+ * pool and returns NULL.
+ */
+static struct rte_mbuf *
+prepare_bufs_alloc_seg(struct rte_mempool *pool, uint16_t len,
+		uint8_t **data_addr)
+{
+	struct rte_mbuf *seg = rte_pktmbuf_alloc(pool);
+
+	if (seg == NULL) {
+		RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
+		return NULL;
+	}
+
+	*data_addr = (uint8_t *)rte_pktmbuf_append(seg, len);
+	if (*data_addr == NULL) {
+		RTE_LOG(ERR, USER1, "Could not append data\n");
+		rte_pktmbuf_free(seg);
+		return NULL;
+	}
+
+	return seg;
+}
+
+/*
+ * Build total_bufs input mbuf chains (decomp_bufs) filled with the input
+ * data, and for each of them an output chain (comp_bufs) with the same
+ * number of segments, every segment seg_sz bytes long.
+ *
+ * Moves the cleanup state to ST_PREPARE_BUF once the first mbuf is stored
+ * in test_data. Returns 0 on success, -1 on the first failure; chains
+ * already stored in test_data stay there, while a segment that could not
+ * be filled or chained is released before returning.
+ */
+static int
+prepare_bufs(struct comp_test_data *test_data)
+{
+	size_t remaining_data = test_data->input_data_sz;
+	uint8_t *input_data_ptr = test_data->input_data;
+	uint16_t data_sz;
+	uint8_t *data_addr;
+	struct rte_mbuf *seg;
+	uint32_t i, j;
+
+	for (i = 0; i < test_data->total_bufs; i++) {
+		/* Allocate data in input mbuf and copy data from input file */
+		data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
+		seg = prepare_bufs_alloc_seg(test_data->decomp_buf_pool,
+				data_sz, &data_addr);
+		if (seg == NULL)
+			return -1;
+
+		test_data->decomp_bufs[i] = seg;
+		cleanup = ST_PREPARE_BUF;
+
+		rte_memcpy(data_addr, input_data_ptr, data_sz);
+		input_data_ptr += data_sz;
+		remaining_data -= data_sz;
+
+		/* Already one segment in the mbuf */
+		uint16_t segs_per_mbuf = 1;
+
+		/* Chain mbufs if needed for input mbufs */
+		while (segs_per_mbuf < test_data->max_sgl_segs
+				&& remaining_data > 0) {
+			data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
+			seg = prepare_bufs_alloc_seg(
+					test_data->decomp_buf_pool,
+					data_sz, &data_addr);
+			if (seg == NULL)
+				return -1;
+
+			rte_memcpy(data_addr, input_data_ptr, data_sz);
+			input_data_ptr += data_sz;
+			remaining_data -= data_sz;
+
+			if (rte_pktmbuf_chain(test_data->decomp_bufs[i],
+					seg) < 0) {
+				RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
+				/* Not part of any chain yet: free it here */
+				rte_pktmbuf_free(seg);
+				return -1;
+			}
+			segs_per_mbuf++;
+		}
+
+		/* Allocate data in output mbuf */
+		seg = prepare_bufs_alloc_seg(test_data->comp_buf_pool,
+				test_data->seg_sz, &data_addr);
+		if (seg == NULL)
+			return -1;
+
+		test_data->comp_bufs[i] = seg;
+
+		/* Chain mbufs if needed for output mbufs */
+		for (j = 1; j < segs_per_mbuf; j++) {
+			seg = prepare_bufs_alloc_seg(test_data->comp_buf_pool,
+					test_data->seg_sz, &data_addr);
+			if (seg == NULL)
+				return -1;
+
+			if (rte_pktmbuf_chain(test_data->comp_bufs[i],
+					seg) < 0) {
+				RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
+				/* Not part of any chain yet: free it here */
+				rte_pktmbuf_free(seg);
+				return -1;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Release the mbuf chain held in each of the total_bufs slots of
+ * comp_bufs and decomp_bufs.
+ */
+static void
+free_bufs(struct comp_test_data *test_data)
+{
+	uint32_t idx = 0;
+
+	while (idx < test_data->total_bufs) {
+		rte_pktmbuf_free(test_data->comp_bufs[idx]);
+		rte_pktmbuf_free(test_data->decomp_bufs[idx]);
+		idx++;
+	}
+}
+
+static int
+main_loop(struct comp_test_data *test_data, uint8_t level,
+			enum rte_comp_xform_type type,
+			uint8_t *output_data_ptr,
+			size_t *output_data_sz,
+			unsigned int benchmarking)
+{
+	uint8_t dev_id = test_data->cdev_id;
+	uint32_t i, iter, num_iter;
+	struct rte_comp_op **ops, **deq_ops;
+	void *priv_xform = NULL;
+	struct rte_comp_xform xform;
+	size_t output_size = 0;
+	struct rte_mbuf **input_bufs, **output_bufs;
+	int res = 0;
+	int allocated = 0;
+
+	if (test_data == NULL || !test_data->burst_sz) {
+		RTE_LOG(ERR, USER1,
+			"Unknown burst size\n");
+		return -1;
+	}
+
+	ops = rte_zmalloc_socket(NULL,
+		2 * test_data->total_bufs * sizeof(struct rte_comp_op *),
+		0, rte_socket_id());
+
+	if (ops == NULL) {
+		RTE_LOG(ERR, USER1,
+			"Can't allocate memory for ops strucures\n");
+		return -1;
+	}
+
+	deq_ops = &ops[test_data->total_bufs];
+
+	if (type == RTE_COMP_COMPRESS) {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_COMPRESS,
+			.compress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.deflate.huffman = test_data->huffman_enc,
+				.level = level,
+				.window_size = test_data->window_sz,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = test_data->decomp_bufs;
+		output_bufs = test_data->comp_bufs;
+	} else {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_DECOMPRESS,
+			.decompress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.window_size = test_data->window_sz,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = test_data->comp_bufs;
+		output_bufs = test_data->decomp_bufs;
+	}
+
+	/* Create private xform */
+	if (rte_compressdev_private_xform_create(dev_id, &xform,
+			&priv_xform) < 0) {
+		RTE_LOG(ERR, USER1, "Private xform could not be created\n");
+		res = -1;
+		goto end;
+	}
+
+	uint64_t tsc_start, tsc_end, tsc_duration;
+
+	tsc_start = tsc_end = tsc_duration = 0;
+	if (benchmarking) {
+		tsc_start = rte_rdtsc();
+		num_iter = test_data->num_iter;
+	} else
+		num_iter = 1;
+
+	for (iter = 0; iter < num_iter; iter++) {
+		uint32_t total_ops = test_data->total_bufs;
+		uint32_t remaining_ops = test_data->total_bufs;
+		uint32_t total_deq_ops = 0;
+		uint32_t total_enq_ops = 0;
+		uint16_t ops_unused = 0;
+		uint16_t num_enq = 0;
+		uint16_t num_deq = 0;
+
+		output_size = 0;
+
+		while (remaining_ops > 0) {
+			uint16_t num_ops = RTE_MIN(remaining_ops,
+						   test_data->burst_sz);
+			uint16_t ops_needed = num_ops - ops_unused;
+
+			/*
+			 * Move the unused operations from the previous
+			 * enqueue_burst call to the front, to maintain order
+			 */
+			if ((ops_unused > 0) && (num_enq > 0)) {
+				size_t nb_b_to_mov =
+				      ops_unused * sizeof(struct rte_comp_op *);
+
+				memmove(ops, &ops[num_enq], nb_b_to_mov);
+			}
+
+			/* Allocate compression operations */
+			if (ops_needed && !rte_comp_op_bulk_alloc(
+						test_data->op_pool,
+						&ops[ops_unused],
+						ops_needed)) {
+				RTE_LOG(ERR, USER1,
+				      "Could not allocate enough operations\n");
+				res = -1;
+				goto end;
+			}
+			allocated += ops_needed;
+
+			for (i = 0; i < ops_needed; i++) {
+				/*
+				 * Calculate next buffer to attach to operation
+				 */
+				uint32_t buf_id = total_enq_ops + i +
+						ops_unused;
+				uint16_t op_id = ops_unused + i;
+				/* Reset all data in output buffers */
+				struct rte_mbuf *m = output_bufs[buf_id];
+
+				m->pkt_len = test_data->seg_sz * m->nb_segs;
+				while (m) {
+					m->data_len = m->buf_len - m->data_off;
+					m = m->next;
+				}
+				ops[op_id]->m_src = input_bufs[buf_id];
+				ops[op_id]->m_dst = output_bufs[buf_id];
+				ops[op_id]->src.offset = 0;
+				ops[op_id]->src.length =
+					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
+				ops[op_id]->dst.offset = 0;
+				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
+				ops[op_id]->input_chksum = buf_id;
+				ops[op_id]->private_xform = priv_xform;
+			}
+
+			num_enq = rte_compressdev_enqueue_burst(dev_id, 0, ops,
+								num_ops);
+			ops_unused = num_ops - num_enq;
+			remaining_ops -= num_enq;
+			total_enq_ops += num_enq;
+
+			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
+							   deq_ops,
+							   test_data->burst_sz);
+			total_deq_ops += num_deq;
+			if (benchmarking == 0) {
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+					const void *read_data_addr =
+						rte_pktmbuf_read(op->m_dst, 0,
+						op->produced, output_data_ptr);
+					if (read_data_addr == NULL) {
+						RTE_LOG(ERR, USER1,
+				      "Could not copy buffer in destination\n");
+						res = -1;
+						goto end;
+					}
+
+					if (read_data_addr != output_data_ptr)
+						rte_memcpy(output_data_ptr,
+							rte_pktmbuf_mtod(
+							  op->m_dst, uint8_t *),
+							op->produced);
+					output_data_ptr += op->produced;
+					output_size += op->produced;
+
+				}
+			}
+
+			if (iter == num_iter - 1) {
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+					struct rte_mbuf *m = op->m_dst;
+
+					m->pkt_len = op->produced;
+					uint32_t remaining_data = op->produced;
+					uint16_t data_to_append;
+
+					while (remaining_data > 0) {
+						data_to_append =
+							RTE_MIN(remaining_data,
+							     test_data->seg_sz);
+						m->data_len = data_to_append;
+						remaining_data -=
+								data_to_append;
+						m = m->next;
+					}
+				}
+			}
+			rte_mempool_put_bulk(test_data->op_pool,
+					     (void **)deq_ops, num_deq);
+			allocated -= num_deq;
+		}
+
+		/* Dequeue the last operations */
+		while (total_deq_ops < total_ops) {
+			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
+						deq_ops, test_data->burst_sz);
+			total_deq_ops += num_deq;
+			if (benchmarking == 0) {
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+					const void *read_data_addr =
+						rte_pktmbuf_read(op->m_dst,
+							op->dst.offset,
+							op->produced,
+							output_data_ptr);
+					if (read_data_addr == NULL) {
+						RTE_LOG(ERR, USER1,
+				      "Could not copy buffer in destination\n");
+						res = -1;
+						goto end;
+					}
+
+					if (read_data_addr != output_data_ptr)
+						rte_memcpy(output_data_ptr,
+							rte_pktmbuf_mtod(
+							op->m_dst, uint8_t *),
+							op->produced);
+					output_data_ptr += op->produced;
+					output_size += op->produced;
+
+				}
+			}
+
+			if (iter == num_iter - 1) {
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+					struct rte_mbuf *m = op->m_dst;
+
+					m->pkt_len = op->produced;
+					uint32_t remaining_data = op->produced;
+					uint16_t data_to_append;
+
+					while (remaining_data > 0) {
+						data_to_append =
+						RTE_MIN(remaining_data,
+							test_data->seg_sz);
+						m->data_len = data_to_append;
+						remaining_data -=
+								data_to_append;
+						m = m->next;
+					}
+				}
+			}
+			rte_mempool_put_bulk(test_data->op_pool,
+					     (void **)deq_ops, num_deq);
+			allocated -= num_deq;
+		}
+	}
+
+	if (benchmarking) {
+		tsc_end = rte_rdtsc();
+		tsc_duration = tsc_end - tsc_start;
+
+		if (type == RTE_COMP_COMPRESS)
+			test_data->comp_tsc_duration[level] =
+					tsc_duration / num_iter;
+		else
+			test_data->decomp_tsc_duration[level] =
+					tsc_duration / num_iter;
+	}
+
+	if (benchmarking == 0 && output_data_sz)
+		*output_data_sz = output_size;
+end:
+	rte_mempool_put_bulk(test_data->op_pool, (void **)ops, allocated);
+	rte_compressdev_private_xform_free(dev_id, priv_xform);
+	rte_free(ops);
+	return res;
+}
+
 int
 main(int argc, char **argv)
 {
-	int ret;
+	uint8_t level, level_idx = 0;
+	int ret, i;
 	struct comp_test_data *test_data;
 
 	/* Initialise DPDK EAL */
@@ -29,24 +745,186 @@ main(int argc, char **argv)
 		rte_exit(EXIT_FAILURE, "Cannot reserve memory in socket %d\n",
 				rte_socket_id());
 
+	cleanup = ST_TEST_DATA;
 	comp_perf_options_default(test_data);
 
 	if (comp_perf_options_parse(test_data, argc, argv) < 0) {
 		RTE_LOG(ERR, USER1,
 			"Parsing one or more user options failed\n");
 		ret = EXIT_FAILURE;
-		goto err;
+		goto end;
 	}
 
 	if (comp_perf_options_check(test_data) < 0) {
 		ret = EXIT_FAILURE;
-		goto err;
+		goto end;
+	}
+
+	if (comp_perf_initialize_compressdev(test_data) < 0) {
+		ret = EXIT_FAILURE;
+		goto end;
+	}
+
+	cleanup = ST_COMPDEV;
+	if (comp_perf_dump_input_data(test_data) < 0) {
+		ret = EXIT_FAILURE;
+		goto end;
+	}
+
+	cleanup = ST_INPUT_DATA;
+	if (comp_perf_allocate_memory(test_data) < 0) {
+		ret = EXIT_FAILURE;
+		goto end;
+	}
+
+	if (prepare_bufs(test_data) < 0) {
+		ret = EXIT_FAILURE;
+		goto end;
+	}
+
+	if (test_data->level.inc != 0)
+		level = test_data->level.min;
+	else
+		level = test_data->level.list[0];
+
+	size_t comp_data_sz;
+	size_t decomp_data_sz;
+
+	printf("Burst size = %u\n", test_data->burst_sz);
+	printf("File size = %zu\n", test_data->input_data_sz);
+
+	printf("%6s%12s%17s%19s%21s%15s%21s%23s%16s\n",
+		"Level", "Comp size", "Comp ratio [%]",
+		"Comp [Cycles/it]", "Comp [Cycles/Byte]", "Comp [Gbps]",
+		"Decomp [Cycles/it]", "Decomp [Cycles/Byte]", "Decomp [Gbps]");
+
+	cleanup = ST_DURING_TEST;
+	while (level <= test_data->level.max) {
+		/*
+		 * Run a first iteration, to verify compression and
+		 * get the compression ratio for the level
+		 */
+		if (main_loop(test_data, level, RTE_COMP_COMPRESS,
+			      test_data->compressed_data,
+			      &comp_data_sz, 0) < 0) {
+			ret = EXIT_FAILURE;
+			goto end;
+		}
+
+		if (main_loop(test_data, level, RTE_COMP_DECOMPRESS,
+			      test_data->decompressed_data,
+			      &decomp_data_sz, 0) < 0) {
+			ret = EXIT_FAILURE;
+			goto end;
+		}
+
+		if (decomp_data_sz != test_data->input_data_sz) {
+			RTE_LOG(ERR, USER1,
+		   "Decompressed data length not equal to input data length\n");
+			RTE_LOG(ERR, USER1,
+				"Decompressed size = %zu, expected = %zu\n",
+				decomp_data_sz, test_data->input_data_sz);
+			ret = EXIT_FAILURE;
+			goto end;
+		} else {
+			if (memcmp(test_data->decompressed_data,
+					test_data->input_data,
+					test_data->input_data_sz) != 0) {
+				RTE_LOG(ERR, USER1,
+			    "Decompressed data is not the same as file data\n");
+				ret = EXIT_FAILURE;
+				goto end;
+			}
+		}
+
+		double ratio = (double) comp_data_sz /
+						test_data->input_data_sz * 100;
+
+		/*
+		 * Run the tests twice, discarding the first performance
+		 * results, before the cache is warmed up
+		 */
+		for (i = 0; i < 2; i++) {
+			if (main_loop(test_data, level, RTE_COMP_COMPRESS,
+					NULL, NULL, 1) < 0) {
+				ret = EXIT_FAILURE;
+				goto end;
+			}
+		}
+
+		for (i = 0; i < 2; i++) {
+			if (main_loop(test_data, level, RTE_COMP_DECOMPRESS,
+					NULL, NULL, 1) < 0) {
+				ret = EXIT_FAILURE;
+				goto end;
+			}
+		}
+
+		uint64_t comp_tsc_duration =
+				test_data->comp_tsc_duration[level];
+		double comp_tsc_byte = (double)comp_tsc_duration /
+						test_data->input_data_sz;
+		double comp_gbps = rte_get_tsc_hz() / comp_tsc_byte * 8 /
+				1000000000;
+		uint64_t decomp_tsc_duration =
+				test_data->decomp_tsc_duration[level];
+		double decomp_tsc_byte = (double)decomp_tsc_duration /
+						test_data->input_data_sz;
+		double decomp_gbps = rte_get_tsc_hz() / decomp_tsc_byte * 8 /
+				1000000000;
+
+		printf("%6u%12zu%17.2f%19"PRIu64"%21.2f"
+					"%15.2f%21"PRIu64"%23.2f%16.2f\n",
+		       level, comp_data_sz, ratio, comp_tsc_duration,
+		       comp_tsc_byte, comp_gbps, decomp_tsc_duration,
+		       decomp_tsc_byte, decomp_gbps);
+
+		if (test_data->level.inc != 0)
+			level += test_data->level.inc;
+		else {
+			if (++level_idx == test_data->level.count)
+				break;
+			level = test_data->level.list[level_idx];
+		}
 	}
 
 	ret = EXIT_SUCCESS;
 
-err:
-	rte_free(test_data);
+end:
+	switch (cleanup) {
 
+	case ST_DURING_TEST:
+	case ST_PREPARE_BUF:
+		free_bufs(test_data);
+		/* fallthrough */
+	case ST_MEMORY_ALLOC:
+		rte_free(test_data->decomp_bufs);
+		rte_free(test_data->comp_bufs);
+		rte_free(test_data->decompressed_data);
+		rte_free(test_data->compressed_data);
+		rte_mempool_free(test_data->op_pool);
+		rte_mempool_free(test_data->decomp_buf_pool);
+		rte_mempool_free(test_data->comp_buf_pool);
+		/* fallthrough */
+	case ST_INPUT_DATA:
+		rte_free(test_data->input_data);
+		/* fallthrough */
+	case ST_COMPDEV:
+		if (test_data->cdev_id != -1)
+			rte_compressdev_stop(test_data->cdev_id);
+		/* fallthrough */
+	case ST_TEST_DATA:
+		rte_free(test_data);
+		/* fallthrough */
+	case ST_CLEAR:
+	default:
+		i = rte_eal_cleanup();
+		if (i) {
+			RTE_LOG(ERR, USER1,
+				"Error from rte_eal_cleanup(), %d\n", i);
+			ret = i;
+		}
+		break;
+	}
 	return ret;
 }
-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH v5 3/5] doc/guides/tools: add doc files
  2018-12-05  8:47       ` [dpdk-dev] [PATCH v5 0/5] add initial version of compress-perf Tomasz Jozwiak
  2018-12-05  8:47         ` [dpdk-dev] [PATCH v5 1/5] app/compress-perf: add parser Tomasz Jozwiak
  2018-12-05  8:47         ` [dpdk-dev] [PATCH v5 2/5] app/compress-perf: add performance measurement Tomasz Jozwiak
@ 2018-12-05  8:47         ` Tomasz Jozwiak
  2018-12-05  8:47         ` [dpdk-dev] [PATCH v5 4/5] app/compress-perf: add dynamic compression test Tomasz Jozwiak
                           ` (4 subsequent siblings)
  7 siblings, 0 replies; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-12-05  8:47 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, Shally.Verma, akhil.goyal

Added:
 -  initial version of compression performance test
    description file.
 -  release note in release_18_11.rst

Updated index.rst file

Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
---
 MAINTAINERS                            |  5 +++
 doc/guides/rel_notes/release_18_11.rst |  5 +++
 doc/guides/tools/comp_perf.rst         | 81 ++++++++++++++++++++++++++++++++++
 doc/guides/tools/index.rst             |  1 +
 4 files changed, 92 insertions(+)
 create mode 100644 doc/guides/tools/comp_perf.rst

diff --git a/MAINTAINERS b/MAINTAINERS
index 71ba312..dd0c131 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1242,6 +1242,11 @@ M: Bernard Iremonger <bernard.iremonger@intel.com>
 F: app/test-pmd/
 F: doc/guides/testpmd_app_ug/
 
+Compression performance test application
+M: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
+F: app/test-compress-perf/
+F: doc/guides/tools/comp_perf.rst
+
 Crypto performance test application
 M: Declan Doherty <declan.doherty@intel.com>
 F: app/test-crypto-perf/
diff --git a/doc/guides/rel_notes/release_18_11.rst b/doc/guides/rel_notes/release_18_11.rst
index 65bab55..e6652a5 100644
--- a/doc/guides/rel_notes/release_18_11.rst
+++ b/doc/guides/rel_notes/release_18_11.rst
@@ -352,6 +352,11 @@ New Features
   additional command-line parameter values from the ``DPDK_TEST_PARAMS``
   environment variable to make this application easier to use.
 
+* **Added a compression performance test tool.**
+
+   Added a new performance test tool to test the compressdev PMD. The tool tests
+   compression ratio and compression throughput. Dynamic compression test is not
+   supported yet.
 
 API Changes
 -----------
diff --git a/doc/guides/tools/comp_perf.rst b/doc/guides/tools/comp_perf.rst
new file mode 100644
index 0000000..d343408
--- /dev/null
+++ b/doc/guides/tools/comp_perf.rst
@@ -0,0 +1,81 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright(c) 2018 Intel Corporation.
+
+dpdk-test-compress-perf Tool
+============================
+
+The ``dpdk-test-compress-perf`` tool is a Data Plane Development Kit (DPDK)
+utility that allows measuring performance parameters of PMDs available in the
+compress tree. The tool reads the data from a file (--input-file),
+loads the whole file into a buffer and copies that data into input mbufs,
+which are passed to the compress device with compression operations.
+Then, the output buffers are fed into the decompression stage, and the resulting
+data is compared against the original data (verification phase). After that,
+a number of iterations are performed, compressing first and decompressing later,
+to check the throughput rate
+(showing cycles/iteration, cycles/Byte and Gbps, for compression and decompression).
+
+.. Note::
+
+	If max-num-sgl-segs x seg_sz > input size, the number of segments in
+	the chain will be lower than the value passed to max-num-sgl-segs.
+
+
+Limitations
+~~~~~~~~~~~
+
+* Stateful operation and dynamic compression are not supported in this version.
+
+
+Command line options
+--------------------
+
+ ``--driver-name NAME``: compress driver to use
+
+ ``--input-file NAME``: file to compress and decompress
+
+ ``--extended-input-sz N``: extend file data up to this size (default: no extension)
+
+ ``--seg-sz N``: size of segment to store the data (default: 2048)
+
+ ``--burst-sz N``: compress operation burst size
+
+ ``--pool-sz N``: mempool size for compress operations/mbufs (default: 8192)
+
+ ``--max-num-sgl-segs N``: maximum number of segments for each mbuf (default: 16)
+
+ ``--num-iter N``: number of times the file will be compressed/decompressed (default: 10000)
+
+ ``--operation [comp/decomp/comp_and_decomp]``: perform test on compression, decompression or both operations
+
+ ``--huffman-enc [fixed/default]``: Huffman encoding (default: fixed)
+
+ ``--compress-level N``: compression level, which could be a single value, list or range (default: range between 1 and 9)
+
+ ``--window-sz N``: base two log value of compression window size (default: max supported by PMD)
+
+ ``-h``: prints this help
+
+
+Compiling the Tool
+------------------
+
+**Step 1: PMD setting**
+
+The ``dpdk-test-compress-perf`` tool depends on compression device PMDs, which
+may be disabled by default in the build configuration file ``common_base``.
+The compression device PMD to be tested can be enabled by setting e.g.::
+
+   CONFIG_RTE_LIBRTE_PMD_ISAL=y
+
+
+Running the Tool
+----------------
+
+The tool has a number of command line options. Here is a sample command line:
+
+.. code-block:: console
+
+   ./build/app/dpdk-test-compress-perf  -l 4 -- --driver-name compress_qat --input-file test.txt --seg-sz 8192
+    --compress-level 1:1:9 --num-iter 10 --extended-input-sz 1048576  --max-num-sgl-segs 16 --huffman-enc fixed
+
diff --git a/doc/guides/tools/index.rst b/doc/guides/tools/index.rst
index a6e2c4c..24235ba 100644
--- a/doc/guides/tools/index.rst
+++ b/doc/guides/tools/index.rst
@@ -42,3 +42,4 @@ DPDK Tools User Guides
     testbbdev
     cryptoperf
     testeventdev
+    comp_perf
-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH v5 4/5] app/compress-perf: add dynamic compression test
  2018-12-05  8:47       ` [dpdk-dev] [PATCH v5 0/5] add initial version of compress-perf Tomasz Jozwiak
                           ` (2 preceding siblings ...)
  2018-12-05  8:47         ` [dpdk-dev] [PATCH v5 3/5] doc/guides/tools: add doc files Tomasz Jozwiak
@ 2018-12-05  8:47         ` Tomasz Jozwiak
  2018-12-05  8:47         ` [dpdk-dev] [PATCH v5 5/5] app/compress-perf: code refactoring Tomasz Jozwiak
                           ` (3 subsequent siblings)
  7 siblings, 0 replies; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-12-05  8:47 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, Shally.Verma, akhil.goyal

Added dynamic compression feature into compression perf. test.

Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
---
 app/test-compress-perf/comp_perf_options_parse.c | 10 +++++++---
 doc/guides/rel_notes/release_18_11.rst           |  3 +--
 doc/guides/tools/comp_perf.rst                   |  4 ++--
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/app/test-compress-perf/comp_perf_options_parse.c b/app/test-compress-perf/comp_perf_options_parse.c
index add5c8a..66eb81f 100644
--- a/app/test-compress-perf/comp_perf_options_parse.c
+++ b/app/test-compress-perf/comp_perf_options_parse.c
@@ -50,8 +50,8 @@ usage(char *progname)
 		"		compressed/decompressed (default: 10000)\n"
 		" --operation [comp/decomp/comp_and_decomp]: perform test on\n"
 		"		compression, decompression or both operations\n"
-		" --huffman-enc [fixed/default]: Huffman encoding\n"
-		"		(default: fixed)\n"
+		" --huffman-enc [fixed/dynamic/default]: Huffman encoding\n"
+		"		(default: dynamic)\n"
 		" --compress-level N: compression level, which could be a single value, list or range\n"
 		"		(default: range between 1 and 9)\n"
 		" --window-sz N: base two log value of compression window size\n"
@@ -431,6 +431,10 @@ parse_huffman_enc(struct comp_test_data *test_data, const char *arg)
 		{
 			"fixed",
 			RTE_COMP_HUFFMAN_FIXED
+		},
+		{
+			"dynamic",
+			RTE_COMP_HUFFMAN_DYNAMIC
 		}
 	};
 
@@ -567,7 +571,7 @@ comp_perf_options_default(struct comp_test_data *test_data)
 	test_data->pool_sz = 8192;
 	test_data->max_sgl_segs = 16;
 	test_data->num_iter = 10000;
-	test_data->huffman_enc = RTE_COMP_HUFFMAN_FIXED;
+	test_data->huffman_enc = RTE_COMP_HUFFMAN_DYNAMIC;
 	test_data->test_op = COMPRESS_DECOMPRESS;
 	test_data->window_sz = -1;
 	test_data->level.min = 1;
diff --git a/doc/guides/rel_notes/release_18_11.rst b/doc/guides/rel_notes/release_18_11.rst
index e6652a5..8d9cdd2 100644
--- a/doc/guides/rel_notes/release_18_11.rst
+++ b/doc/guides/rel_notes/release_18_11.rst
@@ -355,8 +355,7 @@ New Features
 * **Added a compression performance test tool.**
 
    Added a new performance test tool to test the compressdev PMD. The tool tests
-   compression ratio and compression throughput. Dynamic compression test is not
-   supported yet.
+   compression ratio and compression throughput.
 
 API Changes
 -----------
diff --git a/doc/guides/tools/comp_perf.rst b/doc/guides/tools/comp_perf.rst
index d343408..e086c2a 100644
--- a/doc/guides/tools/comp_perf.rst
+++ b/doc/guides/tools/comp_perf.rst
@@ -24,7 +24,7 @@ to check the throughput rate
 Limitations
 ~~~~~~~~~~~
 
-* Stateful operation and dynamic compression are not supported in this version.
+* Stateful operation is not supported in this version.
 
 
 Command line options
@@ -48,7 +48,7 @@ Command line options
 
  ``--operation [comp/decomp/comp_and_decomp]``: perform test on compression, decompression or both operations
 
- ``--huffman-enc [fixed/default]``: Huffman encoding (default: fixed)
+ ``--huffman-enc [fixed/dynamic/default]``: Huffman encoding (default: dynamic)
 
  ``--compress-level N``: compression level, which could be a single value, list or range (default: range between 1 and 9)
 
-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH v5 5/5] app/compress-perf: code refactoring
  2018-12-05  8:47       ` [dpdk-dev] [PATCH v5 0/5] add initial version of compress-perf Tomasz Jozwiak
                           ` (3 preceding siblings ...)
  2018-12-05  8:47         ` [dpdk-dev] [PATCH v5 4/5] app/compress-perf: add dynamic compression test Tomasz Jozwiak
@ 2018-12-05  8:47         ` Tomasz Jozwiak
  2018-12-05 15:37         ` [dpdk-dev] [PATCH v5 0/5] add initial version of compress-perf Daly, Lee
                           ` (2 subsequent siblings)
  7 siblings, 0 replies; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-12-05  8:47 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, Shally.Verma, akhil.goyal

Code refactoring to separate validation from benchmarking part.
Added op's status checking after rte_compressdev_dequeue_burst
function.

Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
---
 app/test-compress-perf/Makefile                   |   2 +
 app/test-compress-perf/comp_perf_options.h        |  12 +
 app/test-compress-perf/comp_perf_test_benchmark.c | 308 ++++++++++++++++++
 app/test-compress-perf/comp_perf_test_benchmark.h |  13 +
 app/test-compress-perf/comp_perf_test_verify.c    | 353 +++++++++++++++++++++
 app/test-compress-perf/comp_perf_test_verify.h    |  13 +
 app/test-compress-perf/main.c                     | 368 +---------------------
 app/test-compress-perf/meson.build                |   4 +-
 8 files changed, 718 insertions(+), 355 deletions(-)
 create mode 100644 app/test-compress-perf/comp_perf_test_benchmark.c
 create mode 100644 app/test-compress-perf/comp_perf_test_benchmark.h
 create mode 100644 app/test-compress-perf/comp_perf_test_verify.c
 create mode 100644 app/test-compress-perf/comp_perf_test_verify.h

diff --git a/app/test-compress-perf/Makefile b/app/test-compress-perf/Makefile
index 8aa7a22..d20e17e 100644
--- a/app/test-compress-perf/Makefile
+++ b/app/test-compress-perf/Makefile
@@ -12,5 +12,7 @@ CFLAGS += -O3
 # all source are stored in SRCS-y
 SRCS-y := main.c
 SRCS-y += comp_perf_options_parse.c
+SRCS-y += comp_perf_test_verify.c
+SRCS-y += comp_perf_test_benchmark.c
 
 include $(RTE_SDK)/mk/rte.app.mk
diff --git a/app/test-compress-perf/comp_perf_options.h b/app/test-compress-perf/comp_perf_options.h
index 7516ea0..ca96a3c 100644
--- a/app/test-compress-perf/comp_perf_options.h
+++ b/app/test-compress-perf/comp_perf_options.h
@@ -2,6 +2,9 @@
  * Copyright(c) 2018 Intel Corporation
  */
 
+#ifndef _COMP_PERF_OPS_
+#define _COMP_PERF_OPS_
+
 #define MAX_DRIVER_NAME		64
 #define MAX_INPUT_FILE_NAME	64
 #define MAX_LIST		32
@@ -46,6 +49,13 @@ struct comp_test_data {
 	/* Store TSC duration for all levels (including level 0) */
 	uint64_t comp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
 	uint64_t decomp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
+	size_t comp_data_sz;
+	size_t decomp_data_sz;
+	double ratio;
+	double comp_gbps;
+	double decomp_gbps;
+	double comp_tsc_byte;
+	double decomp_tsc_byte;
 };
 
 int
@@ -57,3 +67,5 @@ comp_perf_options_default(struct comp_test_data *test_data);
 
 int
 comp_perf_options_check(struct comp_test_data *test_data);
+
+#endif
diff --git a/app/test-compress-perf/comp_perf_test_benchmark.c b/app/test-compress-perf/comp_perf_test_benchmark.c
new file mode 100644
index 0000000..9aa2665
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_test_benchmark.c
@@ -0,0 +1,308 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_cycles.h>
+#include <rte_compressdev.h>
+
+#include "comp_perf_test_benchmark.h"
+
+/*
+ * Post-process one dequeued burst (used on the last benchmark pass only):
+ * check each op's status, then set the destination mbuf chain's pkt_len
+ * and per-segment data_len to match the number of bytes the op produced.
+ * Returns 0 on success, -1 as soon as an op reports a failure status.
+ */
+static int
+finalize_deq_ops(const struct comp_test_data *test_data,
+		struct rte_comp_op **deq_ops, uint16_t num_deq)
+{
+	uint16_t i;
+
+	for (i = 0; i < num_deq; i++) {
+		struct rte_comp_op *op = deq_ops[i];
+		struct rte_mbuf *m = op->m_dst;
+		uint32_t remaining_data = op->produced;
+
+		if (op->status != RTE_COMP_OP_STATUS_SUCCESS) {
+			RTE_LOG(ERR, USER1,
+				"Some operations were not successful\n");
+			return -1;
+		}
+
+		m->pkt_len = op->produced;
+		while (remaining_data > 0) {
+			uint16_t data_to_append = RTE_MIN(remaining_data,
+							test_data->seg_sz);
+
+			m->data_len = data_to_append;
+			remaining_data -= data_to_append;
+			m = m->next;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Run test_data->num_iter timed passes of compression or decompression
+ * (selected by type) over all test buffers, then store the mean TSC
+ * cycles per pass in comp_tsc_duration[level] or
+ * decomp_tsc_duration[level]. Returns 0 on success, -1 on failure.
+ */
+static int
+main_loop(struct comp_test_data *test_data, uint8_t level,
+			enum rte_comp_xform_type type)
+{
+	uint8_t dev_id;
+	uint32_t i, iter, num_iter;
+	struct rte_comp_op **ops, **deq_ops;
+	void *priv_xform = NULL;
+	struct rte_comp_xform xform;
+	struct rte_mbuf **input_bufs, **output_bufs;
+	uint64_t tsc_start, tsc_end, tsc_duration;
+	int res = 0;
+	int allocated = 0;
+
+	if (test_data == NULL || !test_data->burst_sz) {
+		RTE_LOG(ERR, USER1,
+			"Unknown burst size\n");
+		return -1;
+	}
+
+	/* Read the device id only after the NULL check above */
+	dev_id = test_data->cdev_id;
+
+	ops = rte_zmalloc_socket(NULL,
+		2 * test_data->total_bufs * sizeof(struct rte_comp_op *),
+		0, rte_socket_id());
+
+	if (ops == NULL) {
+		RTE_LOG(ERR, USER1,
+			"Can't allocate memory for ops strucures\n");
+		return -1;
+	}
+
+	deq_ops = &ops[test_data->total_bufs];
+
+	if (type == RTE_COMP_COMPRESS) {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_COMPRESS,
+			.compress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.deflate.huffman = test_data->huffman_enc,
+				.level = level,
+				.window_size = test_data->window_sz,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = test_data->decomp_bufs;
+		output_bufs = test_data->comp_bufs;
+	} else {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_DECOMPRESS,
+			.decompress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.window_size = test_data->window_sz,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = test_data->comp_bufs;
+		output_bufs = test_data->decomp_bufs;
+	}
+
+	/* Create private xform */
+	if (rte_compressdev_private_xform_create(dev_id, &xform,
+			&priv_xform) < 0) {
+		RTE_LOG(ERR, USER1, "Private xform could not be created\n");
+		res = -1;
+		goto end;
+	}
+
+	/* Time all passes together; the per-pass mean is stored below */
+	tsc_start = rte_rdtsc();
+	num_iter = test_data->num_iter;
+
+	for (iter = 0; iter < num_iter; iter++) {
+		uint32_t total_ops = test_data->total_bufs;
+		uint32_t remaining_ops = test_data->total_bufs;
+		uint32_t total_deq_ops = 0;
+		uint32_t total_enq_ops = 0;
+		uint16_t ops_unused = 0;
+		uint16_t num_enq = 0;
+		uint16_t num_deq = 0;
+
+		while (remaining_ops > 0) {
+			uint16_t num_ops = RTE_MIN(remaining_ops,
+						   test_data->burst_sz);
+			uint16_t ops_needed = num_ops - ops_unused;
+
+			/*
+			 * Move the unused operations from the previous
+			 * enqueue_burst call to the front, to maintain order
+			 */
+			if ((ops_unused > 0) && (num_enq > 0)) {
+				size_t nb_b_to_mov =
+				      ops_unused * sizeof(struct rte_comp_op *);
+
+				memmove(ops, &ops[num_enq], nb_b_to_mov);
+			}
+
+			/* Allocate compression operations */
+			if (ops_needed && !rte_comp_op_bulk_alloc(
+						test_data->op_pool,
+						&ops[ops_unused],
+						ops_needed)) {
+				RTE_LOG(ERR, USER1,
+				      "Could not allocate enough operations\n");
+				res = -1;
+				goto end;
+			}
+			allocated += ops_needed;
+
+			for (i = 0; i < ops_needed; i++) {
+				/*
+				 * Calculate next buffer to attach to operation
+				 */
+				uint32_t buf_id = total_enq_ops + i +
+						ops_unused;
+				uint16_t op_id = ops_unused + i;
+				/* Reset all data in output buffers */
+				struct rte_mbuf *m = output_bufs[buf_id];
+
+				m->pkt_len = test_data->seg_sz * m->nb_segs;
+				while (m) {
+					m->data_len = m->buf_len - m->data_off;
+					m = m->next;
+				}
+				ops[op_id]->m_src = input_bufs[buf_id];
+				ops[op_id]->m_dst = output_bufs[buf_id];
+				ops[op_id]->src.offset = 0;
+				ops[op_id]->src.length =
+					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
+				ops[op_id]->dst.offset = 0;
+				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
+				ops[op_id]->input_chksum = buf_id;
+				ops[op_id]->private_xform = priv_xform;
+			}
+
+			num_enq = rte_compressdev_enqueue_burst(dev_id, 0, ops,
+								num_ops);
+			if (num_enq == 0) {
+				struct rte_compressdev_stats stats;
+
+				rte_compressdev_stats_get(dev_id, &stats);
+				if (stats.enqueue_err_count) {
+					res = -1;
+					goto end;
+				}
+			}
+
+			ops_unused = num_ops - num_enq;
+			remaining_ops -= num_enq;
+			total_enq_ops += num_enq;
+
+			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
+							   deq_ops,
+							   test_data->burst_sz);
+			total_deq_ops += num_deq;
+
+			/* Status check and length fix-up: last pass only */
+			if (iter == num_iter - 1 &&
+			    finalize_deq_ops(test_data, deq_ops, num_deq) < 0) {
+				res = -1;
+				goto end;
+			}
+			rte_mempool_put_bulk(test_data->op_pool,
+					     (void **)deq_ops, num_deq);
+			allocated -= num_deq;
+		}
+
+		/* Dequeue the last operations */
+		while (total_deq_ops < total_ops) {
+			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
+						deq_ops, test_data->burst_sz);
+			if (num_deq == 0) {
+				struct rte_compressdev_stats stats;
+
+				rte_compressdev_stats_get(dev_id, &stats);
+				if (stats.dequeue_err_count) {
+					res = -1;
+					goto end;
+				}
+			}
+
+			total_deq_ops += num_deq;
+
+			/* Status check and length fix-up: last pass only */
+			if (iter == num_iter - 1 &&
+			    finalize_deq_ops(test_data, deq_ops, num_deq) < 0) {
+				res = -1;
+				goto end;
+			}
+			rte_mempool_put_bulk(test_data->op_pool,
+					     (void **)deq_ops, num_deq);
+			allocated -= num_deq;
+		}
+	}
+
+	tsc_end = rte_rdtsc();
+	tsc_duration = tsc_end - tsc_start;
+
+	if (type == RTE_COMP_COMPRESS)
+		test_data->comp_tsc_duration[level] =
+				tsc_duration / num_iter;
+	else
+		test_data->decomp_tsc_duration[level] =
+				tsc_duration / num_iter;
+
+end:
+	rte_mempool_put_bulk(test_data->op_pool, (void **)ops, allocated);
+	rte_compressdev_private_xform_free(dev_id, priv_xform);
+	rte_free(ops);
+	return res;
+}
+
+/*
+ * Benchmark compression and decompression at one level and fill in the
+ * cycles/byte and Gbps fields of test_data. Returns EXIT_SUCCESS/FAILURE.
+ */
+int
+cperf_benchmark(struct comp_test_data *test_data, uint8_t level)
+{
+	static const enum rte_comp_xform_type runs[] = {
+		RTE_COMP_COMPRESS, RTE_COMP_COMPRESS,
+		RTE_COMP_DECOMPRESS, RTE_COMP_DECOMPRESS
+	};
+	uint64_t comp_cycles, decomp_cycles;
+	size_t run;
+
+	/*
+	 * Every direction runs twice back to back: the second run's
+	 * result replaces the first, which only served as cache warm-up.
+	 */
+	for (run = 0; run < sizeof(runs) / sizeof(runs[0]); run++) {
+		if (main_loop(test_data, level, runs[run]) < 0)
+			return EXIT_FAILURE;
+	}
+
+	comp_cycles = test_data->comp_tsc_duration[level];
+	decomp_cycles = test_data->decomp_tsc_duration[level];
+
+	test_data->comp_tsc_byte =
+			(double)comp_cycles / test_data->input_data_sz;
+	test_data->decomp_tsc_byte =
+			(double)decomp_cycles / test_data->input_data_sz;
+
+	/* cycles/s divided by cycles/byte gives bytes/s; scale to Gbit/s */
+	test_data->comp_gbps =
+		rte_get_tsc_hz() / test_data->comp_tsc_byte * 8 / 1000000000;
+	test_data->decomp_gbps =
+		rte_get_tsc_hz() / test_data->decomp_tsc_byte * 8 / 1000000000;
+
+	return EXIT_SUCCESS;
+}
diff --git a/app/test-compress-perf/comp_perf_test_benchmark.h b/app/test-compress-perf/comp_perf_test_benchmark.h
new file mode 100644
index 0000000..b193445
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_test_benchmark.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _COMP_PERF_TEST_BENCHMARK_
+#define _COMP_PERF_TEST_BENCHMARK_
+
+#include "comp_perf_options.h"
+
+int
+cperf_benchmark(struct comp_test_data *test_data, uint8_t level);
+
+#endif
diff --git a/app/test-compress-perf/comp_perf_test_verify.c b/app/test-compress-perf/comp_perf_test_verify.c
new file mode 100644
index 0000000..57a9930
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_test_verify.c
@@ -0,0 +1,353 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_compressdev.h>
+
+#include "comp_perf_test_verify.h"
+
+/*
+ * Run one verification pass (compression or decompression, selected by
+ * 'type') over all test_data->total_bufs buffers, using queue pair 0 of
+ * device test_data->cdev_id.
+ *
+ * For RTE_COMP_COMPRESS the operations read from test_data->decomp_bufs and
+ * write to test_data->comp_bufs; for any other type the direction is
+ * reversed. Operations are enqueued in bursts of at most
+ * test_data->burst_sz. For every dequeued operation the op->produced bytes
+ * are copied to output_data_ptr (which is advanced accordingly) and the
+ * destination mbuf chain is trimmed to the produced length.
+ *
+ * @param test_data       test configuration and buffers
+ * @param level           compression level (only used when compressing)
+ * @param type            RTE_COMP_COMPRESS or RTE_COMP_DECOMPRESS
+ * @param output_data_ptr flat buffer receiving the produced data; no bounds
+ *                        check is done here, so the caller must size it
+ * @param output_data_sz  if not NULL, receives the total number of bytes
+ *                        produced (written on success only)
+ *
+ * @return 0 on success, -1 on any failure, including an operation that
+ *         completes with a status other than RTE_COMP_OP_STATUS_SUCCESS.
+ */
+static int
+main_loop(struct comp_test_data *test_data, uint8_t level,
+			enum rte_comp_xform_type type,
+			uint8_t *output_data_ptr,
+			size_t *output_data_sz)
+{
+	uint8_t dev_id;
+	uint32_t i, iter, num_iter;
+	struct rte_comp_op **ops, **deq_ops;
+	void *priv_xform = NULL;
+	struct rte_comp_xform xform;
+	size_t output_size = 0;
+	struct rte_mbuf **input_bufs, **output_bufs;
+	int res = 0;
+	int allocated = 0;
+
+	/* Validate test_data before anything is read through it */
+	if (test_data == NULL || !test_data->burst_sz) {
+		RTE_LOG(ERR, USER1,
+			"Unknown burst size\n");
+		return -1;
+	}
+
+	dev_id = test_data->cdev_id;
+
+	/*
+	 * One allocation holds two arrays of total_bufs pointers each:
+	 * the first half for operations to enqueue, the second half
+	 * (deq_ops) for dequeued operations.
+	 */
+	ops = rte_zmalloc_socket(NULL,
+		2 * test_data->total_bufs * sizeof(struct rte_comp_op *),
+		0, rte_socket_id());
+
+	if (ops == NULL) {
+		RTE_LOG(ERR, USER1,
+			"Can't allocate memory for ops strucures\n");
+		return -1;
+	}
+
+	deq_ops = &ops[test_data->total_bufs];
+
+	if (type == RTE_COMP_COMPRESS) {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_COMPRESS,
+			.compress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.deflate.huffman = test_data->huffman_enc,
+				.level = level,
+				.window_size = test_data->window_sz,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = test_data->decomp_bufs;
+		output_bufs = test_data->comp_bufs;
+	} else {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_DECOMPRESS,
+			.decompress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.window_size = test_data->window_sz,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = test_data->comp_bufs;
+		output_bufs = test_data->decomp_bufs;
+	}
+
+	/* Create private xform */
+	if (rte_compressdev_private_xform_create(dev_id, &xform,
+			&priv_xform) < 0) {
+		RTE_LOG(ERR, USER1, "Private xform could not be created\n");
+		res = -1;
+		goto end;
+	}
+
+	/* Verification needs a single pass over the data */
+	num_iter = 1;
+
+	for (iter = 0; iter < num_iter; iter++) {
+		uint32_t total_ops = test_data->total_bufs;
+		uint32_t remaining_ops = test_data->total_bufs;
+		uint32_t total_deq_ops = 0;
+		uint32_t total_enq_ops = 0;
+		uint16_t ops_unused = 0;
+		uint16_t num_enq = 0;
+		uint16_t num_deq = 0;
+
+		output_size = 0;
+
+		while (remaining_ops > 0) {
+			uint16_t num_ops = RTE_MIN(remaining_ops,
+						   test_data->burst_sz);
+			uint16_t ops_needed = num_ops - ops_unused;
+
+			/*
+			 * Move the unused operations from the previous
+			 * enqueue_burst call to the front, to maintain order
+			 */
+			if ((ops_unused > 0) && (num_enq > 0)) {
+				size_t nb_b_to_mov =
+				      ops_unused * sizeof(struct rte_comp_op *);
+
+				memmove(ops, &ops[num_enq], nb_b_to_mov);
+			}
+
+			/* Allocate compression operations */
+			if (ops_needed && !rte_comp_op_bulk_alloc(
+						test_data->op_pool,
+						&ops[ops_unused],
+						ops_needed)) {
+				RTE_LOG(ERR, USER1,
+				      "Could not allocate enough operations\n");
+				res = -1;
+				goto end;
+			}
+			allocated += ops_needed;
+
+			for (i = 0; i < ops_needed; i++) {
+				/*
+				 * Calculate next buffer to attach to operation
+				 */
+				uint32_t buf_id = total_enq_ops + i +
+						ops_unused;
+				uint16_t op_id = ops_unused + i;
+				/* Reset all data in output buffers */
+				struct rte_mbuf *m = output_bufs[buf_id];
+
+				m->pkt_len = test_data->seg_sz * m->nb_segs;
+				while (m) {
+					m->data_len = m->buf_len - m->data_off;
+					m = m->next;
+				}
+				ops[op_id]->m_src = input_bufs[buf_id];
+				ops[op_id]->m_dst = output_bufs[buf_id];
+				ops[op_id]->src.offset = 0;
+				ops[op_id]->src.length =
+					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
+				ops[op_id]->dst.offset = 0;
+				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
+				/*
+				 * NOTE(review): presumably a tag identifying
+				 * the buffer; it is not read back here
+				 */
+				ops[op_id]->input_chksum = buf_id;
+				ops[op_id]->private_xform = priv_xform;
+			}
+
+			num_enq = rte_compressdev_enqueue_burst(dev_id, 0, ops,
+								num_ops);
+			/*
+			 * Nothing accepted: give up only if the device
+			 * reports enqueue errors, otherwise retry
+			 */
+			if (num_enq == 0) {
+				struct rte_compressdev_stats stats;
+
+				rte_compressdev_stats_get(dev_id, &stats);
+				if (stats.enqueue_err_count) {
+					res = -1;
+					goto end;
+				}
+			}
+
+			ops_unused = num_ops - num_enq;
+			remaining_ops -= num_enq;
+			total_enq_ops += num_enq;
+
+			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
+							   deq_ops,
+							   test_data->burst_sz);
+			total_deq_ops += num_deq;
+
+			for (i = 0; i < num_deq; i++) {
+				struct rte_comp_op *op = deq_ops[i];
+
+				if (op->status != RTE_COMP_OP_STATUS_SUCCESS) {
+					RTE_LOG(ERR, USER1,
+						"Some operations were not successful\n");
+					/* A failed op must fail the pass */
+					res = -1;
+					goto end;
+				}
+
+				const void *read_data_addr =
+						rte_pktmbuf_read(op->m_dst, 0,
+						op->produced, output_data_ptr);
+				if (read_data_addr == NULL) {
+					RTE_LOG(ERR, USER1,
+						"Could not copy buffer in destination\n");
+					res = -1;
+					goto end;
+				}
+
+				/*
+				 * rte_pktmbuf_read() did not use the supplied
+				 * buffer, so copy the data explicitly
+				 */
+				if (read_data_addr != output_data_ptr)
+					rte_memcpy(output_data_ptr,
+						   rte_pktmbuf_mtod(op->m_dst,
+								    uint8_t *),
+						   op->produced);
+				output_data_ptr += op->produced;
+				output_size += op->produced;
+
+			}
+
+
+			/*
+			 * On the last iteration, shrink each destination
+			 * mbuf chain to the bytes actually produced
+			 */
+			if (iter == num_iter - 1) {
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+					struct rte_mbuf *m = op->m_dst;
+
+					m->pkt_len = op->produced;
+					uint32_t remaining_data = op->produced;
+					uint16_t data_to_append;
+
+					while (remaining_data > 0) {
+						data_to_append =
+							RTE_MIN(remaining_data,
+							     test_data->seg_sz);
+						m->data_len = data_to_append;
+						remaining_data -=
+								data_to_append;
+						m = m->next;
+					}
+				}
+			}
+			rte_mempool_put_bulk(test_data->op_pool,
+					     (void **)deq_ops, num_deq);
+			allocated -= num_deq;
+		}
+
+		/* Dequeue the last operations */
+		while (total_deq_ops < total_ops) {
+			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
+						deq_ops, test_data->burst_sz);
+			/*
+			 * Nothing returned: give up only if the device
+			 * reports dequeue errors, otherwise keep polling
+			 */
+			if (num_deq == 0) {
+				struct rte_compressdev_stats stats;
+
+				rte_compressdev_stats_get(dev_id, &stats);
+				if (stats.dequeue_err_count) {
+					res = -1;
+					goto end;
+				}
+			}
+
+			total_deq_ops += num_deq;
+
+			for (i = 0; i < num_deq; i++) {
+				struct rte_comp_op *op = deq_ops[i];
+
+				if (op->status != RTE_COMP_OP_STATUS_SUCCESS) {
+					RTE_LOG(ERR, USER1,
+						"Some operations were not successful\n");
+					/* A failed op must fail the pass */
+					res = -1;
+					goto end;
+				}
+
+				const void *read_data_addr =
+						rte_pktmbuf_read(op->m_dst,
+								 op->dst.offset,
+						op->produced, output_data_ptr);
+				if (read_data_addr == NULL) {
+					RTE_LOG(ERR, USER1,
+						"Could not copy buffer in destination\n");
+					res = -1;
+					goto end;
+				}
+
+				if (read_data_addr != output_data_ptr)
+					rte_memcpy(output_data_ptr,
+						   rte_pktmbuf_mtod(
+							op->m_dst, uint8_t *),
+						   op->produced);
+				output_data_ptr += op->produced;
+				output_size += op->produced;
+
+			}
+
+			/* Same trimming of destination mbufs as above */
+			if (iter == num_iter - 1) {
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+					struct rte_mbuf *m = op->m_dst;
+
+					m->pkt_len = op->produced;
+					uint32_t remaining_data = op->produced;
+					uint16_t data_to_append;
+
+					while (remaining_data > 0) {
+						data_to_append =
+						RTE_MIN(remaining_data,
+							test_data->seg_sz);
+						m->data_len = data_to_append;
+						remaining_data -=
+								data_to_append;
+						m = m->next;
+					}
+				}
+			}
+			rte_mempool_put_bulk(test_data->op_pool,
+					     (void **)deq_ops, num_deq);
+			allocated -= num_deq;
+		}
+	}
+
+	if (output_data_sz)
+		*output_data_sz = output_size;
+end:
+	/*
+	 * NOTE(review): on error paths 'allocated' may count operations that
+	 * are still queued on the device or held in deq_ops rather than at
+	 * the front of ops[] - confirm this bulk put is safe in that case.
+	 */
+	rte_mempool_put_bulk(test_data->op_pool, (void **)ops, allocated);
+	rte_compressdev_private_xform_free(dev_id, priv_xform);
+	rte_free(ops);
+	return res;
+}
+
+
+
+/*
+ * Verify that the device round-trips the input data for one level.
+ *
+ * Compresses the input into test_data->compressed_data, decompresses the
+ * result into test_data->decompressed_data, and checks that the
+ * decompressed size and contents match test_data->input_data.
+ *
+ * test_data->ratio is reset to 0 on entry and, on success, set to the
+ * compressed size expressed as a percentage of the input size.
+ *
+ * Returns EXIT_SUCCESS when the data matches, EXIT_FAILURE otherwise.
+ */
+int
+cperf_verification(struct comp_test_data *test_data, uint8_t level)
+{
+	int status;
+
+	test_data->ratio = 0;
+
+	/* Compression pass: also records the compressed size */
+	status = main_loop(test_data, level, RTE_COMP_COMPRESS,
+			   test_data->compressed_data,
+			   &test_data->comp_data_sz);
+	if (status < 0)
+		return EXIT_FAILURE;
+
+	/* Decompression pass over what was just produced */
+	status = main_loop(test_data, level, RTE_COMP_DECOMPRESS,
+			   test_data->decompressed_data,
+			   &test_data->decomp_data_sz);
+	if (status < 0)
+		return EXIT_FAILURE;
+
+	/* The sizes must agree before the contents are compared */
+	if (test_data->decomp_data_sz != test_data->input_data_sz) {
+		RTE_LOG(ERR, USER1,
+	   "Decompressed data length not equal to input data length\n");
+		RTE_LOG(ERR, USER1,
+			"Decompressed size = %zu, expected = %zu\n",
+			test_data->decomp_data_sz, test_data->input_data_sz);
+		return EXIT_FAILURE;
+	}
+
+	if (memcmp(test_data->decompressed_data,
+		   test_data->input_data,
+		   test_data->input_data_sz) != 0) {
+		RTE_LOG(ERR, USER1,
+		    "Decompressed data is not the same as file data\n");
+		return EXIT_FAILURE;
+	}
+
+	test_data->ratio = (double) test_data->comp_data_sz /
+			test_data->input_data_sz * 100;
+
+	return EXIT_SUCCESS;
+}
diff --git a/app/test-compress-perf/comp_perf_test_verify.h b/app/test-compress-perf/comp_perf_test_verify.h
new file mode 100644
index 0000000..67c6b49
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_test_verify.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _COMP_PERF_TEST_VERIFY_
+#define _COMP_PERF_TEST_VERIFY_
+
+#include "comp_perf_options.h"
+
+int
+cperf_verification(struct comp_test_data *test_data, uint8_t level);
+
+#endif
diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c
index 4b183a8..4de913e 100644
--- a/app/test-compress-perf/main.c
+++ b/app/test-compress-perf/main.c
@@ -5,10 +5,11 @@
 #include <rte_malloc.h>
 #include <rte_eal.h>
 #include <rte_log.h>
-#include <rte_cycles.h>
 #include <rte_compressdev.h>
 
 #include "comp_perf_options.h"
+#include "comp_perf_test_verify.h"
+#include "comp_perf_test_benchmark.h"
 
 #define NUM_MAX_XFORMS 16
 #define NUM_MAX_INFLIGHT_OPS 512
@@ -442,287 +443,7 @@ free_bufs(struct comp_test_data *test_data)
 	}
 }
 
-static int
-main_loop(struct comp_test_data *test_data, uint8_t level,
-			enum rte_comp_xform_type type,
-			uint8_t *output_data_ptr,
-			size_t *output_data_sz,
-			unsigned int benchmarking)
-{
-	uint8_t dev_id = test_data->cdev_id;
-	uint32_t i, iter, num_iter;
-	struct rte_comp_op **ops, **deq_ops;
-	void *priv_xform = NULL;
-	struct rte_comp_xform xform;
-	size_t output_size = 0;
-	struct rte_mbuf **input_bufs, **output_bufs;
-	int res = 0;
-	int allocated = 0;
-
-	if (test_data == NULL || !test_data->burst_sz) {
-		RTE_LOG(ERR, USER1,
-			"Unknown burst size\n");
-		return -1;
-	}
-
-	ops = rte_zmalloc_socket(NULL,
-		2 * test_data->total_bufs * sizeof(struct rte_comp_op *),
-		0, rte_socket_id());
-
-	if (ops == NULL) {
-		RTE_LOG(ERR, USER1,
-			"Can't allocate memory for ops strucures\n");
-		return -1;
-	}
-
-	deq_ops = &ops[test_data->total_bufs];
-
-	if (type == RTE_COMP_COMPRESS) {
-		xform = (struct rte_comp_xform) {
-			.type = RTE_COMP_COMPRESS,
-			.compress = {
-				.algo = RTE_COMP_ALGO_DEFLATE,
-				.deflate.huffman = test_data->huffman_enc,
-				.level = level,
-				.window_size = test_data->window_sz,
-				.chksum = RTE_COMP_CHECKSUM_NONE,
-				.hash_algo = RTE_COMP_HASH_ALGO_NONE
-			}
-		};
-		input_bufs = test_data->decomp_bufs;
-		output_bufs = test_data->comp_bufs;
-	} else {
-		xform = (struct rte_comp_xform) {
-			.type = RTE_COMP_DECOMPRESS,
-			.decompress = {
-				.algo = RTE_COMP_ALGO_DEFLATE,
-				.chksum = RTE_COMP_CHECKSUM_NONE,
-				.window_size = test_data->window_sz,
-				.hash_algo = RTE_COMP_HASH_ALGO_NONE
-			}
-		};
-		input_bufs = test_data->comp_bufs;
-		output_bufs = test_data->decomp_bufs;
-	}
-
-	/* Create private xform */
-	if (rte_compressdev_private_xform_create(dev_id, &xform,
-			&priv_xform) < 0) {
-		RTE_LOG(ERR, USER1, "Private xform could not be created\n");
-		res = -1;
-		goto end;
-	}
 
-	uint64_t tsc_start, tsc_end, tsc_duration;
-
-	tsc_start = tsc_end = tsc_duration = 0;
-	if (benchmarking) {
-		tsc_start = rte_rdtsc();
-		num_iter = test_data->num_iter;
-	} else
-		num_iter = 1;
-
-	for (iter = 0; iter < num_iter; iter++) {
-		uint32_t total_ops = test_data->total_bufs;
-		uint32_t remaining_ops = test_data->total_bufs;
-		uint32_t total_deq_ops = 0;
-		uint32_t total_enq_ops = 0;
-		uint16_t ops_unused = 0;
-		uint16_t num_enq = 0;
-		uint16_t num_deq = 0;
-
-		output_size = 0;
-
-		while (remaining_ops > 0) {
-			uint16_t num_ops = RTE_MIN(remaining_ops,
-						   test_data->burst_sz);
-			uint16_t ops_needed = num_ops - ops_unused;
-
-			/*
-			 * Move the unused operations from the previous
-			 * enqueue_burst call to the front, to maintain order
-			 */
-			if ((ops_unused > 0) && (num_enq > 0)) {
-				size_t nb_b_to_mov =
-				      ops_unused * sizeof(struct rte_comp_op *);
-
-				memmove(ops, &ops[num_enq], nb_b_to_mov);
-			}
-
-			/* Allocate compression operations */
-			if (ops_needed && !rte_comp_op_bulk_alloc(
-						test_data->op_pool,
-						&ops[ops_unused],
-						ops_needed)) {
-				RTE_LOG(ERR, USER1,
-				      "Could not allocate enough operations\n");
-				res = -1;
-				goto end;
-			}
-			allocated += ops_needed;
-
-			for (i = 0; i < ops_needed; i++) {
-				/*
-				 * Calculate next buffer to attach to operation
-				 */
-				uint32_t buf_id = total_enq_ops + i +
-						ops_unused;
-				uint16_t op_id = ops_unused + i;
-				/* Reset all data in output buffers */
-				struct rte_mbuf *m = output_bufs[buf_id];
-
-				m->pkt_len = test_data->seg_sz * m->nb_segs;
-				while (m) {
-					m->data_len = m->buf_len - m->data_off;
-					m = m->next;
-				}
-				ops[op_id]->m_src = input_bufs[buf_id];
-				ops[op_id]->m_dst = output_bufs[buf_id];
-				ops[op_id]->src.offset = 0;
-				ops[op_id]->src.length =
-					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
-				ops[op_id]->dst.offset = 0;
-				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
-				ops[op_id]->input_chksum = buf_id;
-				ops[op_id]->private_xform = priv_xform;
-			}
-
-			num_enq = rte_compressdev_enqueue_burst(dev_id, 0, ops,
-								num_ops);
-			ops_unused = num_ops - num_enq;
-			remaining_ops -= num_enq;
-			total_enq_ops += num_enq;
-
-			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
-							   deq_ops,
-							   test_data->burst_sz);
-			total_deq_ops += num_deq;
-			if (benchmarking == 0) {
-				for (i = 0; i < num_deq; i++) {
-					struct rte_comp_op *op = deq_ops[i];
-					const void *read_data_addr =
-						rte_pktmbuf_read(op->m_dst, 0,
-						op->produced, output_data_ptr);
-					if (read_data_addr == NULL) {
-						RTE_LOG(ERR, USER1,
-				      "Could not copy buffer in destination\n");
-						res = -1;
-						goto end;
-					}
-
-					if (read_data_addr != output_data_ptr)
-						rte_memcpy(output_data_ptr,
-							rte_pktmbuf_mtod(
-							  op->m_dst, uint8_t *),
-							op->produced);
-					output_data_ptr += op->produced;
-					output_size += op->produced;
-
-				}
-			}
-
-			if (iter == num_iter - 1) {
-				for (i = 0; i < num_deq; i++) {
-					struct rte_comp_op *op = deq_ops[i];
-					struct rte_mbuf *m = op->m_dst;
-
-					m->pkt_len = op->produced;
-					uint32_t remaining_data = op->produced;
-					uint16_t data_to_append;
-
-					while (remaining_data > 0) {
-						data_to_append =
-							RTE_MIN(remaining_data,
-							     test_data->seg_sz);
-						m->data_len = data_to_append;
-						remaining_data -=
-								data_to_append;
-						m = m->next;
-					}
-				}
-			}
-			rte_mempool_put_bulk(test_data->op_pool,
-					     (void **)deq_ops, num_deq);
-			allocated -= num_deq;
-		}
-
-		/* Dequeue the last operations */
-		while (total_deq_ops < total_ops) {
-			num_deq = rte_compressdev_dequeue_burst(dev_id, 0,
-						deq_ops, test_data->burst_sz);
-			total_deq_ops += num_deq;
-			if (benchmarking == 0) {
-				for (i = 0; i < num_deq; i++) {
-					struct rte_comp_op *op = deq_ops[i];
-					const void *read_data_addr =
-						rte_pktmbuf_read(op->m_dst,
-							op->dst.offset,
-							op->produced,
-							output_data_ptr);
-					if (read_data_addr == NULL) {
-						RTE_LOG(ERR, USER1,
-				      "Could not copy buffer in destination\n");
-						res = -1;
-						goto end;
-					}
-
-					if (read_data_addr != output_data_ptr)
-						rte_memcpy(output_data_ptr,
-							rte_pktmbuf_mtod(
-							op->m_dst, uint8_t *),
-							op->produced);
-					output_data_ptr += op->produced;
-					output_size += op->produced;
-
-				}
-			}
-
-			if (iter == num_iter - 1) {
-				for (i = 0; i < num_deq; i++) {
-					struct rte_comp_op *op = deq_ops[i];
-					struct rte_mbuf *m = op->m_dst;
-
-					m->pkt_len = op->produced;
-					uint32_t remaining_data = op->produced;
-					uint16_t data_to_append;
-
-					while (remaining_data > 0) {
-						data_to_append =
-						RTE_MIN(remaining_data,
-							test_data->seg_sz);
-						m->data_len = data_to_append;
-						remaining_data -=
-								data_to_append;
-						m = m->next;
-					}
-				}
-			}
-			rte_mempool_put_bulk(test_data->op_pool,
-					     (void **)deq_ops, num_deq);
-			allocated -= num_deq;
-		}
-	}
-
-	if (benchmarking) {
-		tsc_end = rte_rdtsc();
-		tsc_duration = tsc_end - tsc_start;
-
-		if (type == RTE_COMP_COMPRESS)
-			test_data->comp_tsc_duration[level] =
-					tsc_duration / num_iter;
-		else
-			test_data->decomp_tsc_duration[level] =
-					tsc_duration / num_iter;
-	}
-
-	if (benchmarking == 0 && output_data_sz)
-		*output_data_sz = output_size;
-end:
-	rte_mempool_put_bulk(test_data->op_pool, (void **)ops, allocated);
-	rte_compressdev_private_xform_free(dev_id, priv_xform);
-	rte_free(ops);
-	return res;
-}
 
 int
 main(int argc, char **argv)
@@ -745,6 +466,7 @@ main(int argc, char **argv)
 		rte_exit(EXIT_FAILURE, "Cannot reserve memory in socket %d\n",
 				rte_socket_id());
 
+	ret = EXIT_SUCCESS;
 	cleanup = ST_TEST_DATA;
 	comp_perf_options_default(test_data);
 
@@ -787,9 +509,6 @@ main(int argc, char **argv)
 	else
 		level = test_data->level.list[0];
 
-	size_t comp_data_sz;
-	size_t decomp_data_sz;
-
 	printf("Burst size = %u\n", test_data->burst_sz);
 	printf("File size = %zu\n", test_data->input_data_sz);
 
@@ -800,84 +519,27 @@ main(int argc, char **argv)
 
 	cleanup = ST_DURING_TEST;
 	while (level <= test_data->level.max) {
+
 		/*
 		 * Run a first iteration, to verify compression and
 		 * get the compression ratio for the level
 		 */
-		if (main_loop(test_data, level, RTE_COMP_COMPRESS,
-			      test_data->compressed_data,
-			      &comp_data_sz, 0) < 0) {
-			ret = EXIT_FAILURE;
-			goto end;
-		}
-
-		if (main_loop(test_data, level, RTE_COMP_DECOMPRESS,
-			      test_data->decompressed_data,
-			      &decomp_data_sz, 0) < 0) {
-			ret = EXIT_FAILURE;
-			goto end;
-		}
-
-		if (decomp_data_sz != test_data->input_data_sz) {
-			RTE_LOG(ERR, USER1,
-		   "Decompressed data length not equal to input data length\n");
-			RTE_LOG(ERR, USER1,
-				"Decompressed size = %zu, expected = %zu\n",
-				decomp_data_sz, test_data->input_data_sz);
-			ret = EXIT_FAILURE;
-			goto end;
-		} else {
-			if (memcmp(test_data->decompressed_data,
-					test_data->input_data,
-					test_data->input_data_sz) != 0) {
-				RTE_LOG(ERR, USER1,
-			    "Decompressed data is not the same as file data\n");
-				ret = EXIT_FAILURE;
-				goto end;
-			}
-		}
-
-		double ratio = (double) comp_data_sz /
-						test_data->input_data_sz * 100;
+		if (cperf_verification(test_data, level) != EXIT_SUCCESS)
+			break;
 
 		/*
-		 * Run the tests twice, discarding the first performance
-		 * results, before the cache is warmed up
+		 * Run benchmarking test
 		 */
-		for (i = 0; i < 2; i++) {
-			if (main_loop(test_data, level, RTE_COMP_COMPRESS,
-					NULL, NULL, 1) < 0) {
-				ret = EXIT_FAILURE;
-				goto end;
-			}
-		}
-
-		for (i = 0; i < 2; i++) {
-			if (main_loop(test_data, level, RTE_COMP_DECOMPRESS,
-					NULL, NULL, 1) < 0) {
-				ret = EXIT_FAILURE;
-				goto end;
-			}
-		}
-
-		uint64_t comp_tsc_duration =
-				test_data->comp_tsc_duration[level];
-		double comp_tsc_byte = (double)comp_tsc_duration /
-						test_data->input_data_sz;
-		double comp_gbps = rte_get_tsc_hz() / comp_tsc_byte * 8 /
-				1000000000;
-		uint64_t decomp_tsc_duration =
-				test_data->decomp_tsc_duration[level];
-		double decomp_tsc_byte = (double)decomp_tsc_duration /
-						test_data->input_data_sz;
-		double decomp_gbps = rte_get_tsc_hz() / decomp_tsc_byte * 8 /
-				1000000000;
+		if (cperf_benchmark(test_data, level) != EXIT_SUCCESS)
+			break;
 
 		printf("%6u%12zu%17.2f%19"PRIu64"%21.2f"
 					"%15.2f%21"PRIu64"%23.2f%16.2f\n",
-		       level, comp_data_sz, ratio, comp_tsc_duration,
-		       comp_tsc_byte, comp_gbps, decomp_tsc_duration,
-		       decomp_tsc_byte, decomp_gbps);
+		       level, test_data->comp_data_sz, test_data->ratio,
+		       test_data->comp_tsc_duration[level],
+		       test_data->comp_tsc_byte, test_data->comp_gbps,
+		       test_data->decomp_tsc_duration[level],
+		       test_data->decomp_tsc_byte, test_data->decomp_gbps);
 
 		if (test_data->level.inc != 0)
 			level += test_data->level.inc;
@@ -888,8 +550,6 @@ main(int argc, char **argv)
 		}
 	}
 
-	ret = EXIT_SUCCESS;
-
 end:
 	switch (cleanup) {
 
diff --git a/app/test-compress-perf/meson.build b/app/test-compress-perf/meson.build
index ba6d64d..ec73e5e 100644
--- a/app/test-compress-perf/meson.build
+++ b/app/test-compress-perf/meson.build
@@ -3,5 +3,7 @@
 
 allow_experimental_apis = true
 sources = files('comp_perf_options_parse.c',
-		'main.c')
+		'main.c',
+		'comp_perf_test_verify.c',
+		'comp_perf_test_benchmark.c')
 deps = ['compressdev']
-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance measurement
  2018-12-02  6:39                 ` Verma, Shally
@ 2018-12-05  8:51                   ` Jozwiak, TomaszX
  0 siblings, 0 replies; 76+ messages in thread
From: Jozwiak, TomaszX @ 2018-12-05  8:51 UTC (permalink / raw)
  To: Verma, Shally, Trahe, Fiona, Daly, Lee; +Cc: dev, akhil.goyal

Hi Shally,

> I don't think segsz is required to input then?

Yes, this parameter, together with others such as input size and max-num-segs-sgl,
makes it possible to find the best-performing set of values for each PMD, so let's keep it.


Br, Tomek




> -----Original Message-----
> From: Verma, Shally [mailto:Shally.Verma@cavium.com]
> Sent: Sunday, December 2, 2018 7:40 AM
> To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; Trahe, Fiona
> <fiona.trahe@intel.com>; Daly, Lee <lee.daly@intel.com>
> Cc: dev@dpdk.org; akhil.goyal@nxp.com
> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
> measurement
> 
> Ok. Then to keep it simple can we keep input sz and max-num-segs-sgl at
> cmd line input. I don't think segsz is required to input then?
> 
> Thanks
> Shally
> 
> >-----Original Message-----
> >From: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>
> >Sent: 30 November 2018 20:13
> >To: Verma, Shally <Shally.Verma@cavium.com>; Trahe, Fiona
> ><fiona.trahe@intel.com>; Daly, Lee <lee.daly@intel.com>
> >Cc: dev@dpdk.org; akhil.goyal@nxp.com
> >Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
> >measurement
> >
> >External Email
> >
> >Hi Shally,
> >
> >I'm about to send V5 of the compression-perf tool.
> >
> >Our performance testing shows that the number of sgls in a chain can be a
> factor in the performance.
> >So we want to keep this on the cmd line for the performance tool.
> >There are alternatives, like setting the input size and segment size to
> >get the num segments desired, but I prefer to have the option to specify
> the num segments explicitly.
> >We'll document that if max-num-sgl-segs x seg_sz > input size, then the
> >number of segments in the chain will be lower (only as many as are
> >needed to store all the data).
> >As regards adding the max_nb_segments_per_sgl into the
> >rte_compressdev_info struct we're investigating another workaround to
> this limitation in QAT, so will leave this off the API unless some other PMD
> needs it.
> >In the meantime we'll document the limitation in QAT.
> >
> >Please let me know your thoughts.
> >
> >--
> >Tomek
> >
> >> -----Original Message-----
> >> From: Verma, Shally [mailto:Shally.Verma@cavium.com]
> >> Sent: Wednesday, October 17, 2018 6:48 PM
> >> To: Trahe, Fiona <fiona.trahe@intel.com>; Daly, Lee
> >> <lee.daly@intel.com>
> >> Cc: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org;
> >> akhil.goyal@nxp.com
> >> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add
> >> performance measurement
> >>
> >>
> >>
> >> >-----Original Message-----
> >> >From: Trahe, Fiona <fiona.trahe@intel.com>
> >> >Sent: 17 October 2018 22:15
> >> >To: Verma, Shally <Shally.Verma@cavium.com>; Daly, Lee
> >> ><lee.daly@intel.com>
> >> >Cc: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org;
> >> >akhil.goyal@nxp.com; Trahe, Fiona <fiona.trahe@intel.com>
> >> >Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add
> >> >performance measurement
> >> >
> >> >External Email
> >> >
> >> >> -----Original Message-----
> >> >> From: Verma, Shally [mailto:Shally.Verma@cavium.com]
> >> >> Sent: Wednesday, October 17, 2018 8:43 AM
> >> >> To: Trahe, Fiona <fiona.trahe@intel.com>; Daly, Lee
> >> >> <lee.daly@intel.com>
> >> >> Cc: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org;
> >> >> akhil.goyal@nxp.com
> >> >> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add
> >> >> performance measurement
> >> >>
> >> >>
> >> >>
> >> >> >-----Original Message-----
> >> >> >From: Trahe, Fiona <fiona.trahe@intel.com>
> >> >> >Sent: 17 October 2018 20:04
> >> >> >To: Daly, Lee <lee.daly@intel.com>; Verma, Shally
> >> >> ><Shally.Verma@cavium.com>
> >> >> >Cc: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>; dev@dpdk.org;
> >> >> >akhil.goyal@nxp.com; Trahe, Fiona
> >> >> <fiona.trahe@intel.com>
> >> >> >Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add
> >> >> >performance measurement
> >> >> >
> >> >> >External Email
> >> >> >
> >> >> >Hi Shally, Lee,
> >> >> >
> >> >> >> -----Original Message-----
> >> >> >> From: Daly, Lee
> >> >> >> Sent: Monday, October 15, 2018 8:10 AM
> >> >> >> To: Verma, Shally <Shally.Verma@cavium.com>
> >> >> >> Cc: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>;
> dev@dpdk.org;
> >> >> >> Trahe, Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com
> >> >> >> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add
> >> >> >> performance measurement
> >> >> >>
> >> >> >> Thanks for your input Shally see comments below.
> >> >> >>
> >> >> >>
> >> >> >> I will be reviewing these changes while Tomasz is out this week.
> >> >> >>
> >> >> >> > -----Original Message-----
> >> >> >> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Verma,
> >> >> >> > Shally
> >> >> >> > Sent: Friday, October 12, 2018 11:16 AM
> >> >> >> > To: Jozwiak, TomaszX <tomaszx.jozwiak@intel.com>;
> >> dev@dpdk.org;
> >> >> >> > Trahe, Fiona <fiona.trahe@intel.com>; akhil.goyal@nxp.com; De
> >> >> >> > Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
> >> >> >> > Cc: De@dpdk.org; Lara@dpdk.org; Guarch@dpdk.org
> >> >> >> > Subject: Re: [dpdk-dev] [PATCH 2/3] app/compress-perf: add
> >> >> >> > performance measurement
> >> >> >> >
> >> >> >///
> >> >> >
> >> >> >> >Also, why do we need --max-num-  sgl-segs as an input option
> >> >> >> >from user? Shouldn't input_sz and seg_sz  internally decide on
> >> >> >> >num-segs?
> >> >> >> > Or is it added to serve some other different purpose?
> >> >> >> Will have to get back to you on this one, seems illogical to
> >> >> >> get this input from user, But I will have to do further
> >> >> >> investigation to find if
> >> there was a different purpose.
> >> >> >
> >> >> >[Fiona] Some PMDs have a limit on how many links can be in an sgl
> >> >> >chain, e.g. in QAT case the PMD allocates a pool of internal
> >> >> >structures of a suitable size during device initialisation, this
> >> >> >is not a hard
> >> limit but can be configured in .config to give the user control over
> >> the memory resources allocated.
> >> >> >This perf-tool max-num-sgl-segs is so the user can pick a value
> >> >> ><=
> >> whatever the PMD's max is.
> >> >>
> >> >> Then also, I believe this could be taken care internally by an app.
> >> >> App can choose convenient number of sgl segs as per PMD capability
> >> >> and input sz and chunk sz selected by user.
> >> >> Just my thoughts.
> >> >[Fiona] Then we'd need to add this capability to the API, e.g. add
> >> >uint16_t max_nb_segments_per_sgl into the rte_compressdev_info
> struct.
> >> >Special case 0 means no limit.
> >> >We did consider this before, I can't remember why we didn't do it, I
> >> >think
> >> it's needed.
> >> >I'll push an API patch for this in 19.02 and we can remove the
> >> >--max-num-sgl-segs param from the performance tool and hardcode it
> >> >in
> >> the tool in the meantime.
> >> >Ok?
> >> Yea. Sounds better.

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [dpdk-dev] [PATCH v5 0/5] add initial version of compress-perf
  2018-12-05  8:47       ` [dpdk-dev] [PATCH v5 0/5] add initial version of compress-perf Tomasz Jozwiak
                           ` (4 preceding siblings ...)
  2018-12-05  8:47         ` [dpdk-dev] [PATCH v5 5/5] app/compress-perf: code refactoring Tomasz Jozwiak
@ 2018-12-05 15:37         ` Daly, Lee
  2018-12-12 11:54         ` [dpdk-dev] [PATCH v6 " Tomasz Jozwiak
  2018-12-12 12:08         ` Tomasz Jozwiak
  7 siblings, 0 replies; 76+ messages in thread
From: Daly, Lee @ 2018-12-05 15:37 UTC (permalink / raw)
  To: Jozwiak, TomaszX, dev, Trahe, Fiona, Jozwiak, TomaszX,
	Shally.Verma, akhil.goyal



> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Tomasz Jozwiak
> Sent: Wednesday, December 5, 2018 8:47 AM
> To: dev@dpdk.org; Trahe, Fiona <fiona.trahe@intel.com>; Jozwiak, TomaszX
> <tomaszx.jozwiak@intel.com>; Shally.Verma@cavium.com;
> akhil.goyal@nxp.com
> Subject: [dpdk-dev] [PATCH v5 0/5] add initial version of compress-perf
> 
> This patchset adds initial version of compression performance test.
> 
Series Acked-by: Lee Daly <lee.daly@intel.com>

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH v6 0/5] add initial version of compress-perf
  2018-12-05  8:47       ` [dpdk-dev] [PATCH v5 0/5] add initial version of compress-perf Tomasz Jozwiak
                           ` (5 preceding siblings ...)
  2018-12-05 15:37         ` [dpdk-dev] [PATCH v5 0/5] add initial version of compress-perf Daly, Lee
@ 2018-12-12 11:54         ` Tomasz Jozwiak
  2018-12-17 11:11           ` Verma, Shally
  2018-12-12 12:08         ` Tomasz Jozwiak
  7 siblings, 1 reply; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-12-12 11:54 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, Shally.Verma, akhil.goyal

This patchset adds initial version of compression performance
test.

v6 changes:
  - Updated the 19.02 release notes

v5 changes:
  - Fixed documentation
  - Added op's status checking after rte_compressdev_dequeue_burst
  - code cleanup

v4 changes:
  - fixed checkpatch issues
  - code cleanup

v3 changes:
  - Added dynamic compression
  - Code refactoring to separate validation
    from benchmarking part
  - Updated documentation
  - Added fail detection from rte_compressdev_enqueue_burst
    and rte_compressdev_dequeue_burst functions
  - Code cleanup

v2 changes:

  -  Added release note
  -  Added new cleanup flow into main function
  -  Blocked dynamic compression test because it hasn't been
     tested enough
  -  Changed `--max-num-sgl-segs' default value to 16
  -  Updated documentation

Tomasz Jozwiak (5):
  app/compress-perf: add parser
  app/compress-perf: add performance measurement
  doc/guides/tools: add doc files
  app/compress-perf: add dynamic compression test
  app/compress-perf: code refactoring

 MAINTAINERS                                       |   5 +
 app/Makefile                                      |   4 +
 app/meson.build                                   |   1 +
 app/test-compress-perf/Makefile                   |  18 +
 app/test-compress-perf/comp_perf_options.h        |  71 +++
 app/test-compress-perf/comp_perf_options_parse.c  | 596 ++++++++++++++++++++++
 app/test-compress-perf/comp_perf_test_benchmark.c | 308 +++++++++++
 app/test-compress-perf/comp_perf_test_benchmark.h |  13 +
 app/test-compress-perf/comp_perf_test_verify.c    | 353 +++++++++++++
 app/test-compress-perf/comp_perf_test_verify.h    |  13 +
 app/test-compress-perf/main.c                     | 590 +++++++++++++++++++++
 app/test-compress-perf/meson.build                |   9 +
 config/common_base                                |   5 +
 doc/guides/rel_notes/release_19_02.rst            |   5 +
 doc/guides/tools/comp_perf.rst                    |  81 +++
 doc/guides/tools/index.rst                        |   1 +
 16 files changed, 2073 insertions(+)
 create mode 100644 app/test-compress-perf/Makefile
 create mode 100644 app/test-compress-perf/comp_perf_options.h
 create mode 100644 app/test-compress-perf/comp_perf_options_parse.c
 create mode 100644 app/test-compress-perf/comp_perf_test_benchmark.c
 create mode 100644 app/test-compress-perf/comp_perf_test_benchmark.h
 create mode 100644 app/test-compress-perf/comp_perf_test_verify.c
 create mode 100644 app/test-compress-perf/comp_perf_test_verify.h
 create mode 100644 app/test-compress-perf/main.c
 create mode 100644 app/test-compress-perf/meson.build
 create mode 100644 doc/guides/tools/comp_perf.rst

-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH v6 0/5] add initial version of compress-perf
  2018-12-05  8:47       ` [dpdk-dev] [PATCH v5 0/5] add initial version of compress-perf Tomasz Jozwiak
                           ` (6 preceding siblings ...)
  2018-12-12 11:54         ` [dpdk-dev] [PATCH v6 " Tomasz Jozwiak
@ 2018-12-12 12:08         ` Tomasz Jozwiak
  2018-12-12 12:08           ` [dpdk-dev] [PATCH v6 1/5] app/compress-perf: add parser Tomasz Jozwiak
                             ` (6 more replies)
  7 siblings, 7 replies; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-12-12 12:08 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, Shally.Verma, akhil.goyal

This patchset adds initial version of compression performance
test.

v6 changes:
  - Updated the 19.02 release notes

v5 changes:
  - Fixed documentation
  - Added op's status checking after rte_compressdev_dequeue_burst
  - code cleanup

v4 changes:
  - fixed checkpatch issues
  - code cleanup

v3 changes:
  - Added dynamic compression
  - Code refactoring to separate validation
    from benchmarking part
  - Updated documentation
  - Added fail detection from rte_compressdev_enqueue_burst
    and rte_compressdev_dequeue_burst functions
  - Code cleanup

v2 changes:

  -  Added release note
  -  Added new cleanup flow into main function
  -  Blocked dynamic compression test because it hasn't been
     tested enough
  -  Changed `--max-num-sgl-segs' default value to 16
  -  Updated documentation

Tomasz Jozwiak (5):
  app/compress-perf: add parser
  app/compress-perf: add performance measurement
  doc/guides/tools: add doc files
  app/compress-perf: add dynamic compression test
  app/compress-perf: code refactoring

 MAINTAINERS                                       |   5 +
 app/Makefile                                      |   4 +
 app/meson.build                                   |   1 +
 app/test-compress-perf/Makefile                   |  18 +
 app/test-compress-perf/comp_perf_options.h        |  71 +++
 app/test-compress-perf/comp_perf_options_parse.c  | 596 ++++++++++++++++++++++
 app/test-compress-perf/comp_perf_test_benchmark.c | 308 +++++++++++
 app/test-compress-perf/comp_perf_test_benchmark.h |  13 +
 app/test-compress-perf/comp_perf_test_verify.c    | 353 +++++++++++++
 app/test-compress-perf/comp_perf_test_verify.h    |  13 +
 app/test-compress-perf/main.c                     | 590 +++++++++++++++++++++
 app/test-compress-perf/meson.build                |   9 +
 config/common_base                                |   5 +
 doc/guides/rel_notes/release_19_02.rst            |   5 +
 doc/guides/tools/comp_perf.rst                    |  81 +++
 doc/guides/tools/index.rst                        |   1 +
 16 files changed, 2073 insertions(+)
 create mode 100644 app/test-compress-perf/Makefile
 create mode 100644 app/test-compress-perf/comp_perf_options.h
 create mode 100644 app/test-compress-perf/comp_perf_options_parse.c
 create mode 100644 app/test-compress-perf/comp_perf_test_benchmark.c
 create mode 100644 app/test-compress-perf/comp_perf_test_benchmark.h
 create mode 100644 app/test-compress-perf/comp_perf_test_verify.c
 create mode 100644 app/test-compress-perf/comp_perf_test_verify.h
 create mode 100644 app/test-compress-perf/main.c
 create mode 100644 app/test-compress-perf/meson.build
 create mode 100644 doc/guides/tools/comp_perf.rst

-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH v6 1/5] app/compress-perf: add parser
  2018-12-12 12:08         ` Tomasz Jozwiak
@ 2018-12-12 12:08           ` Tomasz Jozwiak
  2018-12-12 12:08           ` [dpdk-dev] [PATCH v6 2/5] app/compress-perf: add performance measurement Tomasz Jozwiak
                             ` (5 subsequent siblings)
  6 siblings, 0 replies; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-12-12 12:08 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, Shally.Verma, akhil.goyal

Add the command-line option parser for the compression performance test.

Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
---
 app/Makefile                                     |   4 +
 app/meson.build                                  |   1 +
 app/test-compress-perf/Makefile                  |  16 +
 app/test-compress-perf/comp_perf_options.h       |  59 +++
 app/test-compress-perf/comp_perf_options_parse.c | 592 +++++++++++++++++++++++
 app/test-compress-perf/main.c                    |  52 ++
 app/test-compress-perf/meson.build               |   7 +
 config/common_base                               |   5 +
 8 files changed, 736 insertions(+)
 create mode 100644 app/test-compress-perf/Makefile
 create mode 100644 app/test-compress-perf/comp_perf_options.h
 create mode 100644 app/test-compress-perf/comp_perf_options_parse.c
 create mode 100644 app/test-compress-perf/main.c
 create mode 100644 app/test-compress-perf/meson.build

diff --git a/app/Makefile b/app/Makefile
index 069fa98..d6641ef 100644
--- a/app/Makefile
+++ b/app/Makefile
@@ -11,6 +11,10 @@ ifeq ($(CONFIG_RTE_LIBRTE_BBDEV),y)
 DIRS-$(CONFIG_RTE_TEST_BBDEV) += test-bbdev
 endif
 
+ifeq ($(CONFIG_RTE_LIBRTE_COMPRESSDEV),y)
+DIRS-$(CONFIG_RTE_APP_COMPRESS_PERF) += test-compress-perf
+endif
+
 ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
 DIRS-$(CONFIG_RTE_APP_CRYPTO_PERF) += test-crypto-perf
 endif
diff --git a/app/meson.build b/app/meson.build
index a9a026b..47a2a86 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -4,6 +4,7 @@
 apps = ['pdump',
 	'proc-info',
 	'test-bbdev',
+	'test-compress-perf',
 	'test-crypto-perf',
 	'test-eventdev',
 	'test-pmd']
diff --git a/app/test-compress-perf/Makefile b/app/test-compress-perf/Makefile
new file mode 100644
index 0000000..8aa7a22
--- /dev/null
+++ b/app/test-compress-perf/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+APP = dpdk-test-compress-perf
+
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+CFLAGS += -O3
+
+# all sources are stored in SRCS-y
+SRCS-y := main.c
+SRCS-y += comp_perf_options_parse.c
+
+include $(RTE_SDK)/mk/rte.app.mk
diff --git a/app/test-compress-perf/comp_perf_options.h b/app/test-compress-perf/comp_perf_options.h
new file mode 100644
index 0000000..7516ea0
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_options.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef COMP_PERF_OPTIONS_H
+#define COMP_PERF_OPTIONS_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+/* enum rte_comp_huffman and RTE_COMP_LEVEL_MAX */
+#include <rte_comp.h>
+
+struct rte_mbuf;
+struct rte_mempool;
+
+#define MAX_DRIVER_NAME		64
+#define MAX_INPUT_FILE_NAME	64
+#define MAX_LIST		32
+
+/* Operation selected with --operation */
+enum comp_operation {
+	COMPRESS_ONLY,
+	DECOMPRESS_ONLY,
+	COMPRESS_DECOMPRESS
+};
+
+/* A set of values given either as a min/inc/max range or as a list */
+struct range_list {
+	uint8_t min;
+	uint8_t max;
+	uint8_t inc;
+	uint8_t count;	/* number of entries stored in list[] */
+	uint8_t list[MAX_LIST];
+};
+
+/* All settings and run-time state of one test run */
+struct comp_test_data {
+	char driver_name[MAX_DRIVER_NAME];	/* --driver-name */
+	char input_file[MAX_INPUT_FILE_NAME];	/* --input-file */
+	struct rte_mbuf **comp_bufs;
+	struct rte_mbuf **decomp_bufs;
+	uint32_t total_bufs;
+	uint8_t *input_data;
+	size_t input_data_sz;			/* --extended-input-sz */
+	uint8_t *compressed_data;
+	uint8_t *decompressed_data;
+	struct rte_mempool *comp_buf_pool;
+	struct rte_mempool *decomp_buf_pool;
+	struct rte_mempool *op_pool;
+	int8_t cdev_id;
+	uint16_t seg_sz;			/* --seg-sz */
+	uint16_t burst_sz;			/* --burst-sz */
+	uint32_t pool_sz;			/* --pool-sz */
+	uint32_t num_iter;			/* --num-iter */
+	uint16_t max_sgl_segs;			/* --max-num-sgl-segs */
+	enum rte_comp_huffman huffman_enc;	/* --huffman-enc */
+	enum comp_operation test_op;		/* --operation */
+	int window_sz;		/* --window-sz (base two log), -1 if unset */
+	struct range_list level;		/* --compress-level */
+	/* Store TSC duration for all levels (including level 0) */
+	uint64_t comp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
+	uint64_t decomp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
+};
+
+/*
+ * Parse the application options in argv into test_data.
+ * Returns 0 on success and a negative value on the first invalid option.
+ */
+int
+comp_perf_options_parse(struct comp_test_data *test_data, int argc,
+			char **argv);
+
+/* Load the built-in default value of every tunable into test_data. */
+void
+comp_perf_options_default(struct comp_test_data *test_data);
+
+/*
+ * Check that the mandatory options (driver name, input file) were given.
+ * Returns 0 when they are set, -1 otherwise.
+ */
+int
+comp_perf_options_check(struct comp_test_data *test_data);
+
+#endif /* COMP_PERF_OPTIONS_H */
diff --git a/app/test-compress-perf/comp_perf_options_parse.c b/app/test-compress-perf/comp_perf_options_parse.c
new file mode 100644
index 0000000..7f1a7ff
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_options_parse.c
@@ -0,0 +1,592 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <getopt.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include <rte_string_fns.h>
+#include <rte_comp.h>
+
+#include "comp_perf_options.h"
+
+#define CPERF_DRIVER_NAME	("driver-name")
+#define CPERF_TEST_FILE		("input-file")
+#define CPERF_SEG_SIZE		("seg-sz")
+#define CPERF_BURST_SIZE	("burst-sz")
+#define CPERF_EXTENDED_SIZE	("extended-input-sz")
+#define CPERF_POOL_SIZE		("pool-sz")
+#define CPERF_MAX_SGL_SEGS	("max-num-sgl-segs")
+#define CPERF_NUM_ITER		("num-iter")
+#define CPERF_OPTYPE		("operation")
+#define CPERF_HUFFMAN_ENC	("huffman-enc")
+#define CPERF_LEVEL		("compress-level")
+#define CPERF_WINDOW_SIZE	("window-sz")
+
+struct name_id_map {
+	const char *name;
+	uint32_t id;
+};
+
+static void
+usage(char *progname)
+{
+	printf("%s [EAL options] --\n"
+		" --driver-name NAME: compress driver to use\n"
+		" --input-file NAME: file to compress and decompress\n"
+		" --extended-input-sz N: extend file data up to this size (default: no extension)\n"
+		" --seg-sz N: size of segment to store the data (default: 2048)\n"
+		" --burst-sz N: compress operation burst size\n"
+		" --pool-sz N: mempool size for compress operations/mbufs\n"
+		"		(default: 8192)\n"
+		" --max-num-sgl-segs N: maximum number of segments for each mbuf\n"
+		"		(default: 16)\n"
+		" --num-iter N: number of times the file will be\n"
+		"		compressed/decompressed (default: 10000)\n"
+		" --operation [comp/decomp/comp_and_decomp]: perform test on\n"
+		"		compression, decompression or both operations\n"
+		" --huffman-enc [fixed/default]: Huffman encoding\n"
+		"		(default: fixed)\n"
+		" --compress-level N: compression level, which could be a single value, list or range\n"
+		"		(default: range between 1 and 9)\n"
+		" --window-sz N: base two log value of compression window size\n"
+		"		(e.g.: 15 => 32k, default: max supported by PMD)\n"
+		" -h: prints this help\n",
+		progname);
+}
+
+static int
+get_str_key_id_mapping(struct name_id_map *map, unsigned int map_len,
+		const char *str_key)
+{
+	unsigned int i;
+
+	for (i = 0; i < map_len; i++) {
+
+		if (strcmp(str_key, map[i].name) == 0)
+			return map[i].id;
+	}
+
+	return -1;
+}
+
+/*
+ * Parse a base-10 string into a uint32_t.
+ *
+ * Returns 0 on success, -1 when arg is empty, holds no digits or has
+ * trailing characters, and -ERANGE when the value does not fit in
+ * 32 bits. *value is written only on success.
+ */
+static int
+parse_uint32_t(uint32_t *value, const char *arg)
+{
+	char *end = NULL;
+	unsigned long n;
+
+	/* Validate the string that was passed in, not the getopt global */
+	if ((arg == NULL) || (arg[0] == '\0'))
+		return -1;
+
+	errno = 0;
+	n = strtoul(arg, &end, 10);
+
+	if ((end == NULL) || (end == arg) || (*end != '\0'))
+		return -1;
+
+	/* errno catches overflow where unsigned long is 32 bits wide */
+	if (errno == ERANGE || n > UINT32_MAX)
+		return -ERANGE;
+
+	*value = (uint32_t) n;
+
+	return 0;
+}
+
+static int
+parse_uint16_t(uint16_t *value, const char *arg)
+{
+	uint32_t val = 0;
+	int ret = parse_uint32_t(&val, arg);
+
+	if (ret < 0)
+		return ret;
+
+	if (val > UINT16_MAX)
+		return -ERANGE;
+
+	*value = (uint16_t) val;
+
+	return 0;
+}
+
+/*
+ * Convert one range field to a uint8_t.
+ * Returns 0 on success, -1 when the token is missing, is not a plain
+ * decimal number or does not fit in 8 bits.
+ */
+static int
+parse_range_field(const char *token, uint8_t *out)
+{
+	char *end = NULL;
+	unsigned long n;
+
+	if (token == NULL)
+		return -1;
+
+	errno = 0;
+	n = strtoul(token, &end, 10);
+
+	if (errno != 0 || end == token || *end != '\0' || n > UINT8_MAX)
+		return -1;
+
+	*out = (uint8_t) n;
+
+	return 0;
+}
+
+/*
+ * Parse a range written as "min:inc:max".
+ *
+ * Returns 0 on success. Returns -1 when a field is missing, malformed
+ * or larger than UINT8_MAX, when inc is 0, when max < min, or when more
+ * than three fields are given. The outputs are written only on success.
+ */
+static int
+parse_range(const char *arg, uint8_t *min, uint8_t *max, uint8_t *inc)
+{
+	uint8_t lo = 0;
+	uint8_t step = 0;
+	uint8_t hi = 0;
+	int ret = -1;
+
+	char *copy_arg = strdup(arg);
+
+	if (copy_arg == NULL)
+		return -1;
+
+	/* A missing field makes strtok() return NULL; the helper rejects it */
+	if (parse_range_field(strtok(copy_arg, ":"), &lo) < 0)
+		goto out;
+
+	if (parse_range_field(strtok(NULL, ":"), &step) < 0 || step == 0)
+		goto out;
+
+	if (parse_range_field(strtok(NULL, ":"), &hi) < 0 || hi < lo)
+		goto out;
+
+	/* Anything after the third field is an error */
+	if (strtok(NULL, ":") != NULL)
+		goto out;
+
+	*min = lo;
+	*inc = step;
+	*max = hi;
+	ret = 0;
+
+out:
+	free(copy_arg);
+	return ret;
+}
+
+/*
+ * Parse a comma-separated list of decimal values into list[].
+ *
+ * At most MAX_LIST entries are stored; further entries are dropped with
+ * a warning. When min and/or max are non-NULL they receive the smallest
+ * and largest stored value.
+ *
+ * Returns the number of stored entries, or -1 when the argument holds
+ * no value, a token is not a plain decimal number, or a value does not
+ * fit in the uint8_t list.
+ */
+static int
+parse_list(const char *arg, uint8_t *list, uint8_t *min, uint8_t *max)
+{
+	char *token;
+	char *end;
+	unsigned long number;
+	uint8_t count = 0;
+	uint8_t temp_min = UINT8_MAX;
+	uint8_t temp_max = 0;
+
+	char *copy_arg = strdup(arg);
+
+	if (copy_arg == NULL)
+		return -1;
+
+	for (token = strtok(copy_arg, ","); token != NULL;
+			token = strtok(NULL, ",")) {
+		if (count == MAX_LIST) {
+			RTE_LOG(WARNING, USER1,
+				"Using only the first %u sizes\n",
+					MAX_LIST);
+			break;
+		}
+
+		errno = 0;
+		end = NULL;
+		number = strtoul(token, &end, 10);
+
+		/* Reject garbage and values the uint8_t list cannot hold */
+		if (errno != 0 || end == token || *end != '\0' ||
+				number > UINT8_MAX)
+			goto err_list;
+
+		list[count++] = (uint8_t) number;
+
+		if (number < temp_min)
+			temp_min = (uint8_t) number;
+		if (number > temp_max)
+			temp_max = (uint8_t) number;
+	}
+
+	/* An empty argument, or one made only of separators, has no tokens */
+	if (count == 0)
+		goto err_list;
+
+	if (min)
+		*min = temp_min;
+	if (max)
+		*max = temp_max;
+
+	free(copy_arg);
+	return count;
+
+err_list:
+	free(copy_arg);
+	return -1;
+}
+
+static int
+parse_num_iter(struct comp_test_data *test_data, const char *arg)
+{
+	int ret = parse_uint32_t(&test_data->num_iter, arg);
+
+	if (ret) {
+		RTE_LOG(ERR, USER1, "Failed to parse total iteration count\n");
+		return -1;
+	}
+
+	if (test_data->num_iter == 0) {
+		RTE_LOG(ERR, USER1,
+				"Total number of iterations must be higher than 0\n");
+		return -1;
+	}
+
+	return ret;
+}
+
+/*
+ * Parse --pool-sz into test_data->pool_sz.
+ * Returns 0 on success, -1 when the value is not a valid 32-bit
+ * unsigned number or is 0.
+ */
+static int
+parse_pool_sz(struct comp_test_data *test_data, const char *arg)
+{
+	int ret = parse_uint32_t(&test_data->pool_sz, arg);
+
+	if (ret) {
+		/* Newline added so the message ends the log line */
+		RTE_LOG(ERR, USER1, "Failed to parse pool size\n");
+		return -1;
+	}
+
+	if (test_data->pool_sz == 0) {
+		RTE_LOG(ERR, USER1, "Pool size must be higher than 0\n");
+		return -1;
+	}
+
+	return ret;
+}
+
+static int
+parse_burst_sz(struct comp_test_data *test_data, const char *arg)
+{
+	int ret = parse_uint16_t(&test_data->burst_sz, arg);
+
+	if (ret) {
+		RTE_LOG(ERR, USER1, "Failed to parse burst size/s\n");
+		return -1;
+	}
+
+	if (test_data->burst_sz == 0) {
+		RTE_LOG(ERR, USER1, "Burst size must be higher than 0\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+parse_extended_input_sz(struct comp_test_data *test_data, const char *arg)
+{
+	uint32_t tmp;
+	int ret = parse_uint32_t(&tmp, arg);
+
+	if (ret) {
+		RTE_LOG(ERR, USER1, "Failed to parse extended input size\n");
+		return -1;
+	}
+	test_data->input_data_sz = tmp;
+
+	if (tmp == 0) {
+		RTE_LOG(ERR, USER1,
+			"Extended file size must be higher than 0\n");
+		return -1;
+	}
+	return 0;
+}
+
+static int
+parse_seg_sz(struct comp_test_data *test_data, const char *arg)
+{
+	int ret = parse_uint16_t(&test_data->seg_sz, arg);
+
+	if (ret) {
+		RTE_LOG(ERR, USER1, "Failed to parse segment size\n");
+		return -1;
+	}
+
+	if (test_data->seg_sz == 0) {
+		RTE_LOG(ERR, USER1, "Segment size must be higher than 0\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+parse_max_num_sgl_segs(struct comp_test_data *test_data, const char *arg)
+{
+	int ret = parse_uint16_t(&test_data->max_sgl_segs, arg);
+
+	if (ret) {
+		RTE_LOG(ERR, USER1,
+			"Failed to parse max number of segments per mbuf chain\n");
+		return -1;
+	}
+
+	if (test_data->max_sgl_segs == 0) {
+		RTE_LOG(ERR, USER1, "Max number of segments per mbuf chain "
+			"must be higher than 0\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Parse --window-sz into test_data->window_sz.
+ * Returns 0 on success, -1 when the value is not a valid 16-bit
+ * unsigned number.
+ */
+static int
+parse_window_sz(struct comp_test_data *test_data, const char *arg)
+{
+	uint16_t window_sz = 0;
+	int ret = parse_uint16_t(&window_sz, arg);
+
+	if (ret) {
+		RTE_LOG(ERR, USER1, "Failed to parse window size\n");
+		return -1;
+	}
+
+	/*
+	 * window_sz is an int: assign the whole field instead of writing
+	 * 16 bits through a cast pointer, which would leave the remaining
+	 * bytes at their previous value.
+	 */
+	test_data->window_sz = window_sz;
+
+	return 0;
+}
+
+static int
+parse_driver_name(struct comp_test_data *test_data, const char *arg)
+{
+	if (strlen(arg) > (sizeof(test_data->driver_name) - 1))
+		return -1;
+
+	rte_strlcpy(test_data->driver_name, arg,
+			sizeof(test_data->driver_name));
+
+	return 0;
+}
+
+static int
+parse_test_file(struct comp_test_data *test_data, const char *arg)
+{
+	if (strlen(arg) > (sizeof(test_data->input_file) - 1))
+		return -1;
+
+	rte_strlcpy(test_data->input_file, arg, sizeof(test_data->input_file));
+
+	return 0;
+}
+
+static int
+parse_op_type(struct comp_test_data *test_data, const char *arg)
+{
+	struct name_id_map optype_namemap[] = {
+		{
+			"comp",
+			COMPRESS_ONLY
+		},
+		{
+			"decomp",
+			DECOMPRESS_ONLY
+		},
+		{
+			"comp_and_decomp",
+			COMPRESS_DECOMPRESS
+		}
+	};
+
+	int id = get_str_key_id_mapping(optype_namemap,
+			RTE_DIM(optype_namemap), arg);
+	if (id < 0) {
+		RTE_LOG(ERR, USER1, "Invalid operation type specified\n");
+		return -1;
+	}
+
+	test_data->test_op = (enum comp_operation)id;
+
+	return 0;
+}
+
+/*
+ * Parse --huffman-enc ("default" or "fixed") into test_data->huffman_enc.
+ * Returns 0 on success, -1 when the name is not recognised.
+ */
+static int
+parse_huffman_enc(struct comp_test_data *test_data, const char *arg)
+{
+	struct name_id_map huffman_namemap[] = {
+		{
+			"default",
+			RTE_COMP_HUFFMAN_DEFAULT
+		},
+		{
+			"fixed",
+			RTE_COMP_HUFFMAN_FIXED
+		}
+	};
+
+	int id = get_str_key_id_mapping(huffman_namemap,
+			RTE_DIM(huffman_namemap), arg);
+	if (id < 0) {
+		RTE_LOG(ERR, USER1, "Invalid Huffman encoding specified\n");
+		return -1;
+	}
+
+	test_data->huffman_enc = (enum rte_comp_huffman)id;
+
+	return 0;
+}
+
+/*
+ * Parse --compress-level, given either as a "min:inc:max" range or as a
+ * comma-separated list (a single value is a one-element list).
+ *
+ * Returns 0 on success, -1 when the argument is malformed or the
+ * highest requested level exceeds RTE_COMP_LEVEL_MAX.
+ */
+static int
+parse_level(struct comp_test_data *test_data, const char *arg)
+{
+	int ret;
+
+	/*
+	 * Try parsing the argument as a range, if it fails,
+	 * parse it as a list
+	 */
+	if (parse_range(arg, &test_data->level.min, &test_data->level.max,
+			&test_data->level.inc) < 0) {
+		ret = parse_list(arg, test_data->level.list,
+					&test_data->level.min,
+					&test_data->level.max);
+		if (ret < 0) {
+			RTE_LOG(ERR, USER1,
+				"Failed to parse compression level/s\n");
+			return -1;
+		}
+		test_data->level.count = ret;
+	}
+
+	/*
+	 * Enforce the upper bound for the range form as well as the list
+	 * form. NOTE(review): the per-level TSC arrays in comp_test_data
+	 * hold RTE_COMP_LEVEL_MAX + 1 entries and are presumably indexed
+	 * by level - confirm against the measurement code.
+	 */
+	if (test_data->level.max > RTE_COMP_LEVEL_MAX) {
+		RTE_LOG(ERR, USER1, "Level cannot be higher than %u\n",
+				RTE_COMP_LEVEL_MAX);
+		return -1;
+	}
+
+	return 0;
+}
+
+typedef int (*option_parser_t)(struct comp_test_data *test_data,
+		const char *arg);
+
+struct long_opt_parser {
+	const char *lgopt_name;
+	option_parser_t parser_fn;
+
+};
+
+static struct option lgopts[] = {
+
+	{ CPERF_DRIVER_NAME, required_argument, 0, 0 },
+	{ CPERF_TEST_FILE, required_argument, 0, 0 },
+	{ CPERF_SEG_SIZE, required_argument, 0, 0 },
+	{ CPERF_BURST_SIZE, required_argument, 0, 0 },
+	{ CPERF_EXTENDED_SIZE, required_argument, 0, 0 },
+	{ CPERF_POOL_SIZE, required_argument, 0, 0 },
+	{ CPERF_MAX_SGL_SEGS, required_argument, 0, 0},
+	{ CPERF_NUM_ITER, required_argument, 0, 0 },
+	{ CPERF_OPTYPE,	required_argument, 0, 0 },
+	{ CPERF_HUFFMAN_ENC, required_argument, 0, 0 },
+	{ CPERF_LEVEL, required_argument, 0, 0 },
+	{ CPERF_WINDOW_SIZE, required_argument, 0, 0 },
+	{ NULL, 0, 0, 0 }
+};
+static int
+comp_perf_opts_parse_long(int opt_idx, struct comp_test_data *test_data)
+{
+	struct long_opt_parser parsermap[] = {
+		{ CPERF_DRIVER_NAME,	parse_driver_name },
+		{ CPERF_TEST_FILE,	parse_test_file },
+		{ CPERF_SEG_SIZE,	parse_seg_sz },
+		{ CPERF_BURST_SIZE,	parse_burst_sz },
+		{ CPERF_EXTENDED_SIZE,	parse_extended_input_sz },
+		{ CPERF_POOL_SIZE,	parse_pool_sz },
+		{ CPERF_MAX_SGL_SEGS,	parse_max_num_sgl_segs },
+		{ CPERF_NUM_ITER,	parse_num_iter },
+		{ CPERF_OPTYPE,		parse_op_type },
+		{ CPERF_HUFFMAN_ENC,	parse_huffman_enc },
+		{ CPERF_LEVEL,		parse_level },
+		{ CPERF_WINDOW_SIZE,	parse_window_sz },
+	};
+	unsigned int i;
+
+	for (i = 0; i < RTE_DIM(parsermap); i++) {
+		if (strncmp(lgopts[opt_idx].name, parsermap[i].lgopt_name,
+				strlen(lgopts[opt_idx].name)) == 0)
+			return parsermap[i].parser_fn(test_data, optarg);
+	}
+
+	return -EINVAL;
+}
+
+int
+comp_perf_options_parse(struct comp_test_data *test_data, int argc, char **argv)
+{
+	int opt, retval, opt_idx;
+
+	while ((opt = getopt_long(argc, argv, "h", lgopts, &opt_idx)) != EOF) {
+		switch (opt) {
+		case 'h':
+			usage(argv[0]);
+			rte_exit(EXIT_SUCCESS, "Displayed help\n");
+			break;
+		/* long options */
+		case 0:
+			retval = comp_perf_opts_parse_long(opt_idx, test_data);
+			if (retval != 0)
+				return retval;
+
+			break;
+
+		default:
+			usage(argv[0]);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+void
+comp_perf_options_default(struct comp_test_data *test_data)
+{
+	test_data->cdev_id = -1;
+	test_data->seg_sz = 2048;
+	test_data->burst_sz = 32;
+	test_data->pool_sz = 8192;
+	test_data->max_sgl_segs = UINT16_MAX;
+	test_data->num_iter = 10000;
+	test_data->huffman_enc = RTE_COMP_HUFFMAN_FIXED;
+	test_data->test_op = COMPRESS_DECOMPRESS;
+	test_data->window_sz = -1;
+	test_data->level.min = 1;
+	test_data->level.max = 9;
+	test_data->level.inc = 1;
+}
+
+int
+comp_perf_options_check(struct comp_test_data *test_data)
+{
+	if (test_data->driver_name[0] == '\0') {
+		RTE_LOG(ERR, USER1, "Driver name has to be set\n");
+		return -1;
+	}
+
+	if (test_data->input_file[0] == '\0') {
+		RTE_LOG(ERR, USER1, "Input file name has to be set\n");
+		return -1;
+	}
+
+	return 0;
+}
diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c
new file mode 100644
index 0000000..f52b98d
--- /dev/null
+++ b/app/test-compress-perf/main.c
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_compressdev.h>
+
+#include "comp_perf_options.h"
+
+int
+main(int argc, char **argv)
+{
+	int ret;
+	struct comp_test_data *test_data;
+
+	/* Initialise DPDK EAL */
+	ret = rte_eal_init(argc, argv);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "Invalid EAL arguments!\n");
+	argc -= ret;
+	argv += ret;
+
+	test_data = rte_zmalloc_socket(NULL, sizeof(struct comp_test_data),
+					0, rte_socket_id());
+
+	if (test_data == NULL)
+		rte_exit(EXIT_FAILURE, "Cannot reserve memory in socket %d\n",
+				rte_socket_id());
+
+	comp_perf_options_default(test_data);
+
+	if (comp_perf_options_parse(test_data, argc, argv) < 0) {
+		RTE_LOG(ERR, USER1,
+			"Parsing one or more user options failed\n");
+		ret = EXIT_FAILURE;
+		goto err;
+	}
+
+	if (comp_perf_options_check(test_data) < 0) {
+		ret = EXIT_FAILURE;
+		goto err;
+	}
+
+	ret = EXIT_SUCCESS;
+
+err:
+	rte_free(test_data);
+
+	return ret;
+}
diff --git a/app/test-compress-perf/meson.build b/app/test-compress-perf/meson.build
new file mode 100644
index 0000000..ba6d64d
--- /dev/null
+++ b/app/test-compress-perf/meson.build
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+allow_experimental_apis = true
+sources = files('comp_perf_options_parse.c',
+		'main.c')
+deps = ['compressdev']
diff --git a/config/common_base b/config/common_base
index d12ae98..2ab4b7b 100644
--- a/config/common_base
+++ b/config/common_base
@@ -949,6 +949,11 @@ CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
 CONFIG_RTE_TEST_BBDEV=y
 
 #
+# Compile the compression performance application
+#
+CONFIG_RTE_APP_COMPRESS_PERF=y
+
+#
 # Compile the crypto performance application
 #
 CONFIG_RTE_APP_CRYPTO_PERF=y
-- 
2.7.4

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [dpdk-dev] [PATCH v6 2/5] app/compress-perf: add performance measurement
  2018-12-12 12:08         ` Tomasz Jozwiak
  2018-12-12 12:08           ` [dpdk-dev] [PATCH v6 1/5] app/compress-perf: add parser Tomasz Jozwiak
@ 2018-12-12 12:08           ` Tomasz Jozwiak
  2018-12-12 12:08           ` [dpdk-dev] [PATCH v6 3/5] doc/guides/tools: add doc files Tomasz Jozwiak
                             ` (4 subsequent siblings)
  6 siblings, 0 replies; 76+ messages in thread
From: Tomasz Jozwiak @ 2018-12-12 12:08 UTC (permalink / raw)
  To: dev, fiona.trahe, tomaszx.jozwiak, Shally.Verma, akhil.goyal

Add the performance measurement part to the compression performance test.

Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>
Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
---
 app/test-compress-perf/comp_perf_options_parse.c |   2 +-
 app/test-compress-perf/main.c                    | 888 ++++++++++++++++++++++-
 2 files changed, 884 insertions(+), 6 deletions(-)

diff --git a/app/test-compress-perf/comp_perf_options_parse.c b/app/test-compress-perf/comp_perf_options_parse.c
index 7f1a7ff..add5c8a 100644
--- a/app/test-compress-perf/comp_perf_options_parse.c
+++ b/app/test-compress-perf/comp_perf_options_parse.c
@@ -565,7 +565,7 @@ comp_perf_options_default(struct comp_test_data *test_data)
 	test_data->seg_sz = 2048;
 	test_data->burst_sz = 32;
 	test_data->pool_sz = 8192;
-	test_data->max_sgl_segs = UINT16_MAX;
+	test_data->max_sgl_segs = 16;
 	test_data->num_iter = 10000;
 	test_data->huffman_enc = RTE_COMP_HUFFMAN_FIXED;
 	test_data->test_op = COMPRESS_DECOMPRESS;
diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c
index f52b98d..4b183a8 100644
--- a/app/test-compress-perf/main.c
+++ b/app/test-compress-perf/main.c
@@ -5,14 +5,730 @@
 #include <rte_malloc.h>
 #include <rte_eal.h>
 #include <rte_log.h>
+#include <rte_cycles.h>
 #include <rte_compressdev.h>
 
 #include "comp_perf_options.h"
 
+#define NUM_MAX_XFORMS 16
+#define NUM_MAX_INFLIGHT_OPS 512
+#define EXPANSE_RATIO 1.05
+#define MIN_COMPRESSED_BUF_SIZE 8
+
+#define DIV_CEIL(a, b)  ((a) / (b) + ((a) % (b) != 0))
+
+/* Cleanup state machine */
+static enum cleanup_st {
+	ST_CLEAR = 0,
+	ST_TEST_DATA,
+	ST_COMPDEV,
+	ST_INPUT_DATA,
+	ST_MEMORY_ALLOC,
+	ST_PREPARE_BUF,
+	ST_DURING_TEST
+} cleanup = ST_CLEAR;
+
+static int
+param_range_check(uint16_t size, const struct rte_