DPDK patches and discussions
 help / color / Atom feed
* [dpdk-dev] [RFC] app/test-flow-perf: add rte_flow perf app
@ 2020-03-17 13:46 Wisam Jaddo
  2020-03-20  6:49 ` Jerin Jacob
  2020-04-09 15:42 ` [dpdk-dev] [PATCH 1/5] app/test-flow-perf: add flow performance skeleton Wisam Jaddo
  0 siblings, 2 replies; 102+ messages in thread
From: Wisam Jaddo @ 2020-03-17 13:46 UTC (permalink / raw)
  To: dev, thomas, matan; +Cc: rasland

Introduce a new application for rte_flow performance
testing. The application provides the ability to test the
insertion rate of a specific rte_flow rule, by stressing
it to the NIC, and calculates the insertion rate.

It also provides packet per second measurements
after the insertion operation is done.

The application offers some options in the command
line, to configure which rule to apply.

After that the application will start producing rules
with same pattern but increasing the outer IP source
address by 1 each time, thus it will give different
flow each time, and all other items will have open masks.

The current design measures the insertion rate on a single core.
In the future we may add multi core insertion rate
measurement support to the app.

The app supports single and multi core packet forwarding
performance measurements.

Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 app/Makefile                     |   1 +
 app/meson.build                  |   1 +
 app/test-flow-perf/Makefile      |  28 ++
 app/test-flow-perf/actions_gen.c |  26 ++
 app/test-flow-perf/actions_gen.h |  15 +
 app/test-flow-perf/flow_gen.c    |  97 ++++++
 app/test-flow-perf/flow_gen.h    |  47 +++
 app/test-flow-perf/items_gen.c   |  37 +++
 app/test-flow-perf/items_gen.h   |  16 +
 app/test-flow-perf/main.c        | 656 +++++++++++++++++++++++++++++++++++++++
 app/test-flow-perf/meson.build   |  14 +
 config/common_base               |   5 +
 12 files changed, 943 insertions(+)
 create mode 100644 app/test-flow-perf/Makefile
 create mode 100644 app/test-flow-perf/actions_gen.c
 create mode 100644 app/test-flow-perf/actions_gen.h
 create mode 100644 app/test-flow-perf/flow_gen.c
 create mode 100644 app/test-flow-perf/flow_gen.h
 create mode 100644 app/test-flow-perf/items_gen.c
 create mode 100644 app/test-flow-perf/items_gen.h
 create mode 100644 app/test-flow-perf/main.c
 create mode 100644 app/test-flow-perf/meson.build

diff --git a/app/Makefile b/app/Makefile
index db9d2d5..694df67 100644
--- a/app/Makefile
+++ b/app/Makefile
@@ -9,6 +9,7 @@ DIRS-$(CONFIG_RTE_PROC_INFO) += proc-info
 DIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += pdump
 DIRS-$(CONFIG_RTE_LIBRTE_ACL) += test-acl
 DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += test-cmdline
+DIRS-$(CONFIG_RTE_TEST_FLOW_PERF) += test-flow-perf
 DIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += test-pipeline
 DIRS-$(CONFIG_RTE_LIBRTE_IPSEC) += test-sad
 
diff --git a/app/meson.build b/app/meson.build
index 71109cc..20d77b0 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -14,6 +14,7 @@ apps = [
 	'test-compress-perf',
 	'test-crypto-perf',
 	'test-eventdev',
+	'test-flow-perf',
 	'test-pipeline',
 	'test-pmd',
 	'test-sad']
diff --git a/app/test-flow-perf/Makefile b/app/test-flow-perf/Makefile
new file mode 100644
index 0000000..d633725
--- /dev/null
+++ b/app/test-flow-perf/Makefile
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2020 Mellanox Technologies, Ltd
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifeq ($(CONFIG_RTE_TEST_FLOW_PERF),y)
+
+
+#
+# library name
+#
+APP = flow_perf
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -Wno-deprecated-declarations
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-y += actions_gen.c
+SRCS-y += flow_gen.c
+SRCS-y += items_gen.c
+SRCS-y += main.c
+
+include $(RTE_SDK)/mk/rte.app.mk
+
+endif
diff --git a/app/test-flow-perf/actions_gen.c b/app/test-flow-perf/actions_gen.c
new file mode 100644
index 0000000..a40ec0e
--- /dev/null
+++ b/app/test-flow-perf/actions_gen.c
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * The file contains the implementations of actions generators.
+ * Each generator is responsible for preparing its action instance
+ * and initializing it with needed data.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+#include "actions_gen.h"
+
+static struct rte_flow_action_queue queue_action;
+static struct rte_flow_action_mark mark_action;
+
+
+static void
+gen_queue(uint16_t queue)
+{
+	queue_action.index = queue;
+}
+
+static void
+gen_mark(uint32_t mark_id)
+{
+	mark_action.id = mark_id;
+}
diff --git a/app/test-flow-perf/actions_gen.h b/app/test-flow-perf/actions_gen.h
new file mode 100644
index 0000000..a690a1a
--- /dev/null
+++ b/app/test-flow-perf/actions_gen.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This file contains the functions definitions to
+ * generate each supported action.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+#define MAX_ACTIONS_NUM   4
+
+static void
+gen_queue(uint16_t queue);
+
+static void
+gen_mark(uint32_t mark_id);
diff --git a/app/test-flow-perf/flow_gen.c b/app/test-flow-perf/flow_gen.c
new file mode 100644
index 0000000..74d2908
--- /dev/null
+++ b/app/test-flow-perf/flow_gen.c
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * The file contains the implementations of the method to
+ * fill items, actions & attributes in their corresponding
+ * arrays, and then generate rte_flow rule.
+ *
+ * After the generation. The rule goes to validation then
+ * creation state and then return the results.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+#include "flow_gen.h"
+#include "items_gen.c"
+#include "actions_gen.c"
+
+struct rte_flow *
+generate_flow(uint16_t port_id,
+		uint16_t flow_items,
+		uint16_t flow_actions,
+		uint8_t flow_attrs,
+		uint16_t group_id,
+		uint16_t nr_queues,
+		uint32_t outer_ip_src,
+		struct rte_flow_error *error)
+{
+	struct rte_flow_attr attr;
+	struct rte_flow_item items[MAX_ITEMS_NUM];
+	struct rte_flow_action actions[MAX_ACTIONS_NUM];
+	struct rte_flow *flow = NULL;
+	int res;
+
+	memset(items, 0, sizeof(items));
+	memset(actions, 0, sizeof(actions));
+	memset(&attr, 0, sizeof(struct rte_flow_attr));
+
+	fill_attributes(&attr, flow_attrs, group_id);
+
+	fill_actions(actions, flow_actions, nr_queues, outer_ip_src);
+
+	fill_items(items, flow_items, outer_ip_src);
+
+	res = rte_flow_validate(port_id, &attr, items, actions, error);
+	if (!res)
+		flow = rte_flow_create(port_id, &attr, items, actions, error);
+	return flow;
+}
+
+static void
+fill_attributes(struct rte_flow_attr *attr,
+	uint8_t flow_attrs, uint16_t group_id)
+{
+	if (flow_attrs & INGRESS)
+		attr->ingress = 1;
+	if (flow_attrs & EGRESS)
+		attr->egress = 1;
+	if (flow_attrs & TRANSFER)
+		attr->transfer = 1;
+	attr->group = group_id;
+}
+
+static void
+fill_items(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint16_t flow_items, uint32_t outer_ip_src)
+{
+	uint8_t items_counter = 0;
+
+	if (flow_items & ETH_ITEM)
+		add_ether(items, items_counter++);
+	if (flow_items & IPV4_ITEM)
+		add_ipv4(items, items_counter++, outer_ip_src);
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_END;
+}
+
+static void
+fill_actions(struct rte_flow_action actions[MAX_ACTIONS_NUM],
+	uint16_t flow_actions, uint16_t nr_queues, uint32_t counter)
+{
+	uint8_t actions_counter = 0;
+
+	/* Non-fate actions */
+	if (flow_actions & MARK_ACTION) {
+		gen_mark(1);
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_MARK;
+		actions[actions_counter++].conf = &mark_action;
+	}
+
+	/* Fate actions */
+	if (flow_actions & QUEUE_ACTION) {
+		gen_queue(counter % nr_queues);
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_QUEUE;
+		actions[actions_counter++].conf = &queue_action;
+	}
+
+	actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_END;
+}
diff --git a/app/test-flow-perf/flow_gen.h b/app/test-flow-perf/flow_gen.h
new file mode 100644
index 0000000..b006d10
--- /dev/null
+++ b/app/test-flow-perf/flow_gen.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This file contains the items, actions and attributes
+ * definition. And the methods to prepare and fill items,
+ * actions and attributes to generate rte_flow rule.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+#define MAX_ACTIONS_NUM   4
+#define MAX_ITEMS_NUM     8
+
+/* Items */
+#define ETH_ITEM  0x0001
+#define IPV4_ITEM 0x0002
+
+/* Actions */
+#define QUEUE_ACTION 0x0001
+#define MARK_ACTION  0x0002
+#define DROP_ACTION  0x0004
+
+/* Attributes */
+#define INGRESS  0x0001
+#define EGRESS   0x0002
+#define TRANSFER 0x0004
+
+struct rte_flow *
+generate_flow(uint16_t port_id,
+		uint16_t flow_items,
+		uint16_t flow_actions,
+		uint8_t flow_attrs,
+		uint16_t group_id,
+		uint16_t nr_queues,
+		uint32_t outer_ip_src,
+		struct rte_flow_error *error);
+
+static void
+fill_attributes(struct rte_flow_attr *attr,
+	uint8_t flow_attrs, uint16_t group_id);
+
+static void
+fill_items(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint16_t flow_items, uint32_t outer_ip_src);
+
+static void
+fill_actions(struct rte_flow_action actions[MAX_ACTIONS_NUM],
+	uint16_t flow_actions, uint16_t nr_queues, uint32_t counter);
diff --git a/app/test-flow-perf/items_gen.c b/app/test-flow-perf/items_gen.c
new file mode 100644
index 0000000..029d8c6
--- /dev/null
+++ b/app/test-flow-perf/items_gen.c
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This file contain the implementations of the items
+ * related methods. Each Item have a method to prepare
+ * the item and add it into items array in given index.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+#include "items_gen.h"
+
+static struct rte_flow_item_eth eth_spec;
+static struct rte_flow_item_eth eth_mask;
+
+static struct rte_flow_item_ipv4 ipv4_spec;
+static struct rte_flow_item_ipv4 ipv4_mask;
+
+static inline void
+add_ether(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	RTE_SET_USED(eth_spec);
+	RTE_SET_USED(eth_mask);
+	RTE_SET_USED(items);
+	RTE_SET_USED(items_counter);
+}
+
+static inline void
+add_ipv4(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter, uint32_t src_ipv4)
+{
+	RTE_SET_USED(ipv4_spec);
+	RTE_SET_USED(ipv4_mask);
+	RTE_SET_USED(items);
+	RTE_SET_USED(items_counter);
+	RTE_SET_USED(src_ipv4);
+}
diff --git a/app/test-flow-perf/items_gen.h b/app/test-flow-perf/items_gen.h
new file mode 100644
index 0000000..65ef410
--- /dev/null
+++ b/app/test-flow-perf/items_gen.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This file contains the items related methods
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+#define MAX_ITEMS_NUM	8
+
+static inline void
+add_ether(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+static inline void
+add_ipv4(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter, uint32_t src_ipv4);
diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
new file mode 100644
index 0000000..201870f
--- /dev/null
+++ b/app/test-flow-perf/main.c
@@ -0,0 +1,656 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This file contains the application's main entry point.
+ * This application provides the user the ability to test the
+ * insertion rate for a specific rte_flow rule under stress (~4M rules).
+ *
+ * Then it will also provide packet per second measurement after installing
+ * all rules, the user may send traffic to test the PPS that match the rules
+ * after all rules are installed, to check performance or functionality after
+ * the stress.
+ *
+ * The flows insertion will go for all ports first, then it will print the
+ * results, after that the application will go into forwarding packets mode
+ * it will start receiving traffic if any and then forwarding it back and
+ * gives packet per second measurement.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <netinet/in.h>
+#include <setjmp.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <assert.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/time.h>
+
+
+#include <rte_eal.h>
+#include <rte_common.h>
+#include <rte_malloc.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_net.h>
+#include <rte_flow.h>
+#include <rte_cycles.h>
+#include <rte_memory.h>
+
+#include "flow_gen.h"
+#include "flow_gen.c"
+
+#define MAX_PKT_BURST 32
+#define LCORE_MODE_PKT 1
+#define LCORE_MODE_STATS 2
+#define MAX_STREAMS 64
+#define MAX_LCORES 64
+
+/* User Parameters */
+#define FLOWS_COUNT 4000000
+#define ITER_COUNT  100000
+#define MAX_ITER    100
+#define RXQs 8
+#define TXQs 8
+#define FLOW_TABLE 1
+#define TOTAL_MBUF_NUM 32000
+#define MBUF_SIZE 2048
+#define MBUF_CACHE_SIZE 512
+#define NR_RXD	256
+#define NR_TXD	256
+
+
+struct rte_flow *flow;
+
+static uint16_t flow_items;
+static uint16_t flow_actions;
+static uint8_t flow_attrs;
+static volatile bool force_quit;
+static struct rte_mempool *mbuf_mp;
+static uint32_t nb_lcores;
+
+struct stream {
+	int tx_port;
+	int tx_queue;
+	int rx_port;
+	int rx_queue;
+};
+
+struct lcore_info {
+	int mode;
+	int streams_nb;
+	struct stream streams[MAX_STREAMS];
+	/* stats */
+	uint64_t tx_pkts;
+	uint64_t tx_drops;
+	uint64_t rx_pkts;
+	struct rte_mbuf *pkts[MAX_PKT_BURST];
+} __attribute__((__aligned__(64))); /* let it be cacheline aligned */
+
+
+static struct lcore_info lcore_infos[MAX_LCORES];
+static void usage(char *progname)
+{
+	RTE_SET_USED(progname);
+	printf("usage: Help will be implemented here :)");
+}
+
+/* Parse the command line into the flow_attrs/flow_items/flow_actions masks. */
+static void
+args_parse(int argc, char **argv)
+{
+	int opt;
+	int opt_idx;
+	static struct option lgopts[] = {
+		{ "help",                       0, 0, 0 },
+		{ "ingress",                    0, 0, 0 },
+		{ "egress",                     0, 0, 0 },
+		{ "transfer",                   0, 0, 0 },
+		{ "ether",                      0, 0, 0 },
+		{ "ipv4",                       0, 0, 0 },
+		{ "queue",                      0, 0, 0 },
+		{ NULL,                         0, 0, 0 }, /* terminator */
+	};
+
+	flow_items = 0;
+	flow_actions = 0;
+	flow_attrs = 0;
+	printf(":: Flow -> ");
+	while ((opt = getopt_long(argc, argv, "",
+				lgopts, &opt_idx)) != EOF) {
+		switch (opt) {
+		case 0:
+			if (!strcmp(lgopts[opt_idx].name, "help")) {
+				usage(argv[0]);
+				rte_exit(EXIT_SUCCESS, "Displayed help\n");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "ingress")) {
+				flow_attrs |= INGRESS;
+				printf("ingress ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "egress")) {
+				flow_attrs |= EGRESS;
+				printf("egress ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "transfer")) {
+				flow_attrs |= TRANSFER;
+				printf("transfer ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "ether")) {
+				flow_items |= ETH_ITEM;
+				printf("ether / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "ipv4")) {
+				flow_items |= IPV4_ITEM;
+				printf("ipv4 / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "queue")) {
+				flow_actions |= QUEUE_ACTION;
+				printf("queue / ");
+			}
+			break;
+		default:
+			usage(argv[0]);
+			/* optind is normally already past the rejected argument */
+			printf("Invalid option: %s\n", argv[optind - 1]);
+			rte_exit(EXIT_FAILURE, "Invalid option\n");
+			break;
+		}
+	}
+	printf("end_flow\n");
+}
+
+static void
+print_flow_error(struct rte_flow_error error)
+{
+	printf("Flow can't be created %d message: %s\n",
+			error.type,
+			error.message ? error.message : "(no stated reason)");
+}
+
+/* Insert FLOWS_COUNT rules on every port and report the insertion rates. */
+static inline void
+flows_creator(void)
+{
+	struct rte_flow_error error;
+	clock_t start, end, start_iter, end_iter;
+	double cpu_time_used, flows_rate, delta;
+	double cpu_time_per_iter[MAX_ITER];
+	uint16_t nr_ports;
+	uint32_t i;
+	uint32_t eagin_counter = 0;
+	int port_id;
+
+	nr_ports = rte_eth_dev_count_avail();
+
+	printf(":: Flows Count per port: %d\n", FLOWS_COUNT);
+
+	for (port_id = 0; port_id < nr_ports; port_id++) {
+		/* -1 marks a slot not measured on this port; reset per port */
+		for (i = 0; i < MAX_ITER; i++)
+			cpu_time_per_iter[i] = -1;
+		/* Insertion Rate */
+		printf("Flows insertion on port = %d\n", port_id);
+		start = clock();
+		start_iter = clock();
+		for (i = 0; i < FLOWS_COUNT; i++) {
+			do {
+				rte_errno = 0;
+				flow = generate_flow(port_id, flow_items,
+					flow_actions, flow_attrs, FLOW_TABLE,
+					RXQs, i, &error);
+				if (!flow)
+					eagin_counter++;
+			} while (rte_errno == EAGAIN);
+
+			if (force_quit)
+				i = FLOWS_COUNT;
+
+			if (!flow) {
+				print_flow_error(error);
+				rte_exit(EXIT_FAILURE, "error in creating flow\n");
+			}
+
+			if (i && !((i + 1) % ITER_COUNT)) {
+				/* Save the insertion rate of each iter */
+				end_iter = clock();
+				delta = (double) (end_iter - start_iter);
+				cpu_time_per_iter[((i + 1) / ITER_COUNT) - 1] =
+					delta / CLOCKS_PER_SEC;
+				start_iter = clock();
+			}
+		}
+		end = clock();
+		cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
+
+		/* Iteration rate per iteration */
+		for (i = 0; i < MAX_ITER; i++) {
+			if (cpu_time_per_iter[i] == -1)
+				continue;
+			delta = (double) (ITER_COUNT / cpu_time_per_iter[i]);
+			flows_rate = delta / 1000;
+			printf(":: Iteration #%u: %d flows in %f sec[ Rate = %f K/Sec ]\n",
+			i, ITER_COUNT, cpu_time_per_iter[i], flows_rate);
+		}
+
+		/* Insertion rate for all flows */
+		flows_rate = ((double) (FLOWS_COUNT / cpu_time_used) / 1000);
+		printf("\n:: Total flow insertion rate -> %f K/Sec\n",
+						flows_rate);
+		printf(":: The time for creating %d in flows %f seconds\n",
+						FLOWS_COUNT, cpu_time_used);
+		printf(":: EAGIN counter = %u\n", eagin_counter);
+	}
+}
+
+static void
+signal_handler(int signum)
+{
+	if (signum == SIGINT || signum == SIGTERM) {
+		printf("\n\nSignal %d received, preparing to exit...\n",
+					signum);
+		printf("Error: Stats are wrong due to sudden signal!\n\n");
+		force_quit = true;
+	}
+}
+
+static inline uint16_t
+do_rx(struct lcore_info *li, uint16_t rx_port, uint16_t rx_queue)
+{
+	uint16_t cnt = 0;
+	cnt = rte_eth_rx_burst(rx_port, rx_queue, li->pkts, MAX_PKT_BURST);
+	li->rx_pkts += cnt;
+	return cnt;
+}
+
+static inline void
+do_tx(struct lcore_info *li, uint16_t cnt, uint16_t tx_port,
+			uint16_t tx_queue)
+{
+	uint16_t nr_tx = 0;
+	uint16_t i;
+
+	nr_tx = rte_eth_tx_burst(tx_port, tx_queue, li->pkts, cnt);
+	li->tx_pkts  += nr_tx;
+	li->tx_drops += cnt - nr_tx;
+
+	for (i = nr_tx; i < cnt; i++)
+		rte_pktmbuf_free(li->pkts[i]);
+}
+
+/*
+ * Format n with a comma after every three digits so that large
+ * counters are easy to read, e.g. n = 1799321 yields "1,799,321".
+ *
+ * A 64-bit value needs up to seven three-digit groups; buf must
+ * hold at least 27 bytes (20 digits, 6 commas, NUL). The trailing
+ * comma written by the join loop is overwritten. Returns buf.
+ */
+static char *
+pretty_number(uint64_t n, char *buf)
+{
+	char p[7][4];
+	int i = 0;
+	int off = 0;
+
+	while (n >= 1000) {
+		sprintf(p[i], "%03d", (int)(n % 1000));
+		n /= 1000;
+		i += 1;
+	}
+
+	sprintf(p[i++], "%d", (int)n);
+
+	while (i--)
+		off += sprintf(buf + off, "%s,", p[i]);
+	buf[off - 1] = '\0';
+
+	return buf;
+}
+
+static void
+packet_per_second_stats(void)
+{
+	struct lcore_info old[MAX_LCORES];
+	struct lcore_info *li, *oli;
+	int nr_lines = 0;
+	int i;
+
+	memcpy(old, lcore_infos,
+		sizeof(struct lcore_info) * MAX_LCORES);
+
+	while (!force_quit) {
+		uint64_t total_tx_pkts = 0;
+		uint64_t total_rx_pkts = 0;
+		uint64_t total_tx_drops = 0;
+		uint64_t tx_delta, rx_delta, drops_delta;
+		char buf[3][32];
+		int nr_valid_core = 0;
+
+		sleep(1);
+
+		if (nr_lines) {
+			char go_up_nr_lines[16];
+
+			sprintf(go_up_nr_lines, "%c[%dA\r", 27, nr_lines);
+			printf("%s\r", go_up_nr_lines);
+		}
+
+		printf("\n%16s %16s %16s %16s\n", "core", "tx", "tx drops", "rx");
+		printf("%16s %16s %16s %16s\n", "------",
+			"----------------", "----------------", "----------------");
+		nr_lines = 3;
+		for (i = 0; i < MAX_LCORES; i++) {
+			li  = &lcore_infos[i];
+			oli = &old[i];
+			if (li->mode != LCORE_MODE_PKT)
+				continue;
+
+			tx_delta    = li->tx_pkts  - oli->tx_pkts;
+			rx_delta    = li->rx_pkts  - oli->rx_pkts;
+			drops_delta = li->tx_drops - oli->tx_drops;
+			printf("%6d %16s %16s %16s\n", i,
+				pretty_number(tx_delta,    buf[0]),
+				pretty_number(drops_delta, buf[1]),
+				pretty_number(rx_delta,    buf[2]));
+
+			total_tx_pkts  += tx_delta;
+			total_rx_pkts  += rx_delta;
+			total_tx_drops += drops_delta;
+
+			nr_valid_core++;
+			nr_lines += 1;
+		}
+
+		if (nr_valid_core > 1) {
+			printf("%6s %16s %16s %16s\n", "total",
+				pretty_number(total_tx_pkts,  buf[0]),
+				pretty_number(total_tx_drops, buf[1]),
+				pretty_number(total_rx_pkts,  buf[2]));
+			nr_lines += 1;
+		}
+
+		memcpy(old, lcore_infos,
+			sizeof(struct lcore_info) * MAX_LCORES);
+	}
+}
+
+static int
+start_forwarding(void *data __rte_unused)
+{
+	int lcore = rte_lcore_id();
+	int stream_id;
+	uint16_t cnt;
+	struct lcore_info *li = &lcore_infos[lcore];
+
+	if (!li->mode)
+		return 0;
+
+	if (li->mode == LCORE_MODE_STATS) {
+		printf(":: started stats on lcore %u\n", lcore);
+		packet_per_second_stats();
+		return 0;
+	}
+
+	while (!force_quit)
+		for (stream_id = 0; stream_id < MAX_STREAMS; stream_id++) {
+			if (li->streams[stream_id].rx_port == -1)
+				continue;
+
+			cnt = do_rx(li,
+					li->streams[stream_id].rx_port,
+					li->streams[stream_id].rx_queue);
+			if (cnt)
+				do_tx(li, cnt,
+					li->streams[stream_id].tx_port,
+					li->streams[stream_id].tx_queue);
+		}
+	return 0;
+}
+
+static void
+init_lcore_info(void)
+{
+	int i, j;
+	unsigned int lcore;
+	uint16_t nr_port;
+	uint16_t queue;
+	int port;
+	int stream_id = 0;
+	int streams_per_core;
+	int unassigned_streams;
+	int nb_fwd_streams;
+	nr_port = rte_eth_dev_count_avail();
+
+	/** First logical core is reserved for stats printing **/
+	lcore = rte_get_next_lcore(-1, 0, 0);
+	lcore_infos[lcore].mode = LCORE_MODE_STATS;
+
+	/*
+	 * Initialize all cores
+	 * All cores at first must have -1 value in all streams
+	 * This means that this stream is not used, or not set
+	 * yet.
+	 */
+	for (i = 0; i < MAX_LCORES; i++)
+		for (j = 0; j < MAX_STREAMS; j++) {
+			lcore_infos[i].streams[j].tx_port = -1;
+			lcore_infos[i].streams[j].rx_port = -1;
+			lcore_infos[i].streams[j].tx_queue = -1;
+			lcore_infos[i].streams[j].rx_queue = -1;
+			lcore_infos[i].streams_nb = 0;
+		}
+
+	/*
+	 * Calculate the total streams count.
+	 * Also distribute those streams count between the available
+	 * logical cores except first core, since it's reserved for
+	 * stats prints.
+	 */
+	nb_fwd_streams = nr_port * RXQs;
+	if ((int)(nb_lcores - 1) >= nb_fwd_streams)
+		for (i = 0; i < (int)(nb_lcores - 1); i++) {
+			lcore = rte_get_next_lcore(lcore, 0, 0);
+			lcore_infos[lcore].streams_nb = 1;
+		}
+	else {
+		streams_per_core = nb_fwd_streams / (nb_lcores - 1);
+		unassigned_streams = nb_fwd_streams % (nb_lcores - 1);
+		for (i = 0; i < (int)(nb_lcores - 1); i++) {
+			lcore = rte_get_next_lcore(lcore, 0, 0);
+			lcore_infos[lcore].streams_nb = streams_per_core;
+			if (unassigned_streams) {
+				lcore_infos[lcore].streams_nb++;
+				unassigned_streams--;
+			}
+		}
+	}
+
+	/*
+	 * Set the streams for the cores according to each logical
+	 * core stream count.
+	 * The streams is built on the design of what received should
+	 * forward as well, this means that if you received packets on
+	 * port 0 queue 0 then the same queue should forward the
+	 * packets, using the same logical core.
+	 */
+	lcore = rte_get_next_lcore(-1, 0, 0);
+	for (port = 0; port < nr_port; port++) {
+		for (queue = 0; queue < RXQs; queue++) {
+			if (!lcore_infos[lcore].streams_nb ||
+				!(stream_id % lcore_infos[lcore].streams_nb)) {
+				lcore = rte_get_next_lcore(lcore, 0, 0);
+				lcore_infos[lcore].mode = LCORE_MODE_PKT;
+				stream_id = 0;
+			}
+			lcore_infos[lcore].streams[stream_id].rx_queue = queue;
+			lcore_infos[lcore].streams[stream_id].tx_queue = queue;
+			lcore_infos[lcore].streams[stream_id].rx_port = port;
+			lcore_infos[lcore].streams[stream_id].tx_port = port;
+			stream_id++;
+		}
+	}
+
+	/** Print all streams **/
+	printf(":: Stream -> core id[N]: (rx_port, rx_queue)->(tx_port, tx_queue)\n");
+	for (i = 0; i < MAX_LCORES; i++)
+		for (j = 0; j < MAX_STREAMS; j++) {
+			/** No streams for this core **/
+			if (lcore_infos[i].streams[j].tx_port == -1)
+				break;
+			printf("Stream -> core id[%d]: (%d,%d)->(%d,%d)\n",
+				i,
+				lcore_infos[i].streams[j].rx_port,
+				lcore_infos[i].streams[j].rx_queue,
+				lcore_infos[i].streams[j].tx_port,
+				lcore_infos[i].streams[j].tx_queue);
+		}
+}
+
+static void
+init_port(void)
+{
+	int ret;
+	uint16_t i;
+	uint16_t port_id;
+	uint16_t nr_ports = rte_eth_dev_count_avail();
+	struct rte_eth_conf port_conf = {
+		.rxmode = {
+			.split_hdr_size = 0,
+		},
+		.rx_adv_conf = {
+			.rss_conf.rss_hf =
+					ETH_RSS_IP  |
+					ETH_RSS_UDP |
+					ETH_RSS_TCP,
+		},
+	};
+	struct rte_eth_txconf txq_conf;
+	struct rte_eth_rxconf rxq_conf;
+	struct rte_eth_dev_info dev_info;
+
+	if (nr_ports == 0)
+		rte_exit(EXIT_FAILURE, "Error: no port detected\n");
+	mbuf_mp = rte_pktmbuf_pool_create("mbuf_pool",
+					TOTAL_MBUF_NUM, MBUF_CACHE_SIZE,
+					0, MBUF_SIZE,
+					rte_socket_id());
+
+	if (mbuf_mp == NULL)
+		rte_exit(EXIT_FAILURE, "Error: can't init mbuf pool\n");
+
+	for (port_id = 0; port_id < nr_ports; port_id++) {
+		ret = rte_eth_dev_info_get(port_id, &dev_info);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+					"Error during getting device (port %u) info: %s\n",
+					port_id, strerror(-ret));
+
+		printf(":: initializing port: %d\n", port_id);
+		ret = rte_eth_dev_configure(port_id, RXQs, TXQs, &port_conf);
+		if (ret < 0) {
+			rte_exit(EXIT_FAILURE,
+					":: cannot configure device: err=%d, port=%u\n",
+					ret, port_id);
+			}
+
+		rxq_conf = dev_info.default_rxconf;
+		rxq_conf.offloads = port_conf.rxmode.offloads;
+		for (i = 0; i < RXQs; i++) {
+			ret = rte_eth_rx_queue_setup(port_id, i, NR_RXD,
+						rte_eth_dev_socket_id(port_id),
+						&rxq_conf,
+						mbuf_mp);
+			if (ret < 0) {
+				rte_exit(EXIT_FAILURE,
+						":: Rx queue setup failed: err=%d, port=%u\n",
+						ret, port_id);
+			}
+		}
+
+		txq_conf = dev_info.default_txconf;
+		txq_conf.offloads = port_conf.txmode.offloads;
+
+		for (i = 0; i < TXQs; i++) {
+			ret = rte_eth_tx_queue_setup(port_id, i, NR_TXD,
+						rte_eth_dev_socket_id(port_id),
+						&txq_conf);
+			if (ret < 0) {
+				rte_exit(EXIT_FAILURE,
+						":: Tx queue setup failed: err=%d, port=%u\n",
+						ret, port_id);
+			}
+		}
+
+		ret = rte_eth_dev_start(port_id);
+		if (ret < 0) {
+			rte_exit(EXIT_FAILURE,
+					"rte_eth_dev_start:err=%d, port=%u\n",
+					ret, port_id);
+		}
+
+		printf(":: initializing port: %d done\n", port_id);
+	}
+}
+
+/* Entry point: init EAL and ports, insert the flows, then forward packets. */
+int
+main(int argc, char **argv)
+{
+	uint16_t lcore_id;
+	uint16_t port;
+	uint16_t nr_ports;
+	int ret;
+	struct rte_flow_error error;
+
+	ret = rte_eal_init(argc, argv);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "EAL init failed\n");
+
+	/* Ports only exist once rte_eal_init() has probed the devices. */
+	nr_ports = rte_eth_dev_count_avail();
+
+	force_quit = false;
+	signal(SIGINT, signal_handler);
+	signal(SIGTERM, signal_handler);
+
+	argc -= ret;
+	argv += ret;
+
+	if (argc > 1)
+		args_parse(argc, argv);
+
+	init_port();
+
+	nb_lcores = rte_lcore_count();
+	if (nb_lcores <= 1)
+		rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
+
+	flows_creator();
+	init_lcore_info();
+
+	rte_eal_mp_remote_launch(start_forwarding, NULL, CALL_MASTER);
+
+	RTE_LCORE_FOREACH_SLAVE(lcore_id)
+		if (rte_eal_wait_lcore(lcore_id) < 0)
+			break;
+
+	for (port = 0; port < nr_ports; port++) {
+		rte_flow_flush(port, &error);
+		rte_eth_dev_stop(port);
+		rte_eth_dev_close(port);
+	}
+	return 0;
+}
diff --git a/app/test-flow-perf/meson.build b/app/test-flow-perf/meson.build
new file mode 100644
index 0000000..2326bec
--- /dev/null
+++ b/app/test-flow-perf/meson.build
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2020 Mellanox Technologies, Ltd
+
+# meson file, for building this example as part of a main DPDK build.
+#
+# To build this example as a standalone application with an already-installed
+# DPDK instance, use 'make'
+
+sources = files(
+	'actions_gen.c',
+	'flow_gen.c',
+	'items_gen.c',
+	'main.c',
+)
diff --git a/config/common_base b/config/common_base
index c31175f..79455bf 100644
--- a/config/common_base
+++ b/config/common_base
@@ -1111,3 +1111,8 @@ CONFIG_RTE_APP_CRYPTO_PERF=y
 # Compile the eventdev application
 #
 CONFIG_RTE_APP_EVENTDEV=y
+
+#
+# Compile the rte flow perf application
+#
+CONFIG_RTE_TEST_FLOW_PERF=y
-- 
2.7.4


^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [RFC] app/test-flow-perf: add rte_flow perf app
  2020-03-17 13:46 [dpdk-dev] [RFC] app/test-flow-perf: add rte_flow perf app Wisam Jaddo
@ 2020-03-20  6:49 ` Jerin Jacob
  2020-03-20 11:51   ` Thomas Monjalon
  2020-04-09 15:42 ` [dpdk-dev] [PATCH 1/5] app/test-flow-perf: add flow performance skeleton Wisam Jaddo
  1 sibling, 1 reply; 102+ messages in thread
From: Jerin Jacob @ 2020-03-20  6:49 UTC (permalink / raw)
  To: Wisam Jaddo; +Cc: dpdk-dev, Thomas Monjalon, Matan Azrad, Raslan Darawsheh

On Tue, Mar 17, 2020 at 7:16 PM Wisam Jaddo <wisamm@mellanox.com> wrote:

Thanks for this application. Useful stuff.

>
> Introducing new application for rte_flow performance
> testing. The application provide the ability to test
> insertion rate of specific rte_flow rule, by stressing
> it to the NIC, and calculate the insertion rate.
>
> It also provides packet per second measurements
> after the insertion operation is done.
>
> The application offers some options in the command
> line, to configure which rule to apply.
>
> After that the application will start producing rules
> with same pattern but increasing the outer IP source
> address by 1 each time, thus it will give different
> flow each time, and all other items will have open masks.
>
> The current design have single core insertion rate.
> In the future we may have a multi core insertion rate
> measurement support in the app.

If I understand correctly,
# On the main thread, this  application first check the flow insertion
performance
# and then start the worker thread for packet forwarding.
Why is this application testing packet forwarding? We already have
testpmd for that.

IMO, This application needs to focus only on
- Insertion performance
- Deletion performance
- IMO, it is better to add a framework for profiles, where the first
version of this application can
define a common set of ITEMS and a set of ACTIONS, and later others can extend it.
And the framework can run over all the profiles and spit out the
insertion and deletion
performance.


>
> The app supports single and multi core performance
> measurements.
>
> Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
> ---
>  app/Makefile                     |   1 +
>  app/meson.build                  |   1 +

# Update MAINTAINERS file

# Add doc for this test under doc/guides/tools/

# Please update release notes

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [RFC] app/test-flow-perf: add rte_flow perf app
  2020-03-20  6:49 ` Jerin Jacob
@ 2020-03-20 11:51   ` Thomas Monjalon
  2020-03-20 12:18     ` Jerin Jacob
  0 siblings, 1 reply; 102+ messages in thread
From: Thomas Monjalon @ 2020-03-20 11:51 UTC (permalink / raw)
  To: Jerin Jacob; +Cc: Wisam Jaddo, dpdk-dev, Matan Azrad, Raslan Darawsheh

20/03/2020 07:49, Jerin Jacob:
> On Tue, Mar 17, 2020 at 7:16 PM Wisam Jaddo <wisamm@mellanox.com> wrote:
> 
> Thanks for this application. Useful stuff.
> 
> >
> > Introducing new application for rte_flow performance
> > testing. The application provide the ability to test
> > insertion rate of specific rte_flow rule, by stressing
> > it to the NIC, and calculate the insertion rate.
> >
> > It also provides packet per second measurements
> > after the insertion operation is done.
> >
> > The application offers some options in the command
> > line, to configure which rule to apply.
> >
> > After that the application will start producing rules
> > with same pattern but increasing the outer IP source
> > address by 1 each time, thus it will give different
> > flow each time, and all other items will have open masks.
> >
> > The current design have single core insertion rate.
> > In the future we may have a multi core insertion rate
> > measurement support in the app.
> 
> If I understand correctly,
> # On the main thread, this  application first check the flow insertion
> performance
> # and then start the worker thread for packet forwarding.
> Why this application testing the packet forwarding?, We already have
> testpmd for that.

I think it is interesting to measure forwarding performance
when millions of flow rules are in effect.

> IMO, This application needs to focus only on
> - Insertion performance
> - Deletion performance
> - IMO, it is better to add a framework for the profile where the first
> version of this application can
> define common a set of ITEMS and set of ACTION and later others can extend it.
> And the framework can run over all the profiles and spit out the
> insertion and deletion
> performance.

What do you call a profile? Is it a set of rules?
I think this first version is proposing rules customization with parameters.
Note: I prefer a non-interactive application for performance testing.

> > The app supports single and multi core performance
> > measurements.




^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [RFC] app/test-flow-perf: add rte_flow perf app
  2020-03-20 11:51   ` Thomas Monjalon
@ 2020-03-20 12:18     ` Jerin Jacob
  2020-03-23  9:53       ` Wisam Monther
  0 siblings, 1 reply; 102+ messages in thread
From: Jerin Jacob @ 2020-03-20 12:18 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: Wisam Jaddo, dpdk-dev, Matan Azrad, Raslan Darawsheh

On Fri, Mar 20, 2020 at 5:21 PM Thomas Monjalon <thomas@monjalon.net> wrote:
>
> 20/03/2020 07:49, Jerin Jacob:
> > On Tue, Mar 17, 2020 at 7:16 PM Wisam Jaddo <wisamm@mellanox.com> wrote:
> >
> > Thanks for this application. Useful stuff.
> >
> > >
> > > Introducing new application for rte_flow performance
> > > testing. The application provide the ability to test
> > > insertion rate of specific rte_flow rule, by stressing
> > > it to the NIC, and calculate the insertion rate.
> > >
> > > It also provides packet per second measurements
> > > after the insertion operation is done.
> > >
> > > The application offers some options in the command
> > > line, to configure which rule to apply.
> > >
> > > After that the application will start producing rules
> > > with same pattern but increasing the outer IP source
> > > address by 1 each time, thus it will give different
> > > flow each time, and all other items will have open masks.
> > >
> > > The current design have single core insertion rate.
> > > In the future we may have a multi core insertion rate
> > > measurement support in the app.
> >
> > If I understand correctly,
> > # On the main thread, this  application first check the flow insertion
> > performance
> > # and then start the worker thread for packet forwarding.
> > Why this application testing the packet forwarding?, We already have
> > testpmd for that.
>
> I think it is interesting to measure forwarding performance
> when million of flow rules are in effect.

The rules are applied to the HW CAM, Right?
Do you see any performance difference?

>
> > IMO, This application needs to focus only on
> > - Insertion performance
> > - Deletion performance
> > - IMO, it is better to add a framework for the profile where the first
> > version of this application can
> > define common a set of ITEMS and set of ACTION and later others can extend it.
> > And the framework can run over all the profiles and spit out the
> > insertion and deletion
> > performance.
>
> What do you call a profile? Is it a set of rules?

set of rules and/or actions.

> I think this first version is proposing rules customization with parameters.

It is just that it is better to have a framework where one can easily add new
profiles and
test various combos. IMO, cascade rules take more insertion time.

> Note: I prefer a non-interactive application for performance testing.

Me too. Command-line is fine.

>
> > > The app supports single and multi core performance
> > > measurements.
>
>
>

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [RFC] app/test-flow-perf: add rte_flow perf app
  2020-03-20 12:18     ` Jerin Jacob
@ 2020-03-23  9:53       ` Wisam Monther
  2020-03-23 11:15         ` Jerin Jacob
  0 siblings, 1 reply; 102+ messages in thread
From: Wisam Monther @ 2020-03-23  9:53 UTC (permalink / raw)
  To: Jerin Jacob, Thomas Monjalon; +Cc: dpdk-dev, Matan Azrad, Raslan Darawsheh



> -----Original Message-----
> From: Jerin Jacob <jerinjacobk@gmail.com>
> Sent: Friday, March 20, 2020 2:18 PM
> To: Thomas Monjalon <thomas@monjalon.net>
> Cc: Wisam Monther <wisamm@mellanox.com>; dpdk-dev <dev@dpdk.org>;
> Matan Azrad <matan@mellanox.com>; Raslan Darawsheh
> <rasland@mellanox.com>
> Subject: Re: [dpdk-dev] [RFC] app/test-flow-perf: add rte_flow perf app
> 
> On Fri, Mar 20, 2020 at 5:21 PM Thomas Monjalon <thomas@monjalon.net>
> wrote:
> >
> > 20/03/2020 07:49, Jerin Jacob:
> > > On Tue, Mar 17, 2020 at 7:16 PM Wisam Jaddo <wisamm@mellanox.com>
> wrote:
> > >
> > > Thanks for this application. Useful stuff.
> > >

😊

> > > >
> > > > Introducing new application for rte_flow performance testing. The
> > > > application provide the ability to test insertion rate of specific
> > > > rte_flow rule, by stressing it to the NIC, and calculate the
> > > > insertion rate.
> > > >
> > > > It also provides packet per second measurements after the
> > > > insertion operation is done.
> > > >
> > > > The application offers some options in the command line, to
> > > > configure which rule to apply.
> > > >
> > > > After that the application will start producing rules with same
> > > > pattern but increasing the outer IP source address by 1 each time,
> > > > thus it will give different flow each time, and all other items
> > > > will have open masks.
> > > >
> > > > The current design have single core insertion rate.
> > > > In the future we may have a multi core insertion rate measurement
> > > > support in the app.
> > >
> > > If I understand correctly,
> > > # On the main thread, this  application first check the flow
> > > insertion performance # and then start the worker thread for packet
> > > forwarding.
> > > Why this application testing the packet forwarding?, We already have
> > > testpmd for that.
> >
> > I think it is interesting to measure forwarding performance when
> > million of flow rules are in effect.
> 
> The rules are applied to the HW CAM, Right?
> Do you see any performance difference?
> 

Yes, they are applied to HW.
No, not really; I haven't tested the performance impact yet.
Moreover, it would be interesting to see such results and the impact on performance,
and also to verify that the rules are still matching after millions of insertions and millions of packets
sent/received.

> >
> > > IMO, This application needs to focus only on
> > > - Insertion performance
> > > - Deletion performance
> > > - IMO, it is better to add a framework for the profile where the
> > > first version of this application can define common a set of ITEMS
> > > and set of ACTION and later others can extend it.
> > > And the framework can run over all the profiles and spit out the
> > > insertion and deletion performance.
> >
> > What do you call a profile? Is it a set of rules?
> 
> set of rules and/or actions.
> 
> > I think this first version is proposing rules customization with parameters.
> 
> Just that it better to have a framework where one can easily add new
> profiles and test various combos. IMO, Cascade rules take more insertion
> time.
> 
> > Note: I prefer a non-interactive application for performance testing.
> 
> Me too. Command-line is fine.
> 

For this version I'm aiming to have the command-line options decide the profile.
For example:
./flow-perf -n 4 -w 0000:03:00.1,dv_flow_en=1 -- --ingress --ether --ipv4 --udp --vxlan-gpe --queue --mark
will mean 4 million rules of:
flow create 0 ingress pattern eth / ipv4 src is <X> / udp / vxlan-gpe / end actions mark id 1 / queue <QUEUE_ID> / end

> >
> > > > The app supports single and multi core performance measurements.
> >
> >
> >

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [RFC] app/test-flow-perf: add rte_flow perf app
  2020-03-23  9:53       ` Wisam Monther
@ 2020-03-23 11:15         ` Jerin Jacob
  2020-03-23 11:41           ` Wisam Monther
  0 siblings, 1 reply; 102+ messages in thread
From: Jerin Jacob @ 2020-03-23 11:15 UTC (permalink / raw)
  To: Wisam Monther; +Cc: Thomas Monjalon, dpdk-dev, Matan Azrad, Raslan Darawsheh

On Mon, Mar 23, 2020 at 3:23 PM Wisam Monther <wisamm@mellanox.com> wrote:
>
>
>
> > -----Original Message-----
> > From: Jerin Jacob <jerinjacobk@gmail.com>
> > Sent: Friday, March 20, 2020 2:18 PM
> > To: Thomas Monjalon <thomas@monjalon.net>
> > Cc: Wisam Monther <wisamm@mellanox.com>; dpdk-dev <dev@dpdk.org>;
> > Matan Azrad <matan@mellanox.com>; Raslan Darawsheh
> > <rasland@mellanox.com>
> > Subject: Re: [dpdk-dev] [RFC] app/test-flow-perf: add rte_flow perf app
> >
> > On Fri, Mar 20, 2020 at 5:21 PM Thomas Monjalon <thomas@monjalon.net>
> > wrote:
> > >
> > > 20/03/2020 07:49, Jerin Jacob:
> > > > On Tue, Mar 17, 2020 at 7:16 PM Wisam Jaddo <wisamm@mellanox.com>
> > wrote:
> > > >
> > > > Thanks for this application. Useful stuff.
> > > >
>
>
>
> > > > >
> > > > > Introducing new application for rte_flow performance testing. The
> > > > > application provide the ability to test insertion rate of specific
> > > > > rte_flow rule, by stressing it to the NIC, and calculate the
> > > > > insertion rate.
> > > > >
> > > > > It also provides packet per second measurements after the
> > > > > insertion operation is done.
> > > > >
> > > > > The application offers some options in the command line, to
> > > > > configure which rule to apply.
> > > > >
> > > > > After that the application will start producing rules with same
> > > > > pattern but increasing the outer IP source address by 1 each time,
> > > > > thus it will give different flow each time, and all other items
> > > > > will have open masks.
> > > > >
> > > > > The current design have single core insertion rate.
> > > > > In the future we may have a multi core insertion rate measurement
> > > > > support in the app.
> > > >
> > > > If I understand correctly,
> > > > # On the main thread, this  application first check the flow
> > > > insertion performance # and then start the worker thread for packet
> > > > forwarding.
> > > > Why this application testing the packet forwarding?, We already have
> > > > testpmd for that.
> > >
> > > I think it is interesting to measure forwarding performance when
> > > million of flow rules are in effect.
> >
> > The rules are applied to the HW CAM, Right?
> > Do you see any performance difference?
> >
>
> Yes, there are applied to HW,


OK. IMO, it is better to introduce a command-line argument to
disable/enable packet forwarding.
That will help if someone needs to test only flow insertion
performance and wants to avoid the IO setup.

>
> No not really, I still didn't test the impact of performance yet.
> Moreover it's interesting to see such results and the impact on performance,
> Also to see the rules are still matching after all Millions of insertion and millions of packets
> Sending/receiving.


>
>
> > >
> > > > IMO, This application needs to focus only on
> > > > - Insertion performance
> > > > - Deletion performance
> > > > - IMO, it is better to add a framework for the profile where the
> > > > first version of this application can define common a set of ITEMS
> > > > and set of ACTION and later others can extend it.
> > > > And the framework can run over all the profiles and spit out the
> > > > insertion and deletion performance.
> > >
> > > What do you call a profile? Is it a set of rules?
> >
> > set of rules and/or actions.
> >
> > > I think this first version is proposing rules customization with parameters.
> >
> > Just that it better to have a framework where one can easily add new
> > profiles and test various combos. IMO, Cascade rules take more insertion
> > time.
> >
> > > Note: I prefer a non-interactive application for performance testing.
> >
> > Me too. Command-line is fine.
> >
>
> For this version I'm aiming to have the command line options to decide the profile.
> For example:
> . /flow-perf -n 4 -w 0000:03:00.1,dv_flow_en=1 -- --ingress --ether --ipv4 --udp --vxlan-gpe --queue --mark
> Will mean 4 Million rules of:
> Flow create 0 ingress pattern eth / ipv4 src is <X> / udp / vxlan-gpe / end actions mark id 1 / queue < QUEUE _ID> / end

Ok. The syntax looks good. I think we can add the number of rules as
well on the command line instead of hardcoding it to 4 million.

And what about the flow deletion performance case?


>
>
> > >
> > > > > The app supports single and multi core performance measurements.
> > >
> > >
> > >

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [RFC] app/test-flow-perf: add rte_flow perf app
  2020-03-23 11:15         ` Jerin Jacob
@ 2020-03-23 11:41           ` Wisam Monther
  2020-03-23 13:00             ` Thomas Monjalon
  0 siblings, 1 reply; 102+ messages in thread
From: Wisam Monther @ 2020-03-23 11:41 UTC (permalink / raw)
  To: Jerin Jacob; +Cc: Thomas Monjalon, dpdk-dev, Matan Azrad, Raslan Darawsheh



> -----Original Message-----
> From: Jerin Jacob <jerinjacobk@gmail.com>
> Sent: Monday, March 23, 2020 1:16 PM
> To: Wisam Monther <wisamm@mellanox.com>
> Cc: Thomas Monjalon <thomas@monjalon.net>; dpdk-dev
> <dev@dpdk.org>; Matan Azrad <matan@mellanox.com>; Raslan Darawsheh
> <rasland@mellanox.com>
> Subject: Re: [dpdk-dev] [RFC] app/test-flow-perf: add rte_flow perf app
> 
> On Mon, Mar 23, 2020 at 3:23 PM Wisam Monther
> <wisamm@mellanox.com> wrote:
> >
> >
> >
> > > -----Original Message-----
> > > From: Jerin Jacob <jerinjacobk@gmail.com>
> > > Sent: Friday, March 20, 2020 2:18 PM
> > > To: Thomas Monjalon <thomas@monjalon.net>
> > > Cc: Wisam Monther <wisamm@mellanox.com>; dpdk-dev
> <dev@dpdk.org>;
> > > Matan Azrad <matan@mellanox.com>; Raslan Darawsheh
> > > <rasland@mellanox.com>
> > > Subject: Re: [dpdk-dev] [RFC] app/test-flow-perf: add rte_flow perf
> > > app
> > >
> > > On Fri, Mar 20, 2020 at 5:21 PM Thomas Monjalon
> > > <thomas@monjalon.net>
> > > wrote:
> > > >
> > > > 20/03/2020 07:49, Jerin Jacob:
> > > > > On Tue, Mar 17, 2020 at 7:16 PM Wisam Jaddo
> > > > > <wisamm@mellanox.com>
> > > wrote:
> > > > >
> > > > > Thanks for this application. Useful stuff.
> > > > >
> >
> >
> >
> > > > > >
> > > > > > Introducing new application for rte_flow performance testing.
> > > > > > The application provide the ability to test insertion rate of
> > > > > > specific rte_flow rule, by stressing it to the NIC, and
> > > > > > calculate the insertion rate.
> > > > > >
> > > > > > It also provides packet per second measurements after the
> > > > > > insertion operation is done.
> > > > > >
> > > > > > The application offers some options in the command line, to
> > > > > > configure which rule to apply.
> > > > > >
> > > > > > After that the application will start producing rules with
> > > > > > same pattern but increasing the outer IP source address by 1
> > > > > > each time, thus it will give different flow each time, and all
> > > > > > other items will have open masks.
> > > > > >
> > > > > > The current design have single core insertion rate.
> > > > > > In the future we may have a multi core insertion rate
> > > > > > measurement support in the app.
> > > > >
> > > > > If I understand correctly,
> > > > > # On the main thread, this  application first check the flow
> > > > > insertion performance # and then start the worker thread for
> > > > > packet forwarding.
> > > > > Why this application testing the packet forwarding?, We already
> > > > > have testpmd for that.
> > > >
> > > > I think it is interesting to measure forwarding performance when
> > > > million of flow rules are in effect.
> > >
> > > The rules are applied to the HW CAM, Right?
> > > Do you see any performance difference?
> > >
> >
> > Yes, there are applied to HW,
> 
> 
> OK.IMO, it is better to introduce the command-line argument to
> disable/enable packet forwarding.
> That will enable if someone needs to test only flow insertion performance to
> avoid the IO setup.
> 

Sure, we can have forwarding enabled by default, and I'll add --disable-fwd
to the command-line options. It looks reasonable to have it, I agree.

> >
> > No not really, I still didn't test the impact of performance yet.
> > Moreover it's interesting to see such results and the impact on
> > performance, Also to see the rules are still matching after all
> > Millions of insertion and millions of packets Sending/receiving.
> 
> 
> >
> >
> > > >
> > > > > IMO, This application needs to focus only on
> > > > > - Insertion performance
> > > > > - Deletion performance
> > > > > - IMO, it is better to add a framework for the profile where the
> > > > > first version of this application can define common a set of
> > > > > ITEMS and set of ACTION and later others can extend it.
> > > > > And the framework can run over all the profiles and spit out the
> > > > > insertion and deletion performance.
> > > >
> > > > What do you call a profile? Is it a set of rules?
> > >
> > > set of rules and/or actions.
> > >
> > > > I think this first version is proposing rules customization with
> parameters.
> > >
> > > Just that it better to have a framework where one can easily add new
> > > profiles and test various combos. IMO, Cascade rules take more
> > > insertion time.
> > >
> > > > Note: I prefer a non-interactive application for performance testing.
> > >
> > > Me too. Command-line is fine.
> > >
> >
> > For this version I'm aiming to have the command line options to decide the
> profile.
> > For example:
> > . /flow-perf -n 4 -w 0000:03:00.1,dv_flow_en=1 -- --ingress --ether
> > --ipv4 --udp --vxlan-gpe --queue --mark Will mean 4 Million rules of:
> > Flow create 0 ingress pattern eth / ipv4 src is <X> / udp / vxlan-gpe
> > / end actions mark id 1 / queue < QUEUE _ID> / end
> 
> Ok. The syntax looks good. I think we can add a number of rules as well in
> command like instead of hardcoding to 4Millon.
> 

Sure, we can have it also.
BTW, I'm planning to have a file named "user_parameters.h".
This file is for other specific fields such as:
/** Flows count & iteration size **/
#define FLOWS_COUNT      4000000
#define ITERATION_SIZE  100000

/** Configuration **/
#define RXQs 4
#define TXQs 4
#define HAIRPIN_QUEUES 4
#define TOTAL_MBUF_NUM 32000
#define MBUF_SIZE 2048
#define MBUF_CACHE_SIZE 512
#define NR_RXD  256
#define NR_TXD  256

/** Items/Actions parameters **/
#define FLOW_TABLE 1
#define JUMP_ACTION_TABLE 2
#define VLAN_VALUE 1
#define VNI_VALUE 1
#define GRE_PROTO  0x6558
#define META_DATA 1
#define TAG_INDEX 0
#define PORT_ID_DST 1
#define MARK_ID 1
#define TEID_VALUE 1

> And what about the flow deletion performance case?

I agree we should have it in this application as well;
I plan to do it as well.

> 
> 
> >
> >
> > > >
> > > > > > The app supports single and multi core performance
> measurements.
> > > >
> > > >
> > > >

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [RFC] app/test-flow-perf: add rte_flow perf app
  2020-03-23 11:41           ` Wisam Monther
@ 2020-03-23 13:00             ` Thomas Monjalon
  2020-03-23 13:09               ` Wisam Monther
  0 siblings, 1 reply; 102+ messages in thread
From: Thomas Monjalon @ 2020-03-23 13:00 UTC (permalink / raw)
  To: Jerin Jacob, Wisam Monther; +Cc: dpdk-dev, Matan Azrad, Raslan Darawsheh

23/03/2020 12:41, Wisam Monther:
> From: Jerin Jacob <jerinjacobk@gmail.com>
> > On Mon, Mar 23, 2020 at 3:23 PM Wisam Monther wrote:
> > > From: Jerin Jacob <jerinjacobk@gmail.com>
> > > > On Fri, Mar 20, 2020 at 5:21 PM Thomas Monjalon wrote:
> > > > > 20/03/2020 07:49, Jerin Jacob:
> > > > > > On Tue, Mar 17, 2020 at 7:16 PM Wisam Jaddo wrote:
> > > > > >
> > > > > > Thanks for this application. Useful stuff.
> > > > > >
> > > > > > >
> > > > > > > Introducing new application for rte_flow performance testing.
> > > > > > > The application provide the ability to test insertion rate of
> > > > > > > specific rte_flow rule, by stressing it to the NIC, and
> > > > > > > calculate the insertion rate.
> > > > > > >
> > > > > > > It also provides packet per second measurements after the
> > > > > > > insertion operation is done.
> > > > > > >
> > > > > > > The application offers some options in the command line, to
> > > > > > > configure which rule to apply.
> > > > > > >
> > > > > > > After that the application will start producing rules with
> > > > > > > same pattern but increasing the outer IP source address by 1
> > > > > > > each time, thus it will give different flow each time, and all
> > > > > > > other items will have open masks.
> > > > > > >
> > > > > > > The current design have single core insertion rate.
> > > > > > > In the future we may have a multi core insertion rate
> > > > > > > measurement support in the app.
> > > > > >
> > > > > > If I understand correctly,
> > > > > > # On the main thread, this  application first check the flow
> > > > > > insertion performance # and then start the worker thread for
> > > > > > packet forwarding.
> > > > > > Why this application testing the packet forwarding?, We already
> > > > > > have testpmd for that.
> > > > >
> > > > > I think it is interesting to measure forwarding performance when
> > > > > million of flow rules are in effect.
> > > >
> > > > The rules are applied to the HW CAM, Right?
> > > > Do you see any performance difference?
> > > >
> > >
> > > Yes, there are applied to HW,
> > 
> > 
> > OK.IMO, it is better to introduce the command-line argument to
> > disable/enable packet forwarding.
> > That will enable if someone needs to test only flow insertion performance to
> > avoid the IO setup.
> > 
> 
> Sure, we can have the forwarding enabled by default, and I'll add --disable-fwd
> To command line options, it looks reasonable to have it, I agree

In general I prefer things disabled by default.
Option --test-fwd makes more sense and can accept some forwarding options.


> > > No not really, I still didn't test the impact of performance yet.
> > > Moreover it's interesting to see such results and the impact on
> > > performance, Also to see the rules are still matching after all
> > > Millions of insertion and millions of packets Sending/receiving.
> > 
> > 
> > > > > > IMO, This application needs to focus only on
> > > > > > - Insertion performance
> > > > > > - Deletion performance
> > > > > > - IMO, it is better to add a framework for the profile where the
> > > > > > first version of this application can define common a set of
> > > > > > ITEMS and set of ACTION and later others can extend it.
> > > > > > And the framework can run over all the profiles and spit out the
> > > > > > insertion and deletion performance.
> > > > >
> > > > > What do you call a profile? Is it a set of rules?
> > > >
> > > > set of rules and/or actions.
> > > >
> > > > > I think this first version is proposing rules customization with
> > parameters.
> > > >
> > > > Just that it better to have a framework where one can easily add new
> > > > profiles and test various combos. IMO, Cascade rules take more
> > > > insertion time.
> > > >
> > > > > Note: I prefer a non-interactive application for performance testing.
> > > >
> > > > Me too. Command-line is fine.
> > > >
> > >
> > > For this version I'm aiming to have the command line options to decide the
> > profile.
> > > For example:
> > > . /flow-perf -n 4 -w 0000:03:00.1,dv_flow_en=1 -- --ingress --ether
> > > --ipv4 --udp --vxlan-gpe --queue --mark Will mean 4 Million rules of:
> > > Flow create 0 ingress pattern eth / ipv4 src is <X> / udp / vxlan-gpe
> > > / end actions mark id 1 / queue < QUEUE _ID> / end
> > 
> > Ok. The syntax looks good. I think we can add a number of rules as well in
> > command like instead of hardcoding to 4Millon.
> 
> Sure we can have it also
> BTW, I'm planning to have a file under "user_paramters.h"
> This file for other specific fields such as:
> /** Flows count & iteration size **/
> #define FLOWS_COUNT      4000000
> #define ITERATION_SIZE  100000

Please make flows count a variable which can be changed with option.


> > And what about the flow deletion performance case?
> 
> I agree we should have it as well in this application,
> I plan it to do it as well

Great, thanks



^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [RFC] app/test-flow-perf: add rte_flow perf app
  2020-03-23 13:00             ` Thomas Monjalon
@ 2020-03-23 13:09               ` Wisam Monther
  0 siblings, 0 replies; 102+ messages in thread
From: Wisam Monther @ 2020-03-23 13:09 UTC (permalink / raw)
  To: Thomas Monjalon, Jerin Jacob; +Cc: dpdk-dev, Matan Azrad, Raslan Darawsheh



> -----Original Message-----
> From: Thomas Monjalon <thomas@monjalon.net>
> Sent: Monday, March 23, 2020 3:00 PM
> To: Jerin Jacob <jerinjacobk@gmail.com>; Wisam Monther
> <wisamm@mellanox.com>
> Cc: dpdk-dev <dev@dpdk.org>; Matan Azrad <matan@mellanox.com>;
> Raslan Darawsheh <rasland@mellanox.com>
> Subject: Re: [dpdk-dev] [RFC] app/test-flow-perf: add rte_flow perf app
> 
> 23/03/2020 12:41, Wisam Monther:
> > From: Jerin Jacob <jerinjacobk@gmail.com>
> > > On Mon, Mar 23, 2020 at 3:23 PM Wisam Monther wrote:
> > > > From: Jerin Jacob <jerinjacobk@gmail.com>
> > > > > On Fri, Mar 20, 2020 at 5:21 PM Thomas Monjalon wrote:
> > > > > > 20/03/2020 07:49, Jerin Jacob:
> > > > > > > On Tue, Mar 17, 2020 at 7:16 PM Wisam Jaddo wrote:
> > > > > > >
> > > > > > > Thanks for this application. Useful stuff.
> > > > > > >
> > > > > > > >
> > > > > > > > Introducing new application for rte_flow performance testing.
> > > > > > > > The application provide the ability to test insertion rate
> > > > > > > > of specific rte_flow rule, by stressing it to the NIC, and
> > > > > > > > calculate the insertion rate.
> > > > > > > >
> > > > > > > > It also provides packet per second measurements after the
> > > > > > > > insertion operation is done.
> > > > > > > >
> > > > > > > > The application offers some options in the command line,
> > > > > > > > to configure which rule to apply.
> > > > > > > >
> > > > > > > > After that the application will start producing rules with
> > > > > > > > same pattern but increasing the outer IP source address by
> > > > > > > > 1 each time, thus it will give different flow each time,
> > > > > > > > and all other items will have open masks.
> > > > > > > >
> > > > > > > > The current design have single core insertion rate.
> > > > > > > > In the future we may have a multi core insertion rate
> > > > > > > > measurement support in the app.
> > > > > > >
> > > > > > > If I understand correctly,
> > > > > > > # On the main thread, this  application first check the flow
> > > > > > > insertion performance # and then start the worker thread for
> > > > > > > packet forwarding.
> > > > > > > Why this application testing the packet forwarding?, We
> > > > > > > already have testpmd for that.
> > > > > >
> > > > > > I think it is interesting to measure forwarding performance
> > > > > > when million of flow rules are in effect.
> > > > >
> > > > > The rules are applied to the HW CAM, Right?
> > > > > Do you see any performance difference?
> > > > >
> > > >
> > > > Yes, there are applied to HW,
> > >
> > >
> > > OK.IMO, it is better to introduce the command-line argument to
> > > disable/enable packet forwarding.
> > > That will enable if someone needs to test only flow insertion
> > > performance to avoid the IO setup.
> > >
> >
> > Sure, we can have the forwarding enabled by default, and I'll add
> > --disable-fwd To command line options, it looks reasonable to have it,
> > I agree
> 
> In general I prefer things disabled by default.
> Option --test-fwd makes more sense and can accept some forwarding
> options.

sure

> 
> 
> > > > No not really, I still didn't test the impact of performance yet.
> > > > Moreover it's interesting to see such results and the impact on
> > > > performance, Also to see the rules are still matching after all
> > > > Millions of insertion and millions of packets Sending/receiving.
> > >
> > >
> > > > > > > IMO, This application needs to focus only on
> > > > > > > - Insertion performance
> > > > > > > - Deletion performance
> > > > > > > - IMO, it is better to add a framework for the profile where
> > > > > > > the first version of this application can define common a
> > > > > > > set of ITEMS and set of ACTION and later others can extend it.
> > > > > > > And the framework can run over all the profiles and spit out
> > > > > > > the insertion and deletion performance.
> > > > > >
> > > > > > What do you call a profile? Is it a set of rules?
> > > > >
> > > > > set of rules and/or actions.
> > > > >
> > > > > > I think this first version is proposing rules customization
> > > > > > with
> > > parameters.
> > > > >
> > > > > Just that it better to have a framework where one can easily add
> > > > > new profiles and test various combos. IMO, Cascade rules take
> > > > > more insertion time.
> > > > >
> > > > > > Note: I prefer a non-interactive application for performance testing.
> > > > >
> > > > > Me too. Command-line is fine.
> > > > >
> > > >
> > > > For this version I'm aiming to have the command line options to
> > > > decide the
> > > profile.
> > > > For example:
> > > > . /flow-perf -n 4 -w 0000:03:00.1,dv_flow_en=1 -- --ingress
> > > > --ether
> > > > --ipv4 --udp --vxlan-gpe --queue --mark Will mean 4 Million rules of:
> > > > Flow create 0 ingress pattern eth / ipv4 src is <X> / udp /
> > > > vxlan-gpe / end actions mark id 1 / queue < QUEUE _ID> / end
> > >
> > > Ok. The syntax looks good. I think we can add a number of rules as
> > > well in command like instead of hardcoding to 4Millon.
> >
> > Sure we can have it also
> > BTW, I'm planning to have a file under "user_paramters.h"
> > This file for other specific fields such as:
> > /** Flows count & iteration size **/
> > #define FLOWS_COUNT      4000000
> > #define ITERATION_SIZE  100000
> 
> Please make flows count a variable which can be changed with option.

Sure

> 
> 
> > > And what about the flow deletion performance case?
> >
> > I agree we should have it as well in this application, I plan it to do
> > it as well
> 
> Great, thanks
> 

Thanks,


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH 1/5] app/test-flow-perf: add flow performance skeleton
  2020-03-17 13:46 [dpdk-dev] [RFC] app/test-flow-perf: add rte_flow perf app Wisam Jaddo
  2020-03-20  6:49 ` Jerin Jacob
@ 2020-04-09 15:42 ` Wisam Jaddo
  2020-04-09 15:42   ` [dpdk-dev] [PATCH 2/5] app/test-flow-perf: add insertion rate calculation Wisam Jaddo
                     ` (6 more replies)
  1 sibling, 7 replies; 102+ messages in thread
From: Wisam Jaddo @ 2020-04-09 15:42 UTC (permalink / raw)
  To: dev, jackmin, jerinjacobk; +Cc: thomas

Add flow performance application skeleton.

Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 MAINTAINERS                          |   5 +
 app/Makefile                         |   1 +
 app/meson.build                      |   1 +
 app/test-flow-perf/Makefile          |  26 +++
 app/test-flow-perf/main.c            | 246 +++++++++++++++++++++++++++
 app/test-flow-perf/meson.build       |  11 ++
 app/test-flow-perf/user_parameters.h |  16 ++
 config/common_base                   |   5 +
 doc/guides/tools/flow-perf.rst       |  69 ++++++++
 doc/guides/tools/index.rst           |   1 +
 10 files changed, 381 insertions(+)
 create mode 100644 app/test-flow-perf/Makefile
 create mode 100644 app/test-flow-perf/main.c
 create mode 100644 app/test-flow-perf/meson.build
 create mode 100644 app/test-flow-perf/user_parameters.h
 create mode 100644 doc/guides/tools/flow-perf.rst

diff --git a/MAINTAINERS b/MAINTAINERS
index 4800f6884a..a389ac127f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1495,6 +1495,11 @@ T: git://dpdk.org/next/dpdk-next-net
 F: app/test-pmd/
 F: doc/guides/testpmd_app_ug/
 
+Flow performance tool
+M: Wisam Jaddo <wisamm@mellanox.com>
+F: app/test-flow-perf
+F: doc/guides/tools/flow-perf.rst
+
 Compression performance test application
 T: git://dpdk.org/next/dpdk-next-crypto
 F: app/test-compress-perf/
diff --git a/app/Makefile b/app/Makefile
index db9d2d5380..694df67358 100644
--- a/app/Makefile
+++ b/app/Makefile
@@ -9,6 +9,7 @@ DIRS-$(CONFIG_RTE_PROC_INFO) += proc-info
 DIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += pdump
 DIRS-$(CONFIG_RTE_LIBRTE_ACL) += test-acl
 DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += test-cmdline
+DIRS-$(CONFIG_RTE_TEST_FLOW_PERF) += test-flow-perf
 DIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += test-pipeline
 DIRS-$(CONFIG_RTE_LIBRTE_IPSEC) += test-sad
 
diff --git a/app/meson.build b/app/meson.build
index 71109cc422..20d77b0bd6 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -14,6 +14,7 @@ apps = [
 	'test-compress-perf',
 	'test-crypto-perf',
 	'test-eventdev',
+	'test-flow-perf',
 	'test-pipeline',
 	'test-pmd',
 	'test-sad']
diff --git a/app/test-flow-perf/Makefile b/app/test-flow-perf/Makefile
new file mode 100644
index 0000000000..45b1fb1464
--- /dev/null
+++ b/app/test-flow-perf/Makefile
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2020 Mellanox Technologies, Ltd
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifeq ($(CONFIG_RTE_TEST_FLOW_PERF),y)
+
+#
+# application name
+#
+APP = flow_perf
+
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -Wno-deprecated-declarations
+CFLAGS += -Wno-unused-function
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-y += main.c
+
+include $(RTE_SDK)/mk/rte.app.mk
+
+endif
diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
new file mode 100644
index 0000000000..156b9ef553
--- /dev/null
+++ b/app/test-flow-perf/main.c
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This is the main file of the application.
+ * This application provides the user the ability to test the
+ * insertion rate for specific rte_flow rule under stress state (~4M rules).
+ *
+ * Then it will also provide packet per second measurement after installing
+ * all rules, the user may send traffic to test the PPS that match the rules
+ * after all rules are installed, to check performance or functionality after
+ * the stress.
+ *
+ * The flows insertion will go for all ports first, then it will print the
+ * results, after that the application will go into forwarding packets mode
+ * it will start receiving traffic if any and then forwarding it back and
+ * gives packet per second measurement.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <netinet/in.h>
+#include <setjmp.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <assert.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/time.h>
+
+
+#include <rte_eal.h>
+#include <rte_common.h>
+#include <rte_malloc.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_net.h>
+#include <rte_flow.h>
+#include <rte_cycles.h>
+#include <rte_memory.h>
+
+#include "user_parameters.h"
+
+static uint32_t nb_lcores;
+static struct rte_mempool *mbuf_mp;
+
+static void usage(char *progname)
+{
+	printf("\nusage: %s", progname);
+}
+
+/* Parse application options; exits on --help or on an invalid option. */
+static void
+args_parse(int argc, char **argv)
+{
+	int opt;
+	int opt_idx;
+	static const struct option lgopts[] = {
+		/* Control */
+		{ "help",                       0, 0, 0 },
+		/* getopt_long() requires an all-zero terminating entry */
+		{ 0, 0, 0, 0 },
+	};
+
+	while ((opt = getopt_long(argc, argv, "",
+				lgopts, &opt_idx)) != -1) {
+		switch (opt) {
+		case 0:
+			if (!strcmp(lgopts[opt_idx].name, "help")) {
+				usage(argv[0]);
+				rte_exit(EXIT_SUCCESS, "Displayed help\n");
+			}
+			break;
+		default:
+			usage(argv[0]);
+			/* optind is normally one past the rejected argument */
+			printf("Invalid option: %s\n", argv[optind - 1]);
+			rte_exit(EXIT_FAILURE, "Invalid option\n");
+		}
+	}
+}
+
+/* Create the mbuf pool, then configure and start every available port. */
+static void
+init_port(void)
+{
+	int ret;
+	uint16_t i, j;
+	uint16_t port_id;
+	uint16_t nr_ports = rte_eth_dev_count_avail();
+	struct rte_eth_hairpin_conf hairpin_conf = {
+			.peer_count = 1,
+	};
+	struct rte_eth_conf port_conf = {
+		.rxmode = {
+			.split_hdr_size = 0,
+		},
+		.rx_adv_conf = {
+			.rss_conf.rss_hf =
+					ETH_RSS_IP  |
+					ETH_RSS_UDP |
+					ETH_RSS_TCP,
+		}
+	};
+	struct rte_eth_txconf txq_conf;
+	struct rte_eth_rxconf rxq_conf;
+	struct rte_eth_dev_info dev_info;
+
+	if (nr_ports == 0)
+		rte_exit(EXIT_FAILURE, "Error: no port detected\n");
+	mbuf_mp = rte_pktmbuf_pool_create("mbuf_pool",
+					TOTAL_MBUF_NUM, MBUF_CACHE_SIZE,
+					0, MBUF_SIZE,
+					rte_socket_id());
+	if (mbuf_mp == NULL)
+		rte_exit(EXIT_FAILURE, "Error: can't init mbuf pool\n");
+
+	/* Port ids can be sparse, so iterate with the ethdev macro. */
+	RTE_ETH_FOREACH_DEV(port_id) {
+		ret = rte_eth_dev_info_get(port_id, &dev_info);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+					"Error during getting device (port %u) info: %s\n",
+					port_id, strerror(-ret));
+
+		port_conf.txmode.offloads &= dev_info.tx_offload_capa;
+		printf(":: initializing port: %d\n", port_id);
+		ret = rte_eth_dev_configure(port_id, RXQs + HAIRPIN_QUEUES,
+				TXQs + HAIRPIN_QUEUES, &port_conf);
+		if (ret < 0)
+			rte_exit(EXIT_FAILURE,
+					":: cannot configure device: err=%d, port=%u\n",
+					ret, port_id);
+
+		rxq_conf = dev_info.default_rxconf;
+		rxq_conf.offloads = port_conf.rxmode.offloads;
+		for (i = 0; i < RXQs; i++) {
+			ret = rte_eth_rx_queue_setup(port_id, i, NR_RXD,
+						rte_eth_dev_socket_id(port_id),
+						&rxq_conf,
+						mbuf_mp);
+			if (ret < 0)
+				rte_exit(EXIT_FAILURE,
+						":: Rx queue setup failed: err=%d, port=%u\n",
+						ret, port_id);
+		}
+
+		txq_conf = dev_info.default_txconf;
+		txq_conf.offloads = port_conf.txmode.offloads;
+		for (i = 0; i < TXQs; i++) {
+			ret = rte_eth_tx_queue_setup(port_id, i, NR_TXD,
+						rte_eth_dev_socket_id(port_id),
+						&txq_conf);
+			if (ret < 0)
+				rte_exit(EXIT_FAILURE,
+						":: Tx queue setup failed: err=%d, port=%u\n",
+						ret, port_id);
+		}
+
+		ret = rte_eth_promiscuous_enable(port_id);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+					":: promiscuous mode enable failed: err=%s, port=%u\n",
+					rte_strerror(-ret), port_id);
+
+		for (i = RXQs, j = 0; i < RXQs + HAIRPIN_QUEUES; i++, j++) {
+			hairpin_conf.peers[0].port = port_id;
+			hairpin_conf.peers[0].queue = j + TXQs;
+			ret = rte_eth_rx_hairpin_queue_setup(port_id, i,
+							NR_RXD, &hairpin_conf);
+			if (ret != 0)
+				rte_exit(EXIT_FAILURE,
+					":: Hairpin rx queue setup failed: err=%d, port=%u\n",
+					ret, port_id);
+		}
+
+		for (i = TXQs, j = 0; i < TXQs + HAIRPIN_QUEUES; i++, j++) {
+			hairpin_conf.peers[0].port = port_id;
+			hairpin_conf.peers[0].queue = j + RXQs;
+			ret = rte_eth_tx_hairpin_queue_setup(port_id, i,
+							NR_TXD, &hairpin_conf);
+			if (ret != 0)
+				rte_exit(EXIT_FAILURE,
+					":: Hairpin tx queue setup failed: err=%d, port=%u\n",
+					ret, port_id);
+		}
+
+		ret = rte_eth_dev_start(port_id);
+		if (ret < 0)
+			rte_exit(EXIT_FAILURE,
+				"rte_eth_dev_start:err=%d, port=%u\n",
+				ret, port_id);
+
+		printf(":: initializing port: %d done\n", port_id);
+	}
+}
+
+/* Init EAL and ports, wait for the slave lcores, then release the ports. */
+int
+main(int argc, char **argv)
+{
+	uint16_t lcore_id;
+	uint16_t port;
+	int ret;
+	struct rte_flow_error error;
+
+	ret = rte_eal_init(argc, argv);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "EAL init failed\n");
+
+	argc -= ret;
+	argv += ret;
+
+	if (argc > 1)
+		args_parse(argc, argv);
+
+	init_port();
+
+	nb_lcores = rte_lcore_count();
+
+	if (nb_lcores <= 1)
+		rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
+
+	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+		if (rte_eal_wait_lcore(lcore_id) < 0)
+			break;
+	}
+
+	/* Ports only exist after rte_eal_init(), so enumerate them here. */
+	RTE_ETH_FOREACH_DEV(port) {
+		rte_flow_flush(port, &error);
+		rte_eth_dev_stop(port);
+		rte_eth_dev_close(port);
+	}
+	return 0;
+}
diff --git a/app/test-flow-perf/meson.build b/app/test-flow-perf/meson.build
new file mode 100644
index 0000000000..ec9bb3b3aa
--- /dev/null
+++ b/app/test-flow-perf/meson.build
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2020 Mellanox Technologies, Ltd
+
+# meson file, for building this example as part of a main DPDK build.
+#
+# To build this example as a standalone application with an already-installed
+# DPDK instance, use 'make'
+
+sources = files(
+	'main.c',
+)
diff --git a/app/test-flow-perf/user_parameters.h b/app/test-flow-perf/user_parameters.h
new file mode 100644
index 0000000000..56ec7f47b5
--- /dev/null
+++ b/app/test-flow-perf/user_parameters.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This file will hold the user parameters values
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+/** Configuration **/
+#define RXQs 4
+#define TXQs 4
+#define HAIRPIN_QUEUES 4
+#define TOTAL_MBUF_NUM 32000
+#define MBUF_SIZE 2048
+#define MBUF_CACHE_SIZE 512
+#define NR_RXD  256
+#define NR_TXD  256
diff --git a/config/common_base b/config/common_base
index c31175f9d6..79455bf94a 100644
--- a/config/common_base
+++ b/config/common_base
@@ -1111,3 +1111,8 @@ CONFIG_RTE_APP_CRYPTO_PERF=y
 # Compile the eventdev application
 #
 CONFIG_RTE_APP_EVENTDEV=y
+
+#
+# Compile the rte flow perf application
+#
+CONFIG_RTE_TEST_FLOW_PERF=y
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
new file mode 100644
index 0000000000..30ce1b6cc0
--- /dev/null
+++ b/doc/guides/tools/flow-perf.rst
@@ -0,0 +1,69 @@
+..	SPDX-License-Identifier: BSD-3-Clause
+	Copyright 2020 Mellanox Technologies, Ltd
+
+RTE Flow performance tool
+=========================
+
+Application for rte_flow performance testing.
+
+
+Compiling the Application
+=========================
+The ``test-flow-perf`` application is compiled as part of the main compilation
+of the DPDK libraries and tools.
+
+Refer to the DPDK Getting Started Guides for details.
+The basic compilation steps are:
+
+#. Set the required environmental variables and go to the source directory:
+
+	.. code-block:: console
+
+		export RTE_SDK=/path/to/rte_sdk
+		cd $RTE_SDK
+
+#. Set the compilation target. For example:
+
+	.. code-block:: console
+
+		export RTE_TARGET=x86_64-native-linux-gcc
+
+#. Build the application:
+
+	.. code-block:: console
+
+		make install T=$RTE_TARGET
+
+#. The compiled application will be located at:
+
+	.. code-block:: console
+
+		$RTE_SDK/$RTE_TARGET/app/flow_perf
+
+
+Running the Application
+=======================
+
+EAL Command-line Options
+------------------------
+
+Please refer to :doc:`EAL parameters (Linux) <../linux_gsg/linux_eal_parameters>`
+or :doc:`EAL parameters (FreeBSD) <../freebsd_gsg/freebsd_eal_parameters>` for
+a list of available EAL command-line options.
+
+
+Flow performance Options
+------------------------
+
+The following are the command-line options for the flow performance application.
+They must be separated from the EAL options, shown in the previous section, with
+a ``--`` separator:
+
+.. code-block:: console
+
+	sudo ./flow_perf -n 4 -w 08:00.0,dv_flow_en=1 --
+
+The command line options are:
+
+*	``--help``
+	Display a help message and quit.
diff --git a/doc/guides/tools/index.rst b/doc/guides/tools/index.rst
index 782b30864e..7279daebc6 100644
--- a/doc/guides/tools/index.rst
+++ b/doc/guides/tools/index.rst
@@ -16,3 +16,4 @@ DPDK Tools User Guides
     cryptoperf
     comp_perf
     testeventdev
+    flow-perf
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH 2/5] app/test-flow-perf: add insertion rate calculation
  2020-04-09 15:42 ` [dpdk-dev] [PATCH 1/5] app/test-flow-perf: add flow performance skeleton Wisam Jaddo
@ 2020-04-09 15:42   ` Wisam Jaddo
  2020-04-17  2:07     ` Xiaoyu Min
  2020-04-09 15:42   ` [dpdk-dev] [PATCH 3/5] app/test-flow-perf: add deletion " Wisam Jaddo
                     ` (5 subsequent siblings)
  6 siblings, 1 reply; 102+ messages in thread
From: Wisam Jaddo @ 2020-04-09 15:42 UTC (permalink / raw)
  To: dev, jackmin, jerinjacobk; +Cc: thomas

Add insertion rate calculation feature into flow
performance application.

The application now provides the ability to test
insertion rate of specific rte_flow rule, by
stressing it to the NIC, and calculate the
insertion rate.

The application offers some options in the command
line, to configure which rule to apply.

After that the application will start producing
rules with same pattern but increasing the outer IP
source address by 1 each time, thus it will give
different flow each time, and all other items will
have open masks.

The current design has single core insertion rate.
In the future we may have a multi core insertion
rate measurement support in the app.

Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 app/test-flow-perf/Makefile          |   3 +
 app/test-flow-perf/actions_gen.c     |  86 ++++++
 app/test-flow-perf/actions_gen.h     |  48 ++++
 app/test-flow-perf/flow_gen.c        | 179 ++++++++++++
 app/test-flow-perf/flow_gen.h        |  61 ++++
 app/test-flow-perf/items_gen.c       | 265 +++++++++++++++++
 app/test-flow-perf/items_gen.h       |  68 +++++
 app/test-flow-perf/main.c            | 415 +++++++++++++++++++++++++--
 app/test-flow-perf/meson.build       |   8 +
 app/test-flow-perf/user_parameters.h |  15 +
 doc/guides/tools/flow-perf.rst       | 186 +++++++++++-
 11 files changed, 1309 insertions(+), 25 deletions(-)
 create mode 100644 app/test-flow-perf/actions_gen.c
 create mode 100644 app/test-flow-perf/actions_gen.h
 create mode 100644 app/test-flow-perf/flow_gen.c
 create mode 100644 app/test-flow-perf/flow_gen.h
 create mode 100644 app/test-flow-perf/items_gen.c
 create mode 100644 app/test-flow-perf/items_gen.h

diff --git a/app/test-flow-perf/Makefile b/app/test-flow-perf/Makefile
index 45b1fb1464..968c7c60dd 100644
--- a/app/test-flow-perf/Makefile
+++ b/app/test-flow-perf/Makefile
@@ -19,6 +19,9 @@ CFLAGS += -Wno-unused-function
 #
 # all source are stored in SRCS-y
 #
+SRCS-y += actions_gen.c
+SRCS-y += flow_gen.c
+SRCS-y += items_gen.c
 SRCS-y += main.c
 
 include $(RTE_SDK)/mk/rte.app.mk
diff --git a/app/test-flow-perf/actions_gen.c b/app/test-flow-perf/actions_gen.c
new file mode 100644
index 0000000000..564ed820e4
--- /dev/null
+++ b/app/test-flow-perf/actions_gen.c
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * The file contains the implementations of actions generators.
+ * Each generator is responsible for preparing its action instance
+ * and initializing it with needed data.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ **/
+
+#include <sys/types.h>
+#include <rte_malloc.h>
+#include <rte_flow.h>
+#include <rte_ethdev.h>
+
+#include "actions_gen.h"
+#include "user_parameters.h"
+
+/*
+ * Single definitions of the objects declared extern in actions_gen.h.
+ * Defining them in the header would create one copy per includer and
+ * break the link when building with -fno-common.
+ */
+struct rte_flow_action_mark mark_action;
+struct rte_flow_action_queue queue_action;
+struct rte_flow_action_jump jump_action;
+struct rte_flow_action_rss *rss_action;
+struct rte_flow_action_set_meta meta_action;
+struct rte_flow_action_set_tag tag_action;
+struct rte_flow_action_port_id port_id;
+struct action_rss_data action_rss_data;
+
+/* Set the MARK action id to MARK_ID. */
+void
+gen_mark(void)
+{
+	mark_action.id = MARK_ID;
+}
+
+/* Point the QUEUE action at queue index 'queue'. */
+void
+gen_queue(uint16_t queue)
+{
+	queue_action.index = queue;
+}
+
+/* Make the JUMP action target group 'next_table'. */
+void
+gen_jump(uint16_t next_table)
+{
+	jump_action.group = next_table;
+}
+
+/*
+ * Build an RSS action configuration spreading over the given queues and
+ * publish it through rss_action. The storage comes from rte_malloc()
+ * and is never freed here.
+ */
+void
+gen_rss(uint16_t *queues, uint16_t queues_number)
+{
+	uint16_t queue;
+	struct action_rss_data *rss_data;
+
+	/* The queue list is copied into a fixed-size array: check it fits. */
+	if (queues_number > RTE_DIM(rss_data->queue))
+		rte_exit(EXIT_FAILURE, "Too many RSS queues\n");
+
+	rss_data = rte_malloc("rss_data", sizeof(*rss_data), 0);
+	if (rss_data == NULL)
+		rte_exit(EXIT_FAILURE, "No Memory available!\n");
+
+	*rss_data = (struct action_rss_data){
+		.conf = (struct rte_flow_action_rss){
+			.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
+			.level = 0,
+			.types = ETH_RSS_IP,
+			.key_len = 0,
+			.queue_num = queues_number,
+			.key = NULL,
+			/* points at the queue array embedded in rss_data */
+			.queue = rss_data->queue,
+		},
+		.key = { 0 },
+		.queue = { 0 },
+	};
+
+	for (queue = 0; queue < queues_number; queue++)
+		rss_data->queue[queue] = queues[queue];
+
+	rss_action = &rss_data->conf;
+}
+
+/* Configure SET_META to write META_DATA under a full 32-bit mask. */
+void
+gen_set_meta(void)
+{
+	meta_action.data = RTE_BE32(META_DATA);
+	meta_action.mask = RTE_BE32(0xffffffff);
+}
+
+/* Configure SET_TAG to write META_DATA, fully masked, at TAG_INDEX. */
+void
+gen_set_tag(void)
+{
+	tag_action.data = RTE_BE32(META_DATA);
+	tag_action.mask = RTE_BE32(0xffffffff);
+	tag_action.index = TAG_INDEX;
+}
+
+/* Set the PORT_ID action destination to PORT_ID_DST. */
+void
+gen_port_id(void)
+{
+	port_id.id = PORT_ID_DST;
+}
diff --git a/app/test-flow-perf/actions_gen.h b/app/test-flow-perf/actions_gen.h
new file mode 100644
index 0000000000..556d48b871
--- /dev/null
+++ b/app/test-flow-perf/actions_gen.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This file contains the functions declarations to
+ * generate each supported action.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ **/
+
+#ifndef _ACTION_GEN_
+#define _ACTION_GEN_
+
+#include <stdint.h>
+#include <rte_flow.h>
+
+/* Storage for struct rte_flow_action_rss including external data. */
+struct action_rss_data {
+	struct rte_flow_action_rss conf;
+	uint8_t key[64];
+	uint16_t queue[128];
+};
+
+/*
+ * Action configurations filled by the generators below. They are only
+ * declared here; the definitions live in actions_gen.c.
+ */
+extern struct rte_flow_action_mark mark_action;
+extern struct rte_flow_action_queue queue_action;
+extern struct rte_flow_action_jump jump_action;
+extern struct rte_flow_action_rss *rss_action;
+extern struct rte_flow_action_set_meta meta_action;
+extern struct rte_flow_action_set_tag tag_action;
+extern struct rte_flow_action_port_id port_id;
+extern struct action_rss_data action_rss_data;
+
+void
+gen_mark(void);
+
+void
+gen_queue(uint16_t queue);
+
+void
+gen_jump(uint16_t next_table);
+
+void
+gen_rss(uint16_t *queues, uint16_t queues_number);
+
+void
+gen_set_meta(void);
+
+void
+gen_set_tag(void);
+
+void
+gen_port_id(void);
+
+#endif
diff --git a/app/test-flow-perf/flow_gen.c b/app/test-flow-perf/flow_gen.c
new file mode 100644
index 0000000000..20187e4ed4
--- /dev/null
+++ b/app/test-flow-perf/flow_gen.c
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * The file contains the implementations of the method to
+ * fill items, actions & attributes in their corresponding
+ * arrays, and then generate rte_flow rule.
+ *
+ * After the generation, the rule goes to validation then
+ * creation state and then the results are returned.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+#include <stdint.h>
+
+#include "flow_gen.h"
+#include "items_gen.h"
+#include "actions_gen.h"
+#include "user_parameters.h"
+
+
+static void
+fill_attributes(struct rte_flow_attr *attr,
+	uint8_t flow_attrs, uint16_t group)
+{
+	if (flow_attrs & INGRESS)
+		attr->ingress = 1;
+	if (flow_attrs & EGRESS)
+		attr->egress = 1;
+	if (flow_attrs & TRANSFER)
+		attr->transfer = 1;
+	attr->group = group;
+}
+
+static void
+fill_items(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint16_t flow_items, uint32_t outer_ip_src)
+{
+	uint8_t items_counter = 0;
+
+	if (flow_items & META_ITEM)
+		add_meta_data(items, items_counter++);
+	if (flow_items & TAG_ITEM)
+		add_meta_tag(items, items_counter++);
+	if (flow_items & ETH_ITEM)
+		add_ether(items, items_counter++);
+	if (flow_items & VLAN_ITEM)
+		add_vlan(items, items_counter++);
+	if (flow_items & IPV4_ITEM)
+		add_ipv4(items, items_counter++, outer_ip_src);
+	if (flow_items & IPV6_ITEM)
+		add_ipv6(items, items_counter++, outer_ip_src);
+	if (flow_items & TCP_ITEM)
+		add_tcp(items, items_counter++);
+	if (flow_items & UDP_ITEM)
+		add_udp(items, items_counter++);
+	if (flow_items & VXLAN_ITEM)
+		add_vxlan(items, items_counter++);
+	if (flow_items & VXLAN_GPE_ITEM)
+		add_vxlan_gpe(items, items_counter++);
+	if (flow_items & GRE_ITEM)
+		add_gre(items, items_counter++);
+	if (flow_items & GENEVE_ITEM)
+		add_geneve(items, items_counter++);
+	if (flow_items & GTP_ITEM)
+		add_gtp(items, items_counter++);
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_END;
+}
+
+/* Translate the flow_actions bitmap into an END-terminated action list. */
+static void
+fill_actions(struct rte_flow_action actions[MAX_ACTIONS_NUM],
+	uint16_t flow_actions, uint32_t counter, uint16_t next_table)
+{
+	uint8_t actions_counter = 0;
+	uint16_t queues[RXQs];
+	uint16_t hairpin_queues[HAIRPIN_QUEUES];
+	uint16_t i;
+	static struct rte_flow_action_count count_action; /* outlives call */
+
+	/* Non-fate actions */
+	if (flow_actions & MARK_ACTION) {
+		if (!counter)
+			gen_mark();
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_MARK;
+		actions[actions_counter++].conf = &mark_action;
+	}
+	if (flow_actions & COUNT_ACTION) {
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_COUNT;
+		actions[actions_counter++].conf = &count_action;
+	}
+	if (flow_actions & META_ACTION) {
+		if (!counter)
+			gen_set_meta();
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_SET_META;
+		actions[actions_counter++].conf = &meta_action;
+	}
+	if (flow_actions & TAG_ACTION) {
+		if (!counter)
+			gen_set_tag();
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_SET_TAG;
+		actions[actions_counter++].conf = &tag_action;
+	}
+
+	/* Fate actions */
+	if (flow_actions & QUEUE_ACTION) {
+		gen_queue(counter % RXQs);
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_QUEUE;
+		actions[actions_counter++].conf = &queue_action;
+	}
+	if (flow_actions & RSS_ACTION) {
+		if (!counter) {
+			for (i = 0; i < RXQs; i++)
+				queues[i] = i;
+			gen_rss(queues, RXQs);
+		}
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_RSS;
+		actions[actions_counter++].conf = rss_action;
+	}
+	if (flow_actions & JUMP_ACTION) {
+		if (!counter)
+			gen_jump(next_table);
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_JUMP;
+		actions[actions_counter++].conf = &jump_action;
+	}
+	if (flow_actions & PORT_ID_ACTION) {
+		if (!counter)
+			gen_port_id();
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_PORT_ID;
+		actions[actions_counter++].conf = &port_id;
+	}
+	if (flow_actions & DROP_ACTION)
+		actions[actions_counter++].type = RTE_FLOW_ACTION_TYPE_DROP;
+	if (flow_actions & HAIRPIN_QUEUE_ACTION) {
+		gen_queue((counter % HAIRPIN_QUEUES) + RXQs);
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_QUEUE;
+		actions[actions_counter++].conf = &queue_action;
+	}
+	if (flow_actions & HAIRPIN_RSS_ACTION) {
+		if (!counter) {
+			for (i = 0; i < HAIRPIN_QUEUES; i++)
+				hairpin_queues[i] = i + RXQs;
+			gen_rss(hairpin_queues, HAIRPIN_QUEUES);
+		}
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_RSS;
+		actions[actions_counter++].conf = rss_action;
+	}
+	actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_END;
+}
+
+struct rte_flow *
+generate_flow(uint16_t port_id,
+	uint16_t group,
+	uint8_t flow_attrs,
+	uint16_t flow_items,
+	uint16_t flow_actions,
+	uint16_t next_table,
+	uint32_t outer_ip_src,
+	struct rte_flow_error *error)
+{
+	struct rte_flow_attr attr;
+	struct rte_flow_item items[MAX_ITEMS_NUM];
+	struct rte_flow_action actions[MAX_ACTIONS_NUM];
+	struct rte_flow *flow = NULL;
+
+	memset(items, 0, sizeof(items));
+	memset(actions, 0, sizeof(actions));
+	memset(&attr, 0, sizeof(struct rte_flow_attr));
+
+	fill_attributes(&attr, flow_attrs, group);
+
+	fill_actions(actions, flow_actions,
+			outer_ip_src, next_table);
+
+	fill_items(items, flow_items, outer_ip_src);
+
+	flow = rte_flow_create(port_id, &attr, items, actions, error);
+	return flow;
+}
diff --git a/app/test-flow-perf/flow_gen.h b/app/test-flow-perf/flow_gen.h
new file mode 100644
index 0000000000..99cb9e3791
--- /dev/null
+++ b/app/test-flow-perf/flow_gen.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This file contains the items, actions and attributes
+ * definition. And the methods to prepare and fill items,
+ * actions and attributes to generate rte_flow rule.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+#ifndef _FLOW_GEN_
+#define _FLOW_GEN_
+
+#include <stdint.h>
+#include <rte_flow.h>
+
+#include "user_parameters.h"
+
+/* Items */
+#define ETH_ITEM       0x0001
+#define IPV4_ITEM      0x0002
+#define IPV6_ITEM      0x0004
+#define VLAN_ITEM      0x0008
+#define TCP_ITEM       0x0010
+#define UDP_ITEM       0x0020
+#define VXLAN_ITEM     0x0040
+#define VXLAN_GPE_ITEM 0x0080
+#define GRE_ITEM       0x0100
+#define GENEVE_ITEM    0x0200
+#define GTP_ITEM       0x0400
+#define META_ITEM      0x0800
+#define TAG_ITEM       0x1000
+
+/* Actions */
+#define QUEUE_ACTION   0x0001
+#define MARK_ACTION    0x0002
+#define JUMP_ACTION    0x0004
+#define RSS_ACTION     0x0008
+#define COUNT_ACTION   0x0010
+#define META_ACTION    0x0020
+#define TAG_ACTION     0x0040
+#define DROP_ACTION    0x0080
+#define PORT_ID_ACTION 0x0100
+#define HAIRPIN_QUEUE_ACTION 0x0200
+#define HAIRPIN_RSS_ACTION   0x0400
+
+/* Attributes */
+#define INGRESS  0x0001
+#define EGRESS   0x0002
+#define TRANSFER 0x0004
+
+struct rte_flow *
+generate_flow(uint16_t port_id,
+	uint16_t group,
+	uint8_t flow_attrs,
+	uint16_t flow_items,
+	uint16_t flow_actions,
+	uint16_t next_table,
+	uint32_t outer_ip_src,
+	struct rte_flow_error *error);
+
+#endif
diff --git a/app/test-flow-perf/items_gen.c b/app/test-flow-perf/items_gen.c
new file mode 100644
index 0000000000..fb9733d4e7
--- /dev/null
+++ b/app/test-flow-perf/items_gen.c
@@ -0,0 +1,265 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This file contains the implementations of the items
+ * related methods. Each item has a method to prepare
+ * the item and add it into items array in given index.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+#include <stdint.h>
+#include <rte_flow.h>
+
+#include "items_gen.h"
+#include "user_parameters.h"
+
+static struct rte_flow_item_eth eth_spec;
+static struct rte_flow_item_eth eth_mask;
+static struct rte_flow_item_vlan vlan_spec;
+static struct rte_flow_item_vlan vlan_mask;
+static struct rte_flow_item_ipv4 ipv4_spec;
+static struct rte_flow_item_ipv4 ipv4_mask;
+static struct rte_flow_item_ipv6 ipv6_spec;
+static struct rte_flow_item_ipv6 ipv6_mask;
+static struct rte_flow_item_udp udp_spec;
+static struct rte_flow_item_udp udp_mask;
+static struct rte_flow_item_tcp tcp_spec;
+static struct rte_flow_item_tcp tcp_mask;
+static struct rte_flow_item_vxlan vxlan_spec;
+static struct rte_flow_item_vxlan vxlan_mask;
+static struct rte_flow_item_vxlan_gpe vxlan_gpe_spec;
+static struct rte_flow_item_vxlan_gpe vxlan_gpe_mask;
+static struct rte_flow_item_gre gre_spec;
+static struct rte_flow_item_gre gre_mask;
+static struct rte_flow_item_geneve geneve_spec;
+static struct rte_flow_item_geneve geneve_mask;
+static struct rte_flow_item_gtp gtp_spec;
+static struct rte_flow_item_gtp gtp_mask;
+static struct rte_flow_item_meta meta_spec;
+static struct rte_flow_item_meta meta_mask;
+static struct rte_flow_item_tag tag_spec;
+static struct rte_flow_item_tag tag_mask;
+
+
+void
+add_ether(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	memset(&eth_spec, 0, sizeof(struct rte_flow_item_eth));
+	memset(&eth_mask, 0, sizeof(struct rte_flow_item_eth));
+	eth_spec.type = 0;
+	eth_mask.type = 0;
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_ETH;
+	items[items_counter].spec = &eth_spec;
+	items[items_counter].mask = &eth_mask;
+}
+
+void
+add_vlan(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	uint16_t vlan_value = VLAN_VALUE;
+	memset(&vlan_spec, 0, sizeof(struct rte_flow_item_vlan));
+	memset(&vlan_mask, 0, sizeof(struct rte_flow_item_vlan));
+
+	vlan_spec.tci = RTE_BE16(vlan_value);
+	vlan_mask.tci = RTE_BE16(0xffff);
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_VLAN;
+	items[items_counter].spec = &vlan_spec;
+	items[items_counter].mask = &vlan_mask;
+}
+
+/* Add an IPv4 item matching exactly the source address src_ipv4. */
+void
+add_ipv4(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter, uint32_t src_ipv4)
+{
+	memset(&ipv4_spec, 0, sizeof(struct rte_flow_item_ipv4));
+	memset(&ipv4_mask, 0, sizeof(struct rte_flow_item_ipv4));
+	/* src_ipv4 is treated as host order; the header field is big endian */
+	ipv4_spec.hdr.src_addr = rte_cpu_to_be_32(src_ipv4);
+	ipv4_mask.hdr.src_addr = RTE_BE32(0xffffffff);
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_IPV4;
+	items[items_counter].spec = &ipv4_spec;
+	items[items_counter].mask = &ipv4_mask;
+}
+
+
+/* Add an IPv6 item matching a source address derived from src_ipv6. */
+void
+add_ipv6(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter, int src_ipv6)
+{
+	uint8_t *addr = (uint8_t *)&ipv6_spec.hdr.src_addr;
+	uint8_t i;
+
+	memset(&ipv6_spec, 0, sizeof(struct rte_flow_item_ipv6));
+	memset(&ipv6_mask, 0, sizeof(struct rte_flow_item_ipv6));
+	/* Last 4 address bytes carry the whole counter, not just 8 bits. */
+	for (i = 0; i < 4; i++)
+		addr[15 - i] = (uint32_t)src_ipv6 >> (i * 8);
+	/* Full mask: every bit set (0xff per byte, not 0x01). */
+	memset(&ipv6_mask.hdr.src_addr, 0xff, sizeof(ipv6_mask.hdr.src_addr));
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_IPV6;
+	items[items_counter].spec = &ipv6_spec;
+	items[items_counter].mask = &ipv6_mask;
+}
+
+void
+add_tcp(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	memset(&tcp_spec, 0, sizeof(struct rte_flow_item_tcp));
+	memset(&tcp_mask, 0, sizeof(struct rte_flow_item_tcp));
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_TCP;
+	items[items_counter].spec = &tcp_spec;
+	items[items_counter].mask = &tcp_mask;
+}
+
+void
+add_udp(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	memset(&udp_spec, 0, sizeof(struct rte_flow_item_udp));
+	memset(&udp_mask, 0, sizeof(struct rte_flow_item_udp));
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_UDP;
+	items[items_counter].spec = &udp_spec;
+	items[items_counter].mask = &udp_mask;
+}
+
+void
+add_vxlan(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	uint32_t vni_value = VNI_VALUE;
+	uint8_t i;
+	memset(&vxlan_spec, 0, sizeof(struct rte_flow_item_vxlan));
+	memset(&vxlan_mask, 0, sizeof(struct rte_flow_item_vxlan));
+
+	/* Set standard vxlan vni */
+	for (i = 0; i < 3; i++) {
+		vxlan_spec.vni[2 - i] = vni_value >> (i * 8);
+		vxlan_mask.vni[2 - i] = 0xff;
+	}
+
+	/* Standard vxlan flags **/
+	vxlan_spec.flags = 0x8;
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_VXLAN;
+	items[items_counter].spec = &vxlan_spec;
+	items[items_counter].mask = &vxlan_mask;
+}
+
+void
+add_vxlan_gpe(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	uint32_t vni_value = VNI_VALUE;
+	uint8_t i;
+	memset(&vxlan_gpe_spec, 0, sizeof(struct rte_flow_item_vxlan_gpe));
+	memset(&vxlan_gpe_mask, 0, sizeof(struct rte_flow_item_vxlan_gpe));
+
+	/* Set vxlan-gpe vni */
+	for (i = 0; i < 3; i++) {
+		vxlan_gpe_spec.vni[2 - i] = vni_value >> (i * 8);
+		vxlan_gpe_mask.vni[2 - i] = 0xff;
+	}
+
+	/* vxlan-gpe flags */
+	vxlan_gpe_spec.flags = 0x0c;
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE;
+	items[items_counter].spec = &vxlan_gpe_spec;
+	items[items_counter].mask = &vxlan_gpe_mask;
+}
+
+void
+add_gre(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	uint16_t proto = GRE_PROTO;
+	memset(&gre_spec, 0, sizeof(struct rte_flow_item_gre));
+	memset(&gre_mask, 0, sizeof(struct rte_flow_item_gre));
+
+	gre_spec.protocol = RTE_BE16(proto);
+	gre_mask.protocol = 0xffff;
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_GRE;
+	items[items_counter].spec = &gre_spec;
+	items[items_counter].mask = &gre_mask;
+}
+
+void
+add_geneve(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	uint32_t vni_value = VNI_VALUE;
+	uint8_t i;
+	memset(&geneve_spec, 0, sizeof(struct rte_flow_item_geneve));
+	memset(&geneve_mask, 0, sizeof(struct rte_flow_item_geneve));
+
+	for (i = 0; i < 3; i++) {
+		geneve_spec.vni[2 - i] = vni_value >> (i * 8);
+		geneve_mask.vni[2 - i] = 0xff;
+	}
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_GENEVE;
+	items[items_counter].spec = &geneve_spec;
+	items[items_counter].mask = &geneve_mask;
+}
+
+void
+add_gtp(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	uint32_t teid_value = TEID_VALUE;
+	memset(&gtp_spec, 0, sizeof(struct rte_flow_item_gtp));
+	memset(&gtp_mask, 0, sizeof(struct rte_flow_item_gtp));
+
+	gtp_spec.teid = RTE_BE32(teid_value);
+	gtp_mask.teid = RTE_BE32(0xffffffff);
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_GTP;
+	items[items_counter].spec = &gtp_spec;
+	items[items_counter].mask = &gtp_mask;
+}
+
+void
+add_meta_data(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	uint32_t data = META_DATA;
+	memset(&meta_spec, 0, sizeof(struct rte_flow_item_meta));
+	memset(&meta_mask, 0, sizeof(struct rte_flow_item_meta));
+
+	meta_spec.data = RTE_BE32(data);
+	meta_mask.data = RTE_BE32(0xffffffff);
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_META;
+	items[items_counter].spec = &meta_spec;
+	items[items_counter].mask = &meta_mask;
+}
+
+
+void
+add_meta_tag(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	uint32_t data = META_DATA;
+	uint8_t index = TAG_INDEX;
+	memset(&tag_spec, 0, sizeof(struct rte_flow_item_tag));
+	memset(&tag_mask, 0, sizeof(struct rte_flow_item_tag));
+
+	tag_spec.data = RTE_BE32(data);
+	tag_mask.data = RTE_BE32(0xffffffff);
+	tag_spec.index = index;
+	tag_mask.index = 0xff;
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_TAG;
+	items[items_counter].spec = &tag_spec;
+	items[items_counter].mask = &tag_mask;
+}
diff --git a/app/test-flow-perf/items_gen.h b/app/test-flow-perf/items_gen.h
new file mode 100644
index 0000000000..0b01385951
--- /dev/null
+++ b/app/test-flow-perf/items_gen.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This file contains the items related methods
+ *
+ * Every add_*() helper declared here receives the flow pattern array
+ * and the index (items_counter) of the entry it is expected to fill.
+ * The array is declared with MAX_ITEMS_NUM entries, which comes from
+ * user_parameters.h.
+ *
+ * NOTE(review): add_ipv4() takes the source address as uint32_t while
+ * add_ipv6() takes an int; confirm against items_gen.c that this
+ * difference is intended.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+#ifndef _ITEMS_GEN_
+#define _ITEMS_GEN_
+
+#include <stdint.h>
+#include <rte_flow.h>
+
+#include "user_parameters.h"
+
+void
+add_ether(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_vlan(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_ipv4(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter, uint32_t src_ipv4);
+
+void
+add_ipv6(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter, int src_ipv6);
+
+void
+add_udp(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_tcp(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_vxlan(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_vxlan_gpe(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_gre(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_geneve(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_gtp(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_meta_data(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_meta_tag(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+#endif
diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index 156b9ef553..59dc5ae0f4 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -49,29 +49,119 @@
 #include <rte_cycles.h>
 #include <rte_memory.h>
 
+#include "flow_gen.h"
 #include "user_parameters.h"
 
-static uint32_t nb_lcores;
+#define MAX_ITERATIONS 100
+
+struct rte_flow *flow;
+static uint8_t flow_group;
+
+static uint16_t flow_items;
+static uint16_t flow_actions;
+static uint8_t flow_attrs;
+static volatile bool force_quit;
+static volatile bool dump_iterations;
 static struct rte_mempool *mbuf_mp;
+static uint32_t nb_lcores;
+static uint32_t flows_count;
+static uint32_t iterations_number;
 
 static void usage(char *progname)
 {
 	printf("\nusage: %s", progname);
+	printf("\nControl configurations:\n");
+	printf("  --flows-count=N: to set the number of needed"
+		" flows to insert, default is 4,000,000\n");
+	printf("  --dump-iterations: To print rates for each"
+		" iteration\n");
+
+	printf("To set flow attributes:\n");
+	printf("  --ingress: set ingress attribute in flows\n");
+	printf("  --egress: set egress attribute in flows\n");
+	printf("  --transfer: set transfer attribute in flows\n");
+	printf("  --group=N: set group for all flows,"
+		" default is 0\n");
+
+	printf("To set flow items:\n");
+	printf("  --ether: add ether layer in flow items\n");
+	printf("  --vlan: add vlan layer in flow items\n");
+	printf("  --ipv4: add ipv4 layer in flow items\n");
+	printf("  --ipv6: add ipv6 layer in flow items\n");
+	printf("  --tcp: add tcp layer in flow items\n");
+	printf("  --udp: add udp layer in flow items\n");
+	printf("  --vxlan: add vxlan layer in flow items\n");
+	printf("  --vxlan-gpe: add vxlan-gpe layer in flow items\n");
+	printf("  --gre: add gre layer in flow items\n");
+	printf("  --geneve: add geneve layer in flow items\n");
+	printf("  --gtp: add gtp layer in flow items\n");
+	printf("  --meta: add meta layer in flow items\n");
+	printf("  --tag: add tag layer in flow items\n");
+
+	printf("To set flow actions:\n");
+	printf("  --port-id: add port-id action in flow actions\n");
+	printf("  --rss: add rss action in flow actions\n");
+	printf("  --queue: add queue action in flow actions\n");
+	printf("  --jump: add jump action in flow actions\n");
+	printf("  --mark: add mark action in flow actions\n");
+	printf("  --count: add count action in flow actions\n");
+	printf("  --set-meta: add set meta action in flow actions\n");
+	printf("  --set-tag: add set tag action in flow actions\n");
+	printf("  --drop: add drop action in flow actions\n");
+	printf("  --hairpin-queue: add hairpin-queue action in flow actions\n");
+	printf("  --hairpin-rss: add hairping-rss action in flow actions\n");
 }
 
 static void
 args_parse(int argc, char **argv)
 {
 	char **argvopt;
-	int opt;
+	int n, opt;
 	int opt_idx;
 	static struct option lgopts[] = {
 		/* Control */
 		{ "help",                       0, 0, 0 },
+		{ "flows-count",                1, 0, 0 },
+		{ "dump-iterations",            0, 0, 0 },
+		/* Attributes */
+		{ "ingress",                    0, 0, 0 },
+		{ "egress",                     0, 0, 0 },
+		{ "transfer",                   0, 0, 0 },
+		{ "group",                      1, 0, 0 },
+		/* Items */
+		{ "ether",                      0, 0, 0 },
+		{ "vlan",                       0, 0, 0 },
+		{ "ipv4",                       0, 0, 0 },
+		{ "ipv6",                       0, 0, 0 },
+		{ "tcp",                        0, 0, 0 },
+		{ "udp",                        0, 0, 0 },
+		{ "vxlan",                      0, 0, 0 },
+		{ "vxlan-gpe",                  0, 0, 0 },
+		{ "gre",                        0, 0, 0 },
+		{ "geneve",                     0, 0, 0 },
+		{ "gtp",                        0, 0, 0 },
+		{ "meta",                       0, 0, 0 },
+		{ "tag",                        0, 0, 0 },
+		/* Actions */
+		{ "port-id",                    0, 0, 0 },
+		{ "rss",                        0, 0, 0 },
+		{ "queue",                      0, 0, 0 },
+		{ "jump",                       0, 0, 0 },
+		{ "mark",                       0, 0, 0 },
+		{ "count",                      0, 0, 0 },
+		{ "set-meta",                   0, 0, 0 },
+		{ "set-tag",                    0, 0, 0 },
+		{ "drop",                       0, 0, 0 },
+		{ "hairpin-queue",              0, 0, 0 },
+		{ "hairpin-rss",                0, 0, 0 },
 	};
 
+	flow_items = 0;
+	flow_actions = 0;
+	flow_attrs = 0;
 	argvopt = argv;
 
+	printf(":: Flow -> ");
 	while ((opt = getopt_long(argc, argvopt, "",
 				lgopts, &opt_idx)) != EOF) {
 		switch (opt) {
@@ -80,6 +170,140 @@ args_parse(int argc, char **argv)
 				usage(argv[0]);
 				rte_exit(EXIT_SUCCESS, "Displayed help\n");
 			}
+			/* Attributes */
+			if (!strcmp(lgopts[opt_idx].name, "ingress")) {
+				flow_attrs |= INGRESS;
+				printf("ingress ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "egress")) {
+				flow_attrs |= EGRESS;
+				printf("egress ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "transfer")) {
+				flow_attrs |= TRANSFER;
+				printf("transfer ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "group")) {
+				n = atoi(optarg);
+				if (n >= 0)
+					flow_group = n;
+				else
+					rte_exit(EXIT_SUCCESS,
+						"flow group should be >= 0");
+				printf("group %d ", flow_group);
+			}
+			/* Items */
+			if (!strcmp(lgopts[opt_idx].name, "ether")) {
+				flow_items |= ETH_ITEM;
+				printf("ether / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "ipv4")) {
+				flow_items |= IPV4_ITEM;
+				printf("ipv4 / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "vlan")) {
+				flow_items |= VLAN_ITEM;
+				printf("vlan / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "ipv6")) {
+				flow_items |= IPV6_ITEM;
+				printf("ipv6 / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "tcp")) {
+				flow_items |= TCP_ITEM;
+				printf("tcp / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "udp")) {
+				flow_items |= UDP_ITEM;
+				printf("udp / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "vxlan")) {
+				flow_items |= VXLAN_ITEM;
+				printf("vxlan / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "vxlan-gpe")) {
+				flow_items |= VXLAN_GPE_ITEM;
+				printf("vxlan-gpe / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "gre")) {
+				flow_items |= GRE_ITEM;
+				printf("gre / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "geneve")) {
+				flow_items |= GENEVE_ITEM;
+				printf("geneve / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "gtp")) {
+				flow_items |= GTP_ITEM;
+				printf("gtp / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "meta")) {
+				flow_items |= META_ITEM;
+				printf("meta / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "tag")) {
+				flow_items |= TAG_ITEM;
+				printf("tag / ");
+			}
+			/* Actions */
+			if (!strcmp(lgopts[opt_idx].name, "port-id")) {
+				flow_actions |= PORT_ID_ACTION;
+				printf("port-id / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "rss")) {
+				flow_actions |= RSS_ACTION;
+				printf("rss / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "hairpin-rss")) {
+				flow_actions |= HAIRPIN_RSS_ACTION;
+				printf("hairpin-rss / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "queue")) {
+				flow_actions |= QUEUE_ACTION;
+				printf("queue / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "hairpin-queue")) {
+				flow_actions |= HAIRPIN_QUEUE_ACTION;
+				printf("hairpin-queue / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "jump")) {
+				flow_actions |= JUMP_ACTION;
+				printf("jump / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "mark")) {
+				flow_actions |= MARK_ACTION;
+				printf("mark / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "count")) {
+				flow_actions |= COUNT_ACTION;
+				printf("count / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "set-meta")) {
+				flow_actions |= META_ACTION;
+				printf("set-meta / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "set-tag")) {
+				flow_actions |= TAG_ACTION;
+				printf("set-tag / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "drop")) {
+				flow_actions |= DROP_ACTION;
+				printf("drop / ");
+			}
+
+			/* Control */
+			if (!strcmp(lgopts[opt_idx].name, "flows-count")) {
+				n = atoi(optarg);
+				if (n > (int) iterations_number)
+					flows_count = n;
+				else {
+					printf("\n\nflows_count should be > %d",
+						iterations_number);
+					rte_exit(EXIT_SUCCESS, " ");
+				}
+			}
+			if (!strcmp(lgopts[opt_idx].name, "dump-iterations"))
+				dump_iterations = true;
 			break;
 		default:
 			usage(argv[0]);
@@ -88,6 +312,127 @@ args_parse(int argc, char **argv)
 			break;
 		}
 	}
+	printf("end_flow\n");
+}
+
+/*
+ * Print the type and message of a flow error; "(no stated reason)" is
+ * shown when the error carries no message. The error is taken by value.
+ */
+static void
+print_flow_error(struct rte_flow_error error)
+{
+	printf("Flow can't be created %d message: %s\n",
+		error.type,
+		error.message ? error.message : "(no stated reason)");
+}
+
+/*
+ * Insert flows_count flows on every available port and report the
+ * insertion rate.
+ *
+ * For each port:
+ *  - when flow_group > 0, a group 0 rule (ETH item, JUMP action) is
+ *    created first so traffic reaches flow_group;
+ *  - flows are created one by one with generate_flow(); a creation is
+ *    retried for as long as rte_errno reads EAGAIN;
+ *  - the CPU time of every batch of iterations_number flows is sampled
+ *    with clock() and, with --dump-iterations, printed per batch;
+ *  - the total rate and the EAGAIN counter are printed.
+ *
+ * Any other creation failure terminates the application via rte_exit().
+ * The global iterations_number is clamped to flows_count.
+ *
+ * NOTE(review): cpu_time_used, cpu_time_per_iter[] and eagain_counter
+ * are initialised once, before the port loop, so figures printed for
+ * ports after the first also include the earlier ports.
+ */
+static inline void
+flows_handler(void)
+{
+	struct rte_flow_error error;
+	clock_t start_iter, end_iter;
+	double cpu_time_used = 0;
+	double flows_rate;
+	double cpu_time_per_iter[MAX_ITERATIONS];
+	double delta;
+	uint16_t nr_ports;
+	uint32_t i;
+	int port_id;
+	int iter_id;
+	uint32_t eagain_counter = 0;
+
+	nr_ports = rte_eth_dev_count_avail();
+
+	for (i = 0; i < MAX_ITERATIONS; i++)
+		cpu_time_per_iter[i] = -1;
+
+	if (iterations_number > flows_count)
+		iterations_number = flows_count;
+
+	printf(":: Flows Count per port: %d\n", flows_count);
+
+	for (port_id = 0; port_id < nr_ports; port_id++) {
+		if (flow_group > 0) {
+			/*
+			 * Create global rule to jumo into flow_group
+			 * This way the app will avoid the default rules
+			 *
+			 * Global rule:
+			 * group 0 eth / end actions jump group <flow_group>
+			 *
+			 */
+			flow = generate_flow(port_id, 0, flow_attrs, ETH_ITEM,
+				JUMP_ACTION, flow_group, 0, &error);
+
+			if (!flow) {
+				print_flow_error(error);
+				rte_exit(EXIT_FAILURE, "error in creating flow");
+			}
+		}
+
+		/* Insertion Rate */
+		printf("Flows insertion on port = %d\n", port_id);
+		start_iter = clock();
+		for (i = 0; i < flows_count; i++) {
+			do {
+				rte_errno = 0;
+				flow = generate_flow(port_id, flow_group,
+					flow_attrs, flow_items, flow_actions,
+					JUMP_ACTION_TABLE, i,  &error);
+				if (!flow)
+					eagain_counter++;
+			} while (rte_errno == EAGAIN);
+
+			if (force_quit)
+				i = flows_count;
+
+			if (!flow) {
+				print_flow_error(error);
+				rte_exit(EXIT_FAILURE, "error in creating flow");
+			}
+
+			if (i && !((i + 1) % iterations_number)) {
+				/* Save the insertion rate of each iter */
+				end_iter = clock();
+				delta = (double) (end_iter - start_iter);
+				iter_id = ((i + 1) / iterations_number) - 1;
+				delta /= CLOCKS_PER_SEC;
+				/*
+				 * Only MAX_ITERATIONS slots exist; batches
+				 * past that still count toward the total
+				 * but are not kept for the per-iter dump.
+				 */
+				if (iter_id < MAX_ITERATIONS)
+					cpu_time_per_iter[iter_id] = delta;
+				cpu_time_used += delta;
+				start_iter = clock();
+			}
+		}
+
+		/* Iteration rate per iteration */
+		if (dump_iterations)
+			for (i = 0; i < MAX_ITERATIONS; i++) {
+				if (cpu_time_per_iter[i] == -1)
+					continue;
+				delta = (double)(iterations_number /
+					cpu_time_per_iter[i]);
+				flows_rate = delta / 1000;
+				printf(":: Iteration #%d: %d flows "
+					"in %f sec[ Rate = %f K/Sec ]\n",
+					i, iterations_number,
+					cpu_time_per_iter[i], flows_rate);
+			}
+
+		/* Insertion rate for all flows */
+		flows_rate = ((double) (flows_count / cpu_time_used) / 1000);
+		printf("\n:: Total flow insertion rate -> %f K/Sec\n",
+						flows_rate);
+		printf(":: The time for creating %d in flows %f seconds\n",
+						flows_count, cpu_time_used);
+		printf(":: EAGAIN counter = %d\n", eagain_counter);
+	}
+}
+
+/*
+ * SIGINT/SIGTERM handler: announce the signal, warn that the statistics
+ * are no longer reliable, and raise the force_quit flag.
+ * NOTE(review): printf() is not on the POSIX list of async-signal-safe
+ * functions; confirm that calling it from a handler is acceptable here.
+ */
+static void
+signal_handler(int signum)
+{
+	if (signum == SIGINT || signum == SIGTERM) {
+		printf("\n\nSignal %d received, preparing to exit...\n",
+					signum);
+		printf("Error: Stats are wrong due to sudden signal!\n\n");
+		force_quit = true;
+	}
 }
 
 static void
@@ -96,6 +441,8 @@ init_port(void)
 	int ret;
 	uint16_t i, j;
 	uint16_t port_id;
+	uint16_t nr_queues;
+	bool hairpin_flag = false;
 	uint16_t nr_ports = rte_eth_dev_count_avail();
 	struct rte_eth_hairpin_conf hairpin_conf = {
 			.peer_count = 1,
@@ -115,6 +462,13 @@ init_port(void)
 	struct rte_eth_rxconf rxq_conf;
 	struct rte_eth_dev_info dev_info;
 
+	nr_queues = RXQs;
+	if (flow_actions & HAIRPIN_QUEUE_ACTION ||
+		flow_actions & HAIRPIN_RSS_ACTION) {
+		nr_queues = RXQs + HAIRPIN_QUEUES;
+		hairpin_flag = true;
+	}
+
 	if (nr_ports == 0)
 		rte_exit(EXIT_FAILURE, "Error: no port detected\n");
 	mbuf_mp = rte_pktmbuf_pool_create("mbuf_pool",
@@ -134,8 +488,8 @@ init_port(void)
 
 		port_conf.txmode.offloads &= dev_info.tx_offload_capa;
 		printf(":: initializing port: %d\n", port_id);
-		ret = rte_eth_dev_configure(port_id, RXQs + HAIRPIN_QUEUES,
-				TXQs + HAIRPIN_QUEUES, &port_conf);
+		ret = rte_eth_dev_configure(port_id, nr_queues,
+				nr_queues, &port_conf);
 		if (ret < 0)
 			rte_exit(EXIT_FAILURE,
 					":: cannot configure device: err=%d, port=%u\n",
@@ -173,26 +527,30 @@ init_port(void)
 					":: promiscuous mode enable failed: err=%s, port=%u\n",
 					rte_strerror(-ret), port_id);
 
-		for (i = RXQs, j = 0; i < RXQs + HAIRPIN_QUEUES; i++, j++) {
-			hairpin_conf.peers[0].port = port_id;
-			hairpin_conf.peers[0].queue = j + TXQs;
-			ret = rte_eth_rx_hairpin_queue_setup(port_id, i,
-							NR_RXD, &hairpin_conf);
-			if (ret != 0)
-				rte_exit(EXIT_FAILURE,
-					":: Hairpin rx queue setup failed: err=%d, port=%u\n",
-					ret, port_id);
-		}
+		if (hairpin_flag) {
+			for (i = RXQs, j = 0;
+					i < RXQs + HAIRPIN_QUEUES; i++, j++) {
+				hairpin_conf.peers[0].port = port_id;
+				hairpin_conf.peers[0].queue = j + TXQs;
+				ret = rte_eth_rx_hairpin_queue_setup(port_id, i,
+					NR_RXD, &hairpin_conf);
+				if (ret != 0)
+					rte_exit(EXIT_FAILURE,
+						":: Hairpin rx queue setup failed: err=%d, port=%u\n",
+						ret, port_id);
+			}
 
-		for (i = TXQs, j = 0; i < TXQs + HAIRPIN_QUEUES; i++, j++) {
-			hairpin_conf.peers[0].port = port_id;
-			hairpin_conf.peers[0].queue = j + RXQs;
-			ret = rte_eth_tx_hairpin_queue_setup(port_id, i,
-							NR_TXD, &hairpin_conf);
-			if (ret != 0)
-				rte_exit(EXIT_FAILURE,
-					":: Hairpin tx queue setup failed: err=%d, port=%u\n",
-					ret, port_id);
+			for (i = TXQs, j = 0;
+					i < TXQs + HAIRPIN_QUEUES; i++, j++) {
+				hairpin_conf.peers[0].port = port_id;
+				hairpin_conf.peers[0].queue = j + RXQs;
+				ret = rte_eth_tx_hairpin_queue_setup(port_id, i,
+					NR_TXD, &hairpin_conf);
+				if (ret != 0)
+					rte_exit(EXIT_FAILURE,
+						":: Hairpin tx queue setup failed: err=%d, port=%u\n",
+						ret, port_id);
+			}
 		}
 
 		ret = rte_eth_dev_start(port_id);
@@ -219,6 +577,15 @@ main(int argc, char **argv)
 	if (ret < 0)
 		rte_exit(EXIT_FAILURE, "EAL init failed\n");
 
+	force_quit = false;
+	dump_iterations = false;
+	flows_count = 4000000;
+	iterations_number = 100000;
+	flow_group = 0;
+
+	signal(SIGINT, signal_handler);
+	signal(SIGTERM, signal_handler);
+
 	argc -= ret;
 	argv += ret;
 
@@ -232,6 +599,8 @@ main(int argc, char **argv)
 	if (nb_lcores <= 1)
 		rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
 
+	flows_handler();
+
 	RTE_LCORE_FOREACH_SLAVE(lcore_id)
 
 	if (rte_eal_wait_lcore(lcore_id) < 0)
diff --git a/app/test-flow-perf/meson.build b/app/test-flow-perf/meson.build
index ec9bb3b3aa..b3941f5c2d 100644
--- a/app/test-flow-perf/meson.build
+++ b/app/test-flow-perf/meson.build
@@ -5,7 +5,15 @@
 #
 # To build this example as a standalone application with an already-installed
 # DPDK instance, use 'make'
+name = 'flow_perf'
+allow_experimental_apis = true
+cflags += '-Wno-deprecated-declarations'
+cflags += '-Wunused-function'
 
 sources = files(
+	'actions_gen.c',
+	'flow_gen.c',
+	'items_gen.c',
 	'main.c',
 )
+deps += ['ethdev']
diff --git a/app/test-flow-perf/user_parameters.h b/app/test-flow-perf/user_parameters.h
index 56ec7f47b5..1d157430b6 100644
--- a/app/test-flow-perf/user_parameters.h
+++ b/app/test-flow-perf/user_parameters.h
@@ -14,3 +14,18 @@
 #define MBUF_CACHE_SIZE 512
 #define NR_RXD  256
 #define NR_TXD  256
+
+/** Items/Actions parameters **/
+#define JUMP_ACTION_TABLE 2
+#define VLAN_VALUE 1
+#define VNI_VALUE 1
+#define GRE_PROTO  0x6558
+#define META_DATA 1
+#define TAG_INDEX 0
+#define PORT_ID_DST 1
+#define MARK_ID 1
+#define TEID_VALUE 1
+
+/** Flow items/actions max size **/
+#define MAX_ITEMS_NUM 20
+#define MAX_ACTIONS_NUM 20
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
index 30ce1b6cc0..62e038c430 100644
--- a/doc/guides/tools/flow-perf.rst
+++ b/doc/guides/tools/flow-perf.rst
@@ -4,7 +4,19 @@
 RTE Flow performance tool
 =========================
 
-Application for rte_flow performance testing.
+Application for rte_flow performance testing. The application provides the
+ability to test insertion rate of specific rte_flow rule, by stressing it
+to the NIC, and calculate the insertion rate.
+
+The application offers some options in the command line, to configure
+which rule to apply.
+
+After that the application will start producing rules with same pattern
+but increasing the outer IP source address by 1 each time, thus it will
+give different flow each time, and all other items will have open masks.
+
+The current design have single core insertion rate. In the future we may
+have a multi core insertion rate measurement support in the app.
 
 
 Compiling the Application
@@ -61,9 +73,179 @@ a ``--`` separator:
 
 .. code-block:: console
 
-	sudo ./test-flow-perf -n 4 -w 08:00.0,dv_flow_en=1 --
+	sudo ./flow_perf -n 4 -w 08:00.0,dv_flow_en=1 -- --ingress --ether --ipv4 --queue --flows-count=1000000
 
 The command line options are:
 
 *	``--help``
 	Display a help message and quit.
+
+*	``--flows-count=N``
+	Set the number of flows to insert per port,
+	where N must be greater than the iteration size (100,000).
+	The default value is 4,000,000.
+
+*	``--dump-iterations``
+	Print rates for each iteration of flows.
+	Default iteration is 1,00,000.
+
+
+Attributes:
+
+*	``--ingress``
+	Set Ingress attribute to all flows attributes.
+
+*	``--egress``
+	Set Egress attribute to all flows attributes.
+
+*	``--transfer``
+	Set Transfer attribute to all flows attributes.
+
+*	``--group=N``
+	Set group for all flows, where N >= 0.
+	Default group is 0.
+
+Items:
+
+*	``--ether``
+	Add Ether item to all flows items, This item have open mask.
+
+*	``--vlan``
+	Add VLAN item to all flows items.
+	This item has the VLAN value defined in user_parameters.h
+	under ``VLAN_VALUE`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--ipv4``
+	Add IPv4 item to all flows items,
+	This item have incremental source IP, with full mask.
+	Other fields are open mask.
+
+*	``--ipv6``
+	Add IPv6 item to all flows item,
+	This item have incremental source IP, with full mask.
+	Other fields are open mask.
+
+*	``--tcp``
+	Add TCP item to all flows items, This item have open mask.
+
+*	``--udp``
+	Add UDP item to all flows items, This item have open mask.
+
+*	``--vxlan``
+	Add VXLAN item to all flows items,
+	This item have VNI value defined in user_parameters.h
+	under ``VNI_VALUE`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--vxlan-gpe``
+	Add VXLAN-GPE item to all flows items,
+	This item have VNI value defined in user_parameters.h
+	under ``VNI_VALUE`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--gre``
+	Add GRE item to all flows items,
+	This item have protocol value defined in user_parameters.h
+	under ``GRE_PROTO`` with full mask, default protocol = 0x6558 "Ether"
+	Other fields are open mask.
+
+*	``--geneve``
+	Add GENEVE item to all flows items,
+	This item have VNI value defined in user_parameters.h
+	under ``VNI_VALUE`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--gtp``
+	Add GTP item to all flows items,
+	This item have TEID value defined in user_parameters.h
+	under ``TEID_VALUE`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--meta``
+	Add Meta item to all flows items,
+	This item have data value defined in user_parameters.h
+	under ``META_DATA`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--tag``
+	Add Tag item to all flows items,
+	This item have data value defined in user_parameters.h
+	under ``META_DATA`` with full mask, default value = 1.
+
+	It also has the tag index defined in user_parameters.h
+	under ``TAG_INDEX`` with full mask, default value = 0.
+	Other fields are open mask.
+
+
+Actions:
+
+*	``--port-id``
+	Add port redirection action to all flows actions.
+	Port redirection destination is defined in user_parameters.h
+	under PORT_ID_DST, default value = 1.
+
+*	``--rss``
+	Add RSS action to all flows actions,
+	The queues in RSS action will be all queues configured
+	in the app.
+
+*	``--queue``
+	Add queue action to all flows actions.
+	The queue index changes in round-robin order for each flow.
+
+	For example:
+		The app running with 4 RX queues
+		Flow #0: queue index 0
+		Flow #1: queue index 1
+		Flow #2: queue index 2
+		Flow #3: queue index 3
+		Flow #4: queue index 0
+		...
+
+*	``--jump``
+	Add jump action to all flows actions.
+	Jump action destination is defined in user_parameters.h
+	under ``JUMP_ACTION_TABLE``, default value = 2.
+
+*	``--mark``
+	Add mark action to all flows actions.
+	Mark action id is defined in user_parameters.h
+	under ``MARK_ID``, default value = 1.
+
+*	``--count``
+	Add count action to all flows actions.
+
+*	``--set-meta``
+	Add set-meta action to all flows actions.
+	Meta data is defined in user_parameters.h under ``META_DATA``
+	with full mask, default value = 1.
+
+*	``--set-tag``
+	Add set-tag action to all flows actions.
+	Meta data is defined in user_parameters.h under ``META_DATA``
+	with full mask, default value = 1.
+
+	Tag index is defined in user_parameters.h under ``TAG_INDEX``
+	with full mask, default value = 0.
+
+*	``--drop``
+	Add drop action to all flows actions.
+
+*	``--hairpin-queue``
+	Add hairpin queue action to all flows actions.
+	The queue will change in round robin state for each flow.
+
+	For example:
+		The app running with 4 RX hairpin queues and 4 normal RX queues
+		Flow #0: queue index 4
+		Flow #1: queue index 5
+		Flow #2: queue index 6
+		Flow #3: queue index 7
+		Flow #4: queue index 4
+		...
+
+*	``--hairpin-rss``
+	Add hairpin RSS action to all flows actions.
+	The queues in RSS action will be all hairpin queues configured
+	in the app.
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH 3/5] app/test-flow-perf: add deletion rate calculation
  2020-04-09 15:42 ` [dpdk-dev] [PATCH 1/5] app/test-flow-perf: add flow performance skeleton Wisam Jaddo
  2020-04-09 15:42   ` [dpdk-dev] [PATCH 2/5] app/test-flow-perf: add insertion rate calculation Wisam Jaddo
@ 2020-04-09 15:42   ` " Wisam Jaddo
  2020-04-17  2:07     ` Xiaoyu Min
  2020-04-09 15:42   ` [dpdk-dev] [PATCH 4/5] app/test-flow-perf: add memory dump to app Wisam Jaddo
                     ` (4 subsequent siblings)
  6 siblings, 1 reply; 102+ messages in thread
From: Wisam Jaddo @ 2020-04-09 15:42 UTC (permalink / raw)
  To: dev, jackmin, jerinjacobk; +Cc: thomas

Add the ability to test deletion rate for flow performance
application.

This feature is disabled by default, and can be enabled by
adding "--deletion-rate" to the application command line options.

Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 app/test-flow-perf/main.c      | 87 ++++++++++++++++++++++++++++++++++
 doc/guides/tools/flow-perf.rst |  4 ++
 2 files changed, 91 insertions(+)

diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index 59dc5ae0f4..84f2c0c39b 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -62,6 +62,7 @@ static uint16_t flow_actions;
 static uint8_t flow_attrs;
 static volatile bool force_quit;
 static volatile bool dump_iterations;
+static volatile bool delete_flag;
 static struct rte_mempool *mbuf_mp;
 static uint32_t nb_lcores;
 static uint32_t flows_count;
@@ -75,6 +76,8 @@ static void usage(char *progname)
 		" flows to insert, default is 4,000,000\n");
 	printf("  --dump-iterations: To print rates for each"
 		" iteration\n");
+	printf("  --deletion-rate: Enable deletion rate"
+		" calculations\n");
 
 	printf("To set flow attributes:\n");
 	printf("  --ingress: set ingress attribute in flows\n");
@@ -123,6 +126,7 @@ args_parse(int argc, char **argv)
 		{ "help",                       0, 0, 0 },
 		{ "flows-count",                1, 0, 0 },
 		{ "dump-iterations",            0, 0, 0 },
+		{ "deletion-rate",              0, 0, 0 },
 		/* Attributes */
 		{ "ingress",                    0, 0, 0 },
 		{ "egress",                     0, 0, 0 },
@@ -304,6 +308,8 @@ args_parse(int argc, char **argv)
 			}
 			if (!strcmp(lgopts[opt_idx].name, "dump-iterations"))
 				dump_iterations = true;
+			if (!strcmp(lgopts[opt_idx].name, "deletion-rate"))
+				delete_flag = true;
 			break;
 		default:
 			usage(argv[0]);
@@ -323,9 +329,75 @@ print_flow_error(struct rte_flow_error error)
 		error.message ? error.message : "(no stated reason)");
 }
 
+/*
+ * Destroy the flows stored in flow_list on port_id and report the
+ * deletion rate.
+ *
+ * Up to flows_count entries are destroyed in order; the walk stops at
+ * the first NULL entry. The CPU time of every batch of
+ * iterations_number deletions is sampled with clock() and, with
+ * --dump-iterations, printed per batch before the total is printed.
+ * A failing rte_flow_destroy() terminates the application via
+ * rte_exit(). The global iterations_number is clamped to flows_count.
+ *
+ * NOTE(review): the total is computed from flows_count even when the
+ * walk stops early, and time spent in a trailing partial batch is not
+ * added to cpu_time_used.
+ */
+static inline void
+destroy_flows(int port_id, struct rte_flow **flow_list)
+{
+	struct rte_flow_error error;
+	clock_t start_iter, end_iter;
+	double cpu_time_used = 0;
+	double flows_rate;
+	double cpu_time_per_iter[MAX_ITERATIONS];
+	double delta;
+	uint32_t i;
+	int iter_id;
+
+	for (i = 0; i < MAX_ITERATIONS; i++)
+		cpu_time_per_iter[i] = -1;
+
+	if (iterations_number > flows_count)
+		iterations_number = flows_count;
+
+	/* Deletion Rate */
+	printf("Flows Deletion on port = %d\n", port_id);
+	start_iter = clock();
+	for (i = 0; i < flows_count; i++) {
+		if (!flow_list[i])
+			break;
+
+		/* Poison the error so a stale value is never reported. */
+		memset(&error, 0x33, sizeof(error));
+		if (rte_flow_destroy(port_id, flow_list[i], &error)) {
+			print_flow_error(error);
+			rte_exit(EXIT_FAILURE, "Error in deleting flow");
+		}
+
+		if (i && !((i + 1) % iterations_number)) {
+			/* Save the deletion rate of each iter */
+			end_iter = clock();
+			delta = (double) (end_iter - start_iter);
+			iter_id = ((i + 1) / iterations_number) - 1;
+			delta /= CLOCKS_PER_SEC;
+			/*
+			 * Only MAX_ITERATIONS slots exist; batches past
+			 * that still count toward the total but are not
+			 * kept for the per-iteration dump.
+			 */
+			if (iter_id < MAX_ITERATIONS)
+				cpu_time_per_iter[iter_id] = delta;
+			cpu_time_used += delta;
+			start_iter = clock();
+		}
+	}
+
+	/* Deletion rate per iteration */
+	if (dump_iterations)
+		for (i = 0; i < MAX_ITERATIONS; i++) {
+			if (cpu_time_per_iter[i] == -1)
+				continue;
+			delta = (double)(iterations_number /
+				cpu_time_per_iter[i]);
+			flows_rate = delta / 1000;
+			printf(":: Iteration #%d: %d flows "
+				"in %f sec[ Rate = %f K/Sec ]\n",
+				i, iterations_number,
+				cpu_time_per_iter[i], flows_rate);
+		}
+
+	/* Deletion rate for all flows */
+	flows_rate = ((double) (flows_count / cpu_time_used) / 1000);
+	printf("\n:: Total flow deletion rate -> %f K/Sec\n",
+		flows_rate);
+	printf(":: The time for deleting %d in flows %f seconds\n",
+		flows_count, cpu_time_used);
+}
+
 static inline void
 flows_handler(void)
 {
+	struct rte_flow **flow_list;
 	struct rte_flow_error error;
 	clock_t start_iter, end_iter;
 	double cpu_time_used = 0;
@@ -337,6 +409,7 @@ flows_handler(void)
 	int port_id;
 	int iter_id;
 	uint32_t eagain_counter = 0;
+	uint32_t flow_index;
 
 	nr_ports = rte_eth_dev_count_avail();
 
@@ -348,7 +421,14 @@ flows_handler(void)
 
 	printf(":: Flows Count per port: %d\n", flows_count);
 
+	/*
+	 * One slot per flow plus one extra: when flow_group > 0 the
+	 * group 0 jump rule is stored ahead of the flows_count flows.
+	 */
+	flow_list = rte_zmalloc("flow_list",
+		sizeof(struct rte_flow *) * (flows_count + 1), 0);
+	if (flow_list == NULL)
+		rte_exit(EXIT_FAILURE, "No Memory available!");
+
 	for (port_id = 0; port_id < nr_ports; port_id++) {
+		flow_index = 0;
+
 		if (flow_group > 0) {
 			/*
 			 * Create global rule to jumo into flow_group
@@ -365,6 +445,7 @@ flows_handler(void)
 				print_flow_error(error);
 				rte_exit(EXIT_FAILURE, "error in creating flow");
 			}
+			flow_list[flow_index++] = flow;
 		}
 
 		/* Insertion Rate */
@@ -388,6 +469,8 @@ flows_handler(void)
 				rte_exit(EXIT_FAILURE, "error in creating flow");
 			}
 
+			flow_list[flow_index++] = flow;
+
 			if (i && !((i + 1) % iterations_number)) {
 				/* Save the insertion rate of each iter */
 				end_iter = clock();
@@ -421,6 +504,9 @@ flows_handler(void)
 		printf(":: The time for creating %d in flows %f seconds\n",
 						flows_count, cpu_time_used);
 		printf(":: EAGAIN counter = %d\n", eagain_counter);
+
+		if (delete_flag)
+			destroy_flows(port_id, flow_list);
 	}
 }
 
@@ -579,6 +665,7 @@ main(int argc, char **argv)
 
 	force_quit = false;
 	dump_iterations = false;
+	delete_flag = false;
 	flows_count = 4000000;
 	iterations_number = 100000;
 	flow_group = 0;
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
index 62e038c430..e07e659df5 100644
--- a/doc/guides/tools/flow-perf.rst
+++ b/doc/guides/tools/flow-perf.rst
@@ -18,6 +18,8 @@ give different flow each time, and all other items will have open masks.
 The current design have single core insertion rate. In the future we may
 have a multi core insertion rate measurement support in the app.
 
+The application also provide the ability to measure rte flow deletion rate.
+
 
 Compiling the Application
 =========================
@@ -89,6 +91,8 @@ The command line options are:
 	Print rates for each iteration of flows.
 	Default iteration is 1,00,000.
 
+*	``--deletion-rate``
+	Enable deletion rate calculations.
 
 Attributes:
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH 4/5] app/test-flow-perf: add memory dump to app
  2020-04-09 15:42 ` [dpdk-dev] [PATCH 1/5] app/test-flow-perf: add flow performance skeleton Wisam Jaddo
  2020-04-09 15:42   ` [dpdk-dev] [PATCH 2/5] app/test-flow-perf: add insertion rate calculation Wisam Jaddo
  2020-04-09 15:42   ` [dpdk-dev] [PATCH 3/5] app/test-flow-perf: add deletion " Wisam Jaddo
@ 2020-04-09 15:42   ` Wisam Jaddo
  2020-04-17  2:08     ` Xiaoyu Min
  2020-04-09 15:42   ` [dpdk-dev] [PATCH 5/5] app/test-flow-perf: add packet forwarding support Wisam Jaddo
                     ` (3 subsequent siblings)
  6 siblings, 1 reply; 102+ messages in thread
From: Wisam Jaddo @ 2020-04-09 15:42 UTC (permalink / raw)
  To: dev, jackmin, jerinjacobk; +Cc: thomas, Suanming Mou

Introduce new feature to dump memory statistics of each socket
and a total for all before and after the creation.

This will give two main advantages:
1- Check the memory consumption for a large number of flows
"insertion rate scenario alone"

2- Check that there is no memory leakage after doing insertion then
deletion.

Signed-off-by: Suanming Mou <suanmingm@mellanox.com>
Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 app/test-flow-perf/main.c      | 69 ++++++++++++++++++++++++++++++++++
 doc/guides/tools/flow-perf.rst |  6 ++-
 2 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index 84f2c0c39b..438fbf850a 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -62,6 +62,7 @@ static uint16_t flow_actions;
 static uint8_t flow_attrs;
 static volatile bool force_quit;
 static volatile bool dump_iterations;
+static volatile bool dump_socket_mem_flag;
 static volatile bool delete_flag;
 static struct rte_mempool *mbuf_mp;
 static uint32_t nb_lcores;
@@ -78,6 +79,7 @@ static void usage(char *progname)
 		" iteration\n");
 	printf("  --deletion-rate: Enable deletion rate"
 		" calculations\n");
+	printf("  --dump-socket-mem: to dump all socket memory\n");
 
 	printf("To set flow attributes:\n");
 	printf("  --ingress: set ingress attribute in flows\n");
@@ -127,6 +129,7 @@ args_parse(int argc, char **argv)
 		{ "flows-count",                1, 0, 0 },
 		{ "dump-iterations",            0, 0, 0 },
 		{ "deletion-rate",              0, 0, 0 },
+		{ "dump-socket-mem",            0, 0, 0 },
 		/* Attributes */
 		{ "ingress",                    0, 0, 0 },
 		{ "egress",                     0, 0, 0 },
@@ -310,6 +313,8 @@ args_parse(int argc, char **argv)
 				dump_iterations = true;
 			if (!strcmp(lgopts[opt_idx].name, "deletion-rate"))
 				delete_flag = true;
+			if (!strcmp(lgopts[opt_idx].name, "dump-socket-mem"))
+				dump_socket_mem_flag = true;
 			break;
 		default:
 			usage(argv[0]);
@@ -321,6 +326,62 @@ args_parse(int argc, char **argv)
 	printf("end_flow\n");
 }
 
+/*
+ * Gather the malloc heap statistics of every NUMA socket and, when
+ * dump_socket_mem_flag is set, print them to the given stream: one
+ * section per socket followed by the totals over all counted sockets.
+ *
+ * A socket is skipped when rte_malloc_get_socket_stats() returns non-zero
+ * for it or when its heap total size is zero.
+ *
+ * Returns the allocated heap size, in bytes, summed over the counted
+ * sockets.
+ */
+static size_t
+dump_socket_mem(FILE *f)
+{
+	struct rte_malloc_socket_stats stats;
+	size_t sum_total = 0;
+	size_t sum_alloc = 0;
+	size_t sum_free = 0;
+	unsigned int cnt_alloc = 0;
+	unsigned int cnt_free = 0;
+	unsigned int socket;
+	bool found = false;
+
+	for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
+		if (rte_malloc_get_socket_stats(socket, &stats) != 0)
+			continue;
+		if (stats.heap_totalsz_bytes == 0)
+			continue;
+
+		found = true;
+		sum_total += stats.heap_totalsz_bytes;
+		sum_alloc += stats.heap_allocsz_bytes;
+		sum_free += stats.heap_freesz_bytes;
+		cnt_alloc += stats.alloc_count;
+		cnt_free += stats.free_count;
+
+		/* Totals are always accumulated, printing is optional */
+		if (!dump_socket_mem_flag)
+			continue;
+
+		fprintf(f, "::::::::::::::::::::::::::::::::::::::::");
+		fprintf(f,
+			"\nSocket %u:\nsize(M) total: %.6lf\nalloc:"
+			" %.6lf(%.3lf%%)\nfree: %.6lf"
+			"\nmax: %.6lf"
+			"\ncount alloc: %u\nfree: %u\n",
+			socket,
+			stats.heap_totalsz_bytes / 1.0e6,
+			stats.heap_allocsz_bytes / 1.0e6,
+			(double)stats.heap_allocsz_bytes * 100 /
+			(double)stats.heap_totalsz_bytes,
+			stats.heap_freesz_bytes / 1.0e6,
+			stats.greatest_free_size / 1.0e6,
+			stats.alloc_count,
+			stats.free_count);
+		fprintf(f, "::::::::::::::::::::::::::::::::::::::::");
+	}
+
+	/* Nothing more to print when dumping is off or no socket counted */
+	if (!dump_socket_mem_flag || !found)
+		return sum_alloc;
+
+	fprintf(f,
+		"\nTotal: size(M)\ntotal: %.6lf"
+		"\nalloc: %.6lf(%.3lf%%)\nfree: %.6lf"
+		"\ncount alloc: %u\nfree: %u\n",
+		sum_total / 1.0e6, sum_alloc / 1.0e6,
+		(double)sum_alloc * 100 / (double)sum_total,
+		sum_free / 1.0e6,
+		cnt_alloc, cnt_free);
+	fprintf(f, "::::::::::::::::::::::::::::::::::::::::\n");
+
+	return sum_alloc;
+}
+
 static void
 print_flow_error(struct rte_flow_error error)
 {
@@ -657,6 +718,7 @@ main(int argc, char **argv)
 	uint16_t nr_ports;
 	int ret;
 	struct rte_flow_error error;
+	int64_t alloc, last_alloc;
 
 	nr_ports = rte_eth_dev_count_avail();
 	ret = rte_eal_init(argc, argv);
@@ -666,6 +728,7 @@ main(int argc, char **argv)
 	force_quit = false;
 	dump_iterations = false;
 	delete_flag = false;
+	dump_socket_mem_flag = false;
 	flows_count = 4000000;
 	iterations_number = 100000;
 	flow_group = 0;
@@ -686,7 +749,13 @@ main(int argc, char **argv)
 	if (nb_lcores <= 1)
 		rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
 
+	last_alloc = (int64_t)dump_socket_mem(stdout);
 	flows_handler();
+	alloc = (int64_t)dump_socket_mem(stdout);
+
+	if (last_alloc)
+		fprintf(stdout, ":: Memory allocation change(M): %.6lf\n",
+		(alloc - last_alloc) / 1.0e6);
 
 	RTE_LCORE_FOREACH_SLAVE(lcore_id)
 
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
index e07e659df5..28d452fd06 100644
--- a/doc/guides/tools/flow-perf.rst
+++ b/doc/guides/tools/flow-perf.rst
@@ -18,7 +18,8 @@ give different flow each time, and all other items will have open masks.
 The current design have single core insertion rate. In the future we may
 have a multi core insertion rate measurement support in the app.
 
-The application also provide the ability to measure rte flow deletion rate.
+The application also provide the ability to measure rte flow deletion rate,
+in addition to memory consumption before and after the flows creation.
 
 
 Compiling the Application
@@ -94,6 +95,9 @@ The command line options are:
 *	``--deletion-rate``
 	Enable deletion rate calculations.
 
+*	``--dump-socket-mem``
+	Dump the memory stats for each socket before the insertion and after.
+
 Attributes:
 
 *	``--ingress``
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH 5/5] app/test-flow-perf: add packet forwarding support
  2020-04-09 15:42 ` [dpdk-dev] [PATCH 1/5] app/test-flow-perf: add flow performance skeleton Wisam Jaddo
                     ` (2 preceding siblings ...)
  2020-04-09 15:42   ` [dpdk-dev] [PATCH 4/5] app/test-flow-perf: add memory dump to app Wisam Jaddo
@ 2020-04-09 15:42   ` Wisam Jaddo
  2020-04-17  2:09     ` Xiaoyu Min
  2020-04-28 14:09     ` Or Gerlitz
  2020-04-16 15:12   ` [dpdk-dev] [PATCH 1/5] app/test-flow-perf: add flow performance skeleton Wisam Monther
                     ` (2 subsequent siblings)
  6 siblings, 2 replies; 102+ messages in thread
From: Wisam Jaddo @ 2020-04-09 15:42 UTC (permalink / raw)
  To: dev, jackmin, jerinjacobk; +Cc: thomas

Introduce packet forwarding support to the app to do
some performance measurements.

The measurements are reported in terms of packets per
second. The forwarding will start after the end
of insertion/deletion operations.

Both single and multi core performance measurements are supported.

Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 app/test-flow-perf/main.c      | 300 +++++++++++++++++++++++++++++++++
 doc/guides/tools/flow-perf.rst |   6 +
 2 files changed, 306 insertions(+)

diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index 438fbf850a..96d9a71086 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -60,14 +60,45 @@ static uint8_t flow_group;
 static uint16_t flow_items;
 static uint16_t flow_actions;
 static uint8_t flow_attrs;
+
 static volatile bool force_quit;
 static volatile bool dump_iterations;
 static volatile bool dump_socket_mem_flag;
 static volatile bool delete_flag;
+static volatile bool enable_fwd;
+
 static struct rte_mempool *mbuf_mp;
 static uint32_t nb_lcores;
 static uint32_t flows_count;
 static uint32_t iterations_number;
+
+/* Sizing constants and lcore roles used by the forwarding code */
+#define MAX_PKT_BURST 32	/* packets requested per Rx burst */
+#define LCORE_MODE_PKT 1	/* lcore forwards packets */
+#define LCORE_MODE_STATS 2	/* lcore prints the statistics */
+#define MAX_STREAMS 64		/* stream slots per lcore */
+#define MAX_LCORES 64		/* entries in lcore_infos[] */
+
+/*
+ * One forwarding stream: packets received on (rx_port, rx_queue) are
+ * transmitted on (tx_port, tx_queue). init_lcore_info() marks an unused
+ * slot by storing -1 in every field.
+ */
+struct stream {
+	int tx_port;
+	int tx_queue;
+	int rx_port;
+	int rx_queue;
+};
+
+/* Per-lcore forwarding state, indexed by lcore id in lcore_infos[] */
+struct lcore_info {
+	int mode;		/* 0 (idle), LCORE_MODE_PKT or LCORE_MODE_STATS */
+	int streams_nb;		/* number of streams assigned to this lcore */
+	struct stream streams[MAX_STREAMS];
+	/* stats */
+	uint64_t tx_pkts;
+	uint64_t tx_drops;
+	uint64_t rx_pkts;
+	struct rte_mbuf *pkts[MAX_PKT_BURST];	/* burst buffer for Rx/Tx */
+} __attribute__((__aligned__(64))); /* let it be cacheline aligned */
+
+
+static struct lcore_info lcore_infos[MAX_LCORES];
 
 static void usage(char *progname)
 {
@@ -80,6 +111,8 @@ static void usage(char *progname)
 	printf("  --deletion-rate: Enable deletion rate"
 		" calculations\n");
 	printf("  --dump-socket-mem: to dump all socket memory\n");
+	printf("  --enable-fwd: to enable packets forwarding"
+		" after insertion\n");
 
 	printf("To set flow attributes:\n");
 	printf("  --ingress: set ingress attribute in flows\n");
@@ -130,6 +163,7 @@ args_parse(int argc, char **argv)
 		{ "dump-iterations",            0, 0, 0 },
 		{ "deletion-rate",              0, 0, 0 },
 		{ "dump-socket-mem",            0, 0, 0 },
+		{ "enable-fwd",                 0, 0, 0 },
 		/* Attributes */
 		{ "ingress",                    0, 0, 0 },
 		{ "egress",                     0, 0, 0 },
@@ -315,6 +349,8 @@ args_parse(int argc, char **argv)
 				delete_flag = true;
 			if (!strcmp(lgopts[opt_idx].name, "dump-socket-mem"))
 				dump_socket_mem_flag = true;
+			if (!strcmp(lgopts[opt_idx].name, "enable-fwd"))
+				enable_fwd = true;
 			break;
 		default:
 			usage(argv[0]);
@@ -582,6 +618,265 @@ signal_handler(int signum)
 	}
 }
 
+/*
+ * Receive one burst of at most MAX_PKT_BURST packets from the given
+ * port/queue into li->pkts, add the received count to li->rx_pkts and
+ * return that count.
+ */
+static inline uint16_t
+do_rx(struct lcore_info *li, uint16_t rx_port, uint16_t rx_queue)
+{
+	const uint16_t nb_rx = rte_eth_rx_burst(rx_port, rx_queue,
+						li->pkts, MAX_PKT_BURST);
+
+	li->rx_pkts += nb_rx;
+
+	return nb_rx;
+}
+
+/*
+ * Transmit the first cnt packets of li->pkts on the given port/queue.
+ * Packets not taken by rte_eth_tx_burst() are counted in li->tx_drops
+ * and freed; the transmitted ones are counted in li->tx_pkts.
+ */
+static inline void
+do_tx(struct lcore_info *li, uint16_t cnt, uint16_t tx_port,
+			uint16_t tx_queue)
+{
+	const uint16_t sent = rte_eth_tx_burst(tx_port, tx_queue,
+						li->pkts, cnt);
+	uint16_t idx = sent;
+
+	li->tx_pkts  += sent;
+	li->tx_drops += cnt - sent;
+
+	/* Everything from index 'sent' onwards was rejected by the port */
+	while (idx < cnt) {
+		rte_pktmbuf_free(li->pkts[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Convert a number into a string that is easy to read, by inserting a
+ * comma after every group of three digits.
+ *
+ * For example n = 1799321 produces "1,799,321".
+ *
+ * buf receives the NUL-terminated result and must be able to hold at
+ * least 27 bytes, the size needed for the largest uint64_t value
+ * ("18,446,744,073,709,551,615" plus the terminator).
+ *
+ * Returns buf.
+ */
+static char *
+pretty_number(uint64_t n, char *buf)
+{
+	/* A uint64_t has at most 20 digits, i.e. 7 groups of 3 digits */
+	char p[7][4];
+	int i = 0;
+	int off;
+
+	/* Peel off the low-order groups; '>=' so that 1000 becomes 1,000 */
+	while (n >= 1000) {
+		snprintf(p[i], sizeof(p[0]), "%03u",
+			(unsigned int)(n % 1000));
+		n /= 1000;
+		i += 1;
+	}
+
+	/* What is left (0..999) is the leading group, without padding */
+	snprintf(p[i], sizeof(p[0]), "%u", (unsigned int)n);
+
+	/* Leading group first, then ",ddd" for every remaining group */
+	off = sprintf(buf, "%s", p[i]);
+	while (i-- > 0)
+		off += sprintf(buf + off, ",%s", p[i]);
+
+	return buf;
+}
+
+/*
+ * Print, once per second until force_quit is set, a table with the
+ * number of packets transmitted, dropped on Tx and received by every
+ * lcore in LCORE_MODE_PKT since the previous print, plus a "total" row
+ * when more than one such lcore exists.
+ *
+ * The deltas are computed against a snapshot of lcore_infos[] that is
+ * refreshed after each print. From the second round on, the cursor is
+ * first moved up by the number of lines printed in the previous round,
+ * so the table is redrawn in place.
+ */
+static void
+packet_per_second_stats(void)
+{
+	struct lcore_info *old;
+	struct lcore_info *li, *oli;
+	int nr_lines = 0;
+	int i;
+
+	old = rte_zmalloc("old",
+		sizeof(struct lcore_info) * MAX_LCORES, 0);
+	if (old == NULL)
+		rte_exit(EXIT_FAILURE, "No Memory available!");
+
+	memcpy(old, lcore_infos,
+		sizeof(struct lcore_info) * MAX_LCORES);
+
+	while (!force_quit) {
+		uint64_t total_tx_pkts = 0;
+		uint64_t total_rx_pkts = 0;
+		uint64_t total_tx_drops = 0;
+		uint64_t tx_delta, rx_delta, drops_delta;
+		char buf[3][32];
+		int nr_valid_core = 0;
+
+		sleep(1);
+
+		if (nr_lines) {
+			char go_up_nr_lines[16];
+
+			/* ESC [ <n> A: move the cursor up n lines */
+			sprintf(go_up_nr_lines, "%c[%dA\r", 27, nr_lines);
+			printf("%s\r", go_up_nr_lines);
+		}
+
+		printf("\n%6s %16s %16s %16s\n", "core", "tx", "tx drops", "rx");
+		printf("%6s %16s %16s %16s\n", "------", "----------------",
+			"----------------", "----------------");
+		/* Blank line, header and separator printed above */
+		nr_lines = 3;
+		for (i = 0; i < MAX_LCORES; i++) {
+			li  = &lcore_infos[i];
+			oli = &old[i];
+			if (li->mode != LCORE_MODE_PKT)
+				continue;
+
+			tx_delta    = li->tx_pkts  - oli->tx_pkts;
+			rx_delta    = li->rx_pkts  - oli->rx_pkts;
+			drops_delta = li->tx_drops - oli->tx_drops;
+			printf("%6d %16s %16s %16s\n", i,
+				pretty_number(tx_delta,    buf[0]),
+				pretty_number(drops_delta, buf[1]),
+				pretty_number(rx_delta,    buf[2]));
+
+			total_tx_pkts  += tx_delta;
+			total_rx_pkts  += rx_delta;
+			total_tx_drops += drops_delta;
+
+			nr_valid_core++;
+			nr_lines += 1;
+		}
+
+		if (nr_valid_core > 1) {
+			printf("%6s %16s %16s %16s\n", "total",
+				pretty_number(total_tx_pkts,  buf[0]),
+				pretty_number(total_tx_drops, buf[1]),
+				pretty_number(total_rx_pkts,  buf[2]));
+			nr_lines += 1;
+		}
+
+		memcpy(old, lcore_infos,
+			sizeof(struct lcore_info) * MAX_LCORES);
+	}
+
+	/* The snapshot is only needed while the loop is running */
+	rte_free(old);
+}
+
+/*
+ * Per-lcore entry point for the forwarding phase.
+ *
+ * Depending on the mode stored for the calling lcore in lcore_infos[]:
+ *  - no mode set: return immediately;
+ *  - LCORE_MODE_STATS: run the packet-per-second statistics printer;
+ *  - otherwise: until force_quit is set, poll every assigned stream and
+ *    transmit whatever was received on it.
+ *
+ * Always returns 0.
+ */
+static int
+start_forwarding(void *data __rte_unused)
+{
+	unsigned int lcore = rte_lcore_id();
+	int stream_id;
+	uint16_t cnt;
+	struct lcore_info *li;
+
+	/* lcore_infos[] only has MAX_LCORES entries */
+	if (lcore >= MAX_LCORES)
+		return 0;
+
+	li = &lcore_infos[lcore];
+	if (!li->mode)
+		return 0;
+
+	if (li->mode == LCORE_MODE_STATS) {
+		printf(":: started stats on lcore %u\n", lcore);
+		packet_per_second_stats();
+		return 0;
+	}
+
+	while (!force_quit)
+		for (stream_id = 0; stream_id < MAX_STREAMS; stream_id++) {
+			/* -1 marks a stream slot that was never assigned */
+			if (li->streams[stream_id].rx_port == -1)
+				continue;
+
+			cnt = do_rx(li,
+					li->streams[stream_id].rx_port,
+					li->streams[stream_id].rx_queue);
+			if (cnt)
+				do_tx(li, cnt,
+					li->streams[stream_id].tx_port,
+					li->streams[stream_id].tx_queue);
+		}
+	return 0;
+}
+
+/*
+ * Fill lcore_infos[] with the lcore -> forwarding streams mapping and
+ * print it.
+ *
+ * The first enabled lcore is put in LCORE_MODE_STATS. Every
+ * (port, Rx queue) pair becomes one stream that transmits on the same
+ * port and queue index it receives from, and the streams are spread over
+ * the lcores that follow the stats lcore.
+ */
+static void
+init_lcore_info(void)
+{
+	const uint16_t nr_port = rte_eth_dev_count_avail();
+	int nb_fwd_lcores;
+	int nb_fwd_streams;
+	unsigned int lcore;
+	uint16_t queue;
+	int port;
+	int core, slot;
+	int stream_id = 0;
+
+	/* First logical core is reserved for stats printing */
+	lcore = rte_get_next_lcore(-1, 0, 0);
+	lcore_infos[lcore].mode = LCORE_MODE_STATS;
+
+	/*
+	 * Reset every core: no stream counted, and -1 in all fields of
+	 * every stream slot, meaning the slot is not used or not set yet.
+	 */
+	for (core = 0; core < MAX_LCORES; core++) {
+		struct lcore_info *info = &lcore_infos[core];
+
+		info->streams_nb = 0;
+		for (slot = 0; slot < MAX_STREAMS; slot++) {
+			info->streams[slot].tx_port = -1;
+			info->streams[slot].rx_port = -1;
+			info->streams[slot].tx_queue = -1;
+			info->streams[slot].rx_queue = -1;
+		}
+	}
+
+	/*
+	 * Work out how many streams each forwarding core gets. The first
+	 * core is left out since it only prints the stats.
+	 */
+	nb_fwd_streams = nr_port * RXQs;
+	nb_fwd_lcores = (int)(nb_lcores - 1);
+	if (nb_fwd_lcores >= nb_fwd_streams) {
+		/* Enough cores: at most one stream per core */
+		for (core = 0; core < nb_fwd_lcores; core++) {
+			lcore = rte_get_next_lcore(lcore, 0, 0);
+			lcore_infos[lcore].streams_nb = 1;
+		}
+	} else {
+		/* Even share, the remainder goes one by one to the first cores */
+		int per_core = nb_fwd_streams / (nb_lcores - 1);
+		int leftover = nb_fwd_streams % (nb_lcores - 1);
+
+		for (core = 0; core < nb_fwd_lcores; core++) {
+			lcore = rte_get_next_lcore(lcore, 0, 0);
+			lcore_infos[lcore].streams_nb = per_core;
+			if (leftover) {
+				lcore_infos[lcore].streams_nb++;
+				leftover--;
+			}
+		}
+	}
+
+	/*
+	 * Hand the streams out following the per-core counts computed
+	 * above. A stream forwards on what it receives from: packets
+	 * received on port 0 queue 0 are sent on port 0 queue 0, by the
+	 * same logical core.
+	 */
+	lcore = rte_get_next_lcore(-1, 0, 0);
+	for (port = 0; port < nr_port; port++) {
+		for (queue = 0; queue < RXQs; queue++) {
+			struct stream *st;
+
+			/* Current core has no room (left): move to the next */
+			if (lcore_infos[lcore].streams_nb == 0 ||
+			    stream_id % lcore_infos[lcore].streams_nb == 0) {
+				lcore = rte_get_next_lcore(lcore, 0, 0);
+				lcore_infos[lcore].mode = LCORE_MODE_PKT;
+				stream_id = 0;
+			}
+
+			st = &lcore_infos[lcore].streams[stream_id];
+			st->rx_queue = queue;
+			st->tx_queue = queue;
+			st->rx_port = port;
+			st->tx_port = port;
+			stream_id++;
+		}
+	}
+
+	/* Print all streams; the first unused slot ends a core's list */
+	printf(":: Stream -> core id[N]: (rx_port, rx_queue)->(tx_port, tx_queue)\n");
+	for (core = 0; core < MAX_LCORES; core++) {
+		for (slot = 0; slot < MAX_STREAMS; slot++) {
+			const struct stream *st =
+				&lcore_infos[core].streams[slot];
+
+			if (st->tx_port == -1)
+				break;
+			printf("Stream -> core id[%d]: (%d,%d)->(%d,%d)\n",
+				core,
+				st->rx_port,
+				st->rx_queue,
+				st->tx_port,
+				st->tx_queue);
+		}
+	}
+}
+
 static void
 init_port(void)
 {
@@ -757,6 +1052,11 @@ main(int argc, char **argv)
 		fprintf(stdout, ":: Memory allocation change(M): %.6lf\n",
 		(alloc - last_alloc) / 1.0e6);
 
+	if (enable_fwd) {
+		init_lcore_info();
+		rte_eal_mp_remote_launch(start_forwarding, NULL, CALL_MASTER);
+	}
+
 	RTE_LCORE_FOREACH_SLAVE(lcore_id)
 
 	if (rte_eal_wait_lcore(lcore_id) < 0)
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
index 28d452fd06..ecd760de81 100644
--- a/doc/guides/tools/flow-perf.rst
+++ b/doc/guides/tools/flow-perf.rst
@@ -21,6 +21,8 @@ have a multi core insertion rate measurement support in the app.
 The application also provide the ability to measure rte flow deletion rate,
 in addition to memory consumption before and after the flows creation.
 
+The app supports single and multi core performance measurements.
+
 
 Compiling the Application
 =========================
@@ -98,6 +100,10 @@ The command line options are:
 *	``--dump-socket-mem``
 	Dump the memory stats for each socket before the insertion and after.
 
+*	``--enable-fwd``
+	Enable packets forwarding after insertion/deletion operations.
+
+
 Attributes:
 
 *	``--ingress``
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH 1/5] app/test-flow-perf: add flow performance skeleton
  2020-04-09 15:42 ` [dpdk-dev] [PATCH 1/5] app/test-flow-perf: add flow performance skeleton Wisam Jaddo
                     ` (3 preceding siblings ...)
  2020-04-09 15:42   ` [dpdk-dev] [PATCH 5/5] app/test-flow-perf: add packet forwarding support Wisam Jaddo
@ 2020-04-16 15:12   ` Wisam Monther
  2020-04-17  2:05     ` Xiaoyu Min
  2020-04-30  7:08   ` [dpdk-dev] [PATCH v2 0/5] *** Introduce flow perf application *** Wisam Jaddo
  2020-05-06  3:00   ` [dpdk-dev] [PATCH 1/5] app/test-flow-perf: add flow performance skeleton Ajit Khaparde
  6 siblings, 1 reply; 102+ messages in thread
From: Wisam Monther @ 2020-04-16 15:12 UTC (permalink / raw)
  To: dev, Jack Min, jerinjacobk; +Cc: Thomas Monjalon



>-----Original Message-----
>From: dev <dev-bounces@dpdk.org> On Behalf Of Wisam Jaddo
>Sent: Thursday, April 9, 2020 6:43 PM
>To: dev@dpdk.org; Jack Min <jackmin@mellanox.com>;
>jerinjacobk@gmail.com
>Cc: Thomas Monjalon <thomas@monjalon.net>
>Subject: [dpdk-dev] [PATCH 1/5] app/test-flow-perf: add flow performance
>skeleton
>
>Add flow performance application skeleton.
>
>Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
>---
> MAINTAINERS                          |   5 +
> app/Makefile                         |   1 +
> app/meson.build                      |   1 +
> app/test-flow-perf/Makefile          |  26 +++
> app/test-flow-perf/main.c            | 246 +++++++++++++++++++++++++++
> app/test-flow-perf/meson.build       |  11 ++
> app/test-flow-perf/user_parameters.h |  16 ++
> config/common_base                   |   5 +
> doc/guides/tools/flow-perf.rst       |  69 ++++++++
> doc/guides/tools/index.rst           |   1 +
> 10 files changed, 381 insertions(+)
> create mode 100644 app/test-flow-perf/Makefile  create mode 100644
>app/test-flow-perf/main.c  create mode 100644 app/test-flow-
>perf/meson.build  create mode 100644 app/test-flow-
>perf/user_parameters.h
> create mode 100644 doc/guides/tools/flow-perf.rst
>
>diff --git a/MAINTAINERS b/MAINTAINERS
>index 4800f6884a..a389ac127f 100644
>--- a/MAINTAINERS
>+++ b/MAINTAINERS
>@@ -1495,6 +1495,11 @@ T: git://dpdk.org/next/dpdk-next-net
> F: app/test-pmd/
> F: doc/guides/testpmd_app_ug/
>
>+Flow performance tool
>+M: Wisam Jaddo <wisamm@mellanox.com>
>+F: app/test-flow-perf
>+F: doc/guides/flow-perf.rst
>+
> Compression performance test application
> T: git://dpdk.org/next/dpdk-next-crypto
> F: app/test-compress-perf/
>diff --git a/app/Makefile b/app/Makefile index db9d2d5380..694df67358
>100644
>--- a/app/Makefile
>+++ b/app/Makefile
>@@ -9,6 +9,7 @@ DIRS-$(CONFIG_RTE_PROC_INFO) += proc-info
> DIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += pdump
> DIRS-$(CONFIG_RTE_LIBRTE_ACL) += test-acl
> DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += test-cmdline
>+DIRS-$(CONFIG_RTE_TEST_FLOW_PERF) += test-flow-perf
> DIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += test-pipeline
> DIRS-$(CONFIG_RTE_LIBRTE_IPSEC) += test-sad
>
>diff --git a/app/meson.build b/app/meson.build index
>71109cc422..20d77b0bd6 100644
>--- a/app/meson.build
>+++ b/app/meson.build
>@@ -14,6 +14,7 @@ apps = [
> 	'test-compress-perf',
> 	'test-crypto-perf',
> 	'test-eventdev',
>+	'test-flow-perf',
> 	'test-pipeline',
> 	'test-pmd',
> 	'test-sad']
>diff --git a/app/test-flow-perf/Makefile b/app/test-flow-perf/Makefile new
>file mode 100644 index 0000000000..45b1fb1464
>--- /dev/null
>+++ b/app/test-flow-perf/Makefile
>@@ -0,0 +1,26 @@
>+# SPDX-License-Identifier: BSD-3-Clause # Copyright 2020 Mellanox
>+Technologies, Ltd
>+
>+include $(RTE_SDK)/mk/rte.vars.mk
>+
>+ifeq ($(CONFIG_RTE_TEST_FLOW_PERF),y)
>+
>+#
>+# library name
>+#
>+APP = flow_perf
>+
>+CFLAGS += -DALLOW_EXPERIMENTAL_API
>+CFLAGS += -O3
>+CFLAGS += $(WERROR_FLAGS)
>+CFLAGS += -Wno-deprecated-declarations
>+CFLAGS += -Wno-unused-function
>+
>+#
>+# all source are stored in SRCS-y
>+#
>+SRCS-y += main.c
>+
>+include $(RTE_SDK)/mk/rte.app.mk
>+
>+endif
>diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c new file
>mode 100644 index 0000000000..156b9ef553
>--- /dev/null
>+++ b/app/test-flow-perf/main.c
>@@ -0,0 +1,246 @@
>+/* SPDX-License-Identifier: BSD-3-Clause
>+ *
>+ * This file contain the application main file
>+ * This application provides the user the ability to test the
>+ * insertion rate for specific rte_flow rule under stress state ~4M
>+rule/
>+ *
>+ * Then it will also provide packet per second measurement after
>+installing
>+ * all rules, the user may send traffic to test the PPS that match the
>+rules
>+ * after all rules are installed, to check performance or functionality
>+after
>+ * the stress.
>+ *
>+ * The flows insertion will go for all ports first, then it will print
>+the
>+ * results, after that the application will go into forwarding packets
>+mode
>+ * it will start receiving traffic if any and then forwarding it back
>+and
>+ * gives packet per second measurement.
>+ *
>+ * Copyright 2020 Mellanox Technologies, Ltd  */ #include <stdio.h>
>+#include <stdlib.h> #include <string.h> #include <stdint.h> #include
>+<inttypes.h> #include <sys/types.h> #include <sys/queue.h> #include
>+<netinet/in.h> #include <setjmp.h> #include <stdarg.h> #include
>+<ctype.h> #include <errno.h> #include <getopt.h> #include <signal.h>
>+#include <stdbool.h> #include <assert.h> #include <unistd.h> #include
>+<fcntl.h> #include <sys/time.h>
>+
>+
>+#include <rte_eal.h>
>+#include <rte_common.h>
>+#include <rte_malloc.h>
>+#include <rte_ether.h>
>+#include <rte_ethdev.h>
>+#include <rte_mempool.h>
>+#include <rte_mbuf.h>
>+#include <rte_net.h>
>+#include <rte_flow.h>
>+#include <rte_cycles.h>
>+#include <rte_memory.h>
>+
>+#include "user_parameters.h"
>+
>+static uint32_t nb_lcores;
>+static struct rte_mempool *mbuf_mp;
>+
>+static void usage(char *progname)
>+{
>+	printf("\nusage: %s", progname);
>+}
>+
>+static void
>+args_parse(int argc, char **argv)
>+{
>+	char **argvopt;
>+	int opt;
>+	int opt_idx;
>+	static struct option lgopts[] = {
>+		/* Control */
>+		{ "help",                       0, 0, 0 },
>+	};
>+
>+	argvopt = argv;
>+
>+	while ((opt = getopt_long(argc, argvopt, "",
>+				lgopts, &opt_idx)) != EOF) {
>+		switch (opt) {
>+		case 0:
>+			if (!strcmp(lgopts[opt_idx].name, "help")) {
>+				usage(argv[0]);
>+				rte_exit(EXIT_SUCCESS, "Displayed help\n");
>+			}
>+			break;
>+		default:
>+			usage(argv[0]);
>+			printf("Invalid option: %s\n", argv[optind]);
>+			rte_exit(EXIT_SUCCESS, "Invalid option\n");
>+			break;
>+		}
>+	}
>+}
>+
>+static void
>+init_port(void)
>+{
>+	int ret;
>+	uint16_t i, j;
>+	uint16_t port_id;
>+	uint16_t nr_ports = rte_eth_dev_count_avail();
>+	struct rte_eth_hairpin_conf hairpin_conf = {
>+			.peer_count = 1,
>+	};
>+	struct rte_eth_conf port_conf = {
>+		.rxmode = {
>+			.split_hdr_size = 0,
>+		},
>+		.rx_adv_conf = {
>+			.rss_conf.rss_hf =
>+					ETH_RSS_IP  |
>+					ETH_RSS_UDP |
>+					ETH_RSS_TCP,
>+		}
>+	};
>+	struct rte_eth_txconf txq_conf;
>+	struct rte_eth_rxconf rxq_conf;
>+	struct rte_eth_dev_info dev_info;
>+
>+	if (nr_ports == 0)
>+		rte_exit(EXIT_FAILURE, "Error: no port detected\n");
>+	mbuf_mp = rte_pktmbuf_pool_create("mbuf_pool",
>+					TOTAL_MBUF_NUM,
>MBUF_CACHE_SIZE,
>+					0, MBUF_SIZE,
>+					rte_socket_id());
>+
>+	if (mbuf_mp == NULL)
>+		rte_exit(EXIT_FAILURE, "Error: can't init mbuf pool\n");
>+
>+	for (port_id = 0; port_id < nr_ports; port_id++) {
>+		ret = rte_eth_dev_info_get(port_id, &dev_info);
>+		if (ret != 0)
>+			rte_exit(EXIT_FAILURE,
>+					"Error during getting device (port %u)
>info: %s\n",
>+					port_id, strerror(-ret));
>+
>+		port_conf.txmode.offloads &= dev_info.tx_offload_capa;
>+		printf(":: initializing port: %d\n", port_id);
>+		ret = rte_eth_dev_configure(port_id, RXQs +
>HAIRPIN_QUEUES,
>+				TXQs + HAIRPIN_QUEUES, &port_conf);
>+		if (ret < 0)
>+			rte_exit(EXIT_FAILURE,
>+					":: cannot configure device: err=%d,
>port=%u\n",
>+					ret, port_id);
>+
>+		rxq_conf = dev_info.default_rxconf;
>+		rxq_conf.offloads = port_conf.rxmode.offloads;
>+		for (i = 0; i < RXQs; i++) {
>+			ret = rte_eth_rx_queue_setup(port_id, i, NR_RXD,
>+
>	rte_eth_dev_socket_id(port_id),
>+						&rxq_conf,
>+						mbuf_mp);
>+			if (ret < 0)
>+				rte_exit(EXIT_FAILURE,
>+						":: Rx queue setup failed:
>err=%d, port=%u\n",
>+						ret, port_id);
>+		}
>+
>+		txq_conf = dev_info.default_txconf;
>+		txq_conf.offloads = port_conf.txmode.offloads;
>+
>+		for (i = 0; i < TXQs; i++) {
>+			ret = rte_eth_tx_queue_setup(port_id, i, NR_TXD,
>+
>	rte_eth_dev_socket_id(port_id),
>+						&txq_conf);
>+			if (ret < 0)
>+				rte_exit(EXIT_FAILURE,
>+						":: Tx queue setup failed:
>err=%d, port=%u\n",
>+						ret, port_id);
>+		}
>+
>+		ret = rte_eth_promiscuous_enable(port_id);
>+		if (ret != 0)
>+			rte_exit(EXIT_FAILURE,
>+					":: promiscuous mode enable failed:
>err=%s, port=%u\n",
>+					rte_strerror(-ret), port_id);
>+
>+		for (i = RXQs, j = 0; i < RXQs + HAIRPIN_QUEUES; i++, j++) {
>+			hairpin_conf.peers[0].port = port_id;
>+			hairpin_conf.peers[0].queue = j + TXQs;
>+			ret = rte_eth_rx_hairpin_queue_setup(port_id, i,
>+							NR_RXD,
>&hairpin_conf);
>+			if (ret != 0)
>+				rte_exit(EXIT_FAILURE,
>+					":: Hairpin rx queue setup failed:
>err=%d, port=%u\n",
>+					ret, port_id);
>+		}
>+
>+		for (i = TXQs, j = 0; i < TXQs + HAIRPIN_QUEUES; i++, j++) {
>+			hairpin_conf.peers[0].port = port_id;
>+			hairpin_conf.peers[0].queue = j + RXQs;
>+			ret = rte_eth_tx_hairpin_queue_setup(port_id, i,
>+							NR_TXD,
>&hairpin_conf);
>+			if (ret != 0)
>+				rte_exit(EXIT_FAILURE,
>+					":: Hairpin tx queue setup failed:
>err=%d, port=%u\n",
>+					ret, port_id);
>+		}
>+
>+		ret = rte_eth_dev_start(port_id);
>+		if (ret < 0)
>+			rte_exit(EXIT_FAILURE,
>+				"rte_eth_dev_start:err=%d, port=%u\n",
>+				ret, port_id);
>+
>+		printf(":: initializing port: %d done\n", port_id);
>+	}
>+}
>+
>+int
>+main(int argc, char **argv)
>+{
>+	uint16_t lcore_id;
>+	uint16_t port;
>+	uint16_t nr_ports;
>+	int ret;
>+	struct rte_flow_error error;
>+
>+	nr_ports = rte_eth_dev_count_avail();
>+	ret = rte_eal_init(argc, argv);
>+	if (ret < 0)
>+		rte_exit(EXIT_FAILURE, "EAL init failed\n");
>+
>+	argc -= ret;
>+	argv += ret;
>+
>+	if (argc > 1)
>+		args_parse(argc, argv);
>+
>+	init_port();
>+
>+	nb_lcores = rte_lcore_count();
>+
>+	if (nb_lcores <= 1)
>+		rte_exit(EXIT_FAILURE, "This app needs at least two
>cores\n");
>+
>+	RTE_LCORE_FOREACH_SLAVE(lcore_id)
>+
>+	if (rte_eal_wait_lcore(lcore_id) < 0)
>+		break;
>+
>+	for (port = 0; port < nr_ports; port++) {
>+		rte_flow_flush(port, &error);
>+		rte_eth_dev_stop(port);
>+		rte_eth_dev_close(port);
>+	}
>+	return 0;
>+}
>diff --git a/app/test-flow-perf/meson.build b/app/test-flow-
>perf/meson.build new file mode 100644 index 0000000000..ec9bb3b3aa
>--- /dev/null
>+++ b/app/test-flow-perf/meson.build
>@@ -0,0 +1,11 @@
>+# SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2020 Mellanox
>+Technologies, Ltd
>+
>+# meson file, for building this example as part of a main DPDK build.
>+#
>+# To build this example as a standalone application with an
>+already-installed # DPDK instance, use 'make'
>+
>+sources = files(
>+	'main.c',
>+)
>diff --git a/app/test-flow-perf/user_parameters.h b/app/test-flow-
>perf/user_parameters.h
>new file mode 100644
>index 0000000000..56ec7f47b5
>--- /dev/null
>+++ b/app/test-flow-perf/user_parameters.h
>@@ -0,0 +1,16 @@
>+/* SPDX-License-Identifier: BSD-3-Claus
>+ *
>+ * This file will hold the user parameters values
>+ *
>+ * Copyright 2020 Mellanox Technologies, Ltd  */
>+
>+/** Configuration **/
>+#define RXQs 4
>+#define TXQs 4
>+#define HAIRPIN_QUEUES 4
>+#define TOTAL_MBUF_NUM 32000
>+#define MBUF_SIZE 2048
>+#define MBUF_CACHE_SIZE 512
>+#define NR_RXD  256
>+#define NR_TXD  256
>diff --git a/config/common_base b/config/common_base index
>c31175f9d6..79455bf94a 100644
>--- a/config/common_base
>+++ b/config/common_base
>@@ -1111,3 +1111,8 @@ CONFIG_RTE_APP_CRYPTO_PERF=y  # Compile the
>eventdev application  #  CONFIG_RTE_APP_EVENTDEV=y
>+
>+#
>+# Compile the rte flow perf application # CONFIG_RTE_TEST_FLOW_PERF=y
>diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
>new file mode 100644 index 0000000000..30ce1b6cc0
>--- /dev/null
>+++ b/doc/guides/tools/flow-perf.rst
>@@ -0,0 +1,69 @@
>+..	SPDX-License-Identifier: BSD-3-Clause
>+	Copyright 2020 Mellanox Technologies, Ltd
>+
>+RTE Flow performance tool
>+=========================
>+
>+Application for rte_flow performance testing.
>+
>+
>+Compiling the Application
>+=========================
>+The ``test-flow-perf`` application is compiled as part of the main
>+compilation of the DPDK libraries and tools.
>+
>+Refer to the DPDK Getting Started Guides for details.
>+The basic compilation steps are:
>+
>+#. Set the required environmental variables and go to the source directory:
>+
>+	.. code-block:: console
>+
>+		export RTE_SDK=/path/to/rte_sdk
>+		cd $RTE_SDK
>+
>+#. Set the compilation target. For example:
>+
>+	.. code-block:: console
>+
>+		export RTE_TARGET=x86_64-native-linux-gcc
>+
>+#. Build the application:
>+
>+	.. code-block:: console
>+
>+		make install T=$RTE_TARGET
>+
>+#. The compiled application will be located at:
>+
>+	.. code-block:: console
>+
>+		$RTE_SDK/$RTE_TARGET/app/flow-perf
>+
>+
>+Running the Application
>+=======================
>+
>+EAL Command-line Options
>+------------------------
>+
>+Please refer to :doc:`EAL parameters (Linux)
>+<../linux_gsg/linux_eal_parameters>`
>+or :doc:`EAL parameters (FreeBSD)
>+<../freebsd_gsg/freebsd_eal_parameters>` for a list of available EAL
>command-line options.
>+
>+
>+Flow performance Options
>+------------------------
>+
>+The following are the command-line options for the flow performance
>application.
>+They must be separated from the EAL options, shown in the previous
>+section, with a ``--`` separator:
>+
>+.. code-block:: console
>+
>+	sudo ./test-flow-perf -n 4 -w 08:00.0,dv_flow_en=1 --
>+
>+The command line options are:
>+
>+*	``--help``
>+	Display a help message and quit.
>diff --git a/doc/guides/tools/index.rst b/doc/guides/tools/index.rst index
>782b30864e..7279daebc6 100644
>--- a/doc/guides/tools/index.rst
>+++ b/doc/guides/tools/index.rst
>@@ -16,3 +16,4 @@ DPDK Tools User Guides
>     cryptoperf
>     comp_perf
>     testeventdev
>+    flow-perf
>--
>2.17.1


Any comments guys?

BRs,
Wisam Jaddo

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH 1/5] app/test-flow-perf: add flow performance skeleton
  2020-04-16 15:12   ` [dpdk-dev] [PATCH 1/5] app/test-flow-perf: add flow performance skeleton Wisam Monther
@ 2020-04-17  2:05     ` Xiaoyu Min
  2020-04-28  8:22       ` Wisam Monther
  0 siblings, 1 reply; 102+ messages in thread
From: Xiaoyu Min @ 2020-04-17  2:05 UTC (permalink / raw)
  To: Wisam Monther; +Cc: dev, jerinjacobk, Thomas Monjalon

On Thu, 20-04-16, 23:12, Wisam Monther wrote:
> 
> 
> >-----Original Message-----
> >From: dev <dev-bounces@dpdk.org> On Behalf Of Wisam Jaddo
> >Sent: Thursday, April 9, 2020 6:43 PM
> >To: dev@dpdk.org; Jack Min <jackmin@mellanox.com>;
> >jerinjacobk@gmail.com
> >Cc: Thomas Monjalon <thomas@monjalon.net>
> >Subject: [dpdk-dev] [PATCH 1/5] app/test-flow-perf: add flow performance
> >skeleton
> >
> >Add flow performance application skeleton.
> >
> >Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
Reviewed-by: Xiaoyu Min <jackmin@mellanox.com>

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH 2/5] app/test-flow-perf: add insertion rate calculation
  2020-04-09 15:42   ` [dpdk-dev] [PATCH 2/5] app/test-flow-perf: add insertion rate calculation Wisam Jaddo
@ 2020-04-17  2:07     ` Xiaoyu Min
  2020-04-28  8:25       ` Wisam Monther
  0 siblings, 1 reply; 102+ messages in thread
From: Xiaoyu Min @ 2020-04-17  2:07 UTC (permalink / raw)
  To: Wisam Jaddo; +Cc: dev, jerinjacobk, thomas

On Thu, 20-04-09, 15:42, Wisam Jaddo wrote:
> Add insertion rate calculation feature into flow
> performance application.
> 
> The application now provide the ability to test
> insertion rate of specific rte_flow rule, by
> stressing it to the NIC, and calculate the
> insertion rate.
> 
> The application offers some options in the command
> line, to configure which rule to apply.
> 
> After that the application will start producing
> rules with same pattern but increasing the outer IP
> source address by 1 each time, thus it will give
> different flow each time, and all other items will
> have open masks.
> 
> The current design have single core insertion rate.
> In the future we may have a multi core insertion
> rate measurement support in the app.
> 
> Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
Reviewed-by: Xiaoyu Min <jackmin@mellanox.com>

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH 3/5] app/test-flow-perf: add deletion rate calculation
  2020-04-09 15:42   ` [dpdk-dev] [PATCH 3/5] app/test-flow-perf: add deletion " Wisam Jaddo
@ 2020-04-17  2:07     ` Xiaoyu Min
  2020-04-28  8:25       ` Wisam Monther
  0 siblings, 1 reply; 102+ messages in thread
From: Xiaoyu Min @ 2020-04-17  2:07 UTC (permalink / raw)
  To: Wisam Jaddo; +Cc: dev, jerinjacobk, thomas

On Thu, 20-04-09, 15:42, Wisam Jaddo wrote:
> Add the ability to test deletion rate for flow performance
> application.
> 
> This feature is disabled by default, and can be enabled by
> add "--deletion-rate" in the application command line options.
> 
> Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
Reviewed-by: Xiaoyu Min <jackmin@mellanox.com>

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH 4/5] app/test-flow-perf: add memory dump to app
  2020-04-09 15:42   ` [dpdk-dev] [PATCH 4/5] app/test-flow-perf: add memory dump to app Wisam Jaddo
@ 2020-04-17  2:08     ` Xiaoyu Min
  2020-04-28  8:25       ` Wisam Monther
  0 siblings, 1 reply; 102+ messages in thread
From: Xiaoyu Min @ 2020-04-17  2:08 UTC (permalink / raw)
  To: Wisam Jaddo; +Cc: dev, jerinjacobk, thomas, Suanming Mou

On Thu, 20-04-09, 15:42, Wisam Jaddo wrote:
> Introduce new feature to dump memory statistics of each socket
> and a total for all before and after the creation.
> 
> This will give two main advantage:
> 1- Check the memory consumption for large number of flows
> "insertion rate scenario alone"
> 
> 2- Check that there is no memory leakage after doing insertion then
> deletion.
> 
> Signed-off-by: Suanming Mou <suanmingm@mellanox.com>
> Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
Reviewed-by: Xiaoyu Min <jackmin@mellanox.com>

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH 5/5] app/test-flow-perf: add packet forwarding support
  2020-04-09 15:42   ` [dpdk-dev] [PATCH 5/5] app/test-flow-perf: add packet forwarding support Wisam Jaddo
@ 2020-04-17  2:09     ` Xiaoyu Min
  2020-04-28  8:26       ` Wisam Monther
  2020-04-28 14:09     ` Or Gerlitz
  1 sibling, 1 reply; 102+ messages in thread
From: Xiaoyu Min @ 2020-04-17  2:09 UTC (permalink / raw)
  To: Wisam Jaddo; +Cc: dev, jerinjacobk, thomas

On Thu, 20-04-09, 15:42, Wisam Jaddo wrote:
> Introduce packet forwarding support to the app to do
> some performance measurements.
> 
> The measurements are reported in term of packet per
> second unit. The forwarding will start after the end
> of insertion/deletion operations.
> 
> The support has single and multi performance measurements.
> 
> Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
Reviewed-by: Xiaoyu Min <jackmin@mellanox.com>

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH 1/5] app/test-flow-perf: add flow performance skeleton
  2020-04-17  2:05     ` Xiaoyu Min
@ 2020-04-28  8:22       ` Wisam Monther
  0 siblings, 0 replies; 102+ messages in thread
From: Wisam Monther @ 2020-04-28  8:22 UTC (permalink / raw)
  To: Jack Min; +Cc: dev, jerinjacobk, Thomas Monjalon

Thanks Jack,

Jerin,
Can you please review it according to our discussion in the RFC?

>-----Original Message-----
>From: Jack Min <jackmin@mellanox.com>
>Sent: Friday, April 17, 2020 5:06 AM
>To: Wisam Monther <wisamm@mellanox.com>
>Cc: dev@dpdk.org; jerinjacobk@gmail.com; Thomas Monjalon
><thomas@monjalon.net>
>Subject: Re: [dpdk-dev] [PATCH 1/5] app/test-flow-perf: add flow
>performance skeleton
>
>On Thu, 20-04-16, 23:12, Wisam Monther wrote:
>>
>>
>> >-----Original Message-----
>> >From: dev <dev-bounces@dpdk.org> On Behalf Of Wisam Jaddo
>> >Sent: Thursday, April 9, 2020 6:43 PM
>> >To: dev@dpdk.org; Jack Min <jackmin@mellanox.com>;
>> >jerinjacobk@gmail.com
>> >Cc: Thomas Monjalon <thomas@monjalon.net>
>> >Subject: [dpdk-dev] [PATCH 1/5] app/test-flow-perf: add flow
>> >performance skeleton
>> >
>> >Add flow performance application skeleton.
>> >
>> >Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
>Reviewed-by: Xiaoyu Min <jackmin@mellanox.com>

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH 2/5] app/test-flow-perf: add insertion rate calculation
  2020-04-17  2:07     ` Xiaoyu Min
@ 2020-04-28  8:25       ` Wisam Monther
  0 siblings, 0 replies; 102+ messages in thread
From: Wisam Monther @ 2020-04-28  8:25 UTC (permalink / raw)
  To: Jack Min; +Cc: dev, jerinjacobk, Thomas Monjalon

Thanks Jack,

Jerin,
Can you please review it according to our discussion in the RFC?

>-----Original Message-----
>From: Jack Min <jackmin@mellanox.com>
>Sent: Friday, April 17, 2020 5:07 AM
>To: Wisam Monther <wisamm@mellanox.com>
>Cc: dev@dpdk.org; jerinjacobk@gmail.com; Thomas Monjalon
><thomas@monjalon.net>
>Subject: Re: [PATCH 2/5] app/test-flow-perf: add insertion rate calculation
>
>On Thu, 20-04-09, 15:42, Wisam Jaddo wrote:
>> Add insertion rate calculation feature into flow performance
>> application.
>>
>> The application now provide the ability to test insertion rate of
>> specific rte_flow rule, by stressing it to the NIC, and calculate the
>> insertion rate.
>>
>> The application offers some options in the command line, to configure
>> which rule to apply.
>>
>> After that the application will start producing rules with same
>> pattern but increasing the outer IP source address by 1 each time,
>> thus it will give different flow each time, and all other items will
>> have open masks.
>>
>> The current design have single core insertion rate.
>> In the future we may have a multi core insertion rate measurement
>> support in the app.
>>
>> Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
>Reviewed-by: Xiaoyu Min <jackmin@mellanox.com>

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH 3/5] app/test-flow-perf: add deletion rate calculation
  2020-04-17  2:07     ` Xiaoyu Min
@ 2020-04-28  8:25       ` Wisam Monther
  0 siblings, 0 replies; 102+ messages in thread
From: Wisam Monther @ 2020-04-28  8:25 UTC (permalink / raw)
  To: Jack Min; +Cc: dev, jerinjacobk, Thomas Monjalon

Thanks Jack,

Jerin,
Can you please review it according to our discussion in the RFC?

>-----Original Message-----
>From: Jack Min <jackmin@mellanox.com>
>Sent: Friday, April 17, 2020 5:08 AM
>To: Wisam Monther <wisamm@mellanox.com>
>Cc: dev@dpdk.org; jerinjacobk@gmail.com; Thomas Monjalon
><thomas@monjalon.net>
>Subject: Re: [PATCH 3/5] app/test-flow-perf: add deletion rate calculation
>
>On Thu, 20-04-09, 15:42, Wisam Jaddo wrote:
>> Add the ability to test deletion rate for flow performance
>> application.
>>
>> This feature is disabled by default, and can be enabled by add
>> "--deletion-rate" in the application command line options.
>>
>> Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
>Reviewed-by: Xiaoyu Min <jackmin@mellanox.com>

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH 4/5] app/test-flow-perf: add memory dump to app
  2020-04-17  2:08     ` Xiaoyu Min
@ 2020-04-28  8:25       ` Wisam Monther
  0 siblings, 0 replies; 102+ messages in thread
From: Wisam Monther @ 2020-04-28  8:25 UTC (permalink / raw)
  To: Jack Min; +Cc: dev, jerinjacobk, Thomas Monjalon, Suanming Mou

Thanks Jack,

Jerin,
Can you please review it according to our discussion in the RFC?

>-----Original Message-----
>From: Jack Min <jackmin@mellanox.com>
>Sent: Friday, April 17, 2020 5:09 AM
>To: Wisam Monther <wisamm@mellanox.com>
>Cc: dev@dpdk.org; jerinjacobk@gmail.com; Thomas Monjalon
><thomas@monjalon.net>; Suanming Mou <suanmingm@mellanox.com>
>Subject: Re: [PATCH 4/5] app/test-flow-perf: add memory dump to app
>
>On Thu, 20-04-09, 15:42, Wisam Jaddo wrote:
>> Introduce new feature to dump memory statistics of each socket and a
>> total for all before and after the creation.
>>
>> This will give two main advantage:
>> 1- Check the memory consumption for large number of flows "insertion
>> rate scenario alone"
>>
>> 2- Check that there is no memory leakage after doing insertion then deletion.
>>
>> Signed-off-by: Suanming Mou <suanmingm@mellanox.com>
>> Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
>Reviewed-by: Xiaoyu Min <jackmin@mellanox.com>

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH 5/5] app/test-flow-perf: add packet forwarding support
  2020-04-17  2:09     ` Xiaoyu Min
@ 2020-04-28  8:26       ` Wisam Monther
  0 siblings, 0 replies; 102+ messages in thread
From: Wisam Monther @ 2020-04-28  8:26 UTC (permalink / raw)
  To: Jack Min; +Cc: dev, jerinjacobk, Thomas Monjalon

Thanks Jack,

Jerin,
Can you please review it according to our discussion in the RFC?

>-----Original Message-----
>From: Jack Min <jackmin@mellanox.com>
>Sent: Friday, April 17, 2020 5:09 AM
>To: Wisam Monther <wisamm@mellanox.com>
>Cc: dev@dpdk.org; jerinjacobk@gmail.com; Thomas Monjalon
><thomas@monjalon.net>
>Subject: Re: [PATCH 5/5] app/test-flow-perf: add packet forwarding support
>
>On Thu, 20-04-09, 15:42, Wisam Jaddo wrote:
>> Introduce packet forwarding support to the app to do some performance
>> measurements.
>>
>> The measurements are reported in term of packet per second unit. The
>> forwarding will start after the end of insertion/deletion operations.
>>
>> The support has single and multi performance measurements.
>>
>> Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
>Reviewed-by: Xiaoyu Min <jackmin@mellanox.com>

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH 5/5] app/test-flow-perf: add packet forwarding support
  2020-04-09 15:42   ` [dpdk-dev] [PATCH 5/5] app/test-flow-perf: add packet forwarding support Wisam Jaddo
  2020-04-17  2:09     ` Xiaoyu Min
@ 2020-04-28 14:09     ` Or Gerlitz
  2020-04-29  9:49       ` Wisam Monther
  1 sibling, 1 reply; 102+ messages in thread
From: Or Gerlitz @ 2020-04-28 14:09 UTC (permalink / raw)
  To: Wisam Jaddo; +Cc: dev, Jack Min, jerinjacobk, Thomas Monjalon

On Thu, Apr 9, 2020 at 6:44 PM Wisam Jaddo <wisamm@mellanox.com> wrote:
>
> Introduce packet forwarding support to the app to do
> some performance measurements.
>
> The measurements are reported in term of packet per
> second unit. The forwarding will start after the end
> of insertion/deletion operations.
>
> The support has single and multi performance measurements.

single and multi core? if not, then multi of what?

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH 5/5] app/test-flow-perf: add packet forwarding support
  2020-04-28 14:09     ` Or Gerlitz
@ 2020-04-29  9:49       ` Wisam Monther
  0 siblings, 0 replies; 102+ messages in thread
From: Wisam Monther @ 2020-04-29  9:49 UTC (permalink / raw)
  To: Or Gerlitz; +Cc: dev, Jack Min, jerinjacobk, Thomas Monjalon

>-----Original Message-----
>From: Or Gerlitz <gerlitz.or@gmail.com>
>Sent: Tuesday, April 28, 2020 5:10 PM
>To: Wisam Monther <wisamm@mellanox.com>
>Cc: dev@dpdk.org; Jack Min <jackmin@mellanox.com>;
>jerinjacobk@gmail.com; Thomas Monjalon <thomas@monjalon.net>
>Subject: Re: [dpdk-dev] [PATCH 5/5] app/test-flow-perf: add packet
>forwarding support
>
>On Thu, Apr 9, 2020 at 6:44 PM Wisam Jaddo <wisamm@mellanox.com>
>wrote:
>>
>> Introduce packet forwarding support to the app to do some performance
>> measurements.
>>
>> The measurements are reported in term of packet per second unit. The
>> forwarding will start after the end of insertion/deletion operations.
>>
>> The support has single and multi performance measurements.
>
>single and multi core? if not, then multi of what?

Yes single core and multi-core

^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v2 0/5] *** Introduce flow perf application ***
  2020-04-09 15:42 ` [dpdk-dev] [PATCH 1/5] app/test-flow-perf: add flow performance skeleton Wisam Jaddo
                     ` (4 preceding siblings ...)
  2020-04-16 15:12   ` [dpdk-dev] [PATCH 1/5] app/test-flow-perf: add flow performance skeleton Wisam Monther
@ 2020-04-30  7:08   ` Wisam Jaddo
  2020-04-30  7:08     ` [dpdk-dev] [PATCH v2 1/5] app/test-flow-perf: add flow performance skeleton Wisam Jaddo
                       ` (4 more replies)
  2020-05-06  3:00   ` [dpdk-dev] [PATCH 1/5] app/test-flow-perf: add flow performance skeleton Ajit Khaparde
  6 siblings, 5 replies; 102+ messages in thread
From: Wisam Jaddo @ 2020-04-30  7:08 UTC (permalink / raw)
  To: jackmin, thomas, jerinjacobk, gerlitz.or, l.yan, dev; +Cc: root

From: root <root@dragon56.rdmz.labs.mlnx>

Add new application to test rte flow performance from:
- Insertion rate.
- Deletion rate.
- Memory consumption.
- PPS forward measurement.

---
v2:
* reset cpu_time_used every port.
* generate different RSS action every flow with different RETA.
* Fix in commit log message

Wisam Jaddo (5):
  app/test-flow-perf: add flow performance skeleton
  app/test-flow-perf: add insertion rate calculation
  app/test-flow-perf: add deletion rate calculation
  app/test-flow-perf: add memory dump to app
  app/test-flow-perf: add packet forwarding support

 MAINTAINERS                          |    5 +
 app/Makefile                         |    1 +
 app/meson.build                      |    1 +
 app/test-flow-perf/Makefile          |   29 +
 app/test-flow-perf/actions_gen.c     |   86 +++
 app/test-flow-perf/actions_gen.h     |   48 ++
 app/test-flow-perf/flow_gen.c        |  176 +++++
 app/test-flow-perf/flow_gen.h        |   61 ++
 app/test-flow-perf/items_gen.c       |  265 +++++++
 app/test-flow-perf/items_gen.h       |   68 ++
 app/test-flow-perf/main.c            | 1071 ++++++++++++++++++++++++++
 app/test-flow-perf/meson.build       |   19 +
 app/test-flow-perf/user_parameters.h |   31 +
 config/common_base                   |    5 +
 doc/guides/tools/flow-perf.rst       |  265 +++++++
 doc/guides/tools/index.rst           |    1 +
 16 files changed, 2132 insertions(+)
 create mode 100644 app/test-flow-perf/Makefile
 create mode 100644 app/test-flow-perf/actions_gen.c
 create mode 100644 app/test-flow-perf/actions_gen.h
 create mode 100644 app/test-flow-perf/flow_gen.c
 create mode 100644 app/test-flow-perf/flow_gen.h
 create mode 100644 app/test-flow-perf/items_gen.c
 create mode 100644 app/test-flow-perf/items_gen.h
 create mode 100644 app/test-flow-perf/main.c
 create mode 100644 app/test-flow-perf/meson.build
 create mode 100644 app/test-flow-perf/user_parameters.h
 create mode 100644 doc/guides/tools/flow-perf.rst

-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v2 1/5] app/test-flow-perf: add flow performance skeleton
  2020-04-30  7:08   ` [dpdk-dev] [PATCH v2 0/5] *** Introduce flow perf application *** Wisam Jaddo
@ 2020-04-30  7:08     ` Wisam Jaddo
  2020-04-30  9:32       ` [dpdk-dev] [PATCH v3 0/5] *** Introduce flow perf application *** Wisam Jaddo
  2020-04-30  7:08     ` [dpdk-dev] [PATCH v2 2/5] app/test-flow-perf: add insertion rate calculation Wisam Jaddo
                       ` (3 subsequent siblings)
  4 siblings, 1 reply; 102+ messages in thread
From: Wisam Jaddo @ 2020-04-30  7:08 UTC (permalink / raw)
  To: jackmin, thomas, jerinjacobk, gerlitz.or, l.yan, dev

Add flow performance application skeleton.

Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 MAINTAINERS                          |   5 +
 app/Makefile                         |   1 +
 app/meson.build                      |   1 +
 app/test-flow-perf/Makefile          |  26 +++
 app/test-flow-perf/main.c            | 246 +++++++++++++++++++++++++++
 app/test-flow-perf/meson.build       |  11 ++
 app/test-flow-perf/user_parameters.h |  16 ++
 config/common_base                   |   5 +
 doc/guides/tools/flow-perf.rst       |  69 ++++++++
 doc/guides/tools/index.rst           |   1 +
 10 files changed, 381 insertions(+)
 create mode 100644 app/test-flow-perf/Makefile
 create mode 100644 app/test-flow-perf/main.c
 create mode 100644 app/test-flow-perf/meson.build
 create mode 100644 app/test-flow-perf/user_parameters.h
 create mode 100644 doc/guides/tools/flow-perf.rst

diff --git a/MAINTAINERS b/MAINTAINERS
index d31a809292..b5632c1bf5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1504,6 +1504,11 @@ T: git://dpdk.org/next/dpdk-next-net
 F: app/test-pmd/
 F: doc/guides/testpmd_app_ug/
 
+Flow performance tool
+M: Wisam Jaddo <wisamm@mellanox.com>
+F: app/test-flow-perf
+F: doc/guides/tools/flow-perf.rst
+
 Compression performance test application
 T: git://dpdk.org/next/dpdk-next-crypto
 F: app/test-compress-perf/
diff --git a/app/Makefile b/app/Makefile
index 823771c5fc..bd823f3db7 100644
--- a/app/Makefile
+++ b/app/Makefile
@@ -9,6 +9,7 @@ DIRS-$(CONFIG_RTE_PROC_INFO) += proc-info
 DIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += pdump
 DIRS-$(CONFIG_RTE_LIBRTE_ACL) += test-acl
 DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += test-cmdline
+DIRS-$(CONFIG_RTE_TEST_FLOW_PERF) += test-flow-perf
 DIRS-$(CONFIG_RTE_LIBRTE_FIB) += test-fib
 DIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += test-pipeline
 DIRS-$(CONFIG_RTE_LIBRTE_IPSEC) += test-sad
diff --git a/app/meson.build b/app/meson.build
index 0f7fe94649..e26f5b72f5 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -14,6 +14,7 @@ apps = [
 	'test-compress-perf',
 	'test-crypto-perf',
 	'test-eventdev',
+	'test-flow-perf',
 	'test-fib',
 	'test-pipeline',
 	'test-pmd',
diff --git a/app/test-flow-perf/Makefile b/app/test-flow-perf/Makefile
new file mode 100644
index 0000000000..45b1fb1464
--- /dev/null
+++ b/app/test-flow-perf/Makefile
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2020 Mellanox Technologies, Ltd
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifeq ($(CONFIG_RTE_TEST_FLOW_PERF),y)
+
+#
+# application name
+#
+APP = flow_perf
+
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -Wno-deprecated-declarations
+CFLAGS += -Wno-unused-function
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-y += main.c
+
+include $(RTE_SDK)/mk/rte.app.mk
+
+endif
diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
new file mode 100644
index 0000000000..156b9ef553
--- /dev/null
+++ b/app/test-flow-perf/main.c
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This file contains the application main file.
+ * This application provides the user the ability to test the
+ * insertion rate for a specific rte_flow rule under a stress state of ~4M rules.
+ *
+ * Then it will also provide packet per second measurement after installing
+ * all rules, the user may send traffic to test the PPS that match the rules
+ * after all rules are installed, to check performance or functionality after
+ * the stress.
+ *
+ * The flows insertion will go for all ports first, then it will print the
+ * results, after that the application will go into forwarding packets mode
+ * it will start receiving traffic if any and then forwarding it back and
+ * gives packet per second measurement.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <netinet/in.h>
+#include <setjmp.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <assert.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/time.h>
+
+
+#include <rte_eal.h>
+#include <rte_common.h>
+#include <rte_malloc.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_net.h>
+#include <rte_flow.h>
+#include <rte_cycles.h>
+#include <rte_memory.h>
+
+#include "user_parameters.h"
+
+static uint32_t nb_lcores;
+static struct rte_mempool *mbuf_mp;
+
+static void usage(char *progname)
+{
+	printf("\nusage: %s", progname);
+}
+
+static void
+args_parse(int argc, char **argv)
+{
+	char **argvopt;
+	int opt;
+	int opt_idx;
+	static struct option lgopts[] = {
+		/* Control */
+		{ "help",                       0, 0, 0 },
+	};
+
+	argvopt = argv;
+
+	while ((opt = getopt_long(argc, argvopt, "",
+				lgopts, &opt_idx)) != EOF) {
+		switch (opt) {
+		case 0:
+			if (!strcmp(lgopts[opt_idx].name, "help")) {
+				usage(argv[0]);
+				rte_exit(EXIT_SUCCESS, "Displayed help\n");
+			}
+			break;
+		default:
+			usage(argv[0]);
+			printf("Invalid option: %s\n", argv[optind]);
+			rte_exit(EXIT_SUCCESS, "Invalid option\n");
+			break;
+		}
+	}
+}
+
+static void
+init_port(void)
+{
+	int ret;
+	uint16_t i, j;
+	uint16_t port_id;
+	uint16_t nr_ports = rte_eth_dev_count_avail();
+	struct rte_eth_hairpin_conf hairpin_conf = {
+			.peer_count = 1,
+	};
+	struct rte_eth_conf port_conf = {
+		.rxmode = {
+			.split_hdr_size = 0,
+		},
+		.rx_adv_conf = {
+			.rss_conf.rss_hf =
+					ETH_RSS_IP  |
+					ETH_RSS_UDP |
+					ETH_RSS_TCP,
+		}
+	};
+	struct rte_eth_txconf txq_conf;
+	struct rte_eth_rxconf rxq_conf;
+	struct rte_eth_dev_info dev_info;
+
+	if (nr_ports == 0)
+		rte_exit(EXIT_FAILURE, "Error: no port detected\n");
+	mbuf_mp = rte_pktmbuf_pool_create("mbuf_pool",
+					TOTAL_MBUF_NUM, MBUF_CACHE_SIZE,
+					0, MBUF_SIZE,
+					rte_socket_id());
+
+	if (mbuf_mp == NULL)
+		rte_exit(EXIT_FAILURE, "Error: can't init mbuf pool\n");
+
+	for (port_id = 0; port_id < nr_ports; port_id++) {
+		ret = rte_eth_dev_info_get(port_id, &dev_info);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+					"Error during getting device (port %u) info: %s\n",
+					port_id, strerror(-ret));
+
+		port_conf.txmode.offloads &= dev_info.tx_offload_capa;
+		printf(":: initializing port: %d\n", port_id);
+		ret = rte_eth_dev_configure(port_id, RXQs + HAIRPIN_QUEUES,
+				TXQs + HAIRPIN_QUEUES, &port_conf);
+		if (ret < 0)
+			rte_exit(EXIT_FAILURE,
+					":: cannot configure device: err=%d, port=%u\n",
+					ret, port_id);
+
+		rxq_conf = dev_info.default_rxconf;
+		rxq_conf.offloads = port_conf.rxmode.offloads;
+		for (i = 0; i < RXQs; i++) {
+			ret = rte_eth_rx_queue_setup(port_id, i, NR_RXD,
+						rte_eth_dev_socket_id(port_id),
+						&rxq_conf,
+						mbuf_mp);
+			if (ret < 0)
+				rte_exit(EXIT_FAILURE,
+						":: Rx queue setup failed: err=%d, port=%u\n",
+						ret, port_id);
+		}
+
+		txq_conf = dev_info.default_txconf;
+		txq_conf.offloads = port_conf.txmode.offloads;
+
+		for (i = 0; i < TXQs; i++) {
+			ret = rte_eth_tx_queue_setup(port_id, i, NR_TXD,
+						rte_eth_dev_socket_id(port_id),
+						&txq_conf);
+			if (ret < 0)
+				rte_exit(EXIT_FAILURE,
+						":: Tx queue setup failed: err=%d, port=%u\n",
+						ret, port_id);
+		}
+
+		ret = rte_eth_promiscuous_enable(port_id);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+					":: promiscuous mode enable failed: err=%s, port=%u\n",
+					rte_strerror(-ret), port_id);
+
+		for (i = RXQs, j = 0; i < RXQs + HAIRPIN_QUEUES; i++, j++) {
+			hairpin_conf.peers[0].port = port_id;
+			hairpin_conf.peers[0].queue = j + TXQs;
+			ret = rte_eth_rx_hairpin_queue_setup(port_id, i,
+							NR_RXD, &hairpin_conf);
+			if (ret != 0)
+				rte_exit(EXIT_FAILURE,
+					":: Hairpin rx queue setup failed: err=%d, port=%u\n",
+					ret, port_id);
+		}
+
+		for (i = TXQs, j = 0; i < TXQs + HAIRPIN_QUEUES; i++, j++) {
+			hairpin_conf.peers[0].port = port_id;
+			hairpin_conf.peers[0].queue = j + RXQs;
+			ret = rte_eth_tx_hairpin_queue_setup(port_id, i,
+							NR_TXD, &hairpin_conf);
+			if (ret != 0)
+				rte_exit(EXIT_FAILURE,
+					":: Hairpin tx queue setup failed: err=%d, port=%u\n",
+					ret, port_id);
+		}
+
+		ret = rte_eth_dev_start(port_id);
+		if (ret < 0)
+			rte_exit(EXIT_FAILURE,
+				"rte_eth_dev_start:err=%d, port=%u\n",
+				ret, port_id);
+
+		printf(":: initializing port: %d done\n", port_id);
+	}
+}
+
+int
+main(int argc, char **argv)
+{
+	uint16_t lcore_id;
+	uint16_t port;
+	uint16_t nr_ports;
+	int ret;
+	struct rte_flow_error error;
+
+	nr_ports = rte_eth_dev_count_avail();
+	ret = rte_eal_init(argc, argv);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "EAL init failed\n");
+
+	argc -= ret;
+	argv += ret;
+
+	if (argc > 1)
+		args_parse(argc, argv);
+
+	init_port();
+
+	nb_lcores = rte_lcore_count();
+
+	if (nb_lcores <= 1)
+		rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
+
+	RTE_LCORE_FOREACH_SLAVE(lcore_id)
+
+	if (rte_eal_wait_lcore(lcore_id) < 0)
+		break;
+
+	for (port = 0; port < nr_ports; port++) {
+		rte_flow_flush(port, &error);
+		rte_eth_dev_stop(port);
+		rte_eth_dev_close(port);
+	}
+	return 0;
+}
diff --git a/app/test-flow-perf/meson.build b/app/test-flow-perf/meson.build
new file mode 100644
index 0000000000..ec9bb3b3aa
--- /dev/null
+++ b/app/test-flow-perf/meson.build
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2020 Mellanox Technologies, Ltd
+
+# meson file, for building this example as part of a main DPDK build.
+#
+# To build this example as a standalone application with an already-installed
+# DPDK instance, use 'make'
+
+sources = files(
+	'main.c',
+)
diff --git a/app/test-flow-perf/user_parameters.h b/app/test-flow-perf/user_parameters.h
new file mode 100644
index 0000000000..56ec7f47b5
--- /dev/null
+++ b/app/test-flow-perf/user_parameters.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This file will hold the user parameters values
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+/** Configuration **/
+#define RXQs 4
+#define TXQs 4
+#define HAIRPIN_QUEUES 4
+#define TOTAL_MBUF_NUM 32000
+#define MBUF_SIZE 2048
+#define MBUF_CACHE_SIZE 512
+#define NR_RXD  256
+#define NR_TXD  256
diff --git a/config/common_base b/config/common_base
index 14000ba07e..eaaeaaaee2 100644
--- a/config/common_base
+++ b/config/common_base
@@ -1124,3 +1124,8 @@ CONFIG_RTE_APP_CRYPTO_PERF=y
 # Compile the eventdev application
 #
 CONFIG_RTE_APP_EVENTDEV=y
+
+#
+# Compile the rte flow perf application
+#
+CONFIG_RTE_TEST_FLOW_PERF=y
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
new file mode 100644
index 0000000000..30ce1b6cc0
--- /dev/null
+++ b/doc/guides/tools/flow-perf.rst
@@ -0,0 +1,69 @@
+..	SPDX-License-Identifier: BSD-3-Clause
+	Copyright 2020 Mellanox Technologies, Ltd
+
+RTE Flow performance tool
+=========================
+
+Application for rte_flow performance testing.
+
+
+Compiling the Application
+=========================
+The ``test-flow-perf`` application is compiled as part of the main compilation
+of the DPDK libraries and tools.
+
+Refer to the DPDK Getting Started Guides for details.
+The basic compilation steps are:
+
+#. Set the required environmental variables and go to the source directory:
+
+	.. code-block:: console
+
+		export RTE_SDK=/path/to/rte_sdk
+		cd $RTE_SDK
+
+#. Set the compilation target. For example:
+
+	.. code-block:: console
+
+		export RTE_TARGET=x86_64-native-linux-gcc
+
+#. Build the application:
+
+	.. code-block:: console
+
+		make install T=$RTE_TARGET
+
+#. The compiled application will be located at:
+
+	.. code-block:: console
+
+		$RTE_SDK/$RTE_TARGET/app/flow_perf
+
+
+Running the Application
+=======================
+
+EAL Command-line Options
+------------------------
+
+Please refer to :doc:`EAL parameters (Linux) <../linux_gsg/linux_eal_parameters>`
+or :doc:`EAL parameters (FreeBSD) <../freebsd_gsg/freebsd_eal_parameters>` for
+a list of available EAL command-line options.
+
+
+Flow performance Options
+------------------------
+
+The following are the command-line options for the flow performance application.
+They must be separated from the EAL options, shown in the previous section, with
+a ``--`` separator:
+
+.. code-block:: console
+
+	sudo ./test-flow-perf -n 4 -w 08:00.0,dv_flow_en=1 --
+
+The command line options are:
+
+*	``--help``
+	Display a help message and quit.
diff --git a/doc/guides/tools/index.rst b/doc/guides/tools/index.rst
index 782b30864e..7279daebc6 100644
--- a/doc/guides/tools/index.rst
+++ b/doc/guides/tools/index.rst
@@ -16,3 +16,4 @@ DPDK Tools User Guides
     cryptoperf
     comp_perf
     testeventdev
+    flow-perf
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v2 2/5] app/test-flow-perf: add insertion rate calculation
  2020-04-30  7:08   ` [dpdk-dev] [PATCH v2 0/5] *** Introduce flow perf application *** Wisam Jaddo
  2020-04-30  7:08     ` [dpdk-dev] [PATCH v2 1/5] app/test-flow-perf: add flow performance skeleton Wisam Jaddo
@ 2020-04-30  7:08     ` Wisam Jaddo
  2020-04-30  7:08     ` [dpdk-dev] [PATCH v2 3/5] app/test-flow-perf: add deletion " Wisam Jaddo
                       ` (2 subsequent siblings)
  4 siblings, 0 replies; 102+ messages in thread
From: Wisam Jaddo @ 2020-04-30  7:08 UTC (permalink / raw)
  To: jackmin, thomas, jerinjacobk, gerlitz.or, l.yan, dev

Add insertion rate calculation feature into flow
performance application.

The application now provides the ability to test
insertion rate of specific rte_flow rule, by
stressing it to the NIC, and calculate the
insertion rate.

The application offers some options in the command
line, to configure which rule to apply.

After that the application will start producing
rules with same pattern but increasing the outer IP
source address by 1 each time, thus it will give
different flow each time, and all other items will
have open masks.

The current design has single core insertion rate.
In the future we may have a multi core insertion
rate measurement support in the app.

Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 app/test-flow-perf/Makefile          |   3 +
 app/test-flow-perf/actions_gen.c     |  86 ++++++
 app/test-flow-perf/actions_gen.h     |  48 ++++
 app/test-flow-perf/flow_gen.c        | 176 ++++++++++++
 app/test-flow-perf/flow_gen.h        |  61 ++++
 app/test-flow-perf/items_gen.c       | 265 +++++++++++++++++
 app/test-flow-perf/items_gen.h       |  68 +++++
 app/test-flow-perf/main.c            | 416 +++++++++++++++++++++++++--
 app/test-flow-perf/meson.build       |   8 +
 app/test-flow-perf/user_parameters.h |  15 +
 doc/guides/tools/flow-perf.rst       | 186 +++++++++++-
 11 files changed, 1307 insertions(+), 25 deletions(-)
 create mode 100644 app/test-flow-perf/actions_gen.c
 create mode 100644 app/test-flow-perf/actions_gen.h
 create mode 100644 app/test-flow-perf/flow_gen.c
 create mode 100644 app/test-flow-perf/flow_gen.h
 create mode 100644 app/test-flow-perf/items_gen.c
 create mode 100644 app/test-flow-perf/items_gen.h

diff --git a/app/test-flow-perf/Makefile b/app/test-flow-perf/Makefile
index 45b1fb1464..968c7c60dd 100644
--- a/app/test-flow-perf/Makefile
+++ b/app/test-flow-perf/Makefile
@@ -19,6 +19,9 @@ CFLAGS += -Wno-unused-function
 #
 # all source are stored in SRCS-y
 #
+SRCS-y += actions_gen.c
+SRCS-y += flow_gen.c
+SRCS-y += items_gen.c
 SRCS-y += main.c
 
 include $(RTE_SDK)/mk/rte.app.mk
diff --git a/app/test-flow-perf/actions_gen.c b/app/test-flow-perf/actions_gen.c
new file mode 100644
index 0000000000..564ed820e4
--- /dev/null
+++ b/app/test-flow-perf/actions_gen.c
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * The file contains the implementations of actions generators.
+ * Each generator is responsible for preparing its action instance
+ * and initializing it with needed data.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ **/
+
+#include <sys/types.h>
+#include <rte_malloc.h>
+#include <rte_flow.h>
+#include <rte_ethdev.h>
+
+#include "actions_gen.h"
+#include "user_parameters.h"
+
+void
+gen_mark(void)
+{
+	mark_action.id = MARK_ID;
+}
+
+void
+gen_queue(uint16_t queue)
+{
+	queue_action.index = queue;
+}
+
+void
+gen_jump(uint16_t next_table)
+{
+	jump_action.group = next_table;
+}
+
+void
+gen_rss(uint16_t *queues, uint16_t queues_number)
+{
+	uint16_t queue;
+	/* Static storage: the conf only needs to stay valid until the
+	 * rule is created, so no per-rule allocation (and no leak). */
+	static struct action_rss_data rss_data;
+
+	if (queues_number > RTE_DIM(rss_data.queue))
+		rte_exit(EXIT_FAILURE, "Too many RSS queues!");
+
+	rss_data = (struct action_rss_data){
+		.conf = (struct rte_flow_action_rss){
+			.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
+			.level = 0,
+			.types = ETH_RSS_IP,
+			.key_len = 0,
+			.queue_num = queues_number,
+			.key = NULL,
+			.queue = rss_data.queue,
+		},
+		.key = { 0 },
+		.queue = { 0 },
+	};
+
+	for (queue = 0; queue < queues_number; queue++)
+		rss_data.queue[queue] = queues[queue];
+
+	rss_action = &rss_data.conf;
+}
+
+void
+gen_set_meta(void)
+{
+	meta_action.data = RTE_BE32(META_DATA);
+	meta_action.mask = RTE_BE32(0xffffffff);
+}
+
+void
+gen_set_tag(void)
+{
+	tag_action.data = RTE_BE32(META_DATA);
+	tag_action.mask = RTE_BE32(0xffffffff);
+	tag_action.index = TAG_INDEX;
+}
+
+void
+gen_port_id(void)
+{
+	port_id.id = PORT_ID_DST;
+}
diff --git a/app/test-flow-perf/actions_gen.h b/app/test-flow-perf/actions_gen.h
new file mode 100644
index 0000000000..556d48b871
--- /dev/null
+++ b/app/test-flow-perf/actions_gen.h
@@ -0,0 +1,48 @@
+/** SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This file contains the function declarations used to
+ * generate each supported action.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ **/
+
+#ifndef _ACTION_GEN_
+#define _ACTION_GEN_
+
+struct rte_flow_action_mark mark_action;
+struct rte_flow_action_queue queue_action;
+struct rte_flow_action_jump jump_action;
+struct rte_flow_action_rss *rss_action;
+struct rte_flow_action_set_meta meta_action;
+struct rte_flow_action_set_tag tag_action;
+struct rte_flow_action_port_id port_id;
+
+/* Storage for struct rte_flow_action_rss including external data. */
+struct action_rss_data {
+	struct rte_flow_action_rss conf;
+	uint8_t key[64];
+	uint16_t queue[128];
+} action_rss_data;
+
+void
+gen_mark(void);
+
+void
+gen_queue(uint16_t queue);
+
+void
+gen_jump(uint16_t next_table);
+
+void
+gen_rss(uint16_t *queues, uint16_t queues_number);
+
+void
+gen_set_meta(void);
+
+void
+gen_set_tag(void);
+
+void
+gen_port_id(void);
+
+#endif
diff --git a/app/test-flow-perf/flow_gen.c b/app/test-flow-perf/flow_gen.c
new file mode 100644
index 0000000000..32ddd2958f
--- /dev/null
+++ b/app/test-flow-perf/flow_gen.c
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * The file contains the implementations of the method to
+ * fill items, actions & attributes in their corresponding
+ * arrays, and then generate rte_flow rule.
+ *
+ * After the generation, the rule goes to validation then
+ * creation state, and then the results are returned.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+#include <stdint.h>
+
+#include "flow_gen.h"
+#include "items_gen.h"
+#include "actions_gen.h"
+#include "user_parameters.h"
+
+
+static void
+fill_attributes(struct rte_flow_attr *attr,
+	uint8_t flow_attrs, uint16_t group)
+{
+	if (flow_attrs & INGRESS)
+		attr->ingress = 1;
+	if (flow_attrs & EGRESS)
+		attr->egress = 1;
+	if (flow_attrs & TRANSFER)
+		attr->transfer = 1;
+	attr->group = group;
+}
+
+static void
+fill_items(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint16_t flow_items, uint32_t outer_ip_src)
+{
+	uint8_t items_counter = 0;
+
+	if (flow_items & META_ITEM)
+		add_meta_data(items, items_counter++);
+	if (flow_items & TAG_ITEM)
+		add_meta_tag(items, items_counter++);
+	if (flow_items & ETH_ITEM)
+		add_ether(items, items_counter++);
+	if (flow_items & VLAN_ITEM)
+		add_vlan(items, items_counter++);
+	if (flow_items & IPV4_ITEM)
+		add_ipv4(items, items_counter++, outer_ip_src);
+	if (flow_items & IPV6_ITEM)
+		add_ipv6(items, items_counter++, outer_ip_src);
+	if (flow_items & TCP_ITEM)
+		add_tcp(items, items_counter++);
+	if (flow_items & UDP_ITEM)
+		add_udp(items, items_counter++);
+	if (flow_items & VXLAN_ITEM)
+		add_vxlan(items, items_counter++);
+	if (flow_items & VXLAN_GPE_ITEM)
+		add_vxlan_gpe(items, items_counter++);
+	if (flow_items & GRE_ITEM)
+		add_gre(items, items_counter++);
+	if (flow_items & GENEVE_ITEM)
+		add_geneve(items, items_counter++);
+	if (flow_items & GTP_ITEM)
+		add_gtp(items, items_counter++);
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_END;
+}
+
+static void
+fill_actions(struct rte_flow_action actions[MAX_ACTIONS_NUM],
+	uint16_t flow_actions, uint32_t counter, uint16_t next_table)
+{
+	uint8_t actions_counter = 0;
+	uint16_t queues[RXQs];
+	uint16_t hairpin_queues[HAIRPIN_QUEUES];
+	uint16_t i;
+	static struct rte_flow_action_count count_action; /* outlives call */
+	uint8_t temp = counter & 0xff;
+
+	/* Non-fate actions; shared confs are only set up for counter 0 */
+	if (flow_actions & MARK_ACTION) {
+		if (!counter)
+			gen_mark();
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_MARK;
+		actions[actions_counter++].conf = &mark_action;
+	}
+	if (flow_actions & COUNT_ACTION) {
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_COUNT;
+		actions[actions_counter++].conf = &count_action;
+	}
+	if (flow_actions & META_ACTION) {
+		if (!counter)
+			gen_set_meta();
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_SET_META;
+		actions[actions_counter++].conf = &meta_action;
+	}
+	if (flow_actions & TAG_ACTION) {
+		if (!counter)
+			gen_set_tag();
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_SET_TAG;
+		actions[actions_counter++].conf = &tag_action;
+	}
+
+	/* Fate actions */
+	if (flow_actions & QUEUE_ACTION) {
+		gen_queue(counter % RXQs);
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_QUEUE;
+		actions[actions_counter++].conf = &queue_action;
+	}
+	if (flow_actions & RSS_ACTION) {
+		for (i = 0; i < RXQs; i++)
+			queues[i] = (temp >> (i << 1)) & 0x3;
+		gen_rss(queues, RXQs);
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_RSS;
+		actions[actions_counter++].conf = rss_action;
+	}
+	if (flow_actions & JUMP_ACTION) {
+		if (!counter)
+			gen_jump(next_table);
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_JUMP;
+		actions[actions_counter++].conf = &jump_action;
+	}
+	if (flow_actions & PORT_ID_ACTION) {
+		if (!counter)
+			gen_port_id();
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_PORT_ID;
+		actions[actions_counter++].conf = &port_id;
+	}
+	if (flow_actions & DROP_ACTION)
+		actions[actions_counter++].type = RTE_FLOW_ACTION_TYPE_DROP;
+	if (flow_actions & HAIRPIN_QUEUE_ACTION) {
+		gen_queue((counter % HAIRPIN_QUEUES) + RXQs);
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_QUEUE;
+		actions[actions_counter++].conf = &queue_action;
+	}
+	if (flow_actions & HAIRPIN_RSS_ACTION) {
+		for (i = 0; i < HAIRPIN_QUEUES; i++)
+			hairpin_queues[i] = i + RXQs; /* hairpin Rx indexes */
+		gen_rss(hairpin_queues, HAIRPIN_QUEUES);
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_RSS;
+		actions[actions_counter++].conf = rss_action;
+	}
+
+	actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_END;
+}
+
+struct rte_flow *
+generate_flow(uint16_t port_id,
+	uint16_t group,
+	uint8_t flow_attrs,
+	uint16_t flow_items,
+	uint16_t flow_actions,
+	uint16_t next_table,
+	uint32_t outer_ip_src,
+	struct rte_flow_error *error)
+{
+	struct rte_flow_attr attr;
+	struct rte_flow_item items[MAX_ITEMS_NUM];
+	struct rte_flow_action actions[MAX_ACTIONS_NUM];
+	struct rte_flow *flow = NULL;
+
+	memset(items, 0, sizeof(items));
+	memset(actions, 0, sizeof(actions));
+	memset(&attr, 0, sizeof(struct rte_flow_attr));
+
+	fill_attributes(&attr, flow_attrs, group);
+
+	fill_actions(actions, flow_actions,
+			outer_ip_src, next_table);
+
+	fill_items(items, flow_items, outer_ip_src);
+
+	flow = rte_flow_create(port_id, &attr, items, actions, error);
+	return flow;
+}
diff --git a/app/test-flow-perf/flow_gen.h b/app/test-flow-perf/flow_gen.h
new file mode 100644
index 0000000000..99cb9e3791
--- /dev/null
+++ b/app/test-flow-perf/flow_gen.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This file contains the items, actions and attributes
+ * definition. And the methods to prepare and fill items,
+ * actions and attributes to generate rte_flow rule.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+#ifndef _FLOW_GEN_
+#define _FLOW_GEN_
+
+#include <stdint.h>
+#include <rte_flow.h>
+
+#include "user_parameters.h"
+
+/* Items */
+#define ETH_ITEM       0x0001
+#define IPV4_ITEM      0x0002
+#define IPV6_ITEM      0x0004
+#define VLAN_ITEM      0x0008
+#define TCP_ITEM       0x0010
+#define UDP_ITEM       0x0020
+#define VXLAN_ITEM     0x0040
+#define VXLAN_GPE_ITEM 0x0080
+#define GRE_ITEM       0x0100
+#define GENEVE_ITEM    0x0200
+#define GTP_ITEM       0x0400
+#define META_ITEM      0x0800
+#define TAG_ITEM       0x1000
+
+/* Actions */
+#define QUEUE_ACTION   0x0001
+#define MARK_ACTION    0x0002
+#define JUMP_ACTION    0x0004
+#define RSS_ACTION     0x0008
+#define COUNT_ACTION   0x0010
+#define META_ACTION    0x0020
+#define TAG_ACTION     0x0040
+#define DROP_ACTION    0x0080
+#define PORT_ID_ACTION 0x0100
+#define HAIRPIN_QUEUE_ACTION 0x0200
+#define HAIRPIN_RSS_ACTION   0x0400
+
+/* Attributes */
+#define INGRESS  0x0001
+#define EGRESS   0x0002
+#define TRANSFER 0x0004
+
+struct rte_flow *
+generate_flow(uint16_t port_id,
+	uint16_t group,
+	uint8_t flow_attrs,
+	uint16_t flow_items,
+	uint16_t flow_actions,
+	uint16_t next_table,
+	uint32_t outer_ip_src,
+	struct rte_flow_error *error);
+
+#endif
diff --git a/app/test-flow-perf/items_gen.c b/app/test-flow-perf/items_gen.c
new file mode 100644
index 0000000000..fb9733d4e7
--- /dev/null
+++ b/app/test-flow-perf/items_gen.c
@@ -0,0 +1,265 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This file contains the implementations of the items
+ * related methods. Each item has a method to prepare
+ * the item and add it into the items array at a given index.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+#include <stdint.h>
+#include <rte_flow.h>
+
+#include "items_gen.h"
+#include "user_parameters.h"
+
+static struct rte_flow_item_eth eth_spec;
+static struct rte_flow_item_eth eth_mask;
+static struct rte_flow_item_vlan vlan_spec;
+static struct rte_flow_item_vlan vlan_mask;
+static struct rte_flow_item_ipv4 ipv4_spec;
+static struct rte_flow_item_ipv4 ipv4_mask;
+static struct rte_flow_item_ipv6 ipv6_spec;
+static struct rte_flow_item_ipv6 ipv6_mask;
+static struct rte_flow_item_udp udp_spec;
+static struct rte_flow_item_udp udp_mask;
+static struct rte_flow_item_tcp tcp_spec;
+static struct rte_flow_item_tcp tcp_mask;
+static struct rte_flow_item_vxlan vxlan_spec;
+static struct rte_flow_item_vxlan vxlan_mask;
+static struct rte_flow_item_vxlan_gpe vxlan_gpe_spec;
+static struct rte_flow_item_vxlan_gpe vxlan_gpe_mask;
+static struct rte_flow_item_gre gre_spec;
+static struct rte_flow_item_gre gre_mask;
+static struct rte_flow_item_geneve geneve_spec;
+static struct rte_flow_item_geneve geneve_mask;
+static struct rte_flow_item_gtp gtp_spec;
+static struct rte_flow_item_gtp gtp_mask;
+static struct rte_flow_item_meta meta_spec;
+static struct rte_flow_item_meta meta_mask;
+static struct rte_flow_item_tag tag_spec;
+static struct rte_flow_item_tag tag_mask;
+
+
+void
+add_ether(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	memset(&eth_spec, 0, sizeof(struct rte_flow_item_eth));
+	memset(&eth_mask, 0, sizeof(struct rte_flow_item_eth));
+	eth_spec.type = 0;
+	eth_mask.type = 0;
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_ETH;
+	items[items_counter].spec = &eth_spec;
+	items[items_counter].mask = &eth_mask;
+}
+
+void
+add_vlan(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	uint16_t vlan_value = VLAN_VALUE;
+	memset(&vlan_spec, 0, sizeof(struct rte_flow_item_vlan));
+	memset(&vlan_mask, 0, sizeof(struct rte_flow_item_vlan));
+
+	vlan_spec.tci = RTE_BE16(vlan_value);
+	vlan_mask.tci = RTE_BE16(0xffff);
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_VLAN;
+	items[items_counter].spec = &vlan_spec;
+	items[items_counter].mask = &vlan_mask;
+}
+
+void
+add_ipv4(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter, uint32_t src_ipv4)
+{
+	memset(&ipv4_spec, 0, sizeof(struct rte_flow_item_ipv4));
+	memset(&ipv4_mask, 0, sizeof(struct rte_flow_item_ipv4));
+
+	ipv4_spec.hdr.src_addr = src_ipv4;
+	ipv4_mask.hdr.src_addr = 0xffffffff;
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_IPV4;
+	items[items_counter].spec = &ipv4_spec;
+	items[items_counter].mask = &ipv4_mask;
+}
+
+
+void
+add_ipv6(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter, int src_ipv6)
+{
+	memset(&ipv6_spec, 0, sizeof(struct rte_flow_item_ipv6));
+	memset(&ipv6_mask, 0, sizeof(struct rte_flow_item_ipv6));
+
+	/** Set ipv6 src: copy the whole counter, memset() keeps 1 byte **/
+	memcpy(&ipv6_spec.hdr.src_addr, &src_ipv6,
+					sizeof(src_ipv6));
+
+	/** Full mask: every bit set, i.e. 0xff per byte **/
+	memset(&ipv6_mask.hdr.src_addr, 0xff,
+					sizeof(ipv6_spec.hdr.src_addr));
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_IPV6;
+	items[items_counter].spec = &ipv6_spec;
+	items[items_counter].mask = &ipv6_mask;
+}
+
+void
+add_tcp(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	memset(&tcp_spec, 0, sizeof(struct rte_flow_item_tcp));
+	memset(&tcp_mask, 0, sizeof(struct rte_flow_item_tcp));
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_TCP;
+	items[items_counter].spec = &tcp_spec;
+	items[items_counter].mask = &tcp_mask;
+}
+
+void
+add_udp(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	memset(&udp_spec, 0, sizeof(struct rte_flow_item_udp));
+	memset(&udp_mask, 0, sizeof(struct rte_flow_item_udp));
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_UDP;
+	items[items_counter].spec = &udp_spec;
+	items[items_counter].mask = &udp_mask;
+}
+
+void
+add_vxlan(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	uint32_t vni_value = VNI_VALUE;
+	uint8_t i;
+	memset(&vxlan_spec, 0, sizeof(struct rte_flow_item_vxlan));
+	memset(&vxlan_mask, 0, sizeof(struct rte_flow_item_vxlan));
+
+	/* Set standard vxlan vni */
+	for (i = 0; i < 3; i++) {
+		vxlan_spec.vni[2 - i] = vni_value >> (i * 8);
+		vxlan_mask.vni[2 - i] = 0xff;
+	}
+
+	/* Standard vxlan flags **/
+	vxlan_spec.flags = 0x8;
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_VXLAN;
+	items[items_counter].spec = &vxlan_spec;
+	items[items_counter].mask = &vxlan_mask;
+}
+
+void
+add_vxlan_gpe(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	uint32_t vni_value = VNI_VALUE;
+	uint8_t i;
+	memset(&vxlan_gpe_spec, 0, sizeof(struct rte_flow_item_vxlan_gpe));
+	memset(&vxlan_gpe_mask, 0, sizeof(struct rte_flow_item_vxlan_gpe));
+
+	/* Set vxlan-gpe vni */
+	for (i = 0; i < 3; i++) {
+		vxlan_gpe_spec.vni[2 - i] = vni_value >> (i * 8);
+		vxlan_gpe_mask.vni[2 - i] = 0xff;
+	}
+
+	/* vxlan-gpe flags */
+	vxlan_gpe_spec.flags = 0x0c;
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE;
+	items[items_counter].spec = &vxlan_gpe_spec;
+	items[items_counter].mask = &vxlan_gpe_mask;
+}
+
+void
+add_gre(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	uint16_t proto = GRE_PROTO;
+	memset(&gre_spec, 0, sizeof(struct rte_flow_item_gre));
+	memset(&gre_mask, 0, sizeof(struct rte_flow_item_gre));
+
+	gre_spec.protocol = RTE_BE16(proto);
+	gre_mask.protocol = 0xffff;
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_GRE;
+	items[items_counter].spec = &gre_spec;
+	items[items_counter].mask = &gre_mask;
+}
+
+void
+add_geneve(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	uint32_t vni_value = VNI_VALUE;
+	uint8_t i;
+	memset(&geneve_spec, 0, sizeof(struct rte_flow_item_geneve));
+	memset(&geneve_mask, 0, sizeof(struct rte_flow_item_geneve));
+
+	for (i = 0; i < 3; i++) {
+		geneve_spec.vni[2 - i] = vni_value >> (i * 8);
+		geneve_mask.vni[2 - i] = 0xff;
+	}
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_GENEVE;
+	items[items_counter].spec = &geneve_spec;
+	items[items_counter].mask = &geneve_mask;
+}
+
+void
+add_gtp(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	uint32_t teid_value = TEID_VALUE;
+	memset(&gtp_spec, 0, sizeof(struct rte_flow_item_gtp));
+	memset(&gtp_mask, 0, sizeof(struct rte_flow_item_gtp));
+
+	gtp_spec.teid = RTE_BE32(teid_value);
+	gtp_mask.teid = RTE_BE32(0xffffffff);
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_GTP;
+	items[items_counter].spec = &gtp_spec;
+	items[items_counter].mask = &gtp_mask;
+}
+
+void
+add_meta_data(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	uint32_t data = META_DATA;
+	memset(&meta_spec, 0, sizeof(struct rte_flow_item_meta));
+	memset(&meta_mask, 0, sizeof(struct rte_flow_item_meta));
+
+	meta_spec.data = RTE_BE32(data);
+	meta_mask.data = RTE_BE32(0xffffffff);
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_META;
+	items[items_counter].spec = &meta_spec;
+	items[items_counter].mask = &meta_mask;
+}
+
+
+void
+add_meta_tag(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	uint32_t data = META_DATA;
+	uint8_t index = TAG_INDEX;
+	memset(&tag_spec, 0, sizeof(struct rte_flow_item_tag));
+	memset(&tag_mask, 0, sizeof(struct rte_flow_item_tag));
+
+	tag_spec.data = RTE_BE32(data);
+	tag_mask.data = RTE_BE32(0xffffffff);
+	tag_spec.index = index;
+	tag_mask.index = 0xff;
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_TAG;
+	items[items_counter].spec = &tag_spec;
+	items[items_counter].mask = &tag_mask;
+}
diff --git a/app/test-flow-perf/items_gen.h b/app/test-flow-perf/items_gen.h
new file mode 100644
index 0000000000..0b01385951
--- /dev/null
+++ b/app/test-flow-perf/items_gen.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This file contains the items related methods
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+#ifndef _ITEMS_GEN_
+#define _ITEMS_GEN_
+
+#include <stdint.h>
+#include <rte_flow.h>
+
+#include "user_parameters.h"
+
+void
+add_ether(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_vlan(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_ipv4(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter, uint32_t src_ipv4);
+
+void
+add_ipv6(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter, int src_ipv6);
+
+void
+add_udp(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_tcp(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_vxlan(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_vxlan_gpe(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_gre(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_geneve(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_gtp(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_meta_data(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_meta_tag(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+#endif
diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index 156b9ef553..115af4f302 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -49,29 +49,119 @@
 #include <rte_cycles.h>
 #include <rte_memory.h>
 
+#include "flow_gen.h"
 #include "user_parameters.h"
 
-static uint32_t nb_lcores;
+#define MAX_ITERATIONS 100
+
+struct rte_flow *flow;
+static uint8_t flow_group;
+
+static uint16_t flow_items;
+static uint16_t flow_actions;
+static uint8_t flow_attrs;
+static volatile bool force_quit;
+static volatile bool dump_iterations;
 static struct rte_mempool *mbuf_mp;
+static uint32_t nb_lcores;
+static uint32_t flows_count;
+static uint32_t iterations_number;
 
 static void usage(char *progname)
 {
 	printf("\nusage: %s", progname);
+	printf("\nControl configurations:\n");
+	printf("  --flows-count=N: to set the number of needed"
+		" flows to insert, default is 4,000,000\n");
+	printf("  --dump-iterations: To print rates for each"
+		" iteration\n");
+
+	printf("To set flow attributes:\n");
+	printf("  --ingress: set ingress attribute in flows\n");
+	printf("  --egress: set egress attribute in flows\n");
+	printf("  --transfer: set transfer attribute in flows\n");
+	printf("  --group=N: set group for all flows,"
+		" default is 0\n");
+
+	printf("To set flow items:\n");
+	printf("  --ether: add ether layer in flow items\n");
+	printf("  --vlan: add vlan layer in flow items\n");
+	printf("  --ipv4: add ipv4 layer in flow items\n");
+	printf("  --ipv6: add ipv6 layer in flow items\n");
+	printf("  --tcp: add tcp layer in flow items\n");
+	printf("  --udp: add udp layer in flow items\n");
+	printf("  --vxlan: add vxlan layer in flow items\n");
+	printf("  --vxlan-gpe: add vxlan-gpe layer in flow items\n");
+	printf("  --gre: add gre layer in flow items\n");
+	printf("  --geneve: add geneve layer in flow items\n");
+	printf("  --gtp: add gtp layer in flow items\n");
+	printf("  --meta: add meta layer in flow items\n");
+	printf("  --tag: add tag layer in flow items\n");
+
+	printf("To set flow actions:\n");
+	printf("  --port-id: add port-id action in flow actions\n");
+	printf("  --rss: add rss action in flow actions\n");
+	printf("  --queue: add queue action in flow actions\n");
+	printf("  --jump: add jump action in flow actions\n");
+	printf("  --mark: add mark action in flow actions\n");
+	printf("  --count: add count action in flow actions\n");
+	printf("  --set-meta: add set meta action in flow actions\n");
+	printf("  --set-tag: add set tag action in flow actions\n");
+	printf("  --drop: add drop action in flow actions\n");
+	printf("  --hairpin-queue: add hairpin-queue action in flow actions\n");
+	printf("  --hairpin-rss: add hairping-rss action in flow actions\n");
 }
 
 static void
 args_parse(int argc, char **argv)
 {
 	char **argvopt;
-	int opt;
+	int n, opt;
 	int opt_idx;
 	static struct option lgopts[] = {
 		/* Control */
 		{ "help",                       0, 0, 0 },
+		{ "flows-count",                1, 0, 0 },
+		{ "dump-iterations",            0, 0, 0 },
+		/* Attributes */
+		{ "ingress",                    0, 0, 0 },
+		{ "egress",                     0, 0, 0 },
+		{ "transfer",                   0, 0, 0 },
+		{ "group",                      1, 0, 0 },
+		/* Items */
+		{ "ether",                      0, 0, 0 },
+		{ "vlan",                       0, 0, 0 },
+		{ "ipv4",                       0, 0, 0 },
+		{ "ipv6",                       0, 0, 0 },
+		{ "tcp",                        0, 0, 0 },
+		{ "udp",                        0, 0, 0 },
+		{ "vxlan",                      0, 0, 0 },
+		{ "vxlan-gpe",                  0, 0, 0 },
+		{ "gre",                        0, 0, 0 },
+		{ "geneve",                     0, 0, 0 },
+		{ "gtp",                        0, 0, 0 },
+		{ "meta",                       0, 0, 0 },
+		{ "tag",                        0, 0, 0 },
+		/* Actions */
+		{ "port-id",                    0, 0, 0 },
+		{ "rss",                        0, 0, 0 },
+		{ "queue",                      0, 0, 0 },
+		{ "jump",                       0, 0, 0 },
+		{ "mark",                       0, 0, 0 },
+		{ "count",                      0, 0, 0 },
+		{ "set-meta",                   0, 0, 0 },
+		{ "set-tag",                    0, 0, 0 },
+		{ "drop",                       0, 0, 0 },
+		{ "hairpin-queue",              0, 0, 0 },
+		{ "hairpin-rss",                0, 0, 0 },
 	};
 
+	flow_items = 0;
+	flow_actions = 0;
+	flow_attrs = 0;
 	argvopt = argv;
 
+	printf(":: Flow -> ");
 	while ((opt = getopt_long(argc, argvopt, "",
 				lgopts, &opt_idx)) != EOF) {
 		switch (opt) {
@@ -80,6 +170,140 @@ args_parse(int argc, char **argv)
 				usage(argv[0]);
 				rte_exit(EXIT_SUCCESS, "Displayed help\n");
 			}
+			/* Attributes */
+			if (!strcmp(lgopts[opt_idx].name, "ingress")) {
+				flow_attrs |= INGRESS;
+				printf("ingress ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "egress")) {
+				flow_attrs |= EGRESS;
+				printf("egress ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "transfer")) {
+				flow_attrs |= TRANSFER;
+				printf("transfer ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "group")) {
+				n = atoi(optarg);
+				if (n >= 0)
+					flow_group = n;
+				else
+					rte_exit(EXIT_SUCCESS,
+						"flow group should be >= 0");
+				printf("group %d ", flow_group);
+			}
+			/* Items */
+			if (!strcmp(lgopts[opt_idx].name, "ether")) {
+				flow_items |= ETH_ITEM;
+				printf("ether / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "ipv4")) {
+				flow_items |= IPV4_ITEM;
+				printf("ipv4 / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "vlan")) {
+				flow_items |= VLAN_ITEM;
+				printf("vlan / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "ipv6")) {
+				flow_items |= IPV6_ITEM;
+				printf("ipv6 / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "tcp")) {
+				flow_items |= TCP_ITEM;
+				printf("tcp / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "udp")) {
+				flow_items |= UDP_ITEM;
+				printf("udp / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "vxlan")) {
+				flow_items |= VXLAN_ITEM;
+				printf("vxlan / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "vxlan-gpe")) {
+				flow_items |= VXLAN_GPE_ITEM;
+				printf("vxlan-gpe / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "gre")) {
+				flow_items |= GRE_ITEM;
+				printf("gre / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "geneve")) {
+				flow_items |= GENEVE_ITEM;
+				printf("geneve / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "gtp")) {
+				flow_items |= GTP_ITEM;
+				printf("gtp / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "meta")) {
+				flow_items |= META_ITEM;
+				printf("meta / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "tag")) {
+				flow_items |= TAG_ITEM;
+				printf("tag / ");
+			}
+			/* Actions */
+			if (!strcmp(lgopts[opt_idx].name, "port-id")) {
+				flow_actions |= PORT_ID_ACTION;
+				printf("port-id / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "rss")) {
+				flow_actions |= RSS_ACTION;
+				printf("rss / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "hairpin-rss")) {
+				flow_actions |= HAIRPIN_RSS_ACTION;
+				printf("hairpin-rss / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "queue")) {
+				flow_actions |= QUEUE_ACTION;
+				printf("queue / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "hairpin-queue")) {
+				flow_actions |= HAIRPIN_QUEUE_ACTION;
+				printf("hairpin-queue / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "jump")) {
+				flow_actions |= JUMP_ACTION;
+				printf("jump / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "mark")) {
+				flow_actions |= MARK_ACTION;
+				printf("mark / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "count")) {
+				flow_actions |= COUNT_ACTION;
+				printf("count / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "set-meta")) {
+				flow_actions |= META_ACTION;
+				printf("set-meta / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "set-tag")) {
+				flow_actions |= TAG_ACTION;
+				printf("set-tag / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "drop")) {
+				flow_actions |= DROP_ACTION;
+				printf("drop / ");
+			}
+
+			/* Control */
+			if (!strcmp(lgopts[opt_idx].name, "flows-count")) {
+				n = atoi(optarg);
+				if (n > (int) iterations_number)
+					flows_count = n;
+				else {
+					printf("\n\nflows_count should be > %d",
+						iterations_number);
+					rte_exit(EXIT_SUCCESS, " ");
+				}
+			}
+			if (!strcmp(lgopts[opt_idx].name, "dump-iterations"))
+				dump_iterations = true;
 			break;
 		default:
 			usage(argv[0]);
@@ -88,6 +312,128 @@ args_parse(int argc, char **argv)
 			break;
 		}
 	}
+	printf("end_flow\n");
+}
+
+static void
+print_flow_error(struct rte_flow_error error)
+{
+	printf("Flow can't be created %d message: %s\n",
+		error.type,
+		error.message ? error.message : "(no stated reason)");
+}
+
+static inline void
+flows_handler(void)
+{
+	struct rte_flow_error error;
+	clock_t start_iter, end_iter;
+	double cpu_time_used;
+	double flows_rate;
+	double cpu_time_per_iter[MAX_ITERATIONS];
+	double delta;
+	uint16_t nr_ports;
+	uint32_t i;
+	int port_id;
+	int iter_id;
+	uint32_t eagain_counter = 0;
+
+	nr_ports = rte_eth_dev_count_avail();
+
+	for (i = 0; i < MAX_ITERATIONS; i++)
+		cpu_time_per_iter[i] = -1;
+
+	if (iterations_number > flows_count)
+		iterations_number = flows_count;
+
+	printf(":: Flows Count per port: %d\n", flows_count);
+
+	for (port_id = 0; port_id < nr_ports; port_id++) {
+		cpu_time_used = 0;
+		if (flow_group > 0) {
+			/*
+			 * Create global rule to jump into flow_group.
+			 * This way the app will avoid the default rules.
+			 *
+			 * Global rule:
+			 * group 0 eth / end actions jump group <flow_group>
+			 */
+			flow = generate_flow(port_id, 0, flow_attrs, ETH_ITEM,
+				JUMP_ACTION, flow_group, 0, &error);
+
+			if (!flow) {
+				print_flow_error(error);
+				rte_exit(EXIT_FAILURE, "error in creating flow");
+			}
+		}
+
+		/* Insertion Rate */
+		printf("Flows insertion on port = %d\n", port_id);
+		start_iter = clock();
+		for (i = 0; i < flows_count; i++) {
+			do {
+				rte_errno = 0;
+				flow = generate_flow(port_id, flow_group,
+					flow_attrs, flow_items, flow_actions,
+					JUMP_ACTION_TABLE, i, &error);
+				if (!flow)
+					eagain_counter++;
+			} while (rte_errno == EAGAIN);
+
+			if (force_quit)
+				i = flows_count;
+
+			if (!flow) {
+				print_flow_error(error);
+				rte_exit(EXIT_FAILURE, "error in creating flow");
+			}
+
+			if (i && !((i + 1) % iterations_number)) {
+				/* Save the insertion rate of each iter */
+				end_iter = clock();
+				delta = (double) (end_iter - start_iter);
+				delta /= CLOCKS_PER_SEC;
+				iter_id = ((i + 1) / iterations_number) - 1;
+				if (iter_id < MAX_ITERATIONS) /* stay in array */
+					cpu_time_per_iter[iter_id] = delta;
+				cpu_time_used += delta;
+				start_iter = clock();
+			}
+		}
+
+		/* Iteration rate per iteration */
+		if (dump_iterations)
+			for (i = 0; i < MAX_ITERATIONS; i++) {
+				if (cpu_time_per_iter[i] == -1)
+					continue;
+				delta = (double)(iterations_number /
+					cpu_time_per_iter[i]);
+				flows_rate = delta / 1000;
+				printf(":: Iteration #%d: %d flows "
+					"in %f sec[ Rate = %f K/Sec ]\n",
+					i, iterations_number,
+					cpu_time_per_iter[i], flows_rate);
+			}
+
+		/* Insertion rate for all flows */
+		flows_rate = ((double) (flows_count / cpu_time_used) / 1000);
+		printf("\n:: Total flow insertion rate -> %f K/Sec\n",
+						flows_rate);
+		printf(":: The time for creating %d in flows %f seconds\n",
+						flows_count, cpu_time_used);
+		printf(":: EAGAIN counter = %d\n", eagain_counter);
+	}
+}
+
+static void
+signal_handler(int signum)
+{
+	if (signum == SIGINT || signum == SIGTERM) {
+		printf("\n\nSignal %d received, preparing to exit...\n",
+					signum);
+		printf("Error: Stats are wrong due to sudden signal!\n\n");
+		force_quit = true;
+	}
 }
 
 static void
@@ -96,6 +442,8 @@ init_port(void)
 	int ret;
 	uint16_t i, j;
 	uint16_t port_id;
+	uint16_t nr_queues;
+	bool hairpin_flag = false;
 	uint16_t nr_ports = rte_eth_dev_count_avail();
 	struct rte_eth_hairpin_conf hairpin_conf = {
 			.peer_count = 1,
@@ -115,6 +463,13 @@ init_port(void)
 	struct rte_eth_rxconf rxq_conf;
 	struct rte_eth_dev_info dev_info;
 
+	nr_queues = RXQs;
+	if (flow_actions & HAIRPIN_QUEUE_ACTION ||
+		flow_actions & HAIRPIN_RSS_ACTION) {
+		nr_queues = RXQs + HAIRPIN_QUEUES;
+		hairpin_flag = true;
+	}
+
 	if (nr_ports == 0)
 		rte_exit(EXIT_FAILURE, "Error: no port detected\n");
 	mbuf_mp = rte_pktmbuf_pool_create("mbuf_pool",
@@ -134,8 +489,8 @@ init_port(void)
 
 		port_conf.txmode.offloads &= dev_info.tx_offload_capa;
 		printf(":: initializing port: %d\n", port_id);
-		ret = rte_eth_dev_configure(port_id, RXQs + HAIRPIN_QUEUES,
-				TXQs + HAIRPIN_QUEUES, &port_conf);
+		ret = rte_eth_dev_configure(port_id, nr_queues,
+				nr_queues, &port_conf);
 		if (ret < 0)
 			rte_exit(EXIT_FAILURE,
 					":: cannot configure device: err=%d, port=%u\n",
@@ -173,26 +528,30 @@ init_port(void)
 					":: promiscuous mode enable failed: err=%s, port=%u\n",
 					rte_strerror(-ret), port_id);
 
-		for (i = RXQs, j = 0; i < RXQs + HAIRPIN_QUEUES; i++, j++) {
-			hairpin_conf.peers[0].port = port_id;
-			hairpin_conf.peers[0].queue = j + TXQs;
-			ret = rte_eth_rx_hairpin_queue_setup(port_id, i,
-							NR_RXD, &hairpin_conf);
-			if (ret != 0)
-				rte_exit(EXIT_FAILURE,
-					":: Hairpin rx queue setup failed: err=%d, port=%u\n",
-					ret, port_id);
-		}
+		if (hairpin_flag) {
+			for (i = RXQs, j = 0;
+					i < RXQs + HAIRPIN_QUEUES; i++, j++) {
+				hairpin_conf.peers[0].port = port_id;
+				hairpin_conf.peers[0].queue = j + TXQs;
+				ret = rte_eth_rx_hairpin_queue_setup(port_id, i,
+					NR_RXD, &hairpin_conf);
+				if (ret != 0)
+					rte_exit(EXIT_FAILURE,
+						":: Hairpin rx queue setup failed: err=%d, port=%u\n",
+						ret, port_id);
+			}
 
-		for (i = TXQs, j = 0; i < TXQs + HAIRPIN_QUEUES; i++, j++) {
-			hairpin_conf.peers[0].port = port_id;
-			hairpin_conf.peers[0].queue = j + RXQs;
-			ret = rte_eth_tx_hairpin_queue_setup(port_id, i,
-							NR_TXD, &hairpin_conf);
-			if (ret != 0)
-				rte_exit(EXIT_FAILURE,
-					":: Hairpin tx queue setup failed: err=%d, port=%u\n",
-					ret, port_id);
+			for (i = TXQs, j = 0;
+					i < TXQs + HAIRPIN_QUEUES; i++, j++) {
+				hairpin_conf.peers[0].port = port_id;
+				hairpin_conf.peers[0].queue = j + RXQs;
+				ret = rte_eth_tx_hairpin_queue_setup(port_id, i,
+					NR_TXD, &hairpin_conf);
+				if (ret != 0)
+					rte_exit(EXIT_FAILURE,
+						":: Hairpin tx queue setup failed: err=%d, port=%u\n",
+						ret, port_id);
+			}
 		}
 
 		ret = rte_eth_dev_start(port_id);
@@ -219,6 +578,15 @@ main(int argc, char **argv)
 	if (ret < 0)
 		rte_exit(EXIT_FAILURE, "EAL init failed\n");
 
+	force_quit = false;
+	dump_iterations = false;
+	flows_count = 4000000;
+	iterations_number = 100000;
+	flow_group = 0;
+
+	signal(SIGINT, signal_handler);
+	signal(SIGTERM, signal_handler);
+
 	argc -= ret;
 	argv += ret;
 
@@ -232,6 +600,8 @@ main(int argc, char **argv)
 	if (nb_lcores <= 1)
 		rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
 
+	flows_handler();
+
 	RTE_LCORE_FOREACH_SLAVE(lcore_id)
 
 	if (rte_eal_wait_lcore(lcore_id) < 0)
diff --git a/app/test-flow-perf/meson.build b/app/test-flow-perf/meson.build
index ec9bb3b3aa..b3941f5c2d 100644
--- a/app/test-flow-perf/meson.build
+++ b/app/test-flow-perf/meson.build
@@ -5,7 +5,15 @@
 #
 # To build this example as a standalone application with an already-installed
 # DPDK instance, use 'make'
+name = 'flow_perf'
+allow_experimental_apis = true
+cflags += '-Wno-deprecated-declarations'
+cflags += '-Wunused-function'
 
 sources = files(
+	'actions_gen.c',
+	'flow_gen.c',
+	'items_gen.c',
 	'main.c',
 )
+deps += ['ethdev']
diff --git a/app/test-flow-perf/user_parameters.h b/app/test-flow-perf/user_parameters.h
index 56ec7f47b5..1d157430b6 100644
--- a/app/test-flow-perf/user_parameters.h
+++ b/app/test-flow-perf/user_parameters.h
@@ -14,3 +14,18 @@
 #define MBUF_CACHE_SIZE 512
 #define NR_RXD  256
 #define NR_TXD  256
+
+/** Items/Actions parameters **/
+#define JUMP_ACTION_TABLE 2
+#define VLAN_VALUE 1
+#define VNI_VALUE 1
+#define GRE_PROTO  0x6558
+#define META_DATA 1
+#define TAG_INDEX 0
+#define PORT_ID_DST 1
+#define MARK_ID 1
+#define TEID_VALUE 1
+
+/** Flow items/actions max size **/
+#define MAX_ITEMS_NUM 20
+#define MAX_ACTIONS_NUM 20
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
index 30ce1b6cc0..62e038c430 100644
--- a/doc/guides/tools/flow-perf.rst
+++ b/doc/guides/tools/flow-perf.rst
@@ -4,7 +4,19 @@
 RTE Flow performance tool
 =========================
 
-Application for rte_flow performance testing.
+Application for rte_flow performance testing. The application provide the
+ability to test insertion rate of specific rte_flow rule, by stressing it
+to the NIC, and calculate the insertion rate.
+
+The application offers some options in the command line, to configure
+which rule to apply.
+
+After that the application will start producing rules with same pattern
+but increasing the outer IP source address by 1 each time, thus it will
+give different flow each time, and all other items will have open masks.
+
+The current design have single core insertion rate. In the future we may
+have a multi core insertion rate measurement support in the app.
 
 
 Compiling the Application
@@ -61,9 +73,179 @@ a ``--`` separator:
 
 .. code-block:: console
 
-	sudo ./test-flow-perf -n 4 -w 08:00.0,dv_flow_en=1 --
+	sudo ./flow_perf -n 4 -w 08:00.0,dv_flow_en=1 -- --ingress --ether --ipv4 --queue --flows-count=1000000
 
 The command line options are:
 
 *	``--help``
 	Display a help message and quit.
+
+*	``--flows-count=N``
+	Set the number of needed flows to insert,
+	where 1 <= N <= "number of flows".
+	The default value is 4,000,000.
+
+*	``--dump-iterations``
+	Print rates for each iteration of flows.
+	Default iteration size is 100,000 flows.
+
+
+Attributes:
+
+*	``--ingress``
+	Set Ingress attribute to all flows attributes.
+
+*	``--egress``
+	Set Egress attribute to all flows attributes.
+
+*	``--transfer``
+	Set Transfer attribute to all flows attributes.
+
+*	``--group=N``
+	Set group for all flows, where N >= 0.
+	Default group is 0.
+
+Items:
+
+*	``--ether``
+	Add Ether item to all flows items, This item have open mask.
+
+*	``--vlan``
+	Add VLAN item to all flows items,
+	This item has the VLAN value defined in user_parameters.h
+	under ``VLAN_VALUE`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--ipv4``
+	Add IPv4 item to all flows items,
+	This item have incremental source IP, with full mask.
+	Other fields are open mask.
+
+*	``--ipv6``
+	Add IPv6 item to all flows item,
+	This item have incremental source IP, with full mask.
+	Other fields are open mask.
+
+*	``--tcp``
+	Add TCP item to all flows items, This item have open mask.
+
+*	``--udp``
+	Add UDP item to all flows items, This item have open mask.
+
+*	``--vxlan``
+	Add VXLAN item to all flows items,
+	This item have VNI value defined in user_parameters.h
+	under ``VNI_VALUE`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--vxlan-gpe``
+	Add VXLAN-GPE item to all flows items,
+	This item have VNI value defined in user_parameters.h
+	under ``VNI_VALUE`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--gre``
+	Add GRE item to all flows items,
+	This item have protocol value defined in user_parameters.h
+	under ``GRE_PROTO`` with full mask, default protocol = 0x6558 "Ether"
+	Other fields are open mask.
+
+*	``--geneve``
+	Add GENEVE item to all flows items,
+	This item have VNI value defined in user_parameters.h
+	under ``VNI_VALUE`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--gtp``
+	Add GTP item to all flows items,
+	This item have TEID value defined in user_parameters.h
+	under ``TEID_VALUE`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--meta``
+	Add Meta item to all flows items,
+	This item have data value defined in user_parameters.h
+	under ``META_DATA`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--tag``
+	Add Tag item to all flows items,
+	This item have data value defined in user_parameters.h
+	under ``META_DATA`` with full mask, default value = 1.
+
+	Also it have tag value defined in user_parameters.h
+	under ``TAG_INDEX`` with full mask, default value = 0.
+	Other fields are open mask.
+
+
+Actions:
+
+*	``--port-id``
+	Add port redirection action to all flows actions.
+	Port redirection destination is defined in user_parameters.h
+	under PORT_ID_DST, default value = 1.
+
+*	``--rss``
+	Add RSS action to all flows actions,
+	The queues in RSS action will be all queues configured
+	in the app.
+
+*	``--queue``
+	Add queue action to all flows actions,
+	The queue will change in round robin state for each flow.
+
+	For example:
+		The app running with 4 RX queues
+		Flow #0: queue index 0
+		Flow #1: queue index 1
+		Flow #2: queue index 2
+		Flow #3: queue index 3
+		Flow #4: queue index 0
+		...
+
+*	``--jump``
+	Add jump action to all flows actions.
+	Jump action destination is defined in user_parameters.h
+	under ``JUMP_ACTION_TABLE``, default value = 2.
+
+*	``--mark``
+	Add mark action to all flows actions.
+	Mark action id is defined in user_parameters.h
+	under ``MARK_ID``, default value = 1.
+
+*	``--count``
+	Add count action to all flows actions.
+
+*	``--set-meta``
+	Add set-meta action to all flows actions.
+	Meta data is defined in user_parameters.h under ``META_DATA``
+	with full mask, default value = 1.
+
+*	``--set-tag``
+	Add set-tag action to all flows actions.
+	Meta data is defined in user_parameters.h under ``META_DATA``
+	with full mask, default value = 1.
+
+	Tag index is defined in user_parameters.h under ``TAG_INDEX``
+	with full mask, default value = 0.
+
+*	``--drop``
+	Add drop action to all flows actions.
+
+*	``--hairpin-queue``
+	Add hairpin queue action to all flows actions.
+	The queue will change in round robin state for each flow.
+
+	For example:
+		The app running with 4 RX hairpin queues and 4 normal RX queues
+		Flow #0: queue index 4
+		Flow #1: queue index 5
+		Flow #2: queue index 6
+		Flow #3: queue index 7
+		Flow #4: queue index 4
+		...
+
+*	``--hairpin-rss``
+	Add hairpin RSS action to all flows actions.
+	The queues in RSS action will be all hairpin queues configured
+	in the app.
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v2 3/5] app/test-flow-perf: add deletion rate calculation
  2020-04-30  7:08   ` [dpdk-dev] [PATCH v2 0/5] *** Introduce flow perf application *** Wisam Jaddo
  2020-04-30  7:08     ` [dpdk-dev] [PATCH v2 1/5] app/test-flow-perf: add flow performance skeleton Wisam Jaddo
  2020-04-30  7:08     ` [dpdk-dev] [PATCH v2 2/5] app/test-flow-perf: add insertion rate calculation Wisam Jaddo
@ 2020-04-30  7:08     ` " Wisam Jaddo
  2020-04-30  7:08     ` [dpdk-dev] [PATCH v2 4/5] app/test-flow-perf: add memory dump to app Wisam Jaddo
  2020-04-30  7:08     ` [dpdk-dev] [PATCH v2 5/5] app/test-flow-perf: add packet forwarding support Wisam Jaddo
  4 siblings, 0 replies; 102+ messages in thread
From: Wisam Jaddo @ 2020-04-30  7:08 UTC (permalink / raw)
  To: jackmin, thomas, jerinjacobk, gerlitz.or, l.yan, dev

Add the ability to test deletion rate for flow performance
application.

This feature is disabled by default, and can be enabled by
adding "--deletion-rate" to the application command line options.

Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 app/test-flow-perf/main.c      | 86 ++++++++++++++++++++++++++++++++++
 doc/guides/tools/flow-perf.rst |  4 ++
 2 files changed, 90 insertions(+)

diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index 115af4f302..7c11c0b577 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -62,6 +62,7 @@ static uint16_t flow_actions;
 static uint8_t flow_attrs;
 static volatile bool force_quit;
 static volatile bool dump_iterations;
+static volatile bool delete_flag;
 static struct rte_mempool *mbuf_mp;
 static uint32_t nb_lcores;
 static uint32_t flows_count;
@@ -75,6 +76,8 @@ static void usage(char *progname)
 		" flows to insert, default is 4,000,000\n");
 	printf("  --dump-iterations: To print rates for each"
 		" iteration\n");
+	printf("  --deletion-rate: Enable deletion rate"
+		" calculations\n");
 
 	printf("To set flow attributes:\n");
 	printf("  --ingress: set ingress attribute in flows\n");
@@ -123,6 +126,7 @@ args_parse(int argc, char **argv)
 		{ "help",                       0, 0, 0 },
 		{ "flows-count",                1, 0, 0 },
 		{ "dump-iterations",            0, 0, 0 },
+		{ "deletion-rate",              0, 0, 0 },
 		/* Attributes */
 		{ "ingress",                    0, 0, 0 },
 		{ "egress",                     0, 0, 0 },
@@ -304,6 +308,8 @@ args_parse(int argc, char **argv)
 			}
 			if (!strcmp(lgopts[opt_idx].name, "dump-iterations"))
 				dump_iterations = true;
+			if (!strcmp(lgopts[opt_idx].name, "deletion-rate"))
+				delete_flag = true;
 			break;
 		default:
 			usage(argv[0]);
@@ -323,9 +329,75 @@ print_flow_error(struct rte_flow_error error)
 		error.message ? error.message : "(no stated reason)");
 }
 
+/*
+ * Destroy the flows stored in flow_list on port_id and print the
+ * deletion rate.
+ *
+ * flow_list is walked from index 0 for at most flows_count entries; the
+ * walk stops at the first NULL entry. Any rte_flow_destroy() failure is
+ * fatal (rte_exit).
+ *
+ * Timing is sampled with clock() once per batch of iterations_number
+ * flows. The time spent in a trailing, partially filled batch is added to
+ * the total as well, so the total rate is computed over every flow that
+ * was actually destroyed.
+ *
+ * Side effect: iterations_number is clamped to flows_count.
+ */
+static inline void
+destroy_flows(int port_id, struct rte_flow **flow_list)
+{
+	struct rte_flow_error error;
+	clock_t start_iter, end_iter;
+	double cpu_time_used = 0;
+	double flows_rate;
+	double cpu_time_per_iter[MAX_ITERATIONS];
+	double delta;
+	uint32_t deleted = 0;
+	uint32_t iter_id;
+	uint32_t i;
+
+	/* -1 marks an iteration slot that was never filled */
+	for (i = 0; i < MAX_ITERATIONS; i++)
+		cpu_time_per_iter[i] = -1;
+
+	if (iterations_number > flows_count)
+		iterations_number = flows_count;
+
+	/* Deletion Rate */
+	printf("Flows Deletion on port = %d\n", port_id);
+	start_iter = clock();
+	for (i = 0; i < flows_count; i++) {
+		if (!flow_list[i])
+			break;
+
+		memset(&error, 0x33, sizeof(error));
+		if (rte_flow_destroy(port_id, flow_list[i], &error)) {
+			print_flow_error(error);
+			rte_exit(EXIT_FAILURE, "Error in deleting flow");
+		}
+		deleted++;
+
+		/* Only sample the clock at the end of a full batch */
+		if (deleted % iterations_number)
+			continue;
+
+		/* Save the deletion rate of each iter */
+		end_iter = clock();
+		delta = (double)(end_iter - start_iter) / CLOCKS_PER_SEC;
+		cpu_time_used += delta;
+		iter_id = (deleted / iterations_number) - 1;
+		/* Never write past the per-iteration table */
+		if (iter_id < MAX_ITERATIONS)
+			cpu_time_per_iter[iter_id] = delta;
+		start_iter = clock();
+	}
+
+	/* Account for the flows of a trailing, partially filled batch */
+	if (deleted && (deleted % iterations_number)) {
+		end_iter = clock();
+		cpu_time_used +=
+			(double)(end_iter - start_iter) / CLOCKS_PER_SEC;
+	}
+
+	/* Deletion rate per iteration */
+	if (dump_iterations)
+		for (i = 0; i < MAX_ITERATIONS; i++) {
+			if (cpu_time_per_iter[i] == -1)
+				continue;
+			/* A batch faster than the clock resolution has no rate */
+			flows_rate = cpu_time_per_iter[i] > 0 ?
+				(iterations_number /
+				cpu_time_per_iter[i]) / 1000 : 0;
+			printf(":: Iteration #%u: %u flows "
+				"in %f sec[ Rate = %f K/Sec ]\n",
+				i, iterations_number,
+				cpu_time_per_iter[i], flows_rate);
+		}
+
+	/* Deletion rate for all flows; avoid dividing by a zero duration */
+	flows_rate = cpu_time_used > 0 ?
+		(deleted / cpu_time_used) / 1000 : 0;
+	printf("\n:: Total flow deletion rate -> %f K/Sec\n",
+		flows_rate);
+	printf(":: The time for deleting %u in flows %f seconds\n",
+		deleted, cpu_time_used);
+}
+
 static inline void
 flows_handler(void)
 {
+	struct rte_flow **flow_list;
 	struct rte_flow_error error;
 	clock_t start_iter, end_iter;
 	double cpu_time_used;
@@ -337,6 +409,7 @@ flows_handler(void)
 	int port_id;
 	int iter_id;
 	uint32_t eagain_counter = 0;
+	uint32_t flow_index;
 
 	nr_ports = rte_eth_dev_count_avail();
 
@@ -348,8 +421,14 @@ flows_handler(void)
 
 	printf(":: Flows Count per port: %d\n", flows_count);
 
+	flow_list = rte_zmalloc("flow_list",
+		(sizeof(struct rte_flow *) * flows_count) + 1, 0);
+	if (flow_list == NULL)
+		rte_exit(EXIT_FAILURE, "No Memory available!");
+
 	for (port_id = 0; port_id < nr_ports; port_id++) {
 		cpu_time_used = 0;
+		flow_index = 0;
 		if (flow_group > 0) {
 			/*
 			 * Create global rule to jumo into flow_group
@@ -366,6 +445,7 @@ flows_handler(void)
 				print_flow_error(error);
 				rte_exit(EXIT_FAILURE, "error in creating flow");
 			}
+			flow_list[flow_index++] = flow;
 		}
 
 		/* Insertion Rate */
@@ -389,6 +469,8 @@ flows_handler(void)
 				rte_exit(EXIT_FAILURE, "error in creating flow");
 			}
 
+			flow_list[flow_index++] = flow;
+
 			if (i && !((i + 1) % iterations_number)) {
 				/* Save the insertion rate of each iter */
 				end_iter = clock();
@@ -422,6 +504,9 @@ flows_handler(void)
 		printf(":: The time for creating %d in flows %f seconds\n",
 						flows_count, cpu_time_used);
 		printf(":: EAGAIN counter = %d\n", eagain_counter);
+
+		if (delete_flag)
+			destroy_flows(port_id, flow_list);
 	}
 }
 
@@ -580,6 +665,7 @@ main(int argc, char **argv)
 
 	force_quit = false;
 	dump_iterations = false;
+	delete_flag = false;
 	flows_count = 4000000;
 	iterations_number = 100000;
 	flow_group = 0;
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
index 62e038c430..e07e659df5 100644
--- a/doc/guides/tools/flow-perf.rst
+++ b/doc/guides/tools/flow-perf.rst
@@ -18,6 +18,8 @@ give different flow each time, and all other items will have open masks.
 The current design have single core insertion rate. In the future we may
 have a multi core insertion rate measurement support in the app.
 
+The application also provide the ability to measure rte flow deletion rate.
+
 
 Compiling the Application
 =========================
@@ -89,6 +91,8 @@ The command line options are:
 	Print rates for each iteration of flows.
 	Default iteration is 1,00,000.
 
+*	``--deletion-rate``
+	Enable deletion rate calculations.
 
 Attributes:
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v2 4/5] app/test-flow-perf: add memory dump to app
  2020-04-30  7:08   ` [dpdk-dev] [PATCH v2 0/5] *** Introduce flow perf application *** Wisam Jaddo
                       ` (2 preceding siblings ...)
  2020-04-30  7:08     ` [dpdk-dev] [PATCH v2 3/5] app/test-flow-perf: add deletion " Wisam Jaddo
@ 2020-04-30  7:08     ` Wisam Jaddo
  2020-04-30  7:08     ` [dpdk-dev] [PATCH v2 5/5] app/test-flow-perf: add packet forwarding support Wisam Jaddo
  4 siblings, 0 replies; 102+ messages in thread
From: Wisam Jaddo @ 2020-04-30  7:08 UTC (permalink / raw)
  To: jackmin, thomas, jerinjacobk, gerlitz.or, l.yan, dev; +Cc: Suanming Mou

Introduce new feature to dump memory statistics of each socket
and a total for all before and after the creation.

This gives two main advantages:
1- Check the memory consumption for a large number of flows
("insertion rate scenario alone").

2- Check that there is no memory leakage after doing insertion then
deletion.

Signed-off-by: Suanming Mou <suanmingm@mellanox.com>
Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 app/test-flow-perf/main.c      | 69 ++++++++++++++++++++++++++++++++++
 doc/guides/tools/flow-perf.rst |  6 ++-
 2 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index 7c11c0b577..95435910de 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -62,6 +62,7 @@ static uint16_t flow_actions;
 static uint8_t flow_attrs;
 static volatile bool force_quit;
 static volatile bool dump_iterations;
+static volatile bool dump_socket_mem_flag;
 static volatile bool delete_flag;
 static struct rte_mempool *mbuf_mp;
 static uint32_t nb_lcores;
@@ -78,6 +79,7 @@ static void usage(char *progname)
 		" iteration\n");
 	printf("  --deletion-rate: Enable deletion rate"
 		" calculations\n");
+	printf("  --dump-socket-mem: to dump all socket memory\n");
 
 	printf("To set flow attributes:\n");
 	printf("  --ingress: set ingress attribute in flows\n");
@@ -127,6 +129,7 @@ args_parse(int argc, char **argv)
 		{ "flows-count",                1, 0, 0 },
 		{ "dump-iterations",            0, 0, 0 },
 		{ "deletion-rate",              0, 0, 0 },
+		{ "dump-socket-mem",            0, 0, 0 },
 		/* Attributes */
 		{ "ingress",                    0, 0, 0 },
 		{ "egress",                     0, 0, 0 },
@@ -310,6 +313,8 @@ args_parse(int argc, char **argv)
 				dump_iterations = true;
 			if (!strcmp(lgopts[opt_idx].name, "deletion-rate"))
 				delete_flag = true;
+			if (!strcmp(lgopts[opt_idx].name, "dump-socket-mem"))
+				dump_socket_mem_flag = true;
 			break;
 		default:
 			usage(argv[0]);
@@ -321,6 +326,62 @@ args_parse(int argc, char **argv)
 	printf("end_flow\n");
 }
 
+/*
+ * Collect the malloc heap statistics of every NUMA socket that reports a
+ * non-empty heap and return the sum of their allocated bytes.
+ *
+ * When dump_socket_mem_flag is set, the per-socket figures and, if at
+ * least one socket was counted, the grand total are also written to f.
+ */
+static size_t
+dump_socket_mem(FILE *f)
+{
+	struct rte_malloc_socket_stats socket_stats;
+	size_t total_bytes = 0;
+	size_t alloc_bytes = 0;
+	size_t free_bytes = 0;
+	unsigned int alloc_cnt = 0;
+	unsigned int free_cnt = 0;
+	unsigned int socket;
+	bool any_active = false;
+
+	for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
+		/* Skip sockets whose stats cannot be read ... */
+		if (rte_malloc_get_socket_stats(socket, &socket_stats))
+			continue;
+		/* ... and sockets that own no heap memory */
+		if (!socket_stats.heap_totalsz_bytes)
+			continue;
+
+		any_active = true;
+		total_bytes += socket_stats.heap_totalsz_bytes;
+		alloc_bytes += socket_stats.heap_allocsz_bytes;
+		free_bytes += socket_stats.heap_freesz_bytes;
+		alloc_cnt += socket_stats.alloc_count;
+		free_cnt += socket_stats.free_count;
+
+		if (!dump_socket_mem_flag)
+			continue;
+
+		fprintf(f, "::::::::::::::::::::::::::::::::::::::::");
+		fprintf(f,
+			"\nSocket %u:\nsize(M) total: %.6lf\nalloc:"
+			" %.6lf(%.3lf%%)\nfree: %.6lf"
+			"\nmax: %.6lf"
+			"\ncount alloc: %u\nfree: %u\n",
+			socket,
+			socket_stats.heap_totalsz_bytes / 1.0e6,
+			socket_stats.heap_allocsz_bytes / 1.0e6,
+			(double)socket_stats.heap_allocsz_bytes * 100 /
+			(double)socket_stats.heap_totalsz_bytes,
+			socket_stats.heap_freesz_bytes / 1.0e6,
+			socket_stats.greatest_free_size / 1.0e6,
+			socket_stats.alloc_count,
+			socket_stats.free_count);
+		fprintf(f, "::::::::::::::::::::::::::::::::::::::::");
+	}
+
+	if (dump_socket_mem_flag && any_active) {
+		fprintf(f,
+			"\nTotal: size(M)\ntotal: %.6lf"
+			"\nalloc: %.6lf(%.3lf%%)\nfree: %.6lf"
+			"\ncount alloc: %u\nfree: %u\n",
+			total_bytes / 1.0e6, alloc_bytes / 1.0e6,
+			(double)alloc_bytes * 100 / (double)total_bytes,
+			free_bytes / 1.0e6,
+			alloc_cnt, free_cnt);
+		fprintf(f, "::::::::::::::::::::::::::::::::::::::::\n");
+	}
+
+	return alloc_bytes;
+}
+
 static void
 print_flow_error(struct rte_flow_error error)
 {
@@ -657,6 +718,7 @@ main(int argc, char **argv)
 	uint16_t nr_ports;
 	int ret;
 	struct rte_flow_error error;
+	int64_t alloc, last_alloc;
 
 	nr_ports = rte_eth_dev_count_avail();
 	ret = rte_eal_init(argc, argv);
@@ -666,6 +728,7 @@ main(int argc, char **argv)
 	force_quit = false;
 	dump_iterations = false;
 	delete_flag = false;
+	dump_socket_mem_flag = false;
 	flows_count = 4000000;
 	iterations_number = 100000;
 	flow_group = 0;
@@ -686,7 +749,13 @@ main(int argc, char **argv)
 	if (nb_lcores <= 1)
 		rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
 
+	last_alloc = (int64_t)dump_socket_mem(stdout);
 	flows_handler();
+	alloc = (int64_t)dump_socket_mem(stdout);
+
+	if (last_alloc)
+		fprintf(stdout, ":: Memory allocation change(M): %.6lf\n",
+		(alloc - last_alloc) / 1.0e6);
 
 	RTE_LCORE_FOREACH_SLAVE(lcore_id)
 
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
index e07e659df5..28d452fd06 100644
--- a/doc/guides/tools/flow-perf.rst
+++ b/doc/guides/tools/flow-perf.rst
@@ -18,7 +18,8 @@ give different flow each time, and all other items will have open masks.
 The current design have single core insertion rate. In the future we may
 have a multi core insertion rate measurement support in the app.
 
-The application also provide the ability to measure rte flow deletion rate.
+The application also provide the ability to measure rte flow deletion rate,
+in addition to memory consumption before and after the flows creation.
 
 
 Compiling the Application
@@ -94,6 +95,9 @@ The command line options are:
 *	``--deletion-rate``
 	Enable deletion rate calculations.
 
+*	``--dump-socket-mem``
+	Dump the memory stats for each socket before the insertion and after.
+
 Attributes:
 
 *	``--ingress``
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v2 5/5] app/test-flow-perf: add packet forwarding support
  2020-04-30  7:08   ` [dpdk-dev] [PATCH v2 0/5] *** Introduce flow perf application *** Wisam Jaddo
                       ` (3 preceding siblings ...)
  2020-04-30  7:08     ` [dpdk-dev] [PATCH v2 4/5] app/test-flow-perf: add memory dump to app Wisam Jaddo
@ 2020-04-30  7:08     ` Wisam Jaddo
  4 siblings, 0 replies; 102+ messages in thread
From: Wisam Jaddo @ 2020-04-30  7:08 UTC (permalink / raw)
  To: jackmin, thomas, jerinjacobk, gerlitz.or, l.yan, dev

Introduce packet forwarding support to the app to do
some performance measurements.

The measurements are reported in packets per second.
The forwarding starts after the insertion/deletion
operations have finished.

The support has single and multi core performance measurements.

Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 app/test-flow-perf/main.c      | 300 +++++++++++++++++++++++++++++++++
 doc/guides/tools/flow-perf.rst |   6 +
 2 files changed, 306 insertions(+)

diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index 95435910de..2596d05dc2 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -60,14 +60,45 @@ static uint8_t flow_group;
 static uint16_t flow_items;
 static uint16_t flow_actions;
 static uint8_t flow_attrs;
+
 static volatile bool force_quit;
 static volatile bool dump_iterations;
 static volatile bool dump_socket_mem_flag;
 static volatile bool delete_flag;
+static volatile bool enable_fwd;
+
 static struct rte_mempool *mbuf_mp;
 static uint32_t nb_lcores;
 static uint32_t flows_count;
 static uint32_t iterations_number;
+static uint32_t nb_lcores;
+
+#define MAX_PKT_BURST 32
+#define LCORE_MODE_PKT 1
+#define LCORE_MODE_STATS 2
+#define MAX_STREAMS 64
+#define MAX_LCORES 64
+
+/*
+ * One forwarding stream: packets received on (rx_port, rx_queue) are
+ * transmitted on (tx_port, tx_queue). init_lcore_info() sets all four
+ * fields to -1 to mark a slot as unused.
+ */
+struct stream {
+	int tx_port;
+	int tx_queue;
+	int rx_port;
+	int rx_queue;
+};
+
+/*
+ * Per-lcore forwarding state, stored in lcore_infos[] and indexed by
+ * lcore id.
+ */
+struct lcore_info {
+	/* 0 (idle), LCORE_MODE_PKT (forwarding) or LCORE_MODE_STATS */
+	int mode;
+	/* Number of streams this lcore is expected to serve */
+	int streams_nb;
+	struct stream streams[MAX_STREAMS];
+	/* stats */
+	uint64_t tx_pkts;	/* packets accepted by rte_eth_tx_burst() */
+	uint64_t tx_drops;	/* packets received but not transmitted */
+	uint64_t rx_pkts;	/* packets returned by rte_eth_rx_burst() */
+	/* Burst buffer shared by do_rx() and do_tx() */
+	struct rte_mbuf *pkts[MAX_PKT_BURST];
+} __attribute__((__aligned__(64))); /* let it be cacheline aligned */
+
+
+static struct lcore_info lcore_infos[MAX_LCORES];
 
 static void usage(char *progname)
 {
@@ -80,6 +111,8 @@ static void usage(char *progname)
 	printf("  --deletion-rate: Enable deletion rate"
 		" calculations\n");
 	printf("  --dump-socket-mem: to dump all socket memory\n");
+	printf("  --enable-fwd: to enable packets forwarding"
+		" after insertion\n");
 
 	printf("To set flow attributes:\n");
 	printf("  --ingress: set ingress attribute in flows\n");
@@ -130,6 +163,7 @@ args_parse(int argc, char **argv)
 		{ "dump-iterations",            0, 0, 0 },
 		{ "deletion-rate",              0, 0, 0 },
 		{ "dump-socket-mem",            0, 0, 0 },
+		{ "enable-fwd",                 0, 0, 0 },
 		/* Attributes */
 		{ "ingress",                    0, 0, 0 },
 		{ "egress",                     0, 0, 0 },
@@ -315,6 +349,8 @@ args_parse(int argc, char **argv)
 				delete_flag = true;
 			if (!strcmp(lgopts[opt_idx].name, "dump-socket-mem"))
 				dump_socket_mem_flag = true;
+			if (!strcmp(lgopts[opt_idx].name, "enable-fwd"))
+				enable_fwd = true;
 			break;
 		default:
 			usage(argv[0]);
@@ -582,6 +618,265 @@ signal_handler(int signum)
 	}
 }
 
+/*
+ * Receive one burst (up to MAX_PKT_BURST packets) from the given port and
+ * queue into li->pkts, add it to the lcore rx counter and return the
+ * number of packets received.
+ */
+static inline uint16_t
+do_rx(struct lcore_info *li, uint16_t rx_port, uint16_t rx_queue)
+{
+	const uint16_t nb_rx = rte_eth_rx_burst(rx_port, rx_queue,
+					li->pkts, MAX_PKT_BURST);
+
+	li->rx_pkts += nb_rx;
+	return nb_rx;
+}
+
+/*
+ * Transmit the first cnt packets of li->pkts on the given port and queue.
+ * Packets the driver did not accept are counted as drops and freed.
+ */
+static inline void
+do_tx(struct lcore_info *li, uint16_t cnt, uint16_t tx_port,
+			uint16_t tx_queue)
+{
+	const uint16_t sent = rte_eth_tx_burst(tx_port, tx_queue,
+					li->pkts, cnt);
+	uint16_t idx = sent;
+
+	li->tx_pkts += sent;
+	li->tx_drops += cnt - sent;
+
+	/* Everything past the sent prefix is still owned by us */
+	while (idx < cnt)
+		rte_pktmbuf_free(li->pkts[idx++]);
+}
+
+/*
+ * Method to convert numbers into pretty numbers that are easy
+ * to read. The design here is to add a comma after each three
+ * digits and store the result inside the caller's buffer.
+ *
+ * For example if n = 1799321, the output will be
+ * 1,799,321 after this method which is easier to read.
+ *
+ * buf must have room for at least 27 bytes: the 20 digits of the
+ * largest uint64_t value, 6 commas and the terminating NUL.
+ * Returns buf.
+ */
+static char *
+pretty_number(uint64_t n, char *buf)
+{
+	/* The largest uint64_t value splits into 7 groups of 3 digits */
+	char p[7][4];
+	int i = 0;
+	int off = 0;
+
+	/* ">=" so that exactly 1000 is rendered as "1,000" */
+	while (n >= 1000) {
+		snprintf(p[i], sizeof(p[i]), "%03d", (int)(n % 1000));
+		n /= 1000;
+		i += 1;
+	}
+
+	/* Most significant group is printed without zero padding */
+	snprintf(p[i++], sizeof(p[0]), "%d", (int)n);
+
+	while (i--)
+		off += sprintf(buf + off, "%s,", p[i]);
+	/* Drop the comma that follows the last group */
+	buf[off - 1] = '\0';
+
+	return buf;
+}
+
+/*
+ * Print, once per second until force_quit is raised, the tx / tx-drop / rx
+ * packet deltas of every lcore whose mode is LCORE_MODE_PKT, plus a total
+ * line when more than one such lcore exists.
+ *
+ * A snapshot of lcore_infos[] is kept between rounds so that each round
+ * shows the packets counted since the previous one. From the second round
+ * on, the cursor is first moved up by the number of lines printed last
+ * time so the table is redrawn in place.
+ */
+static void
+packet_per_second_stats(void)
+{
+	struct lcore_info *old;
+	struct lcore_info *li, *oli;
+	int nr_lines = 0;
+	int i;
+
+	old = rte_zmalloc("old",
+		sizeof(struct lcore_info) * MAX_LCORES, 0);
+	if (old == NULL)
+		rte_exit(EXIT_FAILURE, "No Memory available!");
+
+	memcpy(old, lcore_infos,
+		sizeof(struct lcore_info) * MAX_LCORES);
+
+	while (!force_quit) {
+		uint64_t total_tx_pkts = 0;
+		uint64_t total_rx_pkts = 0;
+		uint64_t total_tx_drops = 0;
+		uint64_t tx_delta, rx_delta, drops_delta;
+		char buf[3][32];
+		int nr_valid_core = 0;
+
+		sleep(1);
+
+		if (nr_lines) {
+			char go_up_nr_lines[16];
+
+			/* ANSI "cursor up N lines" escape sequence */
+			snprintf(go_up_nr_lines, sizeof(go_up_nr_lines),
+				"%c[%dA\r", 27, nr_lines);
+			printf("%s\r", go_up_nr_lines);
+		}
+
+		printf("\n%6s %16s %16s %16s\n", "core", "tx", "tx drops", "rx");
+		printf("%6s %16s %16s %16s\n", "------", "----------------",
+			"----------------", "----------------");
+		/* Blank line + header + separator */
+		nr_lines = 3;
+		for (i = 0; i < MAX_LCORES; i++) {
+			li  = &lcore_infos[i];
+			oli = &old[i];
+			if (li->mode != LCORE_MODE_PKT)
+				continue;
+
+			tx_delta    = li->tx_pkts  - oli->tx_pkts;
+			rx_delta    = li->rx_pkts  - oli->rx_pkts;
+			drops_delta = li->tx_drops - oli->tx_drops;
+			printf("%6d %16s %16s %16s\n", i,
+				pretty_number(tx_delta,    buf[0]),
+				pretty_number(drops_delta, buf[1]),
+				pretty_number(rx_delta,    buf[2]));
+
+			total_tx_pkts  += tx_delta;
+			total_rx_pkts  += rx_delta;
+			total_tx_drops += drops_delta;
+
+			nr_valid_core++;
+			nr_lines += 1;
+		}
+
+		if (nr_valid_core > 1) {
+			printf("%6s %16s %16s %16s\n", "total",
+				pretty_number(total_tx_pkts,  buf[0]),
+				pretty_number(total_tx_drops, buf[1]),
+				pretty_number(total_rx_pkts,  buf[2]));
+			nr_lines += 1;
+		}
+
+		memcpy(old, lcore_infos,
+			sizeof(struct lcore_info) * MAX_LCORES);
+	}
+
+	/* Release the snapshot once the stats loop has been asked to stop */
+	rte_free(old);
+}
+
+/*
+ * Per-lcore entry point launched by rte_eal_mp_remote_launch().
+ *
+ * Depending on the mode stored in lcore_infos[] for the calling lcore it
+ * returns immediately (mode 0), runs the stats printer (LCORE_MODE_STATS)
+ * or polls every configured stream of this lcore, receiving a burst and
+ * transmitting it back, until force_quit is raised.
+ *
+ * Always returns 0.
+ */
+static int
+start_forwarding(void *data __rte_unused)
+{
+	unsigned int lcore = rte_lcore_id();
+	int stream_id;
+	uint16_t cnt;
+	struct lcore_info *li;
+
+	/* lcore_infos[] only has MAX_LCORES slots; ignore ids beyond it */
+	if (lcore >= MAX_LCORES)
+		return 0;
+
+	li = &lcore_infos[lcore];
+	if (!li->mode)
+		return 0;
+
+	if (li->mode == LCORE_MODE_STATS) {
+		printf(":: started stats on lcore %u\n", lcore);
+		packet_per_second_stats();
+		return 0;
+	}
+
+	while (!force_quit)
+		for (stream_id = 0; stream_id < MAX_STREAMS; stream_id++) {
+			/* -1 marks a stream slot that was never assigned */
+			if (li->streams[stream_id].rx_port == -1)
+				continue;
+
+			cnt = do_rx(li,
+					li->streams[stream_id].rx_port,
+					li->streams[stream_id].rx_queue);
+			if (cnt)
+				do_tx(li, cnt,
+					li->streams[stream_id].tx_port,
+					li->streams[stream_id].tx_queue);
+		}
+	return 0;
+}
+
+/*
+ * Fill the global lcore_infos[] table with the forwarding plan:
+ *  - the first lcore returned by rte_get_next_lcore() is marked as the
+ *    stats lcore,
+ *  - every stream slot of every entry is reset to -1 (unused),
+ *  - the nr_port * RXQs forwarding streams are shared out between the
+ *    remaining (nb_lcores - 1) lcores,
+ *  - each (port, queue) pair becomes one stream whose tx side is the same
+ *    port and queue as its rx side,
+ *  - the resulting stream map is printed.
+ *
+ * NOTE(review): lcore_infos[] has MAX_LCORES entries and is indexed here
+ * by the value returned from rte_get_next_lcore() without a bound check;
+ * confirm that value can never reach MAX_LCORES.
+ */
+static void
+init_lcore_info(void)
+{
+	int i, j;
+	unsigned int lcore;
+	uint16_t nr_port;
+	uint16_t queue;
+	int port;
+	int stream_id = 0;
+	int streams_per_core;
+	int unassigned_streams;
+	int nb_fwd_streams;
+	nr_port = rte_eth_dev_count_avail();
+
+	/* First logical core is reserved for stats printing */
+	lcore = rte_get_next_lcore(-1, 0, 0);
+	lcore_infos[lcore].mode = LCORE_MODE_STATS;
+
+	/*
+	 * Initialize all cores
+	 * All cores at first must have -1 value in all streams
+	 * This means that this stream is not used, or not set
+	 * yet.
+	 */
+	for (i = 0; i < MAX_LCORES; i++)
+		for (j = 0; j < MAX_STREAMS; j++) {
+			lcore_infos[i].streams[j].tx_port = -1;
+			lcore_infos[i].streams[j].rx_port = -1;
+			lcore_infos[i].streams[j].tx_queue = -1;
+			lcore_infos[i].streams[j].rx_queue = -1;
+			lcore_infos[i].streams_nb = 0;
+		}
+
+	/*
+	 * Calculate the total streams count.
+	 * Also distribute those streams count between the available
+	 * logical cores except first core, since it's reserved for
+	 * stats prints.
+	 */
+	nb_fwd_streams = nr_port * RXQs;
+	/* Enough lcores for one stream each: give every lcore a quota of 1 */
+	if ((int)(nb_lcores - 1) >= nb_fwd_streams)
+		for (i = 0; i < (int)(nb_lcores - 1); i++) {
+			lcore = rte_get_next_lcore(lcore, 0, 0);
+			lcore_infos[lcore].streams_nb = 1;
+		}
+	else {
+		/*
+		 * Fewer lcores than streams: every lcore gets the integer
+		 * share, and the remainder is handed out one extra stream
+		 * at a time to the first lcores visited.
+		 */
+		streams_per_core = nb_fwd_streams / (nb_lcores - 1);
+		unassigned_streams = nb_fwd_streams % (nb_lcores - 1);
+		for (i = 0; i < (int)(nb_lcores - 1); i++) {
+			lcore = rte_get_next_lcore(lcore, 0, 0);
+			lcore_infos[lcore].streams_nb = streams_per_core;
+			if (unassigned_streams) {
+				lcore_infos[lcore].streams_nb++;
+				unassigned_streams--;
+			}
+		}
+	}
+
+	/*
+	 * Set the streams for the cores according to each logical
+	 * core stream count.
+	 * The streams is built on the design of what received should
+	 * forward as well, this means that if you received packets on
+	 * port 0 queue 0 then the same queue should forward the
+	 * packets, using the same logical core.
+	 */
+	/* Restart from the stats lcore; its quota is 0 so it is skipped */
+	lcore = rte_get_next_lcore(-1, 0, 0);
+	for (port = 0; port < nr_port; port++) {
+		/** Create FWD stream **/
+		for (queue = 0; queue < RXQs; queue++) {
+			/*
+			 * Move on to the next lcore when the current one has
+			 * no quota, or when stream_id has reached a multiple
+			 * of its quota; stream numbering restarts at 0.
+			 */
+			if (!lcore_infos[lcore].streams_nb ||
+				!(stream_id % lcore_infos[lcore].streams_nb)) {
+				lcore = rte_get_next_lcore(lcore, 0, 0);
+				lcore_infos[lcore].mode = LCORE_MODE_PKT;
+				stream_id = 0;
+			}
+			lcore_infos[lcore].streams[stream_id].rx_queue = queue;
+			lcore_infos[lcore].streams[stream_id].tx_queue = queue;
+			lcore_infos[lcore].streams[stream_id].rx_port = port;
+			lcore_infos[lcore].streams[stream_id].tx_port = port;
+			stream_id++;
+		}
+	}
+
+	/** Print all streams **/
+	printf(":: Stream -> core id[N]: (rx_port, rx_queue)->(tx_port, tx_queue)\n");
+	for (i = 0; i < MAX_LCORES; i++)
+		for (j = 0; j < MAX_STREAMS; j++) {
+			/** No streams for this core **/
+			if (lcore_infos[i].streams[j].tx_port == -1)
+				break;
+			printf("Stream -> core id[%d]: (%d,%d)->(%d,%d)\n",
+				i,
+				lcore_infos[i].streams[j].rx_port,
+				lcore_infos[i].streams[j].rx_queue,
+				lcore_infos[i].streams[j].tx_port,
+				lcore_infos[i].streams[j].tx_queue);
+		}
+}
+
 static void
 init_port(void)
 {
@@ -757,6 +1052,11 @@ main(int argc, char **argv)
 		fprintf(stdout, ":: Memory allocation change(M): %.6lf\n",
 		(alloc - last_alloc) / 1.0e6);
 
+	if (enable_fwd) {
+		init_lcore_info();
+		rte_eal_mp_remote_launch(start_forwarding, NULL, CALL_MASTER);
+	}
+
 	RTE_LCORE_FOREACH_SLAVE(lcore_id)
 
 	if (rte_eal_wait_lcore(lcore_id) < 0)
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
index 28d452fd06..ecd760de81 100644
--- a/doc/guides/tools/flow-perf.rst
+++ b/doc/guides/tools/flow-perf.rst
@@ -21,6 +21,8 @@ have a multi core insertion rate measurement support in the app.
 The application also provide the ability to measure rte flow deletion rate,
 in addition to memory consumption before and after the flows creation.
 
+The app supports single and multi core performance measurements.
+
 
 Compiling the Application
 =========================
@@ -98,6 +100,10 @@ The command line options are:
 *	``--dump-socket-mem``
 	Dump the memory stats for each socket before the insertion and after.
 
+*	``--enable-fwd``
+	Enable packets forwarding after insertion/deletion operations.
+
+
 Attributes:
 
 *	``--ingress``
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v3 0/5] *** Introduce flow perf application ***
  2020-04-30  7:08     ` [dpdk-dev] [PATCH v2 1/5] app/test-flow-perf: add flow performance skeleton Wisam Jaddo
@ 2020-04-30  9:32       ` Wisam Jaddo
  2020-04-30  9:32         ` [dpdk-dev] [PATCH v3 1/5] app/test-flow-perf: add flow performance skeleton Wisam Jaddo
                           ` (4 more replies)
  0 siblings, 5 replies; 102+ messages in thread
From: Wisam Jaddo @ 2020-04-30  9:32 UTC (permalink / raw)
  To: jackmin, thomas, jerinjacobk, gerlitz.or, l.yan, dev

Add new application to test rte flow performance from:
- Insertion rate.
- Deletion rate.
- Memory consumption.
- PPS forward measurement.

---
v3:
* Fix passing hairpin queues to hairpin rss action.

v2:
* reset cpu_time_used every port.
* generate different RSS action every flow with different RETA.
* Fix in commit log message

Wisam Jaddo (5):
  app/test-flow-perf: add flow performance skeleton
  app/test-flow-perf: add insertion rate calculation
  app/test-flow-perf: add deletion rate calculation
  app/test-flow-perf: add memory dump to app
  app/test-flow-perf: add packet forwarding support

 MAINTAINERS                          |    5 +
 app/Makefile                         |    1 +
 app/meson.build                      |    1 +
 app/test-flow-perf/Makefile          |   29 +
 app/test-flow-perf/actions_gen.c     |   86 +++
 app/test-flow-perf/actions_gen.h     |   48 ++
 app/test-flow-perf/flow_gen.c        |  176 +++++
 app/test-flow-perf/flow_gen.h        |   61 ++
 app/test-flow-perf/items_gen.c       |  265 +++++++
 app/test-flow-perf/items_gen.h       |   68 ++
 app/test-flow-perf/main.c            | 1071 ++++++++++++++++++++++++++
 app/test-flow-perf/meson.build       |   19 +
 app/test-flow-perf/user_parameters.h |   31 +
 config/common_base                   |    5 +
 doc/guides/tools/flow-perf.rst       |  265 +++++++
 doc/guides/tools/index.rst           |    1 +
 16 files changed, 2132 insertions(+)
 create mode 100644 app/test-flow-perf/Makefile
 create mode 100644 app/test-flow-perf/actions_gen.c
 create mode 100644 app/test-flow-perf/actions_gen.h
 create mode 100644 app/test-flow-perf/flow_gen.c
 create mode 100644 app/test-flow-perf/flow_gen.h
 create mode 100644 app/test-flow-perf/items_gen.c
 create mode 100644 app/test-flow-perf/items_gen.h
 create mode 100644 app/test-flow-perf/main.c
 create mode 100644 app/test-flow-perf/meson.build
 create mode 100644 app/test-flow-perf/user_parameters.h
 create mode 100644 doc/guides/tools/flow-perf.rst

-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v3 1/5] app/test-flow-perf: add flow performance skeleton
  2020-04-30  9:32       ` [dpdk-dev] [PATCH v3 0/5] *** Introduce flow perf application *** Wisam Jaddo
@ 2020-04-30  9:32         ` Wisam Jaddo
  2020-04-30 10:33           ` [dpdk-dev] [PATCH v4 0/5] Introduce flow perf application Wisam Jaddo
  2020-04-30  9:32         ` [dpdk-dev] [PATCH v3 2/5] app/test-flow-perf: add insertion rate calculation Wisam Jaddo
                           ` (3 subsequent siblings)
  4 siblings, 1 reply; 102+ messages in thread
From: Wisam Jaddo @ 2020-04-30  9:32 UTC (permalink / raw)
  To: jackmin, thomas, jerinjacobk, gerlitz.or, l.yan, dev

Add flow performance application skeleton.

Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 MAINTAINERS                          |   5 +
 app/Makefile                         |   1 +
 app/meson.build                      |   1 +
 app/test-flow-perf/Makefile          |  26 +++
 app/test-flow-perf/main.c            | 246 +++++++++++++++++++++++++++
 app/test-flow-perf/meson.build       |  11 ++
 app/test-flow-perf/user_parameters.h |  16 ++
 config/common_base                   |   5 +
 doc/guides/tools/flow-perf.rst       |  69 ++++++++
 doc/guides/tools/index.rst           |   1 +
 10 files changed, 381 insertions(+)
 create mode 100644 app/test-flow-perf/Makefile
 create mode 100644 app/test-flow-perf/main.c
 create mode 100644 app/test-flow-perf/meson.build
 create mode 100644 app/test-flow-perf/user_parameters.h
 create mode 100644 doc/guides/tools/flow-perf.rst

diff --git a/MAINTAINERS b/MAINTAINERS
index d31a809292..b5632c1bf5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1504,6 +1504,11 @@ T: git://dpdk.org/next/dpdk-next-net
 F: app/test-pmd/
 F: doc/guides/testpmd_app_ug/
 
+Flow performance tool
+M: Wisam Jaddo <wisamm@mellanox.com>
+F: app/test-flow-perf
+F: doc/guides/tools/flow-perf.rst
+
 Compression performance test application
 T: git://dpdk.org/next/dpdk-next-crypto
 F: app/test-compress-perf/
diff --git a/app/Makefile b/app/Makefile
index 823771c5fc..bd823f3db7 100644
--- a/app/Makefile
+++ b/app/Makefile
@@ -9,6 +9,7 @@ DIRS-$(CONFIG_RTE_PROC_INFO) += proc-info
 DIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += pdump
 DIRS-$(CONFIG_RTE_LIBRTE_ACL) += test-acl
 DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += test-cmdline
+DIRS-$(CONFIG_RTE_TEST_FLOW_PERF) += test-flow-perf
 DIRS-$(CONFIG_RTE_LIBRTE_FIB) += test-fib
 DIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += test-pipeline
 DIRS-$(CONFIG_RTE_LIBRTE_IPSEC) += test-sad
diff --git a/app/meson.build b/app/meson.build
index 0f7fe94649..e26f5b72f5 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -14,6 +14,7 @@ apps = [
 	'test-compress-perf',
 	'test-crypto-perf',
 	'test-eventdev',
+	'test-flow-perf',
 	'test-fib',
 	'test-pipeline',
 	'test-pmd',
diff --git a/app/test-flow-perf/Makefile b/app/test-flow-perf/Makefile
new file mode 100644
index 0000000000..45b1fb1464
--- /dev/null
+++ b/app/test-flow-perf/Makefile
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2020 Mellanox Technologies, Ltd
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifeq ($(CONFIG_RTE_TEST_FLOW_PERF),y)
+
+#
+# library name
+#
+APP = flow_perf
+
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -Wno-deprecated-declarations
+CFLAGS += -Wno-unused-function
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-y += main.c
+
+include $(RTE_SDK)/mk/rte.app.mk
+
+endif
diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
new file mode 100644
index 0000000000..156b9ef553
--- /dev/null
+++ b/app/test-flow-perf/main.c
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This file contains the application main file.
+ * This application provides the user the ability to test the
+ * insertion rate for specific rte_flow rule under stress state (~4M rules).
+ *
+ * Then it will also provide packet per second measurement after installing
+ * all rules, the user may send traffic to test the PPS that match the rules
+ * after all rules are installed, to check performance or functionality after
+ * the stress.
+ *
+ * The flows insertion will go for all ports first, then it will print the
+ * results, after that the application will go into forwarding packets mode
+ * it will start receiving traffic if any and then forwarding it back and
+ * gives packet per second measurement.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <netinet/in.h>
+#include <setjmp.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <assert.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/time.h>
+
+
+#include <rte_eal.h>
+#include <rte_common.h>
+#include <rte_malloc.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_net.h>
+#include <rte_flow.h>
+#include <rte_cycles.h>
+#include <rte_memory.h>
+
+#include "user_parameters.h"
+
+static uint32_t nb_lcores;
+static struct rte_mempool *mbuf_mp;
+
+static void usage(char *progname)
+{
+	printf("\nusage: %s", progname); /* only the program name for now */
+}
+
+/* Parse application options; exits on --help or on an invalid option. */
+static void
+args_parse(int argc, char **argv)
+{
+	int opt;
+	int opt_idx;
+	static const struct option lgopts[] = {
+		/* Control */
+		{ "help",                       0, 0, 0 },
+		/* getopt_long() requires an all-zero terminating entry. */
+		{ NULL, 0, NULL, 0 },
+	};
+
+	while ((opt = getopt_long(argc, argv, "", lgopts, &opt_idx)) != -1) {
+		switch (opt) {
+		case 0:
+			if (!strcmp(lgopts[opt_idx].name, "help")) {
+				usage(argv[0]);
+				rte_exit(EXIT_SUCCESS, "Displayed help\n");
+			}
+			break;
+		default:
+			/* optind >= 1 here, so argv[optind - 1] is never NULL. */
+			usage(argv[0]);
+			printf("Invalid option: %s\n", argv[optind - 1]);
+			rte_exit(EXIT_FAILURE, "Invalid option\n");
+			break;
+		}
+	}
+}
+
+static void
+init_port(void)
+{
+	int ret;
+	uint16_t i, j;
+	uint16_t port_id;
+	uint16_t nr_ports = rte_eth_dev_count_avail();
+	struct rte_eth_hairpin_conf hairpin_conf = {
+			.peer_count = 1,
+	};
+	struct rte_eth_conf port_conf = {
+		.rxmode = {
+			.split_hdr_size = 0,
+		},
+		.rx_adv_conf = {
+			.rss_conf.rss_hf =
+					ETH_RSS_IP  |
+					ETH_RSS_UDP |
+					ETH_RSS_TCP,
+		}
+	};
+	struct rte_eth_txconf txq_conf;
+	struct rte_eth_rxconf rxq_conf;
+	struct rte_eth_dev_info dev_info;
+	/* One shared mbuf pool, then configure/start each available port. */
+	if (nr_ports == 0)
+		rte_exit(EXIT_FAILURE, "Error: no port detected\n");
+	mbuf_mp = rte_pktmbuf_pool_create("mbuf_pool",
+					TOTAL_MBUF_NUM, MBUF_CACHE_SIZE,
+					0, MBUF_SIZE,
+					rte_socket_id());
+
+	if (mbuf_mp == NULL)
+		rte_exit(EXIT_FAILURE, "Error: can't init mbuf pool\n");
+	/* NOTE(review): assumes port ids are contiguous from 0 - confirm. */
+	for (port_id = 0; port_id < nr_ports; port_id++) {
+		ret = rte_eth_dev_info_get(port_id, &dev_info);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+					"Error during getting device (port %u) info: %s\n",
+					port_id, strerror(-ret));
+
+		port_conf.txmode.offloads &= dev_info.tx_offload_capa;
+		printf(":: initializing port: %d\n", port_id);
+		ret = rte_eth_dev_configure(port_id, RXQs + HAIRPIN_QUEUES,
+				TXQs + HAIRPIN_QUEUES, &port_conf);
+		if (ret < 0)
+			rte_exit(EXIT_FAILURE,
+					":: cannot configure device: err=%d, port=%u\n",
+					ret, port_id);
+		/* Regular Rx queues occupy indexes 0..RXQs-1. */
+		rxq_conf = dev_info.default_rxconf;
+		rxq_conf.offloads = port_conf.rxmode.offloads;
+		for (i = 0; i < RXQs; i++) {
+			ret = rte_eth_rx_queue_setup(port_id, i, NR_RXD,
+						rte_eth_dev_socket_id(port_id),
+						&rxq_conf,
+						mbuf_mp);
+			if (ret < 0)
+				rte_exit(EXIT_FAILURE,
+						":: Rx queue setup failed: err=%d, port=%u\n",
+						ret, port_id);
+		}
+
+		txq_conf = dev_info.default_txconf;
+		txq_conf.offloads = port_conf.txmode.offloads;
+
+		for (i = 0; i < TXQs; i++) {
+			ret = rte_eth_tx_queue_setup(port_id, i, NR_TXD,
+						rte_eth_dev_socket_id(port_id),
+						&txq_conf);
+			if (ret < 0)
+				rte_exit(EXIT_FAILURE,
+						":: Tx queue setup failed: err=%d, port=%u\n",
+						ret, port_id);
+		}
+
+		ret = rte_eth_promiscuous_enable(port_id);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+					":: promiscuous mode enable failed: err=%s, port=%u\n",
+					rte_strerror(-ret), port_id);
+		/* Hairpin Rx queue RXQs+j is peered with Tx queue TXQs+j. */
+		for (i = RXQs, j = 0; i < RXQs + HAIRPIN_QUEUES; i++, j++) {
+			hairpin_conf.peers[0].port = port_id;
+			hairpin_conf.peers[0].queue = j + TXQs;
+			ret = rte_eth_rx_hairpin_queue_setup(port_id, i,
+							NR_RXD, &hairpin_conf);
+			if (ret != 0)
+				rte_exit(EXIT_FAILURE,
+					":: Hairpin rx queue setup failed: err=%d, port=%u\n",
+					ret, port_id);
+		}
+		/* Hairpin Tx queue TXQs+j is peered with Rx queue RXQs+j. */
+		for (i = TXQs, j = 0; i < TXQs + HAIRPIN_QUEUES; i++, j++) {
+			hairpin_conf.peers[0].port = port_id;
+			hairpin_conf.peers[0].queue = j + RXQs;
+			ret = rte_eth_tx_hairpin_queue_setup(port_id, i,
+							NR_TXD, &hairpin_conf);
+			if (ret != 0)
+				rte_exit(EXIT_FAILURE,
+					":: Hairpin tx queue setup failed: err=%d, port=%u\n",
+					ret, port_id);
+		}
+
+		ret = rte_eth_dev_start(port_id);
+		if (ret < 0)
+			rte_exit(EXIT_FAILURE,
+				"rte_eth_dev_start:err=%d, port=%u\n",
+				ret, port_id);
+
+		printf(":: initializing port: %d done\n", port_id);
+	}
+}
+
+/* Initialize EAL and ports, wait for the worker lcores, then clean up. */
+int
+main(int argc, char **argv)
+{
+	uint16_t lcore_id;
+	uint16_t port, nr_ports;
+	int ret;
+	struct rte_flow_error error;
+
+	ret = rte_eal_init(argc, argv);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "EAL init failed\n");
+
+	/* Ports are probed by rte_eal_init(), so count them only now. */
+	nr_ports = rte_eth_dev_count_avail();
+	argc -= ret;
+	argv += ret;
+
+	if (argc > 1)
+		args_parse(argc, argv);
+
+	init_port();
+
+	nb_lcores = rte_lcore_count();
+	if (nb_lcores <= 1)
+		rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
+
+	RTE_LCORE_FOREACH_SLAVE(lcore_id)
+
+	if (rte_eal_wait_lcore(lcore_id) < 0)
+		break;
+
+	for (port = 0; port < nr_ports; port++) {
+		rte_flow_flush(port, &error);
+		rte_eth_dev_stop(port);
+		rte_eth_dev_close(port);
+	}
+	return 0;
+}
diff --git a/app/test-flow-perf/meson.build b/app/test-flow-perf/meson.build
new file mode 100644
index 0000000000..ec9bb3b3aa
--- /dev/null
+++ b/app/test-flow-perf/meson.build
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2020 Mellanox Technologies, Ltd
+
+# meson file, for building this example as part of a main DPDK build.
+#
+# To build this example as a standalone application with an already-installed
+# DPDK instance, use 'make'
+
+sources = files(
+	'main.c',
+)
diff --git a/app/test-flow-perf/user_parameters.h b/app/test-flow-perf/user_parameters.h
new file mode 100644
index 0000000000..56ec7f47b5
--- /dev/null
+++ b/app/test-flow-perf/user_parameters.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This file will hold the user parameters values
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+/** Configuration **/
+#define RXQs 4
+#define TXQs 4
+#define HAIRPIN_QUEUES 4
+#define TOTAL_MBUF_NUM 32000
+#define MBUF_SIZE 2048
+#define MBUF_CACHE_SIZE 512
+#define NR_RXD  256
+#define NR_TXD  256
diff --git a/config/common_base b/config/common_base
index 14000ba07e..eaaeaaaee2 100644
--- a/config/common_base
+++ b/config/common_base
@@ -1124,3 +1124,8 @@ CONFIG_RTE_APP_CRYPTO_PERF=y
 # Compile the eventdev application
 #
 CONFIG_RTE_APP_EVENTDEV=y
+
+#
+# Compile the rte flow perf application
+#
+CONFIG_RTE_TEST_FLOW_PERF=y
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
new file mode 100644
index 0000000000..30ce1b6cc0
--- /dev/null
+++ b/doc/guides/tools/flow-perf.rst
@@ -0,0 +1,69 @@
+..	SPDX-License-Identifier: BSD-3-Clause
+	Copyright 2020 Mellanox Technologies, Ltd
+
+RTE Flow performance tool
+=========================
+
+Application for rte_flow performance testing.
+
+
+Compiling the Application
+=========================
+The ``test-flow-perf`` application is compiled as part of the main compilation
+of the DPDK libraries and tools.
+
+Refer to the DPDK Getting Started Guides for details.
+The basic compilation steps are:
+
+#. Set the required environmental variables and go to the source directory:
+
+	.. code-block:: console
+
+		export RTE_SDK=/path/to/rte_sdk
+		cd $RTE_SDK
+
+#. Set the compilation target. For example:
+
+	.. code-block:: console
+
+		export RTE_TARGET=x86_64-native-linux-gcc
+
+#. Build the application:
+
+	.. code-block:: console
+
+		make install T=$RTE_TARGET
+
+#. The compiled application will be located at:
+
+	.. code-block:: console
+
+		$RTE_SDK/$RTE_TARGET/app/flow_perf
+
+
+Running the Application
+=======================
+
+EAL Command-line Options
+------------------------
+
+Please refer to :doc:`EAL parameters (Linux) <../linux_gsg/linux_eal_parameters>`
+or :doc:`EAL parameters (FreeBSD) <../freebsd_gsg/freebsd_eal_parameters>` for
+a list of available EAL command-line options.
+
+
+Flow performance Options
+------------------------
+
+The following are the command-line options for the flow performance application.
+They must be separated from the EAL options, shown in the previous section, with
+a ``--`` separator:
+
+.. code-block:: console
+
+	sudo ./test-flow-perf -n 4 -w 08:00.0,dv_flow_en=1 --
+
+The command line options are:
+
+*	``--help``
+	Display a help message and quit.
diff --git a/doc/guides/tools/index.rst b/doc/guides/tools/index.rst
index 782b30864e..7279daebc6 100644
--- a/doc/guides/tools/index.rst
+++ b/doc/guides/tools/index.rst
@@ -16,3 +16,4 @@ DPDK Tools User Guides
     cryptoperf
     comp_perf
     testeventdev
+    flow-perf
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v3 2/5] app/test-flow-perf: add insertion rate calculation
  2020-04-30  9:32       ` [dpdk-dev] [PATCH v3 0/5] *** Introduce flow perf application *** Wisam Jaddo
  2020-04-30  9:32         ` [dpdk-dev] [PATCH v3 1/5] app/test-flow-perf: add flow performance skeleton Wisam Jaddo
@ 2020-04-30  9:32         ` Wisam Jaddo
  2020-04-30  9:32         ` [dpdk-dev] [PATCH v3 3/5] app/test-flow-perf: add deletion " Wisam Jaddo
                           ` (2 subsequent siblings)
  4 siblings, 0 replies; 102+ messages in thread
From: Wisam Jaddo @ 2020-04-30  9:32 UTC (permalink / raw)
  To: jackmin, thomas, jerinjacobk, gerlitz.or, l.yan, dev

Add insertion rate calculation feature into flow
performance application.

The application now provides the ability to test
insertion rate of specific rte_flow rule, by
stressing it to the NIC, and calculate the
insertion rate.

The application offers some options in the command
line, to configure which rule to apply.

After that the application will start producing
rules with same pattern but increasing the outer IP
source address by 1 each time, thus it will give
different flow each time, and all other items will
have open masks.

The current design has a single-core insertion rate.
In the future we may have a multi core insertion
rate measurement support in the app.

Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 app/test-flow-perf/Makefile          |   3 +
 app/test-flow-perf/actions_gen.c     |  86 ++++++
 app/test-flow-perf/actions_gen.h     |  48 ++++
 app/test-flow-perf/flow_gen.c        | 176 ++++++++++++
 app/test-flow-perf/flow_gen.h        |  61 ++++
 app/test-flow-perf/items_gen.c       | 265 +++++++++++++++++
 app/test-flow-perf/items_gen.h       |  68 +++++
 app/test-flow-perf/main.c            | 416 +++++++++++++++++++++++++--
 app/test-flow-perf/meson.build       |   8 +
 app/test-flow-perf/user_parameters.h |  15 +
 doc/guides/tools/flow-perf.rst       | 186 +++++++++++-
 11 files changed, 1307 insertions(+), 25 deletions(-)
 create mode 100644 app/test-flow-perf/actions_gen.c
 create mode 100644 app/test-flow-perf/actions_gen.h
 create mode 100644 app/test-flow-perf/flow_gen.c
 create mode 100644 app/test-flow-perf/flow_gen.h
 create mode 100644 app/test-flow-perf/items_gen.c
 create mode 100644 app/test-flow-perf/items_gen.h

diff --git a/app/test-flow-perf/Makefile b/app/test-flow-perf/Makefile
index 45b1fb1464..968c7c60dd 100644
--- a/app/test-flow-perf/Makefile
+++ b/app/test-flow-perf/Makefile
@@ -19,6 +19,9 @@ CFLAGS += -Wno-unused-function
 #
 # all source are stored in SRCS-y
 #
+SRCS-y += actions_gen.c
+SRCS-y += flow_gen.c
+SRCS-y += items_gen.c
 SRCS-y += main.c
 
 include $(RTE_SDK)/mk/rte.app.mk
diff --git a/app/test-flow-perf/actions_gen.c b/app/test-flow-perf/actions_gen.c
new file mode 100644
index 0000000000..564ed820e4
--- /dev/null
+++ b/app/test-flow-perf/actions_gen.c
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * The file contains the implementations of actions generators.
+ * Each generator is responsible for preparing its action instance
+ * and initializing it with needed data.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ **/
+
+#include <sys/types.h>
+#include <rte_malloc.h>
+#include <rte_flow.h>
+#include <rte_ethdev.h>
+
+#include "actions_gen.h"
+#include "user_parameters.h"
+
+void
+gen_mark(void)
+{
+	mark_action.id = MARK_ID; /* constant id stored in the global conf */
+}
+
+void
+gen_queue(uint16_t queue)
+{
+	queue_action.index = queue; /* queue index for the QUEUE action conf */
+}
+
+void
+gen_jump(uint16_t next_table)
+{
+	jump_action.group = next_table; /* group for the JUMP action conf */
+}
+
+void
+gen_rss(uint16_t *queues, uint16_t queues_number)
+{
+	uint16_t queue;
+	struct action_rss_data *rss_data;
+	rss_data = rte_malloc("rss_data",
+		sizeof(struct action_rss_data), 0);
+	/* NOTE(review): no rte_free() for this allocation is visible here. */
+	if (rss_data == NULL)
+		rte_exit(EXIT_FAILURE, "No Memory available!");
+	/* conf.queue points at the queue[] array embedded in the same block. */
+	*rss_data = (struct action_rss_data){
+		.conf = (struct rte_flow_action_rss){
+			.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
+			.level = 0,
+			.types = ETH_RSS_IP,
+			.key_len = 0,
+			.queue_num = queues_number,
+			.key = 0,
+			.queue = rss_data->queue,
+		},
+		.key = { 0 },
+		.queue = { 0 },
+	};
+	/* NOTE(review): queues_number is not checked against queue[128]. */
+	for (queue = 0; queue < queues_number; queue++)
+		rss_data->queue[queue] = queues[queue];
+	/* Publish the new configuration through the global rss_action. */
+	rss_action = &rss_data->conf;
+}
+
+void
+gen_set_meta(void)
+{
+	meta_action.data = RTE_BE32(META_DATA);
+	meta_action.mask = RTE_BE32(0xffffffff); /* all 32 bits set */
+}
+
+void
+gen_set_tag(void)
+{
+	tag_action.data = RTE_BE32(META_DATA); /* same constant as set_meta */
+	tag_action.mask = RTE_BE32(0xffffffff); /* all 32 bits set */
+	tag_action.index = TAG_INDEX;
+}
+
+void
+gen_port_id(void)
+{
+	port_id.id = PORT_ID_DST; /* constant id stored in the global conf */
+}
diff --git a/app/test-flow-perf/actions_gen.h b/app/test-flow-perf/actions_gen.h
new file mode 100644
index 0000000000..556d48b871
--- /dev/null
+++ b/app/test-flow-perf/actions_gen.h
@@ -0,0 +1,48 @@
+/** SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This file contains the functions definitions to
+ * generate each supported action.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ **/
+
+#ifndef _ACTION_GEN_
+#define _ACTION_GEN_
+
+struct rte_flow_action_mark mark_action;
+struct rte_flow_action_queue queue_action;
+struct rte_flow_action_jump jump_action;
+struct rte_flow_action_rss *rss_action;
+struct rte_flow_action_set_meta meta_action;
+struct rte_flow_action_set_tag tag_action;
+struct rte_flow_action_port_id port_id;
+/* NOTE(review): non-extern definitions; each includer defines these. */
+/* Storage for struct rte_flow_action_rss including external data. */
+struct action_rss_data {
+	struct rte_flow_action_rss conf;
+	uint8_t key[64];
+	uint16_t queue[128];
+} action_rss_data;
+
+void
+gen_mark(void);
+
+void
+gen_queue(uint16_t queue);
+
+void
+gen_jump(uint16_t next_table);
+
+void
+gen_rss(uint16_t *queues, uint16_t queues_number);
+
+void
+gen_set_meta(void);
+
+void
+gen_set_tag(void);
+
+void
+gen_port_id(void);
+
+#endif
diff --git a/app/test-flow-perf/flow_gen.c b/app/test-flow-perf/flow_gen.c
new file mode 100644
index 0000000000..111c75b86b
--- /dev/null
+++ b/app/test-flow-perf/flow_gen.c
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * The file contains the implementations of the method to
+ * fill items, actions & attributes in their corresponding
+ * arrays, and then generate rte_flow rule.
+ *
+ * After the generation. The rule goes to validation then
+ * creation state and then return the results.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+#include <stdint.h>
+
+#include "flow_gen.h"
+#include "items_gen.h"
+#include "actions_gen.h"
+#include "user_parameters.h"
+
+
+/* Translate the INGRESS/EGRESS/TRANSFER bits and the group into attr. */
+static void
+fill_attributes(struct rte_flow_attr *attr,
+	uint8_t flow_attrs, uint16_t group)
+{
+	/* OR-in each flag so a bit already set in attr is never cleared. */
+	attr->ingress |= !!(flow_attrs & INGRESS);
+	attr->egress |= !!(flow_attrs & EGRESS);
+	attr->transfer |= !!(flow_attrs & TRANSFER);
+
+	attr->group = group;
+}
+
+static void
+fill_items(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint16_t flow_items, uint32_t outer_ip_src)
+{
+	uint8_t items_counter = 0;
+	/* Items are appended in the fixed order below, one per set bit. */
+	if (flow_items & META_ITEM)
+		add_meta_data(items, items_counter++);
+	if (flow_items & TAG_ITEM)
+		add_meta_tag(items, items_counter++);
+	if (flow_items & ETH_ITEM)
+		add_ether(items, items_counter++);
+	if (flow_items & VLAN_ITEM)
+		add_vlan(items, items_counter++);
+	if (flow_items & IPV4_ITEM)
+		add_ipv4(items, items_counter++, outer_ip_src);
+	if (flow_items & IPV6_ITEM)
+		add_ipv6(items, items_counter++, outer_ip_src);
+	if (flow_items & TCP_ITEM)
+		add_tcp(items, items_counter++);
+	if (flow_items & UDP_ITEM)
+		add_udp(items, items_counter++);
+	if (flow_items & VXLAN_ITEM)
+		add_vxlan(items, items_counter++);
+	if (flow_items & VXLAN_GPE_ITEM)
+		add_vxlan_gpe(items, items_counter++);
+	if (flow_items & GRE_ITEM)
+		add_gre(items, items_counter++);
+	if (flow_items & GENEVE_ITEM)
+		add_geneve(items, items_counter++);
+	if (flow_items & GTP_ITEM)
+		add_gtp(items, items_counter++);
+	/* Terminate the pattern array. */
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_END;
+}
+
+/* Build actions[] from the flow_actions bitmask, ending with END. */
+static void
+fill_actions(struct rte_flow_action actions[MAX_ACTIONS_NUM],
+	uint16_t flow_actions, uint32_t counter, uint16_t next_table)
+{
+	uint8_t actions_counter = 0;
+	uint16_t queues[RXQs];
+	uint16_t hairpin_queues[HAIRPIN_QUEUES];
+	uint16_t i;
+	/* Static: the conf pointer is read after this function returns. */
+	static struct rte_flow_action_count count_action;
+	uint8_t temp = counter & 0xff;
+
+	/* Non-fate actions */
+	if (flow_actions & MARK_ACTION) {
+		if (!counter)
+			gen_mark();
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_MARK;
+		actions[actions_counter++].conf = &mark_action;
+	}
+	if (flow_actions & COUNT_ACTION) {
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_COUNT;
+		actions[actions_counter++].conf = &count_action;
+	}
+	if (flow_actions & META_ACTION) {
+		if (!counter)
+			gen_set_meta();
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_SET_META;
+		actions[actions_counter++].conf = &meta_action;
+	}
+	if (flow_actions & TAG_ACTION) {
+		if (!counter)
+			gen_set_tag();
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_SET_TAG;
+		actions[actions_counter++].conf = &tag_action;
+	}
+	/* Fate actions */
+	if (flow_actions & QUEUE_ACTION) {
+		gen_queue(counter % RXQs);
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_QUEUE;
+		actions[actions_counter++].conf = &queue_action;
+	}
+	if (flow_actions & RSS_ACTION) {
+		for (i = 0; i < RXQs; i++)
+			queues[i] = (temp >> (i << 1)) & 0x3;
+		gen_rss(queues, RXQs);
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_RSS;
+		actions[actions_counter++].conf = rss_action;
+	}
+	if (flow_actions & JUMP_ACTION) {
+		if (!counter)
+			gen_jump(next_table);
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_JUMP;
+		actions[actions_counter++].conf = &jump_action;
+	}
+	if (flow_actions & PORT_ID_ACTION) {
+		if (!counter)
+			gen_port_id();
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_PORT_ID;
+		actions[actions_counter++].conf = &port_id;
+	}
+	if (flow_actions & DROP_ACTION)
+		actions[actions_counter++].type = RTE_FLOW_ACTION_TYPE_DROP;
+	if (flow_actions & HAIRPIN_QUEUE_ACTION) {
+		gen_queue((counter % HAIRPIN_QUEUES) + RXQs);
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_QUEUE;
+		actions[actions_counter++].conf = &queue_action;
+	}
+	if (flow_actions & HAIRPIN_RSS_ACTION) {
+		for (i = 0; i < HAIRPIN_QUEUES; i++)
+			hairpin_queues[i] = ((temp >> (i << 1)) & 0x3) + RXQs;
+		gen_rss(hairpin_queues, HAIRPIN_QUEUES);
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_RSS;
+		actions[actions_counter++].conf = rss_action;
+	}
+	actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_END;
+}
+
+struct rte_flow *
+generate_flow(uint16_t port_id,
+	uint16_t group,
+	uint8_t flow_attrs,
+	uint16_t flow_items,
+	uint16_t flow_actions,
+	uint16_t next_table,
+	uint32_t outer_ip_src,
+	struct rte_flow_error *error)
+{
+	struct rte_flow_attr attr;
+	struct rte_flow_item items[MAX_ITEMS_NUM];
+	struct rte_flow_action actions[MAX_ACTIONS_NUM];
+	struct rte_flow *flow = NULL;
+	/* Start from zeroed attr/items/actions so unset fields stay 0. */
+	memset(items, 0, sizeof(items));
+	memset(actions, 0, sizeof(actions));
+	memset(&attr, 0, sizeof(struct rte_flow_attr));
+
+	fill_attributes(&attr, flow_attrs, group);
+	/* outer_ip_src is also passed as fill_actions()'s counter argument. */
+	fill_actions(actions, flow_actions,
+			outer_ip_src, next_table);
+
+	fill_items(items, flow_items, outer_ip_src);
+	/* Returns rte_flow_create()'s result unchanged. */
+	flow = rte_flow_create(port_id, &attr, items, actions, error);
+	return flow;
+}
diff --git a/app/test-flow-perf/flow_gen.h b/app/test-flow-perf/flow_gen.h
new file mode 100644
index 0000000000..99cb9e3791
--- /dev/null
+++ b/app/test-flow-perf/flow_gen.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This file contains the items, actions and attributes
+ * definition. And the methods to prepare and fill items,
+ * actions and attributes to generate rte_flow rule.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+#ifndef _FLOW_GEN_
+#define _FLOW_GEN_
+
+#include <stdint.h>
+#include <rte_flow.h>
+
+#include "user_parameters.h"
+
+/* Items */
+#define ETH_ITEM       0x0001
+#define IPV4_ITEM      0x0002
+#define IPV6_ITEM      0x0004
+#define VLAN_ITEM      0x0008
+#define TCP_ITEM       0x0010
+#define UDP_ITEM       0x0020
+#define VXLAN_ITEM     0x0040
+#define VXLAN_GPE_ITEM 0x0080
+#define GRE_ITEM       0x0100
+#define GENEVE_ITEM    0x0200
+#define GTP_ITEM       0x0400
+#define META_ITEM      0x0800
+#define TAG_ITEM       0x1000
+
+/* Actions */
+#define QUEUE_ACTION   0x0001
+#define MARK_ACTION    0x0002
+#define JUMP_ACTION    0x0004
+#define RSS_ACTION     0x0008
+#define COUNT_ACTION   0x0010
+#define META_ACTION    0x0020
+#define TAG_ACTION     0x0040
+#define DROP_ACTION    0x0080
+#define PORT_ID_ACTION 0x0100
+#define HAIRPIN_QUEUE_ACTION 0x0200
+#define HAIRPIN_RSS_ACTION   0x0400
+
+/* Attributes */
+#define INGRESS  0x0001
+#define EGRESS   0x0002
+#define TRANSFER 0x0004
+
+struct rte_flow *
+generate_flow(uint16_t port_id,
+	uint16_t group,
+	uint8_t flow_attrs,
+	uint16_t flow_items,
+	uint16_t flow_actions,
+	uint16_t next_table,
+	uint32_t outer_ip_src,
+	struct rte_flow_error *error);
+
+#endif
diff --git a/app/test-flow-perf/items_gen.c b/app/test-flow-perf/items_gen.c
new file mode 100644
index 0000000000..fb9733d4e7
--- /dev/null
+++ b/app/test-flow-perf/items_gen.c
@@ -0,0 +1,265 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This file contains the implementations of the item
+ * related methods. Each item has a method that prepares
+ * the item and adds it to the items array at the given index.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+#include <stdint.h>
+#include <rte_flow.h>
+
+#include "items_gen.h"
+#include "user_parameters.h"
+
+static struct rte_flow_item_eth eth_spec;
+static struct rte_flow_item_eth eth_mask;
+static struct rte_flow_item_vlan vlan_spec;
+static struct rte_flow_item_vlan vlan_mask;
+static struct rte_flow_item_ipv4 ipv4_spec;
+static struct rte_flow_item_ipv4 ipv4_mask;
+static struct rte_flow_item_ipv6 ipv6_spec;
+static struct rte_flow_item_ipv6 ipv6_mask;
+static struct rte_flow_item_udp udp_spec;
+static struct rte_flow_item_udp udp_mask;
+static struct rte_flow_item_tcp tcp_spec;
+static struct rte_flow_item_tcp tcp_mask;
+static struct rte_flow_item_vxlan vxlan_spec;
+static struct rte_flow_item_vxlan vxlan_mask;
+static struct rte_flow_item_vxlan_gpe vxlan_gpe_spec;
+static struct rte_flow_item_vxlan_gpe vxlan_gpe_mask;
+static struct rte_flow_item_gre gre_spec;
+static struct rte_flow_item_gre gre_mask;
+static struct rte_flow_item_geneve geneve_spec;
+static struct rte_flow_item_geneve geneve_mask;
+static struct rte_flow_item_gtp gtp_spec;
+static struct rte_flow_item_gtp gtp_mask;
+static struct rte_flow_item_meta meta_spec;
+static struct rte_flow_item_meta meta_mask;
+static struct rte_flow_item_tag tag_spec;
+static struct rte_flow_item_tag tag_mask;
+
+
+/* Store an ETH item with all-zero (open) spec and mask in items[]. */
+void
+add_ether(struct rte_flow_item items[MAX_ITEMS_NUM], uint8_t items_counter)
+{
+	struct rte_flow_item *slot = &items[items_counter];
+
+	memset(&eth_mask, 0, sizeof(eth_mask));
+	memset(&eth_spec, 0, sizeof(eth_spec));
+
+	slot->mask = &eth_mask;
+	slot->spec = &eth_spec;
+	slot->type = RTE_FLOW_ITEM_TYPE_ETH;
+}
+
+/* Store a VLAN item matching TCI == VLAN_VALUE (full TCI mask) in items[]. */
+void
+add_vlan(struct rte_flow_item items[MAX_ITEMS_NUM], uint8_t items_counter)
+{
+	uint16_t tci = VLAN_VALUE;
+
+	memset(&vlan_mask, 0, sizeof(vlan_mask));
+	memset(&vlan_spec, 0, sizeof(vlan_spec));
+	vlan_mask.tci = RTE_BE16(0xffff);
+	vlan_spec.tci = RTE_BE16(tci);
+
+	items[items_counter].mask = &vlan_mask;
+	items[items_counter].spec = &vlan_spec;
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_VLAN;
+}
+
+/* Store an IPv4 item matching exactly on source address src_ipv4. */
+void
+add_ipv4(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter, uint32_t src_ipv4)
+{
+	memset(&ipv4_spec, 0, sizeof(struct rte_flow_item_ipv4));
+	memset(&ipv4_mask, 0, sizeof(struct rte_flow_item_ipv4));
+	/* src_ipv4 is taken as a CPU-order value; the header field is BE. */
+	ipv4_spec.hdr.src_addr = RTE_BE32(src_ipv4);
+	ipv4_mask.hdr.src_addr = RTE_BE32(0xffffffff);
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_IPV4;
+	items[items_counter].spec = &ipv4_spec;
+	items[items_counter].mask = &ipv4_mask;
+}
+
+
+/* Store an IPv6 item matching on a source address derived from src_ipv6. */
+void
+add_ipv6(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter, int src_ipv6)
+{
+	memset(&ipv6_spec, 0, sizeof(struct rte_flow_item_ipv6));
+	memset(&ipv6_mask, 0, sizeof(struct rte_flow_item_ipv6));
+
+	/* NOTE: memset() only uses the low byte of src_ipv6 (256 values). */
+	memset(&ipv6_spec.hdr.src_addr, src_ipv6,
+		sizeof(ipv6_spec.hdr.src_addr) / 2);
+
+	/* Full mask: every bit set (a fill value of 1 sets one bit per byte). */
+	memset(&ipv6_mask.hdr.src_addr, 0xff, sizeof(ipv6_mask.hdr.src_addr));
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_IPV6;
+	items[items_counter].spec = &ipv6_spec;
+	items[items_counter].mask = &ipv6_mask;
+}
+
+/* Store a TCP item with all-zero (open) spec and mask in items[]. */
+void
+add_tcp(struct rte_flow_item items[MAX_ITEMS_NUM], uint8_t items_counter)
+{
+	memset(&tcp_mask, 0, sizeof(tcp_mask));
+	memset(&tcp_spec, 0, sizeof(tcp_spec));
+
+	items[items_counter].mask = &tcp_mask;
+	items[items_counter].spec = &tcp_spec;
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_TCP;
+}
+
+/* Store a UDP item with all-zero (open) spec and mask in items[]. */
+void
+add_udp(struct rte_flow_item items[MAX_ITEMS_NUM], uint8_t items_counter)
+{
+	memset(&udp_mask, 0, sizeof(udp_mask));
+	memset(&udp_spec, 0, sizeof(udp_spec));
+
+	items[items_counter].mask = &udp_mask;
+	items[items_counter].spec = &udp_spec;
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_UDP;
+}
+
+/* Store a VXLAN item matching VNI == VNI_VALUE with a full VNI mask. */
+void
+add_vxlan(struct rte_flow_item items[MAX_ITEMS_NUM], uint8_t items_counter)
+{
+	const uint32_t vni = VNI_VALUE;
+	int byte;
+
+	memset(&vxlan_mask, 0, sizeof(vxlan_mask));
+	memset(&vxlan_spec, 0, sizeof(vxlan_spec));
+
+	/* vni[0] receives the most significant of the three VNI bytes. */
+	for (byte = 2; byte >= 0; byte--) {
+		vxlan_spec.vni[byte] = vni >> ((2 - byte) * 8);
+		vxlan_mask.vni[byte] = 0xff;
+	}
+	/* Standard vxlan flags */
+	vxlan_spec.flags = 0x8;
+
+	items[items_counter].mask = &vxlan_mask;
+	items[items_counter].spec = &vxlan_spec;
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_VXLAN;
+}
+
+/* Store a VXLAN-GPE item matching VNI == VNI_VALUE with a full VNI mask. */
+void
+add_vxlan_gpe(struct rte_flow_item items[MAX_ITEMS_NUM], uint8_t items_counter)
+{
+	const uint32_t vni = VNI_VALUE;
+	int byte;
+
+	memset(&vxlan_gpe_mask, 0, sizeof(vxlan_gpe_mask));
+	memset(&vxlan_gpe_spec, 0, sizeof(vxlan_gpe_spec));
+
+	/* vni[0] receives the most significant of the three VNI bytes. */
+	for (byte = 2; byte >= 0; byte--) {
+		vxlan_gpe_spec.vni[byte] = vni >> ((2 - byte) * 8);
+		vxlan_gpe_mask.vni[byte] = 0xff;
+	}
+	/* vxlan-gpe flags */
+	vxlan_gpe_spec.flags = 0x0c;
+
+	items[items_counter].mask = &vxlan_gpe_mask;
+	items[items_counter].spec = &vxlan_gpe_spec;
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE;
+}
+
+/* Store a GRE item matching protocol == GRE_PROTO (full mask) in items[]. */
+void
+add_gre(struct rte_flow_item items[MAX_ITEMS_NUM], uint8_t items_counter)
+{
+	uint16_t gre_proto = GRE_PROTO;
+
+	memset(&gre_mask, 0, sizeof(gre_mask));
+	memset(&gre_spec, 0, sizeof(gre_spec));
+	gre_mask.protocol = 0xffff;
+	gre_spec.protocol = RTE_BE16(gre_proto);
+
+	items[items_counter].mask = &gre_mask;
+	items[items_counter].spec = &gre_spec;
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_GRE;
+}
+
+/* Store a GENEVE item matching VNI == VNI_VALUE with a full VNI mask. */
+void
+add_geneve(struct rte_flow_item items[MAX_ITEMS_NUM], uint8_t items_counter)
+{
+	const uint32_t vni = VNI_VALUE;
+	int byte;
+
+	memset(&geneve_mask, 0, sizeof(geneve_mask));
+	memset(&geneve_spec, 0, sizeof(geneve_spec));
+	for (byte = 2; byte >= 0; byte--) {
+		geneve_spec.vni[byte] = vni >> ((2 - byte) * 8);
+		geneve_mask.vni[byte] = 0xff;
+	}
+
+	items[items_counter].mask = &geneve_mask;
+	items[items_counter].spec = &geneve_spec;
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_GENEVE;
+}
+
+/* Store a GTP item matching TEID == TEID_VALUE (full TEID mask) in items[]. */
+void
+add_gtp(struct rte_flow_item items[MAX_ITEMS_NUM], uint8_t items_counter)
+{
+	uint32_t teid = TEID_VALUE;
+
+	memset(&gtp_mask, 0, sizeof(gtp_mask));
+	memset(&gtp_spec, 0, sizeof(gtp_spec));
+	gtp_mask.teid = RTE_BE32(0xffffffff);
+	gtp_spec.teid = RTE_BE32(teid);
+
+	items[items_counter].mask = &gtp_mask;
+	items[items_counter].spec = &gtp_spec;
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_GTP;
+}
+
+/* Store a META item matching data == META_DATA (full mask) in items[]. */
+void
+add_meta_data(struct rte_flow_item items[MAX_ITEMS_NUM], uint8_t items_counter)
+{
+	uint32_t meta_value = META_DATA;
+
+	memset(&meta_mask, 0, sizeof(meta_mask));
+	memset(&meta_spec, 0, sizeof(meta_spec));
+	meta_mask.data = RTE_BE32(0xffffffff);
+	meta_spec.data = RTE_BE32(meta_value);
+
+	items[items_counter].mask = &meta_mask;
+	items[items_counter].spec = &meta_spec;
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_META;
+}
+
+
+/* Store a TAG item matching META_DATA at TAG_INDEX, both fully masked. */
+void
+add_meta_tag(struct rte_flow_item items[MAX_ITEMS_NUM], uint8_t items_counter)
+{
+	uint32_t tag_data = META_DATA;
+	uint8_t tag_index = TAG_INDEX;
+
+	memset(&tag_mask, 0, sizeof(tag_mask));
+	memset(&tag_spec, 0, sizeof(tag_spec));
+	tag_mask.data = RTE_BE32(0xffffffff);
+	tag_mask.index = 0xff;
+	tag_spec.data = RTE_BE32(tag_data);
+	tag_spec.index = tag_index;
+
+	items[items_counter].mask = &tag_mask;
+	items[items_counter].spec = &tag_spec;
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_TAG;
+}
diff --git a/app/test-flow-perf/items_gen.h b/app/test-flow-perf/items_gen.h
new file mode 100644
index 0000000000..0b01385951
--- /dev/null
+++ b/app/test-flow-perf/items_gen.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This file contains the items related methods
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+#ifndef _ITEMS_GEN_
+#define _ITEMS_GEN_
+
+#include <stdint.h>
+#include <rte_flow.h>
+
+#include "user_parameters.h"
+
+void
+add_ether(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_vlan(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_ipv4(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter, uint32_t src_ipv4);
+
+void
+add_ipv6(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter, int src_ipv6);
+
+void
+add_udp(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_tcp(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_vxlan(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_vxlan_gpe(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_gre(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_geneve(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_gtp(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_meta_data(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_meta_tag(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+#endif
diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index 156b9ef553..115af4f302 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -49,29 +49,119 @@
 #include <rte_cycles.h>
 #include <rte_memory.h>
 
+#include "flow_gen.h"
 #include "user_parameters.h"
 
-static uint32_t nb_lcores;
+#define MAX_ITERATIONS 100
+
+struct rte_flow *flow;
+static uint8_t flow_group;
+
+static uint16_t flow_items;
+static uint16_t flow_actions;
+static uint8_t flow_attrs;
+static volatile bool force_quit;
+static volatile bool dump_iterations;
 static struct rte_mempool *mbuf_mp;
+static uint32_t nb_lcores;
+static uint32_t flows_count;
+static uint32_t iterations_number;
 
 static void usage(char *progname)
 {
 	printf("\nusage: %s", progname);
+	printf("\nControl configurations:\n");
+	printf("  --flows-count=N: to set the number of needed"
+		" flows to insert, default is 4,000,000\n");
+	printf("  --dump-iterations: To print rates for each"
+		" iteration\n");
+
+	printf("To set flow attributes:\n");
+	printf("  --ingress: set ingress attribute in flows\n");
+	printf("  --egress: set egress attribute in flows\n");
+	printf("  --transfer: set transfer attribute in flows\n");
+	printf("  --group=N: set group for all flows,"
+		" default is 0\n");
+
+	printf("To set flow items:\n");
+	printf("  --ether: add ether layer in flow items\n");
+	printf("  --vlan: add vlan layer in flow items\n");
+	printf("  --ipv4: add ipv4 layer in flow items\n");
+	printf("  --ipv6: add ipv6 layer in flow items\n");
+	printf("  --tcp: add tcp layer in flow items\n");
+	printf("  --udp: add udp layer in flow items\n");
+	printf("  --vxlan: add vxlan layer in flow items\n");
+	printf("  --vxlan-gpe: add vxlan-gpe layer in flow items\n");
+	printf("  --gre: add gre layer in flow items\n");
+	printf("  --geneve: add geneve layer in flow items\n");
+	printf("  --gtp: add gtp layer in flow items\n");
+	printf("  --meta: add meta layer in flow items\n");
+	printf("  --tag: add tag layer in flow items\n");
+
+	printf("To set flow actions:\n");
+	printf("  --port-id: add port-id action in flow actions\n");
+	printf("  --rss: add rss action in flow actions\n");
+	printf("  --queue: add queue action in flow actions\n");
+	printf("  --jump: add jump action in flow actions\n");
+	printf("  --mark: add mark action in flow actions\n");
+	printf("  --count: add count action in flow actions\n");
+	printf("  --set-meta: add set meta action in flow actions\n");
+	printf("  --set-tag: add set tag action in flow actions\n");
+	printf("  --drop: add drop action in flow actions\n");
+	printf("  --hairpin-queue: add hairpin-queue action in flow actions\n");
+	printf("  --hairpin-rss: add hairpin-rss action in flow actions\n");
 }
 
 static void
 args_parse(int argc, char **argv)
 {
 	char **argvopt;
-	int opt;
+	int n, opt;
 	int opt_idx;
 	static struct option lgopts[] = {
 		/* Control */
 		{ "help",                       0, 0, 0 },
+		{ "flows-count",                1, 0, 0 },
+		{ "dump-iterations",            0, 0, 0 },
+		/* Attributes */
+		{ "ingress",                    0, 0, 0 },
+		{ "egress",                     0, 0, 0 },
+		{ "transfer",                   0, 0, 0 },
+		{ "group",                      1, 0, 0 },
+		/* Items */
+		{ "ether",                      0, 0, 0 },
+		{ "vlan",                       0, 0, 0 },
+		{ "ipv4",                       0, 0, 0 },
+		{ "ipv6",                       0, 0, 0 },
+		{ "tcp",                        0, 0, 0 },
+		{ "udp",                        0, 0, 0 },
+		{ "vxlan",                      0, 0, 0 },
+		{ "vxlan-gpe",                  0, 0, 0 },
+		{ "gre",                        0, 0, 0 },
+		{ "geneve",                     0, 0, 0 },
+		{ "gtp",                        0, 0, 0 },
+		{ "meta",                       0, 0, 0 },
+		{ "tag",                        0, 0, 0 },
+		/* Actions */
+		{ "port-id",                    0, 0, 0 },
+		{ "rss",                        0, 0, 0 },
+		{ "queue",                      0, 0, 0 },
+		{ "jump",                       0, 0, 0 },
+		{ "mark",                       0, 0, 0 },
+		{ "count",                      0, 0, 0 },
+		{ "set-meta",                   0, 0, 0 },
+		{ "set-tag",                    0, 0, 0 },
+		{ "drop",                       0, 0, 0 },
+		{ "hairpin-queue",              0, 0, 0 },
+		{ "hairpin-rss",                0, 0, 0 },
 	};
 
+	flow_items = 0;
+	flow_actions = 0;
+	flow_attrs = 0;
 	argvopt = argv;
 
+	printf(":: Flow -> ");
 	while ((opt = getopt_long(argc, argvopt, "",
 				lgopts, &opt_idx)) != EOF) {
 		switch (opt) {
@@ -80,6 +170,140 @@ args_parse(int argc, char **argv)
 				usage(argv[0]);
 				rte_exit(EXIT_SUCCESS, "Displayed help\n");
 			}
+			/* Attributes */
+			if (!strcmp(lgopts[opt_idx].name, "ingress")) {
+				flow_attrs |= INGRESS;
+				printf("ingress ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "egress")) {
+				flow_attrs |= EGRESS;
+				printf("egress ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "transfer")) {
+				flow_attrs |= TRANSFER;
+				printf("transfer ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "group")) {
+				n = atoi(optarg);
+				if (n >= 0)
+					flow_group = n;
+				else
+					rte_exit(EXIT_SUCCESS,
+						"flow group should be >= 0");
+				printf("group %d ", flow_group);
+			}
+			/* Items */
+			if (!strcmp(lgopts[opt_idx].name, "ether")) {
+				flow_items |= ETH_ITEM;
+				printf("ether / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "ipv4")) {
+				flow_items |= IPV4_ITEM;
+				printf("ipv4 / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "vlan")) {
+				flow_items |= VLAN_ITEM;
+				printf("vlan / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "ipv6")) {
+				flow_items |= IPV6_ITEM;
+				printf("ipv6 / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "tcp")) {
+				flow_items |= TCP_ITEM;
+				printf("tcp / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "udp")) {
+				flow_items |= UDP_ITEM;
+				printf("udp / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "vxlan")) {
+				flow_items |= VXLAN_ITEM;
+				printf("vxlan / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "vxlan-gpe")) {
+				flow_items |= VXLAN_GPE_ITEM;
+				printf("vxlan-gpe / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "gre")) {
+				flow_items |= GRE_ITEM;
+				printf("gre / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "geneve")) {
+				flow_items |= GENEVE_ITEM;
+				printf("geneve / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "gtp")) {
+				flow_items |= GTP_ITEM;
+				printf("gtp / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "meta")) {
+				flow_items |= META_ITEM;
+				printf("meta / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "tag")) {
+				flow_items |= TAG_ITEM;
+				printf("tag / ");
+			}
+			/* Actions */
+			if (!strcmp(lgopts[opt_idx].name, "port-id")) {
+				flow_actions |= PORT_ID_ACTION;
+				printf("port-id / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "rss")) {
+				flow_actions |= RSS_ACTION;
+				printf("rss / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "hairpin-rss")) {
+				flow_actions |= HAIRPIN_RSS_ACTION;
+				printf("hairpin-rss / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "queue")) {
+				flow_actions |= QUEUE_ACTION;
+				printf("queue / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "hairpin-queue")) {
+				flow_actions |= HAIRPIN_QUEUE_ACTION;
+				printf("hairpin-queue / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "jump")) {
+				flow_actions |= JUMP_ACTION;
+				printf("jump / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "mark")) {
+				flow_actions |= MARK_ACTION;
+				printf("mark / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "count")) {
+				flow_actions |= COUNT_ACTION;
+				printf("count / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "set-meta")) {
+				flow_actions |= META_ACTION;
+				printf("set-meta / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "set-tag")) {
+				flow_actions |= TAG_ACTION;
+				printf("set-tag / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "drop")) {
+				flow_actions |= DROP_ACTION;
+				printf("drop / ");
+			}
+
+			/* Control */
+			if (!strcmp(lgopts[opt_idx].name, "flows-count")) {
+				n = atoi(optarg);
+				if (n > (int) iterations_number)
+					flows_count = n;
+				else {
+					printf("\n\nflows_count should be > %d",
+						iterations_number);
+					rte_exit(EXIT_SUCCESS, " ");
+				}
+			}
+			if (!strcmp(lgopts[opt_idx].name, "dump-iterations"))
+				dump_iterations = true;
 			break;
 		default:
 			usage(argv[0]);
@@ -88,6 +312,128 @@ args_parse(int argc, char **argv)
 			break;
 		}
 	}
+	printf("end_flow\n");
+}
+
+/* Print the type and message of a failed flow creation to stdout. */
+static void
+print_flow_error(struct rte_flow_error error)
+{
+	const char *why = error.message ? error.message : "(no stated reason)";
+	printf("Flow can't be created %d message: %s\n", error.type, why);
+}
+
+/*
+ * Insert flows_count rules on each port and print the insertion rates.
+ * rte_exit() is called when a rule cannot be created.
+ */
+static inline void
+flows_handler(void)
+{
+	struct rte_flow_error error;
+	clock_t start_iter, end_iter;
+	double cpu_time_per_iter[MAX_ITERATIONS];
+	double cpu_time_used, flows_rate, delta;
+	uint16_t nr_ports;
+	uint32_t i, iter_id;
+	int port_id;
+	uint32_t eagain_counter = 0;
+
+	nr_ports = rte_eth_dev_count_avail();
+
+	if (iterations_number > flows_count)
+		iterations_number = flows_count;
+
+	printf(":: Flows Count per port: %u\n", flows_count);
+
+	for (port_id = 0; port_id < nr_ports; port_id++) {
+		cpu_time_used = 0;
+		/* -1 marks a slot without a measurement for this port. */
+		for (i = 0; i < MAX_ITERATIONS; i++)
+			cpu_time_per_iter[i] = -1;
+
+		if (flow_group > 0) {
+			/*
+			 * Create a global rule to jump into flow_group, so
+			 * that the app avoids the default rules:
+			 * group 0 eth / end actions jump group <flow_group>
+			 */
+			flow = generate_flow(port_id, 0, flow_attrs, ETH_ITEM,
+				JUMP_ACTION, flow_group, 0, &error);
+			if (!flow) {
+				print_flow_error(error);
+				rte_exit(EXIT_FAILURE, "error in creating flow");
+			}
+		}
+
+		/* Insertion Rate */
+		printf("Flows insertion on port = %d\n", port_id);
+		start_iter = clock();
+		for (i = 0; i < flows_count; i++) {
+			do {
+				rte_errno = 0;
+				flow = generate_flow(port_id, flow_group,
+					flow_attrs, flow_items, flow_actions,
+					JUMP_ACTION_TABLE, i,  &error);
+				if (!flow)
+					eagain_counter++;
+			} while (rte_errno == EAGAIN);
+
+			if (force_quit)
+				i = flows_count;
+
+			if (!flow) {
+				print_flow_error(error);
+				rte_exit(EXIT_FAILURE, "error in creating flow");
+			}
+
+			if (i && !((i + 1) % iterations_number)) {
+				/* Save the insertion rate of each iter */
+				end_iter = clock();
+				delta = (double) (end_iter - start_iter);
+				delta /= CLOCKS_PER_SEC;
+				iter_id = ((i + 1) / iterations_number) - 1;
+				/* Only MAX_ITERATIONS slots can be dumped. */
+				if (iter_id < MAX_ITERATIONS)
+					cpu_time_per_iter[iter_id] = delta;
+				cpu_time_used += delta;
+				start_iter = clock();
+			}
+		}
+
+		/* Iteration rate per iteration */
+		if (dump_iterations)
+			for (i = 0; i < MAX_ITERATIONS; i++) {
+				if (cpu_time_per_iter[i] == -1)
+					continue;
+				delta = (double)(iterations_number /
+					cpu_time_per_iter[i]);
+				flows_rate = delta / 1000;
+				printf(":: Iteration #%u: %u flows "
+					"in %f sec[ Rate = %f K/Sec ]\n",
+					i, iterations_number,
+					cpu_time_per_iter[i], flows_rate);
+			}
+
+		/* Insertion rate for all flows */
+		flows_rate = ((double) (flows_count / cpu_time_used) / 1000);
+		printf("\n:: Total flow insertion rate -> %f K/Sec\n",
+						flows_rate);
+		printf(":: The time for creating %u in flows %f seconds\n",
+						flows_count, cpu_time_used);
+		printf(":: EAGAIN counter = %u\n", eagain_counter);
+	}
+}
+
+/* SIGINT/SIGTERM handler: warn on stdout and raise the force_quit flag. */
+static void
+signal_handler(int signum)
+{
+	if (signum != SIGINT && signum != SIGTERM)
+		return;
+	printf("\n\nSignal %d received, preparing to exit...\n", signum);
+	printf("Error: Stats are wrong due to sudden signal!\n\n");
+	force_quit = true;
 }
 
 static void
@@ -96,6 +442,8 @@ init_port(void)
 	int ret;
 	uint16_t i, j;
 	uint16_t port_id;
+	uint16_t nr_queues;
+	bool hairpin_flag = false;
 	uint16_t nr_ports = rte_eth_dev_count_avail();
 	struct rte_eth_hairpin_conf hairpin_conf = {
 			.peer_count = 1,
@@ -115,6 +463,13 @@ init_port(void)
 	struct rte_eth_rxconf rxq_conf;
 	struct rte_eth_dev_info dev_info;
 
+	nr_queues = RXQs;
+	if (flow_actions & HAIRPIN_QUEUE_ACTION ||
+		flow_actions & HAIRPIN_RSS_ACTION) {
+		nr_queues = RXQs + HAIRPIN_QUEUES;
+		hairpin_flag = true;
+	}
+
 	if (nr_ports == 0)
 		rte_exit(EXIT_FAILURE, "Error: no port detected\n");
 	mbuf_mp = rte_pktmbuf_pool_create("mbuf_pool",
@@ -134,8 +489,8 @@ init_port(void)
 
 		port_conf.txmode.offloads &= dev_info.tx_offload_capa;
 		printf(":: initializing port: %d\n", port_id);
-		ret = rte_eth_dev_configure(port_id, RXQs + HAIRPIN_QUEUES,
-				TXQs + HAIRPIN_QUEUES, &port_conf);
+		ret = rte_eth_dev_configure(port_id, nr_queues,
+				nr_queues, &port_conf);
 		if (ret < 0)
 			rte_exit(EXIT_FAILURE,
 					":: cannot configure device: err=%d, port=%u\n",
@@ -173,26 +528,30 @@ init_port(void)
 					":: promiscuous mode enable failed: err=%s, port=%u\n",
 					rte_strerror(-ret), port_id);
 
-		for (i = RXQs, j = 0; i < RXQs + HAIRPIN_QUEUES; i++, j++) {
-			hairpin_conf.peers[0].port = port_id;
-			hairpin_conf.peers[0].queue = j + TXQs;
-			ret = rte_eth_rx_hairpin_queue_setup(port_id, i,
-							NR_RXD, &hairpin_conf);
-			if (ret != 0)
-				rte_exit(EXIT_FAILURE,
-					":: Hairpin rx queue setup failed: err=%d, port=%u\n",
-					ret, port_id);
-		}
+		if (hairpin_flag) {
+			for (i = RXQs, j = 0;
+					i < RXQs + HAIRPIN_QUEUES; i++, j++) {
+				hairpin_conf.peers[0].port = port_id;
+				hairpin_conf.peers[0].queue = j + TXQs;
+				ret = rte_eth_rx_hairpin_queue_setup(port_id, i,
+					NR_RXD, &hairpin_conf);
+				if (ret != 0)
+					rte_exit(EXIT_FAILURE,
+						":: Hairpin rx queue setup failed: err=%d, port=%u\n",
+						ret, port_id);
+			}
 
-		for (i = TXQs, j = 0; i < TXQs + HAIRPIN_QUEUES; i++, j++) {
-			hairpin_conf.peers[0].port = port_id;
-			hairpin_conf.peers[0].queue = j + RXQs;
-			ret = rte_eth_tx_hairpin_queue_setup(port_id, i,
-							NR_TXD, &hairpin_conf);
-			if (ret != 0)
-				rte_exit(EXIT_FAILURE,
-					":: Hairpin tx queue setup failed: err=%d, port=%u\n",
-					ret, port_id);
+			for (i = TXQs, j = 0;
+					i < TXQs + HAIRPIN_QUEUES; i++, j++) {
+				hairpin_conf.peers[0].port = port_id;
+				hairpin_conf.peers[0].queue = j + RXQs;
+				ret = rte_eth_tx_hairpin_queue_setup(port_id, i,
+					NR_TXD, &hairpin_conf);
+				if (ret != 0)
+					rte_exit(EXIT_FAILURE,
+						":: Hairpin tx queue setup failed: err=%d, port=%u\n",
+						ret, port_id);
+			}
 		}
 
 		ret = rte_eth_dev_start(port_id);
@@ -219,6 +578,15 @@ main(int argc, char **argv)
 	if (ret < 0)
 		rte_exit(EXIT_FAILURE, "EAL init failed\n");
 
+	force_quit = false;
+	dump_iterations = false;
+	flows_count = 4000000;
+	iterations_number = 100000;
+	flow_group = 0;
+
+	signal(SIGINT, signal_handler);
+	signal(SIGTERM, signal_handler);
+
 	argc -= ret;
 	argv += ret;
 
@@ -232,6 +600,8 @@ main(int argc, char **argv)
 	if (nb_lcores <= 1)
 		rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
 
+	flows_handler();
+
 	RTE_LCORE_FOREACH_SLAVE(lcore_id)
 
 	if (rte_eal_wait_lcore(lcore_id) < 0)
diff --git a/app/test-flow-perf/meson.build b/app/test-flow-perf/meson.build
index ec9bb3b3aa..b3941f5c2d 100644
--- a/app/test-flow-perf/meson.build
+++ b/app/test-flow-perf/meson.build
@@ -5,7 +5,15 @@
 #
 # To build this example as a standalone application with an already-installed
 # DPDK instance, use 'make'
+name = 'flow_perf'
+allow_experimental_apis = true
+cflags += '-Wno-deprecated-declarations'
+cflags += '-Wunused-function'
 
 sources = files(
+	'actions_gen.c',
+	'flow_gen.c',
+	'items_gen.c',
 	'main.c',
 )
+deps += ['ethdev']
diff --git a/app/test-flow-perf/user_parameters.h b/app/test-flow-perf/user_parameters.h
index 56ec7f47b5..1d157430b6 100644
--- a/app/test-flow-perf/user_parameters.h
+++ b/app/test-flow-perf/user_parameters.h
@@ -14,3 +14,18 @@
 #define MBUF_CACHE_SIZE 512
 #define NR_RXD  256
 #define NR_TXD  256
+
+/** Items/Actions parameters **/
+#define JUMP_ACTION_TABLE 2
+#define VLAN_VALUE 1
+#define VNI_VALUE 1
+#define GRE_PROTO  0x6558
+#define META_DATA 1
+#define TAG_INDEX 0
+#define PORT_ID_DST 1
+#define MARK_ID 1
+#define TEID_VALUE 1
+
+/** Flow items/actions max size **/
+#define MAX_ITEMS_NUM 20
+#define MAX_ACTIONS_NUM 20
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
index 30ce1b6cc0..62e038c430 100644
--- a/doc/guides/tools/flow-perf.rst
+++ b/doc/guides/tools/flow-perf.rst
@@ -4,7 +4,19 @@
 RTE Flow performance tool
 =========================
 
-Application for rte_flow performance testing.
+Application for rte_flow performance testing. The application provides the
+ability to test the insertion rate of a specific rte_flow rule by stressing
+the NIC with it and calculating the resulting insertion rate.
+
+The application offers some options in the command line, to configure
+which rule to apply.
+
+After that the application will start producing rules with same pattern
+but increasing the outer IP source address by 1 each time, thus it will
+give different flow each time, and all other items will have open masks.
+
+The current design measures the insertion rate on a single core. Multi-core
+insertion rate measurement may be added to the app in the future.
 
 
 Compiling the Application
@@ -61,9 +73,179 @@ a ``--`` separator:
 
 .. code-block:: console
 
-	sudo ./test-flow-perf -n 4 -w 08:00.0,dv_flow_en=1 --
+	sudo ./flow_perf -n 4 -w 08:00.0,dv_flow_en=1 -- --ingress --ether --ipv4 --queue --flows-count=1000000
 
 The command line options are:
 
 *	``--help``
 	Display a help message and quit.
+
+*	``--flows-count=N``
+	Set the number of flows to insert per port,
+	where N must be greater than the iteration size (100,000).
+	The default value is 4,000,000.
+
+*	``--dump-iterations``
+	Print rates for each iteration of flows.
+	The default iteration size is 100,000 flows.
+
+
+Attributes:
+
+*	``--ingress``
+	Set Ingress attribute to all flows attributes.
+
+*	``--egress``
+	Set Egress attribute to all flows attributes.
+
+*	``--transfer``
+	Set Transfer attribute to all flows attributes.
+
+*	``--group=N``
+	Set group for all flows, where N >= 0.
+	Default group is 0.
+
+Items:
+
+*	``--ether``
+	Add Ether item to all flows items, This item have open mask.
+
+*	``--vlan``
+	Add VLAN item to all flows items.
+	This item has the VLAN value defined in user_parameters.h
+	under ``VLAN_VALUE`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--ipv4``
+	Add IPv4 item to all flows items,
+	This item have incremental source IP, with full mask.
+	Other fields are open mask.
+
+*	``--ipv6``
+	Add IPv6 item to all flows item,
+	This item have incremental source IP, with full mask.
+	Other fields are open mask.
+
+*	``--tcp``
+	Add TCP item to all flows items, This item have open mask.
+
+*	``--udp``
+	Add UDP item to all flows items, This item have open mask.
+
+*	``--vxlan``
+	Add VXLAN item to all flows items,
+	This item have VNI value defined in user_parameters.h
+	under ``VNI_VALUE`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--vxlan-gpe``
+	Add VXLAN-GPE item to all flows items,
+	This item have VNI value defined in user_parameters.h
+	under ``VNI_VALUE`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--gre``
+	Add GRE item to all flows items,
+	This item have protocol value defined in user_parameters.h
+	under ``GRE_PROTO`` with full mask, default protocol = 0x6558 "Ether"
+	Other fields are open mask.
+
+*	``--geneve``
+	Add GENEVE item to all flows items,
+	This item have VNI value defined in user_parameters.h
+	under ``VNI_VALUE`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--gtp``
+	Add GTP item to all flows items,
+	This item have TEID value defined in user_parameters.h
+	under ``TEID_VALUE`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--meta``
+	Add Meta item to all flows items,
+	This item have data value defined in user_parameters.h
+	under ``META_DATA`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--tag``
+	Add Tag item to all flows items,
+	This item have data value defined in user_parameters.h
+	under ``META_DATA`` with full mask, default value = 1.
+
+	Also it have tag value defined in user_parameters.h
+	under ``TAG_INDEX`` with full mask, default value = 0.
+	Other fields are open mask.
+
+
+Actions:
+
+*	``--port-id``
+	Add port redirection action to all flows actions.
+	Port redirection destination is defined in user_parameters.h
+	under PORT_ID_DST, default value = 1.
+
+*	``--rss``
+	Add RSS action to all flows actions,
+	The queues in RSS action will be all queues configured
+	in the app.
+
+*	``--queue``
+	Add queue action to all flows actions.
+	The queue changes in round-robin order for each flow.
+
+	For example:
+		The app running with 4 RX queues
+		Flow #0: queue index 0
+		Flow #1: queue index 1
+		Flow #2: queue index 2
+		Flow #3: queue index 3
+		Flow #4: queue index 0
+		...
+
+*	``--jump``
+	Add jump action to all flows actions.
+	Jump action destination is defined in user_parameters.h
+	under ``JUMP_ACTION_TABLE``, default value = 2.
+
+*	``--mark``
+	Add mark action to all flows actions.
+	Mark action id is defined in user_parameters.h
+	under ``MARK_ID``, default value = 1.
+
+*	``--count``
+	Add count action to all flows actions.
+
+*	``--set-meta``
+	Add set-meta action to all flows actions.
+	Meta data is defined in user_parameters.h under ``META_DATA``
+	with full mask, default value = 1.
+
+*	``--set-tag``
+	Add set-tag action to all flows actions.
+	Meta data is defined in user_parameters.h under ``META_DATA``
+	with full mask, default value = 1.
+
+	Tag index is defined in user_parameters.h under ``TAG_INDEX``
+	with full mask, default value = 0.
+
+*	``--drop``
+	Add drop action to all flows actions.
+
+*	``--hairpin-queue``
+	Add hairpin queue action to all flows actions.
+	The queue will change in round robin state for each flow.
+
+	For example:
+		The app running with 4 RX hairpin queues and 4 normal RX queues
+		Flow #0: queue index 4
+		Flow #1: queue index 5
+		Flow #2: queue index 6
+		Flow #3: queue index 7
+		Flow #4: queue index 4
+		...
+
+*	``--hairpin-rss``
+	Add hairpin RSS action to all flows actions.
+	The queues in RSS action will be all hairpin queues configured
+	in the app.
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v3 3/5] app/test-flow-perf: add deletion rate calculation
  2020-04-30  9:32       ` [dpdk-dev] [PATCH v3 0/5] *** Introduce flow perf application *** Wisam Jaddo
  2020-04-30  9:32         ` [dpdk-dev] [PATCH v3 1/5] app/test-flow-perf: add flow performance skeleton Wisam Jaddo
  2020-04-30  9:32         ` [dpdk-dev] [PATCH v3 2/5] app/test-flow-perf: add insertion rate calculation Wisam Jaddo
@ 2020-04-30  9:32         ` " Wisam Jaddo
  2020-04-30  9:32         ` [dpdk-dev] [PATCH v3 4/5] app/test-flow-perf: add memory dump to app Wisam Jaddo
  2020-04-30  9:32         ` [dpdk-dev] [PATCH v3 5/5] app/test-flow-perf: add packet forwarding support Wisam Jaddo
  4 siblings, 0 replies; 102+ messages in thread
From: Wisam Jaddo @ 2020-04-30  9:32 UTC (permalink / raw)
  To: jackmin, thomas, jerinjacobk, gerlitz.or, l.yan, dev

Add the ability to test deletion rate for flow performance
application.

This feature is disabled by default, and can be enabled by
adding "--deletion-rate" to the application command line options.

Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 app/test-flow-perf/main.c      | 86 ++++++++++++++++++++++++++++++++++
 doc/guides/tools/flow-perf.rst |  4 ++
 2 files changed, 90 insertions(+)

diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index 115af4f302..7c11c0b577 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -62,6 +62,7 @@ static uint16_t flow_actions;
 static uint8_t flow_attrs;
 static volatile bool force_quit;
 static volatile bool dump_iterations;
+static volatile bool delete_flag;
 static struct rte_mempool *mbuf_mp;
 static uint32_t nb_lcores;
 static uint32_t flows_count;
@@ -75,6 +76,8 @@ static void usage(char *progname)
 		" flows to insert, default is 4,000,000\n");
 	printf("  --dump-iterations: To print rates for each"
 		" iteration\n");
+	printf("  --deletion-rate: Enable deletion rate"
+		" calculations\n");
 
 	printf("To set flow attributes:\n");
 	printf("  --ingress: set ingress attribute in flows\n");
@@ -123,6 +126,7 @@ args_parse(int argc, char **argv)
 		{ "help",                       0, 0, 0 },
 		{ "flows-count",                1, 0, 0 },
 		{ "dump-iterations",            0, 0, 0 },
+		{ "deletion-rate",              0, 0, 0 },
 		/* Attributes */
 		{ "ingress",                    0, 0, 0 },
 		{ "egress",                     0, 0, 0 },
@@ -304,6 +308,8 @@ args_parse(int argc, char **argv)
 			}
 			if (!strcmp(lgopts[opt_idx].name, "dump-iterations"))
 				dump_iterations = true;
+			if (!strcmp(lgopts[opt_idx].name, "deletion-rate"))
+				delete_flag = true;
 			break;
 		default:
 			usage(argv[0]);
@@ -323,9 +329,75 @@ print_flow_error(struct rte_flow_error error)
 		error.message ? error.message : "(no stated reason)");
 }
 
+/*
+ * Destroy the flows recorded in flow_list on port_id and print the
+ * measured deletion rate.
+ *
+ * At most flows_count entries are walked and the walk stops at the first
+ * NULL entry. The application exits through rte_exit() if a flow cannot
+ * be destroyed. CPU time is sampled with clock() once per batch of
+ * iterations_number deletions; the per-batch rates are printed only when
+ * dump_iterations is set, the overall rate is always printed.
+ *
+ * Side effect: the global iterations_number is clamped to flows_count.
+ */
+static inline void
+destroy_flows(int port_id, struct rte_flow **flow_list)
+{
+	struct rte_flow_error error;
+	clock_t start_iter, end_iter;
+	double cpu_time_used = 0;
+	double flows_rate;
+	double cpu_time_per_iter[MAX_ITERATIONS];
+	double iter_time;
+	double delta;
+	uint32_t i;
+	uint32_t iter_id;
+
+	/* -1 marks a batch slot that was never measured */
+	for (i = 0; i < MAX_ITERATIONS; i++)
+		cpu_time_per_iter[i] = -1;
+
+	if (iterations_number > flows_count)
+		iterations_number = flows_count;
+
+	/* Deletion Rate */
+	printf("Flows Deletion on port = %d\n", port_id);
+	start_iter = clock();
+	for (i = 0; i < flows_count; i++) {
+		if (!flow_list[i])
+			break;
+
+		memset(&error, 0x33, sizeof(error));
+		if (rte_flow_destroy(port_id, flow_list[i], &error)) {
+			print_flow_error(error);
+			rte_exit(EXIT_FAILURE, "Error in deleting flow");
+		}
+
+		if (i && !((i + 1) % iterations_number)) {
+			/* Save the deletion rate of each iter */
+			end_iter = clock();
+			delta = (double) (end_iter - start_iter);
+			iter_time = delta / CLOCKS_PER_SEC;
+			iter_id = ((i + 1) / iterations_number) - 1;
+			/*
+			 * Only MAX_ITERATIONS slots exist. Later batches
+			 * still count toward the total time but are not
+			 * stored, so the array is never written past its
+			 * end.
+			 */
+			if (iter_id < MAX_ITERATIONS)
+				cpu_time_per_iter[iter_id] = iter_time;
+			cpu_time_used += iter_time;
+			start_iter = clock();
+		}
+	}
+
+	/* Deletion rate per iteration */
+	if (dump_iterations)
+		for (i = 0; i < MAX_ITERATIONS; i++) {
+			if (cpu_time_per_iter[i] == -1)
+				continue;
+			delta = (double)(iterations_number /
+				cpu_time_per_iter[i]);
+			flows_rate = delta / 1000;
+			printf(":: Iteration #%d: %d flows "
+				"in %f sec[ Rate = %f K/Sec ]\n",
+				i, iterations_number,
+				cpu_time_per_iter[i], flows_rate);
+		}
+
+	/* Deletion rate for all flows */
+	flows_rate = ((double) (flows_count / cpu_time_used) / 1000);
+	printf("\n:: Total flow deletion rate -> %f K/Sec\n",
+		flows_rate);
+	printf(":: The time for deleting %d in flows %f seconds\n",
+		flows_count, cpu_time_used);
+}
+
 static inline void
 flows_handler(void)
 {
+	struct rte_flow **flow_list;
 	struct rte_flow_error error;
 	clock_t start_iter, end_iter;
 	double cpu_time_used;
@@ -337,6 +409,7 @@ flows_handler(void)
 	int port_id;
 	int iter_id;
 	uint32_t eagain_counter = 0;
+	uint32_t flow_index;
 
 	nr_ports = rte_eth_dev_count_avail();
 
@@ -348,8 +421,14 @@ flows_handler(void)
 
 	printf(":: Flows Count per port: %d\n", flows_count);
 
+	flow_list = rte_zmalloc("flow_list",
+		(sizeof(struct rte_flow *) * flows_count) + 1, 0);
+	if (flow_list == NULL)
+		rte_exit(EXIT_FAILURE, "No Memory available!");
+
 	for (port_id = 0; port_id < nr_ports; port_id++) {
 		cpu_time_used = 0;
+		flow_index = 0;
 		if (flow_group > 0) {
 			/*
 			 * Create global rule to jumo into flow_group
@@ -366,6 +445,7 @@ flows_handler(void)
 				print_flow_error(error);
 				rte_exit(EXIT_FAILURE, "error in creating flow");
 			}
+			flow_list[flow_index++] = flow;
 		}
 
 		/* Insertion Rate */
@@ -389,6 +469,8 @@ flows_handler(void)
 				rte_exit(EXIT_FAILURE, "error in creating flow");
 			}
 
+			flow_list[flow_index++] = flow;
+
 			if (i && !((i + 1) % iterations_number)) {
 				/* Save the insertion rate of each iter */
 				end_iter = clock();
@@ -422,6 +504,9 @@ flows_handler(void)
 		printf(":: The time for creating %d in flows %f seconds\n",
 						flows_count, cpu_time_used);
 		printf(":: EAGAIN counter = %d\n", eagain_counter);
+
+		if (delete_flag)
+			destroy_flows(port_id, flow_list);
 	}
 }
 
@@ -580,6 +665,7 @@ main(int argc, char **argv)
 
 	force_quit = false;
 	dump_iterations = false;
+	delete_flag = false;
 	flows_count = 4000000;
 	iterations_number = 100000;
 	flow_group = 0;
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
index 62e038c430..e07e659df5 100644
--- a/doc/guides/tools/flow-perf.rst
+++ b/doc/guides/tools/flow-perf.rst
@@ -18,6 +18,8 @@ give different flow each time, and all other items will have open masks.
 The current design have single core insertion rate. In the future we may
 have a multi core insertion rate measurement support in the app.
 
+The application also provide the ability to measure rte flow deletion rate.
+
 
 Compiling the Application
 =========================
@@ -89,6 +91,8 @@ The command line options are:
 	Print rates for each iteration of flows.
 	Default iteration is 1,00,000.
 
+*	``--deletion-rate``
+	Enable deletion rate calculations.
 
 Attributes:
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v3 4/5] app/test-flow-perf: add memory dump to app
  2020-04-30  9:32       ` [dpdk-dev] [PATCH v3 0/5] *** Introduce flow perf application *** Wisam Jaddo
                           ` (2 preceding siblings ...)
  2020-04-30  9:32         ` [dpdk-dev] [PATCH v3 3/5] app/test-flow-perf: add deletion " Wisam Jaddo
@ 2020-04-30  9:32         ` Wisam Jaddo
  2020-04-30  9:32         ` [dpdk-dev] [PATCH v3 5/5] app/test-flow-perf: add packet forwarding support Wisam Jaddo
  4 siblings, 0 replies; 102+ messages in thread
From: Wisam Jaddo @ 2020-04-30  9:32 UTC (permalink / raw)
  To: jackmin, thomas, jerinjacobk, gerlitz.or, l.yan, dev; +Cc: Suanming Mou

Introduce new feature to dump memory statistics of each socket
and a total for all before and after the creation.

This will give two main advantages:
1- Check the memory consumption for large number of flows
"insertion rate scenario alone"

2- Check that there is no memory leakage after doing insertion
then deletion.

Signed-off-by: Suanming Mou <suanmingm@mellanox.com>
Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 app/test-flow-perf/main.c      | 69 ++++++++++++++++++++++++++++++++++
 doc/guides/tools/flow-perf.rst |  6 ++-
 2 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index 7c11c0b577..95435910de 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -62,6 +62,7 @@ static uint16_t flow_actions;
 static uint8_t flow_attrs;
 static volatile bool force_quit;
 static volatile bool dump_iterations;
+static volatile bool dump_socket_mem_flag;
 static volatile bool delete_flag;
 static struct rte_mempool *mbuf_mp;
 static uint32_t nb_lcores;
@@ -78,6 +79,7 @@ static void usage(char *progname)
 		" iteration\n");
 	printf("  --deletion-rate: Enable deletion rate"
 		" calculations\n");
+	printf("  --dump-socket-mem: to dump all socket memory\n");
 
 	printf("To set flow attributes:\n");
 	printf("  --ingress: set ingress attribute in flows\n");
@@ -127,6 +129,7 @@ args_parse(int argc, char **argv)
 		{ "flows-count",                1, 0, 0 },
 		{ "dump-iterations",            0, 0, 0 },
 		{ "deletion-rate",              0, 0, 0 },
+		{ "dump-socket-mem",            0, 0, 0 },
 		/* Attributes */
 		{ "ingress",                    0, 0, 0 },
 		{ "egress",                     0, 0, 0 },
@@ -310,6 +313,8 @@ args_parse(int argc, char **argv)
 				dump_iterations = true;
 			if (!strcmp(lgopts[opt_idx].name, "deletion-rate"))
 				delete_flag = true;
+			if (!strcmp(lgopts[opt_idx].name, "dump-socket-mem"))
+				dump_socket_mem_flag = true;
 			break;
 		default:
 			usage(argv[0]);
@@ -321,6 +326,62 @@ args_parse(int argc, char **argv)
 	printf("end_flow\n");
 }
 
+/*
+ * Accumulate the malloc heap statistics of every NUMA socket that has a
+ * non-empty heap and return the total number of allocated bytes.
+ *
+ * When dump_socket_mem_flag is set, the per-socket figures and, if at
+ * least one such socket was found, the grand totals are written to f.
+ */
+static size_t
+dump_socket_mem(FILE *f)
+{
+	struct rte_malloc_socket_stats stats;
+	size_t sum_total = 0;
+	size_t sum_alloc = 0;
+	size_t sum_free = 0;
+	unsigned int sum_alloc_cnt = 0;
+	unsigned int sum_free_cnt = 0;
+	unsigned int socket;
+	bool found = false;
+
+	for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
+		/* Skip sockets without stats or without any heap memory */
+		if (rte_malloc_get_socket_stats(socket, &stats) != 0)
+			continue;
+		if (stats.heap_totalsz_bytes == 0)
+			continue;
+
+		found = true;
+		sum_total += stats.heap_totalsz_bytes;
+		sum_alloc += stats.heap_allocsz_bytes;
+		sum_free += stats.heap_freesz_bytes;
+		sum_alloc_cnt += stats.alloc_count;
+		sum_free_cnt += stats.free_count;
+
+		if (!dump_socket_mem_flag)
+			continue;
+
+		fprintf(f, "::::::::::::::::::::::::::::::::::::::::");
+		fprintf(f,
+			"\nSocket %u:\nsize(M) total: %.6lf\nalloc:"
+			" %.6lf(%.3lf%%)\nfree: %.6lf"
+			"\nmax: %.6lf"
+			"\ncount alloc: %u\nfree: %u\n",
+			socket,
+			stats.heap_totalsz_bytes / 1.0e6,
+			stats.heap_allocsz_bytes / 1.0e6,
+			(double)stats.heap_allocsz_bytes * 100 /
+			(double)stats.heap_totalsz_bytes,
+			stats.heap_freesz_bytes / 1.0e6,
+			stats.greatest_free_size / 1.0e6,
+			stats.alloc_count,
+			stats.free_count);
+		fprintf(f, "::::::::::::::::::::::::::::::::::::::::");
+	}
+
+	/* Totals are only printed on request and when a socket was seen */
+	if (!dump_socket_mem_flag || !found)
+		return sum_alloc;
+
+	fprintf(f,
+		"\nTotal: size(M)\ntotal: %.6lf"
+		"\nalloc: %.6lf(%.3lf%%)\nfree: %.6lf"
+		"\ncount alloc: %u\nfree: %u\n",
+		sum_total / 1.0e6, sum_alloc / 1.0e6,
+		(double)sum_alloc * 100 / (double)sum_total,
+		sum_free / 1.0e6,
+		sum_alloc_cnt, sum_free_cnt);
+	fprintf(f, "::::::::::::::::::::::::::::::::::::::::\n");
+
+	return sum_alloc;
+}
+
 static void
 print_flow_error(struct rte_flow_error error)
 {
@@ -657,6 +718,7 @@ main(int argc, char **argv)
 	uint16_t nr_ports;
 	int ret;
 	struct rte_flow_error error;
+	int64_t alloc, last_alloc;
 
 	nr_ports = rte_eth_dev_count_avail();
 	ret = rte_eal_init(argc, argv);
@@ -666,6 +728,7 @@ main(int argc, char **argv)
 	force_quit = false;
 	dump_iterations = false;
 	delete_flag = false;
+	dump_socket_mem_flag = false;
 	flows_count = 4000000;
 	iterations_number = 100000;
 	flow_group = 0;
@@ -686,7 +749,13 @@ main(int argc, char **argv)
 	if (nb_lcores <= 1)
 		rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
 
+	last_alloc = (int64_t)dump_socket_mem(stdout);
 	flows_handler();
+	alloc = (int64_t)dump_socket_mem(stdout);
+
+	if (last_alloc)
+		fprintf(stdout, ":: Memory allocation change(M): %.6lf\n",
+		(alloc - last_alloc) / 1.0e6);
 
 	RTE_LCORE_FOREACH_SLAVE(lcore_id)
 
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
index e07e659df5..28d452fd06 100644
--- a/doc/guides/tools/flow-perf.rst
+++ b/doc/guides/tools/flow-perf.rst
@@ -18,7 +18,8 @@ give different flow each time, and all other items will have open masks.
 The current design have single core insertion rate. In the future we may
 have a multi core insertion rate measurement support in the app.
 
-The application also provide the ability to measure rte flow deletion rate.
+The application also provide the ability to measure rte flow deletion rate,
+in addition to memory consumption before and after the flows creation.
 
 
 Compiling the Application
@@ -94,6 +95,9 @@ The command line options are:
 *	``--deletion-rate``
 	Enable deletion rate calculations.
 
+*	``--dump-socket-mem``
+	Dump the memory stats for each socket before the insertion and after.
+
 Attributes:
 
 *	``--ingress``
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v3 5/5] app/test-flow-perf: add packet forwarding support
  2020-04-30  9:32       ` [dpdk-dev] [PATCH v3 0/5] *** Introduce flow perf application *** Wisam Jaddo
                           ` (3 preceding siblings ...)
  2020-04-30  9:32         ` [dpdk-dev] [PATCH v3 4/5] app/test-flow-perf: add memory dump to app Wisam Jaddo
@ 2020-04-30  9:32         ` Wisam Jaddo
  4 siblings, 0 replies; 102+ messages in thread
From: Wisam Jaddo @ 2020-04-30  9:32 UTC (permalink / raw)
  To: jackmin, thomas, jerinjacobk, gerlitz.or, l.yan, dev

Introduce packet forwarding support to the app to do
some performance measurements.

The measurements are reported in terms of packets per
second. The forwarding will start after the end
of insertion/deletion operations.

The support has single and multi core performance measurements.

Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 app/test-flow-perf/main.c      | 300 +++++++++++++++++++++++++++++++++
 doc/guides/tools/flow-perf.rst |   6 +
 2 files changed, 306 insertions(+)

diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index 95435910de..2596d05dc2 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -60,14 +60,45 @@ static uint8_t flow_group;
 static uint16_t flow_items;
 static uint16_t flow_actions;
 static uint8_t flow_attrs;
+
 static volatile bool force_quit;
 static volatile bool dump_iterations;
 static volatile bool dump_socket_mem_flag;
 static volatile bool delete_flag;
+static volatile bool enable_fwd;
+
 static struct rte_mempool *mbuf_mp;
 static uint32_t nb_lcores;
 static uint32_t flows_count;
 static uint32_t iterations_number;
+static uint32_t nb_lcores;
+
+/* Number of mbufs requested per rx burst; also the size of lcore_info.pkts */
+#define MAX_PKT_BURST 32
+/* lcore_info.mode values; a mode of 0 means the lcore does nothing */
+#define LCORE_MODE_PKT 1
+#define LCORE_MODE_STATS 2
+/* Size of the per-lcore streams[] table */
+#define MAX_STREAMS 64
+/* Size of the lcore_infos[] table, which is indexed by lcore id */
+#define MAX_LCORES 64
+
+/*
+ * One forwarding stream: packets received on (rx_port, rx_queue) are
+ * transmitted on (tx_port, tx_queue). A value of -1 marks the entry as
+ * unused.
+ */
+struct stream {
+	int tx_port;
+	int tx_queue;
+	int rx_port;
+	int rx_queue;
+};
+
+/* Per-lcore forwarding state and counters. */
+struct lcore_info {
+	/* LCORE_MODE_PKT, LCORE_MODE_STATS, or 0 when the lcore is idle */
+	int mode;
+	/* Number of streams this lcore is expected to handle */
+	int streams_nb;
+	struct stream streams[MAX_STREAMS];
+	/* stats */
+	uint64_t tx_pkts;
+	uint64_t tx_drops;
+	uint64_t rx_pkts;
+	/* Burst buffer filled by do_rx() and sent by do_tx() */
+	struct rte_mbuf *pkts[MAX_PKT_BURST];
+} __attribute__((__aligned__(64))); /* let it be cacheline aligned */
+
+
+static struct lcore_info lcore_infos[MAX_LCORES];
 
 static void usage(char *progname)
 {
@@ -80,6 +111,8 @@ static void usage(char *progname)
 	printf("  --deletion-rate: Enable deletion rate"
 		" calculations\n");
 	printf("  --dump-socket-mem: to dump all socket memory\n");
+	printf("  --enable-fwd: to enable packets forwarding"
+		" after insertion\n");
 
 	printf("To set flow attributes:\n");
 	printf("  --ingress: set ingress attribute in flows\n");
@@ -130,6 +163,7 @@ args_parse(int argc, char **argv)
 		{ "dump-iterations",            0, 0, 0 },
 		{ "deletion-rate",              0, 0, 0 },
 		{ "dump-socket-mem",            0, 0, 0 },
+		{ "enable-fwd",                 0, 0, 0 },
 		/* Attributes */
 		{ "ingress",                    0, 0, 0 },
 		{ "egress",                     0, 0, 0 },
@@ -315,6 +349,8 @@ args_parse(int argc, char **argv)
 				delete_flag = true;
 			if (!strcmp(lgopts[opt_idx].name, "dump-socket-mem"))
 				dump_socket_mem_flag = true;
+			if (!strcmp(lgopts[opt_idx].name, "enable-fwd"))
+				enable_fwd = true;
 			break;
 		default:
 			usage(argv[0]);
@@ -582,6 +618,265 @@ signal_handler(int signum)
 	}
 }
 
+/*
+ * Receive one burst of up to MAX_PKT_BURST packets from the given port
+ * and queue into li->pkts, add it to the lcore rx counter and return
+ * the number of packets received.
+ */
+static inline uint16_t
+do_rx(struct lcore_info *li, uint16_t rx_port, uint16_t rx_queue)
+{
+	const uint16_t nb_rx = rte_eth_rx_burst(rx_port, rx_queue,
+					li->pkts, MAX_PKT_BURST);
+
+	li->rx_pkts += nb_rx;
+	return nb_rx;
+}
+
+/*
+ * Transmit the first cnt packets of li->pkts on the given port and
+ * queue. Packets the device did not accept are counted as tx drops and
+ * freed.
+ */
+static inline void
+do_tx(struct lcore_info *li, uint16_t cnt, uint16_t tx_port,
+			uint16_t tx_queue)
+{
+	uint16_t sent;
+	uint16_t idx;
+
+	sent = rte_eth_tx_burst(tx_port, tx_queue, li->pkts, cnt);
+	li->tx_pkts  += sent;
+	li->tx_drops += cnt - sent;
+
+	/* Release every mbuf that was not handed over to the device */
+	idx = sent;
+	while (idx < cnt) {
+		rte_pktmbuf_free(li->pkts[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Format n in decimal with a comma after each group of three digits,
+ * which makes large counters easier to read.
+ *
+ * For example n = 1799321 is written as "1,799,321" and n = 1000 as
+ * "1,000".
+ *
+ * buf receives the NUL-terminated result and must hold at least 27
+ * bytes (UINT64_MAX needs 26 characters). Returns buf.
+ */
+static char *
+pretty_number(uint64_t n, char *buf)
+{
+	/* UINT64_MAX has 20 digits, i.e. at most 7 groups of 3 digits */
+	unsigned int groups[7];
+	int nb_groups = 0;
+	int off = 0;
+
+	/* Split into 3-digit groups, least significant group first */
+	do {
+		groups[nb_groups++] = (unsigned int)(n % 1000);
+		n /= 1000;
+	} while (n != 0);
+
+	/* The leading group is unpadded, the others are zero-padded */
+	off += sprintf(buf + off, "%u", groups[--nb_groups]);
+	while (nb_groups--)
+		off += sprintf(buf + off, ",%03u", groups[nb_groups]);
+
+	return buf;
+}
+
+/*
+ * Stats loop run by the lcore in LCORE_MODE_STATS.
+ *
+ * Once per second, until force_quit is set, print for every lcore in
+ * LCORE_MODE_PKT the number of packets transmitted, dropped on transmit
+ * and received since the previous print, plus a "total" row when more
+ * than one such lcore exists. After the first pass the cursor is moved
+ * back up with an ANSI escape sequence so the table is redrawn in place.
+ */
+static void
+packet_per_second_stats(void)
+{
+	struct lcore_info *old;
+	struct lcore_info *li, *oli;
+	int nr_lines = 0;
+	int i;
+
+	/* Snapshot of the counters at the previous print */
+	old = rte_zmalloc("old",
+		sizeof(struct lcore_info) * MAX_LCORES, 0);
+	if (old == NULL)
+		rte_exit(EXIT_FAILURE, "No Memory available!");
+
+	memcpy(old, lcore_infos,
+		sizeof(struct lcore_info) * MAX_LCORES);
+
+	while (!force_quit) {
+		uint64_t total_tx_pkts = 0;
+		uint64_t total_rx_pkts = 0;
+		uint64_t total_tx_drops = 0;
+		uint64_t tx_delta, rx_delta, drops_delta;
+		char buf[3][32];
+		int nr_valid_core = 0;
+
+		sleep(1);
+
+		/* Move the cursor up over the table printed last time */
+		if (nr_lines) {
+			char go_up_nr_lines[16];
+
+			sprintf(go_up_nr_lines, "%c[%dA\r", 27, nr_lines);
+			printf("%s\r", go_up_nr_lines);
+		}
+
+		printf("\n%6s %16s %16s %16s\n", "core", "tx", "tx drops", "rx");
+		printf("%6s %16s %16s %16s\n", "------", "----------------",
+			"----------------", "----------------");
+		/* Blank line, header and separator printed above */
+		nr_lines = 3;
+		for (i = 0; i < MAX_LCORES; i++) {
+			li  = &lcore_infos[i];
+			oli = &old[i];
+			if (li->mode != LCORE_MODE_PKT)
+				continue;
+
+			tx_delta    = li->tx_pkts  - oli->tx_pkts;
+			rx_delta    = li->rx_pkts  - oli->rx_pkts;
+			drops_delta = li->tx_drops - oli->tx_drops;
+			printf("%6d %16s %16s %16s\n", i,
+				pretty_number(tx_delta,    buf[0]),
+				pretty_number(drops_delta, buf[1]),
+				pretty_number(rx_delta,    buf[2]));
+
+			total_tx_pkts  += tx_delta;
+			total_rx_pkts  += rx_delta;
+			total_tx_drops += drops_delta;
+
+			nr_valid_core++;
+			nr_lines += 1;
+		}
+
+		if (nr_valid_core > 1) {
+			printf("%6s %16s %16s %16s\n", "total",
+				pretty_number(total_tx_pkts,  buf[0]),
+				pretty_number(total_tx_drops, buf[1]),
+				pretty_number(total_rx_pkts,  buf[2]));
+			nr_lines += 1;
+		}
+
+		memcpy(old, lcore_infos,
+			sizeof(struct lcore_info) * MAX_LCORES);
+	}
+
+	/* The snapshot is only needed while the loop runs */
+	rte_free(old);
+}
+
+/*
+ * Per-lcore entry point launched on every lcore.
+ *
+ * An lcore whose mode is 0 returns at once. The lcore in
+ * LCORE_MODE_STATS runs the stats loop. Any other lcore polls its
+ * streams until force_quit is set, sending each received burst out on
+ * the tx port and queue of the same stream.
+ *
+ * Always returns 0.
+ */
+static int
+start_forwarding(void *data __rte_unused)
+{
+	unsigned int lcore = rte_lcore_id();
+	int stream_id;
+	uint16_t cnt;
+	struct lcore_info *li;
+
+	/* lcore_infos only has MAX_LCORES entries */
+	if (lcore >= MAX_LCORES)
+		return 0;
+
+	li = &lcore_infos[lcore];
+	if (!li->mode)
+		return 0;
+
+	if (li->mode == LCORE_MODE_STATS) {
+		printf(":: started stats on lcore %u\n", lcore);
+		packet_per_second_stats();
+		return 0;
+	}
+
+	while (!force_quit)
+		for (stream_id = 0; stream_id < MAX_STREAMS; stream_id++) {
+			/* -1 marks a stream slot that is not in use */
+			if (li->streams[stream_id].rx_port == -1)
+				continue;
+
+			cnt = do_rx(li,
+					li->streams[stream_id].rx_port,
+					li->streams[stream_id].rx_queue);
+			if (cnt)
+				do_tx(li, cnt,
+					li->streams[stream_id].tx_port,
+					li->streams[stream_id].tx_queue);
+		}
+	return 0;
+}
+
+/*
+ * Build the lcore_infos table used by the forwarding stage.
+ *
+ * The first lcore is marked LCORE_MODE_STATS. One stream is created per
+ * (port, rx queue) pair, nr_port * RXQs in total, each forwarding back
+ * to the same port and queue. The streams are spread over the remaining
+ * lcores, which are marked LCORE_MODE_PKT as they receive streams. The
+ * resulting mapping is printed at the end.
+ *
+ * NOTE(review): lcore ids returned by rte_get_next_lcore() index
+ * lcore_infos without a check against MAX_LCORES, and the per-lcore
+ * stream count is not checked against MAX_STREAMS - confirm that both
+ * limits are large enough for the intended configurations.
+ */
+static void
+init_lcore_info(void)
+{
+	int i, j;
+	unsigned int lcore;
+	uint16_t nr_port;
+	uint16_t queue;
+	int port;
+	int stream_id = 0;
+	int streams_per_core;
+	int unassigned_streams;
+	int nb_fwd_streams;
+	nr_port = rte_eth_dev_count_avail();
+
+	/* First logical core is reserved for stats printing */
+	lcore = rte_get_next_lcore(-1, 0, 0);
+	lcore_infos[lcore].mode = LCORE_MODE_STATS;
+
+	/*
+	 * Initialize all cores
+	 * All cores at first must have -1 value in all streams
+	 * This means that this stream is not used, or not set
+	 * yet.
+	 */
+	for (i = 0; i < MAX_LCORES; i++)
+		for (j = 0; j < MAX_STREAMS; j++) {
+			lcore_infos[i].streams[j].tx_port = -1;
+			lcore_infos[i].streams[j].rx_port = -1;
+			lcore_infos[i].streams[j].tx_queue = -1;
+			lcore_infos[i].streams[j].rx_queue = -1;
+			lcore_infos[i].streams_nb = 0;
+		}
+
+	/*
+	 * Calculate the total streams count.
+	 * Also distribute those streams count between the available
+	 * logical cores except first core, since it's reserved for
+	 * stats prints.
+	 */
+	nb_fwd_streams = nr_port * RXQs;
+	/* At least as many forwarding cores as streams: one stream each */
+	if ((int)(nb_lcores - 1) >= nb_fwd_streams)
+		for (i = 0; i < (int)(nb_lcores - 1); i++) {
+			lcore = rte_get_next_lcore(lcore, 0, 0);
+			lcore_infos[lcore].streams_nb = 1;
+		}
+	else {
+		/*
+		 * Fewer cores than streams: every core gets the integer
+		 * share and the remainder is handed out one stream at a
+		 * time to the first cores.
+		 */
+		streams_per_core = nb_fwd_streams / (nb_lcores - 1);
+		unassigned_streams = nb_fwd_streams % (nb_lcores - 1);
+		for (i = 0; i < (int)(nb_lcores - 1); i++) {
+			lcore = rte_get_next_lcore(lcore, 0, 0);
+			lcore_infos[lcore].streams_nb = streams_per_core;
+			if (unassigned_streams) {
+				lcore_infos[lcore].streams_nb++;
+				unassigned_streams--;
+			}
+		}
+	}
+
+	/*
+	 * Set the streams for the cores according to each logical
+	 * core stream count.
+	 * The streams is built on the design of what received should
+	 * forward as well, this means that if you received packets on
+	 * port 0 queue 0 then the same queue should forward the
+	 * packets, using the same logical core.
+	 */
+	lcore = rte_get_next_lcore(-1, 0, 0);
+	for (port = 0; port < nr_port; port++) {
+		/** Create FWD stream **/
+		for (queue = 0; queue < RXQs; queue++) {
+			/*
+			 * Advance to the next core when the current one
+			 * has no stream quota (the stats core) or when
+			 * stream_id has reached a multiple of its quota.
+			 */
+			if (!lcore_infos[lcore].streams_nb ||
+				!(stream_id % lcore_infos[lcore].streams_nb)) {
+				lcore = rte_get_next_lcore(lcore, 0, 0);
+				lcore_infos[lcore].mode = LCORE_MODE_PKT;
+				stream_id = 0;
+			}
+			lcore_infos[lcore].streams[stream_id].rx_queue = queue;
+			lcore_infos[lcore].streams[stream_id].tx_queue = queue;
+			lcore_infos[lcore].streams[stream_id].rx_port = port;
+			lcore_infos[lcore].streams[stream_id].tx_port = port;
+			stream_id++;
+		}
+	}
+
+	/** Print all streams **/
+	printf(":: Stream -> core id[N]: (rx_port, rx_queue)->(tx_port, tx_queue)\n");
+	for (i = 0; i < MAX_LCORES; i++)
+		for (j = 0; j < MAX_STREAMS; j++) {
+			/** No streams for this core **/
+			if (lcore_infos[i].streams[j].tx_port == -1)
+				break;
+			printf("Stream -> core id[%d]: (%d,%d)->(%d,%d)\n",
+				i,
+				lcore_infos[i].streams[j].rx_port,
+				lcore_infos[i].streams[j].rx_queue,
+				lcore_infos[i].streams[j].tx_port,
+				lcore_infos[i].streams[j].tx_queue);
+		}
+}
+
 static void
 init_port(void)
 {
@@ -757,6 +1052,11 @@ main(int argc, char **argv)
 		fprintf(stdout, ":: Memory allocation change(M): %.6lf\n",
 		(alloc - last_alloc) / 1.0e6);
 
+	if (enable_fwd) {
+		init_lcore_info();
+		rte_eal_mp_remote_launch(start_forwarding, NULL, CALL_MASTER);
+	}
+
 	RTE_LCORE_FOREACH_SLAVE(lcore_id)
 
 	if (rte_eal_wait_lcore(lcore_id) < 0)
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
index 28d452fd06..ecd760de81 100644
--- a/doc/guides/tools/flow-perf.rst
+++ b/doc/guides/tools/flow-perf.rst
@@ -21,6 +21,8 @@ have a multi core insertion rate measurement support in the app.
 The application also provide the ability to measure rte flow deletion rate,
 in addition to memory consumption before and after the flows creation.
 
+The app supports single and multi core performance measurements.
+
 
 Compiling the Application
 =========================
@@ -98,6 +100,10 @@ The command line options are:
 *	``--dump-socket-mem``
 	Dump the memory stats for each socket before the insertion and after.
 
+*	``--enable-fwd``
+	Enable packets forwarding after insertion/deletion operations.
+
+
 Attributes:
 
 *	``--ingress``
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v4 0/5] Introduce flow perf application
  2020-04-30  9:32         ` [dpdk-dev] [PATCH v3 1/5] app/test-flow-perf: add flow performance skeleton Wisam Jaddo
@ 2020-04-30 10:33           ` Wisam Jaddo
  2020-04-30 10:33             ` [dpdk-dev] [PATCH v4 1/5] app/test-flow-perf: add flow performance skeleton Wisam Jaddo
                               ` (5 more replies)
  0 siblings, 6 replies; 102+ messages in thread
From: Wisam Jaddo @ 2020-04-30 10:33 UTC (permalink / raw)
  To: dev, jackmin, thomas, jerinjacobk, gerlitz.or, l.yan

Add new application to test rte flow performance from:
- Insertion rate.
- Deletion rate.
- Memory consumption.
- PPS forward measurement.

---
v4:
* Fix compilation error due to variable set but not used.

v3:
* Fix passing hairpin queues to hairpin rss action.

v2:
* reset cpu_time_used every port.
* generate different RSS action every flow with different RETA.
* Fix in commit log message


Wisam Jaddo (5):
  app/test-flow-perf: add flow performance skeleton
  app/test-flow-perf: add insertion rate calculation
  app/test-flow-perf: add deletion rate calculation
  app/test-flow-perf: add memory dump to app
  app/test-flow-perf: add packet forwarding support

 MAINTAINERS                          |    5 +
 app/Makefile                         |    1 +
 app/meson.build                      |    1 +
 app/test-flow-perf/Makefile          |   29 +
 app/test-flow-perf/actions_gen.c     |   86 +++
 app/test-flow-perf/actions_gen.h     |   48 ++
 app/test-flow-perf/flow_gen.c        |  176 +++++
 app/test-flow-perf/flow_gen.h        |   61 ++
 app/test-flow-perf/items_gen.c       |  265 +++++++
 app/test-flow-perf/items_gen.h       |   68 ++
 app/test-flow-perf/main.c            | 1071 ++++++++++++++++++++++++++
 app/test-flow-perf/meson.build       |   19 +
 app/test-flow-perf/user_parameters.h |   31 +
 config/common_base                   |    5 +
 doc/guides/tools/flow-perf.rst       |  265 +++++++
 doc/guides/tools/index.rst           |    1 +
 16 files changed, 2132 insertions(+)
 create mode 100644 app/test-flow-perf/Makefile
 create mode 100644 app/test-flow-perf/actions_gen.c
 create mode 100644 app/test-flow-perf/actions_gen.h
 create mode 100644 app/test-flow-perf/flow_gen.c
 create mode 100644 app/test-flow-perf/flow_gen.h
 create mode 100644 app/test-flow-perf/items_gen.c
 create mode 100644 app/test-flow-perf/items_gen.h
 create mode 100644 app/test-flow-perf/main.c
 create mode 100644 app/test-flow-perf/meson.build
 create mode 100644 app/test-flow-perf/user_parameters.h
 create mode 100644 doc/guides/tools/flow-perf.rst

-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v4 1/5] app/test-flow-perf: add flow performance skeleton
  2020-04-30 10:33           ` [dpdk-dev] [PATCH v4 0/5] Introduce flow perf application Wisam Jaddo
@ 2020-04-30 10:33             ` Wisam Jaddo
  2020-04-30 11:59               ` Xiaoyu Min
                                 ` (2 more replies)
  2020-04-30 10:33             ` [dpdk-dev] [PATCH v4 2/5] app/test-flow-perf: add insertion rate calculation Wisam Jaddo
                               ` (4 subsequent siblings)
  5 siblings, 3 replies; 102+ messages in thread
From: Wisam Jaddo @ 2020-04-30 10:33 UTC (permalink / raw)
  To: dev, jackmin, thomas, jerinjacobk, gerlitz.or, l.yan

Add flow performance application skeleton.

Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 MAINTAINERS                          |   5 +
 app/Makefile                         |   1 +
 app/meson.build                      |   1 +
 app/test-flow-perf/Makefile          |  26 +++
 app/test-flow-perf/main.c            | 246 +++++++++++++++++++++++++++
 app/test-flow-perf/meson.build       |  11 ++
 app/test-flow-perf/user_parameters.h |  16 ++
 config/common_base                   |   5 +
 doc/guides/tools/flow-perf.rst       |  69 ++++++++
 doc/guides/tools/index.rst           |   1 +
 10 files changed, 381 insertions(+)
 create mode 100644 app/test-flow-perf/Makefile
 create mode 100644 app/test-flow-perf/main.c
 create mode 100644 app/test-flow-perf/meson.build
 create mode 100644 app/test-flow-perf/user_parameters.h
 create mode 100644 doc/guides/tools/flow-perf.rst

diff --git a/MAINTAINERS b/MAINTAINERS
index d31a809292..b5632c1bf5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1504,6 +1504,11 @@ T: git://dpdk.org/next/dpdk-next-net
 F: app/test-pmd/
 F: doc/guides/testpmd_app_ug/
 
+Flow performance tool
+M: Wisam Jaddo <wisamm@mellanox.com>
+F: app/test-flow-perf
+F: doc/guides/tools/flow-perf.rst
+
 Compression performance test application
 T: git://dpdk.org/next/dpdk-next-crypto
 F: app/test-compress-perf/
diff --git a/app/Makefile b/app/Makefile
index 823771c5fc..bd823f3db7 100644
--- a/app/Makefile
+++ b/app/Makefile
@@ -9,6 +9,7 @@ DIRS-$(CONFIG_RTE_PROC_INFO) += proc-info
 DIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += pdump
 DIRS-$(CONFIG_RTE_LIBRTE_ACL) += test-acl
 DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += test-cmdline
+DIRS-$(CONFIG_RTE_TEST_FLOW_PERF) += test-flow-perf
 DIRS-$(CONFIG_RTE_LIBRTE_FIB) += test-fib
 DIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += test-pipeline
 DIRS-$(CONFIG_RTE_LIBRTE_IPSEC) += test-sad
diff --git a/app/meson.build b/app/meson.build
index 0f7fe94649..e26f5b72f5 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -14,6 +14,7 @@ apps = [
 	'test-compress-perf',
 	'test-crypto-perf',
 	'test-eventdev',
+	'test-flow-perf',
 	'test-fib',
 	'test-pipeline',
 	'test-pmd',
diff --git a/app/test-flow-perf/Makefile b/app/test-flow-perf/Makefile
new file mode 100644
index 0000000000..45b1fb1464
--- /dev/null
+++ b/app/test-flow-perf/Makefile
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2020 Mellanox Technologies, Ltd
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifeq ($(CONFIG_RTE_TEST_FLOW_PERF),y)
+
+#
+# application name
+#
+APP = flow_perf
+
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -Wno-deprecated-declarations
+CFLAGS += -Wno-unused-function
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-y += main.c
+
+include $(RTE_SDK)/mk/rte.app.mk
+
+endif
diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
new file mode 100644
index 0000000000..156b9ef553
--- /dev/null
+++ b/app/test-flow-perf/main.c
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This is the main file of the application.
+ * The application lets the user test the insertion rate of a specific
+ * rte_flow rule under stress (~4M rules).
+ *
+ * It also provides a packets-per-second (PPS) measurement after installing
+ * all rules: the user may send traffic that matches the rules once they are
+ * all installed, to check performance or functionality after the
+ * stress.
+ *
+ * Flow insertion is done for all ports first and the results are printed;
+ * after that the application goes into packet forwarding mode, where it
+ * receives traffic, if any, forwards it back, and gives a
+ * packets-per-second measurement.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <netinet/in.h>
+#include <setjmp.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <assert.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/time.h>
+
+
+#include <rte_eal.h>
+#include <rte_common.h>
+#include <rte_malloc.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_net.h>
+#include <rte_flow.h>
+#include <rte_cycles.h>
+#include <rte_memory.h>
+
+#include "user_parameters.h"
+
+static uint32_t nb_lcores;
+static struct rte_mempool *mbuf_mp;
+
+static void usage(char *progname)
+{
+	printf("\nusage: %s", progname);
+}
+
+static void
+args_parse(int argc, char **argv)
+{
+	char **argvopt;
+	int opt;
+	int opt_idx;
+	static struct option lgopts[] = {
+		/* Control */
+		{ "help",                       0, 0, 0 },
+	};
+
+	argvopt = argv;
+
+	while ((opt = getopt_long(argc, argvopt, "",
+				lgopts, &opt_idx)) != EOF) {
+		switch (opt) {
+		case 0:
+			if (!strcmp(lgopts[opt_idx].name, "help")) {
+				usage(argv[0]);
+				rte_exit(EXIT_SUCCESS, "Displayed help\n");
+			}
+			break;
+		default:
+			usage(argv[0]);
+			printf("Invalid option: %s\n", argv[optind]);
+			rte_exit(EXIT_SUCCESS, "Invalid option\n");
+			break;
+		}
+	}
+}
+
+static void
+init_port(void)
+{
+	int ret;
+	uint16_t i, j;
+	uint16_t port_id;
+	uint16_t nr_ports = rte_eth_dev_count_avail();
+	struct rte_eth_hairpin_conf hairpin_conf = {
+			.peer_count = 1,
+	};
+	struct rte_eth_conf port_conf = {
+		.rxmode = {
+			.split_hdr_size = 0,
+		},
+		.rx_adv_conf = {
+			.rss_conf.rss_hf =
+					ETH_RSS_IP  |
+					ETH_RSS_UDP |
+					ETH_RSS_TCP,
+		}
+	};
+	struct rte_eth_txconf txq_conf;
+	struct rte_eth_rxconf rxq_conf;
+	struct rte_eth_dev_info dev_info;
+
+	if (nr_ports == 0)
+		rte_exit(EXIT_FAILURE, "Error: no port detected\n");
+	mbuf_mp = rte_pktmbuf_pool_create("mbuf_pool",
+					TOTAL_MBUF_NUM, MBUF_CACHE_SIZE,
+					0, MBUF_SIZE,
+					rte_socket_id());
+
+	if (mbuf_mp == NULL)
+		rte_exit(EXIT_FAILURE, "Error: can't init mbuf pool\n");
+
+	for (port_id = 0; port_id < nr_ports; port_id++) {
+		ret = rte_eth_dev_info_get(port_id, &dev_info);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+					"Error during getting device (port %u) info: %s\n",
+					port_id, strerror(-ret));
+
+		port_conf.txmode.offloads &= dev_info.tx_offload_capa;
+		printf(":: initializing port: %d\n", port_id);
+		ret = rte_eth_dev_configure(port_id, RXQs + HAIRPIN_QUEUES,
+				TXQs + HAIRPIN_QUEUES, &port_conf);
+		if (ret < 0)
+			rte_exit(EXIT_FAILURE,
+					":: cannot configure device: err=%d, port=%u\n",
+					ret, port_id);
+
+		rxq_conf = dev_info.default_rxconf;
+		rxq_conf.offloads = port_conf.rxmode.offloads;
+		for (i = 0; i < RXQs; i++) {
+			ret = rte_eth_rx_queue_setup(port_id, i, NR_RXD,
+						rte_eth_dev_socket_id(port_id),
+						&rxq_conf,
+						mbuf_mp);
+			if (ret < 0)
+				rte_exit(EXIT_FAILURE,
+						":: Rx queue setup failed: err=%d, port=%u\n",
+						ret, port_id);
+		}
+
+		txq_conf = dev_info.default_txconf;
+		txq_conf.offloads = port_conf.txmode.offloads;
+
+		for (i = 0; i < TXQs; i++) {
+			ret = rte_eth_tx_queue_setup(port_id, i, NR_TXD,
+						rte_eth_dev_socket_id(port_id),
+						&txq_conf);
+			if (ret < 0)
+				rte_exit(EXIT_FAILURE,
+						":: Tx queue setup failed: err=%d, port=%u\n",
+						ret, port_id);
+		}
+
+		ret = rte_eth_promiscuous_enable(port_id);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+					":: promiscuous mode enable failed: err=%s, port=%u\n",
+					rte_strerror(-ret), port_id);
+
+		for (i = RXQs, j = 0; i < RXQs + HAIRPIN_QUEUES; i++, j++) {
+			hairpin_conf.peers[0].port = port_id;
+			hairpin_conf.peers[0].queue = j + TXQs;
+			ret = rte_eth_rx_hairpin_queue_setup(port_id, i,
+							NR_RXD, &hairpin_conf);
+			if (ret != 0)
+				rte_exit(EXIT_FAILURE,
+					":: Hairpin rx queue setup failed: err=%d, port=%u\n",
+					ret, port_id);
+		}
+
+		for (i = TXQs, j = 0; i < TXQs + HAIRPIN_QUEUES; i++, j++) {
+			hairpin_conf.peers[0].port = port_id;
+			hairpin_conf.peers[0].queue = j + RXQs;
+			ret = rte_eth_tx_hairpin_queue_setup(port_id, i,
+							NR_TXD, &hairpin_conf);
+			if (ret != 0)
+				rte_exit(EXIT_FAILURE,
+					":: Hairpin tx queue setup failed: err=%d, port=%u\n",
+					ret, port_id);
+		}
+
+		ret = rte_eth_dev_start(port_id);
+		if (ret < 0)
+			rte_exit(EXIT_FAILURE,
+				"rte_eth_dev_start:err=%d, port=%u\n",
+				ret, port_id);
+
+		printf(":: initializing port: %d done\n", port_id);
+	}
+}
+
+int
+main(int argc, char **argv)
+{
+	uint16_t lcore_id;
+	uint16_t port;
+	uint16_t nr_ports;
+	int ret;
+	struct rte_flow_error error;
+
+	nr_ports = rte_eth_dev_count_avail();
+	ret = rte_eal_init(argc, argv);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "EAL init failed\n");
+
+	argc -= ret;
+	argv += ret;
+
+	if (argc > 1)
+		args_parse(argc, argv);
+
+	init_port();
+
+	nb_lcores = rte_lcore_count();
+
+	if (nb_lcores <= 1)
+		rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
+
+	RTE_LCORE_FOREACH_SLAVE(lcore_id)
+
+	if (rte_eal_wait_lcore(lcore_id) < 0)
+		break;
+
+	for (port = 0; port < nr_ports; port++) {
+		rte_flow_flush(port, &error);
+		rte_eth_dev_stop(port);
+		rte_eth_dev_close(port);
+	}
+	return 0;
+}
diff --git a/app/test-flow-perf/meson.build b/app/test-flow-perf/meson.build
new file mode 100644
index 0000000000..ec9bb3b3aa
--- /dev/null
+++ b/app/test-flow-perf/meson.build
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2020 Mellanox Technologies, Ltd
+
+# meson file, for building this example as part of a main DPDK build.
+#
+# To build this example as a standalone application with an already-installed
+# DPDK instance, use 'make'
+
+sources = files(
+	'main.c',
+)
diff --git a/app/test-flow-perf/user_parameters.h b/app/test-flow-perf/user_parameters.h
new file mode 100644
index 0000000000..56ec7f47b5
--- /dev/null
+++ b/app/test-flow-perf/user_parameters.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This file will hold the user parameters values
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+/** Configuration **/
+#define RXQs 4
+#define TXQs 4
+#define HAIRPIN_QUEUES 4
+#define TOTAL_MBUF_NUM 32000
+#define MBUF_SIZE 2048
+#define MBUF_CACHE_SIZE 512
+#define NR_RXD  256
+#define NR_TXD  256
diff --git a/config/common_base b/config/common_base
index 14000ba07e..eaaeaaaee2 100644
--- a/config/common_base
+++ b/config/common_base
@@ -1124,3 +1124,8 @@ CONFIG_RTE_APP_CRYPTO_PERF=y
 # Compile the eventdev application
 #
 CONFIG_RTE_APP_EVENTDEV=y
+
+#
+# Compile the rte flow perf application
+#
+CONFIG_RTE_TEST_FLOW_PERF=y
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
new file mode 100644
index 0000000000..30ce1b6cc0
--- /dev/null
+++ b/doc/guides/tools/flow-perf.rst
@@ -0,0 +1,69 @@
+..	SPDX-License-Identifier: BSD-3-Clause
+	Copyright 2020 Mellanox Technologies, Ltd
+
+RTE Flow performance tool
+=========================
+
+Application for rte_flow performance testing.
+
+
+Compiling the Application
+=========================
+The ``test-flow-perf`` application is compiled as part of the main compilation
+of the DPDK libraries and tools.
+
+Refer to the DPDK Getting Started Guides for details.
+The basic compilation steps are:
+
+#. Set the required environmental variables and go to the source directory:
+
+	.. code-block:: console
+
+		export RTE_SDK=/path/to/rte_sdk
+		cd $RTE_SDK
+
+#. Set the compilation target. For example:
+
+	.. code-block:: console
+
+		export RTE_TARGET=x86_64-native-linux-gcc
+
+#. Build the application:
+
+	.. code-block:: console
+
+		make install T=$RTE_TARGET
+
+#. The compiled application will be located at:
+
+	.. code-block:: console
+
+		$RTE_SDK/$RTE_TARGET/app/flow-perf
+
+
+Running the Application
+=======================
+
+EAL Command-line Options
+------------------------
+
+Please refer to :doc:`EAL parameters (Linux) <../linux_gsg/linux_eal_parameters>`
+or :doc:`EAL parameters (FreeBSD) <../freebsd_gsg/freebsd_eal_parameters>` for
+a list of available EAL command-line options.
+
+
+Flow performance Options
+------------------------
+
+The following are the command-line options for the flow performance application.
+They must be separated from the EAL options, shown in the previous section, with
+a ``--`` separator:
+
+.. code-block:: console
+
+	sudo ./test-flow-perf -n 4 -w 08:00.0,dv_flow_en=1 --
+
+The command line options are:
+
+*	``--help``
+	Display a help message and quit.
diff --git a/doc/guides/tools/index.rst b/doc/guides/tools/index.rst
index 782b30864e..7279daebc6 100644
--- a/doc/guides/tools/index.rst
+++ b/doc/guides/tools/index.rst
@@ -16,3 +16,4 @@ DPDK Tools User Guides
     cryptoperf
     comp_perf
     testeventdev
+    flow-perf
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v4 2/5] app/test-flow-perf: add insertion rate calculation
  2020-04-30 10:33           ` [dpdk-dev] [PATCH v4 0/5] Introduce flow perf application Wisam Jaddo
  2020-04-30 10:33             ` [dpdk-dev] [PATCH v4 1/5] app/test-flow-perf: add flow performance skeleton Wisam Jaddo
@ 2020-04-30 10:33             ` Wisam Jaddo
  2020-04-30 12:00               ` Xiaoyu Min
                                 ` (2 more replies)
  2020-04-30 10:33             ` [dpdk-dev] [PATCH v4 3/5] app/test-flow-perf: add deletion " Wisam Jaddo
                               ` (3 subsequent siblings)
  5 siblings, 3 replies; 102+ messages in thread
From: Wisam Jaddo @ 2020-04-30 10:33 UTC (permalink / raw)
  To: dev, jackmin, thomas, jerinjacobk, gerlitz.or, l.yan

Add insertion rate calculation feature into flow
performance application.

The application now provides the ability to test the
insertion rate of a specific rte_flow rule, by
stressing the NIC with it, and calculates the
insertion rate.

The application offers some options in the command
line, to configure which rule to apply.

After that the application will start producing
rules with same pattern but increasing the outer IP
source address by 1 each time, thus it will give
different flow each time, and all other items will
have open masks.

The current design has single core insertion rate.
In the future we may have multi core insertion
rate measurement support in the app.

Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 app/test-flow-perf/Makefile          |   3 +
 app/test-flow-perf/actions_gen.c     |  86 ++++++
 app/test-flow-perf/actions_gen.h     |  48 ++++
 app/test-flow-perf/flow_gen.c        | 176 ++++++++++++
 app/test-flow-perf/flow_gen.h        |  61 ++++
 app/test-flow-perf/items_gen.c       | 265 +++++++++++++++++
 app/test-flow-perf/items_gen.h       |  68 +++++
 app/test-flow-perf/main.c            | 416 +++++++++++++++++++++++++--
 app/test-flow-perf/meson.build       |   8 +
 app/test-flow-perf/user_parameters.h |  15 +
 doc/guides/tools/flow-perf.rst       | 186 +++++++++++-
 11 files changed, 1307 insertions(+), 25 deletions(-)
 create mode 100644 app/test-flow-perf/actions_gen.c
 create mode 100644 app/test-flow-perf/actions_gen.h
 create mode 100644 app/test-flow-perf/flow_gen.c
 create mode 100644 app/test-flow-perf/flow_gen.h
 create mode 100644 app/test-flow-perf/items_gen.c
 create mode 100644 app/test-flow-perf/items_gen.h

diff --git a/app/test-flow-perf/Makefile b/app/test-flow-perf/Makefile
index 45b1fb1464..968c7c60dd 100644
--- a/app/test-flow-perf/Makefile
+++ b/app/test-flow-perf/Makefile
@@ -19,6 +19,9 @@ CFLAGS += -Wno-unused-function
 #
 # all source are stored in SRCS-y
 #
+SRCS-y += actions_gen.c
+SRCS-y += flow_gen.c
+SRCS-y += items_gen.c
 SRCS-y += main.c
 
 include $(RTE_SDK)/mk/rte.app.mk
diff --git a/app/test-flow-perf/actions_gen.c b/app/test-flow-perf/actions_gen.c
new file mode 100644
index 0000000000..564ed820e4
--- /dev/null
+++ b/app/test-flow-perf/actions_gen.c
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * The file contains the implementations of actions generators.
+ * Each generator is responsible for preparing its action instance
+ * and initializing it with needed data.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ **/
+
+#include <sys/types.h>
+#include <rte_malloc.h>
+#include <rte_flow.h>
+#include <rte_ethdev.h>
+
+#include "actions_gen.h"
+#include "user_parameters.h"
+
+void
+gen_mark(void)
+{
+	mark_action.id = MARK_ID;
+}
+
+void
+gen_queue(uint16_t queue)
+{
+	queue_action.index = queue;
+}
+
+void
+gen_jump(uint16_t next_table)
+{
+	jump_action.group = next_table;
+}
+
+void
+gen_rss(uint16_t *queues, uint16_t queues_number)
+{
+	uint16_t queue;
+	struct action_rss_data *rss_data;
+	rss_data = rte_malloc("rss_data",
+		sizeof(struct action_rss_data), 0);
+
+	if (rss_data == NULL)
+		rte_exit(EXIT_FAILURE, "No Memory available!");
+
+	*rss_data = (struct action_rss_data){
+		.conf = (struct rte_flow_action_rss){
+			.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
+			.level = 0,
+			.types = ETH_RSS_IP,
+			.key_len = 0,
+			.queue_num = queues_number,
+			.key = 0,
+			.queue = rss_data->queue,
+		},
+		.key = { 0 },
+		.queue = { 0 },
+	};
+
+	for (queue = 0; queue < queues_number; queue++)
+		rss_data->queue[queue] = queues[queue];
+
+	rss_action = &rss_data->conf;
+}
+
+void
+gen_set_meta(void)
+{
+	meta_action.data = RTE_BE32(META_DATA);
+	meta_action.mask = RTE_BE32(0xffffffff);
+}
+
+void
+gen_set_tag(void)
+{
+	tag_action.data = RTE_BE32(META_DATA);
+	tag_action.mask = RTE_BE32(0xffffffff);
+	tag_action.index = TAG_INDEX;
+}
+
+void
+gen_port_id(void)
+{
+	port_id.id = PORT_ID_DST;
+}
diff --git a/app/test-flow-perf/actions_gen.h b/app/test-flow-perf/actions_gen.h
new file mode 100644
index 0000000000..556d48b871
--- /dev/null
+++ b/app/test-flow-perf/actions_gen.h
@@ -0,0 +1,48 @@
+/** SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This file contains the function declarations used to
+ * generate each supported action.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ **/
+
+#ifndef _ACTION_GEN_
+#define _ACTION_GEN_
+
+struct rte_flow_action_mark mark_action;
+struct rte_flow_action_queue queue_action;
+struct rte_flow_action_jump jump_action;
+struct rte_flow_action_rss *rss_action;
+struct rte_flow_action_set_meta meta_action;
+struct rte_flow_action_set_tag tag_action;
+struct rte_flow_action_port_id port_id;
+
+/* Storage for struct rte_flow_action_rss including external data. */
+struct action_rss_data {
+	struct rte_flow_action_rss conf;
+	uint8_t key[64];
+	uint16_t queue[128];
+} action_rss_data;
+
+void
+gen_mark(void);
+
+void
+gen_queue(uint16_t queue);
+
+void
+gen_jump(uint16_t next_table);
+
+void
+gen_rss(uint16_t *queues, uint16_t queues_number);
+
+void
+gen_set_meta(void);
+
+void
+gen_set_tag(void);
+
+void
+gen_port_id(void);
+
+#endif
diff --git a/app/test-flow-perf/flow_gen.c b/app/test-flow-perf/flow_gen.c
new file mode 100644
index 0000000000..2d42deace9
--- /dev/null
+++ b/app/test-flow-perf/flow_gen.c
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * The file contains the implementations of the method to
+ * fill items, actions & attributes in their corresponding
+ * arrays, and then generate rte_flow rule.
+ *
+ * After generation, the rule goes through validation and
+ * creation, and the result is returned.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+#include <stdint.h>
+
+#include "flow_gen.h"
+#include "items_gen.h"
+#include "actions_gen.h"
+#include "user_parameters.h"
+
+
+static void
+fill_attributes(struct rte_flow_attr *attr,
+	uint8_t flow_attrs, uint16_t group)
+{
+	if (flow_attrs & INGRESS)
+		attr->ingress = 1;
+	if (flow_attrs & EGRESS)
+		attr->egress = 1;
+	if (flow_attrs & TRANSFER)
+		attr->transfer = 1;
+	attr->group = group;
+}
+
+static void
+fill_items(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint16_t flow_items, uint32_t outer_ip_src)
+{
+	uint8_t items_counter = 0;
+
+	if (flow_items & META_ITEM)
+		add_meta_data(items, items_counter++);
+	if (flow_items & TAG_ITEM)
+		add_meta_tag(items, items_counter++);
+	if (flow_items & ETH_ITEM)
+		add_ether(items, items_counter++);
+	if (flow_items & VLAN_ITEM)
+		add_vlan(items, items_counter++);
+	if (flow_items & IPV4_ITEM)
+		add_ipv4(items, items_counter++, outer_ip_src);
+	if (flow_items & IPV6_ITEM)
+		add_ipv6(items, items_counter++, outer_ip_src);
+	if (flow_items & TCP_ITEM)
+		add_tcp(items, items_counter++);
+	if (flow_items & UDP_ITEM)
+		add_udp(items, items_counter++);
+	if (flow_items & VXLAN_ITEM)
+		add_vxlan(items, items_counter++);
+	if (flow_items & VXLAN_GPE_ITEM)
+		add_vxlan_gpe(items, items_counter++);
+	if (flow_items & GRE_ITEM)
+		add_gre(items, items_counter++);
+	if (flow_items & GENEVE_ITEM)
+		add_geneve(items, items_counter++);
+	if (flow_items & GTP_ITEM)
+		add_gtp(items, items_counter++);
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_END;
+}
+
+static void
+fill_actions(struct rte_flow_action actions[MAX_ACTIONS_NUM],
+	uint16_t flow_actions, uint32_t counter, uint16_t next_table)
+{
+	uint8_t actions_counter = 0;
+	uint16_t queues[RXQs];
+	uint16_t hairpin_queues[HAIRPIN_QUEUES];
+	uint16_t i;
+	struct rte_flow_action_count count_action;
+	uint8_t temp = counter & 0xff;
+
+	/* None-fate actions */
+	if (flow_actions & MARK_ACTION) {
+		if (!counter)
+			gen_mark();
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_MARK;
+		actions[actions_counter++].conf = &mark_action;
+	}
+	if (flow_actions & COUNT_ACTION) {
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_COUNT;
+		actions[actions_counter++].conf = &count_action;
+	}
+	if (flow_actions & META_ACTION) {
+		if (!counter)
+			gen_set_meta();
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_SET_META;
+		actions[actions_counter++].conf = &meta_action;
+	}
+	if (flow_actions & TAG_ACTION) {
+		if (!counter)
+			gen_set_tag();
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_SET_TAG;
+		actions[actions_counter++].conf = &tag_action;
+	}
+
+	/* Fate actions */
+	if (flow_actions & QUEUE_ACTION) {
+		gen_queue(counter % RXQs);
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_QUEUE;
+		actions[actions_counter++].conf = &queue_action;
+	}
+	if (flow_actions & RSS_ACTION) {
+		for (i = 0; i < RXQs; i++)
+			queues[i] = (temp >> (i << 1)) & 0x3;
+		gen_rss(queues, RXQs);
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_RSS;
+		actions[actions_counter++].conf = rss_action;
+	}
+	if (flow_actions & JUMP_ACTION) {
+		if (!counter)
+			gen_jump(next_table);
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_JUMP;
+		actions[actions_counter++].conf = &jump_action;
+	}
+	if (flow_actions & PORT_ID_ACTION) {
+		if (!counter)
+			gen_port_id();
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_PORT_ID;
+		actions[actions_counter++].conf = &port_id;
+	}
+	if (flow_actions & DROP_ACTION)
+		actions[actions_counter++].type = RTE_FLOW_ACTION_TYPE_DROP;
+	if (flow_actions & HAIRPIN_QUEUE_ACTION) {
+		gen_queue((counter % HAIRPIN_QUEUES) + RXQs);
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_QUEUE;
+		actions[actions_counter++].conf = &queue_action;
+	}
+	if (flow_actions & HAIRPIN_RSS_ACTION) {
+		for (i = 0; i < HAIRPIN_QUEUES; i++)
+			hairpin_queues[i] = ((temp >> (i << 1)) & 0x3) + RXQs;
+		gen_rss(hairpin_queues, RXQs);
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_RSS;
+		actions[actions_counter++].conf = rss_action;
+	}
+
+	actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_END;
+}
+
+struct rte_flow *
+generate_flow(uint16_t port_id,
+	uint16_t group,
+	uint8_t flow_attrs,
+	uint16_t flow_items,
+	uint16_t flow_actions,
+	uint16_t next_table,
+	uint32_t outer_ip_src,
+	struct rte_flow_error *error)
+{
+	struct rte_flow_attr attr;
+	struct rte_flow_item items[MAX_ITEMS_NUM];
+	struct rte_flow_action actions[MAX_ACTIONS_NUM];
+	struct rte_flow *flow = NULL;
+
+	memset(items, 0, sizeof(items));
+	memset(actions, 0, sizeof(actions));
+	memset(&attr, 0, sizeof(struct rte_flow_attr));
+
+	fill_attributes(&attr, flow_attrs, group);
+
+	fill_actions(actions, flow_actions,
+			outer_ip_src, next_table);
+
+	fill_items(items, flow_items, outer_ip_src);
+
+	flow = rte_flow_create(port_id, &attr, items, actions, error);
+	return flow;
+}
diff --git a/app/test-flow-perf/flow_gen.h b/app/test-flow-perf/flow_gen.h
new file mode 100644
index 0000000000..99cb9e3791
--- /dev/null
+++ b/app/test-flow-perf/flow_gen.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This file contains the item, action and attribute
+ * definitions, and the methods to prepare and fill items,
+ * actions and attributes to generate an rte_flow rule.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+#ifndef _FLOW_GEN_
+#define _FLOW_GEN_
+
+#include <stdint.h>
+#include <rte_flow.h>
+
+#include "user_parameters.h"
+
+/* Items */
+#define ETH_ITEM       0x0001
+#define IPV4_ITEM      0x0002
+#define IPV6_ITEM      0x0004
+#define VLAN_ITEM      0x0008
+#define TCP_ITEM       0x0010
+#define UDP_ITEM       0x0020
+#define VXLAN_ITEM     0x0040
+#define VXLAN_GPE_ITEM 0x0080
+#define GRE_ITEM       0x0100
+#define GENEVE_ITEM    0x0200
+#define GTP_ITEM       0x0400
+#define META_ITEM      0x0800
+#define TAG_ITEM       0x1000
+
+/* Actions */
+#define QUEUE_ACTION   0x0001
+#define MARK_ACTION    0x0002
+#define JUMP_ACTION    0x0004
+#define RSS_ACTION     0x0008
+#define COUNT_ACTION   0x0010
+#define META_ACTION    0x0020
+#define TAG_ACTION     0x0040
+#define DROP_ACTION    0x0080
+#define PORT_ID_ACTION 0x0100
+#define HAIRPIN_QUEUE_ACTION 0x0200
+#define HAIRPIN_RSS_ACTION   0x0400
+
+/* Attributes */
+#define INGRESS  0x0001
+#define EGRESS   0x0002
+#define TRANSFER 0x0004
+
+struct rte_flow *
+generate_flow(uint16_t port_id,
+	uint16_t group,
+	uint8_t flow_attrs,
+	uint16_t flow_items,
+	uint16_t flow_actions,
+	uint16_t next_table,
+	uint32_t outer_ip_src,
+	struct rte_flow_error *error);
+
+#endif
diff --git a/app/test-flow-perf/items_gen.c b/app/test-flow-perf/items_gen.c
new file mode 100644
index 0000000000..fb9733d4e7
--- /dev/null
+++ b/app/test-flow-perf/items_gen.c
@@ -0,0 +1,265 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This file contains the implementations of the item
+ * related methods. Each item has a method to prepare
+ * the item and add it into the items array at a given index.
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+#include <stdint.h>
+#include <rte_flow.h>
+
+#include "items_gen.h"
+#include "user_parameters.h"
+
+static struct rte_flow_item_eth eth_spec;
+static struct rte_flow_item_eth eth_mask;
+static struct rte_flow_item_vlan vlan_spec;
+static struct rte_flow_item_vlan vlan_mask;
+static struct rte_flow_item_ipv4 ipv4_spec;
+static struct rte_flow_item_ipv4 ipv4_mask;
+static struct rte_flow_item_ipv6 ipv6_spec;
+static struct rte_flow_item_ipv6 ipv6_mask;
+static struct rte_flow_item_udp udp_spec;
+static struct rte_flow_item_udp udp_mask;
+static struct rte_flow_item_tcp tcp_spec;
+static struct rte_flow_item_tcp tcp_mask;
+static struct rte_flow_item_vxlan vxlan_spec;
+static struct rte_flow_item_vxlan vxlan_mask;
+static struct rte_flow_item_vxlan_gpe vxlan_gpe_spec;
+static struct rte_flow_item_vxlan_gpe vxlan_gpe_mask;
+static struct rte_flow_item_gre gre_spec;
+static struct rte_flow_item_gre gre_mask;
+static struct rte_flow_item_geneve geneve_spec;
+static struct rte_flow_item_geneve geneve_mask;
+static struct rte_flow_item_gtp gtp_spec;
+static struct rte_flow_item_gtp gtp_mask;
+static struct rte_flow_item_meta meta_spec;
+static struct rte_flow_item_meta meta_mask;
+static struct rte_flow_item_tag tag_spec;
+static struct rte_flow_item_tag tag_mask;
+
+
+void
+add_ether(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	memset(&eth_spec, 0, sizeof(struct rte_flow_item_eth));
+	memset(&eth_mask, 0, sizeof(struct rte_flow_item_eth));
+	eth_spec.type = 0;
+	eth_mask.type = 0;
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_ETH;
+	items[items_counter].spec = &eth_spec;
+	items[items_counter].mask = &eth_mask;
+}
+
+void
+add_vlan(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	uint16_t vlan_value = VLAN_VALUE;
+	memset(&vlan_spec, 0, sizeof(struct rte_flow_item_vlan));
+	memset(&vlan_mask, 0, sizeof(struct rte_flow_item_vlan));
+
+	vlan_spec.tci = RTE_BE16(vlan_value);
+	vlan_mask.tci = RTE_BE16(0xffff);
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_VLAN;
+	items[items_counter].spec = &vlan_spec;
+	items[items_counter].mask = &vlan_mask;
+}
+
+void
+add_ipv4(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter, uint32_t src_ipv4)
+{
+	memset(&ipv4_spec, 0, sizeof(struct rte_flow_item_ipv4));
+	memset(&ipv4_mask, 0, sizeof(struct rte_flow_item_ipv4));
+
+	ipv4_spec.hdr.src_addr = src_ipv4;
+	ipv4_mask.hdr.src_addr = 0xffffffff;
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_IPV4;
+	items[items_counter].spec = &ipv4_spec;
+	items[items_counter].mask = &ipv4_mask;
+}
+
+
+void
+add_ipv6(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter, int src_ipv6)
+{
+	memset(&ipv6_spec, 0, sizeof(struct rte_flow_item_ipv6));
+	memset(&ipv6_mask, 0, sizeof(struct rte_flow_item_ipv6));
+
+	/** Set ipv6 src **/
+	memset(&ipv6_spec.hdr.src_addr, src_ipv6,
+					sizeof(ipv6_spec.hdr.src_addr) / 2);
+
+	/** Full mask **/
+	memset(&ipv6_mask.hdr.src_addr, 1,
+					sizeof(ipv6_spec.hdr.src_addr));
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_IPV6;
+	items[items_counter].spec = &ipv6_spec;
+	items[items_counter].mask = &ipv6_mask;
+}
+
+void
+add_tcp(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	memset(&tcp_spec, 0, sizeof(struct rte_flow_item_tcp));
+	memset(&tcp_mask, 0, sizeof(struct rte_flow_item_tcp));
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_TCP;
+	items[items_counter].spec = &tcp_spec;
+	items[items_counter].mask = &tcp_mask;
+}
+
+void
+add_udp(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	memset(&udp_spec, 0, sizeof(struct rte_flow_item_udp));
+	memset(&udp_mask, 0, sizeof(struct rte_flow_item_udp));
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_UDP;
+	items[items_counter].spec = &udp_spec;
+	items[items_counter].mask = &udp_mask;
+}
+
+void
+add_vxlan(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	uint32_t vni_value = VNI_VALUE;
+	uint8_t i;
+	memset(&vxlan_spec, 0, sizeof(struct rte_flow_item_vxlan));
+	memset(&vxlan_mask, 0, sizeof(struct rte_flow_item_vxlan));
+
+	/* Set standard vxlan vni */
+	for (i = 0; i < 3; i++) {
+		vxlan_spec.vni[2 - i] = vni_value >> (i * 8);
+		vxlan_mask.vni[2 - i] = 0xff;
+	}
+
+	/* Standard vxlan flags **/
+	vxlan_spec.flags = 0x8;
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_VXLAN;
+	items[items_counter].spec = &vxlan_spec;
+	items[items_counter].mask = &vxlan_mask;
+}
+
+void
+add_vxlan_gpe(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	uint32_t vni_value = VNI_VALUE;
+	uint8_t i;
+	memset(&vxlan_gpe_spec, 0, sizeof(struct rte_flow_item_vxlan_gpe));
+	memset(&vxlan_gpe_mask, 0, sizeof(struct rte_flow_item_vxlan_gpe));
+
+	/* Set vxlan-gpe vni */
+	for (i = 0; i < 3; i++) {
+		vxlan_gpe_spec.vni[2 - i] = vni_value >> (i * 8);
+		vxlan_gpe_mask.vni[2 - i] = 0xff;
+	}
+
+	/* vxlan-gpe flags */
+	vxlan_gpe_spec.flags = 0x0c;
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE;
+	items[items_counter].spec = &vxlan_gpe_spec;
+	items[items_counter].mask = &vxlan_gpe_mask;
+}
+
+void
+add_gre(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	uint16_t proto = GRE_PROTO;
+	memset(&gre_spec, 0, sizeof(struct rte_flow_item_gre));
+	memset(&gre_mask, 0, sizeof(struct rte_flow_item_gre));
+
+	gre_spec.protocol = RTE_BE16(proto);
+	gre_mask.protocol = 0xffff;
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_GRE;
+	items[items_counter].spec = &gre_spec;
+	items[items_counter].mask = &gre_mask;
+}
+
+void
+add_geneve(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	uint32_t vni_value = VNI_VALUE;
+	uint8_t i;
+	memset(&geneve_spec, 0, sizeof(struct rte_flow_item_geneve));
+	memset(&geneve_mask, 0, sizeof(struct rte_flow_item_geneve));
+
+	for (i = 0; i < 3; i++) {
+		geneve_spec.vni[2 - i] = vni_value >> (i * 8);
+		geneve_mask.vni[2 - i] = 0xff;
+	}
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_GENEVE;
+	items[items_counter].spec = &geneve_spec;
+	items[items_counter].mask = &geneve_mask;
+}
+
+void
+add_gtp(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	uint32_t teid_value = TEID_VALUE;
+	memset(&gtp_spec, 0, sizeof(struct rte_flow_item_gtp));
+	memset(&gtp_mask, 0, sizeof(struct rte_flow_item_gtp));
+
+	gtp_spec.teid = RTE_BE32(teid_value);
+	gtp_mask.teid = RTE_BE32(0xffffffff);
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_GTP;
+	items[items_counter].spec = &gtp_spec;
+	items[items_counter].mask = &gtp_mask;
+}
+
+void
+add_meta_data(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	uint32_t data = META_DATA;
+	memset(&meta_spec, 0, sizeof(struct rte_flow_item_meta));
+	memset(&meta_mask, 0, sizeof(struct rte_flow_item_meta));
+
+	meta_spec.data = RTE_BE32(data);
+	meta_mask.data = RTE_BE32(0xffffffff);
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_META;
+	items[items_counter].spec = &meta_spec;
+	items[items_counter].mask = &meta_mask;
+}
+
+
+void
+add_meta_tag(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	uint32_t data = META_DATA;
+	uint8_t index = TAG_INDEX;
+	memset(&tag_spec, 0, sizeof(struct rte_flow_item_tag));
+	memset(&tag_mask, 0, sizeof(struct rte_flow_item_tag));
+
+	tag_spec.data = RTE_BE32(data);
+	tag_mask.data = RTE_BE32(0xffffffff);
+	tag_spec.index = index;
+	tag_mask.index = 0xff;
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_TAG;
+	items[items_counter].spec = &tag_spec;
+	items[items_counter].mask = &tag_mask;
+}
diff --git a/app/test-flow-perf/items_gen.h b/app/test-flow-perf/items_gen.h
new file mode 100644
index 0000000000..0b01385951
--- /dev/null
+++ b/app/test-flow-perf/items_gen.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * This file contains the items related methods
+ *
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+#ifndef _ITEMS_GEN_
+#define _ITEMS_GEN_
+
+#include <stdint.h>
+#include <rte_flow.h>
+
+#include "user_parameters.h"
+
+void
+add_ether(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_vlan(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_ipv4(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter, uint32_t src_ipv4);
+
+void
+add_ipv6(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter, int src_ipv6);
+
+void
+add_udp(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_tcp(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_vxlan(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_vxlan_gpe(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_gre(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_geneve(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_gtp(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_meta_data(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_meta_tag(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+#endif
diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index 156b9ef553..115af4f302 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -49,29 +49,119 @@
 #include <rte_cycles.h>
 #include <rte_memory.h>
 
+#include "flow_gen.h"
 #include "user_parameters.h"
 
-static uint32_t nb_lcores;
+#define MAX_ITERATIONS 100
+
+struct rte_flow *flow;
+static uint8_t flow_group;
+
+static uint16_t flow_items;
+static uint16_t flow_actions;
+static uint8_t flow_attrs;
+static volatile bool force_quit;
+static volatile bool dump_iterations;
 static struct rte_mempool *mbuf_mp;
+static uint32_t nb_lcores;
+static uint32_t flows_count;
+static uint32_t iterations_number;
 
 static void usage(char *progname)
 {
 	printf("\nusage: %s", progname);
+	printf("\nControl configurations:\n");
+	printf("  --flows-count=N: to set the number of needed"
+		" flows to insert, default is 4,000,000\n");
+	printf("  --dump-iterations: To print rates for each"
+		" iteration\n");
+
+	printf("To set flow attributes:\n");
+	printf("  --ingress: set ingress attribute in flows\n");
+	printf("  --egress: set egress attribute in flows\n");
+	printf("  --transfer: set transfer attribute in flows\n");
+	printf("  --group=N: set group for all flows,"
+		" default is 0\n");
+
+	printf("To set flow items:\n");
+	printf("  --ether: add ether layer in flow items\n");
+	printf("  --vlan: add vlan layer in flow items\n");
+	printf("  --ipv4: add ipv4 layer in flow items\n");
+	printf("  --ipv6: add ipv6 layer in flow items\n");
+	printf("  --tcp: add tcp layer in flow items\n");
+	printf("  --udp: add udp layer in flow items\n");
+	printf("  --vxlan: add vxlan layer in flow items\n");
+	printf("  --vxlan-gpe: add vxlan-gpe layer in flow items\n");
+	printf("  --gre: add gre layer in flow items\n");
+	printf("  --geneve: add geneve layer in flow items\n");
+	printf("  --gtp: add gtp layer in flow items\n");
+	printf("  --meta: add meta layer in flow items\n");
+	printf("  --tag: add tag layer in flow items\n");
+
+	printf("To set flow actions:\n");
+	printf("  --port-id: add port-id action in flow actions\n");
+	printf("  --rss: add rss action in flow actions\n");
+	printf("  --queue: add queue action in flow actions\n");
+	printf("  --jump: add jump action in flow actions\n");
+	printf("  --mark: add mark action in flow actions\n");
+	printf("  --count: add count action in flow actions\n");
+	printf("  --set-meta: add set meta action in flow actions\n");
+	printf("  --set-tag: add set tag action in flow actions\n");
+	printf("  --drop: add drop action in flow actions\n");
+	printf("  --hairpin-queue: add hairpin-queue action in flow actions\n");
+	printf("  --hairpin-rss: add hairping-rss action in flow actions\n");
 }
 
 static void
 args_parse(int argc, char **argv)
 {
 	char **argvopt;
-	int opt;
+	int n, opt;
 	int opt_idx;
 	static struct option lgopts[] = {
 		/* Control */
 		{ "help",                       0, 0, 0 },
+		{ "flows-count",                1, 0, 0 },
+		{ "dump-iterations",            0, 0, 0 },
+		/* Attributes */
+		{ "ingress",                    0, 0, 0 },
+		{ "egress",                     0, 0, 0 },
+		{ "transfer",                   0, 0, 0 },
+		{ "group",                      1, 0, 0 },
+		/* Items */
+		{ "ether",                      0, 0, 0 },
+		{ "vlan",                       0, 0, 0 },
+		{ "ipv4",                       0, 0, 0 },
+		{ "ipv6",                       0, 0, 0 },
+		{ "tcp",                        0, 0, 0 },
+		{ "udp",                        0, 0, 0 },
+		{ "vxlan",                      0, 0, 0 },
+		{ "vxlan-gpe",                  0, 0, 0 },
+		{ "gre",                        0, 0, 0 },
+		{ "geneve",                     0, 0, 0 },
+		{ "gtp",                        0, 0, 0 },
+		{ "meta",                       0, 0, 0 },
+		{ "tag",                        0, 0, 0 },
+		/* Actions */
+		{ "port-id",                    0, 0, 0 },
+		{ "rss",                        0, 0, 0 },
+		{ "queue",                      0, 0, 0 },
+		{ "jump",                       0, 0, 0 },
+		{ "mark",                       0, 0, 0 },
+		{ "count",                      0, 0, 0 },
+		{ "set-meta",                   0, 0, 0 },
+		{ "set-tag",                    0, 0, 0 },
+		{ "drop",                       0, 0, 0 },
+		{ "hairpin-queue",              0, 0, 0 },
+		{ "hairpin-rss",                0, 0, 0 },
 	};
 
+	flow_items = 0;
+	flow_actions = 0;
+	flow_attrs = 0;
 	argvopt = argv;
 
+	printf(":: Flow -> ");
 	while ((opt = getopt_long(argc, argvopt, "",
 				lgopts, &opt_idx)) != EOF) {
 		switch (opt) {
@@ -80,6 +170,140 @@ args_parse(int argc, char **argv)
 				usage(argv[0]);
 				rte_exit(EXIT_SUCCESS, "Displayed help\n");
 			}
+			/* Attributes */
+			if (!strcmp(lgopts[opt_idx].name, "ingress")) {
+				flow_attrs |= INGRESS;
+				printf("ingress ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "egress")) {
+				flow_attrs |= EGRESS;
+				printf("egress ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "transfer")) {
+				flow_attrs |= TRANSFER;
+				printf("transfer ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "group")) {
+				n = atoi(optarg);
+				if (n >= 0)
+					flow_group = n;
+				else
+					rte_exit(EXIT_SUCCESS,
+						"flow group should be >= 0");
+				printf("group %d ", flow_group);
+			}
+			/* Items */
+			if (!strcmp(lgopts[opt_idx].name, "ether")) {
+				flow_items |= ETH_ITEM;
+				printf("ether / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "ipv4")) {
+				flow_items |= IPV4_ITEM;
+				printf("ipv4 / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "vlan")) {
+				flow_items |= VLAN_ITEM;
+				printf("vlan / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "ipv6")) {
+				flow_items |= IPV6_ITEM;
+				printf("ipv6 / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "tcp")) {
+				flow_items |= TCP_ITEM;
+				printf("tcp / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "udp")) {
+				flow_items |= UDP_ITEM;
+				printf("udp / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "vxlan")) {
+				flow_items |= VXLAN_ITEM;
+				printf("vxlan / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "vxlan-gpe")) {
+				flow_items |= VXLAN_GPE_ITEM;
+				printf("vxlan-gpe / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "gre")) {
+				flow_items |= GRE_ITEM;
+				printf("gre / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "geneve")) {
+				flow_items |= GENEVE_ITEM;
+				printf("geneve / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "gtp")) {
+				flow_items |= GTP_ITEM;
+				printf("gtp / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "meta")) {
+				flow_items |= META_ITEM;
+				printf("meta / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "tag")) {
+				flow_items |= TAG_ITEM;
+				printf("tag / ");
+			}
+			/* Actions */
+			if (!strcmp(lgopts[opt_idx].name, "port-id")) {
+				flow_actions |= PORT_ID_ACTION;
+				printf("port-id / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "rss")) {
+				flow_actions |= RSS_ACTION;
+				printf("rss / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "hairpin-rss")) {
+				flow_actions |= HAIRPIN_RSS_ACTION;
+				printf("hairpin-rss / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "queue")) {
+				flow_actions |= QUEUE_ACTION;
+				printf("queue / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "hairpin-queue")) {
+				flow_actions |= HAIRPIN_QUEUE_ACTION;
+				printf("hairpin-queue / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "jump")) {
+				flow_actions |= JUMP_ACTION;
+				printf("jump / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "mark")) {
+				flow_actions |= MARK_ACTION;
+				printf("mark / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "count")) {
+				flow_actions |= COUNT_ACTION;
+				printf("count / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "set-meta")) {
+				flow_actions |= META_ACTION;
+				printf("set-meta / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "set-tag")) {
+				flow_actions |= TAG_ACTION;
+				printf("set-tag / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "drop")) {
+				flow_actions |= DROP_ACTION;
+				printf("drop / ");
+			}
+
+			/* Control */
+			if (!strcmp(lgopts[opt_idx].name, "flows-count")) {
+				n = atoi(optarg);
+				if (n > (int) iterations_number)
+					flows_count = n;
+				else {
+					printf("\n\nflows_count should be > %d",
+						iterations_number);
+					rte_exit(EXIT_SUCCESS, " ");
+				}
+			}
+			if (!strcmp(lgopts[opt_idx].name, "dump-iterations"))
+				dump_iterations = true;
 			break;
 		default:
 			usage(argv[0]);
@@ -88,6 +312,128 @@ args_parse(int argc, char **argv)
 			break;
 		}
 	}
+	printf("end_flow\n");
+}
+
+static void
+print_flow_error(struct rte_flow_error error)
+{
+	printf("Flow can't be created %d message: %s\n",
+		error.type,
+		error.message ? error.message : "(no stated reason)");
+}
+
+static inline void
+flows_handler(void)
+{
+	struct rte_flow_error error;
+	clock_t start_iter, end_iter;
+	double cpu_time_used;
+	double flows_rate;
+	double cpu_time_per_iter[MAX_ITERATIONS];
+	double delta;
+	uint16_t nr_ports;
+	uint32_t i;
+	int port_id;
+	int iter_id;
+	uint32_t eagain_counter = 0;
+
+	nr_ports = rte_eth_dev_count_avail();
+
+	for (i = 0; i < MAX_ITERATIONS; i++)
+		cpu_time_per_iter[i] = -1;
+
+	if (iterations_number > flows_count)
+		iterations_number = flows_count;
+
+	printf(":: Flows Count per port: %d\n", flows_count);
+
+	for (port_id = 0; port_id < nr_ports; port_id++) {
+		cpu_time_used = 0;
+		if (flow_group > 0) {
+			/*
+			 * Create global rule to jump into flow_group
+			 * This way the app will avoid the default rules
+			 *
+			 * Global rule:
+			 * group 0 eth / end actions jump group <flow_group>
+			 *
+			 */
+			flow = generate_flow(port_id, 0, flow_attrs, ETH_ITEM,
+				JUMP_ACTION, flow_group, 0, &error);
+
+			if (!flow) {
+				print_flow_error(error);
+				rte_exit(EXIT_FAILURE, "error in creating flow");
+			}
+		}
+
+		/* Insertion Rate */
+		printf("Flows insertion on port = %d\n", port_id);
+		start_iter = clock();
+		for (i = 0; i < flows_count; i++) {
+			do {
+				rte_errno = 0;
+				flow = generate_flow(port_id, flow_group,
+					flow_attrs, flow_items, flow_actions,
+					JUMP_ACTION_TABLE, i,  &error);
+				if (!flow)
+					eagain_counter++;
+			} while (rte_errno == EAGAIN);
+
+			if (force_quit)
+				i = flows_count;
+
+			if (!flow) {
+				print_flow_error(error);
+				rte_exit(EXIT_FAILURE, "error in creating flow");
+			}
+
+			if (i && !((i + 1) % iterations_number)) {
+				/* Save the insertion rate of each iter */
+				end_iter = clock();
+				delta = (double) (end_iter - start_iter);
+				iter_id = ((i + 1) / iterations_number) - 1;
+				cpu_time_per_iter[iter_id] =
+					delta / CLOCKS_PER_SEC;
+				cpu_time_used += cpu_time_per_iter[iter_id];
+				start_iter = clock();
+			}
+		}
+
+		/* Iteration rate per iteration */
+		if (dump_iterations)
+			for (i = 0; i < MAX_ITERATIONS; i++) {
+				if (cpu_time_per_iter[i] == -1)
+					continue;
+				delta = (double)(iterations_number /
+					cpu_time_per_iter[i]);
+				flows_rate = delta / 1000;
+				printf(":: Iteration #%d: %d flows "
+					"in %f sec[ Rate = %f K/Sec ]\n",
+					i, iterations_number,
+					cpu_time_per_iter[i], flows_rate);
+			}
+
+		/* Insertion rate for all flows */
+		flows_rate = ((double) (flows_count / cpu_time_used) / 1000);
+		printf("\n:: Total flow insertion rate -> %f K/Sec\n",
+						flows_rate);
+		printf(":: The time for creating %d in flows %f seconds\n",
+						flows_count, cpu_time_used);
+		printf(":: EAGAIN counter = %d\n", eagain_counter);
+	}
+}
+
+static void
+signal_handler(int signum)
+{
+	if (signum == SIGINT || signum == SIGTERM) {
+		printf("\n\nSignal %d received, preparing to exit...\n",
+					signum);
+		printf("Error: Stats are wrong due to sudden signal!\n\n");
+		force_quit = true;
+	}
 }
 
 static void
@@ -96,6 +442,8 @@ init_port(void)
 	int ret;
 	uint16_t i, j;
 	uint16_t port_id;
+	uint16_t nr_queues;
+	bool hairpin_flag = false;
 	uint16_t nr_ports = rte_eth_dev_count_avail();
 	struct rte_eth_hairpin_conf hairpin_conf = {
 			.peer_count = 1,
@@ -115,6 +463,13 @@ init_port(void)
 	struct rte_eth_rxconf rxq_conf;
 	struct rte_eth_dev_info dev_info;
 
+	nr_queues = RXQs;
+	if (flow_actions & HAIRPIN_QUEUE_ACTION ||
+		flow_actions & HAIRPIN_RSS_ACTION) {
+		nr_queues = RXQs + HAIRPIN_QUEUES;
+		hairpin_flag = true;
+	}
+
 	if (nr_ports == 0)
 		rte_exit(EXIT_FAILURE, "Error: no port detected\n");
 	mbuf_mp = rte_pktmbuf_pool_create("mbuf_pool",
@@ -134,8 +489,8 @@ init_port(void)
 
 		port_conf.txmode.offloads &= dev_info.tx_offload_capa;
 		printf(":: initializing port: %d\n", port_id);
-		ret = rte_eth_dev_configure(port_id, RXQs + HAIRPIN_QUEUES,
-				TXQs + HAIRPIN_QUEUES, &port_conf);
+		ret = rte_eth_dev_configure(port_id, nr_queues,
+				nr_queues, &port_conf);
 		if (ret < 0)
 			rte_exit(EXIT_FAILURE,
 					":: cannot configure device: err=%d, port=%u\n",
@@ -173,26 +528,30 @@ init_port(void)
 					":: promiscuous mode enable failed: err=%s, port=%u\n",
 					rte_strerror(-ret), port_id);
 
-		for (i = RXQs, j = 0; i < RXQs + HAIRPIN_QUEUES; i++, j++) {
-			hairpin_conf.peers[0].port = port_id;
-			hairpin_conf.peers[0].queue = j + TXQs;
-			ret = rte_eth_rx_hairpin_queue_setup(port_id, i,
-							NR_RXD, &hairpin_conf);
-			if (ret != 0)
-				rte_exit(EXIT_FAILURE,
-					":: Hairpin rx queue setup failed: err=%d, port=%u\n",
-					ret, port_id);
-		}
+		if (hairpin_flag) {
+			for (i = RXQs, j = 0;
+					i < RXQs + HAIRPIN_QUEUES; i++, j++) {
+				hairpin_conf.peers[0].port = port_id;
+				hairpin_conf.peers[0].queue = j + TXQs;
+				ret = rte_eth_rx_hairpin_queue_setup(port_id, i,
+					NR_RXD, &hairpin_conf);
+				if (ret != 0)
+					rte_exit(EXIT_FAILURE,
+						":: Hairpin rx queue setup failed: err=%d, port=%u\n",
+						ret, port_id);
+			}
 
-		for (i = TXQs, j = 0; i < TXQs + HAIRPIN_QUEUES; i++, j++) {
-			hairpin_conf.peers[0].port = port_id;
-			hairpin_conf.peers[0].queue = j + RXQs;
-			ret = rte_eth_tx_hairpin_queue_setup(port_id, i,
-							NR_TXD, &hairpin_conf);
-			if (ret != 0)
-				rte_exit(EXIT_FAILURE,
-					":: Hairpin tx queue setup failed: err=%d, port=%u\n",
-					ret, port_id);
+			for (i = TXQs, j = 0;
+					i < TXQs + HAIRPIN_QUEUES; i++, j++) {
+				hairpin_conf.peers[0].port = port_id;
+				hairpin_conf.peers[0].queue = j + RXQs;
+				ret = rte_eth_tx_hairpin_queue_setup(port_id, i,
+					NR_TXD, &hairpin_conf);
+				if (ret != 0)
+					rte_exit(EXIT_FAILURE,
+						":: Hairpin tx queue setup failed: err=%d, port=%u\n",
+						ret, port_id);
+			}
 		}
 
 		ret = rte_eth_dev_start(port_id);
@@ -219,6 +578,15 @@ main(int argc, char **argv)
 	if (ret < 0)
 		rte_exit(EXIT_FAILURE, "EAL init failed\n");
 
+	force_quit = false;
+	dump_iterations = false;
+	flows_count = 4000000;
+	iterations_number = 100000;
+	flow_group = 0;
+
+	signal(SIGINT, signal_handler);
+	signal(SIGTERM, signal_handler);
+
 	argc -= ret;
 	argv += ret;
 
@@ -232,6 +600,8 @@ main(int argc, char **argv)
 	if (nb_lcores <= 1)
 		rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
 
+	flows_handler();
+
 	RTE_LCORE_FOREACH_SLAVE(lcore_id)
 
 	if (rte_eal_wait_lcore(lcore_id) < 0)
diff --git a/app/test-flow-perf/meson.build b/app/test-flow-perf/meson.build
index ec9bb3b3aa..b3941f5c2d 100644
--- a/app/test-flow-perf/meson.build
+++ b/app/test-flow-perf/meson.build
@@ -5,7 +5,15 @@
 #
 # To build this example as a standalone application with an already-installed
 # DPDK instance, use 'make'
+name = 'flow_perf'
+allow_experimental_apis = true
+cflags += '-Wno-deprecated-declarations'
+cflags += '-Wunused-function'
 
 sources = files(
+	'actions_gen.c',
+	'flow_gen.c',
+	'items_gen.c',
 	'main.c',
 )
+deps += ['ethdev']
diff --git a/app/test-flow-perf/user_parameters.h b/app/test-flow-perf/user_parameters.h
index 56ec7f47b5..1d157430b6 100644
--- a/app/test-flow-perf/user_parameters.h
+++ b/app/test-flow-perf/user_parameters.h
@@ -14,3 +14,18 @@
 #define MBUF_CACHE_SIZE 512
 #define NR_RXD  256
 #define NR_TXD  256
+
+/** Items/Actions parameters **/
+#define JUMP_ACTION_TABLE 2
+#define VLAN_VALUE 1
+#define VNI_VALUE 1
+#define GRE_PROTO  0x6558
+#define META_DATA 1
+#define TAG_INDEX 0
+#define PORT_ID_DST 1
+#define MARK_ID 1
+#define TEID_VALUE 1
+
+/** Flow items/actions max size **/
+#define MAX_ITEMS_NUM 20
+#define MAX_ACTIONS_NUM 20
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
index 30ce1b6cc0..62e038c430 100644
--- a/doc/guides/tools/flow-perf.rst
+++ b/doc/guides/tools/flow-perf.rst
@@ -4,7 +4,19 @@
 RTE Flow performance tool
 =========================
 
-Application for rte_flow performance testing.
+Application for rte_flow performance testing. The application provides the
+ability to test the insertion rate of a specific rte_flow rule by stressing
+the NIC with it and calculating the insertion rate.
+
+The application offers some options in the command line, to configure
+which rule to apply.
+
+After that the application will start producing rules with same pattern
+but increasing the outer IP source address by 1 each time, thus it will
+give different flow each time, and all other items will have open masks.
+
+The current design has a single-core insertion rate. In the future we may
+add multi-core insertion rate measurement support to the app.
 
 
 Compiling the Application
@@ -61,9 +73,179 @@ a ``--`` separator:
 
 .. code-block:: console
 
-	sudo ./test-flow-perf -n 4 -w 08:00.0,dv_flow_en=1 --
+	sudo ./flow_perf -n 4 -w 08:00.0,dv_flow_en=1 -- --ingress --ether --ipv4 --queue --flows-count=1000000
 
 The command line options are:
 
 *	``--help``
 	Display a help message and quit.
+
+*	``--flows-count=N``
+	Set the number of needed flows to insert,
+	where N must be greater than the number of flows per iteration.
+	The default value is 4,000,000.
+
+*	``--dump-iterations``
+	Print rates for each iteration of flows.
+	The default iteration size is 100,000 flows.
+
+
+Attributes:
+
+*	``--ingress``
+	Set Ingress attribute to all flows attributes.
+
+*	``--egress``
+	Set Egress attribute to all flows attributes.
+
+*	``--transfer``
+	Set Transfer attribute to all flows attributes.
+
+*	``--group=N``
+	Set group for all flows, where N >= 0.
+	Default group is 0.
+
+Items:
+
+*	``--ether``
+	Add Ether item to all flows items, This item have open mask.
+
+*	``--vlan``
+	Add VLAN item to all flows items,
+	This item has the VLAN value defined in user_parameters.h
+	under ``VLAN_VALUE`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--ipv4``
+	Add IPv4 item to all flows items,
+	This item have incremental source IP, with full mask.
+	Other fields are open mask.
+
+*	``--ipv6``
+	Add IPv6 item to all flows item,
+	This item have incremental source IP, with full mask.
+	Other fields are open mask.
+
+*	``--tcp``
+	Add TCP item to all flows items, This item have open mask.
+
+*	``--udp``
+	Add UDP item to all flows items, This item have open mask.
+
+*	``--vxlan``
+	Add VXLAN item to all flows items,
+	This item have VNI value defined in user_parameters.h
+	under ``VNI_VALUE`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--vxlan-gpe``
+	Add VXLAN-GPE item to all flows items,
+	This item have VNI value defined in user_parameters.h
+	under ``VNI_VALUE`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--gre``
+	Add GRE item to all flows items,
+	This item have protocol value defined in user_parameters.h
+	under ``GRE_PROTO`` with full mask, default protocol = 0x6558 "Ether"
+	Other fields are open mask.
+
+*	``--geneve``
+	Add GENEVE item to all flows items,
+	This item have VNI value defined in user_parameters.h
+	under ``VNI_VALUE`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--gtp``
+	Add GTP item to all flows items,
+	This item have TEID value defined in user_parameters.h
+	under ``TEID_VALUE`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--meta``
+	Add Meta item to all flows items,
+	This item have data value defined in user_parameters.h
+	under ``META_DATA`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--tag``
+	Add Tag item to all flows items,
+	This item has the data value defined in user_parameters.h
+	under ``META_DATA`` with full mask, default value = 1.
+
+	It also has the tag index defined in user_parameters.h
+	under ``TAG_INDEX`` with full mask, default value = 0.
+	Other fields are open mask.
+
+
+Actions:
+
+*	``--port-id``
+	Add port redirection action to all flows actions.
+	Port redirection destination is defined in user_parameters.h
+	under PORT_ID_DST, default value = 1.
+
+*	``--rss``
+	Add RSS action to all flows actions,
+	The queues in RSS action will be all queues configured
+	in the app.
+
+*	``--queue``
+	Add queue action to all flows actions,
+	The queue will change in a round-robin fashion for each flow.
+
+	For example:
+		The app running with 4 RX queues
+		Flow #0: queue index 0
+		Flow #1: queue index 1
+		Flow #2: queue index 2
+		Flow #3: queue index 3
+		Flow #4: queue index 0
+		...
+
+*	``--jump``
+	Add jump action to all flows actions.
+	Jump action destination is defined in user_parameters.h
+	under ``JUMP_ACTION_TABLE``, default value = 2.
+
+*	``--mark``
+	Add mark action to all flows actions.
+	Mark action id is defined in user_parameters.h
+	under ``MARK_ID``, default value = 1.
+
+*	``--count``
+	Add count action to all flows actions.
+
+*	``--set-meta``
+	Add set-meta action to all flows actions.
+	Meta data is defined in user_parameters.h under ``META_DATA``
+	with full mask, default value = 1.
+
+*	``--set-tag``
+	Add set-tag action to all flows actions.
+	Meta data is defined in user_parameters.h under ``META_DATA``
+	with full mask, default value = 1.
+
+	Tag index is defined in user_parameters.h under ``TAG_INDEX``
+	with full mask, default value = 0.
+
+*	``--drop``
+	Add drop action to all flows actions.
+
+*	``--hairpin-queue``
+	Add hairpin queue action to all flows actions.
+	The queue will change in a round-robin fashion for each flow.
+
+	For example:
+		The app running with 4 RX hairpin queues and 4 normal RX queues
+		Flow #0: queue index 4
+		Flow #1: queue index 5
+		Flow #2: queue index 6
+		Flow #3: queue index 7
+		Flow #4: queue index 4
+		...
+
+*	``--hairpin-rss``
+	Add hairpin RSS action to all flows actions.
+	The queues in RSS action will be all hairpin queues configured
+	in the app.
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v4 3/5] app/test-flow-perf: add deletion rate calculation
  2020-04-30 10:33           ` [dpdk-dev] [PATCH v4 0/5] Introduce flow perf application Wisam Jaddo
  2020-04-30 10:33             ` [dpdk-dev] [PATCH v4 1/5] app/test-flow-perf: add flow performance skeleton Wisam Jaddo
  2020-04-30 10:33             ` [dpdk-dev] [PATCH v4 2/5] app/test-flow-perf: add insertion rate calculation Wisam Jaddo
@ 2020-04-30 10:33             ` " Wisam Jaddo
  2020-04-30 12:02               ` Xiaoyu Min
  2020-04-30 10:33             ` [dpdk-dev] [PATCH v4 4/5] app/test-flow-perf: add memory dump to app Wisam Jaddo
                               ` (2 subsequent siblings)
  5 siblings, 1 reply; 102+ messages in thread
From: Wisam Jaddo @ 2020-04-30 10:33 UTC (permalink / raw)
  To: dev, jackmin, thomas, jerinjacobk, gerlitz.or, l.yan

Add the ability to test deletion rate for flow performance
application.

This feature is disabled by default, and can be enabled by
adding "--deletion-rate" to the application command line options.

Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 app/test-flow-perf/main.c      | 86 ++++++++++++++++++++++++++++++++++
 doc/guides/tools/flow-perf.rst |  4 ++
 2 files changed, 90 insertions(+)

diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index 115af4f302..7c11c0b577 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -62,6 +62,7 @@ static uint16_t flow_actions;
 static uint8_t flow_attrs;
 static volatile bool force_quit;
 static volatile bool dump_iterations;
+static volatile bool delete_flag;
 static struct rte_mempool *mbuf_mp;
 static uint32_t nb_lcores;
 static uint32_t flows_count;
@@ -75,6 +76,8 @@ static void usage(char *progname)
 		" flows to insert, default is 4,000,000\n");
 	printf("  --dump-iterations: To print rates for each"
 		" iteration\n");
+	printf("  --deletion-rate: Enable deletion rate"
+		" calculations\n");
 
 	printf("To set flow attributes:\n");
 	printf("  --ingress: set ingress attribute in flows\n");
@@ -123,6 +126,7 @@ args_parse(int argc, char **argv)
 		{ "help",                       0, 0, 0 },
 		{ "flows-count",                1, 0, 0 },
 		{ "dump-iterations",            0, 0, 0 },
+		{ "deletion-rate",              0, 0, 0 },
 		/* Attributes */
 		{ "ingress",                    0, 0, 0 },
 		{ "egress",                     0, 0, 0 },
@@ -304,6 +308,8 @@ args_parse(int argc, char **argv)
 			}
 			if (!strcmp(lgopts[opt_idx].name, "dump-iterations"))
 				dump_iterations = true;
+			if (!strcmp(lgopts[opt_idx].name, "deletion-rate"))
+				delete_flag = true;
 			break;
 		default:
 			usage(argv[0]);
@@ -323,9 +329,75 @@ print_flow_error(struct rte_flow_error error)
 		error.message ? error.message : "(no stated reason)");
 }
 
+static inline void
+destroy_flows(int port_id, struct rte_flow **flow_list)
+{
+	struct rte_flow_error error;
+	clock_t start_iter, end_iter;
+	double cpu_time_used = 0;
+	double flows_rate;
+	double cpu_time_per_iter[MAX_ITERATIONS];
+	double delta;
+	uint32_t i;
+	int iter_id;
+
+	for (i = 0; i < MAX_ITERATIONS; i++)
+		cpu_time_per_iter[i] = -1;
+
+	if (iterations_number > flows_count)
+		iterations_number = flows_count;
+
+	/* Deletion Rate */
+	printf("Flows Deletion on port = %d\n", port_id);
+	start_iter = clock();
+	for (i = 0; i < flows_count; i++) {
+		if (!flow_list[i])
+			break;
+
+		memset(&error, 0x33, sizeof(error));
+		if (rte_flow_destroy(port_id, flow_list[i], &error)) {
+			print_flow_error(error);
+			rte_exit(EXIT_FAILURE, "Error in deleting flow");
+		}
+
+		if (i && !((i + 1) % iterations_number)) {
+			/* Save the deletion rate of each iter */
+			end_iter = clock();
+			delta = (double) (end_iter - start_iter);
+			iter_id = ((i + 1) / iterations_number) - 1;
+			cpu_time_per_iter[iter_id] =
+				delta / CLOCKS_PER_SEC;
+			cpu_time_used += cpu_time_per_iter[iter_id];
+			start_iter = clock();
+		}
+	}
+
+	/* Deletion rate per iteration */
+	if (dump_iterations)
+		for (i = 0; i < MAX_ITERATIONS; i++) {
+			if (cpu_time_per_iter[i] == -1)
+				continue;
+			delta = (double)(iterations_number /
+				cpu_time_per_iter[i]);
+			flows_rate = delta / 1000;
+			printf(":: Iteration #%d: %d flows "
+				"in %f sec[ Rate = %f K/Sec ]\n",
+				i, iterations_number,
+				cpu_time_per_iter[i], flows_rate);
+		}
+
+	/* Deletion rate for all flows */
+	flows_rate = ((double) (flows_count / cpu_time_used) / 1000);
+	printf("\n:: Total flow deletion rate -> %f K/Sec\n",
+		flows_rate);
+	printf(":: The time for deleting %d in flows %f seconds\n",
+		flows_count, cpu_time_used);
+}
+
 static inline void
 flows_handler(void)
 {
+	struct rte_flow **flow_list;
 	struct rte_flow_error error;
 	clock_t start_iter, end_iter;
 	double cpu_time_used;
@@ -337,6 +409,7 @@ flows_handler(void)
 	int port_id;
 	int iter_id;
 	uint32_t eagain_counter = 0;
+	uint32_t flow_index;
 
 	nr_ports = rte_eth_dev_count_avail();
 
@@ -348,8 +421,14 @@ flows_handler(void)
 
 	printf(":: Flows Count per port: %d\n", flows_count);
 
+	flow_list = rte_zmalloc("flow_list",
+		(sizeof(struct rte_flow *) * flows_count) + 1, 0);
+	if (flow_list == NULL)
+		rte_exit(EXIT_FAILURE, "No Memory available!");
+
 	for (port_id = 0; port_id < nr_ports; port_id++) {
 		cpu_time_used = 0;
+		flow_index = 0;
 		if (flow_group > 0) {
 			/*
 			 * Create global rule to jump into flow_group
@@ -366,6 +445,7 @@ flows_handler(void)
 				print_flow_error(error);
 				rte_exit(EXIT_FAILURE, "error in creating flow");
 			}
+			flow_list[flow_index++] = flow;
 		}
 
 		/* Insertion Rate */
@@ -389,6 +469,8 @@ flows_handler(void)
 				rte_exit(EXIT_FAILURE, "error in creating flow");
 			}
 
+			flow_list[flow_index++] = flow;
+
 			if (i && !((i + 1) % iterations_number)) {
 				/* Save the insertion rate of each iter */
 				end_iter = clock();
@@ -422,6 +504,9 @@ flows_handler(void)
 		printf(":: The time for creating %d in flows %f seconds\n",
 						flows_count, cpu_time_used);
 		printf(":: EAGAIN counter = %d\n", eagain_counter);
+
+		if (delete_flag)
+			destroy_flows(port_id, flow_list);
 	}
 }
 
@@ -580,6 +665,7 @@ main(int argc, char **argv)
 
 	force_quit = false;
 	dump_iterations = false;
+	delete_flag = false;
 	flows_count = 4000000;
 	iterations_number = 100000;
 	flow_group = 0;
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
index 62e038c430..e07e659df5 100644
--- a/doc/guides/tools/flow-perf.rst
+++ b/doc/guides/tools/flow-perf.rst
@@ -18,6 +18,8 @@ give different flow each time, and all other items will have open masks.
 The current design has a single-core insertion rate. In the future we may
 add multi-core insertion rate measurement support to the app.
 
+The application also provides the ability to measure the rte_flow deletion rate.
+
 
 Compiling the Application
 =========================
@@ -89,6 +91,8 @@ The command line options are:
 	Print rates for each iteration of flows.
 	The default iteration size is 100,000 flows.
 
+*	``--deletion-rate``
+	Enable deletion rate calculations.
 
 Attributes:
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v4 4/5] app/test-flow-perf: add memory dump to app
  2020-04-30 10:33           ` [dpdk-dev] [PATCH v4 0/5] Introduce flow perf application Wisam Jaddo
                               ` (2 preceding siblings ...)
  2020-04-30 10:33             ` [dpdk-dev] [PATCH v4 3/5] app/test-flow-perf: add deletion " Wisam Jaddo
@ 2020-04-30 10:33             ` Wisam Jaddo
  2020-04-30 12:03               ` Xiaoyu Min
  2020-05-06  4:10               ` Ajit Khaparde
  2020-04-30 10:33             ` [dpdk-dev] [PATCH v4 5/5] app/test-flow-perf: add packet forwarding support Wisam Jaddo
  2020-05-04  7:12             ` [dpdk-dev] [PATCH v4 0/5] Introduce flow perf application Thomas Monjalon
  5 siblings, 2 replies; 102+ messages in thread
From: Wisam Jaddo @ 2020-04-30 10:33 UTC (permalink / raw)
  To: dev, jackmin, thomas, jerinjacobk, gerlitz.or, l.yan; +Cc: Suanming Mou

Introduce a new feature to dump memory statistics of each socket
and a total for all before and after the creation.

This gives two main advantages:
1- Check the memory consumption for a large number of flows
"insertion rate scenario alone"

2- Check that there is no memory leakage after doing insertion then
deletion.

Signed-off-by: Suanming Mou <suanmingm@mellanox.com>
Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 app/test-flow-perf/main.c      | 69 ++++++++++++++++++++++++++++++++++
 doc/guides/tools/flow-perf.rst |  6 ++-
 2 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index 7c11c0b577..95435910de 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -62,6 +62,7 @@ static uint16_t flow_actions;
 static uint8_t flow_attrs;
 static volatile bool force_quit;
 static volatile bool dump_iterations;
+static volatile bool dump_socket_mem_flag;
 static volatile bool delete_flag;
 static struct rte_mempool *mbuf_mp;
 static uint32_t nb_lcores;
@@ -78,6 +79,7 @@ static void usage(char *progname)
 		" iteration\n");
 	printf("  --deletion-rate: Enable deletion rate"
 		" calculations\n");
+	printf("  --dump-socket-mem: to dump all socket memory\n");
 
 	printf("To set flow attributes:\n");
 	printf("  --ingress: set ingress attribute in flows\n");
@@ -127,6 +129,7 @@ args_parse(int argc, char **argv)
 		{ "flows-count",                1, 0, 0 },
 		{ "dump-iterations",            0, 0, 0 },
 		{ "deletion-rate",              0, 0, 0 },
+		{ "dump-socket-mem",            0, 0, 0 },
 		/* Attributes */
 		{ "ingress",                    0, 0, 0 },
 		{ "egress",                     0, 0, 0 },
@@ -310,6 +313,8 @@ args_parse(int argc, char **argv)
 				dump_iterations = true;
 			if (!strcmp(lgopts[opt_idx].name, "deletion-rate"))
 				delete_flag = true;
+			if (!strcmp(lgopts[opt_idx].name, "dump-socket-mem"))
+				dump_socket_mem_flag = true;
 			break;
 		default:
 			usage(argv[0]);
@@ -321,6 +326,62 @@ args_parse(int argc, char **argv)
 	printf("end_flow\n");
 }
 
+/* Dump the socket memory statistics on console */
+static size_t
+dump_socket_mem(FILE *f)
+{
+	struct rte_malloc_socket_stats socket_stats;
+	unsigned int i = 0;
+	size_t total = 0;
+	size_t alloc = 0;
+	size_t free = 0;
+	unsigned int n_alloc = 0;
+	unsigned int n_free = 0;
+	bool active_nodes = false;
+
+
+	for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
+		if (rte_malloc_get_socket_stats(i, &socket_stats) ||
+		    !socket_stats.heap_totalsz_bytes)
+			continue;
+		active_nodes = true;
+		total += socket_stats.heap_totalsz_bytes;
+		alloc += socket_stats.heap_allocsz_bytes;
+		free += socket_stats.heap_freesz_bytes;
+		n_alloc += socket_stats.alloc_count;
+		n_free += socket_stats.free_count;
+		if (dump_socket_mem_flag) {
+			fprintf(f, "::::::::::::::::::::::::::::::::::::::::");
+			fprintf(f,
+				"\nSocket %u:\nsize(M) total: %.6lf\nalloc:"
+				" %.6lf(%.3lf%%)\nfree: %.6lf"
+				"\nmax: %.6lf"
+				"\ncount alloc: %u\nfree: %u\n",
+				i,
+				socket_stats.heap_totalsz_bytes / 1.0e6,
+				socket_stats.heap_allocsz_bytes / 1.0e6,
+				(double)socket_stats.heap_allocsz_bytes * 100 /
+				(double)socket_stats.heap_totalsz_bytes,
+				socket_stats.heap_freesz_bytes / 1.0e6,
+				socket_stats.greatest_free_size / 1.0e6,
+				socket_stats.alloc_count,
+				socket_stats.free_count);
+				fprintf(f, "::::::::::::::::::::::::::::::::::::::::");
+		}
+	}
+	if (dump_socket_mem_flag && active_nodes) {
+		fprintf(f,
+			"\nTotal: size(M)\ntotal: %.6lf"
+			"\nalloc: %.6lf(%.3lf%%)\nfree: %.6lf"
+			"\ncount alloc: %u\nfree: %u\n",
+			total / 1.0e6, alloc / 1.0e6,
+			(double)alloc * 100 / (double)total, free / 1.0e6,
+			n_alloc, n_free);
+		fprintf(f, "::::::::::::::::::::::::::::::::::::::::\n");
+	}
+	return alloc;
+}
+
 static void
 print_flow_error(struct rte_flow_error error)
 {
@@ -657,6 +718,7 @@ main(int argc, char **argv)
 	uint16_t nr_ports;
 	int ret;
 	struct rte_flow_error error;
+	int64_t alloc, last_alloc;
 
 	nr_ports = rte_eth_dev_count_avail();
 	ret = rte_eal_init(argc, argv);
@@ -666,6 +728,7 @@ main(int argc, char **argv)
 	force_quit = false;
 	dump_iterations = false;
 	delete_flag = false;
+	dump_socket_mem_flag = false;
 	flows_count = 4000000;
 	iterations_number = 100000;
 	flow_group = 0;
@@ -686,7 +749,13 @@ main(int argc, char **argv)
 	if (nb_lcores <= 1)
 		rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
 
+	last_alloc = (int64_t)dump_socket_mem(stdout);
 	flows_handler();
+	alloc = (int64_t)dump_socket_mem(stdout);
+
+	if (last_alloc)
+		fprintf(stdout, ":: Memory allocation change(M): %.6lf\n",
+		(alloc - last_alloc) / 1.0e6);
 
 	RTE_LCORE_FOREACH_SLAVE(lcore_id)
 
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
index e07e659df5..28d452fd06 100644
--- a/doc/guides/tools/flow-perf.rst
+++ b/doc/guides/tools/flow-perf.rst
@@ -18,7 +18,8 @@ give different flow each time, and all other items will have open masks.
 The current design have single core insertion rate. In the future we may
 have a multi core insertion rate measurement support in the app.
 
-The application also provide the ability to measure rte flow deletion rate.
+The application also provide the ability to measure rte flow deletion rate,
+in addition to memory consumption before and after the flows creation.
 
 
 Compiling the Application
@@ -94,6 +95,9 @@ The command line options are:
 *	``--deletion-rate``
 	Enable deletion rate calculations.
 
+*	``--dump-socket-mem``
+	Dump the memory stats for each socket before the insertion and after.
+
 Attributes:
 
 *	``--ingress``
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v4 5/5] app/test-flow-perf: add packet forwarding support
  2020-04-30 10:33           ` [dpdk-dev] [PATCH v4 0/5] Introduce flow perf application Wisam Jaddo
                               ` (3 preceding siblings ...)
  2020-04-30 10:33             ` [dpdk-dev] [PATCH v4 4/5] app/test-flow-perf: add memory dump to app Wisam Jaddo
@ 2020-04-30 10:33             ` Wisam Jaddo
  2020-04-30 12:05               ` Xiaoyu Min
  2020-05-04  7:12             ` [dpdk-dev] [PATCH v4 0/5] Introduce flow perf application Thomas Monjalon
  5 siblings, 1 reply; 102+ messages in thread
From: Wisam Jaddo @ 2020-04-30 10:33 UTC (permalink / raw)
  To: dev, jackmin, thomas, jerinjacobk, gerlitz.or, l.yan

Introduce packet forwarding support to the app to do
some performance measurements.

The measurements are reported in terms of packets per
second. The forwarding will start after the end
of insertion/deletion operations.

The support has single and multi core performance measurements.

Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 app/test-flow-perf/main.c      | 300 +++++++++++++++++++++++++++++++++
 doc/guides/tools/flow-perf.rst |   6 +
 2 files changed, 306 insertions(+)

diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index 95435910de..2596d05dc2 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -60,14 +60,45 @@ static uint8_t flow_group;
 static uint16_t flow_items;
 static uint16_t flow_actions;
 static uint8_t flow_attrs;
+
 static volatile bool force_quit;
 static volatile bool dump_iterations;
 static volatile bool dump_socket_mem_flag;
 static volatile bool delete_flag;
+static volatile bool enable_fwd;
+
 static struct rte_mempool *mbuf_mp;
 static uint32_t nb_lcores;
 static uint32_t flows_count;
 static uint32_t iterations_number;
+static uint32_t nb_lcores;
+
+#define MAX_PKT_BURST 32
+#define LCORE_MODE_PKT 1
+#define LCORE_MODE_STATS 2
+#define MAX_STREAMS 64
+#define MAX_LCORES 64
+
+struct stream {
+	int tx_port;
+	int tx_queue;
+	int rx_port;
+	int rx_queue;
+};
+
+struct lcore_info {
+	int mode;
+	int streams_nb;
+	struct stream streams[MAX_STREAMS];
+	/* stats */
+	uint64_t tx_pkts;
+	uint64_t tx_drops;
+	uint64_t rx_pkts;
+	struct rte_mbuf *pkts[MAX_PKT_BURST];
+} __attribute__((__aligned__(64))); /* let it be cacheline aligned */
+
+
+static struct lcore_info lcore_infos[MAX_LCORES];
 
 static void usage(char *progname)
 {
@@ -80,6 +111,8 @@ static void usage(char *progname)
 	printf("  --deletion-rate: Enable deletion rate"
 		" calculations\n");
 	printf("  --dump-socket-mem: to dump all socket memory\n");
+	printf("  --enable-fwd: to enable packets forwarding"
+		" after insertion\n");
 
 	printf("To set flow attributes:\n");
 	printf("  --ingress: set ingress attribute in flows\n");
@@ -130,6 +163,7 @@ args_parse(int argc, char **argv)
 		{ "dump-iterations",            0, 0, 0 },
 		{ "deletion-rate",              0, 0, 0 },
 		{ "dump-socket-mem",            0, 0, 0 },
+		{ "enable-fwd",                 0, 0, 0 },
 		/* Attributes */
 		{ "ingress",                    0, 0, 0 },
 		{ "egress",                     0, 0, 0 },
@@ -315,6 +349,8 @@ args_parse(int argc, char **argv)
 				delete_flag = true;
 			if (!strcmp(lgopts[opt_idx].name, "dump-socket-mem"))
 				dump_socket_mem_flag = true;
+			if (!strcmp(lgopts[opt_idx].name, "enable-fwd"))
+				enable_fwd = true;
 			break;
 		default:
 			usage(argv[0]);
@@ -582,6 +618,265 @@ signal_handler(int signum)
 	}
 }
 
+static inline uint16_t
+do_rx(struct lcore_info *li, uint16_t rx_port, uint16_t rx_queue)
+{
+	uint16_t cnt = 0;
+	cnt = rte_eth_rx_burst(rx_port, rx_queue, li->pkts, MAX_PKT_BURST);
+	li->rx_pkts += cnt;
+	return cnt;
+}
+
+static inline void
+do_tx(struct lcore_info *li, uint16_t cnt, uint16_t tx_port,
+			uint16_t tx_queue)
+{
+	uint16_t nr_tx = 0;
+	uint16_t i;
+
+	nr_tx = rte_eth_tx_burst(tx_port, tx_queue, li->pkts, cnt);
+	li->tx_pkts  += nr_tx;
+	li->tx_drops += cnt - nr_tx;
+
+	for (i = nr_tx; i < cnt; i++)
+		rte_pktmbuf_free(li->pkts[i]);
+}
+
+/*
+ * Method to convert numbers into pretty numbers that easy
+ * to read. The design here is to add comma after each three
+ * digits and set all of this inside buffer.
+ *
+ * For example if n = 1799321, the output will be
+ * 1,799,321 after this method which is easier to read.
+ */
+static char *
+pretty_number(uint64_t n, char *buf)
+{
+	char p[6][4];
+	int i = 0;
+	int off = 0;
+
+	while (n > 1000) {
+		sprintf(p[i], "%03d", (int)(n % 1000));
+		n /= 1000;
+		i += 1;
+	}
+
+	sprintf(p[i++], "%d", (int)n);
+
+	while (i--)
+		off += sprintf(buf + off, "%s,", p[i]);
+	buf[strlen(buf) - 1] = '\0';
+
+	return buf;
+}
+
+static void
+packet_per_second_stats(void)
+{
+	struct lcore_info *old;
+	struct lcore_info *li, *oli;
+	int nr_lines = 0;
+	int i;
+
+	old = rte_zmalloc("old",
+		sizeof(struct lcore_info) * MAX_LCORES, 0);
+	if (old == NULL)
+		rte_exit(EXIT_FAILURE, "No Memory available!");
+
+	memcpy(old, lcore_infos,
+		sizeof(struct lcore_info) * MAX_LCORES);
+
+	while (!force_quit) {
+		uint64_t total_tx_pkts = 0;
+		uint64_t total_rx_pkts = 0;
+		uint64_t total_tx_drops = 0;
+		uint64_t tx_delta, rx_delta, drops_delta;
+		char buf[3][32];
+		int nr_valid_core = 0;
+
+		sleep(1);
+
+		if (nr_lines) {
+			char go_up_nr_lines[16];
+
+			sprintf(go_up_nr_lines, "%c[%dA\r", 27, nr_lines);
+			printf("%s\r", go_up_nr_lines);
+		}
+
+		printf("\n%6s %16s %16s %16s\n", "core", "tx", "tx drops", "rx");
+		printf("%6s %16s %16s %16s\n", "------", "----------------",
+			"----------------", "----------------");
+		nr_lines = 3;
+		for (i = 0; i < MAX_LCORES; i++) {
+			li  = &lcore_infos[i];
+			oli = &old[i];
+			if (li->mode != LCORE_MODE_PKT)
+				continue;
+
+			tx_delta    = li->tx_pkts  - oli->tx_pkts;
+			rx_delta    = li->rx_pkts  - oli->rx_pkts;
+			drops_delta = li->tx_drops - oli->tx_drops;
+			printf("%6d %16s %16s %16s\n", i,
+				pretty_number(tx_delta,    buf[0]),
+				pretty_number(drops_delta, buf[1]),
+				pretty_number(rx_delta,    buf[2]));
+
+			total_tx_pkts  += tx_delta;
+			total_rx_pkts  += rx_delta;
+			total_tx_drops += drops_delta;
+
+			nr_valid_core++;
+			nr_lines += 1;
+		}
+
+		if (nr_valid_core > 1) {
+			printf("%6s %16s %16s %16s\n", "total",
+				pretty_number(total_tx_pkts,  buf[0]),
+				pretty_number(total_tx_drops, buf[1]),
+				pretty_number(total_rx_pkts,  buf[2]));
+			nr_lines += 1;
+		}
+
+		memcpy(old, lcore_infos,
+			sizeof(struct lcore_info) * MAX_LCORES);
+	}
+}
+
+static int
+start_forwarding(void *data __rte_unused)
+{
+	int lcore = rte_lcore_id();
+	int stream_id;
+	uint16_t cnt;
+	struct lcore_info *li = &lcore_infos[lcore];
+
+	if (!li->mode)
+		return 0;
+
+	if (li->mode == LCORE_MODE_STATS) {
+		printf(":: started stats on lcore %u\n", lcore);
+		packet_per_second_stats();
+		return 0;
+	}
+
+	while (!force_quit)
+		for (stream_id = 0; stream_id < MAX_STREAMS; stream_id++) {
+			if (li->streams[stream_id].rx_port == -1)
+				continue;
+
+			cnt = do_rx(li,
+					li->streams[stream_id].rx_port,
+					li->streams[stream_id].rx_queue);
+			if (cnt)
+				do_tx(li, cnt,
+					li->streams[stream_id].tx_port,
+					li->streams[stream_id].tx_queue);
+		}
+	return 0;
+}
+
+static void
+init_lcore_info(void)
+{
+	int i, j;
+	unsigned int lcore;
+	uint16_t nr_port;
+	uint16_t queue;
+	int port;
+	int stream_id = 0;
+	int streams_per_core;
+	int unassigned_streams;
+	int nb_fwd_streams;
+	nr_port = rte_eth_dev_count_avail();
+
+	/* First logical core is reserved for stats printing */
+	lcore = rte_get_next_lcore(-1, 0, 0);
+	lcore_infos[lcore].mode = LCORE_MODE_STATS;
+
+	/*
+	 * Initialize all cores
+	 * All cores at first must have -1 value in all streams
+	 * This means that this stream is not used, or not set
+	 * yet.
+	 */
+	for (i = 0; i < MAX_LCORES; i++)
+		for (j = 0; j < MAX_STREAMS; j++) {
+			lcore_infos[i].streams[j].tx_port = -1;
+			lcore_infos[i].streams[j].rx_port = -1;
+			lcore_infos[i].streams[j].tx_queue = -1;
+			lcore_infos[i].streams[j].rx_queue = -1;
+			lcore_infos[i].streams_nb = 0;
+		}
+
+	/*
+	 * Calculate the total streams count.
+	 * Also distribute those streams count between the available
+	 * logical cores except first core, since it's reserved for
+	 * stats prints.
+	 */
+	nb_fwd_streams = nr_port * RXQs;
+	if ((int)(nb_lcores - 1) >= nb_fwd_streams)
+		for (i = 0; i < (int)(nb_lcores - 1); i++) {
+			lcore = rte_get_next_lcore(lcore, 0, 0);
+			lcore_infos[lcore].streams_nb = 1;
+		}
+	else {
+		streams_per_core = nb_fwd_streams / (nb_lcores - 1);
+		unassigned_streams = nb_fwd_streams % (nb_lcores - 1);
+		for (i = 0; i < (int)(nb_lcores - 1); i++) {
+			lcore = rte_get_next_lcore(lcore, 0, 0);
+			lcore_infos[lcore].streams_nb = streams_per_core;
+			if (unassigned_streams) {
+				lcore_infos[lcore].streams_nb++;
+				unassigned_streams--;
+			}
+		}
+	}
+
+	/*
+	 * Set the streams for the cores according to each logical
+	 * core stream count.
+	 * The streams is built on the design of what received should
+	 * forward as well, this means that if you received packets on
+	 * port 0 queue 0 then the same queue should forward the
+	 * packets, using the same logical core.
+	 */
+	lcore = rte_get_next_lcore(-1, 0, 0);
+	for (port = 0; port < nr_port; port++) {
+		/** Create FWD stream **/
+		for (queue = 0; queue < RXQs; queue++) {
+			if (!lcore_infos[lcore].streams_nb ||
+				!(stream_id % lcore_infos[lcore].streams_nb)) {
+				lcore = rte_get_next_lcore(lcore, 0, 0);
+				lcore_infos[lcore].mode = LCORE_MODE_PKT;
+				stream_id = 0;
+			}
+			lcore_infos[lcore].streams[stream_id].rx_queue = queue;
+			lcore_infos[lcore].streams[stream_id].tx_queue = queue;
+			lcore_infos[lcore].streams[stream_id].rx_port = port;
+			lcore_infos[lcore].streams[stream_id].tx_port = port;
+			stream_id++;
+		}
+	}
+
+	/** Print all streams **/
+	printf(":: Stream -> core id[N]: (rx_port, rx_queue)->(tx_port, tx_queue)\n");
+	for (i = 0; i < MAX_LCORES; i++)
+		for (j = 0; j < MAX_STREAMS; j++) {
+			/** No streams for this core **/
+			if (lcore_infos[i].streams[j].tx_port == -1)
+				break;
+			printf("Stream -> core id[%d]: (%d,%d)->(%d,%d)\n",
+				i,
+				lcore_infos[i].streams[j].rx_port,
+				lcore_infos[i].streams[j].rx_queue,
+				lcore_infos[i].streams[j].tx_port,
+				lcore_infos[i].streams[j].tx_queue);
+		}
+}
+
 static void
 init_port(void)
 {
@@ -757,6 +1052,11 @@ main(int argc, char **argv)
 		fprintf(stdout, ":: Memory allocation change(M): %.6lf\n",
 		(alloc - last_alloc) / 1.0e6);
 
+	if (enable_fwd) {
+		init_lcore_info();
+		rte_eal_mp_remote_launch(start_forwarding, NULL, CALL_MASTER);
+	}
+
 	RTE_LCORE_FOREACH_SLAVE(lcore_id)
 
 	if (rte_eal_wait_lcore(lcore_id) < 0)
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
index 28d452fd06..ecd760de81 100644
--- a/doc/guides/tools/flow-perf.rst
+++ b/doc/guides/tools/flow-perf.rst
@@ -21,6 +21,8 @@ have a multi core insertion rate measurement support in the app.
 The application also provide the ability to measure rte flow deletion rate,
 in addition to memory consumption before and after the flows creation.
 
+The app supports single and multi core performance measurements.
+
 
 Compiling the Application
 =========================
@@ -98,6 +100,10 @@ The command line options are:
 *	``--dump-socket-mem``
 	Dump the memory stats for each socket before the insertion and after.
 
+*	``--enable-fwd``
+	Enable packets forwarding after insertion/deletion operations.
+
+
 Attributes:
 
 *	``--ingress``
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH v4 1/5] app/test-flow-perf: add flow performance skeleton
  2020-04-30 10:33             ` [dpdk-dev] [PATCH v4 1/5] app/test-flow-perf: add flow performance skeleton Wisam Jaddo
@ 2020-04-30 11:59               ` Xiaoyu Min
  2020-05-04 10:16               ` Andrew Rybchenko
  2020-05-06 12:36               ` [dpdk-dev] [PATCH v5 0/5] Introduce flow perf application Wisam Jaddo
  2 siblings, 0 replies; 102+ messages in thread
From: Xiaoyu Min @ 2020-04-30 11:59 UTC (permalink / raw)
  To: Wisam Jaddo; +Cc: dev, thomas, jerinjacobk, gerlitz.or, l.yan

On Thu, 20-04-30, 10:33, Wisam Jaddo wrote:
> Add flow performance application skeleton.
> 
> Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
Acked-by: Xiaoyu Min <jackmin@mellanox.com>

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH v4 2/5] app/test-flow-perf: add insertion rate calculation
  2020-04-30 10:33             ` [dpdk-dev] [PATCH v4 2/5] app/test-flow-perf: add insertion rate calculation Wisam Jaddo
@ 2020-04-30 12:00               ` Xiaoyu Min
  2020-05-04 12:01               ` Andrew Rybchenko
  2020-05-06  4:00               ` Ajit Khaparde
  2 siblings, 0 replies; 102+ messages in thread
From: Xiaoyu Min @ 2020-04-30 12:00 UTC (permalink / raw)
  To: Wisam Jaddo; +Cc: dev, thomas, jerinjacobk, gerlitz.or, l.yan

On Thu, 20-04-30, 10:33, Wisam Jaddo wrote:
> Add insertion rate calculation feature into flow
> performance application.
> 
> The application now provide the ability to test
> insertion rate of specific rte_flow rule, by
> stressing it to the NIC, and calculate the
> insertion rate.
> 
> The application offers some options in the command
> line, to configure which rule to apply.
> 
> After that the application will start producing
> rules with same pattern but increasing the outer IP
> source address by 1 each time, thus it will give
> different flow each time, and all other items will
> have open masks.
> 
> The current design have single core insertion rate.
> In the future we may have a multi core insertion
> rate measurement support in the app.
> 
> Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
Acked-by: Xiaoyu Min <jackmin@mellanox.com>

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH v4 3/5] app/test-flow-perf: add deletion rate calculation
  2020-04-30 10:33             ` [dpdk-dev] [PATCH v4 3/5] app/test-flow-perf: add deletion " Wisam Jaddo
@ 2020-04-30 12:02               ` Xiaoyu Min
  0 siblings, 0 replies; 102+ messages in thread
From: Xiaoyu Min @ 2020-04-30 12:02 UTC (permalink / raw)
  To: Wisam Jaddo; +Cc: dev, thomas, jerinjacobk, gerlitz.or, l.yan

On Thu, 20-04-30, 10:33, Wisam Jaddo wrote:
> Add the ability to test deletion rate for flow performance
> application.
> 
> This feature is disabled by default, and can be enabled by
> add "--deletion-rate" in the application command line options.
> 
> Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
Acked-by: Xiaoyu Min <jackmin@mellanox.com>

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH v4 4/5] app/test-flow-perf: add memory dump to app
  2020-04-30 10:33             ` [dpdk-dev] [PATCH v4 4/5] app/test-flow-perf: add memory dump to app Wisam Jaddo
@ 2020-04-30 12:03               ` Xiaoyu Min
  2020-05-06  4:10               ` Ajit Khaparde
  1 sibling, 0 replies; 102+ messages in thread
From: Xiaoyu Min @ 2020-04-30 12:03 UTC (permalink / raw)
  To: Wisam Jaddo; +Cc: dev, thomas, jerinjacobk, gerlitz.or, l.yan, Suanming Mou

On Thu, 20-04-30, 10:33, Wisam Jaddo wrote:
> Introduce new feature to dump memory statistics of each socket
> and a total for all before and after the creation.
> 
> This will give two main advantages:
> 1- Check the memory consumption for large number of flows
> "insertion rate scenario alone"
> 
> 2- Check that there is no memory leakage after doing insertion then
> deletion.
> 
> Signed-off-by: Suanming Mou <suanmingm@mellanox.com>
> Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
Acked-by: Xiaoyu Min <jackmin@mellanox.com>

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH v4 5/5] app/test-flow-perf: add packet forwarding support
  2020-04-30 10:33             ` [dpdk-dev] [PATCH v4 5/5] app/test-flow-perf: add packet forwarding support Wisam Jaddo
@ 2020-04-30 12:05               ` Xiaoyu Min
  0 siblings, 0 replies; 102+ messages in thread
From: Xiaoyu Min @ 2020-04-30 12:05 UTC (permalink / raw)
  To: Wisam Jaddo; +Cc: dev, thomas, jerinjacobk, gerlitz.or, l.yan

On Thu, 20-04-30, 10:33, Wisam Jaddo wrote:
> Introduce packet forwarding support to the app to do
> some performance measurements.
> 
> The measurements are reported in terms of packets per
> second. The forwarding will start after the end
> of insertion/deletion operations.
> 
> The support has single and multi core performance measurements.
> 
> Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
Acked-by: Xiaoyu Min <jackmin@mellanox.com>

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH v4 0/5] Introduce flow perf application
  2020-04-30 10:33           ` [dpdk-dev] [PATCH v4 0/5] Introduce flow perf application Wisam Jaddo
                               ` (4 preceding siblings ...)
  2020-04-30 10:33             ` [dpdk-dev] [PATCH v4 5/5] app/test-flow-perf: add packet forwarding support Wisam Jaddo
@ 2020-05-04  7:12             ` Thomas Monjalon
  5 siblings, 0 replies; 102+ messages in thread
From: Thomas Monjalon @ 2020-05-04  7:12 UTC (permalink / raw)
  To: dev
  Cc: jackmin, jerinjacobk, gerlitz.or, l.yan, Wisam Jaddo,
	ferruh.yigit, ktraynor, arybchenko, ajit.khaparde, Wenzhuo Lu

Would be nice to have a last round of review before merging in -rc2.
Volunteers?


30/04/2020 12:33, Wisam Jaddo:
> Add new application to test rte flow performance from:
> - Insertion rate.
> - Deletion rate.
> - Memory consumption.
> - PPS forward measurement.




^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH v4 1/5] app/test-flow-perf: add flow performance skeleton
  2020-04-30 10:33             ` [dpdk-dev] [PATCH v4 1/5] app/test-flow-perf: add flow performance skeleton Wisam Jaddo
  2020-04-30 11:59               ` Xiaoyu Min
@ 2020-05-04 10:16               ` Andrew Rybchenko
  2020-05-05 10:45                 ` Wisam Monther
                                   ` (2 more replies)
  2020-05-06 12:36               ` [dpdk-dev] [PATCH v5 0/5] Introduce flow perf application Wisam Jaddo
  2 siblings, 3 replies; 102+ messages in thread
From: Andrew Rybchenko @ 2020-05-04 10:16 UTC (permalink / raw)
  To: Wisam Jaddo, dev, jackmin, thomas, jerinjacobk, gerlitz.or, l.yan

On 4/30/20 1:33 PM, Wisam Jaddo wrote:
> Add flow performance application skeleton.
> 
> Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
> ---
>  MAINTAINERS                          |   5 +
>  app/Makefile                         |   1 +
>  app/meson.build                      |   1 +
>  app/test-flow-perf/Makefile          |  26 +++
>  app/test-flow-perf/main.c            | 246 +++++++++++++++++++++++++++
>  app/test-flow-perf/meson.build       |  11 ++
>  app/test-flow-perf/user_parameters.h |  16 ++
>  config/common_base                   |   5 +
>  doc/guides/tools/flow-perf.rst       |  69 ++++++++
>  doc/guides/tools/index.rst           |   1 +
>  10 files changed, 381 insertions(+)
>  create mode 100644 app/test-flow-perf/Makefile
>  create mode 100644 app/test-flow-perf/main.c
>  create mode 100644 app/test-flow-perf/meson.build
>  create mode 100644 app/test-flow-perf/user_parameters.h
>  create mode 100644 doc/guides/tools/flow-perf.rst
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index d31a809292..b5632c1bf5 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1504,6 +1504,11 @@ T: git://dpdk.org/next/dpdk-next-net
>  F: app/test-pmd/
>  F: doc/guides/testpmd_app_ug/
>  
> +Flow performance tool
> +M: Wisam Jaddo <wisamm@mellanox.com>
> +F: app/test-flow-perf
> +F: doc/guides/flow-perf.rst
> +

Shouldn't it be alphabetically sorted? I think by app name.

>  Compression performance test application
>  T: git://dpdk.org/next/dpdk-next-crypto
>  F: app/test-compress-perf/
> diff --git a/app/Makefile b/app/Makefile
> index 823771c5fc..bd823f3db7 100644
> --- a/app/Makefile
> +++ b/app/Makefile
> @@ -9,6 +9,7 @@ DIRS-$(CONFIG_RTE_PROC_INFO) += proc-info
>  DIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += pdump
>  DIRS-$(CONFIG_RTE_LIBRTE_ACL) += test-acl
>  DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += test-cmdline
> +DIRS-$(CONFIG_RTE_TEST_FLOW_PERF) += test-flow-perf
>  DIRS-$(CONFIG_RTE_LIBRTE_FIB) += test-fib
>  DIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += test-pipeline
>  DIRS-$(CONFIG_RTE_LIBRTE_IPSEC) += test-sad
> diff --git a/app/meson.build b/app/meson.build
> index 0f7fe94649..e26f5b72f5 100644
> --- a/app/meson.build
> +++ b/app/meson.build
> @@ -14,6 +14,7 @@ apps = [
>  	'test-compress-perf',
>  	'test-crypto-perf',
>  	'test-eventdev',
> +	'test-flow-perf',

I think 'l' goes after 'i'.

>  	'test-fib',
>  	'test-pipeline',
>  	'test-pmd',
> diff --git a/app/test-flow-perf/Makefile b/app/test-flow-perf/Makefile
> new file mode 100644
> index 0000000000..45b1fb1464
> --- /dev/null
> +++ b/app/test-flow-perf/Makefile
> @@ -0,0 +1,26 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright 2020 Mellanox Technologies, Ltd
> +
> +include $(RTE_SDK)/mk/rte.vars.mk
> +
> +ifeq ($(CONFIG_RTE_TEST_FLOW_PERF),y)
> +
> +#
> +# library name
> +#
> +APP = flow_perf
> +
> +CFLAGS += -DALLOW_EXPERIMENTAL_API
> +CFLAGS += -O3
> +CFLAGS += $(WERROR_FLAGS)
> +CFLAGS += -Wno-deprecated-declarations
> +CFLAGS += -Wno-unused-function

Why is unused function warning disabled?

> +
> +#
> +# all source are stored in SRCS-y
> +#
> +SRCS-y += main.c
> +
> +include $(RTE_SDK)/mk/rte.app.mk
> +
> +endif
> diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
> new file mode 100644
> index 0000000000..156b9ef553
> --- /dev/null
> +++ b/app/test-flow-perf/main.c
> @@ -0,0 +1,246 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + *
> + * This file contain the application main file
> + * This application provides the user the ability to test the
> + * insertion rate for specific rte_flow rule under stress state ~4M rule/
> + *
> + * Then it will also provide packet per second measurement after installing
> + * all rules, the user may send traffic to test the PPS that match the rules
> + * after all rules are installed, to check performance or functionality after
> + * the stress.
> + *
> + * The flows insertion will go for all ports first, then it will print the
> + * results, after that the application will go into forwarding packets mode
> + * it will start receiving traffic if any and then forwarding it back and
> + * gives packet per second measurement.
> + *
> + * Copyright 2020 Mellanox Technologies, Ltd
> + */
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <stdint.h>
> +#include <inttypes.h>
> +#include <sys/types.h>
> +#include <sys/queue.h>
> +#include <netinet/in.h>
> +#include <setjmp.h>
> +#include <stdarg.h>
> +#include <ctype.h>
> +#include <errno.h>
> +#include <getopt.h>
> +#include <signal.h>
> +#include <stdbool.h>
> +#include <assert.h>
> +#include <unistd.h>
> +#include <fcntl.h>
> +#include <sys/time.h>
> +
> +
> +#include <rte_eal.h>
> +#include <rte_common.h>
> +#include <rte_malloc.h>
> +#include <rte_ether.h>
> +#include <rte_ethdev.h>
> +#include <rte_mempool.h>
> +#include <rte_mbuf.h>
> +#include <rte_net.h>
> +#include <rte_flow.h>
> +#include <rte_cycles.h>
> +#include <rte_memory.h>

It looks like many, many of the above headers are actually unused.
Please, remove unused headers.

> +
> +#include "user_parameters.h"
> +
> +static uint32_t nb_lcores;
> +static struct rte_mempool *mbuf_mp;
> +
> +static void usage(char *progname)
> +{
> +	printf("\nusage: %s", progname);

Is \n missing at the end of format string?

> +}
> +
> +static void
> +args_parse(int argc, char **argv)
> +{
> +	char **argvopt;
> +	int opt;
> +	int opt_idx;
> +	static struct option lgopts[] = {
> +		/* Control */
> +		{ "help",                       0, 0, 0 },
> +	};
> +
> +	argvopt = argv;
> +
> +	while ((opt = getopt_long(argc, argvopt, "",
> +				lgopts, &opt_idx)) != EOF) {
> +		switch (opt) {
> +		case 0:
> +			if (!strcmp(lgopts[opt_idx].name, "help")) {
> +				usage(argv[0]);
> +				rte_exit(EXIT_SUCCESS, "Displayed help\n");
> +			}
> +			break;
> +		default:
> +			usage(argv[0]);
> +			printf("Invalid option: %s\n", argv[optind]);

I think it is more friendly to log errors to stderr and log
invalid option message first before usage.

> +			rte_exit(EXIT_SUCCESS, "Invalid option\n");
> +			break;
> +		}
> +	}
> +}
> +
> +static void
> +init_port(void)
> +{
> +	int ret;
> +	uint16_t i, j;
> +	uint16_t port_id;
> +	uint16_t nr_ports = rte_eth_dev_count_avail();
> +	struct rte_eth_hairpin_conf hairpin_conf = {
> +			.peer_count = 1,
> +	};
> +	struct rte_eth_conf port_conf = {
> +		.rxmode = {
> +			.split_hdr_size = 0,

I think it is not required, since compiler will
do it for you anyway having below initialization.

> +		},
> +		.rx_adv_conf = {
> +			.rss_conf.rss_hf =
> +					ETH_RSS_IP  |
> +					ETH_RSS_UDP |

Maybe it is better to remove ETH_RSS_UDP by default,
since it is less common than RSS for TCP because of
possible fragmentation and packets from the same
stream being delivered to different CPU cores.

> +					ETH_RSS_TCP,
> +		}
> +	};
> +	struct rte_eth_txconf txq_conf;
> +	struct rte_eth_rxconf rxq_conf;
> +	struct rte_eth_dev_info dev_info;
> +
> +	if (nr_ports == 0)
> +		rte_exit(EXIT_FAILURE, "Error: no port detected\n");

Please, add empty line here to logically separate above
error from pool creation. Right now it looks misleading.

> +	mbuf_mp = rte_pktmbuf_pool_create("mbuf_pool",
> +					TOTAL_MBUF_NUM, MBUF_CACHE_SIZE,
> +					0, MBUF_SIZE,
> +					rte_socket_id());
> +
> +	if (mbuf_mp == NULL)
> +		rte_exit(EXIT_FAILURE, "Error: can't init mbuf pool\n");
> +
> +	for (port_id = 0; port_id < nr_ports; port_id++) {
> +		ret = rte_eth_dev_info_get(port_id, &dev_info);
> +		if (ret != 0)
> +			rte_exit(EXIT_FAILURE,
> +					"Error during getting device (port %u) info: %s\n",

The line looks long. More than 80 symbols.
Please, use just one TAB to indent relative to rte_exit().
It will make the line a bit shorter.

> +					port_id, strerror(-ret));
> +
> +		port_conf.txmode.offloads &= dev_info.tx_offload_capa;

Taking into account that txmode.offloads are 0 above,
it looks strange. Maybe it is added too early in the
patch series?

> +		printf(":: initializing port: %d\n", port_id);
> +		ret = rte_eth_dev_configure(port_id, RXQs + HAIRPIN_QUEUES,
> +				TXQs + HAIRPIN_QUEUES, &port_conf);

RXQs and TXQs are bad since mixing upper and lower
case letters is bad. Macros are typically in upper case.
Maybe RXQ_NUM? Or NR_RXQ (however, it would be very easy
to misread as NR_RXD and vice versa, so not good)?

> +		if (ret < 0)
> +			rte_exit(EXIT_FAILURE,
> +					":: cannot configure device: err=%d, port=%u\n",

Too long line, decrease indent

> +					ret, port_id);
> +
> +		rxq_conf = dev_info.default_rxconf;
> +		rxq_conf.offloads = port_conf.rxmode.offloads;

Same here. Also it should take supported offloads
into account.

> +		for (i = 0; i < RXQs; i++) {
> +			ret = rte_eth_rx_queue_setup(port_id, i, NR_RXD,
> +						rte_eth_dev_socket_id(port_id),
> +						&rxq_conf,
> +						mbuf_mp);
> +			if (ret < 0)
> +				rte_exit(EXIT_FAILURE,
> +						":: Rx queue setup failed: err=%d, port=%u\n",

Too long line, decrease indent

> +						ret, port_id);
> +		}
> +
> +		txq_conf = dev_info.default_txconf;
> +		txq_conf.offloads = port_conf.txmode.offloads;

Same here. Also it should take supported offloads
into account.

> +
> +		for (i = 0; i < TXQs; i++) {
> +			ret = rte_eth_tx_queue_setup(port_id, i, NR_TXD,
> +						rte_eth_dev_socket_id(port_id),
> +						&txq_conf);
> +			if (ret < 0)
> +				rte_exit(EXIT_FAILURE,
> +						":: Tx queue setup failed: err=%d, port=%u\n",

Too long line, decrease indent

> +						ret, port_id);
> +		}
> +
> +		ret = rte_eth_promiscuous_enable(port_id);
> +		if (ret != 0)
> +			rte_exit(EXIT_FAILURE,
> +					":: promiscuous mode enable failed: err=%s, port=%u\n",

Too long line, decrease indent

> +					rte_strerror(-ret), port_id);
> +
> +		for (i = RXQs, j = 0; i < RXQs + HAIRPIN_QUEUES; i++, j++) {
> +			hairpin_conf.peers[0].port = port_id;
> +			hairpin_conf.peers[0].queue = j + TXQs;
> +			ret = rte_eth_rx_hairpin_queue_setup(port_id, i,
> +							NR_RXD, &hairpin_conf);
> +			if (ret != 0)
> +				rte_exit(EXIT_FAILURE,
> +					":: Hairpin rx queue setup failed: err=%d, port=%u\n",
> +					ret, port_id);
> +		}
> +
> +		for (i = TXQs, j = 0; i < TXQs + HAIRPIN_QUEUES; i++, j++) {
> +			hairpin_conf.peers[0].port = port_id;
> +			hairpin_conf.peers[0].queue = j + RXQs;
> +			ret = rte_eth_tx_hairpin_queue_setup(port_id, i,
> +							NR_TXD, &hairpin_conf);
> +			if (ret != 0)
> +				rte_exit(EXIT_FAILURE,
> +					":: Hairpin tx queue setup failed: err=%d, port=%u\n",
> +					ret, port_id);
> +		}
> +
> +		ret = rte_eth_dev_start(port_id);
> +		if (ret < 0)
> +			rte_exit(EXIT_FAILURE,
> +				"rte_eth_dev_start:err=%d, port=%u\n",
> +				ret, port_id);
> +
> +		printf(":: initializing port: %d done\n", port_id);
> +	}
> +}
> +
> +int
> +main(int argc, char **argv)
> +{
> +	uint16_t lcore_id;
> +	uint16_t port;
> +	uint16_t nr_ports;
> +	int ret;
> +	struct rte_flow_error error;
> +
> +	nr_ports = rte_eth_dev_count_avail();

Before EAL init? It is definitely unclear.
If it is done on purpose, please add a comment
to explain why.

> +	ret = rte_eal_init(argc, argv);
> +	if (ret < 0)
> +		rte_exit(EXIT_FAILURE, "EAL init failed\n");
> +
> +	argc -= ret;
> +	argv += ret;
> +
> +	if (argc > 1)
> +		args_parse(argc, argv);
> +
> +	init_port();
> +
> +	nb_lcores = rte_lcore_count();
> +
> +	if (nb_lcores <= 1)
> +		rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
> +
> +	RTE_LCORE_FOREACH_SLAVE(lcore_id)
> +
> +	if (rte_eal_wait_lcore(lcore_id) < 0)
> +		break;

Break what? Is it compile tested?

> +
> +	for (port = 0; port < nr_ports; port++) {
> +		rte_flow_flush(port, &error);
> +		rte_eth_dev_stop(port);
> +		rte_eth_dev_close(port);
> +	}
> +	return 0;
> +}
> diff --git a/app/test-flow-perf/meson.build b/app/test-flow-perf/meson.build
> new file mode 100644
> index 0000000000..ec9bb3b3aa
> --- /dev/null
> +++ b/app/test-flow-perf/meson.build
> @@ -0,0 +1,11 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright(c) 2020 Mellanox Technologies, Ltd
> +
> +# meson file, for building this example as part of a main DPDK build.
> +#
> +# To build this example as a standalone application with an already-installed
> +# DPDK instance, use 'make'
> +
> +sources = files(
> +	'main.c',
> +)
> diff --git a/app/test-flow-perf/user_parameters.h b/app/test-flow-perf/user_parameters.h
> new file mode 100644
> index 0000000000..56ec7f47b5
> --- /dev/null
> +++ b/app/test-flow-perf/user_parameters.h
> @@ -0,0 +1,16 @@
> +/* SPDX-License-Identifier: BSD-3-Claus
> + *
> + * This file will hold the user parameters values
> + *
> + * Copyright 2020 Mellanox Technologies, Ltd
> + */
> +
> +/** Configuration **/
> +#define RXQs 4
> +#define TXQs 4
> +#define HAIRPIN_QUEUES 4

It makes it Mellanox-specific from the first patch since
only mlx5 supports hairpin queues. Such things should be
specified using parameters from the very beginning.

> +#define TOTAL_MBUF_NUM 32000
> +#define MBUF_SIZE 2048
> +#define MBUF_CACHE_SIZE 512
> +#define NR_RXD  256
> +#define NR_TXD  256
> diff --git a/config/common_base b/config/common_base
> index 14000ba07e..eaaeaaaee2 100644
> --- a/config/common_base
> +++ b/config/common_base
> @@ -1124,3 +1124,8 @@ CONFIG_RTE_APP_CRYPTO_PERF=y
>  # Compile the eventdev application
>  #
>  CONFIG_RTE_APP_EVENTDEV=y
> +
> +#
> +# Compile the rte flow perf application
> +#
> +CONFIG_RTE_TEST_FLOW_PERF=y

CONFIG_RTE_APP_FLOW_PERF to follow naming conventions.

> diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
> new file mode 100644
> index 0000000000..30ce1b6cc0
> --- /dev/null
> +++ b/doc/guides/tools/flow-perf.rst
> @@ -0,0 +1,69 @@
> +..	SPDX-License-Identifier: BSD-3-Clause
> +	Copyright 2020 Mellanox Technologies, Ltd
> +
> +RTE Flow performance tool
> +=========================
> +
> +Application for rte_flow performance testing.
> +
> +
> +Compiling the Application
> +=========================
> +The ``test-flow-perf`` application is compiled as part of the main compilation
> +of the DPDK libraries and tools.
> +
> +Refer to the DPDK Getting Started Guides for details.
> +The basic compilation steps are:
> +
> +#. Set the required environmental variables and go to the source directory:
> +
> +	.. code-block:: console
> +
> +		export RTE_SDK=/path/to/rte_sdk
> +		cd $RTE_SDK
> +
> +#. Set the compilation target. For example:
> +
> +	.. code-block:: console
> +
> +		export RTE_TARGET=x86_64-native-linux-gcc
> +
> +#. Build the application:
> +
> +	.. code-block:: console
> +
> +		make install T=$RTE_TARGET
> +
> +#. The compiled application will be located at:
> +
> +	.. code-block:: console
> +
> +		$RTE_SDK/$RTE_TARGET/app/flow-perf
> +
> +
> +Running the Application
> +=======================
> +
> +EAL Command-line Options
> +------------------------
> +
> +Please refer to :doc:`EAL parameters (Linux) <../linux_gsg/linux_eal_parameters>`
> +or :doc:`EAL parameters (FreeBSD) <../freebsd_gsg/freebsd_eal_parameters>` for
> +a list of available EAL command-line options.
> +
> +
> +Flow performance Options
> +------------------------
> +
> +The following are the command-line options for the flow performance application.
> +They must be separated from the EAL options, shown in the previous section, with
> +a ``--`` separator:
> +
> +.. code-block:: console
> +
> +	sudo ./test-flow-perf -n 4 -w 08:00.0,dv_flow_en=1 --
> +
> +The command line options are:
> +
> +*	``--help``
> +	Display a help message and quit.
> diff --git a/doc/guides/tools/index.rst b/doc/guides/tools/index.rst
> index 782b30864e..7279daebc6 100644
> --- a/doc/guides/tools/index.rst
> +++ b/doc/guides/tools/index.rst
> @@ -16,3 +16,4 @@ DPDK Tools User Guides
>      cryptoperf
>      comp_perf
>      testeventdev
> +    flow-perf
> 

I think above should be alphabetically sorted as well.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH v4 2/5] app/test-flow-perf: add insertion rate calculation
  2020-04-30 10:33             ` [dpdk-dev] [PATCH v4 2/5] app/test-flow-perf: add insertion rate calculation Wisam Jaddo
  2020-04-30 12:00               ` Xiaoyu Min
@ 2020-05-04 12:01               ` Andrew Rybchenko
  2020-05-06  4:00               ` Ajit Khaparde
  2 siblings, 0 replies; 102+ messages in thread
From: Andrew Rybchenko @ 2020-05-04 12:01 UTC (permalink / raw)
  To: Wisam Jaddo, dev, jackmin, thomas, jerinjacobk, gerlitz.or, l.yan

On 4/30/20 1:33 PM, Wisam Jaddo wrote:
> Add insertion rate calculation feature into flow
> performance application.
> 
> The application now provide the ability to test
> insertion rate of specific rte_flow rule, by
> stressing it to the NIC, and calculate the
> insertion rate.
> 
> The application offers some options in the command
> line, to configure which rule to apply.
> 
> After that the application will start producing
> rules with same pattern but increasing the outer IP
> source address by 1 each time, thus it will give
> different flow each time, and all other items will
> have open masks.
> 
> The current design have single core insertion rate.
> In the future we may have a multi core insertion
> rate measurement support in the app.
> 
> Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
> ---
>  app/test-flow-perf/Makefile          |   3 +
>  app/test-flow-perf/actions_gen.c     |  86 ++++++
>  app/test-flow-perf/actions_gen.h     |  48 ++++
>  app/test-flow-perf/flow_gen.c        | 176 ++++++++++++
>  app/test-flow-perf/flow_gen.h        |  61 ++++
>  app/test-flow-perf/items_gen.c       | 265 +++++++++++++++++
>  app/test-flow-perf/items_gen.h       |  68 +++++
>  app/test-flow-perf/main.c            | 416 +++++++++++++++++++++++++--
>  app/test-flow-perf/meson.build       |   8 +
>  app/test-flow-perf/user_parameters.h |  15 +
>  doc/guides/tools/flow-perf.rst       | 186 +++++++++++-
>  11 files changed, 1307 insertions(+), 25 deletions(-)
>  create mode 100644 app/test-flow-perf/actions_gen.c
>  create mode 100644 app/test-flow-perf/actions_gen.h
>  create mode 100644 app/test-flow-perf/flow_gen.c
>  create mode 100644 app/test-flow-perf/flow_gen.h
>  create mode 100644 app/test-flow-perf/items_gen.c
>  create mode 100644 app/test-flow-perf/items_gen.h
> 
> diff --git a/app/test-flow-perf/Makefile b/app/test-flow-perf/Makefile
> index 45b1fb1464..968c7c60dd 100644
> --- a/app/test-flow-perf/Makefile
> +++ b/app/test-flow-perf/Makefile
> @@ -19,6 +19,9 @@ CFLAGS += -Wno-unused-function
>  #
>  # all source are stored in SRCS-y
>  #
> +SRCS-y += actions_gen.c
> +SRCS-y += flow_gen.c
> +SRCS-y += items_gen.c
>  SRCS-y += main.c
>  
>  include $(RTE_SDK)/mk/rte.app.mk
> diff --git a/app/test-flow-perf/actions_gen.c b/app/test-flow-perf/actions_gen.c
> new file mode 100644
> index 0000000000..564ed820e4
> --- /dev/null
> +++ b/app/test-flow-perf/actions_gen.c
> @@ -0,0 +1,86 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + *
> + * The file contains the implementations of actions generators.
> + * Each generator is responsible for preparing it's action instance
> + * and initializing it with needed data.
> + *
> + * Copyright 2020 Mellanox Technologies, Ltd
> + **/
> +
> +#include <sys/types.h>
> +#include <rte_malloc.h>
> +#include <rte_flow.h>
> +#include <rte_ethdev.h>
> +
> +#include "actions_gen.h"
> +#include "user_parameters.h"
> +
> +void
> +gen_mark(void)
> +{
> +	mark_action.id = MARK_ID;
> +}
> +
> +void
> +gen_queue(uint16_t queue)
> +{
> +	queue_action.index = queue;
> +}
> +
> +void
> +gen_jump(uint16_t next_table)
> +{
> +	jump_action.group = next_table;
> +}

It looks like global structures are used above.
This is hardly future-proof, taking the above words
about multi-core insertion into account.

> +void
> +gen_rss(uint16_t *queues, uint16_t queues_number)
> +{
> +	uint16_t queue;
> +	struct action_rss_data *rss_data;

It is better to have an empty line here, since right now
it looks very misleading, taking into account the empty
line before the NULL check below.

> +	rss_data = rte_malloc("rss_data",
> +		sizeof(struct action_rss_data), 0);
> +
> +	if (rss_data == NULL)
> +		rte_exit(EXIT_FAILURE, "No Memory available!");
> +
> +	*rss_data = (struct action_rss_data){
> +		.conf = (struct rte_flow_action_rss){
> +			.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
> +			.level = 0,
> +			.types = ETH_RSS_IP,

Why does it differ from port RSS types defaults?

> +			.key_len = 0,
> +			.queue_num = queues_number,
> +			.key = 0,
> +			.queue = rss_data->queue,
> +		},
> +		.key = { 0 },
> +		.queue = { 0 },
> +	};
> +
> +	for (queue = 0; queue < queues_number; queue++)
> +		rss_data->queue[queue] = queues[queue];
> +
> +	rss_action = &rss_data->conf;
> +}
> +
> +void
> +gen_set_meta(void)
> +{
> +	meta_action.data = RTE_BE32(META_DATA);
> +	meta_action.mask = RTE_BE32(0xffffffff);
> +}
> +
> +void
> +gen_set_tag(void)
> +{
> +	tag_action.data = RTE_BE32(META_DATA);
> +	tag_action.mask = RTE_BE32(0xffffffff);
> +	tag_action.index = TAG_INDEX;
> +}
> +
> +void
> +gen_port_id(void)
> +{
> +	port_id.id = PORT_ID_DST;
> +}

Global structures again.

> diff --git a/app/test-flow-perf/actions_gen.h b/app/test-flow-perf/actions_gen.h
> new file mode 100644
> index 0000000000..556d48b871
> --- /dev/null
> +++ b/app/test-flow-perf/actions_gen.h
> @@ -0,0 +1,48 @@
> +/** SPDX-License-Identifier: BSD-3-Clause
> + *
> + * This file contains the functions definitions to
> + * generate each supported action.
> + *
> + * Copyright 2020 Mellanox Technologies, Ltd
> + **/
> +
> +#ifndef _ACTION_GEN_
> +#define _ACTION_GEN_
> +
> +struct rte_flow_action_mark mark_action;
> +struct rte_flow_action_queue queue_action;
> +struct rte_flow_action_jump jump_action;
> +struct rte_flow_action_rss *rss_action;
> +struct rte_flow_action_set_meta meta_action;
> +struct rte_flow_action_set_tag tag_action;
> +struct rte_flow_action_port_id port_id;
> +
> +/* Storage for struct rte_flow_action_rss including external data. */
> +struct action_rss_data {
> +	struct rte_flow_action_rss conf;
> +	uint8_t key[64];
> +	uint16_t queue[128];
> +} action_rss_data;
> +
> +void
> +gen_mark(void);
> +
> +void
> +gen_queue(uint16_t queue);
> +
> +void
> +gen_jump(uint16_t next_table);
> +
> +void
> +gen_rss(uint16_t *queues, uint16_t queues_number);
> +
> +void
> +gen_set_meta(void);
> +
> +void
> +gen_set_tag(void);
> +
> +void
> +gen_port_id(void);
> +
> +#endif
> diff --git a/app/test-flow-perf/flow_gen.c b/app/test-flow-perf/flow_gen.c
> new file mode 100644
> index 0000000000..2d42deace9
> --- /dev/null
> +++ b/app/test-flow-perf/flow_gen.c
> @@ -0,0 +1,176 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + *
> + * The file contains the implementations of the method to
> + * fill items, actions & attributes in their corresponding
> + * arrays, and then generate rte_flow rule.
> + *
> + * After the generation. The rule goes to validation then
> + * creation state and then return the results.
> + *
> + * Copyright 2020 Mellanox Technologies, Ltd
> + */
> +
> +#include <stdint.h>
> +
> +#include "flow_gen.h"
> +#include "items_gen.h"
> +#include "actions_gen.h"
> +#include "user_parameters.h"
> +
> +
> +static void
> +fill_attributes(struct rte_flow_attr *attr,
> +	uint8_t flow_attrs, uint16_t group)
> +{
> +	if (flow_attrs & INGRESS)
> +		attr->ingress = 1;
> +	if (flow_attrs & EGRESS)
> +		attr->egress = 1;
> +	if (flow_attrs & TRANSFER)
> +		attr->transfer = 1;
> +	attr->group = group;
> +}
> +
> +static void
> +fill_items(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint16_t flow_items, uint32_t outer_ip_src)
> +{
> +	uint8_t items_counter = 0;
> +
> +	if (flow_items & META_ITEM)
> +		add_meta_data(items, items_counter++);
> +	if (flow_items & TAG_ITEM)
> +		add_meta_tag(items, items_counter++);
> +	if (flow_items & ETH_ITEM)
> +		add_ether(items, items_counter++);
> +	if (flow_items & VLAN_ITEM)
> +		add_vlan(items, items_counter++);
> +	if (flow_items & IPV4_ITEM)
> +		add_ipv4(items, items_counter++, outer_ip_src);
> +	if (flow_items & IPV6_ITEM)
> +		add_ipv6(items, items_counter++, outer_ip_src);
> +	if (flow_items & TCP_ITEM)
> +		add_tcp(items, items_counter++);
> +	if (flow_items & UDP_ITEM)
> +		add_udp(items, items_counter++);
> +	if (flow_items & VXLAN_ITEM)
> +		add_vxlan(items, items_counter++);
> +	if (flow_items & VXLAN_GPE_ITEM)
> +		add_vxlan_gpe(items, items_counter++);
> +	if (flow_items & GRE_ITEM)
> +		add_gre(items, items_counter++);
> +	if (flow_items & GENEVE_ITEM)
> +		add_geneve(items, items_counter++);
> +	if (flow_items & GTP_ITEM)
> +		add_gtp(items, items_counter++);
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_END;
> +}
> +
> +static void
> +fill_actions(struct rte_flow_action actions[MAX_ACTIONS_NUM],
> +	uint16_t flow_actions, uint32_t counter, uint16_t next_table)
> +{
> +	uint8_t actions_counter = 0;
> +	uint16_t queues[RXQs];
> +	uint16_t hairpin_queues[HAIRPIN_QUEUES];
> +	uint16_t i;
> +	struct rte_flow_action_count count_action;
> +	uint8_t temp = counter & 0xff;
> +
> +	/* None-fate actions */
> +	if (flow_actions & MARK_ACTION) {
> +		if (!counter)
> +			gen_mark();
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_MARK;
> +		actions[actions_counter++].conf = &mark_action;
> +	}
> +	if (flow_actions & COUNT_ACTION) {
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_COUNT;
> +		actions[actions_counter++].conf = &count_action;
> +	}
> +	if (flow_actions & META_ACTION) {
> +		if (!counter)
> +			gen_set_meta();
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_SET_META;
> +		actions[actions_counter++].conf = &meta_action;
> +	}
> +	if (flow_actions & TAG_ACTION) {
> +		if (!counter)
> +			gen_set_tag();
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_SET_TAG;
> +		actions[actions_counter++].conf = &tag_action;
> +	}
> +
> +	/* Fate actions */
> +	if (flow_actions & QUEUE_ACTION) {
> +		gen_queue(counter % RXQs);
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_QUEUE;
> +		actions[actions_counter++].conf = &queue_action;
> +	}
> +	if (flow_actions & RSS_ACTION) {
> +		for (i = 0; i < RXQs; i++)
> +			queues[i] = (temp >> (i << 1)) & 0x3;
> +		gen_rss(queues, RXQs);
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_RSS;
> +		actions[actions_counter++].conf = rss_action;
> +	}
> +	if (flow_actions & JUMP_ACTION) {
> +		if (!counter)
> +			gen_jump(next_table);
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_JUMP;
> +		actions[actions_counter++].conf = &jump_action;
> +	}
> +	if (flow_actions & PORT_ID_ACTION) {
> +		if (!counter)
> +			gen_port_id();
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_PORT_ID;
> +		actions[actions_counter++].conf = &port_id;
> +	}
> +	if (flow_actions & DROP_ACTION)
> +		actions[actions_counter++].type = RTE_FLOW_ACTION_TYPE_DROP;
> +	if (flow_actions & HAIRPIN_QUEUE_ACTION) {
> +		gen_queue((counter % HAIRPIN_QUEUES) + RXQs);
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_QUEUE;
> +		actions[actions_counter++].conf = &queue_action;
> +	}
> +	if (flow_actions & HAIRPIN_RSS_ACTION) {
> +		for (i = 0; i < HAIRPIN_QUEUES; i++)
> +			hairpin_queues[i] = ((temp >> (i << 1)) & 0x3) + RXQs;
> +		gen_rss(hairpin_queues, RXQs);
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_RSS;
> +		actions[actions_counter++].conf = rss_action;
> +	}
> +
> +	actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_END;
> +}
> +
> +struct rte_flow *
> +generate_flow(uint16_t port_id,
> +	uint16_t group,
> +	uint8_t flow_attrs,
> +	uint16_t flow_items,
> +	uint16_t flow_actions,
> +	uint16_t next_table,
> +	uint32_t outer_ip_src,
> +	struct rte_flow_error *error)
> +{
> +	struct rte_flow_attr attr;
> +	struct rte_flow_item items[MAX_ITEMS_NUM];
> +	struct rte_flow_action actions[MAX_ACTIONS_NUM];
> +	struct rte_flow *flow = NULL;
> +
> +	memset(items, 0, sizeof(items));
> +	memset(actions, 0, sizeof(actions));
> +	memset(&attr, 0, sizeof(struct rte_flow_attr));
> +
> +	fill_attributes(&attr, flow_attrs, group);
> +
> +	fill_actions(actions, flow_actions,
> +			outer_ip_src, next_table);
> +
> +	fill_items(items, flow_items, outer_ip_src);
> +
> +	flow = rte_flow_create(port_id, &attr, items, actions, error);
> +	return flow;
> +}
> diff --git a/app/test-flow-perf/flow_gen.h b/app/test-flow-perf/flow_gen.h
> new file mode 100644
> index 0000000000..99cb9e3791
> --- /dev/null
> +++ b/app/test-flow-perf/flow_gen.h
> @@ -0,0 +1,61 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + *
> + * This file contains the items, actions and attributes
> + * definition. And the methods to prepare and fill items,
> + * actions and attributes to generate rte_flow rule.
> + *
> + * Copyright 2020 Mellanox Technologies, Ltd
> + */
> +
> +#ifndef _FLOW_GEN_
> +#define _FLOW_GEN_
> +
> +#include <stdint.h>
> +#include <rte_flow.h>
> +
> +#include "user_parameters.h"
> +
> +/* Items */
> +#define ETH_ITEM       0x0001
> +#define IPV4_ITEM      0x0002
> +#define IPV6_ITEM      0x0004
> +#define VLAN_ITEM      0x0008
> +#define TCP_ITEM       0x0010
> +#define UDP_ITEM       0x0020
> +#define VXLAN_ITEM     0x0040
> +#define VXLAN_GPE_ITEM 0x0080
> +#define GRE_ITEM       0x0100
> +#define GENEVE_ITEM    0x0200
> +#define GTP_ITEM       0x0400
> +#define META_ITEM      0x0800
> +#define TAG_ITEM       0x1000

Maybe it is better to use (1UL << RTE_FLOW_ITEM_TYPE_ETH) as
flags in the bitmask? E.g. define a macro:
#define FLOW_ITEM_MASK(_x) \
    (UINT64_C(1) << RTE_FLOW_ITEM_TYPE_ ## _x)
and use it as
    FLOW_ITEM_MASK(ETH)

> +
> +/* Actions */
> +#define QUEUE_ACTION   0x0001
> +#define MARK_ACTION    0x0002
> +#define JUMP_ACTION    0x0004
> +#define RSS_ACTION     0x0008
> +#define COUNT_ACTION   0x0010
> +#define META_ACTION    0x0020
> +#define TAG_ACTION     0x0040
> +#define DROP_ACTION    0x0080
> +#define PORT_ID_ACTION 0x0100
> +#define HAIRPIN_QUEUE_ACTION 0x0200
> +#define HAIRPIN_RSS_ACTION   0x0400

same as above

> +
> +/* Attributes */
> +#define INGRESS  0x0001
> +#define EGRESS   0x0002
> +#define TRANSFER 0x0004
> +
> +struct rte_flow *
> +generate_flow(uint16_t port_id,
> +	uint16_t group,
> +	uint8_t flow_attrs,
> +	uint16_t flow_items,
> +	uint16_t flow_actions,
> +	uint16_t next_table,
> +	uint32_t outer_ip_src,
> +	struct rte_flow_error *error);
> +
> +#endif
> diff --git a/app/test-flow-perf/items_gen.c b/app/test-flow-perf/items_gen.c
> new file mode 100644
> index 0000000000..fb9733d4e7
> --- /dev/null
> +++ b/app/test-flow-perf/items_gen.c
> @@ -0,0 +1,265 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + *
> + * This file contain the implementations of the items
> + * related methods. Each Item have a method to prepare
> + * the item and add it into items array in given index.
> + *
> + * Copyright 2020 Mellanox Technologies, Ltd
> + */
> +
> +#include <stdint.h>
> +#include <rte_flow.h>
> +
> +#include "items_gen.h"
> +#include "user_parameters.h"
> +
> +static struct rte_flow_item_eth eth_spec;
> +static struct rte_flow_item_eth eth_mask;
> +static struct rte_flow_item_vlan vlan_spec;
> +static struct rte_flow_item_vlan vlan_mask;
> +static struct rte_flow_item_ipv4 ipv4_spec;
> +static struct rte_flow_item_ipv4 ipv4_mask;
> +static struct rte_flow_item_ipv6 ipv6_spec;
> +static struct rte_flow_item_ipv6 ipv6_mask;
> +static struct rte_flow_item_udp udp_spec;
> +static struct rte_flow_item_udp udp_mask;
> +static struct rte_flow_item_tcp tcp_spec;
> +static struct rte_flow_item_tcp tcp_mask;
> +static struct rte_flow_item_vxlan vxlan_spec;
> +static struct rte_flow_item_vxlan vxlan_mask;
> +static struct rte_flow_item_vxlan_gpe vxlan_gpe_spec;
> +static struct rte_flow_item_vxlan_gpe vxlan_gpe_mask;
> +static struct rte_flow_item_gre gre_spec;
> +static struct rte_flow_item_gre gre_mask;
> +static struct rte_flow_item_geneve geneve_spec;
> +static struct rte_flow_item_geneve geneve_mask;
> +static struct rte_flow_item_gtp gtp_spec;
> +static struct rte_flow_item_gtp gtp_mask;
> +static struct rte_flow_item_meta meta_spec;
> +static struct rte_flow_item_meta meta_mask;
> +static struct rte_flow_item_tag tag_spec;
> +static struct rte_flow_item_tag tag_mask;

Any global variable is usually very bad if you plan
to use many CPU cores via threads.

> +
> +
> +void
> +add_ether(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	memset(&eth_spec, 0, sizeof(struct rte_flow_item_eth));
> +	memset(&eth_mask, 0, sizeof(struct rte_flow_item_eth));
> +	eth_spec.type = 0;
> +	eth_mask.type = 0;
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_ETH;
> +	items[items_counter].spec = &eth_spec;
> +	items[items_counter].mask = &eth_mask;
> +}
> +
> +void
> +add_vlan(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	uint16_t vlan_value = VLAN_VALUE;
> +	memset(&vlan_spec, 0, sizeof(struct rte_flow_item_vlan));
> +	memset(&vlan_mask, 0, sizeof(struct rte_flow_item_vlan));
> +
> +	vlan_spec.tci = RTE_BE16(vlan_value);
> +	vlan_mask.tci = RTE_BE16(0xffff);
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_VLAN;
> +	items[items_counter].spec = &vlan_spec;
> +	items[items_counter].mask = &vlan_mask;
> +}
> +
> +void
> +add_ipv4(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter, uint32_t src_ipv4)
> +{
> +	memset(&ipv4_spec, 0, sizeof(struct rte_flow_item_ipv4));
> +	memset(&ipv4_mask, 0, sizeof(struct rte_flow_item_ipv4));
> +
> +	ipv4_spec.hdr.src_addr = src_ipv4;
> +	ipv4_mask.hdr.src_addr = 0xffffffff;
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_IPV4;
> +	items[items_counter].spec = &ipv4_spec;
> +	items[items_counter].mask = &ipv4_mask;
> +}
> +
> +
> +void
> +add_ipv6(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter, int src_ipv6)
> +{
> +	memset(&ipv6_spec, 0, sizeof(struct rte_flow_item_ipv6));
> +	memset(&ipv6_mask, 0, sizeof(struct rte_flow_item_ipv6));
> +
> +	/** Set ipv6 src **/
> +	memset(&ipv6_spec.hdr.src_addr, src_ipv6,
> +					sizeof(ipv6_spec.hdr.src_addr) / 2);
> +
> +	/** Full mask **/
> +	memset(&ipv6_mask.hdr.src_addr, 1,
> +					sizeof(ipv6_spec.hdr.src_addr));
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_IPV6;
> +	items[items_counter].spec = &ipv6_spec;
> +	items[items_counter].mask = &ipv6_mask;
> +}
> +
> +void
> +add_tcp(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	memset(&tcp_spec, 0, sizeof(struct rte_flow_item_tcp));
> +	memset(&tcp_mask, 0, sizeof(struct rte_flow_item_tcp));
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_TCP;
> +	items[items_counter].spec = &tcp_spec;
> +	items[items_counter].mask = &tcp_mask;
> +}
> +
> +void
> +add_udp(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	memset(&udp_spec, 0, sizeof(struct rte_flow_item_udp));
> +	memset(&udp_mask, 0, sizeof(struct rte_flow_item_udp));
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_UDP;
> +	items[items_counter].spec = &udp_spec;
> +	items[items_counter].mask = &udp_mask;
> +}
> +
> +void
> +add_vxlan(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	uint32_t vni_value = VNI_VALUE;
> +	uint8_t i;
> +	memset(&vxlan_spec, 0, sizeof(struct rte_flow_item_vxlan));
> +	memset(&vxlan_mask, 0, sizeof(struct rte_flow_item_vxlan));
> +
> +	/* Set standard vxlan vni */
> +	for (i = 0; i < 3; i++) {
> +		vxlan_spec.vni[2 - i] = vni_value >> (i * 8);
> +		vxlan_mask.vni[2 - i] = 0xff;
> +	}
> +
> +	/* Standard vxlan flags **/
> +	vxlan_spec.flags = 0x8;
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_VXLAN;
> +	items[items_counter].spec = &vxlan_spec;
> +	items[items_counter].mask = &vxlan_mask;
> +}
> +
> +void
> +add_vxlan_gpe(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	uint32_t vni_value = VNI_VALUE;
> +	uint8_t i;
> +	memset(&vxlan_gpe_spec, 0, sizeof(struct rte_flow_item_vxlan_gpe));
> +	memset(&vxlan_gpe_mask, 0, sizeof(struct rte_flow_item_vxlan_gpe));
> +
> +	/* Set vxlan-gpe vni */
> +	for (i = 0; i < 3; i++) {
> +		vxlan_gpe_spec.vni[2 - i] = vni_value >> (i * 8);
> +		vxlan_gpe_mask.vni[2 - i] = 0xff;
> +	}
> +
> +	/* vxlan-gpe flags */
> +	vxlan_gpe_spec.flags = 0x0c;
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE;
> +	items[items_counter].spec = &vxlan_gpe_spec;
> +	items[items_counter].mask = &vxlan_gpe_mask;
> +}
> +
> +void
> +add_gre(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	uint16_t proto = GRE_PROTO;
> +	memset(&gre_spec, 0, sizeof(struct rte_flow_item_gre));
> +	memset(&gre_mask, 0, sizeof(struct rte_flow_item_gre));
> +
> +	gre_spec.protocol = RTE_BE16(proto);
> +	gre_mask.protocol = 0xffff;
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_GRE;
> +	items[items_counter].spec = &gre_spec;
> +	items[items_counter].mask = &gre_mask;
> +}
> +
> +void
> +add_geneve(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	uint32_t vni_value = VNI_VALUE;
> +	uint8_t i;
> +	memset(&geneve_spec, 0, sizeof(struct rte_flow_item_geneve));
> +	memset(&geneve_mask, 0, sizeof(struct rte_flow_item_geneve));
> +
> +	for (i = 0; i < 3; i++) {
> +		geneve_spec.vni[2 - i] = vni_value >> (i * 8);
> +		geneve_mask.vni[2 - i] = 0xff;
> +	}
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_GENEVE;
> +	items[items_counter].spec = &geneve_spec;
> +	items[items_counter].mask = &geneve_mask;
> +}
> +
> +void
> +add_gtp(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	uint32_t teid_value = TEID_VALUE;
> +	memset(&gtp_spec, 0, sizeof(struct rte_flow_item_gtp));
> +	memset(&gtp_mask, 0, sizeof(struct rte_flow_item_gtp));
> +
> +	gtp_spec.teid = RTE_BE32(teid_value);
> +	gtp_mask.teid = RTE_BE32(0xffffffff);
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_GTP;
> +	items[items_counter].spec = &gtp_spec;
> +	items[items_counter].mask = &gtp_mask;
> +}
> +
> +void
> +add_meta_data(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	uint32_t data = META_DATA;
> +	memset(&meta_spec, 0, sizeof(struct rte_flow_item_meta));
> +	memset(&meta_mask, 0, sizeof(struct rte_flow_item_meta));
> +
> +	meta_spec.data = RTE_BE32(data);
> +	meta_mask.data = RTE_BE32(0xffffffff);
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_META;
> +	items[items_counter].spec = &meta_spec;
> +	items[items_counter].mask = &meta_mask;
> +}
> +
> +
> +void
> +add_meta_tag(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	uint32_t data = META_DATA;
> +	uint8_t index = TAG_INDEX;
> +	memset(&tag_spec, 0, sizeof(struct rte_flow_item_tag));
> +	memset(&tag_mask, 0, sizeof(struct rte_flow_item_tag));
> +
> +	tag_spec.data = RTE_BE32(data);
> +	tag_mask.data = RTE_BE32(0xffffffff);
> +	tag_spec.index = index;
> +	tag_mask.index = 0xff;
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_TAG;
> +	items[items_counter].spec = &tag_spec;
> +	items[items_counter].mask = &tag_mask;
> +}
> diff --git a/app/test-flow-perf/items_gen.h b/app/test-flow-perf/items_gen.h
> new file mode 100644
> index 0000000000..0b01385951
> --- /dev/null
> +++ b/app/test-flow-perf/items_gen.h
> @@ -0,0 +1,68 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + *
> + * This file contains the items related methods
> + *
> + * Copyright 2020 Mellanox Technologies, Ltd
> + */
> +
> +#ifndef _ITEMS_GEN_
> +#define _ITEMS_GEN_
> +
> +#include <stdint.h>
> +#include <rte_flow.h>
> +
> +#include "user_parameters.h"
> +
> +void
> +add_ether(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter);
> +
> +void
> +add_vlan(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter);
> +
> +void
> +add_ipv4(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter, uint32_t src_ipv4);
> +
> +void
> +add_ipv6(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter, int src_ipv6);
> +
> +void
> +add_udp(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter);
> +
> +void
> +add_tcp(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter);
> +
> +void
> +add_vxlan(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter);
> +
> +void
> +add_vxlan_gpe(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter);
> +
> +void
> +add_gre(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter);
> +
> +void
> +add_geneve(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter);
> +
> +void
> +add_gtp(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter);
> +
> +void
> +add_meta_data(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter);
> +
> +void
> +add_meta_tag(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter);
> +
> +#endif
> diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
> index 156b9ef553..115af4f302 100644
> --- a/app/test-flow-perf/main.c
> +++ b/app/test-flow-perf/main.c
> @@ -49,29 +49,119 @@
>  #include <rte_cycles.h>
>  #include <rte_memory.h>
>  
> +#include "flow_gen.h"
>  #include "user_parameters.h"
>  
> -static uint32_t nb_lcores;
> +#define MAX_ITERATIONS 100
> +
> +struct rte_flow *flow;
> +static uint8_t flow_group;
> +
> +static uint16_t flow_items;
> +static uint16_t flow_actions;
> +static uint8_t flow_attrs;
> +static volatile bool force_quit;
> +static volatile bool dump_iterations;
>  static struct rte_mempool *mbuf_mp;
> +static uint32_t nb_lcores;
> +static uint32_t flows_count;
> +static uint32_t iterations_number;

Global static variables again.

>  static void usage(char *progname)
>  {
>  	printf("\nusage: %s", progname);
> +	printf("\nControl configurations:\n");
> +	printf("  --flows-count=N: to set the number of needed"
> +		" flows to insert, default is 4,000,000\n");
> +	printf("  --dump-iterations: To print rates for each"
> +		" iteration\n");
> +
> +	printf("To set flow attributes:\n");
> +	printf("  --ingress: set ingress attribute in flows\n");
> +	printf("  --egress: set egress attribute in flows\n");
> +	printf("  --transfer: set transfer attribute in flows\n");
> +	printf("  --group=N: set group for all flows,"
> +		" default is 0\n");
> +
> +	printf("To set flow items:\n");
> +	printf("  --ether: add ether layer in flow items\n");
> +	printf("  --vlan: add vlan layer in flow items\n");
> +	printf("  --ipv4: add ipv4 layer in flow items\n");
> +	printf("  --ipv6: add ipv6 layer in flow items\n");
> +	printf("  --tcp: add tcp layer in flow items\n");
> +	printf("  --udp: add udp layer in flow items\n");
> +	printf("  --vxlan: add vxlan layer in flow items\n");
> +	printf("  --vxlan-gpe: add vxlan-gpe layer in flow items\n");
> +	printf("  --gre: add gre layer in flow items\n");
> +	printf("  --geneve: add geneve layer in flow items\n");
> +	printf("  --gtp: add gtp layer in flow items\n");
> +	printf("  --meta: add meta layer in flow items\n");
> +	printf("  --tag: add tag layer in flow items\n");
> +
> +	printf("To set flow actions:\n");
> +	printf("  --port-id: add port-id action in flow actions\n");
> +	printf("  --rss: add rss action in flow actions\n");
> +	printf("  --queue: add queue action in flow actions\n");
> +	printf("  --jump: add jump action in flow actions\n");
> +	printf("  --mark: add mark action in flow actions\n");
> +	printf("  --count: add count action in flow actions\n");
> +	printf("  --set-meta: add set meta action in flow actions\n");
> +	printf("  --set-tag: add set tag action in flow actions\n");
> +	printf("  --drop: add drop action in flow actions\n");
> +	printf("  --hairpin-queue: add hairpin-queue action in flow actions\n");
> +	printf("  --hairpin-rss: add hairping-rss action in flow actions\n");
>  }
>  
>  static void
>  args_parse(int argc, char **argv)
>  {
>  	char **argvopt;
> -	int opt;
> +	int n, opt;
>  	int opt_idx;
>  	static struct option lgopts[] = {
>  		/* Control */
>  		{ "help",                       0, 0, 0 },
> +		{ "flows-count",                1, 0, 0 },
> +		{ "dump-iterations",            0, 0, 0 },
> +		/* Attributes */
> +		{ "ingress",                    0, 0, 0 },
> +		{ "egress",                     0, 0, 0 },
> +		{ "transfer",                   0, 0, 0 },
> +		{ "group",                      1, 0, 0 },
> +		/* Items */
> +		{ "ether",                      0, 0, 0 },
> +		{ "vlan",                       0, 0, 0 },
> +		{ "ipv4",                       0, 0, 0 },
> +		{ "ipv6",                       0, 0, 0 },
> +		{ "tcp",                        0, 0, 0 },
> +		{ "udp",                        0, 0, 0 },
> +		{ "vxlan",                      0, 0, 0 },
> +		{ "vxlan-gpe",                  0, 0, 0 },
> +		{ "gre",                        0, 0, 0 },
> +		{ "geneve",                     0, 0, 0 },
> +		{ "gtp",                        0, 0, 0 },
> +		{ "meta",                       0, 0, 0 },
> +		{ "tag",                        0, 0, 0 },
> +		/* Actions */
> +		{ "port-id",                    0, 0, 0 },
> +		{ "rss",                        0, 0, 0 },
> +		{ "queue",                      0, 0, 0 },
> +		{ "jump",                       0, 0, 0 },
> +		{ "mark",                       0, 0, 0 },
> +		{ "count",                      0, 0, 0 },
> +		{ "set-meta",                   0, 0, 0 },
> +		{ "set-tag",                    0, 0, 0 },
> +		{ "drop",                       0, 0, 0 },
> +		{ "hairpin-queue",              0, 0, 0 },
> +		{ "hairpin-rss",                0, 0, 0 },
>  	};
>  
> +	flow_items = 0;
> +	flow_actions = 0;
> +	flow_attrs = 0;
>  	argvopt = argv;
>  
> +	printf(":: Flow -> ");
>  	while ((opt = getopt_long(argc, argvopt, "",
>  				lgopts, &opt_idx)) != EOF) {
>  		switch (opt) {
> @@ -80,6 +170,140 @@ args_parse(int argc, char **argv)
>  				usage(argv[0]);
>  				rte_exit(EXIT_SUCCESS, "Displayed help\n");
>  			}
> +			/* Attributes */
> +			if (!strcmp(lgopts[opt_idx].name, "ingress")) {
> +				flow_attrs |= INGRESS;
> +				printf("ingress ");
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "egress")) {
> +				flow_attrs |= EGRESS;
> +				printf("egress ");
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "transfer")) {
> +				flow_attrs |= TRANSFER;
> +				printf("transfer ");
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "group")) {
> +				n = atoi(optarg);
> +				if (n >= 0)
> +					flow_group = n;
> +				else
> +					rte_exit(EXIT_SUCCESS,
> +						"flow group should be >= 0");
> +				printf("group %d ", flow_group);
> +			}
> +			/* Items */
> +			if (!strcmp(lgopts[opt_idx].name, "ether")) {
> +				flow_items |= ETH_ITEM;
> +				printf("ether / ");
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "ipv4")) {
> +				flow_items |= IPV4_ITEM;
> +				printf("ipv4 / ");
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "vlan")) {
> +				flow_items |= VLAN_ITEM;
> +				printf("vlan / ");
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "ipv6")) {
> +				flow_items |= IPV6_ITEM;
> +				printf("ipv6 / ");
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "tcp")) {
> +				flow_items |= TCP_ITEM;
> +				printf("tcp / ");
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "udp")) {
> +				flow_items |= UDP_ITEM;
> +				printf("udp / ");
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "vxlan")) {
> +				flow_items |= VXLAN_ITEM;
> +				printf("vxlan / ");
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "vxlan-gpe")) {
> +				flow_items |= VXLAN_GPE_ITEM;
> +				printf("vxlan-gpe / ");
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "gre")) {
> +				flow_items |= GRE_ITEM;
> +				printf("gre / ");
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "geneve")) {
> +				flow_items |= GENEVE_ITEM;
> +				printf("geneve / ");
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "gtp")) {
> +				flow_items |= GTP_ITEM;
> +				printf("gtp / ");
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "meta")) {
> +				flow_items |= META_ITEM;
> +				printf("meta / ");
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "tag")) {
> +				flow_items |= TAG_ITEM;
> +				printf("tag / ");
> +			}

I think this is doable using a statically filled-in helper structure,
an array of such structures, and a loop.

> +			/* Actions */
> +			if (!strcmp(lgopts[opt_idx].name, "port-id")) {
> +				flow_actions |= PORT_ID_ACTION;
> +				printf("port-id / ");
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "rss")) {
> +				flow_actions |= RSS_ACTION;
> +				printf("rss / ");
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "hairpin-rss")) {
> +				flow_actions |= HAIRPIN_RSS_ACTION;
> +				printf("hairpin-rss / ");
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "queue")) {
> +				flow_actions |= QUEUE_ACTION;
> +				printf("queue / ");
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "hairpin-queue")) {
> +				flow_actions |= HAIRPIN_QUEUE_ACTION;
> +				printf("hairpin-queue / ");
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "jump")) {
> +				flow_actions |= JUMP_ACTION;
> +				printf("jump / ");
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "mark")) {
> +				flow_actions |= MARK_ACTION;
> +				printf("mark / ");
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "count")) {
> +				flow_actions |= COUNT_ACTION;
> +				printf("count / ");
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "set-meta")) {
> +				flow_actions |= META_ACTION;
> +				printf("set-meta / ");
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "set-tag")) {
> +				flow_actions |= TAG_ACTION;
> +				printf("set-tag / ");
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "drop")) {
> +				flow_actions |= DROP_ACTION;
> +				printf("drop / ");
> +			}

Same.

> +			/* Control */
> +			if (!strcmp(lgopts[opt_idx].name, "flows-count")) {
> +				n = atoi(optarg);
> +				if (n > (int) iterations_number)
> +					flows_count = n;
> +				else {
> +					printf("\n\nflows_count should be > %d",
> +						iterations_number);
> +					rte_exit(EXIT_SUCCESS, " ");
> +				}
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "dump-iterations"))
> +				dump_iterations = true;
>  			break;
>  		default:
>  			usage(argv[0]);
> @@ -88,6 +312,128 @@ args_parse(int argc, char **argv)
>  			break;
>  		}
>  	}
> +	printf("end_flow\n");
> +}
> +
> +static void
> +print_flow_error(struct rte_flow_error error)
> +{
> +	printf("Flow can't be created %d message: %s\n",
> +		error.type,
> +		error.message ? error.message : "(no stated reason)");
> +}
> +
> +static inline void
> +flows_handler(void)
> +{
> +	struct rte_flow_error error;
> +	clock_t start_iter, end_iter;
> +	double cpu_time_used;
> +	double flows_rate;
> +	double cpu_time_per_iter[MAX_ITERATIONS];
> +	double delta;
> +	uint16_t nr_ports;
> +	uint32_t i;
> +	int port_id;
> +	int iter_id;
> +	uint32_t eagain_counter = 0;
> +
> +	nr_ports = rte_eth_dev_count_avail();
> +
> +	for (i = 0; i < MAX_ITERATIONS; i++)
> +		cpu_time_per_iter[i] = -1;
> +
> +	if (iterations_number > flows_count)
> +		iterations_number = flows_count;
> +
> +	printf(":: Flows Count per port: %d\n", flows_count);
> +
> +	for (port_id = 0; port_id < nr_ports; port_id++) {
> +		cpu_time_used = 0;
> +		if (flow_group > 0) {
> +			/*
> +			 * Create global rule to jumo into flow_group
> +			 * This way the app will avoid the default rules
> +			 *
> +			 * Golbal rule:
> +			 * group 0 eth / end actions jump group <flow_group>
> +			 *
> +			 */
> +			flow = generate_flow(port_id, 0, flow_attrs, ETH_ITEM,
> +				JUMP_ACTION, flow_group, 0, &error);
> +
> +			if (!flow) {
> +				print_flow_error(error);
> +				rte_exit(EXIT_FAILURE, "error in creating flow");
> +			}
> +		}
> +
> +		/* Insertion Rate */
> +		printf("Flows insertion on port = %d\n", port_id);
> +		start_iter = clock();
> +		for (i = 0; i < flows_count; i++) {
> +			do {
> +				rte_errno = 0;
> +				flow = generate_flow(port_id, flow_group,
> +					flow_attrs, flow_items, flow_actions,
> +					JUMP_ACTION_TABLE, i,  &error);
> +				if (!flow)
> +					eagain_counter++;
> +			} while (rte_errno == EAGAIN);
> +
> +			if (force_quit)
> +				i = flows_count;
> +
> +			if (!flow) {
> +				print_flow_error(error);
> +				rte_exit(EXIT_FAILURE, "error in creating flow");
> +			}
> +
> +			if (i && !((i + 1) % iterations_number)) {
> +				/* Save the insertion rate of each iter */
> +				end_iter = clock();
> +				delta = (double) (end_iter - start_iter);
> +				iter_id = ((i + 1) / iterations_number) - 1;
> +				cpu_time_per_iter[iter_id] =
> +					delta / CLOCKS_PER_SEC;
> +				cpu_time_used += cpu_time_per_iter[iter_id];
> +				start_iter = clock();
> +			}
> +		}
> +
> +		/* Iteration rate per iteration */
> +		if (dump_iterations)
> +			for (i = 0; i < MAX_ITERATIONS; i++) {
> +				if (cpu_time_per_iter[i] == -1)
> +					continue;
> +				delta = (double)(iterations_number /
> +					cpu_time_per_iter[i]);
> +				flows_rate = delta / 1000;
> +				printf(":: Iteration #%d: %d flows "
> +					"in %f sec[ Rate = %f K/Sec ]\n",
> +					i, iterations_number,
> +					cpu_time_per_iter[i], flows_rate);
> +			}
> +
> +		/* Insertion rate for all flows */
> +		flows_rate = ((double) (flows_count / cpu_time_used) / 1000);
> +		printf("\n:: Total flow insertion rate -> %f K/Sec\n",
> +						flows_rate);
> +		printf(":: The time for creating %d in flows %f seconds\n",
> +						flows_count, cpu_time_used);
> +		printf(":: EAGAIN counter = %d\n", eagain_counter);
> +	}
> +}
> +
> +static void
> +signal_handler(int signum)
> +{
> +	if (signum == SIGINT || signum == SIGTERM) {
> +		printf("\n\nSignal %d received, preparing to exit...\n",
> +					signum);
> +		printf("Error: Stats are wrong due to sudden signal!\n\n");
> +		force_quit = true;
> +	}
>  }
>  
>  static void
> @@ -96,6 +442,8 @@ init_port(void)
>  	int ret;
>  	uint16_t i, j;
>  	uint16_t port_id;
> +	uint16_t nr_queues;
> +	bool hairpin_flag = false;
>  	uint16_t nr_ports = rte_eth_dev_count_avail();
>  	struct rte_eth_hairpin_conf hairpin_conf = {
>  			.peer_count = 1,
> @@ -115,6 +463,13 @@ init_port(void)
>  	struct rte_eth_rxconf rxq_conf;
>  	struct rte_eth_dev_info dev_info;
>  
> +	nr_queues = RXQs;
> +	if (flow_actions & HAIRPIN_QUEUE_ACTION ||
> +		flow_actions & HAIRPIN_RSS_ACTION) {
> +		nr_queues = RXQs + HAIRPIN_QUEUES;
> +		hairpin_flag = true;
> +	}
> +
>  	if (nr_ports == 0)
>  		rte_exit(EXIT_FAILURE, "Error: no port detected\n");
>  	mbuf_mp = rte_pktmbuf_pool_create("mbuf_pool",
> @@ -134,8 +489,8 @@ init_port(void)
>  
>  		port_conf.txmode.offloads &= dev_info.tx_offload_capa;
>  		printf(":: initializing port: %d\n", port_id);
> -		ret = rte_eth_dev_configure(port_id, RXQs + HAIRPIN_QUEUES,
> -				TXQs + HAIRPIN_QUEUES, &port_conf);
> +		ret = rte_eth_dev_configure(port_id, nr_queues,
> +				nr_queues, &port_conf);
>  		if (ret < 0)
>  			rte_exit(EXIT_FAILURE,
>  					":: cannot configure device: err=%d, port=%u\n",
> @@ -173,26 +528,30 @@ init_port(void)
>  					":: promiscuous mode enable failed: err=%s, port=%u\n",
>  					rte_strerror(-ret), port_id);
>  
> -		for (i = RXQs, j = 0; i < RXQs + HAIRPIN_QUEUES; i++, j++) {
> -			hairpin_conf.peers[0].port = port_id;
> -			hairpin_conf.peers[0].queue = j + TXQs;
> -			ret = rte_eth_rx_hairpin_queue_setup(port_id, i,
> -							NR_RXD, &hairpin_conf);
> -			if (ret != 0)
> -				rte_exit(EXIT_FAILURE,
> -					":: Hairpin rx queue setup failed: err=%d, port=%u\n",
> -					ret, port_id);
> -		}
> +		if (hairpin_flag) {
> +			for (i = RXQs, j = 0;
> +					i < RXQs + HAIRPIN_QUEUES; i++, j++) {
> +				hairpin_conf.peers[0].port = port_id;
> +				hairpin_conf.peers[0].queue = j + TXQs;
> +				ret = rte_eth_rx_hairpin_queue_setup(port_id, i,
> +					NR_RXD, &hairpin_conf);
> +				if (ret != 0)
> +					rte_exit(EXIT_FAILURE,
> +						":: Hairpin rx queue setup failed: err=%d, port=%u\n",
> +						ret, port_id);
> +			}

I think it means that the code should be in a helper
function from the very beginning.


> -		for (i = TXQs, j = 0; i < TXQs + HAIRPIN_QUEUES; i++, j++) {
> -			hairpin_conf.peers[0].port = port_id;
> -			hairpin_conf.peers[0].queue = j + RXQs;
> -			ret = rte_eth_tx_hairpin_queue_setup(port_id, i,
> -							NR_TXD, &hairpin_conf);
> -			if (ret != 0)
> -				rte_exit(EXIT_FAILURE,
> -					":: Hairpin tx queue setup failed: err=%d, port=%u\n",
> -					ret, port_id);
> +			for (i = TXQs, j = 0;
> +					i < TXQs + HAIRPIN_QUEUES; i++, j++) {
> +				hairpin_conf.peers[0].port = port_id;
> +				hairpin_conf.peers[0].queue = j + RXQs;
> +				ret = rte_eth_tx_hairpin_queue_setup(port_id, i,
> +					NR_TXD, &hairpin_conf);
> +				if (ret != 0)
> +					rte_exit(EXIT_FAILURE,
> +						":: Hairpin tx queue setup failed: err=%d, port=%u\n",
> +						ret, port_id);
> +			}


I think it means that the code should be in a helper
function from the very beginning.

>  		}
>  
>  		ret = rte_eth_dev_start(port_id);
> @@ -219,6 +578,15 @@ main(int argc, char **argv)
>  	if (ret < 0)
>  		rte_exit(EXIT_FAILURE, "EAL init failed\n");
>  
> +	force_quit = false;
> +	dump_iterations = false;
> +	flows_count = 4000000;
> +	iterations_number = 100000;
> +	flow_group = 0;
> +
> +	signal(SIGINT, signal_handler);
> +	signal(SIGTERM, signal_handler);
> +
>  	argc -= ret;
>  	argv += ret;
>  
> @@ -232,6 +600,8 @@ main(int argc, char **argv)
>  	if (nb_lcores <= 1)
>  		rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
>  
> +	flows_handler();
> +
>  	RTE_LCORE_FOREACH_SLAVE(lcore_id)
>  
>  	if (rte_eal_wait_lcore(lcore_id) < 0)
> diff --git a/app/test-flow-perf/meson.build b/app/test-flow-perf/meson.build
> index ec9bb3b3aa..b3941f5c2d 100644
> --- a/app/test-flow-perf/meson.build
> +++ b/app/test-flow-perf/meson.build
> @@ -5,7 +5,15 @@
>  #
>  # To build this example as a standalone application with an already-installed
>  # DPDK instance, use 'make'
> +name = 'flow_perf'
> +allow_experimental_apis = true
> +cflags += '-Wno-deprecated-declarations'
> +cflags += '-Wunused-function'
>  
>  sources = files(
> +	'actions_gen.c',
> +	'flow_gen.c',
> +	'items_gen.c',
>  	'main.c',
>  )
> +deps += ['ethdev']
> diff --git a/app/test-flow-perf/user_parameters.h b/app/test-flow-perf/user_parameters.h
> index 56ec7f47b5..1d157430b6 100644
> --- a/app/test-flow-perf/user_parameters.h
> +++ b/app/test-flow-perf/user_parameters.h
> @@ -14,3 +14,18 @@
>  #define MBUF_CACHE_SIZE 512
>  #define NR_RXD  256
>  #define NR_TXD  256
> +
> +/** Items/Actions parameters **/
> +#define JUMP_ACTION_TABLE 2
> +#define VLAN_VALUE 1
> +#define VNI_VALUE 1
> +#define GRE_PROTO  0x6558
> +#define META_DATA 1
> +#define TAG_INDEX 0
> +#define PORT_ID_DST 1
> +#define MARK_ID 1
> +#define TEID_VALUE 1
> +
> +/** Flow items/acctions max size **/
> +#define MAX_ITEMS_NUM 20
> +#define MAX_ACTIONS_NUM 20
> diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
> index 30ce1b6cc0..62e038c430 100644
> --- a/doc/guides/tools/flow-perf.rst
> +++ b/doc/guides/tools/flow-perf.rst
> @@ -4,7 +4,19 @@
>  RTE Flow performance tool
>  =========================
>  
> -Application for rte_flow performance testing.
> +Application for rte_flow performance testing. The application provides the
> +ability to test insertion rate of specific rte_flow rule, by stressing it
> +to the NIC, and calculate the insertion rate.
> +
> +The application offers some options in the command line, to configure
> +which rule to apply.
> +
> +After that the application will start producing rules with same pattern
> +but increasing the outer IP source address by 1 each time, thus it will
> +give different flow each time, and all other items will have open masks.
> +
> +The current design supports single-core insertion rate only. In the future
> +we may add multi-core insertion rate measurement support to the app.
>  
>  
>  Compiling the Application
> @@ -61,9 +73,179 @@ a ``--`` separator:
>  
>  .. code-block:: console
>  
> -	sudo ./test-flow-perf -n 4 -w 08:00.0,dv_flow_en=1 --
> +	sudo ./flow_perf -n 4 -w 08:00.0,dv_flow_en=1 -- --ingress --ether --ipv4 --queue --flows-count=1000000
>  
>  The command line options are:
>  
>  *	``--help``
>  	Display a help message and quit.
> +
> +*	``--flows-count=N``
> +	Set the number of needed flows to insert,
> +	where 1 <= N <= "number of flows".
> +	The default value is 4,000,000.
> +
> +*	``--dump-iterations``
> +	Print rates for each iteration of flows.
> +	Default iteration is 100,000.
> +
> +
> +Attributes:
> +
> +*	``--ingress``
> +	Set Ingress attribute to all flows attributes.
> +
> +*	``--egress``
> +	Set Egress attribute to all flows attributes.
> +
> +*	``--transfer``
> +	Set Transfer attribute to all flows attributes.
> +
> +*	``--group=N``
> +	Set group for all flows, where N >= 0.
> +	Default group is 0.
> +
> +Items:
> +
> +*	``--ether``
> +	Add Ether item to all flows items, This item have open mask.
> +
> +*	``--vlan``
> +	Add VLAN item to all flows items,
> +	This item have VLAN value defined in user_parameters.h
> +	under ``VLAN_VALUE`` with full mask, default value = 1.
> +	Other fields are open mask.
> +
> +*	``--ipv4``
> +	Add IPv4 item to all flows items,
> +	This item have incremental source IP, with full mask.
> +	Other fields are open mask.
> +
> +*	``--ipv6``
> +	Add IPv6 item to all flows item,
> +	This item have incremental source IP, with full mask.
> +	Other fields are open mask.
> +
> +*	``--tcp``
> +	Add TCP item to all flows items, This item have open mask.
> +
> +*	``--udp``
> +	Add UDP item to all flows items, This item have open mask.
> +
> +*	``--vxlan``
> +	Add VXLAN item to all flows items,
> +	This item have VNI value defined in user_parameters.h
> +	under ``VNI_VALUE`` with full mask, default value = 1.
> +	Other fields are open mask.
> +
> +*	``--vxlan-gpe``
> +	Add VXLAN-GPE item to all flows items,
> +	This item have VNI value defined in user_parameters.h
> +	under ``VNI_VALUE`` with full mask, default value = 1.
> +	Other fields are open mask.
> +
> +*	``--gre``
> +	Add GRE item to all flows items,
> +	This item have protocol value defined in user_parameters.h
> +	under ``GRE_PROTO`` with full mask, default protocol = 0x6558 "Ether"
> +	Other fields are open mask.
> +
> +*	``--geneve``
> +	Add GENEVE item to all flows items,
> +	This item have VNI value defined in user_parameters.h
> +	under ``VNI_VALUE`` with full mask, default value = 1.
> +	Other fields are open mask.
> +
> +*	``--gtp``
> +	Add GTP item to all flows items,
> +	This item have TEID value defined in user_parameters.h
> +	under ``TEID_VALUE`` with full mask, default value = 1.
> +	Other fields are open mask.
> +
> +*	``--meta``
> +	Add Meta item to all flows items,
> +	This item have data value defined in user_parameters.h
> +	under ``META_DATA`` with full mask, default value = 1.
> +	Other fields are open mask.
> +
> +*	``--tag``
> +	Add Tag item to all flows items,
> +	This item have data value defined in user_parameters.h
> +	under ``META_DATA`` with full mask, default value = 1.
> +
> +	Also it have tag value defined in user_parameters.h
> +	under ``TAG_INDEX`` with full mask, default value = 0.
> +	Other fields are open mask.
> +
> +
> +Actions:
> +
> +*	``--port-id``
> +	Add port redirection action to all flows actions.
> +	Port redirection destination is defined in user_parameters.h
> +	under PORT_ID_DST, default value = 1.
> +
> +*	``--rss``
> +	Add RSS action to all flows actions,
> +	The queues in RSS action will be all queues configured
> +	in the app.
> +
> +*	``--queue``
> +	Add queue action to all flows items,
> +	The queue will change in round robin state for each flow.
> +
> +	For example:
> +		The app running with 4 RX queues
> +		Flow #0: queue index 0
> +		Flow #1: queue index 1
> +		Flow #2: queue index 2
> +		Flow #3: queue index 3
> +		Flow #4: queue index 0
> +		...
> +
> +*	``--jump``
> +	Add jump action to all flows actions.
> +	Jump action destination is defined in user_parameters.h
> +	under ``JUMP_ACTION_TABLE``, default value = 2.
> +
> +*	``--mark``
> +	Add mark action to all flows actions.
> +	Mark action id is defined in user_parameters.h
> +	under ``MARK_ID``, default value = 1.
> +
> +*	``--count``
> +	Add count action to all flows actions.
> +
> +*	``--set-meta``
> +	Add set-meta action to all flows actions.
> +	Meta data is defined in user_parameters.h under ``META_DATA``
> +	with full mask, default value = 1.
> +
> +*	``--set-tag``
> +	Add set-tag action to all flows actions.
> +	Meta data is defined in user_parameters.h under ``META_DATA``
> +	with full mask, default value = 1.
> +
> +	Tag index is defined in user_parameters.h under ``TAG_INDEX``
> +	with full mask, default value = 0.
> +
> +*	``--drop``
> +	Add drop action to all flows actions.
> +
> +*	``--hairpin-queue``
> +	Add hairpin queue action to all flows actions.
> +	The queue will change in round robin state for each flow.
> +
> +	For example:
> +		The app running with 4 RX hairpin queues and 4 normal RX queues
> +		Flow #0: queue index 4
> +		Flow #1: queue index 5
> +		Flow #2: queue index 6
> +		Flow #3: queue index 7
> +		Flow #4: queue index 4
> +		...
> +
> +*	``--hairpin-rss``
> +	Add hairpin RSS action to all flows actions.
> +	The queues in RSS action will be all hairpin queues configured
> +	in the app.
> 


^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH v4 1/5] app/test-flow-perf: add flow performance skeleton
  2020-05-04 10:16               ` Andrew Rybchenko
@ 2020-05-05 10:45                 ` Wisam Monther
  2020-05-05 11:05                   ` Thomas Monjalon
  2020-05-05 10:47                 ` Wisam Monther
  2020-05-06  2:49                 ` Ajit Khaparde
  2 siblings, 1 reply; 102+ messages in thread
From: Wisam Monther @ 2020-05-05 10:45 UTC (permalink / raw)
  To: Andrew Rybchenko, dev, Jack Min, Thomas Monjalon, jerinjacobk,
	gerlitz.or, l.yan

>-----Original Message-----
>From: Andrew Rybchenko <arybchenko@solarflare.com>
>Sent: Monday, May 4, 2020 1:17 PM
>To: Wisam Monther <wisamm@mellanox.com>; dev@dpdk.org; Jack Min
><jackmin@mellanox.com>; Thomas Monjalon <thomas@monjalon.net>;
>jerinjacobk@gmail.com; gerlitz.or@gmail.com; l.yan@epfl.ch
>Subject: Re: [dpdk-dev] [PATCH v4 1/5] app/test-flow-perf: add flow
>performance skeleton
>
>On 4/30/20 1:33 PM, Wisam Jaddo wrote:
>> Add flow performance application skeleton.
>>
>> Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
>> ---
>>  MAINTAINERS                          |   5 +
>>  app/Makefile                         |   1 +
>>  app/meson.build                      |   1 +
>>  app/test-flow-perf/Makefile          |  26 +++
>>  app/test-flow-perf/main.c            | 246 +++++++++++++++++++++++++++
>>  app/test-flow-perf/meson.build       |  11 ++
>>  app/test-flow-perf/user_parameters.h |  16 ++
>>  config/common_base                   |   5 +
>>  doc/guides/tools/flow-perf.rst       |  69 ++++++++
>>  doc/guides/tools/index.rst           |   1 +
>>  10 files changed, 381 insertions(+)
>>  create mode 100644 app/test-flow-perf/Makefile  create mode 100644
>> app/test-flow-perf/main.c  create mode 100644
>> app/test-flow-perf/meson.build  create mode 100644
>> app/test-flow-perf/user_parameters.h
>>  create mode 100644 doc/guides/tools/flow-perf.rst
>>
>> diff --git a/MAINTAINERS b/MAINTAINERS index d31a809292..b5632c1bf5
>> 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -1504,6 +1504,11 @@ T: git://dpdk.org/next/dpdk-next-net
>>  F: app/test-pmd/
>>  F: doc/guides/testpmd_app_ug/
>>
>> +Flow performance tool
>> +M: Wisam Jaddo <wisamm@mellanox.com>
>> +F: app/test-flow-perf
>> +F: doc/guides/flow-perf.rst
>> +
>
>Shouldn't it be alphabetically sorted? I think by app name.

It does not look like it is.
Current order:
- Sample_packet_forward
- test-pmd
- comp_perf
- eventdev
- proc-info

So I'll move the new app to the end of the test apps, since it was added last. Is this OK?


^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH v4 1/5] app/test-flow-perf: add flow performance skeleton
  2020-05-04 10:16               ` Andrew Rybchenko
  2020-05-05 10:45                 ` Wisam Monther
@ 2020-05-05 10:47                 ` Wisam Monther
  2020-05-06  2:49                 ` Ajit Khaparde
  2 siblings, 0 replies; 102+ messages in thread
From: Wisam Monther @ 2020-05-05 10:47 UTC (permalink / raw)
  To: Andrew Rybchenko, dev, Jack Min, Thomas Monjalon, jerinjacobk,
	gerlitz.or, l.yan



>-----Original Message-----
>From: Andrew Rybchenko <arybchenko@solarflare.com>
>Sent: Monday, May 4, 2020 1:17 PM
>To: Wisam Monther <wisamm@mellanox.com>; dev@dpdk.org; Jack Min
><jackmin@mellanox.com>; Thomas Monjalon <thomas@monjalon.net>;
>jerinjacobk@gmail.com; gerlitz.or@gmail.com; l.yan@epfl.ch
>Subject: Re: [dpdk-dev] [PATCH v4 1/5] app/test-flow-perf: add flow
>performance skeleton
>
>On 4/30/20 1:33 PM, Wisam Jaddo wrote:
>> Add flow performance application skeleton.
>>
>> Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
>> ---
>>  MAINTAINERS                          |   5 +
>>  app/Makefile                         |   1 +
>>  app/meson.build                      |   1 +
>>  app/test-flow-perf/Makefile          |  26 +++
>>  app/test-flow-perf/main.c            | 246 +++++++++++++++++++++++++++
>>  app/test-flow-perf/meson.build       |  11 ++
>>  app/test-flow-perf/user_parameters.h |  16 ++
>>  config/common_base                   |   5 +
>>  doc/guides/tools/flow-perf.rst       |  69 ++++++++
>>  doc/guides/tools/index.rst           |   1 +
>>  10 files changed, 381 insertions(+)
>>  create mode 100644 app/test-flow-perf/Makefile  create mode 100644
>> app/test-flow-perf/main.c  create mode 100644
>> app/test-flow-perf/meson.build  create mode 100644
>> app/test-flow-perf/user_parameters.h
>>  create mode 100644 doc/guides/tools/flow-perf.rst
>>
>> diff --git a/doc/guides/tools/index.rst b/doc/guides/tools/index.rst
>> index 782b30864e..7279daebc6 100644
>> --- a/doc/guides/tools/index.rst
>> +++ b/doc/guides/tools/index.rst
>> @@ -16,3 +16,4 @@ DPDK Tools User Guides
>>      cryptoperf
>>      comp_perf
>>      testeventdev
>> +    flow-perf
>>
>
>I think above should be alphabetically sorted as well.

Same here: the current list is not alphabetically sorted, so I put it at the end.

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH v4 1/5] app/test-flow-perf: add flow performance skeleton
  2020-05-05 10:45                 ` Wisam Monther
@ 2020-05-05 11:05                   ` Thomas Monjalon
  0 siblings, 0 replies; 102+ messages in thread
From: Thomas Monjalon @ 2020-05-05 11:05 UTC (permalink / raw)
  To: Wisam Monther
  Cc: Andrew Rybchenko, dev, Jack Min, jerinjacobk, gerlitz.or, l.yan

05/05/2020 12:45, Wisam Monther:
> From: Andrew Rybchenko <arybchenko@solarflare.com>
> > On 4/30/20 1:33 PM, Wisam Jaddo wrote:
> >> --- a/MAINTAINERS
> >> +++ b/MAINTAINERS
> >> @@ -1504,6 +1504,11 @@ T: git://dpdk.org/next/dpdk-next-net
> >>  F: app/test-pmd/
> >>  F: doc/guides/testpmd_app_ug/
> >>
> >> +Flow performance tool
> >> +M: Wisam Jaddo <wisamm@mellanox.com>
> >> +F: app/test-flow-perf
> >> +F: doc/guides/flow-perf.rst
> >> +
> >
> >Shouldn't it be alphabetically sorted? I think by app name.
> 
> It looks no,
> Current order:
> - Sample_packet_forward
> - test-pmd
> - comp_perf
> - eventdev
> - proc-info
> 
> So I'll move the new app to be in the end of the test apps, since it's been added last, is this ok?

If there is no alphabetical sorting, there should be a logical one.
Having rte_flow perf testing after general ethdev testing (testpmd),
like you did, is good in my opinion.



^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH v4 1/5] app/test-flow-perf: add flow performance skeleton
  2020-05-04 10:16               ` Andrew Rybchenko
  2020-05-05 10:45                 ` Wisam Monther
  2020-05-05 10:47                 ` Wisam Monther
@ 2020-05-06  2:49                 ` Ajit Khaparde
  2020-05-06  7:32                   ` Wisam Monther
  2 siblings, 1 reply; 102+ messages in thread
From: Ajit Khaparde @ 2020-05-06  2:49 UTC (permalink / raw)
  To: Andrew Rybchenko
  Cc: Wisam Jaddo, dpdk-dev, jackmin, Thomas Monjalon, Jerin Jacob,
	gerlitz.or, l.yan

::snip::


> > +             },
> > +             .rx_adv_conf = {
> > +                     .rss_conf.rss_hf =
> > +                                     ETH_RSS_IP  |
> > +                                     ETH_RSS_UDP |
>
> Maybe it is better to remove ETH_RSS_UDP by default,
> since it is less common than RSS for TCP because of
> possible fragmentation, which can result in packets from
> the same stream being delivered to different CPU cores.
>
If we want to enable RSS on L4 headers, then UDP and TCP should be fine.
It's an example app anyway?
Otherwise we can just stick with L3 hash like some of the other examples.
::snip::

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH 1/5] app/test-flow-perf: add flow performance skeleton
  2020-04-09 15:42 ` [dpdk-dev] [PATCH 1/5] app/test-flow-perf: add flow performance skeleton Wisam Jaddo
                     ` (5 preceding siblings ...)
  2020-04-30  7:08   ` [dpdk-dev] [PATCH v2 0/5] *** Introduce flow perf application *** Wisam Jaddo
@ 2020-05-06  3:00   ` Ajit Khaparde
  6 siblings, 0 replies; 102+ messages in thread
From: Ajit Khaparde @ 2020-05-06  3:00 UTC (permalink / raw)
  To: Wisam Jaddo; +Cc: dpdk-dev, jackmin, Jerin Jacob, Thomas Monjalon

::snip::


> +
> +Running the Application
> +=======================
> +
> +EAL Command-line Options
> +------------------------
> +
> +Please refer to :doc:`EAL parameters (Linux)
> <../linux_gsg/linux_eal_parameters>`
> +or :doc:`EAL parameters (FreeBSD)
> <../freebsd_gsg/freebsd_eal_parameters>` for
> +a list of available EAL command-line options.
> +
> +
> +Flow performance Options
> +------------------------
> +
> +The following are the command-line options for the flow performance
> application.
> +They must be separated from the EAL options, shown in the previous
> section, with
> +a ``--`` separator:
> +
> +.. code-block:: console
> +
> +       sudo ./test-flow-perf -n 4 -w 08:00.0,dv_flow_en=1 --
>
Since this is a generic usage doc, the specific arg should not be specified
here.
This example could be in the vendor/pmd specific usage guide though.


> +
> +The command line options are:
> +
> +*      ``--help``
> +       Display a help message and quit.
> diff --git a/doc/guides/tools/index.rst b/doc/guides/tools/index.rst
> index 782b30864e..7279daebc6 100644
> --- a/doc/guides/tools/index.rst
> +++ b/doc/guides/tools/index.rst
> @@ -16,3 +16,4 @@ DPDK Tools User Guides
>      cryptoperf
>      comp_perf
>      testeventdev
> +    flow-perf
> --
> 2.17.1
>
>

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH v4 2/5] app/test-flow-perf: add insertion rate calculation
  2020-04-30 10:33             ` [dpdk-dev] [PATCH v4 2/5] app/test-flow-perf: add insertion rate calculation Wisam Jaddo
  2020-04-30 12:00               ` Xiaoyu Min
  2020-05-04 12:01               ` Andrew Rybchenko
@ 2020-05-06  4:00               ` Ajit Khaparde
  2020-05-06 12:33                 ` Wisam Monther
  2 siblings, 1 reply; 102+ messages in thread
From: Ajit Khaparde @ 2020-05-06  4:00 UTC (permalink / raw)
  To: Wisam Jaddo
  Cc: dpdk-dev, jackmin, Thomas Monjalon, Jerin Jacob, gerlitz.or, l.yan

::snip::

>
> +void
> +gen_rss(uint16_t *queues, uint16_t queues_number)
> +{
> +       uint16_t queue;
> +       struct action_rss_data *rss_data;
> +       rss_data = rte_malloc("rss_data",
> +               sizeof(struct action_rss_data), 0);
> +
> +       if (rss_data == NULL)
> +               rte_exit(EXIT_FAILURE, "No Memory available!");
> +
> +       *rss_data = (struct action_rss_data){
> +               .conf = (struct rte_flow_action_rss){
> +                       .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
> +                       .level = 0,
> +                       .types = ETH_RSS_IP,
> +                       .key_len = 0,
> +                       .queue_num = queues_number,
> +                       .key = 0,
>
Use rte_rand() or some fixed number to generate a key instead of 0.

::snip::

+
> +Actions:
> +
> +*      ``--port-id``
> +       Add port redirection action to all flows actions.
> +       Port redirection destination is defined in user_parameters.h
> +       under PORT_ID_DST, default value = 1.
> +
> +*      ``--rss``
> +       Add RSS action to all flows actions,
> +       The queues in RSS action will be all queues configured
> +       in the app.
>
Maybe use a subset of the queues configured in the app.
We may want to make sure the RSS is happening because of a flow hit,
and not because of default port RSS?
::snip::

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH v4 4/5] app/test-flow-perf: add memory dump to app
  2020-04-30 10:33             ` [dpdk-dev] [PATCH v4 4/5] app/test-flow-perf: add memory dump to app Wisam Jaddo
  2020-04-30 12:03               ` Xiaoyu Min
@ 2020-05-06  4:10               ` Ajit Khaparde
  1 sibling, 0 replies; 102+ messages in thread
From: Ajit Khaparde @ 2020-05-06  4:10 UTC (permalink / raw)
  To: Wisam Jaddo
  Cc: dpdk-dev, jackmin, Thomas Monjalon, Jerin Jacob, gerlitz.or,
	l.yan, Suanming Mou

On Thu, Apr 30, 2020 at 3:34 AM Wisam Jaddo <wisamm@mellanox.com> wrote:

> Introduce new feature to dump memory statistics of each socket
> and a total for all before and after the creation.
>
> This will give two main advantages:
> 1- Check the memory consumption for a large number of flows
> "insertion rate scenario alone"
>
> 2- Check that there is no memory leakage after doing insertion then
> deletion.
>
> Signed-off-by: Suanming Mou <suanmingm@mellanox.com>
> Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
> ---
>  app/test-flow-perf/main.c      | 69 ++++++++++++++++++++++++++++++++++
>  doc/guides/tools/flow-perf.rst |  6 ++-
>  2 files changed, 74 insertions(+), 1 deletion(-)
>
> diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
> index 7c11c0b577..95435910de 100644
> --- a/app/test-flow-perf/main.c
> +++ b/app/test-flow-perf/main.c
> @@ -62,6 +62,7 @@ static uint16_t flow_actions;
>  static uint8_t flow_attrs;
>  static volatile bool force_quit;
>  static volatile bool dump_iterations;
> +static volatile bool dump_socket_mem_flag;
>  static volatile bool delete_flag;
>  static struct rte_mempool *mbuf_mp;
>  static uint32_t nb_lcores;
> @@ -78,6 +79,7 @@ static void usage(char *progname)
>                 " iteration\n");
>         printf("  --deletion-rate: Enable deletion rate"
>                 " calculations\n");
> +       printf("  --dump-socket-mem: to dump all socket memory\n");
>
A nit.. keep the usage text consistent when it comes to lower/upper case.

--dump-iterations: To
                          ^
--deletion-rate: Enable
                      ^
--dump-socket-mem: to
                              ^

::snip::


>

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH v4 1/5] app/test-flow-perf: add flow performance skeleton
  2020-05-06  2:49                 ` Ajit Khaparde
@ 2020-05-06  7:32                   ` Wisam Monther
  2020-05-06  8:48                     ` Andrew Rybchenko
  0 siblings, 1 reply; 102+ messages in thread
From: Wisam Monther @ 2020-05-06  7:32 UTC (permalink / raw)
  To: Ajit Khaparde, Andrew Rybchenko
  Cc: dpdk-dev, Jack Min, Thomas Monjalon, Jerin Jacob, gerlitz.or, l.yan

I agree, since this is a test application,
We can have L4 UDP/TCP rss configuration, since all
Flows/traffic are allowed here and there is nothing to be common here.

From: Ajit Khaparde <ajit.khaparde@broadcom.com>
Sent: Wednesday, May 6, 2020 5:50 AM
To: Andrew Rybchenko <arybchenko@solarflare.com>
Cc: Wisam Monther <wisamm@mellanox.com>; dpdk-dev <dev@dpdk.org>; Jack Min <jackmin@mellanox.com>; Thomas Monjalon <thomas@monjalon.net>; Jerin Jacob <jerinjacobk@gmail.com>; gerlitz.or@gmail.com; l.yan@epfl.ch
Subject: Re: [dpdk-dev] [PATCH v4 1/5] app/test-flow-perf: add flow performance skeleton



::snip::


> +             },
> +             .rx_adv_conf = {
> +                     .rss_conf.rss_hf =
> +                                     ETH_RSS_IP  |
> +                                     ETH_RSS_UDP |

May be it is better to remove ETH_RSS_UDP by default,
since it is less common that RSS for TCP because of
possible fragmentation and packets from the same
stream delivered to different CPU cores.
If we want to enable RSS on L4 headers, then UDP and TCP should be fine.
Its an example app anyway?
Otherwise we can just stick with L3 hash like some of the other examples.
::snip::


^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH v4 1/5] app/test-flow-perf: add flow performance skeleton
  2020-05-06  7:32                   ` Wisam Monther
@ 2020-05-06  8:48                     ` Andrew Rybchenko
  2020-05-06  8:51                       ` Wisam Monther
  0 siblings, 1 reply; 102+ messages in thread
From: Andrew Rybchenko @ 2020-05-06  8:48 UTC (permalink / raw)
  To: Wisam Monther, Ajit Khaparde
  Cc: dpdk-dev, Jack Min, Thomas Monjalon, Jerin Jacob, gerlitz.or, l.yan

On 5/6/20 10:32 AM, Wisam Monther wrote:
> I agree, since this is a test application,
> We can have L4 UDP/TCP rss configuration, since all
> Flows/traffic are allowed here and there is nothing to be common here.

UDP RSS is less common and could simply be unsupported.
So, it will be harder to use the tool for corresponding NICs.

> From: Ajit Khaparde <ajit.khaparde@broadcom.com>
> Sent: Wednesday, May 6, 2020 5:50 AM
> To: Andrew Rybchenko <arybchenko@solarflare.com>
> Cc: Wisam Monther <wisamm@mellanox.com>; dpdk-dev <dev@dpdk.org>; Jack Min <jackmin@mellanox.com>; Thomas Monjalon <thomas@monjalon.net>; Jerin Jacob <jerinjacobk@gmail.com>; gerlitz.or@gmail.com; l.yan@epfl.ch
> Subject: Re: [dpdk-dev] [PATCH v4 1/5] app/test-flow-perf: add flow performance skeleton
>
>
>
> ::snip::
>
>
>> +             },
>> +             .rx_adv_conf = {
>> +                     .rss_conf.rss_hf =
>> +                                     ETH_RSS_IP  |
>> +                                     ETH_RSS_UDP |
> May be it is better to remove ETH_RSS_UDP by default,
> since it is less common that RSS for TCP because of
> possible fragmentation and packets from the same
> stream delivered to different CPU cores.
> If we want to enable RSS on L4 headers, then UDP and TCP should be fine.
> Its an example app anyway?
> Otherwise we can just stick with L3 hash like some of the other examples.
> ::snip::
>


^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH v4 1/5] app/test-flow-perf: add flow performance skeleton
  2020-05-06  8:48                     ` Andrew Rybchenko
@ 2020-05-06  8:51                       ` Wisam Monther
  2020-05-06  8:54                         ` Andrew Rybchenko
  0 siblings, 1 reply; 102+ messages in thread
From: Wisam Monther @ 2020-05-06  8:51 UTC (permalink / raw)
  To: Andrew Rybchenko, Ajit Khaparde
  Cc: dpdk-dev, Jack Min, Thomas Monjalon, Jerin Jacob, gerlitz.or, l.yan



>-----Original Message-----
>From: Andrew Rybchenko <arybchenko@solarflare.com>
>Sent: Wednesday, May 6, 2020 11:48 AM
>To: Wisam Monther <wisamm@mellanox.com>; Ajit Khaparde
><ajit.khaparde@broadcom.com>
>Cc: dpdk-dev <dev@dpdk.org>; Jack Min <jackmin@mellanox.com>; Thomas
>Monjalon <thomas@monjalon.net>; Jerin Jacob <jerinjacobk@gmail.com>;
>gerlitz.or@gmail.com; l.yan@epfl.ch
>Subject: Re: [dpdk-dev] [PATCH v4 1/5] app/test-flow-perf: add flow
>performance skeleton
>
>On 5/6/20 10:32 AM, Wisam Monther wrote:
>> I agree, since this is a test application, We can have L4 UDP/TCP rss
>> configuration, since all Flows/traffic are allowed here and there is
>> nothing to be common here.
>
>UDP RSS is less common and could simply be unsupported.
>So, it will be harder to use the tool for corresponding NICs.

Ok,
So we can go with only IP without TCP
This way we can call the support of hash up to L3 only.
Are we ok with this?

>
>> From: Ajit Khaparde <ajit.khaparde@broadcom.com>
>> Sent: Wednesday, May 6, 2020 5:50 AM
>> To: Andrew Rybchenko <arybchenko@solarflare.com>
>> Cc: Wisam Monther <wisamm@mellanox.com>; dpdk-dev
><dev@dpdk.org>; Jack
>> Min <jackmin@mellanox.com>; Thomas Monjalon
><thomas@monjalon.net>;
>> Jerin Jacob <jerinjacobk@gmail.com>; gerlitz.or@gmail.com;
>> l.yan@epfl.ch
>> Subject: Re: [dpdk-dev] [PATCH v4 1/5] app/test-flow-perf: add flow
>> performance skeleton
>>
>>
>>
>> ::snip::
>>
>>
>>> +             },
>>> +             .rx_adv_conf = {
>>> +                     .rss_conf.rss_hf =
>>> +                                     ETH_RSS_IP  |
>>> +                                     ETH_RSS_UDP |
>> May be it is better to remove ETH_RSS_UDP by default, since it is less
>> common that RSS for TCP because of possible fragmentation and packets
>> from the same stream delivered to different CPU cores.
>> If we want to enable RSS on L4 headers, then UDP and TCP should be fine.
>> Its an example app anyway?
>> Otherwise we can just stick with L3 hash like some of the other examples.
>> ::snip::
>>


^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH v4 1/5] app/test-flow-perf: add flow performance skeleton
  2020-05-06  8:51                       ` Wisam Monther
@ 2020-05-06  8:54                         ` Andrew Rybchenko
  0 siblings, 0 replies; 102+ messages in thread
From: Andrew Rybchenko @ 2020-05-06  8:54 UTC (permalink / raw)
  To: Wisam Monther, Ajit Khaparde
  Cc: dpdk-dev, Jack Min, Thomas Monjalon, Jerin Jacob, gerlitz.or, l.yan

On 5/6/20 11:51 AM, Wisam Monther wrote:
>
>> -----Original Message-----
>> From: Andrew Rybchenko <arybchenko@solarflare.com>
>> Sent: Wednesday, May 6, 2020 11:48 AM
>> To: Wisam Monther <wisamm@mellanox.com>; Ajit Khaparde
>> <ajit.khaparde@broadcom.com>
>> Cc: dpdk-dev <dev@dpdk.org>; Jack Min <jackmin@mellanox.com>; Thomas
>> Monjalon <thomas@monjalon.net>; Jerin Jacob <jerinjacobk@gmail.com>;
>> gerlitz.or@gmail.com; l.yan@epfl.ch
>> Subject: Re: [dpdk-dev] [PATCH v4 1/5] app/test-flow-perf: add flow
>> performance skeleton
>>
>> On 5/6/20 10:32 AM, Wisam Monther wrote:
>>> I agree, since this is a test application, We can have L4 UDP/TCP rss
>>> configuration, since all Flows/traffic are allowed here and there is
>>> nothing to be common here.
>> UDP RSS is less common and could simply be unsupported.
>> So, it will be harder to use the tool for corresponding NICs.
> Ok,
> So we can go with only IP without TCP
> This way we can call the support of hash up to L3 only.
> Are we ok with this?
>

I'm OK with IP+TCP as the most common case.

>>> From: Ajit Khaparde <ajit.khaparde@broadcom.com>
>>> Sent: Wednesday, May 6, 2020 5:50 AM
>>> To: Andrew Rybchenko <arybchenko@solarflare.com>
>>> Cc: Wisam Monther <wisamm@mellanox.com>; dpdk-dev
>> <dev@dpdk.org>; Jack
>>> Min <jackmin@mellanox.com>; Thomas Monjalon
>> <thomas@monjalon.net>;
>>> Jerin Jacob <jerinjacobk@gmail.com>; gerlitz.or@gmail.com;
>>> l.yan@epfl.ch
>>> Subject: Re: [dpdk-dev] [PATCH v4 1/5] app/test-flow-perf: add flow
>>> performance skeleton
>>>
>>>
>>>
>>> ::snip::
>>>
>>>
>>>> +             },
>>>> +             .rx_adv_conf = {
>>>> +                     .rss_conf.rss_hf =
>>>> +                                     ETH_RSS_IP  |
>>>> +                                     ETH_RSS_UDP |
>>> May be it is better to remove ETH_RSS_UDP by default, since it is less
>>> common that RSS for TCP because of possible fragmentation and packets
>>> from the same stream delivered to different CPU cores.
>>> If we want to enable RSS on L4 headers, then UDP and TCP should be fine.
>>> Its an example app anyway?
>>> Otherwise we can just stick with L3 hash like some of the other examples.
>>> ::snip::
>>>


^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH v4 2/5] app/test-flow-perf: add insertion rate calculation
  2020-05-06  4:00               ` Ajit Khaparde
@ 2020-05-06 12:33                 ` Wisam Monther
  0 siblings, 0 replies; 102+ messages in thread
From: Wisam Monther @ 2020-05-06 12:33 UTC (permalink / raw)
  To: Ajit Khaparde
  Cc: dpdk-dev, Jack Min, Thomas Monjalon, Jerin Jacob, gerlitz.or, l.yan

We can use mark + rss in the same rule to check this,
Moreover this will not provide any ability to validate functionality other than creation,
And measure the performance stuff “insertion/deletion/memory consumption & pps”.
So I think rss functionality testing should be done using testpmd.

From: Ajit Khaparde <ajit.khaparde@broadcom.com>
Sent: Wednesday, May 6, 2020 7:00 AM
To: Wisam Monther <wisamm@mellanox.com>
Cc: dpdk-dev <dev@dpdk.org>; Jack Min <jackmin@mellanox.com>; Thomas Monjalon <thomas@monjalon.net>; Jerin Jacob <jerinjacobk@gmail.com>; gerlitz.or@gmail.com; l.yan@epfl.ch
Subject: Re: [dpdk-dev] [PATCH v4 2/5] app/test-flow-perf: add insertion rate calculation
::snip::

+
+Actions:
+
+*      ``--port-id``
+       Add port redirection action to all flows actions.
+       Port redirection destination is defined in user_parameters.h
+       under PORT_ID_DST, default value = 1.
+
+*      ``--rss``
+       Add RSS action to all flows actions,
+       The queues in RSS action will be all queues configured
+       in the app.
Maybe use a subset of the queues configured in the app.
We may want to make sure the RSS is happening because of a flow hit,
and not because of default port RSS?
::snip::

^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v5 0/5] Introduce flow perf application
  2020-04-30 10:33             ` [dpdk-dev] [PATCH v4 1/5] app/test-flow-perf: add flow performance skeleton Wisam Jaddo
  2020-04-30 11:59               ` Xiaoyu Min
  2020-05-04 10:16               ` Andrew Rybchenko
@ 2020-05-06 12:36               ` Wisam Jaddo
  2020-05-06 12:36                 ` [dpdk-dev] [PATCH v5 1/5] app/flow-perf: add flow performance skeleton Wisam Jaddo
                                   ` (5 more replies)
  2 siblings, 6 replies; 102+ messages in thread
From: Wisam Jaddo @ 2020-05-06 12:36 UTC (permalink / raw)
  To: dev, jackmin, thomas, jerinjacobk, gerlitz.or, l.yan, ajit.khaparde

Add new application to test rte flow performance from:
- Insertion rate.
- Deletion rate.
- Memory consumption.
- PPS forward measurement.

---
v5:
* Add app to 20.05 release notes.
* Addressing comments.
* Fix compilation issue for gcc >= 10.
* Fix documentation.
* Remove unneeded CFLAGS.
* Remove unused includes.
* Addressing format comments.
* Move hairpin to be option use only.
* Use RSS hash IP + TCP in ports and rss action.
* Introduce and use new macro for bit flags.

v4:
* Fix compilation error due to variable set but not used.

v3:
* Fix passing hairpin queues to hairpin rss action.

v2:
* reset cpu_time_used every port.
* generate different RSS action every flow with different RETA.
* Fix in commit log message

Wisam Jaddo (5):
  app/flow-perf: add flow performance skeleton
  app/flow-perf: add insertion rate calculation
  app/flow-perf: add deletion rate calculation
  app/flow-perf: add memory dump to app
  app/flow-perf: add packet forwarding support

 MAINTAINERS                            |    5 +
 app/Makefile                           |    1 +
 app/meson.build                        |    1 +
 app/test-flow-perf/Makefile            |   26 +
 app/test-flow-perf/actions_gen.c       |   88 ++
 app/test-flow-perf/actions_gen.h       |   53 ++
 app/test-flow-perf/config.h            |   29 +
 app/test-flow-perf/flow_gen.c          |  179 +++++
 app/test-flow-perf/flow_gen.h          |   63 ++
 app/test-flow-perf/items_gen.c         |  265 +++++++
 app/test-flow-perf/items_gen.h         |   67 ++
 app/test-flow-perf/main.c              | 1014 ++++++++++++++++++++++++
 app/test-flow-perf/meson.build         |   11 +
 config/common_base                     |    5 +
 doc/guides/rel_notes/release_20_05.rst |   10 +
 doc/guides/tools/flow-perf.rst         |  239 ++++++
 doc/guides/tools/index.rst             |    1 +
 17 files changed, 2057 insertions(+)
 create mode 100644 app/test-flow-perf/Makefile
 create mode 100644 app/test-flow-perf/actions_gen.c
 create mode 100644 app/test-flow-perf/actions_gen.h
 create mode 100644 app/test-flow-perf/config.h
 create mode 100644 app/test-flow-perf/flow_gen.c
 create mode 100644 app/test-flow-perf/flow_gen.h
 create mode 100644 app/test-flow-perf/items_gen.c
 create mode 100644 app/test-flow-perf/items_gen.h
 create mode 100644 app/test-flow-perf/main.c
 create mode 100644 app/test-flow-perf/meson.build
 create mode 100644 doc/guides/tools/flow-perf.rst

-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v5 1/5] app/flow-perf: add flow performance skeleton
  2020-05-06 12:36               ` [dpdk-dev] [PATCH v5 0/5] Introduce flow perf application Wisam Jaddo
@ 2020-05-06 12:36                 ` Wisam Jaddo
  2020-05-06 14:25                   ` Andrew Rybchenko
                                     ` (2 more replies)
  2020-05-06 12:36                 ` [dpdk-dev] [PATCH v5 2/5] app/flow-perf: add insertion rate calculation Wisam Jaddo
                                   ` (4 subsequent siblings)
  5 siblings, 3 replies; 102+ messages in thread
From: Wisam Jaddo @ 2020-05-06 12:36 UTC (permalink / raw)
  To: dev, jackmin, thomas, jerinjacobk, gerlitz.or, l.yan, ajit.khaparde

Add flow performance application skeleton.

Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 MAINTAINERS                            |   5 +
 app/Makefile                           |   1 +
 app/meson.build                        |   1 +
 app/test-flow-perf/Makefile            |  23 +++
 app/test-flow-perf/config.h            |  14 ++
 app/test-flow-perf/main.c              | 200 +++++++++++++++++++++++++
 app/test-flow-perf/meson.build         |   8 +
 config/common_base                     |   5 +
 doc/guides/rel_notes/release_20_05.rst |  10 ++
 doc/guides/tools/flow-perf.rst         |  44 ++++++
 doc/guides/tools/index.rst             |   1 +
 11 files changed, 312 insertions(+)
 create mode 100644 app/test-flow-perf/Makefile
 create mode 100644 app/test-flow-perf/config.h
 create mode 100644 app/test-flow-perf/main.c
 create mode 100644 app/test-flow-perf/meson.build
 create mode 100644 doc/guides/tools/flow-perf.rst

diff --git a/MAINTAINERS b/MAINTAINERS
index e05c80504..7d678e15d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1506,6 +1506,11 @@ T: git://dpdk.org/next/dpdk-next-net
 F: app/test-pmd/
 F: doc/guides/testpmd_app_ug/
 
+Flow performance tool
+M: Wisam Jaddo <wisamm@mellanox.com>
+F: app/test-flow-perf
+F: doc/guides/tools/flow-perf.rst
+
 Compression performance test application
 T: git://dpdk.org/next/dpdk-next-crypto
 F: app/test-compress-perf/
diff --git a/app/Makefile b/app/Makefile
index 823771c5f..0392a7de0 100644
--- a/app/Makefile
+++ b/app/Makefile
@@ -10,6 +10,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += pdump
 DIRS-$(CONFIG_RTE_LIBRTE_ACL) += test-acl
 DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += test-cmdline
 DIRS-$(CONFIG_RTE_LIBRTE_FIB) += test-fib
+DIRS-$(CONFIG_RTE_TEST_FLOW_PERF) += test-flow-perf
 DIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += test-pipeline
 DIRS-$(CONFIG_RTE_LIBRTE_IPSEC) += test-sad
 
diff --git a/app/meson.build b/app/meson.build
index 0f7fe9464..408676b06 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -15,6 +15,7 @@ apps = [
 	'test-crypto-perf',
 	'test-eventdev',
 	'test-fib',
+	'test-flow-perf',
 	'test-pipeline',
 	'test-pmd',
 	'test-sad']
diff --git a/app/test-flow-perf/Makefile b/app/test-flow-perf/Makefile
new file mode 100644
index 000000000..db043c17a
--- /dev/null
+++ b/app/test-flow-perf/Makefile
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2020 Mellanox Technologies, Ltd
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifeq ($(CONFIG_RTE_TEST_FLOW_PERF),y)
+
+#
+# application name
+#
+APP = dpdk-test-flow-perf
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-y += main.c
+
+include $(RTE_SDK)/mk/rte.app.mk
+
+endif
diff --git a/app/test-flow-perf/config.h b/app/test-flow-perf/config.h
new file mode 100644
index 000000000..816863de2
--- /dev/null
+++ b/app/test-flow-perf/config.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2020 Mellanox Technologies, Ltd
+ *
+ * This file holds the user parameter values
+ */
+
+/* Configuration */
+#define RXQ_NUM 4
+#define TXQ_NUM 4
+#define TOTAL_MBUF_NUM 32000
+#define MBUF_SIZE 2048
+#define MBUF_CACHE_SIZE 512
+#define NR_RXD  256
+#define NR_TXD  256
diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
new file mode 100644
index 000000000..7a924cdb7
--- /dev/null
+++ b/app/test-flow-perf/main.c
@@ -0,0 +1,200 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2020 Mellanox Technologies, Ltd
+ *
+ * This file contains the application's main function.
+ * This application provides the user the ability to test the
+ * insertion rate for a specific rte_flow rule under stress state (~4M rules).
+ *
+ * Then it will also provide packet per second measurement after installing
+ * all rules, the user may send traffic to test the PPS that match the rules
+ * after all rules are installed, to check performance or functionality after
+ * the stress.
+ *
+ * The flows insertion will go for all ports first, then it will print the
+ * results, after that the application will go into forwarding packets mode
+ * it will start receiving traffic if any and then forwarding it back and
+ * gives packet per second measurement.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <getopt.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <sys/time.h>
+
+#include <rte_malloc.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+#include <rte_flow.h>
+
+#include "config.h"
+
+static uint32_t nb_lcores;
+static struct rte_mempool *mbuf_mp;
+
+static void
+usage(char *progname)
+{
+	printf("\nusage: %s\n", progname);
+}
+
+static void
+args_parse(int argc, char **argv)
+{
+	char **argvopt;
+	int opt;
+	int opt_idx;
+	static struct option lgopts[] = {
+		/* Control */
+		{ "help",                       0, 0, 0 },
+	};
+
+	argvopt = argv;
+
+	while ((opt = getopt_long(argc, argvopt, "",
+				lgopts, &opt_idx)) != EOF) {
+		switch (opt) {
+		case 0:
+			if (!strcmp(lgopts[opt_idx].name, "help")) {
+				usage(argv[0]);
+				rte_exit(EXIT_SUCCESS, "Displayed help\n");
+			}
+			break;
+		default:
+			printf("Invalid option: %s\n", argv[optind]);
+			usage(argv[0]);
+			rte_exit(EXIT_SUCCESS, "Invalid option\n");
+			break;
+		}
+	}
+}
+
+static void
+init_port(void)
+{
+	int ret;
+	uint16_t i;
+	uint16_t port_id;
+	uint16_t nr_ports;
+	struct rte_eth_conf port_conf = {
+		.rx_adv_conf = {
+			.rss_conf.rss_hf =
+				ETH_RSS_IP  |
+				ETH_RSS_TCP,
+		}
+	};
+	struct rte_eth_txconf txq_conf;
+	struct rte_eth_rxconf rxq_conf;
+	struct rte_eth_dev_info dev_info;
+
+	nr_ports = rte_eth_dev_count_avail();
+	if (nr_ports == 0)
+		rte_exit(EXIT_FAILURE, "Error: no port detected\n");
+
+	mbuf_mp = rte_pktmbuf_pool_create("mbuf_pool",
+					TOTAL_MBUF_NUM, MBUF_CACHE_SIZE,
+					0, MBUF_SIZE,
+					rte_socket_id());
+	if (mbuf_mp == NULL)
+		rte_exit(EXIT_FAILURE, "Error: can't init mbuf pool\n");
+
+	for (port_id = 0; port_id < nr_ports; port_id++) {
+		ret = rte_eth_dev_info_get(port_id, &dev_info);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+				"Error during getting device"
+				" (port %u) info: %s\n",
+				port_id, strerror(-ret));
+
+		port_conf.txmode.offloads &= dev_info.tx_offload_capa;
+		port_conf.rxmode.offloads &= dev_info.rx_offload_capa;
+
+		printf(":: initializing port: %d\n", port_id);
+
+		ret = rte_eth_dev_configure(port_id, RXQ_NUM,
+				TXQ_NUM, &port_conf);
+		if (ret < 0)
+			rte_exit(EXIT_FAILURE,
+				":: cannot configure device: err=%d, port=%u\n",
+				ret, port_id);
+
+		rxq_conf = dev_info.default_rxconf;
+		rxq_conf.offloads = port_conf.rxmode.offloads;
+
+		for (i = 0; i < RXQ_NUM; i++) {
+			ret = rte_eth_rx_queue_setup(port_id, i, NR_RXD,
+					rte_eth_dev_socket_id(port_id),
+					&rxq_conf,
+					mbuf_mp);
+			if (ret < 0)
+				rte_exit(EXIT_FAILURE,
+					":: Rx queue setup failed: err=%d, port=%u\n",
+					ret, port_id);
+		}
+
+		txq_conf = dev_info.default_txconf;
+		txq_conf.offloads = port_conf.txmode.offloads;
+
+		for (i = 0; i < TXQ_NUM; i++) {
+			ret = rte_eth_tx_queue_setup(port_id, i, NR_TXD,
+					rte_eth_dev_socket_id(port_id),
+					&txq_conf);
+			if (ret < 0)
+				rte_exit(EXIT_FAILURE,
+					":: Tx queue setup failed: err=%d, port=%u\n",
+					ret, port_id);
+		}
+
+		/* Catch all packets from traffic generator. */
+		ret = rte_eth_promiscuous_enable(port_id);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+				":: promiscuous mode enable failed: err=%s, port=%u\n",
+				rte_strerror(-ret), port_id);
+
+		ret = rte_eth_dev_start(port_id);
+		if (ret < 0)
+			rte_exit(EXIT_FAILURE,
+				"rte_eth_dev_start:err=%d, port=%u\n",
+				ret, port_id);
+
+		printf(":: initializing port: %d done\n", port_id);
+	}
+}
+
+int
+main(int argc, char **argv)
+{
+	int ret;
+	uint16_t port;
+	struct rte_flow_error error;
+
+	ret = rte_eal_init(argc, argv);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "EAL init failed\n");
+
+	argc -= ret;
+	argv += ret;
+	if (argc > 1)
+		args_parse(argc, argv);
+
+	init_port();
+
+	nb_lcores = rte_lcore_count();
+	if (nb_lcores <= 1)
+		rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
+
+	RTE_ETH_FOREACH_DEV(port) {
+		rte_flow_flush(port, &error);
+		rte_eth_dev_stop(port);
+		rte_eth_dev_close(port);
+	}
+	return 0;
+}
diff --git a/app/test-flow-perf/meson.build b/app/test-flow-perf/meson.build
new file mode 100644
index 000000000..25711378f
--- /dev/null
+++ b/app/test-flow-perf/meson.build
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2020 Mellanox Technologies, Ltd
+
+sources = files(
+	'main.c',
+)
+
+deps += ['ethdev']
diff --git a/config/common_base b/config/common_base
index 14000ba07..b2edd5267 100644
--- a/config/common_base
+++ b/config/common_base
@@ -1105,6 +1105,11 @@ CONFIG_RTE_TEST_PMD=y
 CONFIG_RTE_TEST_PMD_RECORD_CORE_CYCLES=n
 CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
 
+#
+# Compile the rte flow perf application
+#
+CONFIG_RTE_TEST_FLOW_PERF=y
+
 #
 # Compile the bbdev test application
 #
diff --git a/doc/guides/rel_notes/release_20_05.rst b/doc/guides/rel_notes/release_20_05.rst
index b124c3f28..258b1e03e 100644
--- a/doc/guides/rel_notes/release_20_05.rst
+++ b/doc/guides/rel_notes/release_20_05.rst
@@ -212,6 +212,16 @@ New Features
   * Added IPsec inbound load-distribution support for ipsec-secgw application
     using NIC load distribution feature(Flow Director).
 
+* **Added flow performance application.**
+
+  Add new application to test rte_flow performance.
+
+  Application features:
+  * Measure rte_flow insertion rate.
+  * Measure rte_flow deletion rate.
+  * Dump rte_flow memory consumption.
+  * Measure packet per second forwarding.
+
 
 Removed Items
 -------------
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
new file mode 100644
index 000000000..49eb450ae
--- /dev/null
+++ b/doc/guides/tools/flow-perf.rst
@@ -0,0 +1,44 @@
+..	SPDX-License-Identifier: BSD-3-Clause
+	Copyright 2020 Mellanox Technologies, Ltd
+
+Flow performance tool
+=====================
+
+Application for rte_flow performance testing.
+
+
+Compiling the Application
+=========================
+
+The ``test-flow-perf`` application is compiled as part of the main compilation
+of the DPDK libraries and tools.
+
+Refer to the DPDK Getting Started Guides for details.
+
+
+Running the Application
+=======================
+
+EAL Command-line Options
+------------------------
+
+Please refer to :doc:`EAL parameters (Linux) <../linux_gsg/linux_eal_parameters>`
+or :doc:`EAL parameters (FreeBSD) <../freebsd_gsg/freebsd_eal_parameters>` for
+a list of available EAL command-line options.
+
+
+Flow performance Options
+------------------------
+
+The following are the command-line options for the flow performance application.
+They must be separated from the EAL options, shown in the previous section,
+with a ``--`` separator:
+
+.. code-block:: console
+
+	sudo ./dpdk-test-flow-perf -n 4 -w 08:00.0 --
+
+The command line options are:
+
+*	``--help``
+	Display a help message and quit.
diff --git a/doc/guides/tools/index.rst b/doc/guides/tools/index.rst
index 782b30864..7279daebc 100644
--- a/doc/guides/tools/index.rst
+++ b/doc/guides/tools/index.rst
@@ -16,3 +16,4 @@ DPDK Tools User Guides
     cryptoperf
     comp_perf
     testeventdev
+    flow-perf
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v5 2/5] app/flow-perf: add insertion rate calculation
  2020-05-06 12:36               ` [dpdk-dev] [PATCH v5 0/5] Introduce flow perf application Wisam Jaddo
  2020-05-06 12:36                 ` [dpdk-dev] [PATCH v5 1/5] app/flow-perf: add flow performance skeleton Wisam Jaddo
@ 2020-05-06 12:36                 ` Wisam Jaddo
  2020-05-06 15:23                   ` Andrew Rybchenko
  2020-05-06 12:36                 ` [dpdk-dev] [PATCH v5 3/5] app/flow-perf: add deletion " Wisam Jaddo
                                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 102+ messages in thread
From: Wisam Jaddo @ 2020-05-06 12:36 UTC (permalink / raw)
  To: dev, jackmin, thomas, jerinjacobk, gerlitz.or, l.yan, ajit.khaparde

Add insertion rate calculation feature into flow
performance application.

The application now provides the ability to test
insertion rate of specific rte_flow rule, by
stressing it to the NIC, and calculate the
insertion rate.

The application offers some options in the command
line, to configure which rule to apply.

After that the application will start producing
rules with same pattern but increasing the outer IP
source address by 1 each time, thus it will give
different flow each time, and all other items will
have open masks.

The current design has a single-core insertion rate.
In the future we may have a multi core insertion
rate measurement support in the app.

Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 app/test-flow-perf/Makefile      |   3 +
 app/test-flow-perf/actions_gen.c |  88 ++++++++
 app/test-flow-perf/actions_gen.h |  53 +++++
 app/test-flow-perf/config.h      |  15 ++
 app/test-flow-perf/flow_gen.c    | 179 +++++++++++++++
 app/test-flow-perf/flow_gen.h    |  63 ++++++
 app/test-flow-perf/items_gen.c   | 265 ++++++++++++++++++++++
 app/test-flow-perf/items_gen.h   |  67 ++++++
 app/test-flow-perf/main.c        | 369 ++++++++++++++++++++++++++++++-
 app/test-flow-perf/meson.build   |   3 +
 doc/guides/tools/flow-perf.rst   | 185 +++++++++++++++-
 11 files changed, 1283 insertions(+), 7 deletions(-)
 create mode 100644 app/test-flow-perf/actions_gen.c
 create mode 100644 app/test-flow-perf/actions_gen.h
 create mode 100644 app/test-flow-perf/flow_gen.c
 create mode 100644 app/test-flow-perf/flow_gen.h
 create mode 100644 app/test-flow-perf/items_gen.c
 create mode 100644 app/test-flow-perf/items_gen.h

diff --git a/app/test-flow-perf/Makefile b/app/test-flow-perf/Makefile
index db043c17a..4f2db7591 100644
--- a/app/test-flow-perf/Makefile
+++ b/app/test-flow-perf/Makefile
@@ -16,6 +16,9 @@ CFLAGS += $(WERROR_FLAGS)
 #
 # all source are stored in SRCS-y
 #
+SRCS-y += actions_gen.c
+SRCS-y += flow_gen.c
+SRCS-y += items_gen.c
 SRCS-y += main.c
 
 include $(RTE_SDK)/mk/rte.app.mk
diff --git a/app/test-flow-perf/actions_gen.c b/app/test-flow-perf/actions_gen.c
new file mode 100644
index 000000000..fa60084cf
--- /dev/null
+++ b/app/test-flow-perf/actions_gen.c
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2020 Mellanox Technologies, Ltd
+ *
+ * The file contains the implementations of actions generators.
+ * Each generator is responsible for preparing its action instance
+ * and initializing it with needed data.
+ **/
+
+#include <sys/types.h>
+#include <rte_malloc.h>
+#include <rte_flow.h>
+#include <rte_ethdev.h>
+
+#define ALLOCATE_ACTION_VARS
+#include "actions_gen.h"
+#include "config.h"
+
+/* Store MARK_ID as the id of the shared mark action. */
+void
+gen_mark(void)
+{
+	mark_action = (struct rte_flow_action_mark){
+		.id = MARK_ID,
+	};
+}
+
+/* Point the shared queue action at the given Rx queue index. */
+void
+gen_queue(uint16_t queue)
+{
+	queue_action = (struct rte_flow_action_queue){
+		.index = queue,
+	};
+}
+
+/* Set the target group of the shared jump action to next_table. */
+void
+gen_jump(uint16_t next_table)
+{
+	jump_action = (struct rte_flow_action_jump){
+		.group = next_table,
+	};
+}
+
+/*
+ * Build the RSS action configuration and publish it through rss_action.
+ *
+ * A struct action_rss_data is allocated and its embedded rte_flow RSS
+ * configuration is filled in: default hash function, level 0, IP and TCP
+ * hash types, a key whose first byte is 1, and the given queue list.
+ * conf.key and conf.queue point at the storage embedded in the same
+ * allocation.
+ *
+ * queues:        queue indexes to copy into the RSS queue list.
+ * queues_number: number of entries in queues; it must fit in the
+ *                fixed-size queue array of struct action_rss_data.
+ *
+ * Exits the application if queues_number does not fit or if the
+ * allocation fails.
+ */
+void
+gen_rss(uint16_t *queues, uint16_t queues_number)
+{
+	uint16_t queue;
+	struct action_rss_data *rss_data;
+
+	/* Never copy more queues than the embedded array can hold. */
+	if (queues_number >
+			sizeof(rss_data->queue) / sizeof(rss_data->queue[0]))
+		rte_exit(EXIT_FAILURE,
+			"Too many RSS queues requested: %u\n",
+			(unsigned int)queues_number);
+
+	rss_data = rte_malloc("rss_data",
+		sizeof(struct action_rss_data), 0);
+
+	if (rss_data == NULL)
+		rte_exit(EXIT_FAILURE, "No Memory available!");
+
+	*rss_data = (struct action_rss_data){
+		.conf = (struct rte_flow_action_rss){
+			.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
+			.level = 0,
+			.types = ETH_RSS_IP |
+				ETH_RSS_TCP,
+			.key_len = sizeof(rss_data->key),
+			.queue_num = queues_number,
+			.key = rss_data->key,
+			.queue = rss_data->queue,
+		},
+		.key = { 1 },
+		.queue = { 0 },
+	};
+
+	for (queue = 0; queue < queues_number; queue++)
+		rss_data->queue[queue] = queues[queue];
+
+	rss_action = &rss_data->conf;
+}
+
+/* Fill the shared set-meta action with META_DATA and an all-ones mask. */
+void
+gen_set_meta(void)
+{
+	meta_action = (struct rte_flow_action_set_meta){
+		.data = RTE_BE32(META_DATA),
+		.mask = RTE_BE32(0xffffffff),
+	};
+}
+
+/*
+ * Fill the shared set-tag action: META_DATA as data, an all-ones mask
+ * and TAG_INDEX as the tag index.
+ */
+void
+gen_set_tag(void)
+{
+	tag_action = (struct rte_flow_action_set_tag){
+		.data = RTE_BE32(META_DATA),
+		.mask = RTE_BE32(0xffffffff),
+		.index = TAG_INDEX,
+	};
+}
+
+/* Set the destination of the shared port-id action to PORT_ID_DST. */
+void
+gen_port_id(void)
+{
+	struct rte_flow_action_port_id *conf = &port_id;
+
+	conf->id = PORT_ID_DST;
+}
diff --git a/app/test-flow-perf/actions_gen.h b/app/test-flow-perf/actions_gen.h
new file mode 100644
index 000000000..b2980c1fb
--- /dev/null
+++ b/app/test-flow-perf/actions_gen.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2020 Mellanox Technologies, Ltd
+ *
+ * This file contains the function declarations to
+ * generate each supported action.
+ */
+
+#ifndef FLOW_PERF_ACTION_GEN
+#define FLOW_PERF_ACTION_GEN
+
+/*
+ * Storage for struct rte_flow_action_rss including external data.
+ *
+ * gen_rss() points conf.key and conf.queue at the key[] and queue[]
+ * arrays below, so the whole RSS configuration lives in one allocation.
+ */
+struct action_rss_data {
+	/* RSS action configuration handed to rte_flow. */
+	struct rte_flow_action_rss conf;
+	/* Backing storage for the hash key referenced by conf.key. */
+	uint8_t key[40];
+	/* Backing storage for the queue list referenced by conf.queue. */
+	uint16_t queue[128];
+};
+
+/* Current design is single threaded. */
+#ifdef ALLOCATE_ACTION_VARS
+#define EXTERN
+#else
+#define EXTERN extern
+#endif
+EXTERN struct rte_flow_action_mark mark_action;
+EXTERN struct rte_flow_action_queue queue_action;
+EXTERN struct rte_flow_action_jump jump_action;
+EXTERN struct rte_flow_action_rss *rss_action;
+EXTERN struct rte_flow_action_set_meta meta_action;
+EXTERN struct rte_flow_action_set_tag tag_action;
+EXTERN struct rte_flow_action_port_id port_id;
+
+void
+gen_mark(void);
+
+void
+gen_queue(uint16_t queue);
+
+void
+gen_jump(uint16_t next_table);
+
+void
+gen_rss(uint16_t *queues, uint16_t queues_number);
+
+void
+gen_set_meta(void);
+
+void
+gen_set_tag(void);
+
+void
+gen_port_id(void);
+
+#endif /* FLOW_PERF_ACTION_GEN */
diff --git a/app/test-flow-perf/config.h b/app/test-flow-perf/config.h
index 816863de2..d0be07d7f 100644
--- a/app/test-flow-perf/config.h
+++ b/app/test-flow-perf/config.h
@@ -12,3 +12,18 @@
 #define MBUF_CACHE_SIZE 512
 #define NR_RXD  256
 #define NR_TXD  256
+
+/* Items/Actions parameters */
+#define JUMP_ACTION_TABLE 2
+#define VLAN_VALUE 1
+#define VNI_VALUE 1
+#define GRE_PROTO  0x6558
+#define META_DATA 1
+#define TAG_INDEX 0
+#define PORT_ID_DST 1
+#define MARK_ID 1
+#define TEID_VALUE 1
+
+/* Flow items/actions max size */
+#define MAX_ITEMS_NUM 20
+#define MAX_ACTIONS_NUM 20
diff --git a/app/test-flow-perf/flow_gen.c b/app/test-flow-perf/flow_gen.c
new file mode 100644
index 000000000..cf5453586
--- /dev/null
+++ b/app/test-flow-perf/flow_gen.c
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2020 Mellanox Technologies, Ltd
+ *
+ * The file contains the implementations of the method to
+ * fill items, actions & attributes in their corresponding
+ * arrays, and then generate rte_flow rule.
+ *
+ * After generation, the rule goes to validation and then
+ * creation, and the results are returned.
+ */
+
+#include <stdint.h>
+
+#include "flow_gen.h"
+#include "items_gen.h"
+#include "actions_gen.h"
+#include "config.h"
+
+/*
+ * Translate the INGRESS/EGRESS/TRANSFER bits of flow_attrs into the
+ * matching rte_flow_attr flags and record the flow group.
+ * Flags whose bit is not set in flow_attrs are left untouched.
+ */
+static void
+fill_attributes(struct rte_flow_attr *attr,
+	uint8_t flow_attrs, uint16_t group)
+{
+	attr->group = group;
+
+	if ((flow_attrs & INGRESS) != 0)
+		attr->ingress = 1;
+
+	if ((flow_attrs & EGRESS) != 0)
+		attr->egress = 1;
+
+	if ((flow_attrs & TRANSFER) != 0)
+		attr->transfer = 1;
+}
+
+/*
+ * Populate the pattern array from the flow_items bitmask.
+ *
+ * Items are appended in a fixed order (meta, tag, eth, vlan, ipv4, ipv6,
+ * tcp, udp, vxlan, vxlan-gpe, gre, geneve, gtp) regardless of the order
+ * of the bits, and the list is closed with an END item.
+ * outer_ip_src is forwarded to the IPv4/IPv6 items as the source address.
+ */
+static void
+fill_items(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint16_t flow_items, uint32_t outer_ip_src)
+{
+	uint8_t next = 0;
+
+	if (flow_items & META_ITEM) {
+		add_meta_data(items, next);
+		next++;
+	}
+	if (flow_items & TAG_ITEM) {
+		add_meta_tag(items, next);
+		next++;
+	}
+	if (flow_items & ETH_ITEM) {
+		add_ether(items, next);
+		next++;
+	}
+	if (flow_items & VLAN_ITEM) {
+		add_vlan(items, next);
+		next++;
+	}
+	if (flow_items & IPV4_ITEM) {
+		add_ipv4(items, next, outer_ip_src);
+		next++;
+	}
+	if (flow_items & IPV6_ITEM) {
+		add_ipv6(items, next, outer_ip_src);
+		next++;
+	}
+	if (flow_items & TCP_ITEM) {
+		add_tcp(items, next);
+		next++;
+	}
+	if (flow_items & UDP_ITEM) {
+		add_udp(items, next);
+		next++;
+	}
+	if (flow_items & VXLAN_ITEM) {
+		add_vxlan(items, next);
+		next++;
+	}
+	if (flow_items & VXLAN_GPE_ITEM) {
+		add_vxlan_gpe(items, next);
+		next++;
+	}
+	if (flow_items & GRE_ITEM) {
+		add_gre(items, next);
+		next++;
+	}
+	if (flow_items & GENEVE_ITEM) {
+		add_geneve(items, next);
+		next++;
+	}
+	if (flow_items & GTP_ITEM) {
+		add_gtp(items, next);
+		next++;
+	}
+
+	/* Terminate the pattern. */
+	items[next].type = RTE_FLOW_ITEM_TYPE_END;
+}
+
+/*
+ * Populate the action array from the flow_actions bitmask.
+ *
+ * actions:      output array; entries are appended in a fixed order and
+ *               the list is closed with an END action.
+ * flow_actions: bitmask of *_ACTION flags selecting the actions.
+ * counter:      index of the rule being generated. The shared action
+ *               configurations (mark, set-meta, set-tag, rss, jump,
+ *               port-id) are only (re)built when it is 0; queue actions
+ *               use it to rotate over the available queues.
+ * next_table:   group used by the jump action.
+ * hairpinq:     number of hairpin queues; hairpin queue indexes start
+ *               right after the RXQ_NUM regular queues. It is used as a
+ *               divisor, so it must be non-zero when
+ *               HAIRPIN_QUEUE_ACTION is requested.
+ *
+ * The conf pointers stored in actions[] are consumed by the caller after
+ * this function returns, so every configuration they reference must have
+ * static storage duration.
+ */
+static void
+fill_actions(struct rte_flow_action actions[MAX_ACTIONS_NUM],
+	uint16_t flow_actions, uint32_t counter, uint16_t next_table,
+	uint16_t hairpinq)
+{
+	/*
+	 * Static on purpose: a pointer to it is stored in actions[] and
+	 * read after this function has returned; static storage also
+	 * guarantees a zero-initialized configuration.
+	 */
+	static struct rte_flow_action_count count_action;
+	uint8_t actions_counter = 0;
+	uint16_t queues[RXQ_NUM];
+	uint16_t i;
+
+	/* None-fate actions */
+	if (flow_actions & MARK_ACTION) {
+		if (!counter)
+			gen_mark();
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_MARK;
+		actions[actions_counter++].conf = &mark_action;
+	}
+	if (flow_actions & COUNT_ACTION) {
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_COUNT;
+		actions[actions_counter++].conf = &count_action;
+	}
+	if (flow_actions & META_ACTION) {
+		if (!counter)
+			gen_set_meta();
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_SET_META;
+		actions[actions_counter++].conf = &meta_action;
+	}
+	if (flow_actions & TAG_ACTION) {
+		if (!counter)
+			gen_set_tag();
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_SET_TAG;
+		actions[actions_counter++].conf = &tag_action;
+	}
+
+	/* Fate actions */
+	if (flow_actions & QUEUE_ACTION) {
+		/* Rotate over the regular Rx queues. */
+		gen_queue(counter % RXQ_NUM);
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_QUEUE;
+		actions[actions_counter++].conf = &queue_action;
+	}
+	if (flow_actions & RSS_ACTION) {
+		if (!counter) {
+			for (i = 0; i < RXQ_NUM; i++)
+				queues[i] = i;
+			gen_rss(queues, RXQ_NUM);
+		}
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_RSS;
+		actions[actions_counter++].conf = rss_action;
+	}
+	if (flow_actions & JUMP_ACTION) {
+		if (!counter)
+			gen_jump(next_table);
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_JUMP;
+		actions[actions_counter++].conf = &jump_action;
+	}
+	if (flow_actions & PORT_ID_ACTION) {
+		if (!counter)
+			gen_port_id();
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_PORT_ID;
+		actions[actions_counter++].conf = &port_id;
+	}
+	if (flow_actions & DROP_ACTION)
+		actions[actions_counter++].type = RTE_FLOW_ACTION_TYPE_DROP;
+	if (flow_actions & HAIRPIN_QUEUE_ACTION) {
+		/* Hairpin queues are numbered after the regular ones. */
+		gen_queue((counter % hairpinq) + RXQ_NUM);
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_QUEUE;
+		actions[actions_counter++].conf = &queue_action;
+	}
+	if (flow_actions & HAIRPIN_RSS_ACTION) {
+		if (!counter) {
+			uint16_t hairpin_queues[hairpinq];
+			for (i = 0; i < hairpinq; i++)
+				hairpin_queues[i] = i + RXQ_NUM;
+			gen_rss(hairpin_queues, hairpinq);
+		}
+		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_RSS;
+		actions[actions_counter++].conf = rss_action;
+	}
+
+	actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_END;
+}
+
+/*
+ * Build one rte_flow rule from the given bitmasks and create it on port_id.
+ *
+ * The attribute, pattern and action containers are zeroed, filled from
+ * flow_attrs, flow_items and flow_actions, and handed to
+ * rte_flow_create(). outer_ip_src serves both as the outer IP source of
+ * the pattern and as the rule counter passed to the action filler.
+ *
+ * Returns the value of rte_flow_create(); error is passed through to it.
+ */
+struct rte_flow *
+generate_flow(uint16_t port_id,
+	uint16_t group,
+	uint8_t flow_attrs,
+	uint16_t flow_items,
+	uint16_t flow_actions,
+	uint16_t next_table,
+	uint32_t outer_ip_src,
+	uint16_t hairpinq,
+	struct rte_flow_error *error)
+{
+	struct rte_flow_action actions[MAX_ACTIONS_NUM];
+	struct rte_flow_item items[MAX_ITEMS_NUM];
+	struct rte_flow_attr attr;
+
+	/* Start from all-zero containers. */
+	memset(&attr, 0, sizeof(attr));
+	memset(actions, 0, sizeof(actions));
+	memset(items, 0, sizeof(items));
+
+	fill_attributes(&attr, flow_attrs, group);
+	fill_actions(actions, flow_actions,
+			outer_ip_src, next_table, hairpinq);
+	fill_items(items, flow_items, outer_ip_src);
+
+	return rte_flow_create(port_id, &attr, items, actions, error);
+}
diff --git a/app/test-flow-perf/flow_gen.h b/app/test-flow-perf/flow_gen.h
new file mode 100644
index 000000000..43d9e7cfe
--- /dev/null
+++ b/app/test-flow-perf/flow_gen.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2020 Mellanox Technologies, Ltd
+ *
+ * This file contains the items, actions and attributes
+ * definition. And the methods to prepare and fill items,
+ * actions and attributes to generate rte_flow rule.
+ */
+
+#ifndef FLOW_PERF_FLOW_GEN
+#define FLOW_PERF_FLOW_GEN
+
+#include <stdint.h>
+#include <rte_flow.h>
+
+#include "config.h"
+
+#define FLOW_ITEM_MASK(_x) (UINT64_C(1) << _x)
+
+/* Items */
+#define ETH_ITEM             FLOW_ITEM_MASK(0)
+#define IPV4_ITEM            FLOW_ITEM_MASK(1)
+#define IPV6_ITEM            FLOW_ITEM_MASK(2)
+#define VLAN_ITEM            FLOW_ITEM_MASK(3)
+#define TCP_ITEM             FLOW_ITEM_MASK(4)
+#define UDP_ITEM             FLOW_ITEM_MASK(5)
+#define VXLAN_ITEM           FLOW_ITEM_MASK(6)
+#define VXLAN_GPE_ITEM       FLOW_ITEM_MASK(7)
+#define GRE_ITEM             FLOW_ITEM_MASK(8)
+#define GENEVE_ITEM          FLOW_ITEM_MASK(9)
+#define GTP_ITEM             FLOW_ITEM_MASK(10)
+#define META_ITEM            FLOW_ITEM_MASK(11)
+#define TAG_ITEM             FLOW_ITEM_MASK(12)
+
+/* Actions */
+#define QUEUE_ACTION         FLOW_ITEM_MASK(0)
+#define MARK_ACTION          FLOW_ITEM_MASK(1)
+#define JUMP_ACTION          FLOW_ITEM_MASK(2)
+#define RSS_ACTION           FLOW_ITEM_MASK(3)
+#define COUNT_ACTION         FLOW_ITEM_MASK(4)
+#define META_ACTION          FLOW_ITEM_MASK(5)
+#define TAG_ACTION           FLOW_ITEM_MASK(6)
+#define DROP_ACTION          FLOW_ITEM_MASK(7)
+#define PORT_ID_ACTION       FLOW_ITEM_MASK(8)
+#define HAIRPIN_QUEUE_ACTION FLOW_ITEM_MASK(9)
+#define HAIRPIN_RSS_ACTION   FLOW_ITEM_MASK(10)
+
+/* Attributes */
+#define INGRESS              FLOW_ITEM_MASK(0)
+#define EGRESS               FLOW_ITEM_MASK(1)
+#define TRANSFER             FLOW_ITEM_MASK(2)
+
+struct rte_flow *
+generate_flow(uint16_t port_id,
+	uint16_t group,
+	uint8_t flow_attrs,
+	uint16_t flow_items,
+	uint16_t flow_actions,
+	uint16_t next_table,
+	uint32_t outer_ip_src,
+	uint16_t hairpinq,
+	struct rte_flow_error *error);
+
+#endif /* FLOW_PERF_FLOW_GEN */
diff --git a/app/test-flow-perf/items_gen.c b/app/test-flow-perf/items_gen.c
new file mode 100644
index 000000000..1e9479fb8
--- /dev/null
+++ b/app/test-flow-perf/items_gen.c
@@ -0,0 +1,265 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2020 Mellanox Technologies, Ltd
+ *
+ * This file contains the implementations of the items
+ * related methods. Each item has a method to prepare
+ * the item and add it into the items array at the given index.
+ */
+
+#include <stdint.h>
+#include <rte_flow.h>
+
+#include "items_gen.h"
+#include "config.h"
+
+/* Current design is single threaded. */
+static struct rte_flow_item_eth eth_spec;
+static struct rte_flow_item_eth eth_mask;
+static struct rte_flow_item_vlan vlan_spec;
+static struct rte_flow_item_vlan vlan_mask;
+static struct rte_flow_item_ipv4 ipv4_spec;
+static struct rte_flow_item_ipv4 ipv4_mask;
+static struct rte_flow_item_ipv6 ipv6_spec;
+static struct rte_flow_item_ipv6 ipv6_mask;
+static struct rte_flow_item_udp udp_spec;
+static struct rte_flow_item_udp udp_mask;
+static struct rte_flow_item_tcp tcp_spec;
+static struct rte_flow_item_tcp tcp_mask;
+static struct rte_flow_item_vxlan vxlan_spec;
+static struct rte_flow_item_vxlan vxlan_mask;
+static struct rte_flow_item_vxlan_gpe vxlan_gpe_spec;
+static struct rte_flow_item_vxlan_gpe vxlan_gpe_mask;
+static struct rte_flow_item_gre gre_spec;
+static struct rte_flow_item_gre gre_mask;
+static struct rte_flow_item_geneve geneve_spec;
+static struct rte_flow_item_geneve geneve_mask;
+static struct rte_flow_item_gtp gtp_spec;
+static struct rte_flow_item_gtp gtp_mask;
+static struct rte_flow_item_meta meta_spec;
+static struct rte_flow_item_meta meta_mask;
+static struct rte_flow_item_tag tag_spec;
+static struct rte_flow_item_tag tag_mask;
+
+
+/*
+ * Place an Ethernet item at items[items_counter].
+ * Spec and mask are both all-zero.
+ */
+void
+add_ether(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	struct rte_flow_item *item = &items[items_counter];
+
+	memset(&eth_spec, 0, sizeof(eth_spec));
+	memset(&eth_mask, 0, sizeof(eth_mask));
+	eth_spec.type = 0;
+	eth_mask.type = 0;
+
+	item->type = RTE_FLOW_ITEM_TYPE_ETH;
+	item->spec = &eth_spec;
+	item->mask = &eth_mask;
+}
+
+/*
+ * Place a VLAN item at items[items_counter] with a TCI spec of
+ * VLAN_VALUE and an all-ones TCI mask.
+ */
+void
+add_vlan(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	struct rte_flow_item *item = &items[items_counter];
+	uint16_t tci = VLAN_VALUE;
+
+	memset(&vlan_spec, 0, sizeof(vlan_spec));
+	memset(&vlan_mask, 0, sizeof(vlan_mask));
+	vlan_spec.tci = RTE_BE16(tci);
+	vlan_mask.tci = RTE_BE16(0xffff);
+
+	item->type = RTE_FLOW_ITEM_TYPE_VLAN;
+	item->spec = &vlan_spec;
+	item->mask = &vlan_mask;
+}
+
+/*
+ * Place an IPv4 item at items[items_counter] matching exactly on the
+ * source address.
+ *
+ * src_ipv4 is a host-order value; it is converted to big-endian before
+ * being stored, like the other header fields set in this file.
+ */
+void
+add_ipv4(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter, uint32_t src_ipv4)
+{
+	memset(&ipv4_spec, 0, sizeof(struct rte_flow_item_ipv4));
+	memset(&ipv4_mask, 0, sizeof(struct rte_flow_item_ipv4));
+
+	/* Header fields are big-endian. */
+	ipv4_spec.hdr.src_addr = RTE_BE32(src_ipv4);
+	ipv4_mask.hdr.src_addr = RTE_BE32(0xffffffff);
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_IPV4;
+	items[items_counter].spec = &ipv4_spec;
+	items[items_counter].mask = &ipv4_mask;
+}
+
+
+/*
+ * Place an IPv6 item at items[items_counter] matching exactly on the
+ * source address.
+ *
+ * src_ipv6 is written, most significant byte first, into the first four
+ * bytes of the source address so that every distinct value yields a
+ * distinct address; the remaining bytes stay zero. The mask covers the
+ * whole source address.
+ */
+void
+add_ipv6(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter, int src_ipv6)
+{
+	uint8_t *src_addr = (uint8_t *)&ipv6_spec.hdr.src_addr;
+	uint32_t src = (uint32_t)src_ipv6;
+
+	memset(&ipv6_spec, 0, sizeof(struct rte_flow_item_ipv6));
+	memset(&ipv6_mask, 0, sizeof(struct rte_flow_item_ipv6));
+
+	/*
+	 * Set ipv6 src. memset() would only keep the low byte of the
+	 * value, so the bytes are stored one by one.
+	 */
+	src_addr[0] = (uint8_t)(src >> 24);
+	src_addr[1] = (uint8_t)(src >> 16);
+	src_addr[2] = (uint8_t)(src >> 8);
+	src_addr[3] = (uint8_t)src;
+
+	/* Full mask: every bit of the source address is significant. */
+	memset(&ipv6_mask.hdr.src_addr, 0xff,
+		sizeof(ipv6_mask.hdr.src_addr));
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_IPV6;
+	items[items_counter].spec = &ipv6_spec;
+	items[items_counter].mask = &ipv6_mask;
+}
+
+/* Place a TCP item with all-zero spec and mask at items[items_counter]. */
+void
+add_tcp(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	struct rte_flow_item *item = &items[items_counter];
+
+	memset(&tcp_spec, 0, sizeof(tcp_spec));
+	memset(&tcp_mask, 0, sizeof(tcp_mask));
+
+	item->type = RTE_FLOW_ITEM_TYPE_TCP;
+	item->spec = &tcp_spec;
+	item->mask = &tcp_mask;
+}
+
+/* Place a UDP item with all-zero spec and mask at items[items_counter]. */
+void
+add_udp(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	struct rte_flow_item *item = &items[items_counter];
+
+	memset(&udp_spec, 0, sizeof(udp_spec));
+	memset(&udp_mask, 0, sizeof(udp_mask));
+
+	item->type = RTE_FLOW_ITEM_TYPE_UDP;
+	item->spec = &udp_spec;
+	item->mask = &udp_mask;
+}
+
+/*
+ * Place a VXLAN item at items[items_counter]: VNI spec of VNI_VALUE
+ * with a full 24-bit VNI mask, and flags 0x8 in the spec.
+ */
+void
+add_vxlan(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	struct rte_flow_item *item = &items[items_counter];
+	uint32_t vni = VNI_VALUE;
+
+	memset(&vxlan_spec, 0, sizeof(vxlan_spec));
+	memset(&vxlan_mask, 0, sizeof(vxlan_mask));
+
+	/* Set standard vxlan vni, most significant byte first */
+	vxlan_spec.vni[0] = vni >> 16;
+	vxlan_spec.vni[1] = vni >> 8;
+	vxlan_spec.vni[2] = vni;
+	vxlan_mask.vni[0] = 0xff;
+	vxlan_mask.vni[1] = 0xff;
+	vxlan_mask.vni[2] = 0xff;
+
+	/* Standard vxlan flags */
+	vxlan_spec.flags = 0x8;
+
+	item->type = RTE_FLOW_ITEM_TYPE_VXLAN;
+	item->spec = &vxlan_spec;
+	item->mask = &vxlan_mask;
+}
+
+/*
+ * Place a VXLAN-GPE item at items[items_counter]: VNI spec of VNI_VALUE
+ * with a full 24-bit VNI mask, and flags 0x0c in the spec.
+ */
+void
+add_vxlan_gpe(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	struct rte_flow_item *item = &items[items_counter];
+	uint32_t vni = VNI_VALUE;
+
+	memset(&vxlan_gpe_spec, 0, sizeof(vxlan_gpe_spec));
+	memset(&vxlan_gpe_mask, 0, sizeof(vxlan_gpe_mask));
+
+	/* Set vxlan-gpe vni, most significant byte first */
+	vxlan_gpe_spec.vni[0] = vni >> 16;
+	vxlan_gpe_spec.vni[1] = vni >> 8;
+	vxlan_gpe_spec.vni[2] = vni;
+	vxlan_gpe_mask.vni[0] = 0xff;
+	vxlan_gpe_mask.vni[1] = 0xff;
+	vxlan_gpe_mask.vni[2] = 0xff;
+
+	/* vxlan-gpe flags */
+	vxlan_gpe_spec.flags = 0x0c;
+
+	item->type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE;
+	item->spec = &vxlan_gpe_spec;
+	item->mask = &vxlan_gpe_mask;
+}
+
+/*
+ * Place a GRE item at items[items_counter] with a protocol spec of
+ * GRE_PROTO and an all-ones protocol mask.
+ */
+void
+add_gre(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	struct rte_flow_item *item = &items[items_counter];
+	uint16_t gre_proto = GRE_PROTO;
+
+	memset(&gre_spec, 0, sizeof(gre_spec));
+	memset(&gre_mask, 0, sizeof(gre_mask));
+	gre_spec.protocol = RTE_BE16(gre_proto);
+	gre_mask.protocol = 0xffff;
+
+	item->type = RTE_FLOW_ITEM_TYPE_GRE;
+	item->spec = &gre_spec;
+	item->mask = &gre_mask;
+}
+
+/*
+ * Place a GENEVE item at items[items_counter]: VNI spec of VNI_VALUE
+ * with a full 24-bit VNI mask.
+ */
+void
+add_geneve(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	struct rte_flow_item *item = &items[items_counter];
+	uint32_t vni = VNI_VALUE;
+
+	memset(&geneve_spec, 0, sizeof(geneve_spec));
+	memset(&geneve_mask, 0, sizeof(geneve_mask));
+
+	/* VNI bytes, most significant byte first */
+	geneve_spec.vni[0] = vni >> 16;
+	geneve_spec.vni[1] = vni >> 8;
+	geneve_spec.vni[2] = vni;
+	geneve_mask.vni[0] = 0xff;
+	geneve_mask.vni[1] = 0xff;
+	geneve_mask.vni[2] = 0xff;
+
+	item->type = RTE_FLOW_ITEM_TYPE_GENEVE;
+	item->spec = &geneve_spec;
+	item->mask = &geneve_mask;
+}
+
+/*
+ * Place a GTP item at items[items_counter] with a TEID spec of
+ * TEID_VALUE and an all-ones TEID mask.
+ */
+void
+add_gtp(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	struct rte_flow_item *item = &items[items_counter];
+	uint32_t teid = TEID_VALUE;
+
+	memset(&gtp_spec, 0, sizeof(gtp_spec));
+	memset(&gtp_mask, 0, sizeof(gtp_mask));
+	gtp_spec.teid = RTE_BE32(teid);
+	gtp_mask.teid = RTE_BE32(0xffffffff);
+
+	item->type = RTE_FLOW_ITEM_TYPE_GTP;
+	item->spec = &gtp_spec;
+	item->mask = &gtp_mask;
+}
+
+/*
+ * Place a META item at items[items_counter] with a data spec of
+ * META_DATA and an all-ones data mask.
+ */
+void
+add_meta_data(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	struct rte_flow_item *item = &items[items_counter];
+	uint32_t meta = META_DATA;
+
+	memset(&meta_spec, 0, sizeof(meta_spec));
+	memset(&meta_mask, 0, sizeof(meta_mask));
+	meta_spec.data = RTE_BE32(meta);
+	meta_mask.data = RTE_BE32(0xffffffff);
+
+	item->type = RTE_FLOW_ITEM_TYPE_META;
+	item->spec = &meta_spec;
+	item->mask = &meta_mask;
+}
+
+
+/*
+ * Place a TAG item at items[items_counter]: data spec of META_DATA with
+ * an all-ones data mask, index spec of TAG_INDEX with an all-ones index
+ * mask.
+ */
+void
+add_meta_tag(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter)
+{
+	struct rte_flow_item *item = &items[items_counter];
+	uint32_t tag_data = META_DATA;
+	uint8_t tag_index = TAG_INDEX;
+
+	memset(&tag_spec, 0, sizeof(tag_spec));
+	memset(&tag_mask, 0, sizeof(tag_mask));
+
+	tag_spec.index = tag_index;
+	tag_mask.index = 0xff;
+	tag_spec.data = RTE_BE32(tag_data);
+	tag_mask.data = RTE_BE32(0xffffffff);
+
+	item->type = RTE_FLOW_ITEM_TYPE_TAG;
+	item->spec = &tag_spec;
+	item->mask = &tag_mask;
+}
diff --git a/app/test-flow-perf/items_gen.h b/app/test-flow-perf/items_gen.h
new file mode 100644
index 000000000..ee5dc81fe
--- /dev/null
+++ b/app/test-flow-perf/items_gen.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2020 Mellanox Technologies, Ltd
+ *
+ * This file contains the items related methods
+ */
+
+#ifndef FLOW_PERF_ITEMS_GEN
+#define FLOW_PERF_ITEMS_GEN
+
+#include <stdint.h>
+#include <rte_flow.h>
+
+#include "config.h"
+
+void
+add_ether(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_vlan(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_ipv4(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter, uint32_t src_ipv4);
+
+void
+add_ipv6(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter, int src_ipv6);
+
+void
+add_udp(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_tcp(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_vxlan(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_vxlan_gpe(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_gre(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_geneve(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_gtp(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_meta_data(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+void
+add_meta_tag(struct rte_flow_item items[MAX_ITEMS_NUM],
+	uint8_t items_counter);
+
+#endif /* FLOW_PERF_ITEMS_GEN */
diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index 7a924cdb7..463e4a782 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -35,29 +35,156 @@
 #include <rte_flow.h>
 
 #include "config.h"
+#include "flow_gen.h"
 
-static uint32_t nb_lcores;
+#define MAX_ITERATIONS             100
+#define DEFAULT_RULES_COUNT    4000000
+#define DEFAULT_ITERATION       100000
+
+struct rte_flow *flow;
+static uint8_t flow_group;
+
+static uint16_t flow_items;
+static uint16_t flow_actions;
+static uint8_t flow_attrs;
+static volatile bool force_quit;
+static bool dump_iterations;
 static struct rte_mempool *mbuf_mp;
+static uint32_t nb_lcores;
+static uint32_t flows_count;
+static uint32_t iterations_number;
+static uint32_t hairpinq;
 
 static void
 usage(char *progname)
 {
 	printf("\nusage: %s\n", progname);
+	printf("\nControl configurations:\n");
+	printf("  --flows-count=N: to set the number of needed"
+		" flows to insert, default is 4,000,000\n");
+	printf("  --dump-iterations: To print rates for each"
+		" iteration\n");
+
+	printf("To set flow attributes:\n");
+	printf("  --ingress: set ingress attribute in flows\n");
+	printf("  --egress: set egress attribute in flows\n");
+	printf("  --transfer: set transfer attribute in flows\n");
+	printf("  --group=N: set group for all flows,"
+		" default is 0\n");
+
+	printf("To set flow items:\n");
+	printf("  --ether: add ether layer in flow items\n");
+	printf("  --vlan: add vlan layer in flow items\n");
+	printf("  --ipv4: add ipv4 layer in flow items\n");
+	printf("  --ipv6: add ipv6 layer in flow items\n");
+	printf("  --tcp: add tcp layer in flow items\n");
+	printf("  --udp: add udp layer in flow items\n");
+	printf("  --vxlan: add vxlan layer in flow items\n");
+	printf("  --vxlan-gpe: add vxlan-gpe layer in flow items\n");
+	printf("  --gre: add gre layer in flow items\n");
+	printf("  --geneve: add geneve layer in flow items\n");
+	printf("  --gtp: add gtp layer in flow items\n");
+	printf("  --meta: add meta layer in flow items\n");
+	printf("  --tag: add tag layer in flow items\n");
+
+	printf("To set flow actions:\n");
+	printf("  --port-id: add port-id action in flow actions\n");
+	printf("  --rss: add rss action in flow actions\n");
+	printf("  --queue: add queue action in flow actions\n");
+	printf("  --jump: add jump action in flow actions\n");
+	printf("  --mark: add mark action in flow actions\n");
+	printf("  --count: add count action in flow actions\n");
+	printf("  --set-meta: add set meta action in flow actions\n");
+	printf("  --set-tag: add set tag action in flow actions\n");
+	printf("  --drop: add drop action in flow actions\n");
+	printf("  --hairpin-queue=N: add hairpin-queue action in flow actions\n");
+	printf("  --hairpin-rss=N: add hairping-rss action in flow actions\n");
 }
 
 static void
 args_parse(int argc, char **argv)
 {
 	char **argvopt;
-	int opt;
+	int n, opt, i;
 	int opt_idx;
+	static const char * const items_str[] = {
+		"ether", "vlan", "ipv4", "ipv6",
+		"tcp", "udp", "vxlan", "vxlan-gpe",
+		"gre", "geneve", "gtp", "meta",
+		"tag"
+	};
+	uint32_t items[] = {
+		ETH_ITEM, VLAN_ITEM, IPV4_ITEM,
+		IPV6_ITEM, TCP_ITEM, UDP_ITEM,
+		VXLAN_ITEM, VXLAN_GPE_ITEM, GRE_ITEM,
+		GENEVE_ITEM, GTP_ITEM, META_ITEM,
+		TAG_ITEM
+	};
+	static const char * const attributes_str[] = {
+		"ingress", "egress", "transfer"
+	};
+	uint32_t attributes[] = {
+		INGRESS, EGRESS, TRANSFER
+	};
+	static const char * const actions_str[] = {
+		"port-id", "rss", "queue", "jump",
+		"mark", "count", "set-meta", "set-tag",
+		"drop",
+	};
+	uint32_t actions[] = {
+		PORT_ID_ACTION, RSS_ACTION, QUEUE_ACTION,
+		JUMP_ACTION, MARK_ACTION, COUNT_ACTION,
+		META_ACTION, TAG_ACTION, DROP_ACTION
+	};
+	int items_size = RTE_DIM(items);
+	int attributes_size = RTE_DIM(attributes);
+	int actions_size = RTE_DIM(actions);
+
 	static struct option lgopts[] = {
 		/* Control */
 		{ "help",                       0, 0, 0 },
+		{ "flows-count",                1, 0, 0 },
+		{ "dump-iterations",            0, 0, 0 },
+		/* Attributes */
+		{ "ingress",                    0, 0, 0 },
+		{ "egress",                     0, 0, 0 },
+		{ "transfer",                   0, 0, 0 },
+		{ "group",                      1, 0, 0 },
+		/* Items */
+		{ "ether",                      0, 0, 0 },
+		{ "vlan",                       0, 0, 0 },
+		{ "ipv4",                       0, 0, 0 },
+		{ "ipv6",                       0, 0, 0 },
+		{ "tcp",                        0, 0, 0 },
+		{ "udp",                        0, 0, 0 },
+		{ "vxlan",                      0, 0, 0 },
+		{ "vxlan-gpe",                  0, 0, 0 },
+		{ "gre",                        0, 0, 0 },
+		{ "geneve",                     0, 0, 0 },
+		{ "gtp",                        0, 0, 0 },
+		{ "meta",                       0, 0, 0 },
+		{ "tag",                        0, 0, 0 },
+		/* Actions */
+		{ "port-id",                    0, 0, 0 },
+		{ "rss",                        0, 0, 0 },
+		{ "queue",                      0, 0, 0 },
+		{ "jump",                       0, 0, 0 },
+		{ "mark",                       0, 0, 0 },
+		{ "count",                      0, 0, 0 },
+		{ "set-meta",                   0, 0, 0 },
+		{ "set-tag",                    0, 0, 0 },
+		{ "drop",                       0, 0, 0 },
+		{ "hairpin-queue",              1, 0, 0 },
+		{ "hairpin-rss",                1, 0, 0 },
 	};
 
+	flow_items = 0;
+	flow_actions = 0;
+	flow_attrs = 0;
+	hairpinq = 0;
 	argvopt = argv;
 
+	printf(":: Flow -> ");
 	while ((opt = getopt_long(argc, argvopt, "",
 				lgopts, &opt_idx)) != EOF) {
 		switch (opt) {
@@ -66,6 +193,73 @@ args_parse(int argc, char **argv)
 				usage(argv[0]);
 				rte_exit(EXIT_SUCCESS, "Displayed help\n");
 			}
+
+			/* Attributes */
+			for (i = 0; i < attributes_size; i++)
+				if (!strcmp(lgopts[opt_idx].name,
+						attributes_str[i])) {
+					flow_attrs |= attributes[i];
+					printf("%s / ", attributes_str[i]);
+				}
+			if (!strcmp(lgopts[opt_idx].name, "group")) {
+				n = atoi(optarg);
+				if (n >= 0)
+					flow_group = n;
+				else
+					rte_exit(EXIT_SUCCESS,
+						"flow group should be >= 0");
+				printf("group %d ", flow_group);
+			}
+
+			/* Items */
+			for (i = 0; i < items_size; i++)
+				if (!strcmp(lgopts[opt_idx].name,
+						items_str[i])) {
+					flow_items |= items[i];
+					printf("%s / ", items_str[i]);
+				}
+
+			/* Actions */
+			for (i = 0; i < actions_size; i++)
+				if (!strcmp(lgopts[opt_idx].name,
+						actions_str[i])) {
+					flow_actions |= actions[i];
+					printf("%s / ", actions_str[i]);
+				}
+			if (!strcmp(lgopts[opt_idx].name, "hairpin-rss")) {
+				n = atoi(optarg);
+				if (n > 0)
+					hairpinq = n;
+				else
+					rte_exit(EXIT_SUCCESS, "Hairpin queues should be > 0 ");
+
+				flow_actions |= HAIRPIN_RSS_ACTION;
+				printf("hairpin-rss / ");
+			}
+			if (!strcmp(lgopts[opt_idx].name, "hairpin-queue")) {
+				n = atoi(optarg);
+				if (n > 0)
+					hairpinq = n;
+				else
+					rte_exit(EXIT_SUCCESS, "Hairpin queues should be > 0 ");
+
+				flow_actions |= HAIRPIN_QUEUE_ACTION;
+				printf("hairpin-queue / ");
+			}
+
+			/* Control */
+			if (!strcmp(lgopts[opt_idx].name, "flows-count")) {
+				n = atoi(optarg);
+				if (n > (int) iterations_number)
+					flows_count = n;
+				else {
+					printf("\n\nflows_count should be > %d",
+						iterations_number);
+					rte_exit(EXIT_SUCCESS, " ");
+				}
+			}
+			if (!strcmp(lgopts[opt_idx].name, "dump-iterations"))
+				dump_iterations = true;
 			break;
 		default:
 			printf("Invalid option: %s\n", argv[optind]);
@@ -74,15 +268,141 @@ args_parse(int argc, char **argv)
 			break;
 		}
 	}
+	printf("end_flow\n");
+}
+
+/*
+ * Print the type and message of a failed flow creation; a placeholder
+ * text is printed when the error carries no message.
+ */
+static void
+print_flow_error(struct rte_flow_error error)
+{
+	const char *reason = "(no stated reason)";
+
+	if (error.message)
+		reason = error.message;
+
+	printf("Flow can't be created %d message: %s\n", error.type, reason);
+}
+
+/*
+ * Insert flows_count rules on every available port and print the
+ * insertion rate.
+ *
+ * Per port: when flow_group > 0 a group 0 jump rule into flow_group is
+ * installed first, then flows_count rules are generated from the
+ * flow_attrs, flow_items and flow_actions chosen on the command line.
+ * CPU time is sampled with clock() once every iterations_number rules;
+ * the samples are printed when dump_iterations is set and their sum is
+ * used for the total rate.
+ *
+ * A creation failure is retried while rte_errno is EAGAIN; any other
+ * failure ends the application through rte_exit().
+ */
+static inline void
+flows_handler(void)
+{
+	struct rte_flow_error error;
+	clock_t start_iter, end_iter;
+	double cpu_time_used;
+	double flows_rate;
+	double cpu_time_per_iter[MAX_ITERATIONS];
+	double delta;
+	double iter_time;
+	uint16_t nr_ports;
+	uint32_t i;
+	int port_id;
+	int iter_id;
+	uint32_t eagain_counter = 0;
+
+	nr_ports = rte_eth_dev_count_avail();
+
+	/* -1 marks a slot for which no sample has been recorded. */
+	for (i = 0; i < MAX_ITERATIONS; i++)
+		cpu_time_per_iter[i] = -1;
+
+	if (iterations_number > flows_count)
+		iterations_number = flows_count;
+
+	printf(":: Flows Count per port: %d\n", flows_count);
+
+	for (port_id = 0; port_id < nr_ports; port_id++) {
+		cpu_time_used = 0;
+		if (flow_group > 0) {
+			/*
+			 * Create global rule to jump into flow_group,
+			 * this way the app will avoid the default rules.
+			 *
+			 * Global rule:
+			 * group 0 eth / end actions jump group <flow_group>
+			 *
+			 */
+			flow = generate_flow(port_id, 0, flow_attrs, ETH_ITEM,
+				JUMP_ACTION, flow_group, 0, 0, &error);
+
+			if (!flow) {
+				print_flow_error(error);
+				rte_exit(EXIT_FAILURE, "error in creating flow");
+			}
+		}
+
+		/* Insertion Rate */
+		printf("Flows insertion on port = %d\n", port_id);
+		start_iter = clock();
+		for (i = 0; i < flows_count; i++) {
+			/* Retry the same rule for as long as EAGAIN is reported. */
+			do {
+				rte_errno = 0;
+				flow = generate_flow(port_id, flow_group,
+					flow_attrs, flow_items, flow_actions,
+					JUMP_ACTION_TABLE, i, hairpinq, &error);
+				if (!flow)
+					eagain_counter++;
+			} while (rte_errno == EAGAIN);
+
+			/* A pending signal makes this the last loop round. */
+			if (force_quit)
+				i = flows_count;
+
+			if (!flow) {
+				print_flow_error(error);
+				rte_exit(EXIT_FAILURE, "error in creating flow");
+			}
+
+			if (i && !((i + 1) % iterations_number)) {
+				/* Save the insertion rate of each iter */
+				end_iter = clock();
+				delta = (double) (end_iter - start_iter);
+				iter_time = delta / CLOCKS_PER_SEC;
+				iter_id = ((i + 1) / iterations_number) - 1;
+				/*
+				 * The per-iteration table only has
+				 * MAX_ITERATIONS slots; samples beyond it are
+				 * not stored but still count in the total.
+				 */
+				if (iter_id < MAX_ITERATIONS)
+					cpu_time_per_iter[iter_id] = iter_time;
+				cpu_time_used += iter_time;
+				start_iter = clock();
+			}
+		}
+
+		/* Iteration rate per iteration */
+		if (dump_iterations)
+			for (i = 0; i < MAX_ITERATIONS; i++) {
+				if (cpu_time_per_iter[i] == -1)
+					continue;
+				delta = (double)(iterations_number /
+					cpu_time_per_iter[i]);
+				flows_rate = delta / 1000;
+				printf(":: Iteration #%d: %d flows "
+					"in %f sec[ Rate = %f K/Sec ]\n",
+					i, iterations_number,
+					cpu_time_per_iter[i], flows_rate);
+			}
+
+		/* Insertion rate for all flows */
+		flows_rate = ((double) (flows_count / cpu_time_used) / 1000);
+		printf("\n:: Total flow insertion rate -> %f K/Sec\n",
+						flows_rate);
+		printf(":: The time for creating %d in flows %f seconds\n",
+						flows_count, cpu_time_used);
+		printf(":: EAGAIN counter = %d\n", eagain_counter);
+	}
+}
+
+/*
+ * Handler for SIGINT and SIGTERM: warn that the statistics are no longer
+ * reliable and raise force_quit. Other signals are ignored.
+ */
+static void
+signal_handler(int signum)
+{
+	if (signum != SIGINT && signum != SIGTERM)
+		return;
+
+	printf("\n\nSignal %d received, preparing to exit...\n", signum);
+	printf("Error: Stats are wrong due to sudden signal!\n\n");
+	force_quit = true;
 }
 
 static void
 init_port(void)
 {
 	int ret;
-	uint16_t i;
+	uint16_t i, j;
 	uint16_t port_id;
 	uint16_t nr_ports;
+	uint16_t nr_queues;
+	struct rte_eth_hairpin_conf hairpin_conf = {
+		.peer_count = 1,
+	};
 	struct rte_eth_conf port_conf = {
 		.rx_adv_conf = {
 			.rss_conf.rss_hf =
@@ -94,6 +414,10 @@ init_port(void)
 	struct rte_eth_rxconf rxq_conf;
 	struct rte_eth_dev_info dev_info;
 
+	nr_queues = RXQ_NUM;
+	if (hairpinq)
+		nr_queues = RXQ_NUM + hairpinq;
+
 	nr_ports = rte_eth_dev_count_avail();
 	if (nr_ports == 0)
 		rte_exit(EXIT_FAILURE, "Error: no port detected\n");
@@ -118,8 +442,8 @@ init_port(void)
 
 		printf(":: initializing port: %d\n", port_id);
 
-		ret = rte_eth_dev_configure(port_id, RXQ_NUM,
-				TXQ_NUM, &port_conf);
+		ret = rte_eth_dev_configure(port_id, nr_queues,
+				nr_queues, &port_conf);
 		if (ret < 0)
 			rte_exit(EXIT_FAILURE,
 				":: cannot configure device: err=%d, port=%u\n",
@@ -159,6 +483,30 @@ init_port(void)
 				":: promiscuous mode enable failed: err=%s, port=%u\n",
 				rte_strerror(-ret), port_id);
 
+		if (hairpinq) {
+			for (i = RXQ_NUM, j = 0; i < nr_queues; i++, j++) {
+				hairpin_conf.peers[0].port = port_id;
+				hairpin_conf.peers[0].queue = j + TXQ_NUM;
+				ret = rte_eth_rx_hairpin_queue_setup(port_id, i,
+					NR_RXD, &hairpin_conf);
+				if (ret != 0)
+					rte_exit(EXIT_FAILURE,
+						":: Hairpin rx queue setup failed: err=%d, port=%u\n",
+						ret, port_id);
+			}
+
+			for (i = TXQ_NUM, j = 0; i < nr_queues; i++, j++) {
+				hairpin_conf.peers[0].port = port_id;
+				hairpin_conf.peers[0].queue = j + RXQ_NUM;
+				ret = rte_eth_tx_hairpin_queue_setup(port_id, i,
+					NR_TXD, &hairpin_conf);
+				if (ret != 0)
+					rte_exit(EXIT_FAILURE,
+						":: Hairpin tx queue setup failed: err=%d, port=%u\n",
+						ret, port_id);
+			}
+		}
+
 		ret = rte_eth_dev_start(port_id);
 		if (ret < 0)
 			rte_exit(EXIT_FAILURE,
@@ -180,6 +528,15 @@ main(int argc, char **argv)
 	if (ret < 0)
 		rte_exit(EXIT_FAILURE, "EAL init failed\n");
 
+	force_quit = false;
+	dump_iterations = false;
+	flows_count = DEFAULT_RULES_COUNT;
+	iterations_number = DEFAULT_ITERATION;
+	flow_group = 0;
+
+	signal(SIGINT, signal_handler);
+	signal(SIGTERM, signal_handler);
+
 	argc -= ret;
 	argv += ret;
 	if (argc > 1)
@@ -191,6 +548,8 @@ main(int argc, char **argv)
 	if (nb_lcores <= 1)
 		rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
 
+	flows_handler();
+
 	RTE_ETH_FOREACH_DEV(port) {
 		rte_flow_flush(port, &error);
 		rte_eth_dev_stop(port);
diff --git a/app/test-flow-perf/meson.build b/app/test-flow-perf/meson.build
index 25711378f..6eaf83b41 100644
--- a/app/test-flow-perf/meson.build
+++ b/app/test-flow-perf/meson.build
@@ -2,6 +2,9 @@
 # Copyright(c) 2020 Mellanox Technologies, Ltd
 
 sources = files(
+	'actions_gen.c',
+	'flow_gen.c',
+	'items_gen.c',
 	'main.c',
 )
 
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
index 49eb450ae..b45fccd69 100644
--- a/doc/guides/tools/flow-perf.rst
+++ b/doc/guides/tools/flow-perf.rst
@@ -4,7 +4,18 @@
 Flow performance tool
 =====================
 
-Application for rte_flow performance testing.
+Application for rte_flow performance testing. The application provides the
+ability to test the insertion rate of a specific rte_flow rule by stressing
+it to the NIC, and calculates the insertion rate.
+
+The application offers some options in the command line, to configure
+which rule to apply.
+
+After that the application will start producing rules with same pattern
+but increasing the outer IP source address by 1 each time, thus it will
+give different flow each time, and all other items will have open masks.
+
+The current design have single core insertion rate.
 
 
 Compiling the Application
@@ -36,9 +47,179 @@ with a ``--`` separator:
 
 .. code-block:: console
 
-	sudo ./dpdk-test-flow-perf -n 4 -w 08:00.0 --
+	sudo ./dpdk-test-flow_perf -n 4 -w 08:00.0 -- --ingress --ether --ipv4 --queue --flows-count=1000000
 
 The command line options are:
 
 *	``--help``
 	Display a help message and quit.
+
+*	``--flows-count=N``
+	Set the number of needed flows to insert,
+	where 1 <= N <= "number of flows".
+	The default value is 4,000,000.
+
+*	``--dump-iterations``
+	Print rates for each iteration of flows.
+	Default iteration is 1,00,000.
+
+
+Attributes:
+
+*	``--ingress``
+	Set Ingress attribute to all flows attributes.
+
+*	``--egress``
+	Set Egress attribute to all flows attributes.
+
+*	``--transfer``
+	Set Transfer attribute to all flows attributes.
+
+*	``--group=N``
+	Set group for all flows, where N >= 0.
+	Default group is 0.
+
+Items:
+
+*	``--ether``
+	Add Ether item to all flows items. This item has an open mask.
+
+*	``--vlan``
+	Add VLAN item to all flows items.
+	This item has a VLAN value defined in user_parameters.h
+	under ``VNI_VALUE`` with full mask, default value = 1.
+	Other fields have an open mask.
+
+*	``--ipv4``
+	Add IPv4 item to all flows items.
+	This item has an incremental source IP, with full mask.
+	Other fields have an open mask.
+
+*	``--ipv6``
+	Add IPv6 item to all flows items.
+	This item has an incremental source IP, with full mask.
+	Other fields have an open mask.
+
+*	``--tcp``
+	Add TCP item to all flows items. This item has an open mask.
+
+*	``--udp``
+	Add UDP item to all flows items. This item has an open mask.
+
+*	``--vxlan``
+	Add VXLAN item to all flows items.
+	This item has a VNI value defined in user_parameters.h
+	under ``VNI_VALUE`` with full mask, default value = 1.
+	Other fields have an open mask.
+
+*	``--vxlan-gpe``
+	Add VXLAN-GPE item to all flows items.
+	This item has a VNI value defined in user_parameters.h
+	under ``VNI_VALUE`` with full mask, default value = 1.
+	Other fields have an open mask.
+
+*	``--gre``
+	Add GRE item to all flows items.
+	This item has a protocol value defined in user_parameters.h
+	under ``GRE_PROTO`` with full mask, default protocol = 0x6558 "Ether".
+	Other fields have an open mask.
+
+*	``--geneve``
+	Add GENEVE item to all flows items.
+	This item has a VNI value defined in user_parameters.h
+	under ``VNI_VALUE`` with full mask, default value = 1.
+	Other fields have an open mask.
+
+*	``--gtp``
+	Add GTP item to all flows items.
+	This item has a TEID value defined in user_parameters.h
+	under ``TEID_VALUE`` with full mask, default value = 1.
+	Other fields have an open mask.
+
+*	``--meta``
+	Add Meta item to all flows items.
+	This item has a data value defined in user_parameters.h
+	under ``META_DATA`` with full mask, default value = 1.
+	Other fields have an open mask.
+
+*	``--tag``
+	Add Tag item to all flows items.
+	This item has a data value defined in user_parameters.h
+	under ``META_DATA`` with full mask, default value = 1.
+
+	It also has a tag index defined in user_parameters.h
+	under ``TAG_INDEX`` with full mask, default value = 0.
+	Other fields have an open mask.
+
+
+Actions:
+
+*	``--port-id``
+	Add port redirection action to all flows actions.
+	Port redirection destination is defined in user_parameters.h
+	under PORT_ID_DST, default value = 1.
+
+*	``--rss``
+	Add RSS action to all flows actions,
+	The queues in RSS action will be all queues configured
+	in the app.
+
+*	``--queue``
+	Add queue action to all flows actions.
+	The queue index changes in round-robin order for each flow.
+
+	For example:
+		The app running with 4 RX queues
+		Flow #0: queue index 0
+		Flow #1: queue index 1
+		Flow #2: queue index 2
+		Flow #3: queue index 3
+		Flow #4: queue index 0
+		...
+
+*	``--jump``
+	Add jump action to all flows actions.
+	Jump action destination is defined in user_parameters.h
+	under ``JUMP_ACTION_TABLE``, default value = 2.
+
+*	``--mark``
+	Add mark action to all flows actions.
+	Mark action id is defined in user_parameters.h
+	under ``MARK_ID``, default value = 1.
+
+*	``--count``
+	Add count action to all flows actions.
+
+*	``--set-meta``
+	Add set-meta action to all flows actions.
+	Meta data is defined in user_parameters.h under ``META_DATA``
+	with full mask, default value = 1.
+
+*	``--set-tag``
+	Add set-tag action to all flows actions.
+	Meta data is defined in user_parameters.h under ``META_DATA``
+	with full mask, default value = 1.
+
+	Tag index is defined in user_parameters.h under ``TAG_INDEX``
+	with full mask, default value = 0.
+
+*	``--drop``
+	Add drop action to all flows actions.
+
+*	``--hairpin-queue=N``
+	Add hairpin queue action to all flows actions.
+	The queue will change in round robin state for each flow.
+
+	For example:
+		The app running with 4 RX hairpin queues and 4 normal RX queues
+		Flow #0: queue index 4
+		Flow #1: queue index 5
+		Flow #2: queue index 6
+		Flow #3: queue index 7
+		Flow #4: queue index 4
+		...
+
+*	``--hairpin-rss=N``
+	Add hairpin RSS action to all flows actions.
+	The queues in RSS action will be all hairpin queues configured
+	in the app.
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v5 3/5] app/flow-perf: add deletion rate calculation
  2020-05-06 12:36               ` [dpdk-dev] [PATCH v5 0/5] Introduce flow perf application Wisam Jaddo
  2020-05-06 12:36                 ` [dpdk-dev] [PATCH v5 1/5] app/flow-perf: add flow performance skeleton Wisam Jaddo
  2020-05-06 12:36                 ` [dpdk-dev] [PATCH v5 2/5] app/flow-perf: add insertion rate calculation Wisam Jaddo
@ 2020-05-06 12:36                 ` " Wisam Jaddo
  2020-05-06 12:36                 ` [dpdk-dev] [PATCH v5 4/5] app/flow-perf: add memory dump to app Wisam Jaddo
                                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 102+ messages in thread
From: Wisam Jaddo @ 2020-05-06 12:36 UTC (permalink / raw)
  To: dev, jackmin, thomas, jerinjacobk, gerlitz.or, l.yan, ajit.khaparde

Add the ability to test deletion rate for flow performance
application.

This feature is disabled by default, and can be enabled by
adding "--deletion-rate" to the application command line options.

Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 app/test-flow-perf/main.c      | 86 ++++++++++++++++++++++++++++++++++
 doc/guides/tools/flow-perf.rst |  4 ++
 2 files changed, 90 insertions(+)

diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index 463e4a782..1b42cffda 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -49,6 +49,7 @@ static uint16_t flow_actions;
 static uint8_t flow_attrs;
 static volatile bool force_quit;
 static bool dump_iterations;
+static  bool delete_flag;
 static struct rte_mempool *mbuf_mp;
 static uint32_t nb_lcores;
 static uint32_t flows_count;
@@ -64,6 +65,8 @@ usage(char *progname)
 		" flows to insert, default is 4,000,000\n");
 	printf("  --dump-iterations: To print rates for each"
 		" iteration\n");
+	printf("  --deletion-rate: Enable deletion rate"
+		" calculations\n");
 
 	printf("To set flow attributes:\n");
 	printf("  --ingress: set ingress attribute in flows\n");
@@ -145,6 +148,7 @@ args_parse(int argc, char **argv)
 		{ "help",                       0, 0, 0 },
 		{ "flows-count",                1, 0, 0 },
 		{ "dump-iterations",            0, 0, 0 },
+		{ "deletion-rate",              0, 0, 0 },
 		/* Attributes */
 		{ "ingress",                    0, 0, 0 },
 		{ "egress",                     0, 0, 0 },
@@ -260,6 +264,8 @@ args_parse(int argc, char **argv)
 			}
 			if (!strcmp(lgopts[opt_idx].name, "dump-iterations"))
 				dump_iterations = true;
+			if (!strcmp(lgopts[opt_idx].name, "deletion-rate"))
+				delete_flag = true;
 			break;
 		default:
 			printf("Invalid option: %s\n", argv[optind]);
@@ -279,9 +285,75 @@ print_flow_error(struct rte_flow_error error)
 		error.message ? error.message : "(no stated reason)");
 }
 
+/*
+ * Destroy the rules stored in flow_list on port_id and print the
+ * deletion rate.
+ *
+ * At most flows_count entries are visited and the walk stops at the
+ * first NULL entry. CPU time is sampled with clock() once every
+ * iterations_number deletions; the samples are printed when
+ * dump_iterations is set and their sum is used for the total rate.
+ * A failing rte_flow_destroy() ends the application through rte_exit().
+ */
+static inline void
+destroy_flows(int port_id, struct rte_flow **flow_list)
+{
+	struct rte_flow_error error;
+	clock_t start_iter, end_iter;
+	double cpu_time_used = 0;
+	double flows_rate;
+	double cpu_time_per_iter[MAX_ITERATIONS];
+	double delta;
+	double iter_time;
+	uint32_t i;
+	int iter_id;
+
+	/* -1 marks a slot for which no sample has been recorded. */
+	for (i = 0; i < MAX_ITERATIONS; i++)
+		cpu_time_per_iter[i] = -1;
+
+	if (iterations_number > flows_count)
+		iterations_number = flows_count;
+
+	/* Deletion Rate */
+	printf("Flows Deletion on port = %d\n", port_id);
+	start_iter = clock();
+	for (i = 0; i < flows_count; i++) {
+		if (!flow_list[i])
+			break;
+
+		/*
+		 * NOTE(review): presumably poisons the error struct so that
+		 * fields the driver leaves untouched are recognisable -
+		 * confirm.
+		 */
+		memset(&error, 0x33, sizeof(error));
+		if (rte_flow_destroy(port_id, flow_list[i], &error)) {
+			print_flow_error(error);
+			rte_exit(EXIT_FAILURE, "Error in deleting flow");
+		}
+
+		if (i && !((i + 1) % iterations_number)) {
+			/* Save the deletion rate of each iter */
+			end_iter = clock();
+			delta = (double) (end_iter - start_iter);
+			iter_time = delta / CLOCKS_PER_SEC;
+			iter_id = ((i + 1) / iterations_number) - 1;
+			/*
+			 * The per-iteration table only has MAX_ITERATIONS
+			 * slots; samples beyond it are not stored but still
+			 * count in the total.
+			 */
+			if (iter_id < MAX_ITERATIONS)
+				cpu_time_per_iter[iter_id] = iter_time;
+			cpu_time_used += iter_time;
+			start_iter = clock();
+		}
+	}
+
+	/* Deletion rate per iteration */
+	if (dump_iterations)
+		for (i = 0; i < MAX_ITERATIONS; i++) {
+			if (cpu_time_per_iter[i] == -1)
+				continue;
+			delta = (double)(iterations_number /
+				cpu_time_per_iter[i]);
+			flows_rate = delta / 1000;
+			printf(":: Iteration #%d: %d flows "
+				"in %f sec[ Rate = %f K/Sec ]\n",
+				i, iterations_number,
+				cpu_time_per_iter[i], flows_rate);
+		}
+
+	/* Deletion rate for all flows */
+	flows_rate = ((double) (flows_count / cpu_time_used) / 1000);
+	printf("\n:: Total flow deletion rate -> %f K/Sec\n",
+		flows_rate);
+	printf(":: The time for deleting %d in flows %f seconds\n",
+		flows_count, cpu_time_used);
+}
+
 static inline void
 flows_handler(void)
 {
+	struct rte_flow **flow_list;
 	struct rte_flow_error error;
 	clock_t start_iter, end_iter;
 	double cpu_time_used;
@@ -293,6 +365,7 @@ flows_handler(void)
 	int port_id;
 	int iter_id;
 	uint32_t eagain_counter = 0;
+	uint32_t flow_index;
 
 	nr_ports = rte_eth_dev_count_avail();
 
@@ -304,8 +377,14 @@ flows_handler(void)
 
 	printf(":: Flows Count per port: %d\n", flows_count);
 
+	flow_list = rte_zmalloc("flow_list",
+		(sizeof(struct rte_flow *) * flows_count) + 1, 0);
+	if (flow_list == NULL)
+		rte_exit(EXIT_FAILURE, "No Memory available!");
+
 	for (port_id = 0; port_id < nr_ports; port_id++) {
 		cpu_time_used = 0;
+		flow_index = 0;
 		if (flow_group > 0) {
 			/*
 			 * Create global rule to jump into flow_group,
@@ -322,6 +401,7 @@ flows_handler(void)
 				print_flow_error(error);
 				rte_exit(EXIT_FAILURE, "error in creating flow");
 			}
+			flow_list[flow_index++] = flow;
 		}
 
 		/* Insertion Rate */
@@ -345,6 +425,8 @@ flows_handler(void)
 				rte_exit(EXIT_FAILURE, "error in creating flow");
 			}
 
+			flow_list[flow_index++] = flow;
+
 			if (i && !((i + 1) % iterations_number)) {
 				/* Save the insertion rate of each iter */
 				end_iter = clock();
@@ -378,6 +460,9 @@ flows_handler(void)
 		printf(":: The time for creating %d in flows %f seconds\n",
 						flows_count, cpu_time_used);
 		printf(":: EAGAIN counter = %d\n", eagain_counter);
+
+		if (delete_flag)
+			destroy_flows(port_id, flow_list);
 	}
 }
 
@@ -532,6 +617,7 @@ main(int argc, char **argv)
 	dump_iterations = false;
 	flows_count = DEFAULT_RULES_COUNT;
 	iterations_number = DEFAULT_ITERATION;
+	delete_flag = false;
 	flow_group = 0;
 
 	signal(SIGINT, signal_handler);
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
index b45fccd69..b64c23875 100644
--- a/doc/guides/tools/flow-perf.rst
+++ b/doc/guides/tools/flow-perf.rst
@@ -17,6 +17,8 @@ give different flow each time, and all other items will have open masks.
 
 The current design have single core insertion rate.
 
+The application also provide the ability to measure rte flow deletion rate.
+
 
 Compiling the Application
 =========================
@@ -63,6 +65,8 @@ The command line options are:
 	Print rates for each iteration of flows.
 	Default iteration is 1,00,000.
 
+*	``--deletion-rate``
+	Enable deletion rate calculations.
 
 Attributes:
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v5 4/5] app/flow-perf: add memory dump to app
  2020-05-06 12:36               ` [dpdk-dev] [PATCH v5 0/5] Introduce flow perf application Wisam Jaddo
                                   ` (2 preceding siblings ...)
  2020-05-06 12:36                 ` [dpdk-dev] [PATCH v5 3/5] app/flow-perf: add deletion " Wisam Jaddo
@ 2020-05-06 12:36                 ` Wisam Jaddo
  2020-05-06 12:36                 ` [dpdk-dev] [PATCH v5 5/5] app/flow-perf: add packet forwarding support Wisam Jaddo
  2020-05-06 12:50                 ` [dpdk-dev] [PATCH v5 0/5] Introduce flow perf application Thomas Monjalon
  5 siblings, 0 replies; 102+ messages in thread
From: Wisam Jaddo @ 2020-05-06 12:36 UTC (permalink / raw)
  To: dev, jackmin, thomas, jerinjacobk, gerlitz.or, l.yan, ajit.khaparde
  Cc: Suanming Mou

Introduce new feature to dump memory statistics of each socket
and a total for all before and after the creation.

This will give two main advantages:
1- Check the memory consumption for a large number of flows
"insertion rate scenario alone"

2- Check that there is no memory leakage after doing insertion then
deletion.

Signed-off-by: Suanming Mou <suanmingm@mellanox.com>
Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 app/test-flow-perf/main.c      | 71 +++++++++++++++++++++++++++++++++-
 doc/guides/tools/flow-perf.rst |  6 ++-
 2 files changed, 75 insertions(+), 2 deletions(-)

diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index 1b42cffda..c4e92f938 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -49,7 +49,8 @@ static uint16_t flow_actions;
 static uint8_t flow_attrs;
 static volatile bool force_quit;
 static bool dump_iterations;
-static  bool delete_flag;
+static bool delete_flag;
+static bool dump_socket_mem_flag;
 static struct rte_mempool *mbuf_mp;
 static uint32_t nb_lcores;
 static uint32_t flows_count;
@@ -67,6 +68,7 @@ usage(char *progname)
 		" iteration\n");
 	printf("  --deletion-rate: Enable deletion rate"
 		" calculations\n");
+	printf("  --dump-socket-mem: To dump all socket memory\n");
 
 	printf("To set flow attributes:\n");
 	printf("  --ingress: set ingress attribute in flows\n");
@@ -149,6 +151,7 @@ args_parse(int argc, char **argv)
 		{ "flows-count",                1, 0, 0 },
 		{ "dump-iterations",            0, 0, 0 },
 		{ "deletion-rate",              0, 0, 0 },
+		{ "dump-socket-mem",            0, 0, 0 },
 		/* Attributes */
 		{ "ingress",                    0, 0, 0 },
 		{ "egress",                     0, 0, 0 },
@@ -266,6 +269,8 @@ args_parse(int argc, char **argv)
 				dump_iterations = true;
 			if (!strcmp(lgopts[opt_idx].name, "deletion-rate"))
 				delete_flag = true;
+			if (!strcmp(lgopts[opt_idx].name, "dump-socket-mem"))
+				dump_socket_mem_flag = true;
 			break;
 		default:
 			printf("Invalid option: %s\n", argv[optind]);
@@ -277,6 +282,62 @@ args_parse(int argc, char **argv)
 	printf("end_flow\n");
 }
 
+/*
+ * Walk every NUMA node, accumulate the malloc heap statistics of the
+ * nodes whose stats query succeeds and reports a non-zero heap size,
+ * and return the accumulated number of allocated bytes.
+ *
+ * The per-socket and the total reports are written to f only when
+ * dump_socket_mem_flag is set.
+ */
+static size_t
+dump_socket_mem(FILE *f)
+{
+	struct rte_malloc_socket_stats stats;
+	size_t sum_total = 0;
+	size_t sum_alloc = 0;
+	size_t sum_free = 0;
+	unsigned int cnt_alloc = 0;
+	unsigned int cnt_free = 0;
+	unsigned int socket;
+	bool any_socket = false;
+
+	for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
+		/* Skip sockets without stats or without any heap memory. */
+		if (rte_malloc_get_socket_stats(socket, &stats))
+			continue;
+		if (!stats.heap_totalsz_bytes)
+			continue;
+
+		any_socket = true;
+		sum_total += stats.heap_totalsz_bytes;
+		sum_alloc += stats.heap_allocsz_bytes;
+		sum_free += stats.heap_freesz_bytes;
+		cnt_alloc += stats.alloc_count;
+		cnt_free += stats.free_count;
+
+		if (!dump_socket_mem_flag)
+			continue;
+
+		fprintf(f, "::::::::::::::::::::::::::::::::::::::::");
+		fprintf(f,
+			"\nSocket %u:\nsize(M) total: %.6lf\nalloc:"
+			" %.6lf(%.3lf%%)\nfree: %.6lf"
+			"\nmax: %.6lf"
+			"\ncount alloc: %u\nfree: %u\n",
+			socket,
+			stats.heap_totalsz_bytes / 1.0e6,
+			stats.heap_allocsz_bytes / 1.0e6,
+			(double)stats.heap_allocsz_bytes * 100 /
+			(double)stats.heap_totalsz_bytes,
+			stats.heap_freesz_bytes / 1.0e6,
+			stats.greatest_free_size / 1.0e6,
+			stats.alloc_count,
+			stats.free_count);
+		fprintf(f, "::::::::::::::::::::::::::::::::::::::::");
+	}
+
+	if (dump_socket_mem_flag && any_socket) {
+		fprintf(f,
+			"\nTotal: size(M)\ntotal: %.6lf"
+			"\nalloc: %.6lf(%.3lf%%)\nfree: %.6lf"
+			"\ncount alloc: %u\nfree: %u\n",
+			sum_total / 1.0e6, sum_alloc / 1.0e6,
+			(double)sum_alloc * 100 / (double)sum_total,
+			sum_free / 1.0e6,
+			cnt_alloc, cnt_free);
+		fprintf(f, "::::::::::::::::::::::::::::::::::::::::\n");
+	}
+
+	return sum_alloc;
+}
+
 static void
 print_flow_error(struct rte_flow_error error)
 {
@@ -608,6 +669,7 @@ main(int argc, char **argv)
 	int ret;
 	uint16_t port;
 	struct rte_flow_error error;
+	int64_t alloc, last_alloc;
 
 	ret = rte_eal_init(argc, argv);
 	if (ret < 0)
@@ -618,6 +680,7 @@ main(int argc, char **argv)
 	flows_count = DEFAULT_RULES_COUNT;
 	iterations_number = DEFAULT_ITERATION;
 	delete_flag = false;
+	dump_socket_mem_flag = false;
 	flow_group = 0;
 
 	signal(SIGINT, signal_handler);
@@ -634,7 +697,13 @@ main(int argc, char **argv)
 	if (nb_lcores <= 1)
 		rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
 
+	last_alloc = (int64_t)dump_socket_mem(stdout);
 	flows_handler();
+	alloc = (int64_t)dump_socket_mem(stdout);
+
+	if (last_alloc)
+		fprintf(stdout, ":: Memory allocation change(M): %.6lf\n",
+		(alloc - last_alloc) / 1.0e6);
 
 	RTE_ETH_FOREACH_DEV(port) {
 		rte_flow_flush(port, &error);
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
index b64c23875..6e29cde79 100644
--- a/doc/guides/tools/flow-perf.rst
+++ b/doc/guides/tools/flow-perf.rst
@@ -17,7 +17,8 @@ give different flow each time, and all other items will have open masks.
 
 The current design have single core insertion rate.
 
-The application also provide the ability to measure rte flow deletion rate.
+The application also provides the ability to measure the rte_flow deletion
+rate, in addition to memory consumption before and after the flows creation.
 
 
 Compiling the Application
@@ -68,6 +69,9 @@ The command line options are:
 *	``--deletion-rate``
 	Enable deletion rate calculations.
 
+*	``--dump-socket-mem``
+	Dump the memory stats for each socket before the insertion and after.
+
 Attributes:
 
 *	``--ingress``
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v5 5/5] app/flow-perf: add packet forwarding support
  2020-05-06 12:36               ` [dpdk-dev] [PATCH v5 0/5] Introduce flow perf application Wisam Jaddo
                                   ` (3 preceding siblings ...)
  2020-05-06 12:36                 ` [dpdk-dev] [PATCH v5 4/5] app/flow-perf: add memory dump to app Wisam Jaddo
@ 2020-05-06 12:36                 ` Wisam Jaddo
  2020-05-06 12:50                 ` [dpdk-dev] [PATCH v5 0/5] Introduce flow perf application Thomas Monjalon
  5 siblings, 0 replies; 102+ messages in thread
From: Wisam Jaddo @ 2020-05-06 12:36 UTC (permalink / raw)
  To: dev, jackmin, thomas, jerinjacobk, gerlitz.or, l.yan, ajit.khaparde

Introduce packet forwarding support to the app to do
some performance measurements.

The measurements are reported in packets per second.
The forwarding will start after the end
of insertion/deletion operations.

The support has single-core and multi-core performance measurements.

Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 app/test-flow-perf/main.c      | 300 +++++++++++++++++++++++++++++++++
 doc/guides/tools/flow-perf.rst |   6 +
 2 files changed, 306 insertions(+)

diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index c4e92f938..06afd3afa 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -27,6 +27,7 @@
 #include <signal.h>
 #include <stdbool.h>
 #include <sys/time.h>
+#include <unistd.h>
 
 #include <rte_malloc.h>
 #include <rte_mempool.h>
@@ -47,15 +48,45 @@ static uint8_t flow_group;
 static uint16_t flow_items;
 static uint16_t flow_actions;
 static uint8_t flow_attrs;
+
 static volatile bool force_quit;
 static bool dump_iterations;
 static bool delete_flag;
 static bool dump_socket_mem_flag;
+static bool enable_fwd;
+
 static struct rte_mempool *mbuf_mp;
 static uint32_t nb_lcores;
 static uint32_t flows_count;
 static uint32_t iterations_number;
 static uint32_t hairpinq;
+static uint32_t nb_lcores;
+
+#define MAX_PKT_BURST    32
+#define LCORE_MODE_PKT    1
+#define LCORE_MODE_STATS  2
+#define MAX_STREAMS      64
+#define MAX_LCORES       64
+
+/*
+ * Port/queue pairs describing one forwarding stream.
+ * NOTE(review): presumably packets received on rx_port/rx_queue are sent
+ * out on tx_port/tx_queue; the code that fills and consumes this struct
+ * is not visible here - confirm.
+ */
+struct stream {
+	int tx_port;
+	int tx_queue;
+	int rx_port;
+	int rx_queue;
+};
+
+/*
+ * Element type of the lcore_infos[MAX_LCORES] table: stream assignment,
+ * packet counters and a burst buffer.
+ */
+struct lcore_info {
+	/* NOTE(review): presumably LCORE_MODE_PKT or LCORE_MODE_STATS - confirm */
+	int mode;
+	/* NOTE(review): presumably the number of used entries in streams[] - confirm */
+	int streams_nb;
+	struct stream streams[MAX_STREAMS];
+	/* stats */
+	uint64_t tx_pkts;	/* packets accepted by rte_eth_tx_burst() in do_tx() */
+	uint64_t tx_drops;	/* packets do_tx() could not send and freed */
+	uint64_t rx_pkts;	/* packets returned by rte_eth_rx_burst() in do_rx() */
+	/* burst buffer: filled by do_rx(), transmitted by do_tx() */
+	struct rte_mbuf *pkts[MAX_PKT_BURST];
+} __attribute__((__aligned__(64))); /* let it be cacheline aligned */
+
+static struct lcore_info lcore_infos[MAX_LCORES];
 
 static void
 usage(char *progname)
@@ -69,6 +100,8 @@ usage(char *progname)
 	printf("  --deletion-rate: Enable deletion rate"
 		" calculations\n");
 	printf("  --dump-socket-mem: To dump all socket memory\n");
+	printf("  --enable-fwd: To enable packets forwarding"
+		" after insertion\n");
 
 	printf("To set flow attributes:\n");
 	printf("  --ingress: set ingress attribute in flows\n");
@@ -152,6 +185,7 @@ args_parse(int argc, char **argv)
 		{ "dump-iterations",            0, 0, 0 },
 		{ "deletion-rate",              0, 0, 0 },
 		{ "dump-socket-mem",            0, 0, 0 },
+		{ "enable-fwd",                 0, 0, 0 },
 		/* Attributes */
 		{ "ingress",                    0, 0, 0 },
 		{ "egress",                     0, 0, 0 },
@@ -271,6 +305,8 @@ args_parse(int argc, char **argv)
 				delete_flag = true;
 			if (!strcmp(lgopts[opt_idx].name, "dump-socket-mem"))
 				dump_socket_mem_flag = true;
+			if (!strcmp(lgopts[opt_idx].name, "enable-fwd"))
+				enable_fwd = true;
 			break;
 		default:
 			printf("Invalid option: %s\n", argv[optind]);
@@ -538,6 +574,265 @@ signal_handler(int signum)
 	}
 }
 
+static inline uint16_t /* Receive one burst into li->pkts; returns the packet count. */
+do_rx(struct lcore_info *li, uint16_t rx_port, uint16_t rx_queue)
+{
+	uint16_t cnt = 0;
+	cnt = rte_eth_rx_burst(rx_port, rx_queue, li->pkts, MAX_PKT_BURST);
+	li->rx_pkts += cnt; /* running total read by packet_per_second_stats() */
+	return cnt;
+}
+
+static inline void /* Send the first cnt packets of li->pkts; free the ones not sent. */
+do_tx(struct lcore_info *li, uint16_t cnt, uint16_t tx_port,
+			uint16_t tx_queue)
+{
+	uint16_t nr_tx = 0;
+	uint16_t i;
+
+	nr_tx = rte_eth_tx_burst(tx_port, tx_queue, li->pkts, cnt);
+	li->tx_pkts  += nr_tx;
+	li->tx_drops += cnt - nr_tx; /* whatever the tx burst did not take */
+
+	for (i = nr_tx; i < cnt; i++) /* unsent mbufs are still ours to free */
+		rte_pktmbuf_free(li->pkts[i]);
+}
+
+/*
+ * Format n with a comma after every three digits and return buf,
+ * e.g. n = 1799321 yields "1,799,321".
+ *
+ * buf must hold at least 27 bytes: UINT64_MAX has 20 digits and
+ * needs 6 commas plus the terminating NUL. p[] has 7 slots because
+ * a uint64_t splits into at most 7 three-digit groups.
+ */
+static char *
+pretty_number(uint64_t n, char *buf)
+{
+	char p[7][4];
+	int i = 0;
+	int off = 0;
+
+	while (n >= 1000) { /* >= so that 1000 itself becomes "1,000" */
+		sprintf(p[i], "%03d", (int)(n % 1000));
+		n /= 1000;
+		i += 1;
+	}
+
+	sprintf(p[i++], "%d", (int)n); /* leading group, at most 3 digits here */
+
+	while (i--) /* groups were stored least significant first */
+		off += sprintf(buf + off, "%s,", p[i]);
+	buf[off - 1] = '\0'; /* drop the trailing comma */
+
+	return buf;
+}
+
+/*
+ * Print per-lcore tx / tx-drop / rx deltas once a second until force_quit.
+ */
+static void
+packet_per_second_stats(void)
+{
+	struct lcore_info *old, *li, *oli;
+	int nr_lines = 0;
+	int i;
+
+	old = rte_zmalloc("old", sizeof(lcore_infos), 0); /* previous counters */
+	if (old == NULL)
+		rte_exit(EXIT_FAILURE, "No Memory available!");
+
+	memcpy(old, lcore_infos, sizeof(lcore_infos));
+
+	while (!force_quit) {
+		uint64_t total_tx_pkts = 0;
+		uint64_t total_rx_pkts = 0;
+		uint64_t total_tx_drops = 0;
+		uint64_t tx_delta, rx_delta, drops_delta;
+		char buf[3][32];
+		int nr_valid_core = 0;
+
+		sleep(1);
+
+		if (nr_lines) { /* move the cursor up to overwrite the last table */
+			char go_up_nr_lines[16];
+
+			sprintf(go_up_nr_lines, "%c[%dA\r", 27, nr_lines);
+			printf("%s\r", go_up_nr_lines);
+		}
+
+		printf("\n%6s %16s %16s %16s\n", "core", "tx", "tx drops", "rx");
+		printf("%6s %16s %16s %16s\n", "------", "----------------",
+			"----------------", "----------------");
+		nr_lines = 3;
+		for (i = 0; i < MAX_LCORES; i++) {
+			li  = &lcore_infos[i];
+			oli = &old[i];
+			if (li->mode != LCORE_MODE_PKT)
+				continue;
+
+			tx_delta    = li->tx_pkts  - oli->tx_pkts;
+			rx_delta    = li->rx_pkts  - oli->rx_pkts;
+			drops_delta = li->tx_drops - oli->tx_drops;
+			printf("%6d %16s %16s %16s\n", i,
+				pretty_number(tx_delta,    buf[0]),
+				pretty_number(drops_delta, buf[1]),
+				pretty_number(rx_delta,    buf[2]));
+
+			total_tx_pkts  += tx_delta;
+			total_rx_pkts  += rx_delta;
+			total_tx_drops += drops_delta;
+
+			nr_valid_core++;
+			nr_lines += 1;
+		}
+
+		if (nr_valid_core > 1) { /* a total row only helps with several cores */
+			printf("%6s %16s %16s %16s\n", "total",
+				pretty_number(total_tx_pkts,  buf[0]),
+				pretty_number(total_tx_drops, buf[1]),
+				pretty_number(total_rx_pkts,  buf[2]));
+			nr_lines += 1;
+		}
+
+		memcpy(old, lcore_infos, sizeof(lcore_infos)); /* next baseline */
+	}
+	rte_free(old); /* the snapshot was never released before */
+}
+
+/* Per-lcore entry point: print stats or forward packets, depending on mode. */
+static int
+start_forwarding(void *data __rte_unused)
+{
+	unsigned int lcore = rte_lcore_id();
+	struct lcore_info *li;
+	struct stream *st;
+	int stream_id;
+	uint16_t cnt;
+
+	/* lcore_infos[] has MAX_LCORES slots; ids beyond it have no work. */
+	if (lcore >= MAX_LCORES || !lcore_infos[lcore].mode)
+		return 0;
+	li = &lcore_infos[lcore];
+
+	if (li->mode == LCORE_MODE_STATS) {
+		printf(":: started stats on lcore %u\n", lcore);
+		packet_per_second_stats();
+		return 0;
+	}
+
+	while (!force_quit)
+		for (stream_id = 0; stream_id < MAX_STREAMS; stream_id++) {
+			st = &li->streams[stream_id];
+			if (st->rx_port == -1)
+				continue; /* unused slot */
+			cnt = do_rx(li, st->rx_port, st->rx_queue);
+			if (cnt)
+				do_tx(li, cnt, st->tx_port, st->tx_queue);
+		}
+	return 0;
+}
+
+static void /* Assign forwarding streams to lcores and print the mapping. */
+init_lcore_info(void)
+{
+	int i, j;
+	unsigned int lcore;
+	uint16_t nr_port;
+	uint16_t queue;
+	int port;
+	int stream_id = 0;
+	int streams_per_core;
+	int unassigned_streams;
+	int nb_fwd_streams;
+	nr_port = rte_eth_dev_count_avail();
+
+	/* First logical core is reserved for stats printing */
+	lcore = rte_get_next_lcore(-1, 0, 0);
+	lcore_infos[lcore].mode = LCORE_MODE_STATS;
+
+	/*
+	 * Initialize all cores.
+	 * Every stream slot of every core starts with -1 in all
+	 * fields, which marks the slot as unused (not assigned
+	 * yet).
+	 */
+	for (i = 0; i < MAX_LCORES; i++)
+		for (j = 0; j < MAX_STREAMS; j++) {
+			lcore_infos[i].streams[j].tx_port = -1;
+			lcore_infos[i].streams[j].rx_port = -1;
+			lcore_infos[i].streams[j].tx_queue = -1;
+			lcore_infos[i].streams[j].rx_queue = -1;
+			lcore_infos[i].streams_nb = 0;
+		}
+
+	/*
+	 * Calculate the total number of streams (ports * RXQ_NUM)
+	 * and split that count between the available logical cores,
+	 * except the first core, which is reserved for printing
+	 * stats.
+	 */
+	nb_fwd_streams = nr_port * RXQ_NUM;
+	if ((int)(nb_lcores - 1) >= nb_fwd_streams) /* enough cores: one stream each */
+		for (i = 0; i < (int)(nb_lcores - 1); i++) {
+			lcore = rte_get_next_lcore(lcore, 0, 0);
+			lcore_infos[lcore].streams_nb = 1;
+		}
+	else {
+		streams_per_core = nb_fwd_streams / (nb_lcores - 1); /* NOTE(review): not checked against MAX_STREAMS */
+		unassigned_streams = nb_fwd_streams % (nb_lcores - 1);
+		for (i = 0; i < (int)(nb_lcores - 1); i++) {
+			lcore = rte_get_next_lcore(lcore, 0, 0);
+			lcore_infos[lcore].streams_nb = streams_per_core;
+			if (unassigned_streams) { /* hand out the remainder one by one */
+				lcore_infos[lcore].streams_nb++;
+				unassigned_streams--;
+			}
+		}
+	}
+
+	/*
+	 * Fill in the streams of each core according to that core's
+	 * stream count.
+	 * A stream transmits on the same port and queue it receives
+	 * on: packets received on port 0 queue 0 are sent back out
+	 * of port 0 queue 0 by the same logical core.
+	 * NOTE(review): ports are assumed to be numbered 0..nr_port-1.
+	 */
+	lcore = rte_get_next_lcore(-1, 0, 0);
+	for (port = 0; port < nr_port; port++) {
+		/* Create FWD stream */
+		for (queue = 0; queue < RXQ_NUM; queue++) {
+			if (!lcore_infos[lcore].streams_nb || /* core has no quota (e.g. stats core) */
+				!(stream_id % lcore_infos[lcore].streams_nb)) { /* or its quota is used up */
+				lcore = rte_get_next_lcore(lcore, 0, 0);
+				lcore_infos[lcore].mode = LCORE_MODE_PKT;
+				stream_id = 0;
+			}
+			lcore_infos[lcore].streams[stream_id].rx_queue = queue;
+			lcore_infos[lcore].streams[stream_id].tx_queue = queue;
+			lcore_infos[lcore].streams[stream_id].rx_port = port;
+			lcore_infos[lcore].streams[stream_id].tx_port = port;
+			stream_id++;
+		}
+	}
+
+	/* Print all streams */
+	printf(":: Stream -> core id[N]: (rx_port, rx_queue)->(tx_port, tx_queue)\n");
+	for (i = 0; i < MAX_LCORES; i++)
+		for (j = 0; j < MAX_STREAMS; j++) {
+			/* No more streams for this core */
+			if (lcore_infos[i].streams[j].tx_port == -1)
+				break;
+			printf("Stream -> core id[%d]: (%d,%d)->(%d,%d)\n",
+				i,
+				lcore_infos[i].streams[j].rx_port,
+				lcore_infos[i].streams[j].rx_queue,
+				lcore_infos[i].streams[j].tx_port,
+				lcore_infos[i].streams[j].tx_queue);
+		}
+}
+
 static void
 init_port(void)
 {
@@ -705,6 +1000,11 @@ main(int argc, char **argv)
 		fprintf(stdout, ":: Memory allocation change(M): %.6lf\n",
 		(alloc - last_alloc) / 1.0e6);
 
+	if (enable_fwd) {
+		init_lcore_info();
+		rte_eal_mp_remote_launch(start_forwarding, NULL, CALL_MASTER);
+	}
+
 	RTE_ETH_FOREACH_DEV(port) {
 		rte_flow_flush(port, &error);
 		rte_eth_dev_stop(port);
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
index 6e29cde79..d86943947 100644
--- a/doc/guides/tools/flow-perf.rst
+++ b/doc/guides/tools/flow-perf.rst
@@ -20,6 +20,8 @@ The current design have single core insertion rate.
 The application also provide the ability to measure rte flow deletion rate,
 in addition to memory consumption before and after the flows creation.
 
+The app supports single and multi core performance measurements.
+
 
 Compiling the Application
 =========================
@@ -72,6 +74,10 @@ The command line options are:
 *	``--dump-socket-mem``
 	Dump the memory stats for each socket before the insertion and after.
 
+*	``--enable-fwd``
+	Enable packets forwarding after insertion/deletion operations.
+
+
 Attributes:
 
 *	``--ingress``
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH v5 0/5] Introduce flow perf application
  2020-05-06 12:36               ` [dpdk-dev] [PATCH v5 0/5] Introduce flow perf application Wisam Jaddo
                                   ` (4 preceding siblings ...)
  2020-05-06 12:36                 ` [dpdk-dev] [PATCH v5 5/5] app/flow-perf: add packet forwarding support Wisam Jaddo
@ 2020-05-06 12:50                 ` Thomas Monjalon
  5 siblings, 0 replies; 102+ messages in thread
From: Thomas Monjalon @ 2020-05-06 12:50 UTC (permalink / raw)
  To: dev
  Cc: jackmin, jerinjacobk, gerlitz.or, l.yan, ajit.khaparde,
	Wisam Jaddo, ferruh.yigit, ktraynor, arybchenko, wenzhuo.lu

+Cc more maintainers for review

06/05/2020 14:36, Wisam Jaddo:
> Add new application to test rte flow performance from:
> - Insertion rate.
> - Deletion rate.
> - Memory consumption.
> - PPS forward measurement.
> 
> ---
> v5:
> * Add app to 20.05 release notes.
> * Addressing comments.
> * Fix compilation issue for gcc >= 10.
> * Fix documentation.
> * Remove unneeded CFLAGS.
> * Remove unused includes.
> * Addressing format comments.
> * Move hairpin to be option use only.
> * Use RSS hash IP + TCP in ports and rss action.
> * Introduce and use new macro for bit flags.
> 
> v4:
> * Fix compilation error due to variable set but not used.
> 
> v3:
> * Fix passing hairpin queues to hairpin rss action.
> 
> v2:
> * reset cpu_time_used every port.
> * generate different RSS action every flow with different RETA.
> * Fix in commit log message
> 
> Wisam Jaddo (5):
>   app/flow-perf: add flow performance skeleton
>   app/flow-perf: add insertion rate calculation
>   app/flow-perf: add deletion rate calculation
>   app/flow-perf: add memory dump to app
>   app/flow-perf: add packet forwarding support
> 
>  MAINTAINERS                            |    5 +
>  app/Makefile                           |    1 +
>  app/meson.build                        |    1 +
>  app/test-flow-perf/Makefile            |   26 +
>  app/test-flow-perf/actions_gen.c       |   88 ++
>  app/test-flow-perf/actions_gen.h       |   53 ++
>  app/test-flow-perf/config.h            |   29 +
>  app/test-flow-perf/flow_gen.c          |  179 +++++
>  app/test-flow-perf/flow_gen.h          |   63 ++
>  app/test-flow-perf/items_gen.c         |  265 +++++++
>  app/test-flow-perf/items_gen.h         |   67 ++
>  app/test-flow-perf/main.c              | 1014 ++++++++++++++++++++++++
>  app/test-flow-perf/meson.build         |   11 +
>  config/common_base                     |    5 +
>  doc/guides/rel_notes/release_20_05.rst |   10 +
>  doc/guides/tools/flow-perf.rst         |  239 ++++++
>  doc/guides/tools/index.rst             |    1 +
>  17 files changed, 2057 insertions(+)
>  create mode 100644 app/test-flow-perf/Makefile
>  create mode 100644 app/test-flow-perf/actions_gen.c
>  create mode 100644 app/test-flow-perf/actions_gen.h
>  create mode 100644 app/test-flow-perf/config.h
>  create mode 100644 app/test-flow-perf/flow_gen.c
>  create mode 100644 app/test-flow-perf/flow_gen.h
>  create mode 100644 app/test-flow-perf/items_gen.c
>  create mode 100644 app/test-flow-perf/items_gen.h
>  create mode 100644 app/test-flow-perf/main.c
>  create mode 100644 app/test-flow-perf/meson.build
>  create mode 100644 doc/guides/tools/flow-perf.rst
> 
> 






^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH v5 1/5] app/flow-perf: add flow performance skeleton
  2020-05-06 12:36                 ` [dpdk-dev] [PATCH v5 1/5] app/flow-perf: add flow performance skeleton Wisam Jaddo
@ 2020-05-06 14:25                   ` Andrew Rybchenko
  2020-05-06 17:07                     ` Wisam Monther
  2020-05-11 11:08                   ` [dpdk-dev] [PATCH v6 0/5] Introduce flow perf application Wisam Jaddo
  2020-05-11 11:09                   ` [dpdk-dev] [PATCH v6 0/5] Introduce flow perf application Wisam Jaddo
  2 siblings, 1 reply; 102+ messages in thread
From: Andrew Rybchenko @ 2020-05-06 14:25 UTC (permalink / raw)
  To: Wisam Jaddo, dev, jackmin, thomas, jerinjacobk, gerlitz.or,
	l.yan, ajit.khaparde

On 5/6/20 3:36 PM, Wisam Jaddo wrote:
> Add flow performance application skeleton.
> 
> Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
> ---

[snip]

> diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
> new file mode 100644
> index 000000000..7a924cdb7
> --- /dev/null
> +++ b/app/test-flow-perf/main.c
> @@ -0,0 +1,200 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2020 Mellanox Technologies, Ltd
> + *
> + * This file contain the application main file
> + * This application provides the user the ability to test the
> + * insertion rate for specific rte_flow rule under stress state ~4M rule/
> + *
> + * Then it will also provide packet per second measurement after installing
> + * all rules, the user may send traffic to test the PPS that match the rules
> + * after all rules are installed, to check performance or functionality after
> + * the stress.
> + *
> + * The flows insertion will go for all ports first, then it will print the
> + * results, after that the application will go into forwarding packets mode
> + * it will start receiving traffic if any and then forwarding it back and
> + * gives packet per second measurement.
> + */
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <stdint.h>
> +#include <inttypes.h>
> +#include <stdarg.h>
> +#include <errno.h>
> +#include <getopt.h>
> +#include <signal.h>
> +#include <stdbool.h>
> +#include <sys/time.h>
> +
> +#include <rte_malloc.h>
> +#include <rte_mempool.h>
> +#include <rte_mbuf.h>
> +#include <rte_ethdev.h>
> +#include <rte_flow.h>
> +
> +#include "config.h"
> +
> +static uint32_t nb_lcores;
> +static struct rte_mempool *mbuf_mp;
> +
> +static void
> +usage(char *progname)
> +{
> +	printf("\nusage: %s\n", progname);
> +}
> +
> +static void
> +args_parse(int argc, char **argv)
> +{
> +	char **argvopt;
> +	int opt;
> +	int opt_idx;
> +	static struct option lgopts[] = {
> +		/* Control */
> +		{ "help",                       0, 0, 0 },
> +	};
> +
> +	argvopt = argv;
> +
> +	while ((opt = getopt_long(argc, argvopt, "",
> +				lgopts, &opt_idx)) != EOF) {
> +		switch (opt) {
> +		case 0:
> +			if (!strcmp(lgopts[opt_idx].name, "help")) {

DPDK coding style recommends comparing against 0 explicitly
instead of using logical not.

> +				usage(argv[0]);
> +				rte_exit(EXIT_SUCCESS, "Displayed help\n");
> +			}
> +			break;
> +		default:
> +			printf("Invalid option: %s\n", argv[optind]);

Again, sorry if I missed the reply: why is the error not logged
to stderr?

> +			usage(argv[0]);
> +			rte_exit(EXIT_SUCCESS, "Invalid option\n");
> +			break;
> +		}
> +	}
> +}
> +
> +static void
> +init_port(void)
> +{
> +	int ret;
> +	uint16_t i;
> +	uint16_t port_id;
> +	uint16_t nr_ports;
> +	struct rte_eth_conf port_conf = {
> +		.rx_adv_conf = {
> +			.rss_conf.rss_hf =
> +				ETH_RSS_IP  |
> +				ETH_RSS_TCP,
> +		}
> +	};
> +	struct rte_eth_txconf txq_conf;
> +	struct rte_eth_rxconf rxq_conf;
> +	struct rte_eth_dev_info dev_info;
> +
> +	nr_ports = rte_eth_dev_count_avail();
> +	if (nr_ports == 0)
> +		rte_exit(EXIT_FAILURE, "Error: no port detected\n");
> +
> +	mbuf_mp = rte_pktmbuf_pool_create("mbuf_pool",
> +					TOTAL_MBUF_NUM, MBUF_CACHE_SIZE,
> +					0, MBUF_SIZE,
> +					rte_socket_id());
> +	if (mbuf_mp == NULL)
> +		rte_exit(EXIT_FAILURE, "Error: can't init mbuf pool\n");
> +
> +	for (port_id = 0; port_id < nr_ports; port_id++) {
> +		ret = rte_eth_dev_info_get(port_id, &dev_info);
> +		if (ret != 0)
> +			rte_exit(EXIT_FAILURE,
> +				"Error during getting device"
> +				" (port %u) info: %s\n",
> +				port_id, strerror(-ret));
> +
> +		port_conf.txmode.offloads &= dev_info.tx_offload_capa;
> +		port_conf.rxmode.offloads &= dev_info.rx_offload_capa;
> +
> +		printf(":: initializing port: %d\n", port_id);
> +
> +		ret = rte_eth_dev_configure(port_id, RXQ_NUM,
> +				TXQ_NUM, &port_conf);
> +		if (ret < 0)
> +			rte_exit(EXIT_FAILURE,
> +				":: cannot configure device: err=%d, port=%u\n",
> +				ret, port_id);
> +
> +		rxq_conf = dev_info.default_rxconf;
> +		rxq_conf.offloads = port_conf.rxmode.offloads;


As far as I know there is no need to repeat the port offloads
at queue level, so this line is not necessary.

> +
> +		for (i = 0; i < RXQ_NUM; i++) {
> +			ret = rte_eth_rx_queue_setup(port_id, i, NR_RXD,
> +					rte_eth_dev_socket_id(port_id),
> +					&rxq_conf,
> +					mbuf_mp);
> +			if (ret < 0)
> +				rte_exit(EXIT_FAILURE,
> +					":: Rx queue setup failed: err=%d, port=%u\n",
> +					ret, port_id);
> +		}
> +
> +		txq_conf = dev_info.default_txconf;
> +		txq_conf.offloads = port_conf.txmode.offloads;

As far as I know there is no need to repeat the port offloads
at queue level, so this line is not necessary.

> +
> +		for (i = 0; i < TXQ_NUM; i++) {
> +			ret = rte_eth_tx_queue_setup(port_id, i, NR_TXD,
> +					rte_eth_dev_socket_id(port_id),
> +					&txq_conf);
> +			if (ret < 0)
> +				rte_exit(EXIT_FAILURE,
> +					":: Tx queue setup failed: err=%d, port=%u\n",
> +					ret, port_id);
> +		}
> +
> +		/* Catch all packets from traffic generator. */
> +		ret = rte_eth_promiscuous_enable(port_id);
> +		if (ret != 0)
> +			rte_exit(EXIT_FAILURE,
> +				":: promiscuous mode enable failed: err=%s, port=%u\n",
> +				rte_strerror(-ret), port_id);
> +
> +		ret = rte_eth_dev_start(port_id);
> +		if (ret < 0)
> +			rte_exit(EXIT_FAILURE,
> +				"rte_eth_dev_start:err=%d, port=%u\n",
> +				ret, port_id);
> +
> +		printf(":: initializing port: %d done\n", port_id);
> +	}
> +}
> +
> +int
> +main(int argc, char **argv)
> +{
> +	int ret;
> +	uint16_t port;
> +	struct rte_flow_error error;
> +
> +	ret = rte_eal_init(argc, argv);
> +	if (ret < 0)
> +		rte_exit(EXIT_FAILURE, "EAL init failed\n");
> +
> +	argc -= ret;
> +	argv += ret;
> +	if (argc > 1)
> +		args_parse(argc, argv);
> +
> +	init_port();
> +
> +	nb_lcores = rte_lcore_count();
> +	if (nb_lcores <= 1)
> +		rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
> +
> +	RTE_ETH_FOREACH_DEV(port) {
> +		rte_flow_flush(port, &error);
> +		rte_eth_dev_stop(port);
> +		rte_eth_dev_close(port);
> +	}
> +	return 0;
> +}

[snip]

> diff --git a/config/common_base b/config/common_base
> index 14000ba07..b2edd5267 100644
> diff --git a/doc/guides/rel_notes/release_20_05.rst b/doc/guides/rel_notes/release_20_05.rst
> index b124c3f28..258b1e03e 100644
> --- a/doc/guides/rel_notes/release_20_05.rst
> +++ b/doc/guides/rel_notes/release_20_05.rst
> @@ -212,6 +212,16 @@ New Features
>    * Added IPsec inbound load-distribution support for ipsec-secgw application
>      using NIC load distribution feature(Flow Director).
>  
> +* **Added flow performance application.**
> +
> +  Add new application to test rte_flow performance.
> +
> +  Application features:
> +  * Measure rte_flow insertion rate.
> +  * Measure rte_flow deletion rate.
> +  * Dump rte_flow memory consumption.
> +  * Measure packet per second forwarding.

I think the lines above should be added by the patches that
actually implement each feature.

> +
>  
>  Removed Items
>  -------------

[snip]


^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH v5 2/5] app/flow-perf: add insertion rate calculation
  2020-05-06 12:36                 ` [dpdk-dev] [PATCH v5 2/5] app/flow-perf: add insertion rate calculation Wisam Jaddo
@ 2020-05-06 15:23                   ` Andrew Rybchenko
  2020-05-07 12:38                     ` Wisam Monther
  0 siblings, 1 reply; 102+ messages in thread
From: Andrew Rybchenko @ 2020-05-06 15:23 UTC (permalink / raw)
  To: Wisam Jaddo, dev, jackmin, thomas, jerinjacobk, gerlitz.or,
	l.yan, ajit.khaparde
  Cc: Stephen Hemminger, david.marchand

My biggest concern with the patch is the use of a huge number
of global variables, which makes the code hard to read,
understand and maintain. See my notes below.
Please share your thoughts.

On 5/6/20 3:36 PM, Wisam Jaddo wrote:
> Add insertion rate calculation feature into flow
> performance application.
> 
> The application now provide the ability to test
> insertion rate of specific rte_flow rule, by
> stressing it to the NIC, and calculate the
> insertion rate.
> 
> The application offers some options in the command
> line, to configure which rule to apply.
> 
> After that the application will start producing
> rules with same pattern but increasing the outer IP
> source address by 1 each time, thus it will give
> different flow each time, and all other items will
> have open masks.
> 
> The current design have single core insertion rate.
> In the future we may have a multi core insertion
> rate measurement support in the app.
> 
> Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
> ---

[snip]

> diff --git a/app/test-flow-perf/actions_gen.c b/app/test-flow-perf/actions_gen.c
> new file mode 100644
> index 000000000..fa60084cf
> --- /dev/null
> +++ b/app/test-flow-perf/actions_gen.c
> @@ -0,0 +1,88 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2020 Mellanox Technologies, Ltd
> + *
> + * The file contains the implementations of actions generators.
> + * Each generator is responsible for preparing it's action instance
> + * and initializing it with needed data.
> + **/
> +
> +#include <sys/types.h>
> +#include <rte_malloc.h>
> +#include <rte_flow.h>
> +#include <rte_ethdev.h>
> +
> +#define ALLOCATE_ACTION_VARS
> +#include "actions_gen.h"
> +#include "config.h"
> +
> +void
> +gen_mark(void)
> +{
> +	mark_action.id = MARK_ID;
> +}
> +
> +void
> +gen_queue(uint16_t queue)
> +{
> +	queue_action.index = queue;
> +}
> +
> +void
> +gen_jump(uint16_t next_table)
> +{
> +	jump_action.group = next_table;
> +}
> +
> +void
> +gen_rss(uint16_t *queues, uint16_t queues_number)
> +{
> +	uint16_t queue;
> +	struct action_rss_data *rss_data;
> +
> +	rss_data = rte_malloc("rss_data",
> +		sizeof(struct action_rss_data), 0);
> +
> +	if (rss_data == NULL)
> +		rte_exit(EXIT_FAILURE, "No Memory available!");
> +
> +	*rss_data = (struct action_rss_data){
> +		.conf = (struct rte_flow_action_rss){
> +			.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
> +			.level = 0,
> +			.types = ETH_RSS_IP |
> +				ETH_RSS_TCP,

Having the same value in two places suggests adding a macro for
it and using that macro in both places.

> +			.key_len = sizeof(rss_data->key),
> +			.queue_num = queues_number,
> +			.key = rss_data->key,
> +			.queue = rss_data->queue,
> +		},
> +		.key = { 1 },
> +		.queue = { 0 },
> +	};
> +
> +	for (queue = 0; queue < queues_number; queue++)
> +		rss_data->queue[queue] = queues[queue];
> +
> +	rss_action = &rss_data->conf;
> +}
> +
> +void
> +gen_set_meta(void)
> +{
> +	meta_action.data = RTE_BE32(META_DATA);
> +	meta_action.mask = RTE_BE32(0xffffffff);
> +}
> +
> +void
> +gen_set_tag(void)
> +{
> +	tag_action.data = RTE_BE32(META_DATA);
> +	tag_action.mask = RTE_BE32(0xffffffff);
> +	tag_action.index = TAG_INDEX;
> +}
> +
> +void
> +gen_port_id(void)
> +{
> +	port_id.id = PORT_ID_DST;
> +}
> 

[snip]

> diff --git a/app/test-flow-perf/flow_gen.c b/app/test-flow-perf/flow_gen.c
> new file mode 100644
> index 000000000..cf5453586
> --- /dev/null
> +++ b/app/test-flow-perf/flow_gen.c
> @@ -0,0 +1,179 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2020 Mellanox Technologies, Ltd
> + *
> + * The file contains the implementations of the method to
> + * fill items, actions & attributes in their corresponding
> + * arrays, and then generate rte_flow rule.
> + *
> + * After the generation. The rule goes to validation then
> + * creation state and then return the results.
> + */
> +
> +#include <stdint.h>
> +
> +#include "flow_gen.h"
> +#include "items_gen.h"
> +#include "actions_gen.h"
> +#include "config.h"
> +
> +static void
> +fill_attributes(struct rte_flow_attr *attr,
> +	uint8_t flow_attrs, uint16_t group)
> +{
> +	if (flow_attrs & INGRESS)
> +		attr->ingress = 1;
> +	if (flow_attrs & EGRESS)
> +		attr->egress = 1;
> +	if (flow_attrs & TRANSFER)
> +		attr->transfer = 1;
> +	attr->group = group;
> +}
> +
> +static void
> +fill_items(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint16_t flow_items, uint32_t outer_ip_src)
> +{
> +	uint8_t items_counter = 0;
> +
> +	if (flow_items & META_ITEM)
> +		add_meta_data(items, items_counter++);
> +	if (flow_items & TAG_ITEM)
> +		add_meta_tag(items, items_counter++);
> +	if (flow_items & ETH_ITEM)
> +		add_ether(items, items_counter++);
> +	if (flow_items & VLAN_ITEM)
> +		add_vlan(items, items_counter++);
> +	if (flow_items & IPV4_ITEM)
> +		add_ipv4(items, items_counter++, outer_ip_src);
> +	if (flow_items & IPV6_ITEM)
> +		add_ipv6(items, items_counter++, outer_ip_src);
> +	if (flow_items & TCP_ITEM)
> +		add_tcp(items, items_counter++);
> +	if (flow_items & UDP_ITEM)
> +		add_udp(items, items_counter++);
> +	if (flow_items & VXLAN_ITEM)
> +		add_vxlan(items, items_counter++);
> +	if (flow_items & VXLAN_GPE_ITEM)
> +		add_vxlan_gpe(items, items_counter++);
> +	if (flow_items & GRE_ITEM)
> +		add_gre(items, items_counter++);
> +	if (flow_items & GENEVE_ITEM)
> +		add_geneve(items, items_counter++);
> +	if (flow_items & GTP_ITEM)
> +		add_gtp(items, items_counter++);
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_END;
> +}
> +
> +static void
> +fill_actions(struct rte_flow_action actions[MAX_ACTIONS_NUM],
> +	uint16_t flow_actions, uint32_t counter, uint16_t next_table,
> +	uint16_t hairpinq)
> +{
> +	struct rte_flow_action_count count_action;
> +	uint8_t actions_counter = 0;
> +	uint16_t queues[RXQ_NUM];
> +	uint16_t i;
> +
> +	/* None-fate actions */
> +	if (flow_actions & MARK_ACTION) {
> +		if (!counter)

DPDK coding style says compare to 0 [1]. Many similar
comparisons below.

[1] https://doc.dpdk.org/guides/contributing/coding_style.html#null-pointers

> +			gen_mark();
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_MARK;
> +		actions[actions_counter++].conf = &mark_action;
> +	}
> +	if (flow_actions & COUNT_ACTION) {
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_COUNT;
> +		actions[actions_counter++].conf = &count_action;
> +	}
> +	if (flow_actions & META_ACTION) {
> +		if (!counter)
> +			gen_set_meta();
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_SET_META;
> +		actions[actions_counter++].conf = &meta_action;
> +	}
> +	if (flow_actions & TAG_ACTION) {
> +		if (!counter)
> +			gen_set_tag();
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_SET_TAG;
> +		actions[actions_counter++].conf = &tag_action;
> +	}
> +
> +	/* Fate actions */
> +	if (flow_actions & QUEUE_ACTION) {
> +		gen_queue(counter % RXQ_NUM);
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_QUEUE;
> +		actions[actions_counter++].conf = &queue_action;
> +	}
> +	if (flow_actions & RSS_ACTION) {
> +		if (!counter) {
> +			for (i = 0; i < RXQ_NUM; i++)
> +				queues[i] = i;
> +			gen_rss(queues, RXQ_NUM);
> +		}
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_RSS;
> +		actions[actions_counter++].conf = rss_action;
> +	}
> +	if (flow_actions & JUMP_ACTION) {
> +		if (!counter)
> +			gen_jump(next_table);
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_JUMP;
> +		actions[actions_counter++].conf = &jump_action;
> +	}
> +	if (flow_actions & PORT_ID_ACTION) {
> +		if (!counter)
> +			gen_port_id();
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_PORT_ID;
> +		actions[actions_counter++].conf = &port_id;
> +	}
> +	if (flow_actions & DROP_ACTION)
> +		actions[actions_counter++].type = RTE_FLOW_ACTION_TYPE_DROP;
> +	if (flow_actions & HAIRPIN_QUEUE_ACTION) {
> +		gen_queue((counter % hairpinq) + RXQ_NUM);
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_QUEUE;
> +		actions[actions_counter++].conf = &queue_action;
> +	}
> +	if (flow_actions & HAIRPIN_RSS_ACTION) {
> +		if (!counter) {
> +			uint16_t hairpin_queues[hairpinq];
> +			for (i = 0; i < hairpinq; i++)
> +				hairpin_queues[i] = i + RXQ_NUM;
> +			gen_rss(hairpin_queues, hairpinq);
> +		}
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_RSS;
> +		actions[actions_counter++].conf = rss_action;
> +	}
> +
> +	actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_END;
> +}
> +
> +struct rte_flow *
> +generate_flow(uint16_t port_id,
> +	uint16_t group,
> +	uint8_t flow_attrs,
> +	uint16_t flow_items,
> +	uint16_t flow_actions,
> +	uint16_t next_table,
> +	uint32_t outer_ip_src,
> +	uint16_t hairpinq,
> +	struct rte_flow_error *error)
> +{
> +	struct rte_flow_attr attr;
> +	struct rte_flow_item items[MAX_ITEMS_NUM];
> +	struct rte_flow_action actions[MAX_ACTIONS_NUM];
> +	struct rte_flow *flow = NULL;
> +
> +	memset(items, 0, sizeof(items));
> +	memset(actions, 0, sizeof(actions));
> +	memset(&attr, 0, sizeof(struct rte_flow_attr));
> +
> +	fill_attributes(&attr, flow_attrs, group);
> +
> +	fill_actions(actions, flow_actions,
> +			outer_ip_src, next_table, hairpinq);
> +
> +	fill_items(items, flow_items, outer_ip_src);
> +
> +	flow = rte_flow_create(port_id, &attr, items, actions, error);
> +	return flow;
> +}
> diff --git a/app/test-flow-perf/flow_gen.h b/app/test-flow-perf/flow_gen.h
> new file mode 100644
> index 000000000..43d9e7cfe
> --- /dev/null
> +++ b/app/test-flow-perf/flow_gen.h
> @@ -0,0 +1,63 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2020 Mellanox Technologies, Ltd
> + *
> + * This file contains the items, actions and attributes
> + * definition. And the methods to prepare and fill items,
> + * actions and attributes to generate rte_flow rule.
> + */
> +
> +#ifndef FLOW_PERF_FLOW_GEN
> +#define FLOW_PERF_FLOW_GEN
> +
> +#include <stdint.h>
> +#include <rte_flow.h>
> +
> +#include "config.h"
> +
> +#define FLOW_ITEM_MASK(_x) (UINT64_C(1) << _x)
> +
> +/* Items */
> +#define ETH_ITEM             FLOW_ITEM_MASK(0)

I don't understand why you don't use enum rte_flow_item_type
members as offsets. In that case you don't need these defines
at all, just use FLOW_ITEM_MASK(ETH). It will make it
easier to support new items.

> +#define IPV4_ITEM            FLOW_ITEM_MASK(1)
> +#define IPV6_ITEM            FLOW_ITEM_MASK(2)
> +#define VLAN_ITEM            FLOW_ITEM_MASK(3)
> +#define TCP_ITEM             FLOW_ITEM_MASK(4)
> +#define UDP_ITEM             FLOW_ITEM_MASK(5)
> +#define VXLAN_ITEM           FLOW_ITEM_MASK(6)
> +#define VXLAN_GPE_ITEM       FLOW_ITEM_MASK(7)
> +#define GRE_ITEM             FLOW_ITEM_MASK(8)
> +#define GENEVE_ITEM          FLOW_ITEM_MASK(9)
> +#define GTP_ITEM             FLOW_ITEM_MASK(10)
> +#define META_ITEM            FLOW_ITEM_MASK(11)
> +#define TAG_ITEM             FLOW_ITEM_MASK(12)
> +
> +/* Actions */
> +#define QUEUE_ACTION         FLOW_ITEM_MASK(0)

I don't understand why you don't use enum rte_flow_action_type
members as offsets.

> +#define MARK_ACTION          FLOW_ITEM_MASK(1)
> +#define JUMP_ACTION          FLOW_ITEM_MASK(2)
> +#define RSS_ACTION           FLOW_ITEM_MASK(3)
> +#define COUNT_ACTION         FLOW_ITEM_MASK(4)
> +#define META_ACTION          FLOW_ITEM_MASK(5)
> +#define TAG_ACTION           FLOW_ITEM_MASK(6)
> +#define DROP_ACTION          FLOW_ITEM_MASK(7)
> +#define PORT_ID_ACTION       FLOW_ITEM_MASK(8)
> +#define HAIRPIN_QUEUE_ACTION FLOW_ITEM_MASK(9)
> +#define HAIRPIN_RSS_ACTION   FLOW_ITEM_MASK(10)
> +
> +/* Attributes */
> +#define INGRESS              FLOW_ITEM_MASK(0)
> +#define EGRESS               FLOW_ITEM_MASK(1)
> +#define TRANSFER             FLOW_ITEM_MASK(2)
> +
> +struct rte_flow *
> +generate_flow(uint16_t port_id,
> +	uint16_t group,
> +	uint8_t flow_attrs,
> +	uint16_t flow_items,
> +	uint16_t flow_actions,
> +	uint16_t next_table,
> +	uint32_t outer_ip_src,
> +	uint16_t hairpinq,
> +	struct rte_flow_error *error);
> +
> +#endif /* FLOW_PERF_FLOW_GEN */
> diff --git a/app/test-flow-perf/items_gen.c b/app/test-flow-perf/items_gen.c
> new file mode 100644
> index 000000000..1e9479fb8
> --- /dev/null
> +++ b/app/test-flow-perf/items_gen.c
> @@ -0,0 +1,265 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2020 Mellanox Technologies, Ltd
> + *
> + * This file contain the implementations of the items
> + * related methods. Each Item have a method to prepare
> + * the item and add it into items array in given index.
> + */
> +
> +#include <stdint.h>
> +#include <rte_flow.h>
> +
> +#include "items_gen.h"
> +#include "config.h"
> +
> +/* Current design is single threaded. */
> +static struct rte_flow_item_eth eth_spec;
> +static struct rte_flow_item_eth eth_mask;

It looks like the design has problems with
eth / ipv4 / udp / vxlan / eth / end
patterns.

> +static struct rte_flow_item_vlan vlan_spec;
> +static struct rte_flow_item_vlan vlan_mask;
> +static struct rte_flow_item_ipv4 ipv4_spec;
> +static struct rte_flow_item_ipv4 ipv4_mask;
> +static struct rte_flow_item_ipv6 ipv6_spec;
> +static struct rte_flow_item_ipv6 ipv6_mask;
> +static struct rte_flow_item_udp udp_spec;
> +static struct rte_flow_item_udp udp_mask;
> +static struct rte_flow_item_tcp tcp_spec;
> +static struct rte_flow_item_tcp tcp_mask;
> +static struct rte_flow_item_vxlan vxlan_spec;
> +static struct rte_flow_item_vxlan vxlan_mask;
> +static struct rte_flow_item_vxlan_gpe vxlan_gpe_spec;
> +static struct rte_flow_item_vxlan_gpe vxlan_gpe_mask;
> +static struct rte_flow_item_gre gre_spec;
> +static struct rte_flow_item_gre gre_mask;
> +static struct rte_flow_item_geneve geneve_spec;
> +static struct rte_flow_item_geneve geneve_mask;
> +static struct rte_flow_item_gtp gtp_spec;
> +static struct rte_flow_item_gtp gtp_mask;
> +static struct rte_flow_item_meta meta_spec;
> +static struct rte_flow_item_meta meta_mask;
> +static struct rte_flow_item_tag tag_spec;
> +static struct rte_flow_item_tag tag_mask;

I think that such a large number of global variables makes the code
hard to read and maintain. If that's only me, no problem.

> +
> +
> +void
> +add_ether(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	memset(&eth_spec, 0, sizeof(struct rte_flow_item_eth));
> +	memset(&eth_mask, 0, sizeof(struct rte_flow_item_eth));
> +	eth_spec.type = 0;
> +	eth_mask.type = 0;

What's the point to set type to 0 if you just memset the entire
structure?

> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_ETH;
> +	items[items_counter].spec = &eth_spec;
> +	items[items_counter].mask = &eth_mask;
> +}
> +
> +void
> +add_vlan(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	uint16_t vlan_value = VLAN_VALUE;
> +	memset(&vlan_spec, 0, sizeof(struct rte_flow_item_vlan));
> +	memset(&vlan_mask, 0, sizeof(struct rte_flow_item_vlan));
> +
> +	vlan_spec.tci = RTE_BE16(vlan_value);
> +	vlan_mask.tci = RTE_BE16(0xffff);
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_VLAN;
> +	items[items_counter].spec = &vlan_spec;
> +	items[items_counter].mask = &vlan_mask;
> +}
> +
> +void
> +add_ipv4(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter, uint32_t src_ipv4)

Shouldn't src_ipv4 be rte_be32?

> +{
> +	memset(&ipv4_spec, 0, sizeof(struct rte_flow_item_ipv4));
> +	memset(&ipv4_mask, 0, sizeof(struct rte_flow_item_ipv4));
> +
> +	ipv4_spec.hdr.src_addr = src_ipv4;
> +	ipv4_mask.hdr.src_addr = 0xffffffff;

RTE_BE32() is missing above (at least to be consistent
with RTE_BE16() a few lines above).

> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_IPV4;
> +	items[items_counter].spec = &ipv4_spec;
> +	items[items_counter].mask = &ipv4_mask;
> +}
> +
> +
> +void
> +add_ipv6(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter, int src_ipv6)

I think such a specification of the source IPv6 address is
very confusing. If you really need it, it would be nice
to explain why in comments.

> +{
> +	memset(&ipv6_spec, 0, sizeof(struct rte_flow_item_ipv6));
> +	memset(&ipv6_mask, 0, sizeof(struct rte_flow_item_ipv6));
> +
> +	/** Set ipv6 src **/
> +	memset(&ipv6_spec.hdr.src_addr, src_ipv6,
> +		sizeof(ipv6_spec.hdr.src_addr) / 2);
> +
> +	/** Full mask **/
> +	memset(&ipv6_mask.hdr.src_addr, 1,
> +		sizeof(ipv6_spec.hdr.src_addr));

Are you sure that 1 is what you really want here? Maybe 0xff?

> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_IPV6;
> +	items[items_counter].spec = &ipv6_spec;
> +	items[items_counter].mask = &ipv6_mask;
> +}
> +
> +void
> +add_tcp(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	memset(&tcp_spec, 0, sizeof(struct rte_flow_item_tcp));
> +	memset(&tcp_mask, 0, sizeof(struct rte_flow_item_tcp));
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_TCP;
> +	items[items_counter].spec = &tcp_spec;
> +	items[items_counter].mask = &tcp_mask;
> +}
> +
> +void
> +add_udp(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	memset(&udp_spec, 0, sizeof(struct rte_flow_item_udp));
> +	memset(&udp_mask, 0, sizeof(struct rte_flow_item_udp));
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_UDP;
> +	items[items_counter].spec = &udp_spec;
> +	items[items_counter].mask = &udp_mask;
> +}
> +
> +void
> +add_vxlan(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	uint32_t vni_value = VNI_VALUE;
> +	uint8_t i;
> +	memset(&vxlan_spec, 0, sizeof(struct rte_flow_item_vxlan));
> +	memset(&vxlan_mask, 0, sizeof(struct rte_flow_item_vxlan));
> +
> +	/* Set standard vxlan vni */
> +	for (i = 0; i < 3; i++) {
> +		vxlan_spec.vni[2 - i] = vni_value >> (i * 8);
> +		vxlan_mask.vni[2 - i] = 0xff;
> +	}
> +
> +	/* Standard vxlan flags */
> +	vxlan_spec.flags = 0x8;
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_VXLAN;
> +	items[items_counter].spec = &vxlan_spec;
> +	items[items_counter].mask = &vxlan_mask;
> +}
> +
> +void
> +add_vxlan_gpe(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	uint32_t vni_value = VNI_VALUE;
> +	uint8_t i;
> +	memset(&vxlan_gpe_spec, 0, sizeof(struct rte_flow_item_vxlan_gpe));
> +	memset(&vxlan_gpe_mask, 0, sizeof(struct rte_flow_item_vxlan_gpe));
> +
> +	/* Set vxlan-gpe vni */
> +	for (i = 0; i < 3; i++) {
> +		vxlan_gpe_spec.vni[2 - i] = vni_value >> (i * 8);
> +		vxlan_gpe_mask.vni[2 - i] = 0xff;
> +	}
> +
> +	/* vxlan-gpe flags */
> +	vxlan_gpe_spec.flags = 0x0c;
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE;
> +	items[items_counter].spec = &vxlan_gpe_spec;
> +	items[items_counter].mask = &vxlan_gpe_mask;
> +}
> +
> +void
> +add_gre(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	uint16_t proto = GRE_PROTO;
> +	memset(&gre_spec, 0, sizeof(struct rte_flow_item_gre));
> +	memset(&gre_mask, 0, sizeof(struct rte_flow_item_gre));
> +
> +	gre_spec.protocol = RTE_BE16(proto);
> +	gre_mask.protocol = 0xffff;

RTE_BE16(0xffff) to be consistent

> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_GRE;
> +	items[items_counter].spec = &gre_spec;
> +	items[items_counter].mask = &gre_mask;
> +}
> +
> +void
> +add_geneve(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	uint32_t vni_value = VNI_VALUE;
> +	uint8_t i;
> +	memset(&geneve_spec, 0, sizeof(struct rte_flow_item_geneve));
> +	memset(&geneve_mask, 0, sizeof(struct rte_flow_item_geneve));
> +
> +	for (i = 0; i < 3; i++) {
> +		geneve_spec.vni[2 - i] = vni_value >> (i * 8);
> +		geneve_mask.vni[2 - i] = 0xff;
> +	}
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_GENEVE;
> +	items[items_counter].spec = &geneve_spec;
> +	items[items_counter].mask = &geneve_mask;
> +}
> +
> +void
> +add_gtp(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	uint32_t teid_value = TEID_VALUE;
> +	memset(&gtp_spec, 0, sizeof(struct rte_flow_item_gtp));
> +	memset(&gtp_mask, 0, sizeof(struct rte_flow_item_gtp));
> +
> +	gtp_spec.teid = RTE_BE32(teid_value);
> +	gtp_mask.teid = RTE_BE32(0xffffffff);
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_GTP;
> +	items[items_counter].spec = &gtp_spec;
> +	items[items_counter].mask = &gtp_mask;
> +}
> +
> +void
> +add_meta_data(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	uint32_t data = META_DATA;
> +	memset(&meta_spec, 0, sizeof(struct rte_flow_item_meta));
> +	memset(&meta_mask, 0, sizeof(struct rte_flow_item_meta));
> +
> +	meta_spec.data = RTE_BE32(data);
> +	meta_mask.data = RTE_BE32(0xffffffff);
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_META;
> +	items[items_counter].spec = &meta_spec;
> +	items[items_counter].mask = &meta_mask;
> +}
> +
> +
> +void
> +add_meta_tag(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	uint32_t data = META_DATA;
> +	uint8_t index = TAG_INDEX;
> +	memset(&tag_spec, 0, sizeof(struct rte_flow_item_tag));
> +	memset(&tag_mask, 0, sizeof(struct rte_flow_item_tag));
> +
> +	tag_spec.data = RTE_BE32(data);
> +	tag_mask.data = RTE_BE32(0xffffffff);
> +	tag_spec.index = index;
> +	tag_mask.index = 0xff;
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_TAG;
> +	items[items_counter].spec = &tag_spec;
> +	items[items_counter].mask = &tag_mask;
> +}

[snip]

> diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
> index 7a924cdb7..463e4a782 100644
> --- a/app/test-flow-perf/main.c
> +++ b/app/test-flow-perf/main.c
> @@ -35,29 +35,156 @@
>  #include <rte_flow.h>
>  
>  #include "config.h"
> +#include "flow_gen.h"
>  
> -static uint32_t nb_lcores;
> +#define MAX_ITERATIONS             100
> +#define DEFAULT_RULES_COUNT    4000000
> +#define DEFAULT_ITERATION       100000
> +
> +struct rte_flow *flow;
> +static uint8_t flow_group;
> +
> +static uint16_t flow_items;
> +static uint16_t flow_actions;
> +static uint8_t flow_attrs;
> +static volatile bool force_quit;
> +static bool dump_iterations;
>  static struct rte_mempool *mbuf_mp;
> +static uint32_t nb_lcores;
> +static uint32_t flows_count;
> +static uint32_t iterations_number;
> +static uint32_t hairpinq;

Global variables again.

>  static void
>  usage(char *progname)
>  {
>  	printf("\nusage: %s\n", progname);
> +	printf("\nControl configurations:\n");
> +	printf("  --flows-count=N: to set the number of needed"
> +		" flows to insert, default is 4,000,000\n");
> +	printf("  --dump-iterations: To print rates for each"
> +		" iteration\n");
> +
> +	printf("To set flow attributes:\n");
> +	printf("  --ingress: set ingress attribute in flows\n");
> +	printf("  --egress: set egress attribute in flows\n");
> +	printf("  --transfer: set transfer attribute in flows\n");
> +	printf("  --group=N: set group for all flows,"
> +		" default is 0\n");
> +
> +	printf("To set flow items:\n");
> +	printf("  --ether: add ether layer in flow items\n");
> +	printf("  --vlan: add vlan layer in flow items\n");
> +	printf("  --ipv4: add ipv4 layer in flow items\n");
> +	printf("  --ipv6: add ipv6 layer in flow items\n");
> +	printf("  --tcp: add tcp layer in flow items\n");
> +	printf("  --udp: add udp layer in flow items\n");
> +	printf("  --vxlan: add vxlan layer in flow items\n");
> +	printf("  --vxlan-gpe: add vxlan-gpe layer in flow items\n");
> +	printf("  --gre: add gre layer in flow items\n");
> +	printf("  --geneve: add geneve layer in flow items\n");
> +	printf("  --gtp: add gtp layer in flow items\n");
> +	printf("  --meta: add meta layer in flow items\n");
> +	printf("  --tag: add tag layer in flow items\n");
> +
> +	printf("To set flow actions:\n");
> +	printf("  --port-id: add port-id action in flow actions\n");
> +	printf("  --rss: add rss action in flow actions\n");
> +	printf("  --queue: add queue action in flow actions\n");
> +	printf("  --jump: add jump action in flow actions\n");
> +	printf("  --mark: add mark action in flow actions\n");
> +	printf("  --count: add count action in flow actions\n");
> +	printf("  --set-meta: add set meta action in flow actions\n");
> +	printf("  --set-tag: add set tag action in flow actions\n");
> +	printf("  --drop: add drop action in flow actions\n");
> +	printf("  --hairpin-queue=N: add hairpin-queue action in flow actions\n");
> +	printf("  --hairpin-rss=N: add hairping-rss action in flow actions\n");
>  }
>  
>  static void
>  args_parse(int argc, char **argv)
>  {
>  	char **argvopt;
> -	int opt;
> +	int n, opt, i;
>  	int opt_idx;
> +	static const char * const items_str[] = {
> +		"ether", "vlan", "ipv4", "ipv6",

Typically the Ethernet pattern item is named "eth" (e.g.
in testpmd, if I'm not mistaken). Why does it differ here?

> +		"tcp", "udp", "vxlan", "vxlan-gpe",
> +		"gre", "geneve", "gtp", "meta",
> +		"tag"
> +	};
> +	uint32_t items[] = {
> +		ETH_ITEM, VLAN_ITEM, IPV4_ITEM,
> +		IPV6_ITEM, TCP_ITEM, UDP_ITEM,
> +		VXLAN_ITEM, VXLAN_GPE_ITEM, GRE_ITEM,
> +		GENEVE_ITEM, GTP_ITEM, META_ITEM,
> +		TAG_ITEM
> +	};

I guess the above two arrays should both be const and must
be kept in sync. If so, it would be better to create one
array of structures with name and mask members. It is more
robust.

> +	static const char * const attributes_str[] = {
> +		"ingress", "egress", "transfer"
> +	};
> +	uint32_t attributes[] = {
> +		INGRESS, EGRESS, TRANSFER
> +	};

Same as above.

> +	static const char * const actions_str[] = {
> +		"port-id", "rss", "queue", "jump",
> +		"mark", "count", "set-meta", "set-tag",
> +		"drop",
> +	};
> +	uint32_t actions[] = {
> +		PORT_ID_ACTION, RSS_ACTION, QUEUE_ACTION,
> +		JUMP_ACTION, MARK_ACTION, COUNT_ACTION,
> +		META_ACTION, TAG_ACTION, DROP_ACTION
> +	};

Same as above.

> +	int items_size = RTE_DIM(items);
> +	int attributes_size = RTE_DIM(attributes);
> +	int actions_size = RTE_DIM(actions);
> +
>  	static struct option lgopts[] = {
>  		/* Control */
>  		{ "help",                       0, 0, 0 },
> +		{ "flows-count",                1, 0, 0 },
> +		{ "dump-iterations",            0, 0, 0 },
> +		/* Attributes */
> +		{ "ingress",                    0, 0, 0 },
> +		{ "egress",                     0, 0, 0 },
> +		{ "transfer",                   0, 0, 0 },

I think it should be possible to add these items in
a loop by attributes_str-like array.

> +		{ "group",                      1, 0, 0 },
> +		/* Items */
> +		{ "ether",                      0, 0, 0 },
> +		{ "vlan",                       0, 0, 0 },
> +		{ "ipv4",                       0, 0, 0 },
> +		{ "ipv6",                       0, 0, 0 },
> +		{ "tcp",                        0, 0, 0 },
> +		{ "udp",                        0, 0, 0 },
> +		{ "vxlan",                      0, 0, 0 },
> +		{ "vxlan-gpe",                  0, 0, 0 },
> +		{ "gre",                        0, 0, 0 },
> +		{ "geneve",                     0, 0, 0 },
> +		{ "gtp",                        0, 0, 0 },
> +		{ "meta",                       0, 0, 0 },
> +		{ "tag",                        0, 0, 0 },

I think it should be possible to add these items in
a loop by items_str-like array.  It would
allow to avoid copy-paste and simplify new
items addition.

> +		/* Actions */
> +		{ "port-id",                    0, 0, 0 },
> +		{ "rss",                        0, 0, 0 },
> +		{ "queue",                      0, 0, 0 },
> +		{ "jump",                       0, 0, 0 },
> +		{ "mark",                       0, 0, 0 },
> +		{ "count",                      0, 0, 0 },
> +		{ "set-meta",                   0, 0, 0 },
> +		{ "set-tag",                    0, 0, 0 },
> +		{ "drop",                       0, 0, 0 },

I think it should be possible to add these items in
a loop by actions_str-like array.  It would allow to
avoid copy-paste and simplify new actions addition.


> +		{ "hairpin-queue",              1, 0, 0 },
> +		{ "hairpin-rss",                1, 0, 0 },
>  	};
>  
> +	flow_items = 0;
> +	flow_actions = 0;
> +	flow_attrs = 0;
> +	hairpinq = 0;
>  	argvopt = argv;
>  
> +	printf(":: Flow -> ");
>  	while ((opt = getopt_long(argc, argvopt, "",
>  				lgopts, &opt_idx)) != EOF) {
>  		switch (opt) {
> @@ -66,6 +193,73 @@ args_parse(int argc, char **argv)
>  				usage(argv[0]);
>  				rte_exit(EXIT_SUCCESS, "Displayed help\n");
>  			}
> +
> +			/* Attributes */
> +			for (i = 0; i < attributes_size; i++)
> +				if (!strcmp(lgopts[opt_idx].name,
> +						attributes_str[i])) {

Comparison vs 0. Many cases below.

> +					flow_attrs |= attributes[i];
> +					printf("%s / ", attributes_str[i]);
> +				}
> +			if (!strcmp(lgopts[opt_idx].name, "group")) {
> +				n = atoi(optarg);
> +				if (n >= 0)
> +					flow_group = n;
> +				else
> +					rte_exit(EXIT_SUCCESS,
> +						"flow group should be >= 0");
> +				printf("group %d ", flow_group);
> +			}
> +
> +			/* Items */
> +			for (i = 0; i < items_size; i++)
> +				if (!strcmp(lgopts[opt_idx].name,
> +						items_str[i])) {
> +					flow_items |= items[i];
> +					printf("%s / ", items_str[i]);
> +				}
> +
> +			/* Actions */
> +			for (i = 0; i < actions_size; i++)
> +				if (!strcmp(lgopts[opt_idx].name,
> +						actions_str[i])) {
> +					flow_actions |= actions[i];
> +					printf("%s / ", actions_str[i]);
> +				}
> +			if (!strcmp(lgopts[opt_idx].name, "hairpin-rss")) {
> +				n = atoi(optarg);
> +				if (n > 0)
> +					hairpinq = n;
> +				else
> +					rte_exit(EXIT_SUCCESS, "Hairpin queues should be > 0 ");
> +
> +				flow_actions |= HAIRPIN_RSS_ACTION;
> +				printf("hairpin-rss / ");
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "hairpin-queue")) {
> +				n = atoi(optarg);
> +				if (n > 0)
> +					hairpinq = n;
> +				else
> +					rte_exit(EXIT_SUCCESS, "Hairpin queues should be > 0 ");
> +
> +				flow_actions |= HAIRPIN_QUEUE_ACTION;
> +				printf("hairpin-queue / ");
> +			}
> +
> +			/* Control */
> +			if (!strcmp(lgopts[opt_idx].name, "flows-count")) {
> +				n = atoi(optarg);
> +				if (n > (int) iterations_number)
> +					flows_count = n;
> +				else {
> +					printf("\n\nflows_count should be > %d",
> +						iterations_number);
> +					rte_exit(EXIT_SUCCESS, " ");
> +				}
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "dump-iterations"))
> +				dump_iterations = true;
>  			break;
>  		default:
>  			printf("Invalid option: %s\n", argv[optind]);
> @@ -74,15 +268,141 @@ args_parse(int argc, char **argv)
>  			break;
>  		}
>  	}
> +	printf("end_flow\n");
> +}
> +
> +static void
> +print_flow_error(struct rte_flow_error error)
> +{
> +	printf("Flow can't be created %d message: %s\n",
> +		error.type,
> +		error.message ? error.message : "(no stated reason)");
> +}
> +
> +static inline void
> +flows_handler(void)
> +{
> +	struct rte_flow_error error;
> +	clock_t start_iter, end_iter;
> +	double cpu_time_used;
> +	double flows_rate;
> +	double cpu_time_per_iter[MAX_ITERATIONS];
> +	double delta;
> +	uint16_t nr_ports;
> +	uint32_t i;
> +	int port_id;
> +	int iter_id;
> +	uint32_t eagain_counter = 0;
> +
> +	nr_ports = rte_eth_dev_count_avail();
> +
> +	for (i = 0; i < MAX_ITERATIONS; i++)
> +		cpu_time_per_iter[i] = -1;
> +
> +	if (iterations_number > flows_count)
> +		iterations_number = flows_count;
> +
> +	printf(":: Flows Count per port: %d\n", flows_count);
> +
> +	for (port_id = 0; port_id < nr_ports; port_id++) {
> +		cpu_time_used = 0;
> +		if (flow_group > 0) {
> +			/*
> +			 * Create global rule to jump into flow_group,
> +			 * this way the app will avoid the default rules.
> +			 *
> +			 * Golbal rule:
> +			 * group 0 eth / end actions jump group <flow_group>
> +			 *
> +			 */
> +			flow = generate_flow(port_id, 0, flow_attrs, ETH_ITEM,
> +				JUMP_ACTION, flow_group, 0, 0, &error);
> +
> +			if (!flow) {

Comparison vs NULL

> +				print_flow_error(error);
> +				rte_exit(EXIT_FAILURE, "error in creating flow");
> +			}
> +		}
> +
> +		/* Insertion Rate */
> +		printf("Flows insertion on port = %d\n", port_id);
> +		start_iter = clock();
> +		for (i = 0; i < flows_count; i++) {
> +			do {
> +				rte_errno = 0;
> +				flow = generate_flow(port_id, flow_group,
> +					flow_attrs, flow_items, flow_actions,
> +					JUMP_ACTION_TABLE, i, hairpinq, &error);
> +				if (!flow)
> +					eagain_counter++;
> +			} while (rte_errno == EAGAIN);
> +
> +			if (force_quit)
> +				i = flows_count;
> +
> +			if (!flow) {
> +				print_flow_error(error);
> +				rte_exit(EXIT_FAILURE, "error in creating flow");
> +			}
> +
> +			if (i && !((i + 1) % iterations_number)) {
> +				/* Save the insertion rate of each iter */
> +				end_iter = clock();
> +				delta = (double) (end_iter - start_iter);
> +				iter_id = ((i + 1) / iterations_number) - 1;
> +				cpu_time_per_iter[iter_id] =
> +					delta / CLOCKS_PER_SEC;
> +				cpu_time_used += cpu_time_per_iter[iter_id];
> +				start_iter = clock();
> +			}
> +		}
> +
> +		/* Iteration rate per iteration */
> +		if (dump_iterations)
> +			for (i = 0; i < MAX_ITERATIONS; i++) {
> +				if (cpu_time_per_iter[i] == -1)
> +					continue;
> +				delta = (double)(iterations_number /
> +					cpu_time_per_iter[i]);
> +				flows_rate = delta / 1000;
> +				printf(":: Iteration #%d: %d flows "
> +					"in %f sec[ Rate = %f K/Sec ]\n",
> +					i, iterations_number,
> +					cpu_time_per_iter[i], flows_rate);
> +			}
> +
> +		/* Insertion rate for all flows */
> +		flows_rate = ((double) (flows_count / cpu_time_used) / 1000);
> +		printf("\n:: Total flow insertion rate -> %f K/Sec\n",
> +						flows_rate);
> +		printf(":: The time for creating %d in flows %f seconds\n",
> +						flows_count, cpu_time_used);
> +		printf(":: EAGAIN counter = %d\n", eagain_counter);
> +	}
> +}
> +
> +static void
> +signal_handler(int signum)
> +{
> +	if (signum == SIGINT || signum == SIGTERM) {
> +		printf("\n\nSignal %d received, preparing to exit...\n",
> +					signum);
> +		printf("Error: Stats are wrong due to sudden signal!\n\n");
> +		force_quit = true;
> +	}

This is the patch which starts to use signal.h, so the include
should be added in this patch, not the previous one.

>  }
>  
>  static void
>  init_port(void)
>  {
>  	int ret;
> -	uint16_t i;
> +	uint16_t i, j;
>  	uint16_t port_id;
>  	uint16_t nr_ports;
> +	uint16_t nr_queues;
> +	struct rte_eth_hairpin_conf hairpin_conf = {
> +		.peer_count = 1,
> +	};
>  	struct rte_eth_conf port_conf = {
>  		.rx_adv_conf = {
>  			.rss_conf.rss_hf =
> @@ -94,6 +414,10 @@ init_port(void)
>  	struct rte_eth_rxconf rxq_conf;
>  	struct rte_eth_dev_info dev_info;
>  
> +	nr_queues = RXQ_NUM;
> +	if (hairpinq)

Comparison vs 0

> +		nr_queues = RXQ_NUM + hairpinq;
> +
>  	nr_ports = rte_eth_dev_count_avail();
>  	if (nr_ports == 0)
>  		rte_exit(EXIT_FAILURE, "Error: no port detected\n");
> @@ -118,8 +442,8 @@ init_port(void)
>  
>  		printf(":: initializing port: %d\n", port_id);
>  
> -		ret = rte_eth_dev_configure(port_id, RXQ_NUM,
> -				TXQ_NUM, &port_conf);
> +		ret = rte_eth_dev_configure(port_id, nr_queues,
> +				nr_queues, &port_conf);
>  		if (ret < 0)
>  			rte_exit(EXIT_FAILURE,
>  				":: cannot configure device: err=%d, port=%u\n",
> @@ -159,6 +483,30 @@ init_port(void)
>  				":: promiscuous mode enable failed: err=%s, port=%u\n",
>  				rte_strerror(-ret), port_id);
>  
> +		if (hairpinq) {
> +			for (i = RXQ_NUM, j = 0; i < nr_queues; i++, j++) {
> +				hairpin_conf.peers[0].port = port_id;
> +				hairpin_conf.peers[0].queue = j + TXQ_NUM;
> +				ret = rte_eth_rx_hairpin_queue_setup(port_id, i,
> +					NR_RXD, &hairpin_conf);
> +				if (ret != 0)
> +					rte_exit(EXIT_FAILURE,
> +						":: Hairpin rx queue setup failed: err=%d, port=%u\n",
> +						ret, port_id);
> +			}
> +
> +			for (i = TXQ_NUM, j = 0; i < nr_queues; i++, j++) {
> +				hairpin_conf.peers[0].port = port_id;
> +				hairpin_conf.peers[0].queue = j + RXQ_NUM;
> +				ret = rte_eth_tx_hairpin_queue_setup(port_id, i,
> +					NR_TXD, &hairpin_conf);
> +				if (ret != 0)
> +					rte_exit(EXIT_FAILURE,
> +						":: Hairpin tx queue setup failed: err=%d, port=%u\n",
> +						ret, port_id);
> +			}
> +		}
> +
>  		ret = rte_eth_dev_start(port_id);
>  		if (ret < 0)
>  			rte_exit(EXIT_FAILURE,
> @@ -180,6 +528,15 @@ main(int argc, char **argv)
>  	if (ret < 0)
>  		rte_exit(EXIT_FAILURE, "EAL init failed\n");
>  
> +	force_quit = false;
> +	dump_iterations = false;
> +	flows_count = DEFAULT_RULES_COUNT;
> +	iterations_number = DEFAULT_ITERATION;
> +	flow_group = 0;
> +
> +	signal(SIGINT, signal_handler);
> +	signal(SIGTERM, signal_handler);
> +
>  	argc -= ret;
>  	argv += ret;
>  	if (argc > 1)
> @@ -191,6 +548,8 @@ main(int argc, char **argv)
>  	if (nb_lcores <= 1)
>  		rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
>  
> +	flows_handler();
> +
>  	RTE_ETH_FOREACH_DEV(port) {
>  		rte_flow_flush(port, &error);
>  		rte_eth_dev_stop(port);

[snip]

> diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
> index 49eb450ae..b45fccd69 100644
> --- a/doc/guides/tools/flow-perf.rst
> +++ b/doc/guides/tools/flow-perf.rst
> @@ -4,7 +4,18 @@
>  Flow performance tool
>  =====================
>  
> -Application for rte_flow performance testing.
> +Application for rte_flow performance testing. The application provide the

As far as I remember, the documentation guidelines recommend
starting each new sentence on a new line.

> +ability to test insertion rate of specific rte_flow rule, by stressing it
> +to the NIC, and calculate the insertion rate.
> +
> +The application offers some options in the command line, to configure
> +which rule to apply.
> +
> +After that the application will start producing rules with same pattern
> +but increasing the outer IP source address by 1 each time, thus it will
> +give different flow each time, and all other items will have open masks.
> +
> +The current design have single core insertion rate.
>  
>  
>  Compiling the Application

[snip]


^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH v5 1/5] app/flow-perf: add flow performance skeleton
  2020-05-06 14:25                   ` Andrew Rybchenko
@ 2020-05-06 17:07                     ` Wisam Monther
  2020-05-06 17:15                       ` Andrew Rybchenko
  0 siblings, 1 reply; 102+ messages in thread
From: Wisam Monther @ 2020-05-06 17:07 UTC (permalink / raw)
  To: Andrew Rybchenko, dev, Jack Min, Thomas Monjalon, jerinjacobk,
	gerlitz.or, l.yan, ajit.khaparde



>-----Original Message-----
>From: Andrew Rybchenko <arybchenko@solarflare.com>
>Sent: Wednesday, May 6, 2020 5:26 PM
>To: Wisam Monther <wisamm@mellanox.com>; dev@dpdk.org; Jack Min
><jackmin@mellanox.com>; Thomas Monjalon <thomas@monjalon.net>;
>jerinjacobk@gmail.com; gerlitz.or@gmail.com; l.yan@epfl.ch;
>ajit.khaparde@broadcom.com
>Subject: Re: [dpdk-dev] [PATCH v5 1/5] app/flow-perf: add flow performance
>skeleton
>
>On 5/6/20 3:36 PM, Wisam Jaddo wrote:
>> Add flow performance application skeleton.
>>
>> Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
>> ---
>
>[snip]
>
>> diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c new
>> file mode 100644 index 000000000..7a924cdb7
>> --- /dev/null
>> +++ b/app/test-flow-perf/main.c
>> @@ -0,0 +1,200 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright 2020 Mellanox Technologies, Ltd
>> + *
>> + * This file contain the application main file
>> + * This application provides the user the ability to test the
>> + * insertion rate for specific rte_flow rule under stress state ~4M
>> +rule/
>> + *
>> + * Then it will also provide packet per second measurement after
>> +installing
>> + * all rules, the user may send traffic to test the PPS that match
>> +the rules
>> + * after all rules are installed, to check performance or
>> +functionality after
>> + * the stress.
>> + *
>> + * The flows insertion will go for all ports first, then it will
>> +print the
>> + * results, after that the application will go into forwarding
>> +packets mode
>> + * it will start receiving traffic if any and then forwarding it back
>> +and
>> + * gives packet per second measurement.
>> + */
>> +
>> +#include <stdio.h>
>> +#include <stdlib.h>
>> +#include <string.h>
>> +#include <stdint.h>
>> +#include <inttypes.h>
>> +#include <stdarg.h>
>> +#include <errno.h>
>> +#include <getopt.h>
>> +#include <signal.h>
>> +#include <stdbool.h>
>> +#include <sys/time.h>
>> +
>> +#include <rte_malloc.h>
>> +#include <rte_mempool.h>
>> +#include <rte_mbuf.h>
>> +#include <rte_ethdev.h>
>> +#include <rte_flow.h>
>> +
>> +#include "config.h"
>> +
>> +static uint32_t nb_lcores;
>> +static struct rte_mempool *mbuf_mp;
>> +
>> +static void
>> +usage(char *progname)
>> +{
>> +	printf("\nusage: %s\n", progname);
>> +}
>> +
>> +static void
>> +args_parse(int argc, char **argv)
>> +{
>> +	char **argvopt;
>> +	int opt;
>> +	int opt_idx;
>> +	static struct option lgopts[] = {
>> +		/* Control */
>> +		{ "help",                       0, 0, 0 },
>> +	};
>> +
>> +	argvopt = argv;
>> +
>> +	while ((opt = getopt_long(argc, argvopt, "",
>> +				lgopts, &opt_idx)) != EOF) {
>> +		switch (opt) {
>> +		case 0:
>> +			if (!strcmp(lgopts[opt_idx].name, "help")) {
>
>DPDK coding style recommends to compare vs 0 instead of logical not.

Ok, will move it

>
>> +				usage(argv[0]);
>> +				rte_exit(EXIT_SUCCESS, "Displayed help\n");
>> +			}
>> +			break;
>> +		default:
>> +			printf("Invalid option: %s\n", argv[optind]);
>
>Again, sorry if I missed the reply: why is the error not logged to stderr?

No, I missed it, will move it to stderr

>
>> +			usage(argv[0]);
>> +			rte_exit(EXIT_SUCCESS, "Invalid option\n");
>> +			break;
>> +		}
>> +	}
>> +}
>> +
>> +static void
>> +init_port(void)
>> +{
>> +	int ret;
>> +	uint16_t i;
>> +	uint16_t port_id;
>> +	uint16_t nr_ports;
>> +	struct rte_eth_conf port_conf = {
>> +		.rx_adv_conf = {
>> +			.rss_conf.rss_hf =
>> +				ETH_RSS_IP  |
>> +				ETH_RSS_TCP,
>> +		}
>> +	};
>> +	struct rte_eth_txconf txq_conf;
>> +	struct rte_eth_rxconf rxq_conf;
>> +	struct rte_eth_dev_info dev_info;
>> +
>> +	nr_ports = rte_eth_dev_count_avail();
>> +	if (nr_ports == 0)
>> +		rte_exit(EXIT_FAILURE, "Error: no port detected\n");
>> +
>> +	mbuf_mp = rte_pktmbuf_pool_create("mbuf_pool",
>> +					TOTAL_MBUF_NUM,
>MBUF_CACHE_SIZE,
>> +					0, MBUF_SIZE,
>> +					rte_socket_id());
>> +	if (mbuf_mp == NULL)
>> +		rte_exit(EXIT_FAILURE, "Error: can't init mbuf pool\n");
>> +
>> +	for (port_id = 0; port_id < nr_ports; port_id++) {
>> +		ret = rte_eth_dev_info_get(port_id, &dev_info);
>> +		if (ret != 0)
>> +			rte_exit(EXIT_FAILURE,
>> +				"Error during getting device"
>> +				" (port %u) info: %s\n",
>> +				port_id, strerror(-ret));
>> +
>> +		port_conf.txmode.offloads &= dev_info.tx_offload_capa;
>> +		port_conf.rxmode.offloads &= dev_info.rx_offload_capa;
>> +
>> +		printf(":: initializing port: %d\n", port_id);
>> +
>> +		ret = rte_eth_dev_configure(port_id, RXQ_NUM,
>> +				TXQ_NUM, &port_conf);
>> +		if (ret < 0)
>> +			rte_exit(EXIT_FAILURE,
>> +				":: cannot configure device: err=%d,
>port=%u\n",
>> +				ret, port_id);
>> +
>> +		rxq_conf = dev_info.default_rxconf;
>> +		rxq_conf.offloads = port_conf.rxmode.offloads;
>
>
>As far as I know there is no necessity to repeat port offload on queue level.
>So, the line is not necessary.

Yes, you are right, just checked the code, it takes the offloads from the port itself.
Will remove it.

>
>> +
>> +		for (i = 0; i < RXQ_NUM; i++) {
>> +			ret = rte_eth_rx_queue_setup(port_id, i, NR_RXD,
>> +					rte_eth_dev_socket_id(port_id),
>> +					&rxq_conf,
>> +					mbuf_mp);
>> +			if (ret < 0)
>> +				rte_exit(EXIT_FAILURE,
>> +					":: Rx queue setup failed: err=%d,
>port=%u\n",
>> +					ret, port_id);
>> +		}
>> +
>> +		txq_conf = dev_info.default_txconf;
>> +		txq_conf.offloads = port_conf.txmode.offloads;
>
>As far as I know there is no necessity to repeat port offload on queue level.
>So, the line is not necessary.

Will remove it

>
>> +
>> +		for (i = 0; i < TXQ_NUM; i++) {
>> +			ret = rte_eth_tx_queue_setup(port_id, i, NR_TXD,
>> +					rte_eth_dev_socket_id(port_id),
>> +					&txq_conf);
>> +			if (ret < 0)
>> +				rte_exit(EXIT_FAILURE,
>> +					":: Tx queue setup failed: err=%d,
>port=%u\n",
>> +					ret, port_id);
>> +		}
>> +
>> +		/* Catch all packets from traffic generator. */
>> +		ret = rte_eth_promiscuous_enable(port_id);
>> +		if (ret != 0)
>> +			rte_exit(EXIT_FAILURE,
>> +				":: promiscuous mode enable failed: err=%s,
>port=%u\n",
>> +				rte_strerror(-ret), port_id);
>> +
>> +		ret = rte_eth_dev_start(port_id);
>> +		if (ret < 0)
>> +			rte_exit(EXIT_FAILURE,
>> +				"rte_eth_dev_start:err=%d, port=%u\n",
>> +				ret, port_id);
>> +
>> +		printf(":: initializing port: %d done\n", port_id);
>> +	}
>> +}
>> +
>> +int
>> +main(int argc, char **argv)
>> +{
>> +	int ret;
>> +	uint16_t port;
>> +	struct rte_flow_error error;
>> +
>> +	ret = rte_eal_init(argc, argv);
>> +	if (ret < 0)
>> +		rte_exit(EXIT_FAILURE, "EAL init failed\n");
>> +
>> +	argc -= ret;
>> +	argv += ret;
>> +	if (argc > 1)
>> +		args_parse(argc, argv);
>> +
>> +	init_port();
>> +
>> +	nb_lcores = rte_lcore_count();
>> +	if (nb_lcores <= 1)
>> +		rte_exit(EXIT_FAILURE, "This app needs at least two
>cores\n");
>> +
>> +	RTE_ETH_FOREACH_DEV(port) {
>> +		rte_flow_flush(port, &error);
>> +		rte_eth_dev_stop(port);
>> +		rte_eth_dev_close(port);
>> +	}
>> +	return 0;
>> +}
>
>[snip]
>
>> diff --git a/config/common_base b/config/common_base index
>> 14000ba07..b2edd5267 100644 diff --git
>> a/doc/guides/rel_notes/release_20_05.rst
>> b/doc/guides/rel_notes/release_20_05.rst
>> index b124c3f28..258b1e03e 100644
>> --- a/doc/guides/rel_notes/release_20_05.rst
>> +++ b/doc/guides/rel_notes/release_20_05.rst
>> @@ -212,6 +212,16 @@ New Features
>>    * Added IPsec inbound load-distribution support for ipsec-secgw
>application
>>      using NIC load distribution feature(Flow Director).
>>
>> +* **Added flow performance application.**
>> +
>> +  Add new application to test rte_flow performance.
>> +
>> +  Application features:
>> +  * Measure rte_flow insertion rate.
>> +  * Measure rte_flow deletion rate.
>> +  * Dump rte_flow memory consumption.
>> +  * Measure packet per second forwarding.
>
>I think above lines should be added in appropriate patches which really do it.

What do you mean?
each feature should add its own line in the same commit?

>
>> +
>>
>>  Removed Items
>>  -------------
>
>[snip]


^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH v5 1/5] app/flow-perf: add flow performance skeleton
  2020-05-06 17:07                     ` Wisam Monther
@ 2020-05-06 17:15                       ` Andrew Rybchenko
  0 siblings, 0 replies; 102+ messages in thread
From: Andrew Rybchenko @ 2020-05-06 17:15 UTC (permalink / raw)
  To: Wisam Monther, dev, Jack Min, Thomas Monjalon, jerinjacobk,
	gerlitz.or, l.yan, ajit.khaparde

On 5/6/20 8:07 PM, Wisam Monther wrote:
>> -----Original Message-----
>> From: Andrew Rybchenko <arybchenko@solarflare.com>
>> Sent: Wednesday, May 6, 2020 5:26 PM
>> To: Wisam Monther <wisamm@mellanox.com>; dev@dpdk.org; Jack Min
>> <jackmin@mellanox.com>; Thomas Monjalon <thomas@monjalon.net>;
>> jerinjacobk@gmail.com; gerlitz.or@gmail.com; l.yan@epfl.ch;
>> ajit.khaparde@broadcom.com
>> Subject: Re: [dpdk-dev] [PATCH v5 1/5] app/flow-perf: add flow performance
>> skeleton
>>
>> On 5/6/20 3:36 PM, Wisam Jaddo wrote:
>>> Add flow performance application skeleton.
>>>
>>> Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
>>> ---

[snip]

>>> diff --git a/config/common_base b/config/common_base index
>>> 14000ba07..b2edd5267 100644 diff --git
>>> a/doc/guides/rel_notes/release_20_05.rst
>>> b/doc/guides/rel_notes/release_20_05.rst
>>> index b124c3f28..258b1e03e 100644
>>> --- a/doc/guides/rel_notes/release_20_05.rst
>>> +++ b/doc/guides/rel_notes/release_20_05.rst
>>> @@ -212,6 +212,16 @@ New Features
>>>    * Added IPsec inbound load-distribution support for ipsec-secgw
>> application
>>>      using NIC load distribution feature(Flow Director).
>>>
>>> +* **Added flow performance application.**
>>> +
>>> +  Add new application to test rte_flow performance.
>>> +
>>> +  Application features:
>>> +  * Measure rte_flow insertion rate.
>>> +  * Measure rte_flow deletion rate.
>>> +  * Dump rte_flow memory consumption.
>>> +  * Measure packet per second forwarding.
>> I think above lines should be added in appropriate patches which really do it.
> What do you mean?
> each feature should add it's own line in the same commit?

As I understand, these features are not available if only the first
patch is applied. I.e. ideally features should be mentioned when
they are actually added (the second patch if I understand correctly).

^ permalink raw reply	[flat|nested] 102+ messages in thread

* Re: [dpdk-dev] [PATCH v5 2/5] app/flow-perf: add insertion rate calculation
  2020-05-06 15:23                   ` Andrew Rybchenko
@ 2020-05-07 12:38                     ` Wisam Monther
  0 siblings, 0 replies; 102+ messages in thread
From: Wisam Monther @ 2020-05-07 12:38 UTC (permalink / raw)
  To: Andrew Rybchenko, dev, Jack Min, Thomas Monjalon, jerinjacobk,
	gerlitz.or, l.yan, ajit.khaparde
  Cc: Stephen Hemminger, david.marchand

>-----Original Message-----
>From: Andrew Rybchenko <arybchenko@solarflare.com>
>Sent: Wednesday, May 6, 2020 6:24 PM
>To: Wisam Monther <wisamm@mellanox.com>; dev@dpdk.org; Jack Min
><jackmin@mellanox.com>; Thomas Monjalon <thomas@monjalon.net>;
>jerinjacobk@gmail.com; gerlitz.or@gmail.com; l.yan@epfl.ch;
>ajit.khaparde@broadcom.com
>Cc: Stephen Hemminger <stephen@networkplumber.org>;
>david.marchand@redhat.com
>Subject: Re: [dpdk-dev] [PATCH v5 2/5] app/flow-perf: add insertion rate
>calculation
>
>My biggest concern with the patch is the usage of a huge number of global variables,
>which makes the code hard to read, understand and maintain. See my notes
>below.
>Please, share your thoughts.

I can do the following regarding the global variables:
Global vars in main.c:
I think it's OK to leave them as they are, since they are control variables and they are straightforward.
As for items_gen.c: I'll move them inside the functions themselves, and get rid of all globals there.
As for actions_gen.c: I'll redesign them to work like the items and get rid of the global vars.

What do you think?

>
>On 5/6/20 3:36 PM, Wisam Jaddo wrote:
>> Add insertion rate calculation feature into flow performance
>> application.
>>
>> The application now provide the ability to test insertion rate of
>> specific rte_flow rule, by stressing it to the NIC, and calculate the
>> insertion rate.
>>
>> The application offers some options in the command line, to configure
>> which rule to apply.
>>
>> After that the application will start producing rules with same
>> pattern but increasing the outer IP source address by 1 each time,
>> thus it will give different flow each time, and all other items will
>> have open masks.
>>
>> The current design have single core insertion rate.
>> In the future we may have a multi core insertion rate measurement
>> support in the app.
>>
>> Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
>> ---
>
>[snip]
>
>> diff --git a/app/test-flow-perf/actions_gen.c
>> b/app/test-flow-perf/actions_gen.c
>> new file mode 100644
>> index 000000000..fa60084cf
>> --- /dev/null
>> +++ b/app/test-flow-perf/actions_gen.c
>> @@ -0,0 +1,88 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright 2020 Mellanox Technologies, Ltd
>> + *
>> + * The file contains the implementations of actions generators.
>> + * Each generator is responsible for preparing it's action instance
>> + * and initializing it with needed data.
>> + **/
>> +
>> +#include <sys/types.h>
>> +#include <rte_malloc.h>
>> +#include <rte_flow.h>
>> +#include <rte_ethdev.h>
>> +
>> +#define ALLOCATE_ACTION_VARS
>> +#include "actions_gen.h"
>> +#include "config.h"
>> +
>> +void
>> +gen_mark(void)
>> +{
>> +	mark_action.id = MARK_ID;
>> +}
>> +
>> +void
>> +gen_queue(uint16_t queue)
>> +{
>> +	queue_action.index = queue;
>> +}
>> +
>> +void
>> +gen_jump(uint16_t next_table)
>> +{
>> +	jump_action.group = next_table;
>> +}
>> +
>> +void
>> +gen_rss(uint16_t *queues, uint16_t queues_number) {
>> +	uint16_t queue;
>> +	struct action_rss_data *rss_data;
>> +
>> +	rss_data = rte_malloc("rss_data",
>> +		sizeof(struct action_rss_data), 0);
>> +
>> +	if (rss_data == NULL)
>> +		rte_exit(EXIT_FAILURE, "No Memory available!");
>> +
>> +	*rss_data = (struct action_rss_data){
>> +		.conf = (struct rte_flow_action_rss){
>> +			.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
>> +			.level = 0,
>> +			.types = ETH_RSS_IP |
>> +				ETH_RSS_TCP,
>
>Duplicating it in two places suggests addition of a macro with the value and
>usage in two places.
>
>> +			.key_len = sizeof(rss_data->key),
>> +			.queue_num = queues_number,
>> +			.key = rss_data->key,
>> +			.queue = rss_data->queue,
>> +		},
>> +		.key = { 1 },
>> +		.queue = { 0 },
>> +	};
>> +
>> +	for (queue = 0; queue < queues_number; queue++)
>> +		rss_data->queue[queue] = queues[queue];
>> +
>> +	rss_action = &rss_data->conf;
>> +}
>> +
>> +void
>> +gen_set_meta(void)
>> +{
>> +	meta_action.data = RTE_BE32(META_DATA);
>> +	meta_action.mask = RTE_BE32(0xffffffff); }
>> +
>> +void
>> +gen_set_tag(void)
>> +{
>> +	tag_action.data = RTE_BE32(META_DATA);
>> +	tag_action.mask = RTE_BE32(0xffffffff);
>> +	tag_action.index = TAG_INDEX;
>> +}
>> +
>> +void
>> +gen_port_id(void)
>> +{
>> +	port_id.id = PORT_ID_DST;
>> +}
>>
>
>[snip]
>
>> diff --git a/app/test-flow-perf/flow_gen.c
>> b/app/test-flow-perf/flow_gen.c new file mode 100644 index
>> 000000000..cf5453586
>> --- /dev/null
>> +++ b/app/test-flow-perf/flow_gen.c
>> @@ -0,0 +1,179 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright 2020 Mellanox Technologies, Ltd
>> + *
>> + * The file contains the implementations of the method to
>> + * fill items, actions & attributes in their corresponding
>> + * arrays, and then generate rte_flow rule.
>> + *
>> + * After the generation. The rule goes to validation then
>> + * creation state and then return the results.
>> + */
>> +
>> +#include <stdint.h>
>> +
>> +#include "flow_gen.h"
>> +#include "items_gen.h"
>> +#include "actions_gen.h"
>> +#include "config.h"
>> +
>> +static void
>> +fill_attributes(struct rte_flow_attr *attr,
>> +	uint8_t flow_attrs, uint16_t group)
>> +{
>> +	if (flow_attrs & INGRESS)
>> +		attr->ingress = 1;
>> +	if (flow_attrs & EGRESS)
>> +		attr->egress = 1;
>> +	if (flow_attrs & TRANSFER)
>> +		attr->transfer = 1;
>> +	attr->group = group;
>> +}
>> +
>> +static void
>> +fill_items(struct rte_flow_item items[MAX_ITEMS_NUM],
>> +	uint16_t flow_items, uint32_t outer_ip_src) {
>> +	uint8_t items_counter = 0;
>> +
>> +	if (flow_items & META_ITEM)
>> +		add_meta_data(items, items_counter++);
>> +	if (flow_items & TAG_ITEM)
>> +		add_meta_tag(items, items_counter++);
>> +	if (flow_items & ETH_ITEM)
>> +		add_ether(items, items_counter++);
>> +	if (flow_items & VLAN_ITEM)
>> +		add_vlan(items, items_counter++);
>> +	if (flow_items & IPV4_ITEM)
>> +		add_ipv4(items, items_counter++, outer_ip_src);
>> +	if (flow_items & IPV6_ITEM)
>> +		add_ipv6(items, items_counter++, outer_ip_src);
>> +	if (flow_items & TCP_ITEM)
>> +		add_tcp(items, items_counter++);
>> +	if (flow_items & UDP_ITEM)
>> +		add_udp(items, items_counter++);
>> +	if (flow_items & VXLAN_ITEM)
>> +		add_vxlan(items, items_counter++);
>> +	if (flow_items & VXLAN_GPE_ITEM)
>> +		add_vxlan_gpe(items, items_counter++);
>> +	if (flow_items & GRE_ITEM)
>> +		add_gre(items, items_counter++);
>> +	if (flow_items & GENEVE_ITEM)
>> +		add_geneve(items, items_counter++);
>> +	if (flow_items & GTP_ITEM)
>> +		add_gtp(items, items_counter++);
>> +
>> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_END; }
>> +
>> +static void
>> +fill_actions(struct rte_flow_action actions[MAX_ACTIONS_NUM],
>> +	uint16_t flow_actions, uint32_t counter, uint16_t next_table,
>> +	uint16_t hairpinq)
>> +{
>> +	struct rte_flow_action_count count_action;
>> +	uint8_t actions_counter = 0;
>> +	uint16_t queues[RXQ_NUM];
>> +	uint16_t i;
>> +
>> +	/* None-fate actions */
>> +	if (flow_actions & MARK_ACTION) {
>> +		if (!counter)
>
>DPDK coding style says compare to 0 [1]. Many similar comparisons below.
>
>[1]
>https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fdoc.d
>pdk.org%2Fguides%2Fcontributing%2Fcoding_style.html%23null-
>pointers&amp;data=02%7C01%7Cwisamm%40mellanox.com%7C99a2e20aa04
>e49810d5808d7f1d18939%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0
>%7C637243754565957236&amp;sdata=SJ0Ga7yJlxgtaCNyCYDhFv6b8vTkV%2FO
>KccYfB7eR9vg%3D&amp;reserved=0
>
>> +			gen_mark();
>> +		actions[actions_counter].type =
>RTE_FLOW_ACTION_TYPE_MARK;
>> +		actions[actions_counter++].conf = &mark_action;
>> +	}
>> +	if (flow_actions & COUNT_ACTION) {
>> +		actions[actions_counter].type =
>RTE_FLOW_ACTION_TYPE_COUNT;
>> +		actions[actions_counter++].conf = &count_action;
>> +	}
>> +	if (flow_actions & META_ACTION) {
>> +		if (!counter)
>> +			gen_set_meta();
>> +		actions[actions_counter].type =
>RTE_FLOW_ACTION_TYPE_SET_META;
>> +		actions[actions_counter++].conf = &meta_action;
>> +	}
>> +	if (flow_actions & TAG_ACTION) {
>> +		if (!counter)
>> +			gen_set_tag();
>> +		actions[actions_counter].type =
>RTE_FLOW_ACTION_TYPE_SET_TAG;
>> +		actions[actions_counter++].conf = &tag_action;
>> +	}
>> +
>> +	/* Fate actions */
>> +	if (flow_actions & QUEUE_ACTION) {
>> +		gen_queue(counter % RXQ_NUM);
>> +		actions[actions_counter].type =
>RTE_FLOW_ACTION_TYPE_QUEUE;
>> +		actions[actions_counter++].conf = &queue_action;
>> +	}
>> +	if (flow_actions & RSS_ACTION) {
>> +		if (!counter) {
>> +			for (i = 0; i < RXQ_NUM; i++)
>> +				queues[i] = i;
>> +			gen_rss(queues, RXQ_NUM);
>> +		}
>> +		actions[actions_counter].type =
>RTE_FLOW_ACTION_TYPE_RSS;
>> +		actions[actions_counter++].conf = rss_action;
>> +	}
>> +	if (flow_actions & JUMP_ACTION) {
>> +		if (!counter)
>> +			gen_jump(next_table);
>> +		actions[actions_counter].type =
>RTE_FLOW_ACTION_TYPE_JUMP;
>> +		actions[actions_counter++].conf = &jump_action;
>> +	}
>> +	if (flow_actions & PORT_ID_ACTION) {
>> +		if (!counter)
>> +			gen_port_id();
>> +		actions[actions_counter].type =
>RTE_FLOW_ACTION_TYPE_PORT_ID;
>> +		actions[actions_counter++].conf = &port_id;
>> +	}
>> +	if (flow_actions & DROP_ACTION)
>> +		actions[actions_counter++].type =
>RTE_FLOW_ACTION_TYPE_DROP;
>> +	if (flow_actions & HAIRPIN_QUEUE_ACTION) {
>> +		gen_queue((counter % hairpinq) + RXQ_NUM);
>> +		actions[actions_counter].type =
>RTE_FLOW_ACTION_TYPE_QUEUE;
>> +		actions[actions_counter++].conf = &queue_action;
>> +	}
>> +	if (flow_actions & HAIRPIN_RSS_ACTION) {
>> +		if (!counter) {
>> +			uint16_t hairpin_queues[hairpinq];
>> +			for (i = 0; i < hairpinq; i++)
>> +				hairpin_queues[i] = i + RXQ_NUM;
>> +			gen_rss(hairpin_queues, hairpinq);
>> +		}
>> +		actions[actions_counter].type =
>RTE_FLOW_ACTION_TYPE_RSS;
>> +		actions[actions_counter++].conf = rss_action;
>> +	}
>> +
>> +	actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_END; }
>> +
>> +struct rte_flow *
>> +generate_flow(uint16_t port_id,
>> +	uint16_t group,
>> +	uint8_t flow_attrs,
>> +	uint16_t flow_items,
>> +	uint16_t flow_actions,
>> +	uint16_t next_table,
>> +	uint32_t outer_ip_src,
>> +	uint16_t hairpinq,
>> +	struct rte_flow_error *error)
>> +{
>> +	struct rte_flow_attr attr;
>> +	struct rte_flow_item items[MAX_ITEMS_NUM];
>> +	struct rte_flow_action actions[MAX_ACTIONS_NUM];
>> +	struct rte_flow *flow = NULL;
>> +
>> +	memset(items, 0, sizeof(items));
>> +	memset(actions, 0, sizeof(actions));
>> +	memset(&attr, 0, sizeof(struct rte_flow_attr));
>> +
>> +	fill_attributes(&attr, flow_attrs, group);
>> +
>> +	fill_actions(actions, flow_actions,
>> +			outer_ip_src, next_table, hairpinq);
>> +
>> +	fill_items(items, flow_items, outer_ip_src);
>> +
>> +	flow = rte_flow_create(port_id, &attr, items, actions, error);
>> +	return flow;
>> +}
>> diff --git a/app/test-flow-perf/flow_gen.h
>> b/app/test-flow-perf/flow_gen.h new file mode 100644 index
>> 000000000..43d9e7cfe
>> --- /dev/null
>> +++ b/app/test-flow-perf/flow_gen.h
>> @@ -0,0 +1,63 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright 2020 Mellanox Technologies, Ltd
>> + *
>> + * This file contains the items, actions and attributes
>> + * definition. And the methods to prepare and fill items,
>> + * actions and attributes to generate rte_flow rule.
>> + */
>> +
>> +#ifndef FLOW_PERF_FLOW_GEN
>> +#define FLOW_PERF_FLOW_GEN
>> +
>> +#include <stdint.h>
>> +#include <rte_flow.h>
>> +
>> +#include "config.h"
>> +
>> +#define FLOW_ITEM_MASK(_x) (UINT64_C(1) << _x)
>> +
>> +/* Items */
>> +#define ETH_ITEM             FLOW_ITEM_MASK(0)
>
>I don't understand why you don't use enum rte_flow_item_type members as
>offsets. In this case don't need these defines at all, just use
>FLOW_ITEM_MASK(ETH). It will make it easier to support new items.
>
>> +#define IPV4_ITEM            FLOW_ITEM_MASK(1)
>> +#define IPV6_ITEM            FLOW_ITEM_MASK(2)
>> +#define VLAN_ITEM            FLOW_ITEM_MASK(3)
>> +#define TCP_ITEM             FLOW_ITEM_MASK(4)
>> +#define UDP_ITEM             FLOW_ITEM_MASK(5)
>> +#define VXLAN_ITEM           FLOW_ITEM_MASK(6)
>> +#define VXLAN_GPE_ITEM       FLOW_ITEM_MASK(7)
>> +#define GRE_ITEM             FLOW_ITEM_MASK(8)
>> +#define GENEVE_ITEM          FLOW_ITEM_MASK(9)
>> +#define GTP_ITEM             FLOW_ITEM_MASK(10)
>> +#define META_ITEM            FLOW_ITEM_MASK(11)
>> +#define TAG_ITEM             FLOW_ITEM_MASK(12)
>> +
>> +/* Actions */
>> +#define QUEUE_ACTION         FLOW_ITEM_MASK(0)
>
>I don't understand why you don't use enum rte_flow_action_type members
>as offsets.
>
>> +#define MARK_ACTION          FLOW_ITEM_MASK(1)
>> +#define JUMP_ACTION          FLOW_ITEM_MASK(2)
>> +#define RSS_ACTION           FLOW_ITEM_MASK(3)
>> +#define COUNT_ACTION         FLOW_ITEM_MASK(4)
>> +#define META_ACTION          FLOW_ITEM_MASK(5)
>> +#define TAG_ACTION           FLOW_ITEM_MASK(6)
>> +#define DROP_ACTION          FLOW_ITEM_MASK(7)
>> +#define PORT_ID_ACTION       FLOW_ITEM_MASK(8)
>> +#define HAIRPIN_QUEUE_ACTION FLOW_ITEM_MASK(9)
>> +#define HAIRPIN_RSS_ACTION   FLOW_ITEM_MASK(10)
>> +
>> +/* Attributes */
>> +#define INGRESS              FLOW_ITEM_MASK(0)
>> +#define EGRESS               FLOW_ITEM_MASK(1)
>> +#define TRANSFER             FLOW_ITEM_MASK(2)
>> +
>> +struct rte_flow *
>> +generate_flow(uint16_t port_id,
>> +	uint16_t group,
>> +	uint8_t flow_attrs,
>> +	uint16_t flow_items,
>> +	uint16_t flow_actions,
>> +	uint16_t next_table,
>> +	uint32_t outer_ip_src,
>> +	uint16_t hairpinq,
>> +	struct rte_flow_error *error);
>> +
>> +#endif /* FLOW_PERF_FLOW_GEN */
>> diff --git a/app/test-flow-perf/items_gen.c
>> b/app/test-flow-perf/items_gen.c new file mode 100644 index
>> 000000000..1e9479fb8
>> --- /dev/null
>> +++ b/app/test-flow-perf/items_gen.c
>> @@ -0,0 +1,265 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright 2020 Mellanox Technologies, Ltd
>> + *
>> + * This file contain the implementations of the items
>> + * related methods. Each Item have a method to prepare
>> + * the item and add it into items array in given index.
>> + */
>> +
>> +#include <stdint.h>
>> +#include <rte_flow.h>
>> +
>> +#include "items_gen.h"
>> +#include "config.h"
>> +
>> +/* Current design is single threaded. */ static struct
>> +rte_flow_item_eth eth_spec; static struct rte_flow_item_eth eth_mask;
>
>It looks like that the design has problems with:
>eth / ip4 / udp / vxlan /eth / end
>patterns.

Not a problem, but the current support is for outer headers only:
up to the first tunnel. In the future we can have another patch to add the inner items to the app.

>
>> +static struct rte_flow_item_vlan vlan_spec; static struct
>> +rte_flow_item_vlan vlan_mask; static struct rte_flow_item_ipv4
>> +ipv4_spec; static struct rte_flow_item_ipv4 ipv4_mask; static struct
>> +rte_flow_item_ipv6 ipv6_spec; static struct rte_flow_item_ipv6
>> +ipv6_mask; static struct rte_flow_item_udp udp_spec; static struct
>> +rte_flow_item_udp udp_mask; static struct rte_flow_item_tcp tcp_spec;
>> +static struct rte_flow_item_tcp tcp_mask; static struct
>> +rte_flow_item_vxlan vxlan_spec; static struct rte_flow_item_vxlan
>> +vxlan_mask; static struct rte_flow_item_vxlan_gpe vxlan_gpe_spec;
>> +static struct rte_flow_item_vxlan_gpe vxlan_gpe_mask; static struct
>> +rte_flow_item_gre gre_spec; static struct rte_flow_item_gre gre_mask;
>> +static struct rte_flow_item_geneve geneve_spec; static struct
>> +rte_flow_item_geneve geneve_mask; static struct rte_flow_item_gtp
>> +gtp_spec; static struct rte_flow_item_gtp gtp_mask; static struct
>> +rte_flow_item_meta meta_spec; static struct rte_flow_item_meta
>> +meta_mask; static struct rte_flow_item_tag tag_spec; static struct
>> +rte_flow_item_tag tag_mask;
>
>I think that such amount of global variables makes the code hard to read and
>maintain. If that's only me, no problem.
>
>> +
>> +
>> +void
>> +add_ether(struct rte_flow_item items[MAX_ITEMS_NUM],
>> +	uint8_t items_counter)
>> +{
>> +	memset(&eth_spec, 0, sizeof(struct rte_flow_item_eth));
>> +	memset(&eth_mask, 0, sizeof(struct rte_flow_item_eth));
>> +	eth_spec.type = 0;
>> +	eth_mask.type = 0;
>
>What's the point to set type to 0 if you just memset the entire structure?
>
>> +
>> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_ETH;
>> +	items[items_counter].spec = &eth_spec;
>> +	items[items_counter].mask = &eth_mask; }
>> +
>> +void
>> +add_vlan(struct rte_flow_item items[MAX_ITEMS_NUM],
>> +	uint8_t items_counter)
>> +{
>> +	uint16_t vlan_value = VLAN_VALUE;
>> +	memset(&vlan_spec, 0, sizeof(struct rte_flow_item_vlan));
>> +	memset(&vlan_mask, 0, sizeof(struct rte_flow_item_vlan));
>> +
>> +	vlan_spec.tci = RTE_BE16(vlan_value);
>> +	vlan_mask.tci = RTE_BE16(0xffff);
>> +
>> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_VLAN;
>> +	items[items_counter].spec = &vlan_spec;
>> +	items[items_counter].mask = &vlan_mask; }
>> +
>> +void
>> +add_ipv4(struct rte_flow_item items[MAX_ITEMS_NUM],
>> +	uint8_t items_counter, uint32_t src_ipv4)
>
>Shouldn't src_ipv4 be rte_be32?

It doesn't matter; I just need a different IPv4 address each time, so I think uint32_t will do the job,
and I will not gain anything from converting it to rte_be32_t.

>
>> +{
>> +	memset(&ipv4_spec, 0, sizeof(struct rte_flow_item_ipv4));
>> +	memset(&ipv4_mask, 0, sizeof(struct rte_flow_item_ipv4));
>> +
>> +	ipv4_spec.hdr.src_addr = src_ipv4;
>> +	ipv4_mask.hdr.src_addr = 0xffffffff;
>
>RTE_BE32() is missing above (at least to be consistent with RTE_BE16() few
>lines above.
>
>> +
>> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_IPV4;
>> +	items[items_counter].spec = &ipv4_spec;
>> +	items[items_counter].mask = &ipv4_mask; }
>> +
>> +
>> +void
>> +add_ipv6(struct rte_flow_item items[MAX_ITEMS_NUM],
>> +	uint8_t items_counter, int src_ipv6)
>
>I think such specification of the source IPv6 address is very confusing. If you
>really need it, it would be nice to explain why in comments.

I'm not sure I understand.
The design in the whole app is to add rules with the same items, but with the outer src IP increased by 1 each time.
It's the same case with ipv6; what is the specification here?

>
>> +{
>> +	memset(&ipv6_spec, 0, sizeof(struct rte_flow_item_ipv6));
>> +	memset(&ipv6_mask, 0, sizeof(struct rte_flow_item_ipv6));
>> +
>> +	/** Set ipv6 src **/
>> +	memset(&ipv6_spec.hdr.src_addr, src_ipv6,
>> +		sizeof(ipv6_spec.hdr.src_addr) / 2);
>> +
>> +	/** Full mask **/
>> +	memset(&ipv6_mask.hdr.src_addr, 1,
>> +		sizeof(ipv6_spec.hdr.src_addr));
>
>Are you sure that 1 is what you really want here? Maybe 0xff?
>
>> +
>> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_IPV6;
>> +	items[items_counter].spec = &ipv6_spec;
>> +	items[items_counter].mask = &ipv6_mask; }
>> +
>> +void
>> +add_tcp(struct rte_flow_item items[MAX_ITEMS_NUM],
>> +	uint8_t items_counter)
>> +{
>> +	memset(&tcp_spec, 0, sizeof(struct rte_flow_item_tcp));
>> +	memset(&tcp_mask, 0, sizeof(struct rte_flow_item_tcp));
>> +
>> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_TCP;
>> +	items[items_counter].spec = &tcp_spec;
>> +	items[items_counter].mask = &tcp_mask; }
>> +
>> +void
>> +add_udp(struct rte_flow_item items[MAX_ITEMS_NUM],
>> +	uint8_t items_counter)
>> +{
>> +	memset(&udp_spec, 0, sizeof(struct rte_flow_item_udp));
>> +	memset(&udp_mask, 0, sizeof(struct rte_flow_item_udp));
>> +
>> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_UDP;
>> +	items[items_counter].spec = &udp_spec;
>> +	items[items_counter].mask = &udp_mask; }
>> +
>> +void
>> +add_vxlan(struct rte_flow_item items[MAX_ITEMS_NUM],
>> +	uint8_t items_counter)
>> +{
>> +	uint32_t vni_value = VNI_VALUE;
>> +	uint8_t i;
>> +	memset(&vxlan_spec, 0, sizeof(struct rte_flow_item_vxlan));
>> +	memset(&vxlan_mask, 0, sizeof(struct rte_flow_item_vxlan));
>> +
>> +	/* Set standard vxlan vni */
>> +	for (i = 0; i < 3; i++) {
>> +		vxlan_spec.vni[2 - i] = vni_value >> (i * 8);
>> +		vxlan_mask.vni[2 - i] = 0xff;
>> +	}
>> +
>> +	/* Standard vxlan flags */
>> +	vxlan_spec.flags = 0x8;
>> +
>> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_VXLAN;
>> +	items[items_counter].spec = &vxlan_spec;
>> +	items[items_counter].mask = &vxlan_mask; }
>> +
>> +void
>> +add_vxlan_gpe(struct rte_flow_item items[MAX_ITEMS_NUM],
>> +	uint8_t items_counter)
>> +{
>> +	uint32_t vni_value = VNI_VALUE;
>> +	uint8_t i;
>> +	memset(&vxlan_gpe_spec, 0, sizeof(struct
>rte_flow_item_vxlan_gpe));
>> +	memset(&vxlan_gpe_mask, 0, sizeof(struct
>rte_flow_item_vxlan_gpe));
>> +
>> +	/* Set vxlan-gpe vni */
>> +	for (i = 0; i < 3; i++) {
>> +		vxlan_gpe_spec.vni[2 - i] = vni_value >> (i * 8);
>> +		vxlan_gpe_mask.vni[2 - i] = 0xff;
>> +	}
>> +
>> +	/* vxlan-gpe flags */
>> +	vxlan_gpe_spec.flags = 0x0c;
>> +
>> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE;
>> +	items[items_counter].spec = &vxlan_gpe_spec;
>> +	items[items_counter].mask = &vxlan_gpe_mask; }
>> +
>> +void
>> +add_gre(struct rte_flow_item items[MAX_ITEMS_NUM],
>> +	uint8_t items_counter)
>> +{
>> +	uint16_t proto = GRE_PROTO;
>> +	memset(&gre_spec, 0, sizeof(struct rte_flow_item_gre));
>> +	memset(&gre_mask, 0, sizeof(struct rte_flow_item_gre));
>> +
>> +	gre_spec.protocol = RTE_BE16(proto);
>> +	gre_mask.protocol = 0xffff;
>
>RTE_BE16(0xffff) to be consistent
>
>> +
>> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_GRE;
>> +	items[items_counter].spec = &gre_spec;
>> +	items[items_counter].mask = &gre_mask; }
>> +
>> +void
>> +add_geneve(struct rte_flow_item items[MAX_ITEMS_NUM],
>> +	uint8_t items_counter)
>> +{
>> +	uint32_t vni_value = VNI_VALUE;
>> +	uint8_t i;
>> +	memset(&geneve_spec, 0, sizeof(struct rte_flow_item_geneve));
>> +	memset(&geneve_mask, 0, sizeof(struct rte_flow_item_geneve));
>> +
>> +	for (i = 0; i < 3; i++) {
>> +		geneve_spec.vni[2 - i] = vni_value >> (i * 8);
>> +		geneve_mask.vni[2 - i] = 0xff;
>> +	}
>> +
>> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_GENEVE;
>> +	items[items_counter].spec = &geneve_spec;
>> +	items[items_counter].mask = &geneve_mask; }
>> +
>> +void
>> +add_gtp(struct rte_flow_item items[MAX_ITEMS_NUM],
>> +	uint8_t items_counter)
>> +{
>> +	uint32_t teid_value = TEID_VALUE;
>> +	memset(&gtp_spec, 0, sizeof(struct rte_flow_item_gtp));
>> +	memset(&gtp_mask, 0, sizeof(struct rte_flow_item_gtp));
>> +
>> +	gtp_spec.teid = RTE_BE32(teid_value);
>> +	gtp_mask.teid = RTE_BE32(0xffffffff);
>> +
>> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_GTP;
>> +	items[items_counter].spec = &gtp_spec;
>> +	items[items_counter].mask = &gtp_mask; }
>> +
>> +void
>> +add_meta_data(struct rte_flow_item items[MAX_ITEMS_NUM],
>> +	uint8_t items_counter)
>> +{
>> +	uint32_t data = META_DATA;
>> +	memset(&meta_spec, 0, sizeof(struct rte_flow_item_meta));
>> +	memset(&meta_mask, 0, sizeof(struct rte_flow_item_meta));
>> +
>> +	meta_spec.data = RTE_BE32(data);
>> +	meta_mask.data = RTE_BE32(0xffffffff);
>> +
>> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_META;
>> +	items[items_counter].spec = &meta_spec;
>> +	items[items_counter].mask = &meta_mask; }
>> +
>> +
>> +void
>> +add_meta_tag(struct rte_flow_item items[MAX_ITEMS_NUM],
>> +	uint8_t items_counter)
>> +{
>> +	uint32_t data = META_DATA;
>> +	uint8_t index = TAG_INDEX;
>> +	memset(&tag_spec, 0, sizeof(struct rte_flow_item_tag));
>> +	memset(&tag_mask, 0, sizeof(struct rte_flow_item_tag));
>> +
>> +	tag_spec.data = RTE_BE32(data);
>> +	tag_mask.data = RTE_BE32(0xffffffff);
>> +	tag_spec.index = index;
>> +	tag_mask.index = 0xff;
>> +
>> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_TAG;
>> +	items[items_counter].spec = &tag_spec;
>> +	items[items_counter].mask = &tag_mask; }
>
>[snip]
>
>> diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
>> index 7a924cdb7..463e4a782 100644
>> --- a/app/test-flow-perf/main.c
>> +++ b/app/test-flow-perf/main.c
>> @@ -35,29 +35,156 @@
>>  #include <rte_flow.h>
>>
>>  #include "config.h"
>> +#include "flow_gen.h"
>>
>> -static uint32_t nb_lcores;
>> +#define MAX_ITERATIONS             100
>> +#define DEFAULT_RULES_COUNT    4000000
>> +#define DEFAULT_ITERATION       100000
>> +
>> +struct rte_flow *flow;
>> +static uint8_t flow_group;
>> +
>> +static uint16_t flow_items;
>> +static uint16_t flow_actions;
>> +static uint8_t flow_attrs;
>> +static volatile bool force_quit;
>> +static bool dump_iterations;
>>  static struct rte_mempool *mbuf_mp;
>> +static uint32_t nb_lcores;
>> +static uint32_t flows_count;
>> +static uint32_t iterations_number;
>> +static uint32_t hairpinq;
>
>Global variables again.

As I mentioned above:
Global vars in main.c:
I think it's OK to leave them as they are, since they are control variables and they are straightforward.
Whatever happens to the app, those shouldn't cause any issue; they are for control only.

>
>>  static void
>>  usage(char *progname)
>>  {
>>  	printf("\nusage: %s\n", progname);
>> +	printf("\nControl configurations:\n");
>> +	printf("  --flows-count=N: to set the number of needed"
>> +		" flows to insert, default is 4,000,000\n");
>> +	printf("  --dump-iterations: To print rates for each"
>> +		" iteration\n");
>> +
>> +	printf("To set flow attributes:\n");
>> +	printf("  --ingress: set ingress attribute in flows\n");
>> +	printf("  --egress: set egress attribute in flows\n");
>> +	printf("  --transfer: set transfer attribute in flows\n");
>> +	printf("  --group=N: set group for all flows,"
>> +		" default is 0\n");
>> +
>> +	printf("To set flow items:\n");
>> +	printf("  --ether: add ether layer in flow items\n");
>> +	printf("  --vlan: add vlan layer in flow items\n");
>> +	printf("  --ipv4: add ipv4 layer in flow items\n");
>> +	printf("  --ipv6: add ipv6 layer in flow items\n");
>> +	printf("  --tcp: add tcp layer in flow items\n");
>> +	printf("  --udp: add udp layer in flow items\n");
>> +	printf("  --vxlan: add vxlan layer in flow items\n");
>> +	printf("  --vxlan-gpe: add vxlan-gpe layer in flow items\n");
>> +	printf("  --gre: add gre layer in flow items\n");
>> +	printf("  --geneve: add geneve layer in flow items\n");
>> +	printf("  --gtp: add gtp layer in flow items\n");
>> +	printf("  --meta: add meta layer in flow items\n");
>> +	printf("  --tag: add tag layer in flow items\n");
>> +
>> +	printf("To set flow actions:\n");
>> +	printf("  --port-id: add port-id action in flow actions\n");
>> +	printf("  --rss: add rss action in flow actions\n");
>> +	printf("  --queue: add queue action in flow actions\n");
>> +	printf("  --jump: add jump action in flow actions\n");
>> +	printf("  --mark: add mark action in flow actions\n");
>> +	printf("  --count: add count action in flow actions\n");
>> +	printf("  --set-meta: add set meta action in flow actions\n");
>> +	printf("  --set-tag: add set tag action in flow actions\n");
>> +	printf("  --drop: add drop action in flow actions\n");
>> +	printf("  --hairpin-queue=N: add hairpin-queue action in flow
>actions\n");
>> +	printf("  --hairpin-rss=N: add hairping-rss action in flow
>> +actions\n");
>>  }
>>
>>  static void
>>  args_parse(int argc, char **argv)
>>  {
>>  	char **argvopt;
>> -	int opt;
>> +	int n, opt, i;
>>  	int opt_idx;
>> +	static const char * const items_str[] = {
>> +		"ether", "vlan", "ipv4", "ipv6",
>
>Typically the Ethernet pattern item is named "eth" (e.g.
>in testpmd if I'm not mistaken). Why does it differ here?

This is not testpmd; why do you think it should be the same?
I think it's clearer to write ether than eth, that's why I used it like this here.
Since it's not related to testpmd in any way, I thought it would be OK to choose the app's
own option names.

>
>> +		"tcp", "udp", "vxlan", "vxlan-gpe",
>> +		"gre", "geneve", "gtp", "meta",
>> +		"tag"
>> +	};
>> +	uint32_t items[] = {
>> +		ETH_ITEM, VLAN_ITEM, IPV4_ITEM,
>> +		IPV6_ITEM, TCP_ITEM, UDP_ITEM,
>> +		VXLAN_ITEM, VXLAN_GPE_ITEM, GRE_ITEM,
>> +		GENEVE_ITEM, GTP_ITEM, META_ITEM,
>> +		TAG_ITEM
>> +	};
>
>I guess the above two structures should both be const and must be in sync. If so,
>it would be better to create one array of structures with name and mask
>members. It is more robust.
>
>> +	static const char * const attributes_str[] = {
>> +		"ingress", "egress", "transfer"
>> +	};
>> +	uint32_t attributes[] = {
>> +		INGRESS, EGRESS, TRANSFER
>> +	};
>
>Same as above.
>
>> +	static const char * const actions_str[] = {
>> +		"port-id", "rss", "queue", "jump",
>> +		"mark", "count", "set-meta", "set-tag",
>> +		"drop",
>> +	};
>> +	uint32_t actions[] = {
>> +		PORT_ID_ACTION, RSS_ACTION, QUEUE_ACTION,
>> +		JUMP_ACTION, MARK_ACTION, COUNT_ACTION,
>> +		META_ACTION, TAG_ACTION, DROP_ACTION
>> +	};
>
>Same as above.
>
>> +	int items_size = RTE_DIM(items);
>> +	int attributes_size = RTE_DIM(attributes);
>> +	int actions_size = RTE_DIM(actions);
>> +
>>  	static struct option lgopts[] = {
>>  		/* Control */
>>  		{ "help",                       0, 0, 0 },
>> +		{ "flows-count",                1, 0, 0 },
>> +		{ "dump-iterations",            0, 0, 0 },
>> +		/* Attributes */
>> +		{ "ingress",                    0, 0, 0 },
>> +		{ "egress",                     0, 0, 0 },
>> +		{ "transfer",                   0, 0, 0 },
>
>I think it should be possible to add these items in a loop by attributes_str-like
>array.

I tried this and it became ugly, so I prefer it to remain like this.
Why?
The control args are static and are not part of the arrays, so I need to
hard-code start and end indexes to add each item, action or attribute, and the result was ugly.

e.g.:
for (i = 6; i < 22; i++) {
	lgopts[i].name = items[i].str;
	lgopts[i].has_arg = 0;
	lgopts[i].val = 0;
	lgopts[i].flag = 0;
}
for (i = 22; i < 32; i++) {
	lgopts[i].name = actions[i].str;
	lgopts[i].has_arg = 0;
	lgopts[i].val = 0;
	lgopts[i].flag = 0;
}
for (i = 32; i < 36; i++) {
	lgopts[i].name = attributes[i].str;
	lgopts[i].has_arg = 0;
	lgopts[i].val = 0;
	lgopts[i].flag = 0;
}


>
>> +		{ "group",                      1, 0, 0 },
>> +		/* Items */
>> +		{ "ether",                      0, 0, 0 },
>> +		{ "vlan",                       0, 0, 0 },
>> +		{ "ipv4",                       0, 0, 0 },
>> +		{ "ipv6",                       0, 0, 0 },
>> +		{ "tcp",                        0, 0, 0 },
>> +		{ "udp",                        0, 0, 0 },
>> +		{ "vxlan",                      0, 0, 0 },
>> +		{ "vxlan-gpe",                  0, 0, 0 },
>> +		{ "gre",                        0, 0, 0 },
>> +		{ "geneve",                     0, 0, 0 },
>> +		{ "gtp",                        0, 0, 0 },
>> +		{ "meta",                       0, 0, 0 },
>> +		{ "tag",                        0, 0, 0 },
>
>I think it should be possible to add these items in a loop by items_str-like
>array.  It would allow to avoid copy-paste and simplify new items addition.
>
>> +		/* Actions */
>> +		{ "port-id",                    0, 0, 0 },
>> +		{ "rss",                        0, 0, 0 },
>> +		{ "queue",                      0, 0, 0 },
>> +		{ "jump",                       0, 0, 0 },
>> +		{ "mark",                       0, 0, 0 },
>> +		{ "count",                      0, 0, 0 },
>> +		{ "set-meta",                   0, 0, 0 },
>> +		{ "set-tag",                    0, 0, 0 },
>> +		{ "drop",                       0, 0, 0 },
>
>I think it should be possible to add these items in a loop by actions_str-like
>array.  It would allow to avoid copy-paste and simplify new actions addition.
>
>
>> +		{ "hairpin-queue",              1, 0, 0 },
>> +		{ "hairpin-rss",                1, 0, 0 },
>>  	};
>>
>> +	flow_items = 0;
>> +	flow_actions = 0;
>> +	flow_attrs = 0;
>> +	hairpinq = 0;
>>  	argvopt = argv;
>>
>> +	printf(":: Flow -> ");
>>  	while ((opt = getopt_long(argc, argvopt, "",
>>  				lgopts, &opt_idx)) != EOF) {
>>  		switch (opt) {
>> @@ -66,6 +193,73 @@ args_parse(int argc, char **argv)
>>  				usage(argv[0]);
>>  				rte_exit(EXIT_SUCCESS, "Displayed help\n");
>>  			}
>> +
>> +			/* Attributes */
>> +			for (i = 0; i < attributes_size; i++)
>> +				if (!strcmp(lgopts[opt_idx].name,
>> +						attributes_str[i])) {
>
>Comparison vs 0. Many cases below.
>
>> +					flow_attrs |= attributes[i];
>> +					printf("%s / ", attributes_str[i]);
>> +				}
>> +			if (!strcmp(lgopts[opt_idx].name, "group")) {
>> +				n = atoi(optarg);
>> +				if (n >= 0)
>> +					flow_group = n;
>> +				else
>> +					rte_exit(EXIT_SUCCESS,
>> +						"flow group should be >= 0");
>> +				printf("group %d ", flow_group);
>> +			}
>> +
>> +			/* Items */
>> +			for (i = 0; i < items_size; i++)
>> +				if (!strcmp(lgopts[opt_idx].name,
>> +						items_str[i])) {
>> +					flow_items |= items[i];
>> +					printf("%s / ", items_str[i]);
>> +				}
>> +
>> +			/* Actions */
>> +			for (i = 0; i < actions_size; i++)
>> +				if (!strcmp(lgopts[opt_idx].name,
>> +						actions_str[i])) {
>> +					flow_actions |= actions[i];
>> +					printf("%s / ", actions_str[i]);
>> +				}
>> +			if (!strcmp(lgopts[opt_idx].name, "hairpin-rss")) {
>> +				n = atoi(optarg);
>> +				if (n > 0)
>> +					hairpinq = n;
>> +				else
>> +					rte_exit(EXIT_SUCCESS, "Hairpin
>queues should be > 0 ");
>> +
>> +				flow_actions |= HAIRPIN_RSS_ACTION;
>> +				printf("hairpin-rss / ");
>> +			}
>> +			if (!strcmp(lgopts[opt_idx].name, "hairpin-queue")) {
>> +				n = atoi(optarg);
>> +				if (n > 0)
>> +					hairpinq = n;
>> +				else
>> +					rte_exit(EXIT_SUCCESS, "Hairpin
>queues should be > 0 ");
>> +
>> +				flow_actions |= HAIRPIN_QUEUE_ACTION;
>> +				printf("hairpin-queue / ");
>> +			}
>> +
>> +			/* Control */
>> +			if (!strcmp(lgopts[opt_idx].name, "flows-count")) {
>> +				n = atoi(optarg);
>> +				if (n > (int) iterations_number)
>> +					flows_count = n;
>> +				else {
>> +					printf("\n\nflows_count should be >
>%d",
>> +						iterations_number);
>> +					rte_exit(EXIT_SUCCESS, " ");
>> +				}
>> +			}
>> +			if (!strcmp(lgopts[opt_idx].name, "dump-iterations"))
>> +				dump_iterations = true;
>>  			break;
>>  		default:
>>  			printf("Invalid option: %s\n", argv[optind]); @@ -
>74,15 +268,141
>> @@ args_parse(int argc, char **argv)
>>  			break;
>>  		}
>>  	}
>> +	printf("end_flow\n");
>> +}
>> +
>> +static void
>> +print_flow_error(struct rte_flow_error error) {
>> +	printf("Flow can't be created %d message: %s\n",
>> +		error.type,
>> +		error.message ? error.message : "(no stated reason)"); }
>> +
>> +static inline void
>> +flows_handler(void)
>> +{
>> +	struct rte_flow_error error;
>> +	clock_t start_iter, end_iter;
>> +	double cpu_time_used;
>> +	double flows_rate;
>> +	double cpu_time_per_iter[MAX_ITERATIONS];
>> +	double delta;
>> +	uint16_t nr_ports;
>> +	uint32_t i;
>> +	int port_id;
>> +	int iter_id;
>> +	uint32_t eagain_counter = 0;
>> +
>> +	nr_ports = rte_eth_dev_count_avail();
>> +
>> +	for (i = 0; i < MAX_ITERATIONS; i++)
>> +		cpu_time_per_iter[i] = -1;
>> +
>> +	if (iterations_number > flows_count)
>> +		iterations_number = flows_count;
>> +
>> +	printf(":: Flows Count per port: %d\n", flows_count);
>> +
>> +	for (port_id = 0; port_id < nr_ports; port_id++) {
>> +		cpu_time_used = 0;
>> +		if (flow_group > 0) {
>> +			/*
>> +			 * Create global rule to jump into flow_group,
>> +			 * this way the app will avoid the default rules.
>> +			 *
>> +			 * Golbal rule:
>> +			 * group 0 eth / end actions jump group <flow_group>
>> +			 *
>> +			 */
>> +			flow = generate_flow(port_id, 0, flow_attrs,
>ETH_ITEM,
>> +				JUMP_ACTION, flow_group, 0, 0, &error);
>> +
>> +			if (!flow) {
>
>Comparison vs NULL
>
>> +				print_flow_error(error);
>> +				rte_exit(EXIT_FAILURE, "error in creating
>flow");
>> +			}
>> +		}
>> +
>> +		/* Insertion Rate */
>> +		printf("Flows insertion on port = %d\n", port_id);
>> +		start_iter = clock();
>> +		for (i = 0; i < flows_count; i++) {
>> +			do {
>> +				rte_errno = 0;
>> +				flow = generate_flow(port_id, flow_group,
>> +					flow_attrs, flow_items, flow_actions,
>> +					JUMP_ACTION_TABLE, i, hairpinq,
>&error);
>> +				if (!flow)
>> +					eagain_counter++;
>> +			} while (rte_errno == EAGAIN);
>> +
>> +			if (force_quit)
>> +				i = flows_count;
>> +
>> +			if (!flow) {
>> +				print_flow_error(error);
>> +				rte_exit(EXIT_FAILURE, "error in creating
>flow");
>> +			}
>> +
>> +			if (i && !((i + 1) % iterations_number)) {
>> +				/* Save the insertion rate of each iter */
>> +				end_iter = clock();
>> +				delta = (double) (end_iter - start_iter);
>> +				iter_id = ((i + 1) / iterations_number) - 1;
>> +				cpu_time_per_iter[iter_id] =
>> +					delta / CLOCKS_PER_SEC;
>> +				cpu_time_used +=
>cpu_time_per_iter[iter_id];
>> +				start_iter = clock();
>> +			}
>> +		}
>> +
>> +		/* Iteration rate per iteration */
>> +		if (dump_iterations)
>> +			for (i = 0; i < MAX_ITERATIONS; i++) {
>> +				if (cpu_time_per_iter[i] == -1)
>> +					continue;
>> +				delta = (double)(iterations_number /
>> +					cpu_time_per_iter[i]);
>> +				flows_rate = delta / 1000;
>> +				printf(":: Iteration #%d: %d flows "
>> +					"in %f sec[ Rate = %f K/Sec ]\n",
>> +					i, iterations_number,
>> +					cpu_time_per_iter[i], flows_rate);
>> +			}
>> +
>> +		/* Insertion rate for all flows */
>> +		flows_rate = ((double) (flows_count / cpu_time_used) /
>1000);
>> +		printf("\n:: Total flow insertion rate -> %f K/Sec\n",
>> +						flows_rate);
>> +		printf(":: The time for creating %d in flows %f seconds\n",
>> +						flows_count, cpu_time_used);
>> +		printf(":: EAGAIN counter = %d\n", eagain_counter);
>> +	}
>> +}
>> +
>> +static void
>> +signal_handler(int signum)
>> +{
>> +	if (signum == SIGINT || signum == SIGTERM) {
>> +		printf("\n\nSignal %d received, preparing to exit...\n",
>> +					signum);
>> +		printf("Error: Stats are wrong due to sudden signal!\n\n");
>> +		force_quit = true;
>> +	}
>
>It is the patch which starts to use sigint.h and it should be included in the
>patch, not the previous one.
>
>>  }
>>
>>  static void
>>  init_port(void)
>>  {
>>  	int ret;
>> -	uint16_t i;
>> +	uint16_t i, j;
>>  	uint16_t port_id;
>>  	uint16_t nr_ports;
>> +	uint16_t nr_queues;
>> +	struct rte_eth_hairpin_conf hairpin_conf = {
>> +		.peer_count = 1,
>> +	};
>>  	struct rte_eth_conf port_conf = {
>>  		.rx_adv_conf = {
>>  			.rss_conf.rss_hf =
>> @@ -94,6 +414,10 @@ init_port(void)
>>  	struct rte_eth_rxconf rxq_conf;
>>  	struct rte_eth_dev_info dev_info;
>>
>> +	nr_queues = RXQ_NUM;
>> +	if (hairpinq)
>
>Comparison vs 0
>
>> +		nr_queues = RXQ_NUM + hairpinq;
>> +
>>  	nr_ports = rte_eth_dev_count_avail();
>>  	if (nr_ports == 0)
>>  		rte_exit(EXIT_FAILURE, "Error: no port detected\n"); @@ -
>118,8
>> +442,8 @@ init_port(void)
>>
>>  		printf(":: initializing port: %d\n", port_id);
>>
>> -		ret = rte_eth_dev_configure(port_id, RXQ_NUM,
>> -				TXQ_NUM, &port_conf);
>> +		ret = rte_eth_dev_configure(port_id, nr_queues,
>> +				nr_queues, &port_conf);
>>  		if (ret < 0)
>>  			rte_exit(EXIT_FAILURE,
>>  				":: cannot configure device: err=%d,
>port=%u\n", @@ -159,6
>> +483,30 @@ init_port(void)
>>  				":: promiscuous mode enable failed: err=%s,
>port=%u\n",
>>  				rte_strerror(-ret), port_id);
>>
>> +		if (hairpinq) {
>> +			for (i = RXQ_NUM, j = 0; i < nr_queues; i++, j++) {
>> +				hairpin_conf.peers[0].port = port_id;
>> +				hairpin_conf.peers[0].queue = j + TXQ_NUM;
>> +				ret =
>rte_eth_rx_hairpin_queue_setup(port_id, i,
>> +					NR_RXD, &hairpin_conf);
>> +				if (ret != 0)
>> +					rte_exit(EXIT_FAILURE,
>> +						":: Hairpin rx queue setup
>failed: err=%d, port=%u\n",
>> +						ret, port_id);
>> +			}
>> +
>> +			for (i = TXQ_NUM, j = 0; i < nr_queues; i++, j++) {
>> +				hairpin_conf.peers[0].port = port_id;
>> +				hairpin_conf.peers[0].queue = j + RXQ_NUM;
>> +				ret =
>rte_eth_tx_hairpin_queue_setup(port_id, i,
>> +					NR_TXD, &hairpin_conf);
>> +				if (ret != 0)
>> +					rte_exit(EXIT_FAILURE,
>> +						":: Hairpin tx queue setup
>failed: err=%d, port=%u\n",
>> +						ret, port_id);
>> +			}
>> +		}
>> +
>>  		ret = rte_eth_dev_start(port_id);
>>  		if (ret < 0)
>>  			rte_exit(EXIT_FAILURE,
>> @@ -180,6 +528,15 @@ main(int argc, char **argv)
>>  	if (ret < 0)
>>  		rte_exit(EXIT_FAILURE, "EAL init failed\n");
>>
>> +	force_quit = false;
>> +	dump_iterations = false;
>> +	flows_count = DEFAULT_RULES_COUNT;
>> +	iterations_number = DEFAULT_ITERATION;
>> +	flow_group = 0;
>> +
>> +	signal(SIGINT, signal_handler);
>> +	signal(SIGTERM, signal_handler);
>> +
>>  	argc -= ret;
>>  	argv += ret;
>>  	if (argc > 1)
>> @@ -191,6 +548,8 @@ main(int argc, char **argv)
>>  	if (nb_lcores <= 1)
>>  		rte_exit(EXIT_FAILURE, "This app needs at least two
>cores\n");
>>
>> +	flows_handler();
>> +
>>  	RTE_ETH_FOREACH_DEV(port) {
>>  		rte_flow_flush(port, &error);
>>  		rte_eth_dev_stop(port);
>
>[snip]
>
>> diff --git a/doc/guides/tools/flow-perf.rst
>> b/doc/guides/tools/flow-perf.rst index 49eb450ae..b45fccd69 100644
>> --- a/doc/guides/tools/flow-perf.rst
>> +++ b/doc/guides/tools/flow-perf.rst
>> @@ -4,7 +4,18 @@
>>  Flow performance tool
>>  =====================
>>
>> -Application for rte_flow performance testing.
>> +Application for rte_flow performance testing. The application provide
>> +the
>
>As far as I remember documentaiton guidelines recommend to start new
>sentenses from a new line.
>
>> +ability to test insertion rate of specific rte_flow rule, by
>> +stressing it to the NIC, and calculate the insertion rate.
>> +
>> +The application offers some options in the command line, to configure
>> +which rule to apply.
>> +
>> +After that the application will start producing rules with same
>> +pattern but increasing the outer IP source address by 1 each time,
>> +thus it will give different flow each time, and all other items will have open
>masks.
>> +
>> +The current design have single core insertion rate.
>>
>>
>>  Compiling the Application
>
>[snip]


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v6 0/5] Introduce flow perf application
  2020-05-06 12:36                 ` [dpdk-dev] [PATCH v5 1/5] app/flow-perf: add flow performance skeleton Wisam Jaddo
  2020-05-06 14:25                   ` Andrew Rybchenko
@ 2020-05-11 11:08                   ` Wisam Jaddo
  2020-05-11 11:08                     ` [dpdk-dev] [PATCH v6 1/5] app/flow-perf: add flow performance skeleton Wisam Jaddo
                                       ` (4 more replies)
  2020-05-11 11:09                   ` [dpdk-dev] [PATCH v6 0/5] Introduce flow perf application Wisam Jaddo
  2 siblings, 5 replies; 102+ messages in thread
From: Wisam Jaddo @ 2020-05-11 11:08 UTC (permalink / raw)
  To: dev, jackmin, thomas, jerinjacobk, ajit.khaparde, arybchenko

Add new application to test rte flow performance from:
- Insertion rate.
- Deletion rate.
- Memory consumption.
- PPS forward measurement.

---
v6:
* Use explicit comparison against 0 instead of logical not.
* Log errors to stderr.
* Remove offload parsing at queue level.
* Fix documentation and limitations.
* Add a macro for the RSS hash value, used in port and RSS action configuration.
* Remove all design global variables; only control variables are left.
* Remove item/action defines for the bit map and use rte_types instead.
* Address comments regarding args_parse().

v5:
* Add app to 20.05 release notes.
* Addressing comments.
* Fix compilation issue for gcc >= 10.
* Fix documentation.
* Remove unneeded CFLAGS.
* Remove unused includes.
* Addressing format comments.
* Move hairpin to be option use only.
* Use RSS hash IP + TCP in ports and rss action.
* Introduce and use new macro for bit flags.

v4:
* Fix compilation error due to variable set but not used.

v3:
* Fix passing hairpin queues to hairpin rss action.

v2:
* reset cpu_time_used every port.
* generate different RSS action every flow with different RETA.
* Fix in commit log message


Wisam Jaddo (5):
  app/flow-perf: add flow performance skeleton
  app/flow-perf: add insertion rate calculation
  app/flow-perf: add deletion rate calculation
  app/flow-perf: add memory dump to app
  app/flow-perf: add packet forwarding support

 MAINTAINERS                            |    5 +
 app/Makefile                           |    1 +
 app/meson.build                        |    1 +
 app/test-flow-perf/Makefile            |   26 +
 app/test-flow-perf/actions_gen.c       |  164 ++++
 app/test-flow-perf/actions_gen.h       |   29 +
 app/test-flow-perf/config.h            |   30 +
 app/test-flow-perf/flow_gen.c          |  145 +++
 app/test-flow-perf/flow_gen.h          |   37 +
 app/test-flow-perf/items_gen.c         |  277 ++++++
 app/test-flow-perf/items_gen.h         |   31 +
 app/test-flow-perf/main.c              | 1116 ++++++++++++++++++++++++
 app/test-flow-perf/meson.build         |   11 +
 config/common_base                     |    5 +
 doc/guides/rel_notes/release_20_05.rst |   10 +
 doc/guides/tools/flow-perf.rst         |  247 ++++++
 doc/guides/tools/index.rst             |    1 +
 17 files changed, 2136 insertions(+)
 create mode 100644 app/test-flow-perf/Makefile
 create mode 100644 app/test-flow-perf/actions_gen.c
 create mode 100644 app/test-flow-perf/actions_gen.h
 create mode 100644 app/test-flow-perf/config.h
 create mode 100644 app/test-flow-perf/flow_gen.c
 create mode 100644 app/test-flow-perf/flow_gen.h
 create mode 100644 app/test-flow-perf/items_gen.c
 create mode 100644 app/test-flow-perf/items_gen.h
 create mode 100644 app/test-flow-perf/main.c
 create mode 100644 app/test-flow-perf/meson.build
 create mode 100644 doc/guides/tools/flow-perf.rst

-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v6 1/5] app/flow-perf: add flow performance skeleton
  2020-05-11 11:08                   ` [dpdk-dev] [PATCH v6 0/5] Introduce flow perf application Wisam Jaddo
@ 2020-05-11 11:08                     ` Wisam Jaddo
  2020-06-04 13:34                       ` [dpdk-dev] [PATCH v7 0/5] Introduce flow perf application Wisam Jaddo
  2020-05-11 11:08                     ` [dpdk-dev] [PATCH v6 2/5] app/flow-perf: add insertion rate calculation Wisam Jaddo
                                       ` (3 subsequent siblings)
  4 siblings, 1 reply; 102+ messages in thread
From: Wisam Jaddo @ 2020-05-11 11:08 UTC (permalink / raw)
  To: dev, jackmin, thomas, jerinjacobk, ajit.khaparde, arybchenko

Add flow performance application skeleton.

Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 MAINTAINERS                            |   5 +
 app/Makefile                           |   1 +
 app/meson.build                        |   1 +
 app/test-flow-perf/Makefile            |  23 +++
 app/test-flow-perf/config.h            |  14 ++
 app/test-flow-perf/main.c              | 194 +++++++++++++++++++++++++
 app/test-flow-perf/meson.build         |   8 +
 config/common_base                     |   5 +
 doc/guides/rel_notes/release_20_05.rst |   4 +
 doc/guides/tools/flow-perf.rst         |  44 ++++++
 doc/guides/tools/index.rst             |   1 +
 11 files changed, 300 insertions(+)
 create mode 100644 app/test-flow-perf/Makefile
 create mode 100644 app/test-flow-perf/config.h
 create mode 100644 app/test-flow-perf/main.c
 create mode 100644 app/test-flow-perf/meson.build
 create mode 100644 doc/guides/tools/flow-perf.rst

diff --git a/MAINTAINERS b/MAINTAINERS
index e05c80504..7d678e15d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1506,6 +1506,11 @@ T: git://dpdk.org/next/dpdk-next-net
 F: app/test-pmd/
 F: doc/guides/testpmd_app_ug/
 
+Flow performance tool
+M: Wisam Jaddo <wisamm@mellanox.com>
+F: app/test-flow-perf
+F: doc/guides/tools/flow-perf.rst
+
 Compression performance test application
 T: git://dpdk.org/next/dpdk-next-crypto
 F: app/test-compress-perf/
diff --git a/app/Makefile b/app/Makefile
index 823771c5f..0392a7de0 100644
--- a/app/Makefile
+++ b/app/Makefile
@@ -10,6 +10,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += pdump
 DIRS-$(CONFIG_RTE_LIBRTE_ACL) += test-acl
 DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += test-cmdline
 DIRS-$(CONFIG_RTE_LIBRTE_FIB) += test-fib
+DIRS-$(CONFIG_RTE_TEST_FLOW_PERF) += test-flow-perf
 DIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += test-pipeline
 DIRS-$(CONFIG_RTE_LIBRTE_IPSEC) += test-sad
 
diff --git a/app/meson.build b/app/meson.build
index 0f7fe9464..408676b06 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -15,6 +15,7 @@ apps = [
 	'test-crypto-perf',
 	'test-eventdev',
 	'test-fib',
+	'test-flow-perf',
 	'test-pipeline',
 	'test-pmd',
 	'test-sad']
diff --git a/app/test-flow-perf/Makefile b/app/test-flow-perf/Makefile
new file mode 100644
index 000000000..db043c17a
--- /dev/null
+++ b/app/test-flow-perf/Makefile
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2020 Mellanox Technologies, Ltd
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifeq ($(CONFIG_RTE_TEST_FLOW_PERF),y)
+
+#
+# library name
+#
+APP = dpdk-test-flow-perf
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-y += main.c
+
+include $(RTE_SDK)/mk/rte.app.mk
+
+endif
diff --git a/app/test-flow-perf/config.h b/app/test-flow-perf/config.h
new file mode 100644
index 000000000..cf41e0345
--- /dev/null
+++ b/app/test-flow-perf/config.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+
+#define GET_RSS_HF() (ETH_RSS_IP | ETH_RSS_TCP)
+
+/* Configuration */
+#define RXQ_NUM 4
+#define TXQ_NUM 4
+#define TOTAL_MBUF_NUM 32000
+#define MBUF_SIZE 2048
+#define MBUF_CACHE_SIZE 512
+#define NR_RXD  256
+#define NR_TXD  256
diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
new file mode 100644
index 000000000..8659870af
--- /dev/null
+++ b/app/test-flow-perf/main.c
@@ -0,0 +1,194 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2020 Mellanox Technologies, Ltd
+ *
+ * This file contains the application main file.
+ * This application provides the user the ability to test the
+ * insertion rate for specific rte_flow rule under stress state ~4M rules.
+ *
+ * Then it will also provide packet per second measurement after installing
+ * all rules, the user may send traffic to test the PPS that match the rules
+ * after all rules are installed, to check performance or functionality after
+ * the stress.
+ *
+ * The flow insertion will run for all ports first, then it will print the
+ * results. After that the application will go into packet forwarding mode:
+ * it will start receiving traffic, if any, forward it back, and
+ * give a packets-per-second measurement.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <sys/time.h>
+
+#include <rte_malloc.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+#include <rte_flow.h>
+
+#include "config.h"
+
+static uint32_t nb_lcores;
+static struct rte_mempool *mbuf_mp;
+
+static void
+usage(char *progname)
+{
+	printf("\nusage: %s\n", progname);
+}
+
+static void
+args_parse(int argc, char **argv)
+{
+	char **argvopt;
+	int opt;
+	int opt_idx;
+	static struct option lgopts[] = {
+		/* Control */
+		{ "help",                       0, 0, 0 },
+	};
+
+	argvopt = argv;
+
+	while ((opt = getopt_long(argc, argvopt, "",
+				lgopts, &opt_idx)) != EOF) {
+		switch (opt) {
+		case 0:
+			if (strcmp(lgopts[opt_idx].name, "help") == 0) {
+				usage(argv[0]);
+				rte_exit(EXIT_SUCCESS, "Displayed help\n");
+			}
+			break;
+		default:
+			fprintf(stderr, "Invalid option: %s\n", argv[optind]);
+			usage(argv[0]);
+			rte_exit(EXIT_SUCCESS, "Invalid option\n");
+			break;
+		}
+	}
+}
+
+static void
+init_port(void)
+{
+	int ret;
+	uint16_t std_queue;
+	uint16_t port_id;
+	uint16_t nr_ports;
+	struct rte_eth_conf port_conf = {
+		.rx_adv_conf = {
+			.rss_conf.rss_hf =
+				GET_RSS_HF(),
+		}
+	};
+	struct rte_eth_txconf txq_conf;
+	struct rte_eth_rxconf rxq_conf;
+	struct rte_eth_dev_info dev_info;
+
+	nr_ports = rte_eth_dev_count_avail();
+	if (nr_ports == 0)
+		rte_exit(EXIT_FAILURE, "Error: no port detected\n");
+
+	mbuf_mp = rte_pktmbuf_pool_create("mbuf_pool",
+					TOTAL_MBUF_NUM, MBUF_CACHE_SIZE,
+					0, MBUF_SIZE,
+					rte_socket_id());
+	if (mbuf_mp == NULL)
+		rte_exit(EXIT_FAILURE, "Error: can't init mbuf pool\n");
+
+	for (port_id = 0; port_id < nr_ports; port_id++) {
+		ret = rte_eth_dev_info_get(port_id, &dev_info);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+				"Error during getting device"
+				" (port %u) info: %s\n",
+				port_id, strerror(-ret));
+
+		port_conf.txmode.offloads &= dev_info.tx_offload_capa;
+		port_conf.rxmode.offloads &= dev_info.rx_offload_capa;
+
+		printf(":: initializing port: %d\n", port_id);
+
+		ret = rte_eth_dev_configure(port_id, RXQ_NUM,
+				TXQ_NUM, &port_conf);
+		if (ret < 0)
+			rte_exit(EXIT_FAILURE,
+				":: cannot configure device: err=%d, port=%u\n",
+				ret, port_id);
+
+		rxq_conf = dev_info.default_rxconf;
+		for (std_queue = 0; std_queue < RXQ_NUM; std_queue++) {
+			ret = rte_eth_rx_queue_setup(port_id, std_queue, NR_RXD,
+					rte_eth_dev_socket_id(port_id),
+					&rxq_conf,
+					mbuf_mp);
+			if (ret < 0)
+				rte_exit(EXIT_FAILURE,
+					":: Rx queue setup failed: err=%d, port=%u\n",
+					ret, port_id);
+		}
+
+		txq_conf = dev_info.default_txconf;
+		for (std_queue = 0; std_queue < TXQ_NUM; std_queue++) {
+			ret = rte_eth_tx_queue_setup(port_id, std_queue, NR_TXD,
+					rte_eth_dev_socket_id(port_id),
+					&txq_conf);
+			if (ret < 0)
+				rte_exit(EXIT_FAILURE,
+					":: Tx queue setup failed: err=%d, port=%u\n",
+					ret, port_id);
+		}
+
+		/* Catch all packets from traffic generator. */
+		ret = rte_eth_promiscuous_enable(port_id);
+		if (ret != 0)
+			rte_exit(EXIT_FAILURE,
+				":: promiscuous mode enable failed: err=%s, port=%u\n",
+				rte_strerror(-ret), port_id);
+
+		ret = rte_eth_dev_start(port_id);
+		if (ret < 0)
+			rte_exit(EXIT_FAILURE,
+				"rte_eth_dev_start:err=%d, port=%u\n",
+				ret, port_id);
+
+		printf(":: initializing port: %d done\n", port_id);
+	}
+}
+
+int
+main(int argc, char **argv)
+{
+	int ret;
+	uint16_t port;
+	struct rte_flow_error error;
+
+	ret = rte_eal_init(argc, argv);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "EAL init failed\n");
+
+	argc -= ret;
+	argv += ret;
+	if (argc > 1)
+		args_parse(argc, argv);
+
+	init_port();
+
+	nb_lcores = rte_lcore_count();
+	if (nb_lcores <= 1)
+		rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
+
+	RTE_ETH_FOREACH_DEV(port) {
+		rte_flow_flush(port, &error);
+		rte_eth_dev_stop(port);
+		rte_eth_dev_close(port);
+	}
+	return 0;
+}
diff --git a/app/test-flow-perf/meson.build b/app/test-flow-perf/meson.build
new file mode 100644
index 000000000..25711378f
--- /dev/null
+++ b/app/test-flow-perf/meson.build
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2020 Mellanox Technologies, Ltd
+
+sources = files(
+	'main.c',
+)
+
+deps += ['ethdev']
diff --git a/config/common_base b/config/common_base
index 14000ba07..b2edd5267 100644
--- a/config/common_base
+++ b/config/common_base
@@ -1105,6 +1105,11 @@ CONFIG_RTE_TEST_PMD=y
 CONFIG_RTE_TEST_PMD_RECORD_CORE_CYCLES=n
 CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
 
+#
+# Compile the rte flow perf application
+#
+CONFIG_RTE_TEST_FLOW_PERF=y
+
 #
 # Compile the bbdev test application
 #
diff --git a/doc/guides/rel_notes/release_20_05.rst b/doc/guides/rel_notes/release_20_05.rst
index b124c3f28..7abcae3aa 100644
--- a/doc/guides/rel_notes/release_20_05.rst
+++ b/doc/guides/rel_notes/release_20_05.rst
@@ -212,6 +212,10 @@ New Features
   * Added IPsec inbound load-distribution support for ipsec-secgw application
     using NIC load distribution feature(Flow Director).
 
+* **Added flow performance application.**
+
+  Add new application to test rte_flow performance.
+
 
 Removed Items
 -------------
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
new file mode 100644
index 000000000..49eb450ae
--- /dev/null
+++ b/doc/guides/tools/flow-perf.rst
@@ -0,0 +1,44 @@
+..	SPDX-License-Identifier: BSD-3-Clause
+	Copyright 2020 Mellanox Technologies, Ltd
+
+Flow performance tool
+=====================
+
+Application for rte_flow performance testing.
+
+
+Compiling the Application
+=========================
+
+The ``test-flow-perf`` application is compiled as part of the main compilation
+of the DPDK libraries and tools.
+
+Refer to the DPDK Getting Started Guides for details.
+
+
+Running the Application
+=======================
+
+EAL Command-line Options
+------------------------
+
+Please refer to :doc:`EAL parameters (Linux) <../linux_gsg/linux_eal_parameters>`
+or :doc:`EAL parameters (FreeBSD) <../freebsd_gsg/freebsd_eal_parameters>` for
+a list of available EAL command-line options.
+
+
+Flow performance Options
+------------------------
+
+The following are the command-line options for the flow performance application.
+They must be separated from the EAL options, shown in the previous section,
+with a ``--`` separator:
+
+.. code-block:: console
+
+	sudo ./dpdk-test-flow-perf -n 4 -w 08:00.0 --
+
+The command line options are:
+
+*	``--help``
+	Display a help message and quit.
diff --git a/doc/guides/tools/index.rst b/doc/guides/tools/index.rst
index 782b30864..7279daebc 100644
--- a/doc/guides/tools/index.rst
+++ b/doc/guides/tools/index.rst
@@ -16,3 +16,4 @@ DPDK Tools User Guides
     cryptoperf
     comp_perf
     testeventdev
+    flow-perf
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v6 2/5] app/flow-perf: add insertion rate calculation
  2020-05-11 11:08                   ` [dpdk-dev] [PATCH v6 0/5] Introduce flow perf application Wisam Jaddo
  2020-05-11 11:08                     ` [dpdk-dev] [PATCH v6 1/5] app/flow-perf: add flow performance skeleton Wisam Jaddo
@ 2020-05-11 11:08                     ` Wisam Jaddo
  2020-05-11 11:08                     ` [dpdk-dev] [PATCH v6 3/5] app/flow-perf: add deletion " Wisam Jaddo
                                       ` (2 subsequent siblings)
  4 siblings, 0 replies; 102+ messages in thread
From: Wisam Jaddo @ 2020-05-11 11:08 UTC (permalink / raw)
  To: dev, jackmin, thomas, jerinjacobk, ajit.khaparde, arybchenko

Add insertion rate calculation feature into flow
performance application.

The application now provides the ability to test
the insertion rate of a specific rte_flow rule, by
stressing it to the NIC, and calculates the
insertion rate.

The application offers some options in the command
line, to configure which rule to apply.

After that the application will start producing
rules with same pattern but increasing the outer IP
source address by 1 each time, thus it will give
different flow each time, and all other items will
have open masks.

The current design has single core insertion rate.
In the future we may have multi core insertion
rate measurement support in the app.

Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 app/test-flow-perf/Makefile            |   3 +
 app/test-flow-perf/actions_gen.c       | 164 +++++++++
 app/test-flow-perf/actions_gen.h       |  29 ++
 app/test-flow-perf/config.h            |  16 +
 app/test-flow-perf/flow_gen.c          | 145 ++++++++
 app/test-flow-perf/flow_gen.h          |  37 ++
 app/test-flow-perf/items_gen.c         | 277 +++++++++++++++
 app/test-flow-perf/items_gen.h         |  31 ++
 app/test-flow-perf/main.c              | 472 ++++++++++++++++++++++++-
 app/test-flow-perf/meson.build         |   3 +
 doc/guides/rel_notes/release_20_05.rst |   3 +
 doc/guides/tools/flow-perf.rst         | 195 +++++++++-
 12 files changed, 1368 insertions(+), 7 deletions(-)
 create mode 100644 app/test-flow-perf/actions_gen.c
 create mode 100644 app/test-flow-perf/actions_gen.h
 create mode 100644 app/test-flow-perf/flow_gen.c
 create mode 100644 app/test-flow-perf/flow_gen.h
 create mode 100644 app/test-flow-perf/items_gen.c
 create mode 100644 app/test-flow-perf/items_gen.h

diff --git a/app/test-flow-perf/Makefile b/app/test-flow-perf/Makefile
index db043c17a..4f2db7591 100644
--- a/app/test-flow-perf/Makefile
+++ b/app/test-flow-perf/Makefile
@@ -16,6 +16,9 @@ CFLAGS += $(WERROR_FLAGS)
 #
 # all source are stored in SRCS-y
 #
+SRCS-y += actions_gen.c
+SRCS-y += flow_gen.c
+SRCS-y += items_gen.c
 SRCS-y += main.c
 
 include $(RTE_SDK)/mk/rte.app.mk
diff --git a/app/test-flow-perf/actions_gen.c b/app/test-flow-perf/actions_gen.c
new file mode 100644
index 000000000..16bb3cf20
--- /dev/null
+++ b/app/test-flow-perf/actions_gen.c
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2020 Mellanox Technologies, Ltd
+ *
+ * This file contains the implementations of the action generators.
+ * Each generator is responsible for preparing its action instance
+ * and initializing it with the needed data.
+ */
+
+#include <sys/types.h>
+#include <rte_malloc.h>
+#include <rte_flow.h>
+#include <rte_ethdev.h>
+
+#include "actions_gen.h"
+#include "config.h"
+
+/* Storage for struct rte_flow_action_rss including external data. */
+/* add_rss() points conf.key and conf.queue at the two arrays below. */
+struct action_rss_data {
+	struct rte_flow_action_rss conf;
+	uint8_t key[40];
+	uint16_t queue[128];
+};
+
+/*
+ * Store a MARK action carrying MARK_ID at actions[actions_counter].
+ */
+void
+add_mark(struct rte_flow_action *actions,
+	uint8_t actions_counter)
+{
+	/* Static so the conf pointer stays valid after this call returns. */
+	static struct rte_flow_action_mark mark_action;
+	struct rte_flow_action *slot = &actions[actions_counter];
+
+	mark_action.id = MARK_ID;
+
+	slot->type = RTE_FLOW_ACTION_TYPE_MARK;
+	slot->conf = &mark_action;
+}
+
+/*
+ * Store a QUEUE action targeting the given queue index at
+ * actions[actions_counter].
+ *
+ * Every call rewrites the same static configuration, so all QUEUE
+ * actions produced by this function share the most recent index.
+ */
+void
+add_queue(struct rte_flow_action *actions,
+	uint8_t actions_counter, uint16_t queue)
+{
+	static struct rte_flow_action_queue queue_action;
+	struct rte_flow_action *slot = &actions[actions_counter];
+
+	queue_action.index = queue;
+
+	slot->type = RTE_FLOW_ACTION_TYPE_QUEUE;
+	slot->conf = &queue_action;
+}
+
+/*
+ * Store a JUMP action at actions[actions_counter] whose target group
+ * is next_table.
+ */
+void
+add_jump(struct rte_flow_action *actions,
+	uint8_t actions_counter, uint16_t next_table)
+{
+	/* Static so the conf pointer stays valid after this call returns. */
+	static struct rte_flow_action_jump jump_action;
+	struct rte_flow_action *slot = &actions[actions_counter];
+
+	jump_action.group = next_table;
+
+	slot->type = RTE_FLOW_ACTION_TYPE_JUMP;
+	slot->conf = &jump_action;
+}
+
+/*
+ * Store an RSS action spreading over the given queues at
+ * actions[actions_counter].
+ *
+ * @param actions          Action array being filled.
+ * @param actions_counter  Index of the slot to write.
+ * @param queues           Queue indices copied into the RSS configuration.
+ * @param queues_number    Number of entries in queues. The application
+ *                         exits when it exceeds the capacity of
+ *                         action_rss_data.queue.
+ *
+ * The configuration is allocated with rte_malloc() on every call and is
+ * not released by this function.
+ */
+void
+add_rss(struct rte_flow_action *actions,
+	uint8_t actions_counter, uint16_t *queues,
+	uint16_t queues_number)
+{
+	struct action_rss_data *rss_data = NULL;
+	const size_t max_queues =
+		sizeof(rss_data->queue) / sizeof(rss_data->queue[0]);
+	uint16_t queue;
+
+	/* Refuse queue lists that would overflow the embedded array. */
+	if (queues_number > max_queues)
+		rte_exit(EXIT_FAILURE,
+			"Too many RSS queues: %u\n", queues_number);
+
+	rss_data = rte_malloc("rss_data",
+		sizeof(struct action_rss_data), 0);
+
+	if (rss_data == NULL)
+		rte_exit(EXIT_FAILURE, "No Memory available!");
+
+	*rss_data = (struct action_rss_data){
+		.conf = (struct rte_flow_action_rss){
+			.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
+			.level = 0,
+			.types = GET_RSS_HF(),
+			.key_len = sizeof(rss_data->key),
+			.queue_num = queues_number,
+			/* Both pointers refer to arrays inside this allocation. */
+			.key = rss_data->key,
+			.queue = rss_data->queue,
+		},
+		.key = { 1 },
+		.queue = { 0 },
+	};
+
+	for (queue = 0; queue < queues_number; queue++)
+		rss_data->queue[queue] = queues[queue];
+
+	actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_RSS;
+	actions[actions_counter].conf = &rss_data->conf;
+}
+
+/*
+ * Store a SET_META action at actions[actions_counter] that writes
+ * META_DATA under a full 32-bit mask.
+ */
+void
+add_set_meta(struct rte_flow_action *actions,
+	uint8_t actions_counter)
+{
+	/* Static so the conf pointer stays valid after this call returns. */
+	static struct rte_flow_action_set_meta meta_action;
+	struct rte_flow_action *slot = &actions[actions_counter];
+
+	meta_action.mask = RTE_BE32(0xffffffff);
+	meta_action.data = RTE_BE32(META_DATA);
+
+	slot->type = RTE_FLOW_ACTION_TYPE_SET_META;
+	slot->conf = &meta_action;
+}
+
+/*
+ * Store a SET_TAG action at actions[actions_counter] that writes
+ * META_DATA under a full 32-bit mask into tag TAG_INDEX.
+ */
+void
+add_set_tag(struct rte_flow_action *actions,
+	uint8_t actions_counter)
+{
+	/* Static so the conf pointer stays valid after this call returns. */
+	static struct rte_flow_action_set_tag tag_action;
+	struct rte_flow_action *slot = &actions[actions_counter];
+
+	tag_action.index = TAG_INDEX;
+	tag_action.mask = RTE_BE32(0xffffffff);
+	tag_action.data = RTE_BE32(META_DATA);
+
+	slot->type = RTE_FLOW_ACTION_TYPE_SET_TAG;
+	slot->conf = &tag_action;
+}
+
+/*
+ * Store a PORT_ID action at actions[actions_counter] that targets
+ * port PORT_ID_DST.
+ */
+void
+add_port_id(struct rte_flow_action *actions,
+	uint8_t actions_counter)
+{
+	/* Static so the conf pointer stays valid after this call returns. */
+	static struct rte_flow_action_port_id port_id;
+	struct rte_flow_action *slot = &actions[actions_counter];
+
+	port_id.id = PORT_ID_DST;
+
+	slot->type = RTE_FLOW_ACTION_TYPE_PORT_ID;
+	slot->conf = &port_id;
+}
+
+/*
+ * Mark actions[actions_counter] as a DROP action; only the type field
+ * is written, conf is left as the caller initialized it.
+ */
+void
+add_drop(struct rte_flow_action *actions,
+	uint8_t actions_counter)
+{
+	struct rte_flow_action *slot = &actions[actions_counter];
+
+	slot->type = RTE_FLOW_ACTION_TYPE_DROP;
+}
+
+/*
+ * Store a COUNT action at actions[actions_counter]; its configuration
+ * is a static object that this function never modifies.
+ */
+void
+add_count(struct rte_flow_action *actions,
+	uint8_t actions_counter)
+{
+	static struct rte_flow_action_count count_action;
+	struct rte_flow_action *slot = &actions[actions_counter];
+
+	slot->type = RTE_FLOW_ACTION_TYPE_COUNT;
+	slot->conf = &count_action;
+}
diff --git a/app/test-flow-perf/actions_gen.h b/app/test-flow-perf/actions_gen.h
new file mode 100644
index 000000000..bc7d084f3
--- /dev/null
+++ b/app/test-flow-perf/actions_gen.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2020 Mellanox Technologies, Ltd
+ *
+ * This file contains the function declarations used to
+ * generate each supported action.
+ */
+
+#ifndef FLOW_PERF_ACTION_GEN
+#define FLOW_PERF_ACTION_GEN
+
+#include <rte_flow.h>
+
+#include "config.h"
+
+/*
+ * Each generator writes one action into actions[actions_counter]:
+ * it sets the slot's type and, except for add_drop(), points the
+ * slot's conf at the matching configuration object.
+ */
+void add_mark(struct rte_flow_action *actions, uint8_t actions_counter);
+/* queue: queue index stored in the QUEUE action. */
+void add_queue(struct rte_flow_action *actions,
+	uint8_t actions_counter, uint16_t queue);
+/* next_table: group stored in the JUMP action. */
+void add_jump(struct rte_flow_action *actions,
+	uint8_t actions_counter, uint16_t next_table);
+/* queues/queues_number: queue indices copied into the RSS action. */
+void add_rss(struct rte_flow_action *actions,
+	uint8_t actions_counter, uint16_t *queues,
+	uint16_t queues_number);
+void add_set_meta(struct rte_flow_action *actions, uint8_t actions_counter);
+void add_set_tag(struct rte_flow_action *actions, uint8_t actions_counter);
+void add_port_id(struct rte_flow_action *actions, uint8_t actions_counter);
+void add_drop(struct rte_flow_action *actions, uint8_t actions_counter);
+void add_count(struct rte_flow_action *actions, uint8_t actions_counter);
+
+#endif /* FLOW_PERF_ACTION_GEN */
diff --git a/app/test-flow-perf/config.h b/app/test-flow-perf/config.h
index cf41e0345..f16d0de77 100644
--- a/app/test-flow-perf/config.h
+++ b/app/test-flow-perf/config.h
@@ -2,6 +2,7 @@
  * Copyright 2020 Mellanox Technologies, Ltd
  */
 
+/* 64-bit mask with only bit number _x set; _x may be any expression. */
+#define FLOW_ITEM_MASK(_x) (UINT64_C(1) << (_x))
 #define GET_RSS_HF() (ETH_RSS_IP | ETH_RSS_TCP)
 
 /* Configuration */
@@ -12,3 +13,18 @@
 #define MBUF_CACHE_SIZE 512
 #define NR_RXD  256
 #define NR_TXD  256
+
+/* Items/Actions parameters */
+#define JUMP_ACTION_TABLE 2
+#define VLAN_VALUE 1
+#define VNI_VALUE 1
+#define GRE_PROTO  0x6558
+#define META_DATA 1
+#define TAG_INDEX 0
+#define PORT_ID_DST 1
+#define MARK_ID 1
+#define TEID_VALUE 1
+
+/* Flow items/actions max size */
+#define MAX_ITEMS_NUM 32
+#define MAX_ACTIONS_NUM 32
diff --git a/app/test-flow-perf/flow_gen.c b/app/test-flow-perf/flow_gen.c
new file mode 100644
index 000000000..50066d99e
--- /dev/null
+++ b/app/test-flow-perf/flow_gen.c
@@ -0,0 +1,145 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2020 Mellanox Technologies, Ltd
+ *
+ * This file contains the implementations of the methods that
+ * fill items, actions & attributes into their corresponding
+ * arrays, and then generate the rte_flow rule.
+ *
+ * After generation, the rule goes through validation and
+ * creation, and the result is returned.
+ */
+
+#include <stdint.h>
+
+#include "flow_gen.h"
+#include "items_gen.h"
+#include "actions_gen.h"
+#include "config.h"
+
+/*
+ * Translate the INGRESS/EGRESS/TRANSFER bits of flow_attrs into the
+ * matching rte_flow_attr flags and record the target group.
+ * Flags whose bit is clear are left as the caller initialized them.
+ */
+static void
+fill_attributes(struct rte_flow_attr *attr,
+	uint32_t flow_attrs, uint16_t group)
+{
+	attr->group = group;
+
+	if ((flow_attrs & INGRESS) != 0)
+		attr->ingress = 1;
+	if ((flow_attrs & EGRESS) != 0)
+		attr->egress = 1;
+	if ((flow_attrs & TRANSFER) != 0)
+		attr->transfer = 1;
+}
+
+static void
+fill_items(struct rte_flow_item *items,
+	uint32_t flow_items, uint32_t outer_ip_src)
+{
+	uint8_t items_counter = 0;
+
+	/* Support outer items up to tunnel layer only. */
+
+	if (flow_items & FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_META))
+		add_meta_data(items, items_counter++);
+	if (flow_items & FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_TAG))
+		add_meta_tag(items, items_counter++);
+	if (flow_items & FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_ETH))
+		add_ether(items, items_counter++);
+	if (flow_items & FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_VLAN))
+		add_vlan(items, items_counter++);
+	if (flow_items & FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_IPV4))
+		add_ipv4(items, items_counter++, outer_ip_src);
+	if (flow_items & FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_IPV6))
+		add_ipv6(items, items_counter++, outer_ip_src);
+	if (flow_items & FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_TCP))
+		add_tcp(items, items_counter++);
+	if (flow_items & FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_UDP))
+		add_udp(items, items_counter++);
+	if (flow_items & FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_VXLAN))
+		add_vxlan(items, items_counter++);
+	if (flow_items & FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_VXLAN_GPE))
+		add_vxlan_gpe(items, items_counter++);
+	if (flow_items & FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_GRE))
+		add_gre(items, items_counter++);
+	if (flow_items & FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_GENEVE))
+		add_geneve(items, items_counter++);
+	if (flow_items & FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_GTP))
+		add_gtp(items, items_counter++);
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_END;
+}
+
+/*
+ * Build the action array for one flow and terminate it with END.
+ *
+ * @param actions       Action array to fill, starting at index 0.
+ * @param flow_actions  Bitmap of requested actions.
+ * @param counter       Per-flow counter used to rotate the target queue.
+ * @param next_table    Group used by the JUMP action.
+ * @param hairpinq      Number of hairpin queues, numbered from RXQ_NUM
+ *                      upward. When 0, hairpin actions are skipped.
+ *
+ * NOTE(review): flow_actions is 32 bits wide while FLOW_ITEM_MASK()
+ * yields 64-bit masks, so an action type whose enum value is 32 or more
+ * could never match here - confirm against the rte_flow action enum.
+ */
+static void
+fill_actions(struct rte_flow_action *actions,
+	uint32_t flow_actions, uint32_t counter, uint16_t next_table,
+	uint16_t hairpinq)
+{
+	uint8_t actions_counter = 0;
+	uint16_t queues[RXQ_NUM];
+	uint16_t i;
+
+	/* Non-fate actions */
+	if (flow_actions & FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_MARK))
+		add_mark(actions, actions_counter++);
+	if (flow_actions & FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_COUNT))
+		add_count(actions, actions_counter++);
+	if (flow_actions & FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_SET_META))
+		add_set_meta(actions, actions_counter++);
+	if (flow_actions & FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_SET_TAG))
+		add_set_tag(actions, actions_counter++);
+
+	/* Fate actions */
+	if (flow_actions & FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_QUEUE))
+		add_queue(actions, actions_counter++, counter % RXQ_NUM);
+	if (flow_actions & FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_RSS)) {
+		for (i = 0; i < RXQ_NUM; i++)
+			queues[i] = i;
+		add_rss(actions, actions_counter++, queues, RXQ_NUM);
+	}
+	if (flow_actions & FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_JUMP))
+		add_jump(actions, actions_counter++, next_table);
+	if (flow_actions & FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_PORT_ID))
+		add_port_id(actions, actions_counter++);
+	if (flow_actions & FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_DROP))
+		add_drop(actions, actions_counter++);
+
+	/*
+	 * Hairpin actions need at least one hairpin queue: with
+	 * hairpinq == 0 the modulo would divide by zero and the queue list
+	 * would be a zero-length VLA, both undefined behavior.
+	 */
+	if (hairpinq > 0) {
+		if (flow_actions & HAIRPIN_QUEUE_ACTION)
+			add_queue(actions, actions_counter++,
+				(counter % hairpinq) + RXQ_NUM);
+		if (flow_actions & HAIRPIN_RSS_ACTION) {
+			uint16_t hairpin_queues[hairpinq];
+
+			for (i = 0; i < hairpinq; i++)
+				hairpin_queues[i] = i + RXQ_NUM;
+			add_rss(actions, actions_counter++,
+				hairpin_queues, hairpinq);
+		}
+	}
+
+	actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_END;
+}
+
+/*
+ * Assemble attributes, actions and items for a single rule and pass
+ * them to rte_flow_create() on the given port.
+ *
+ * Returns whatever rte_flow_create() returns; error is handed to it
+ * unchanged.
+ */
+struct rte_flow *
+generate_flow(uint16_t port_id,
+	uint16_t group,
+	uint32_t flow_attrs,
+	uint32_t flow_items,
+	uint32_t flow_actions,
+	uint16_t next_table,
+	uint32_t outer_ip_src,
+	uint16_t hairpinq,
+	struct rte_flow_error *error)
+{
+	struct rte_flow_item items[MAX_ITEMS_NUM];
+	struct rte_flow_action actions[MAX_ACTIONS_NUM];
+	struct rte_flow_attr attr;
+
+	/* Start from all-zero descriptors before the fill_* helpers run. */
+	memset(&attr, 0, sizeof(attr));
+	memset(actions, 0, sizeof(actions));
+	memset(items, 0, sizeof(items));
+
+	fill_attributes(&attr, flow_attrs, group);
+
+	/* outer_ip_src also serves as the per-flow counter for actions. */
+	fill_actions(actions, flow_actions,
+		outer_ip_src, next_table, hairpinq);
+
+	fill_items(items, flow_items, outer_ip_src);
+
+	return rte_flow_create(port_id, &attr, items, actions, error);
+}
diff --git a/app/test-flow-perf/flow_gen.h b/app/test-flow-perf/flow_gen.h
new file mode 100644
index 000000000..6b30a4ae2
--- /dev/null
+++ b/app/test-flow-perf/flow_gen.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2020 Mellanox Technologies, Ltd
+ *
+ * This file contains the item, action and attribute
+ * definitions, and the methods to prepare and fill items,
+ * actions and attributes to generate an rte_flow rule.
+ */
+
+#ifndef FLOW_PERF_FLOW_GEN
+#define FLOW_PERF_FLOW_GEN
+
+#include <stdint.h>
+#include <rte_flow.h>
+
+#include "config.h"
+
+/*
+ * Actions: extra bits tested in the flow_actions bitmap, alongside
+ * the FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_*) bits.
+ */
+#define HAIRPIN_QUEUE_ACTION FLOW_ITEM_MASK(0)
+#define HAIRPIN_RSS_ACTION   FLOW_ITEM_MASK(1)
+
+/* Attributes: bits of the flow_attrs bitmap. */
+#define INGRESS              FLOW_ITEM_MASK(0)
+#define EGRESS               FLOW_ITEM_MASK(1)
+#define TRANSFER             FLOW_ITEM_MASK(2)
+
+/*
+ * Build one rule from the given bitmaps and create it on port_id.
+ *
+ * group         group the rule is placed in
+ * flow_attrs    INGRESS/EGRESS/TRANSFER bits
+ * flow_items    bitmap of pattern items to include
+ * flow_actions  bitmap of actions to include
+ * next_table    group used by the JUMP action
+ * outer_ip_src  value placed in the outer IP source address; also used
+ *               to pick the target queue
+ * hairpinq      number of hairpin queues
+ * error         passed to rte_flow_create()
+ *
+ * Returns the result of rte_flow_create().
+ */
+struct rte_flow *
+generate_flow(uint16_t port_id,
+	uint16_t group,
+	uint32_t flow_attrs,
+	uint32_t flow_items,
+	uint32_t flow_actions,
+	uint16_t next_table,
+	uint32_t outer_ip_src,
+	uint16_t hairpinq,
+	struct rte_flow_error *error);
+
+#endif /* FLOW_PERF_FLOW_GEN */
diff --git a/app/test-flow-perf/items_gen.c b/app/test-flow-perf/items_gen.c
new file mode 100644
index 000000000..c84f45040
--- /dev/null
+++ b/app/test-flow-perf/items_gen.c
@@ -0,0 +1,277 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2020 Mellanox Technologies, Ltd
+ *
+ * This file contains the implementations of the item
+ * related methods. Each item has a method to prepare
+ * the item and add it into the items array at a given index.
+ */
+
+#include <stdint.h>
+#include <rte_flow.h>
+
+#include "items_gen.h"
+#include "config.h"
+
+/*
+ * Store an ETH item at items[items_counter] with an all-zero spec and
+ * an all-zero mask.
+ */
+void
+add_ether(struct rte_flow_item *items, uint8_t items_counter)
+{
+	/* Static so spec/mask pointers stay valid after this call. */
+	static struct rte_flow_item_eth eth_spec;
+	static struct rte_flow_item_eth eth_mask;
+	struct rte_flow_item *slot = &items[items_counter];
+
+	memset(&eth_spec, 0, sizeof(eth_spec));
+	memset(&eth_mask, 0, sizeof(eth_mask));
+
+	slot->type = RTE_FLOW_ITEM_TYPE_ETH;
+	slot->spec = &eth_spec;
+	slot->mask = &eth_mask;
+}
+
+/*
+ * Store a VLAN item at items[items_counter] matching TCI VLAN_VALUE
+ * under a full 16-bit mask.
+ */
+void
+add_vlan(struct rte_flow_item *items, uint8_t items_counter)
+{
+	/* Static so spec/mask pointers stay valid after this call. */
+	static struct rte_flow_item_vlan vlan_spec;
+	static struct rte_flow_item_vlan vlan_mask;
+	struct rte_flow_item *slot = &items[items_counter];
+	uint16_t tci = VLAN_VALUE;
+
+	memset(&vlan_spec, 0, sizeof(vlan_spec));
+	memset(&vlan_mask, 0, sizeof(vlan_mask));
+
+	vlan_mask.tci = RTE_BE16(0xffff);
+	vlan_spec.tci = RTE_BE16(tci);
+
+	slot->type = RTE_FLOW_ITEM_TYPE_VLAN;
+	slot->spec = &vlan_spec;
+	slot->mask = &vlan_mask;
+}
+
+/*
+ * Store an IPV4 item at items[items_counter] whose source address is
+ * src_ipv4, stored as given, and matched under a full 32-bit mask.
+ */
+void
+add_ipv4(struct rte_flow_item *items,
+	uint8_t items_counter, rte_be32_t src_ipv4)
+{
+	/* Static so spec/mask pointers stay valid after this call. */
+	static struct rte_flow_item_ipv4 ipv4_spec;
+	static struct rte_flow_item_ipv4 ipv4_mask;
+	struct rte_flow_item *slot = &items[items_counter];
+
+	memset(&ipv4_spec, 0, sizeof(ipv4_spec));
+	memset(&ipv4_mask, 0, sizeof(ipv4_mask));
+
+	ipv4_mask.hdr.src_addr = RTE_BE32(0xffffffff);
+	ipv4_spec.hdr.src_addr = src_ipv4;
+
+	slot->type = RTE_FLOW_ITEM_TYPE_IPV4;
+	slot->spec = &ipv4_spec;
+	slot->mask = &ipv4_mask;
+}
+
+
+/*
+ * Store an IPV6 item at items[items_counter] whose source address
+ * encodes src_ipv6 and is matched under a full mask.
+ *
+ * The 32-bit value is written into the last four bytes of the address,
+ * most significant byte first; the remaining bytes are zero. Passing
+ * the value to memset() instead would keep only its low 8 bits and
+ * repeat the same address every 256 values.
+ */
+void
+add_ipv6(struct rte_flow_item *items,
+	uint8_t items_counter, rte_be32_t src_ipv6)
+{
+	static struct rte_flow_item_ipv6 ipv6_spec;
+	static struct rte_flow_item_ipv6 ipv6_mask;
+	uint8_t *src = (uint8_t *)&ipv6_spec.hdr.src_addr;
+	const size_t addr_len = sizeof(ipv6_spec.hdr.src_addr);
+	uint8_t i;
+
+	memset(&ipv6_spec, 0, sizeof(struct rte_flow_item_ipv6));
+	memset(&ipv6_mask, 0, sizeof(struct rte_flow_item_ipv6));
+
+	/** Set ipv6 src **/
+	for (i = 0; i < sizeof(src_ipv6) && i < addr_len; i++)
+		src[addr_len - 1 - i] = (uint8_t)(src_ipv6 >> (i * 8));
+
+	/** Full mask **/
+	memset(&ipv6_mask.hdr.src_addr, 0xff,
+		sizeof(ipv6_mask.hdr.src_addr));
+
+	items[items_counter].type = RTE_FLOW_ITEM_TYPE_IPV6;
+	items[items_counter].spec = &ipv6_spec;
+	items[items_counter].mask = &ipv6_mask;
+}
+
+/*
+ * Store a TCP item at items[items_counter] with an all-zero spec and
+ * an all-zero mask.
+ */
+void
+add_tcp(struct rte_flow_item *items, uint8_t items_counter)
+{
+	/* Static so spec/mask pointers stay valid after this call. */
+	static struct rte_flow_item_tcp tcp_spec;
+	static struct rte_flow_item_tcp tcp_mask;
+	struct rte_flow_item *slot = &items[items_counter];
+
+	memset(&tcp_spec, 0, sizeof(tcp_spec));
+	memset(&tcp_mask, 0, sizeof(tcp_mask));
+
+	slot->type = RTE_FLOW_ITEM_TYPE_TCP;
+	slot->spec = &tcp_spec;
+	slot->mask = &tcp_mask;
+}
+
+/*
+ * Store a UDP item at items[items_counter] with an all-zero spec and
+ * an all-zero mask.
+ */
+void
+add_udp(struct rte_flow_item *items, uint8_t items_counter)
+{
+	/* Static so spec/mask pointers stay valid after this call. */
+	static struct rte_flow_item_udp udp_spec;
+	static struct rte_flow_item_udp udp_mask;
+	struct rte_flow_item *slot = &items[items_counter];
+
+	memset(&udp_spec, 0, sizeof(udp_spec));
+	memset(&udp_mask, 0, sizeof(udp_mask));
+
+	slot->type = RTE_FLOW_ITEM_TYPE_UDP;
+	slot->spec = &udp_spec;
+	slot->mask = &udp_mask;
+}
+
+/*
+ * Store a VXLAN item at items[items_counter] matching VNI VNI_VALUE
+ * under a full 24-bit VNI mask, with spec flags set to 0x8.
+ */
+void
+add_vxlan(struct rte_flow_item *items, uint8_t items_counter)
+{
+	/* Static so spec/mask pointers stay valid after this call. */
+	static struct rte_flow_item_vxlan vxlan_spec;
+	static struct rte_flow_item_vxlan vxlan_mask;
+	struct rte_flow_item *slot = &items[items_counter];
+	uint32_t vni = VNI_VALUE;
+	int byte;
+
+	memset(&vxlan_spec, 0, sizeof(vxlan_spec));
+	memset(&vxlan_mask, 0, sizeof(vxlan_mask));
+
+	/* Set standard vxlan vni: vni[2] takes the lowest 8 bits. */
+	for (byte = 2; byte >= 0; byte--) {
+		vxlan_spec.vni[byte] = vni;
+		vxlan_mask.vni[byte] = 0xff;
+		vni >>= 8;
+	}
+
+	/* Standard vxlan flags */
+	vxlan_spec.flags = 0x8;
+
+	slot->type = RTE_FLOW_ITEM_TYPE_VXLAN;
+	slot->spec = &vxlan_spec;
+	slot->mask = &vxlan_mask;
+}
+
+/*
+ * Store a VXLAN_GPE item at items[items_counter] matching VNI
+ * VNI_VALUE under a full 24-bit VNI mask, with spec flags set to 0x0c.
+ */
+void
+add_vxlan_gpe(struct rte_flow_item *items, uint8_t items_counter)
+{
+	/* Static so spec/mask pointers stay valid after this call. */
+	static struct rte_flow_item_vxlan_gpe vxlan_gpe_spec;
+	static struct rte_flow_item_vxlan_gpe vxlan_gpe_mask;
+	struct rte_flow_item *slot = &items[items_counter];
+	uint32_t vni = VNI_VALUE;
+	int byte;
+
+	memset(&vxlan_gpe_spec, 0, sizeof(vxlan_gpe_spec));
+	memset(&vxlan_gpe_mask, 0, sizeof(vxlan_gpe_mask));
+
+	/* Set vxlan-gpe vni: vni[2] takes the lowest 8 bits. */
+	for (byte = 2; byte >= 0; byte--) {
+		vxlan_gpe_spec.vni[byte] = vni;
+		vxlan_gpe_mask.vni[byte] = 0xff;
+		vni >>= 8;
+	}
+
+	/* vxlan-gpe flags */
+	vxlan_gpe_spec.flags = 0x0c;
+
+	slot->type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE;
+	slot->spec = &vxlan_gpe_spec;
+	slot->mask = &vxlan_gpe_mask;
+}
+
+/*
+ * Store a GRE item at items[items_counter] matching protocol
+ * GRE_PROTO under a full 16-bit mask.
+ */
+void
+add_gre(struct rte_flow_item *items, uint8_t items_counter)
+{
+	/* Static so spec/mask pointers stay valid after this call. */
+	static struct rte_flow_item_gre gre_spec;
+	static struct rte_flow_item_gre gre_mask;
+	struct rte_flow_item *slot = &items[items_counter];
+	uint16_t gre_proto = GRE_PROTO;
+
+	memset(&gre_spec, 0, sizeof(gre_spec));
+	memset(&gre_mask, 0, sizeof(gre_mask));
+
+	gre_mask.protocol = RTE_BE16(0xffff);
+	gre_spec.protocol = RTE_BE16(gre_proto);
+
+	slot->type = RTE_FLOW_ITEM_TYPE_GRE;
+	slot->spec = &gre_spec;
+	slot->mask = &gre_mask;
+}
+
+/*
+ * Store a GENEVE item at items[items_counter] matching VNI VNI_VALUE
+ * under a full 24-bit VNI mask.
+ */
+void
+add_geneve(struct rte_flow_item *items, uint8_t items_counter)
+{
+	/* Static so spec/mask pointers stay valid after this call. */
+	static struct rte_flow_item_geneve geneve_spec;
+	static struct rte_flow_item_geneve geneve_mask;
+	struct rte_flow_item *slot = &items[items_counter];
+	uint32_t vni = VNI_VALUE;
+	int byte;
+
+	memset(&geneve_spec, 0, sizeof(geneve_spec));
+	memset(&geneve_mask, 0, sizeof(geneve_mask));
+
+	/* vni[2] takes the lowest 8 bits, vni[0] the highest. */
+	for (byte = 2; byte >= 0; byte--) {
+		geneve_spec.vni[byte] = vni;
+		geneve_mask.vni[byte] = 0xff;
+		vni >>= 8;
+	}
+
+	slot->type = RTE_FLOW_ITEM_TYPE_GENEVE;
+	slot->spec = &geneve_spec;
+	slot->mask = &geneve_mask;
+}
+
+/*
+ * Store a GTP item at items[items_counter] matching TEID TEID_VALUE
+ * under a full 32-bit mask.
+ */
+void
+add_gtp(struct rte_flow_item *items, uint8_t items_counter)
+{
+	/* Static so spec/mask pointers stay valid after this call. */
+	static struct rte_flow_item_gtp gtp_spec;
+	static struct rte_flow_item_gtp gtp_mask;
+	struct rte_flow_item *slot = &items[items_counter];
+	uint32_t teid = TEID_VALUE;
+
+	memset(&gtp_spec, 0, sizeof(gtp_spec));
+	memset(&gtp_mask, 0, sizeof(gtp_mask));
+
+	gtp_mask.teid = RTE_BE32(0xffffffff);
+	gtp_spec.teid = RTE_BE32(teid);
+
+	slot->type = RTE_FLOW_ITEM_TYPE_GTP;
+	slot->spec = &gtp_spec;
+	slot->mask = &gtp_mask;
+}
+
+/*
+ * Store a META item at items[items_counter] matching META_DATA under
+ * a full 32-bit mask.
+ */
+void
+add_meta_data(struct rte_flow_item *items, uint8_t items_counter)
+{
+	/* Static so spec/mask pointers stay valid after this call. */
+	static struct rte_flow_item_meta meta_spec;
+	static struct rte_flow_item_meta meta_mask;
+	struct rte_flow_item *slot = &items[items_counter];
+	uint32_t meta = META_DATA;
+
+	memset(&meta_spec, 0, sizeof(meta_spec));
+	memset(&meta_mask, 0, sizeof(meta_mask));
+
+	meta_mask.data = RTE_BE32(0xffffffff);
+	meta_spec.data = RTE_BE32(meta);
+
+	slot->type = RTE_FLOW_ITEM_TYPE_META;
+	slot->spec = &meta_spec;
+	slot->mask = &meta_mask;
+}
+
+
+/*
+ * Store a TAG item at items[items_counter] matching META_DATA in tag
+ * TAG_INDEX, with full masks on both data and index.
+ */
+void
+add_meta_tag(struct rte_flow_item *items, uint8_t items_counter)
+{
+	/* Static so spec/mask pointers stay valid after this call. */
+	static struct rte_flow_item_tag tag_spec;
+	static struct rte_flow_item_tag tag_mask;
+	struct rte_flow_item *slot = &items[items_counter];
+	uint32_t tag_data = META_DATA;
+	uint8_t tag_index = TAG_INDEX;
+
+	memset(&tag_spec, 0, sizeof(tag_spec));
+	memset(&tag_mask, 0, sizeof(tag_mask));
+
+	tag_spec.index = tag_index;
+	tag_mask.index = 0xff;
+	tag_spec.data = RTE_BE32(tag_data);
+	tag_mask.data = RTE_BE32(0xffffffff);
+
+	slot->type = RTE_FLOW_ITEM_TYPE_TAG;
+	slot->spec = &tag_spec;
+	slot->mask = &tag_mask;
+}
diff --git a/app/test-flow-perf/items_gen.h b/app/test-flow-perf/items_gen.h
new file mode 100644
index 000000000..0edbc0b37
--- /dev/null
+++ b/app/test-flow-perf/items_gen.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2020 Mellanox Technologies, Ltd
+ *
+ * This file contains the items related methods
+ */
+
+#ifndef FLOW_PERF_ITEMS_GEN
+#define FLOW_PERF_ITEMS_GEN
+
+#include <stdint.h>
+#include <rte_flow.h>
+
+#include "config.h"
+
+/*
+ * Each generator writes one pattern item into items[items_counter]:
+ * it sets the slot's type and points spec and mask at the item's
+ * specification and mask objects.
+ */
+void add_ether(struct rte_flow_item *items, uint8_t items_counter);
+void add_vlan(struct rte_flow_item *items, uint8_t items_counter);
+/* src_ipv4: value stored as the IPv4 source address of the spec. */
+void add_ipv4(struct rte_flow_item *items,
+	uint8_t items_counter, rte_be32_t src_ipv4);
+/* src_ipv6: 32-bit value used to fill the IPv6 source address. */
+void add_ipv6(struct rte_flow_item *items,
+	uint8_t items_counter, rte_be32_t src_ipv6);
+void add_udp(struct rte_flow_item *items, uint8_t items_counter);
+void add_tcp(struct rte_flow_item *items, uint8_t items_counter);
+void add_vxlan(struct rte_flow_item *items, uint8_t items_counter);
+void add_vxlan_gpe(struct rte_flow_item *items, uint8_t items_counter);
+void add_gre(struct rte_flow_item *items, uint8_t items_counter);
+void add_geneve(struct rte_flow_item *items, uint8_t items_counter);
+void add_gtp(struct rte_flow_item *items, uint8_t items_counter);
+void add_meta_data(struct rte_flow_item *items, uint8_t items_counter);
+void add_meta_tag(struct rte_flow_item *items, uint8_t items_counter);
+
+#endif /* FLOW_PERF_ITEMS_GEN */
diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index 8659870af..1feb73e6f 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -26,6 +26,7 @@
 #include <getopt.h>
 #include <stdbool.h>
 #include <sys/time.h>
+#include <signal.h>
 
 #include <rte_malloc.h>
 #include <rte_mempool.h>
@@ -34,29 +35,257 @@
 #include <rte_flow.h>
 
 #include "config.h"
+#include "flow_gen.h"
 
-static uint32_t nb_lcores;
+#define MAX_ITERATIONS             100
+#define DEFAULT_RULES_COUNT    4000000
+#define DEFAULT_ITERATION       100000
+
+struct rte_flow *flow;
+static uint8_t flow_group;
+
+static uint32_t flow_items;
+static uint32_t flow_actions;
+static uint32_t flow_attrs;
+static volatile bool force_quit;
+static bool dump_iterations;
 static struct rte_mempool *mbuf_mp;
+static uint32_t nb_lcores;
+static uint32_t flows_count;
+static uint32_t iterations_number;
+static uint32_t hairpinq;
 
 static void
 usage(char *progname)
 {
 	printf("\nusage: %s\n", progname);
+	printf("\nControl configurations:\n");
+	printf("  --flows-count=N: to set the number of needed"
+		" flows to insert, default is 4,000,000\n");
+	printf("  --dump-iterations: To print rates for each"
+		" iteration\n");
+
+	printf("To set flow attributes:\n");
+	printf("  --ingress: set ingress attribute in flows\n");
+	printf("  --egress: set egress attribute in flows\n");
+	printf("  --transfer: set transfer attribute in flows\n");
+	printf("  --group=N: set group for all flows,"
+		" default is 0\n");
+
+	printf("To set flow items:\n");
+	printf("  --ether: add ether layer in flow items\n");
+	printf("  --vlan: add vlan layer in flow items\n");
+	printf("  --ipv4: add ipv4 layer in flow items\n");
+	printf("  --ipv6: add ipv6 layer in flow items\n");
+	printf("  --tcp: add tcp layer in flow items\n");
+	printf("  --udp: add udp layer in flow items\n");
+	printf("  --vxlan: add vxlan layer in flow items\n");
+	printf("  --vxlan-gpe: add vxlan-gpe layer in flow items\n");
+	printf("  --gre: add gre layer in flow items\n");
+	printf("  --geneve: add geneve layer in flow items\n");
+	printf("  --gtp: add gtp layer in flow items\n");
+	printf("  --meta: add meta layer in flow items\n");
+	printf("  --tag: add tag layer in flow items\n");
+
+	printf("To set flow actions:\n");
+	printf("  --port-id: add port-id action in flow actions\n");
+	printf("  --rss: add rss action in flow actions\n");
+	printf("  --queue: add queue action in flow actions\n");
+	printf("  --jump: add jump action in flow actions\n");
+	printf("  --mark: add mark action in flow actions\n");
+	printf("  --count: add count action in flow actions\n");
+	printf("  --set-meta: add set meta action in flow actions\n");
+	printf("  --set-tag: add set tag action in flow actions\n");
+	printf("  --drop: add drop action in flow actions\n");
+	printf("  --hairpin-queue=N: add hairpin-queue action in flow actions\n");
+	printf("  --hairpin-rss=N: add hairping-rss action in flow actions\n");
 }
 
 static void
 args_parse(int argc, char **argv)
 {
 	char **argvopt;
-	int opt;
+	int n, opt;
 	int opt_idx;
+	size_t i;
+
+	static const struct option_dict {
+		const char *str;
+		const uint64_t mask;
+		uint32_t *bitmap;
+	} flow_options[] = {
+		{
+			.str = "ether",
+			.mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_ETH),
+			.bitmap = &flow_items
+		},
+		{
+			.str = "ipv4",
+			.mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_IPV4),
+			.bitmap = &flow_items
+		},
+		{
+			.str = "ipv6",
+			.mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_IPV6),
+			.bitmap = &flow_items
+		},
+		{
+			.str = "vlan",
+			.mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_VLAN),
+			.bitmap = &flow_items
+		},
+		{
+			.str = "tcp",
+			.mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_TCP),
+			.bitmap = &flow_items
+		},
+		{
+			.str = "udp",
+			.mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_UDP),
+			.bitmap = &flow_items
+		},
+		{
+			.str = "vxlan",
+			.mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_VXLAN),
+			.bitmap = &flow_items
+		},
+		{
+			.str = "vxlan-gpe",
+			.mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_VXLAN_GPE),
+			.bitmap = &flow_items
+		},
+		{
+			.str = "gre",
+			.mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_GRE),
+			.bitmap = &flow_items
+		},
+		{
+			.str = "geneve",
+			.mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_GENEVE),
+			.bitmap = &flow_items
+		},
+		{
+			.str = "gtp",
+			.mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_GTP),
+			.bitmap = &flow_items
+		},
+		{
+			.str = "meta",
+			.mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_META),
+			.bitmap = &flow_items
+		},
+		{
+			.str = "tag",
+			.mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_TAG),
+			.bitmap = &flow_items
+		},
+		{
+			.str = "ingress",
+			.mask = INGRESS,
+			.bitmap = &flow_attrs
+		},
+		{
+			.str = "egress",
+			.mask = EGRESS,
+			.bitmap = &flow_attrs
+		},
+		{
+			.str = "transfer",
+			.mask = TRANSFER,
+			.bitmap = &flow_attrs
+		},
+		{
+			.str = "port-id",
+			.mask = FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_PORT_ID),
+			.bitmap = &flow_actions
+		},
+		{
+			.str = "rss",
+			.mask = FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_RSS),
+			.bitmap = &flow_actions
+		},
+		{
+			.str = "queue",
+			.mask = FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_QUEUE),
+			.bitmap = &flow_actions
+		},
+		{
+			.str = "jump",
+			.mask = FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_JUMP),
+			.bitmap = &flow_actions
+		},
+		{
+			.str = "mark",
+			.mask = FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_MARK),
+			.bitmap = &flow_actions
+		},
+		{
+			.str = "count",
+			.mask = FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_COUNT),
+			.bitmap = &flow_actions
+		},
+		{
+			.str = "set-meta",
+			.mask = FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_SET_META),
+			.bitmap = &flow_actions
+		},
+		{
+			.str = "set-tag",
+			.mask = FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_SET_TAG),
+			.bitmap = &flow_actions
+		},
+		{
+			.str = "drop",
+			.mask = FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_DROP),
+			.bitmap = &flow_actions
+		}
+	};
+
 	static struct option lgopts[] = {
 		/* Control */
 		{ "help",                       0, 0, 0 },
+		{ "flows-count",                1, 0, 0 },
+		{ "dump-iterations",            0, 0, 0 },
+		/* Attributes */
+		{ "ingress",                    0, 0, 0 },
+		{ "egress",                     0, 0, 0 },
+		{ "transfer",                   0, 0, 0 },
+		{ "group",                      1, 0, 0 },
+		/* Items */
+		{ "ether",                      0, 0, 0 },
+		{ "vlan",                       0, 0, 0 },
+		{ "ipv4",                       0, 0, 0 },
+		{ "ipv6",                       0, 0, 0 },
+		{ "tcp",                        0, 0, 0 },
+		{ "udp",                        0, 0, 0 },
+		{ "vxlan",                      0, 0, 0 },
+		{ "vxlan-gpe",                  0, 0, 0 },
+		{ "gre",                        0, 0, 0 },
+		{ "geneve",                     0, 0, 0 },
+		{ "gtp",                        0, 0, 0 },
+		{ "meta",                       0, 0, 0 },
+		{ "tag",                        0, 0, 0 },
+		/* Actions */
+		{ "port-id",                    0, 0, 0 },
+		{ "rss",                        0, 0, 0 },
+		{ "queue",                      0, 0, 0 },
+		{ "jump",                       0, 0, 0 },
+		{ "mark",                       0, 0, 0 },
+		{ "count",                      0, 0, 0 },
+		{ "set-meta",                   0, 0, 0 },
+		{ "set-tag",                    0, 0, 0 },
+		{ "drop",                       0, 0, 0 },
+		{ "hairpin-queue",              1, 0, 0 },
+		{ "hairpin-rss",                1, 0, 0 },
 	};
 
+	flow_items = 0;
+	flow_actions = 0;
+	flow_attrs = 0;
+	hairpinq = 0;
 	argvopt = argv;
 
+	printf(":: Flow -> ");
 	while ((opt = getopt_long(argc, argvopt, "",
 				lgopts, &opt_idx)) != EOF) {
 		switch (opt) {
@@ -65,6 +294,65 @@ args_parse(int argc, char **argv)
 				usage(argv[0]);
 				rte_exit(EXIT_SUCCESS, "Displayed help\n");
 			}
+
+			if (strcmp(lgopts[opt_idx].name, "group") == 0) {
+				n = atoi(optarg);
+				if (n >= 0)
+					flow_group = n;
+				else
+					rte_exit(EXIT_SUCCESS,
+						"flow group should be >= 0");
+				printf("group %d ", flow_group);
+			}
+
+			for (i = 0; i < RTE_DIM(flow_options); i++)
+				if (strcmp(lgopts[opt_idx].name,
+						flow_options[i].str) == 0) {
+					*flow_options[i].bitmap |=
+						flow_options[i].mask;
+					printf("%s / ", flow_options[i].str);
+				}
+
+			if (strcmp(lgopts[opt_idx].name,
+					"hairpin-rss") == 0) {
+				n = atoi(optarg);
+				if (n > 0)
+					hairpinq = n;
+				else
+					rte_exit(EXIT_SUCCESS,
+						"Hairpin queues should be > 0 ");
+
+				flow_actions |= HAIRPIN_RSS_ACTION;
+				printf("hairpin-rss / ");
+			}
+			if (strcmp(lgopts[opt_idx].name,
+					"hairpin-queue") == 0) {
+				n = atoi(optarg);
+				if (n > 0)
+					hairpinq = n;
+				else
+					rte_exit(EXIT_SUCCESS,
+						"Hairpin queues should be > 0 ");
+
+				flow_actions |= HAIRPIN_QUEUE_ACTION;
+				printf("hairpin-queue / ");
+			}
+
+			/* Control */
+			if (strcmp(lgopts[opt_idx].name,
+					"flows-count") == 0) {
+				n = atoi(optarg);
+				if (n > (int) iterations_number)
+					flows_count = n;
+				else {
+					printf("\n\nflows_count should be > %d",
+						iterations_number);
+					rte_exit(EXIT_SUCCESS, " ");
+				}
+			}
+			if (strcmp(lgopts[opt_idx].name,
+					"dump-iterations") == 0)
+				dump_iterations = true;
 			break;
 		default:
 			fprintf(stderr, "Invalid option: %s\n", argv[optind]);
@@ -73,6 +361,130 @@ args_parse(int argc, char **argv)
 			break;
 		}
 	}
+	printf("end_flow\n");
+}
+
+/*
+ * Print the type and message of a flow error on stdout.
+ * A placeholder text is printed when error.message is not set.
+ */
+static void
+print_flow_error(struct rte_flow_error error)
+{
+	printf("Flow can't be created %d message: %s\n",
+		error.type,
+		error.message ? error.message : "(no stated reason)");
+}
+
+/*
+ * Insert flows_count flows on every available port and print the
+ * measured insertion rate.
+ *
+ * For each port: when flow_group > 0 a jump rule is first created in
+ * group 0, then the flows are generated one by one while the CPU time
+ * (clock()) of every batch of iterations_number flows is recorded.
+ * A creation attempt is retried while rte_errno reports EAGAIN; any
+ * other failure terminates the app through rte_exit().
+ */
+static inline void
+flows_handler(void)
+{
+	struct rte_flow_error error;
+	clock_t start_iter, end_iter;
+	double cpu_time_used;
+	double flows_rate;
+	double cpu_time_per_iter[MAX_ITERATIONS];
+	double delta;
+	uint16_t nr_ports;
+	uint32_t i;
+	int port_id;
+	int iter_id;
+	/* Initialised only here, so it accumulates over all ports */
+	uint32_t eagain_counter = 0;
+
+	nr_ports = rte_eth_dev_count_avail();
+
+	/* -1 marks a batch slot that has no measurement yet */
+	for (i = 0; i < MAX_ITERATIONS; i++)
+		cpu_time_per_iter[i] = -1;
+
+	/* A batch cannot be larger than the total number of flows */
+	if (iterations_number > flows_count)
+		iterations_number = flows_count;
+
+	printf(":: Flows Count per port: %d\n", flows_count);
+
+	for (port_id = 0; port_id < nr_ports; port_id++) {
+		cpu_time_used = 0;
+		if (flow_group > 0) {
+			/*
+			 * Create global rule to jump into flow_group,
+			 * this way the app will avoid the default rules.
+			 *
+			 * Global rule:
+			 * group 0 eth / end actions jump group <flow_group>
+			 *
+			 */
+			flow = generate_flow(port_id, 0, flow_attrs,
+				FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_ETH),
+				FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_JUMP),
+				flow_group, 0, 0, &error);
+
+			if (flow == NULL) {
+				print_flow_error(error);
+				rte_exit(EXIT_FAILURE, "error in creating flow");
+			}
+		}
+
+		/* Insertion Rate */
+		printf("Flows insertion on port = %d\n", port_id);
+		start_iter = clock();
+		for (i = 0; i < flows_count; i++) {
+			/* Retry for as long as rte_errno reports EAGAIN */
+			do {
+				rte_errno = 0;
+				flow = generate_flow(port_id, flow_group,
+					flow_attrs, flow_items, flow_actions,
+					JUMP_ACTION_TABLE, i, hairpinq, &error);
+				/* Counts every NULL result, whatever rte_errno is */
+				if (flow == NULL)
+					eagain_counter++;
+			} while (rte_errno == EAGAIN);
+
+			/* On signal, make this the last pass of the loop */
+			if (force_quit)
+				i = flows_count;
+
+			if (!flow) {
+				print_flow_error(error);
+				rte_exit(EXIT_FAILURE, "error in creating flow");
+			}
+
+			/*
+			 * End of a batch of iterations_number flows.
+			 * The leading "i &&" means i == 0 never closes a batch.
+			 */
+			if (i && !((i + 1) % iterations_number)) {
+				/* Save the insertion rate of each iter */
+				end_iter = clock();
+				delta = (double) (end_iter - start_iter);
+				iter_id = ((i + 1) / iterations_number) - 1;
+				cpu_time_per_iter[iter_id] =
+					delta / CLOCKS_PER_SEC;
+				cpu_time_used += cpu_time_per_iter[iter_id];
+				start_iter = clock();
+			}
+		}
+
+		/* Iteration rate per iteration */
+		if (dump_iterations)
+			for (i = 0; i < MAX_ITERATIONS; i++) {
+				/* Skip the slots that were never measured */
+				if (cpu_time_per_iter[i] == -1)
+					continue;
+				delta = (double)(iterations_number /
+					cpu_time_per_iter[i]);
+				flows_rate = delta / 1000;
+				printf(":: Iteration #%d: %d flows "
+					"in %f sec[ Rate = %f K/Sec ]\n",
+					i, iterations_number,
+					cpu_time_per_iter[i], flows_rate);
+			}
+
+		/*
+		 * Insertion rate for all flows.
+		 * cpu_time_used only holds the time of the closed batches.
+		 */
+		flows_rate = ((double) (flows_count / cpu_time_used) / 1000);
+		printf("\n:: Total flow insertion rate -> %f K/Sec\n",
+						flows_rate);
+		printf(":: The time for creating %d in flows %f seconds\n",
+						flows_count, cpu_time_used);
+		printf(":: EAGAIN counter = %d\n", eagain_counter);
+	}
+}
+
+/*
+ * Handler installed in main() for SIGINT and SIGTERM: prints a notice
+ * and sets force_quit, which the running loops poll in order to stop.
+ *
+ * NOTE(review): printf() is not on the POSIX list of async-signal-safe
+ * functions; consider printing the notice outside of the handler.
+ */
+static void
+signal_handler(int signum)
+{
+	if (signum == SIGINT || signum == SIGTERM) {
+		printf("\n\nSignal %d received, preparing to exit...\n",
+					signum);
+		printf("Error: Stats are wrong due to sudden signal!\n\n");
+		force_quit = true;
+	}
 }
 
 static void
@@ -80,8 +492,13 @@ init_port(void)
 {
 	int ret;
 	uint16_t std_queue;
+	uint16_t hairpin_q;
 	uint16_t port_id;
 	uint16_t nr_ports;
+	uint16_t nr_queues;
+	struct rte_eth_hairpin_conf hairpin_conf = {
+		.peer_count = 1,
+	};
 	struct rte_eth_conf port_conf = {
 		.rx_adv_conf = {
 			.rss_conf.rss_hf =
@@ -92,6 +509,10 @@ init_port(void)
 	struct rte_eth_rxconf rxq_conf;
 	struct rte_eth_dev_info dev_info;
 
+	nr_queues = RXQ_NUM;
+	if (hairpinq != 0)
+		nr_queues = RXQ_NUM + hairpinq;
+
 	nr_ports = rte_eth_dev_count_avail();
 	if (nr_ports == 0)
 		rte_exit(EXIT_FAILURE, "Error: no port detected\n");
@@ -116,8 +537,8 @@ init_port(void)
 
 		printf(":: initializing port: %d\n", port_id);
 
-		ret = rte_eth_dev_configure(port_id, RXQ_NUM,
-				TXQ_NUM, &port_conf);
+		ret = rte_eth_dev_configure(port_id, nr_queues,
+				nr_queues, &port_conf);
 		if (ret < 0)
 			rte_exit(EXIT_FAILURE,
 				":: cannot configure device: err=%d, port=%u\n",
@@ -153,6 +574,38 @@ init_port(void)
 				":: promiscuous mode enable failed: err=%s, port=%u\n",
 				rte_strerror(-ret), port_id);
 
+		if (hairpinq != 0) {
+			for (hairpin_q = RXQ_NUM, std_queue = 0;
+					std_queue < nr_queues;
+					hairpin_q++, std_queue++) {
+				hairpin_conf.peers[0].port = port_id;
+				hairpin_conf.peers[0].queue =
+					std_queue + TXQ_NUM;
+				ret = rte_eth_rx_hairpin_queue_setup(
+						port_id, hairpin_q,
+						NR_RXD, &hairpin_conf);
+				if (ret != 0)
+					rte_exit(EXIT_FAILURE,
+						":: Hairpin rx queue setup failed: err=%d, port=%u\n",
+						ret, port_id);
+			}
+
+			for (hairpin_q = TXQ_NUM, std_queue = 0;
+					std_queue < nr_queues;
+					hairpin_q++, std_queue++) {
+				hairpin_conf.peers[0].port = port_id;
+				hairpin_conf.peers[0].queue =
+					std_queue + RXQ_NUM;
+				ret = rte_eth_tx_hairpin_queue_setup(
+						port_id, hairpin_q,
+						NR_TXD, &hairpin_conf);
+				if (ret != 0)
+					rte_exit(EXIT_FAILURE,
+						":: Hairpin tx queue setup failed: err=%d, port=%u\n",
+						ret, port_id);
+			}
+		}
+
 		ret = rte_eth_dev_start(port_id);
 		if (ret < 0)
 			rte_exit(EXIT_FAILURE,
@@ -174,6 +627,15 @@ main(int argc, char **argv)
 	if (ret < 0)
 		rte_exit(EXIT_FAILURE, "EAL init failed\n");
 
+	force_quit = false;
+	dump_iterations = false;
+	flows_count = DEFAULT_RULES_COUNT;
+	iterations_number = DEFAULT_ITERATION;
+	flow_group = 0;
+
+	signal(SIGINT, signal_handler);
+	signal(SIGTERM, signal_handler);
+
 	argc -= ret;
 	argv += ret;
 	if (argc > 1)
@@ -185,6 +647,8 @@ main(int argc, char **argv)
 	if (nb_lcores <= 1)
 		rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
 
+	flows_handler();
+
 	RTE_ETH_FOREACH_DEV(port) {
 		rte_flow_flush(port, &error);
 		rte_eth_dev_stop(port);
diff --git a/app/test-flow-perf/meson.build b/app/test-flow-perf/meson.build
index 25711378f..6eaf83b41 100644
--- a/app/test-flow-perf/meson.build
+++ b/app/test-flow-perf/meson.build
@@ -2,6 +2,9 @@
 # Copyright(c) 2020 Mellanox Technologies, Ltd
 
 sources = files(
+	'actions_gen.c',
+	'flow_gen.c',
+	'items_gen.c',
 	'main.c',
 )
 
diff --git a/doc/guides/rel_notes/release_20_05.rst b/doc/guides/rel_notes/release_20_05.rst
index 7abcae3aa..0e4dcf1ad 100644
--- a/doc/guides/rel_notes/release_20_05.rst
+++ b/doc/guides/rel_notes/release_20_05.rst
@@ -216,6 +216,9 @@ New Features
 
   Add new application to test rte_flow performance.
 
+  Application features:
+  * Measure rte_flow insertion rate.
+
 
 Removed Items
 -------------
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
index 49eb450ae..6f3f7dafb 100644
--- a/doc/guides/tools/flow-perf.rst
+++ b/doc/guides/tools/flow-perf.rst
@@ -1,10 +1,29 @@
 ..	SPDX-License-Identifier: BSD-3-Clause
 	Copyright 2020 Mellanox Technologies, Ltd
 
-Flow performance tool
+Flow Performance Tool
 =====================
 
 Application for rte_flow performance testing.
+The application provides the ability to test the insertion rate of a specific
+rte_flow rule, by stressing it to the NIC, and calculates the insertion
+rate.
+
+The application offers some options in the command line, to configure
+which rule to apply.
+
+After that the application will start producing rules with same pattern
+but increasing the outer IP source address by 1 each time, thus it will
+give different flow each time, and all other items will have open masks.
+
+
+Known Limitations
+=================
+
+The current version has limitations which can be removed in future:
+
+* Support outer items up to tunnel layer only.
+* Single core insertion only.
 
 
 Compiling the Application
@@ -27,7 +46,7 @@ or :doc:`EAL parameters (FreeBSD) <../freebsd_gsg/freebsd_eal_parameters>` for
 a list of available EAL command-line options.
 
 
-Flow performance Options
+Flow Performance Options
 ------------------------
 
 The following are the command-line options for the flow performance application.
@@ -36,9 +55,179 @@ with a ``--`` separator:
 
 .. code-block:: console
 
-	sudo ./dpdk-test-flow-perf -n 4 -w 08:00.0 --
+	sudo ./dpdk-test-flow_perf -n 4 -w 08:00.0 -- --ingress --ether --ipv4 --queue --flows-count=1000000
 
 The command line options are:
 
 *	``--help``
 	Display a help message and quit.
+
+*	``--flows-count=N``
+	Set the number of needed flows to insert,
+	where 1 <= N <= "number of flows".
+	The default value is 4,000,000.
+
+*	``--dump-iterations``
+	Print rates for each iteration of flows.
+	Default iteration is 1,00,000.
+
+
+Attributes:
+
+*	``--ingress``
+	Set Ingress attribute to all flows attributes.
+
+*	``--egress``
+	Set Egress attribute to all flows attributes.
+
+*	``--transfer``
+	Set Transfer attribute to all flows attributes.
+
+*	``--group=N``
+	Set group for all flows, where N >= 0.
+	Default group is 0.
+
+Items:
+
+*	``--ether``
+	Add Ether item to all flows items, This item have open mask.
+
+*	``--vlan``
+	Add VLAN item to all flows items,
+	This item have VLAN value defined in user_parameters.h
+	under ``VNI_VALUE`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--ipv4``
+	Add IPv4 item to all flows items,
+	This item have incremental source IP, with full mask.
+	Other fields are open mask.
+
+*	``--ipv6``
+	Add IPv6 item to all flows item,
+	This item have incremental source IP, with full mask.
+	Other fields are open mask.
+
+*	``--tcp``
+	Add TCP item to all flows items, This item have open mask.
+
+*	``--udp``
+	Add UDP item to all flows items, This item have open mask.
+
+*	``--vxlan``
+	Add VXLAN item to all flows items,
+	This item have VNI value defined in user_parameters.h
+	under ``VNI_VALUE`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--vxlan-gpe``
+	Add VXLAN-GPE item to all flows items,
+	This item have VNI value defined in user_parameters.h
+	under ``VNI_VALUE`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--gre``
+	Add GRE item to all flows items,
+	This item have protocol value defined in user_parameters.h
+	under ``GRE_PROTO`` with full mask, default protocol = 0x6558 "Ether"
+	Other fields are open mask.
+
+*	``--geneve``
+	Add GENEVE item to all flows items,
+	This item have VNI value defined in user_parameters.h
+	under ``VNI_VALUE`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--gtp``
+	Add GTP item to all flows items,
+	This item have TEID value defined in user_parameters.h
+	under ``TEID_VALUE`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--meta``
+	Add Meta item to all flows items,
+	This item have data value defined in user_parameters.h
+	under ``META_DATA`` with full mask, default value = 1.
+	Other fields are open mask.
+
+*	``--tag``
+	Add Tag item to all flows items,
+	This item have data value defined in user_parameters.h
+	under ``META_DATA`` with full mask, default value = 1.
+
+	Also it have tag value defined in user_parameters.h
+	under ``TAG_INDEX`` with full mask, default value = 0.
+	Other fields are open mask.
+
+
+Actions:
+
+*	``--port-id``
+	Add port redirection action to all flows actions.
+	Port redirection destination is defined in user_parameters.h
+	under PORT_ID_DST, default value = 1.
+
+*	``--rss``
+	Add RSS action to all flows actions,
+	The queues in RSS action will be all queues configured
+	in the app.
+
+*	``--queue``
+	Add queue action to all flows actions,
+	The queue will change in round robin state for each flow.
+
+	For example:
+		The app running with 4 RX queues
+		Flow #0: queue index 0
+		Flow #1: queue index 1
+		Flow #2: queue index 2
+		Flow #3: queue index 3
+		Flow #4: queue index 0
+		...
+
+*	``--jump``
+	Add jump action to all flows actions.
+	Jump action destination is defined in user_parameters.h
+	under ``JUMP_ACTION_TABLE``, default value = 2.
+
+*	``--mark``
+	Add mark action to all flows actions.
+	Mark action id is defined in user_parameters.h
+	under ``MARK_ID``, default value = 1.
+
+*	``--count``
+	Add count action to all flows actions.
+
+*	``--set-meta``
+	Add set-meta action to all flows actions.
+	Meta data is defined in user_parameters.h under ``META_DATA``
+	with full mask, default value = 1.
+
+*	``--set-tag``
+	Add set-tag action to all flows actions.
+	Meta data is defined in user_parameters.h under ``META_DATA``
+	with full mask, default value = 1.
+
+	Tag index is defined in user_parameters.h under ``TAG_INDEX``
+	with full mask, default value = 0.
+
+*	``--drop``
+	Add drop action to all flows actions.
+
+*	``--hairpin-queue=N``
+	Add hairpin queue action to all flows actions.
+	The queue will change in round robin state for each flow.
+
+	For example:
+		The app running with 4 RX hairpin queues and 4 normal RX queues
+		Flow #0: queue index 4
+		Flow #1: queue index 5
+		Flow #2: queue index 6
+		Flow #3: queue index 7
+		Flow #4: queue index 4
+		...
+
+*	``--hairpin-rss=N``
+	Add hairpin RSS action to all flows actions.
+	The queues in RSS action will be all hairpin queues configured
+	in the app.
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v6 3/5] app/flow-perf: add deletion rate calculation
  2020-05-11 11:08                   ` [dpdk-dev] [PATCH v6 0/5] Introduce flow perf application Wisam Jaddo
  2020-05-11 11:08                     ` [dpdk-dev] [PATCH v6 1/5] app/flow-perf: add flow performance skeleton Wisam Jaddo
  2020-05-11 11:08                     ` [dpdk-dev] [PATCH v6 2/5] app/flow-perf: add insertion rate calculation Wisam Jaddo
@ 2020-05-11 11:08                     ` " Wisam Jaddo
  2020-05-11 11:08                     ` [dpdk-dev] [PATCH v6 4/5] app/flow-perf: add memory dump to app Wisam Jaddo
  2020-05-11 11:08                     ` [dpdk-dev] [PATCH v6 5/5] app/flow-perf: add packet forwarding support Wisam Jaddo
  4 siblings, 0 replies; 102+ messages in thread
From: Wisam Jaddo @ 2020-05-11 11:08 UTC (permalink / raw)
  To: dev, jackmin, thomas, jerinjacobk, ajit.khaparde, arybchenko

Add the ability to test deletion rate for flow performance
application.

This feature is disabled by default, and can be enabled by
adding "--deletion-rate" to the application command line options.

Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 app/test-flow-perf/main.c              | 87 ++++++++++++++++++++++++++
 doc/guides/rel_notes/release_20_05.rst |  1 +
 doc/guides/tools/flow-perf.rst         |  4 ++
 3 files changed, 92 insertions(+)

diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index 1feb73e6f..b177025fa 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -49,6 +49,7 @@ static uint32_t flow_actions;
 static uint32_t flow_attrs;
 static volatile bool force_quit;
 static bool dump_iterations;
+static  bool delete_flag;
 static struct rte_mempool *mbuf_mp;
 static uint32_t nb_lcores;
 static uint32_t flows_count;
@@ -64,6 +65,8 @@ usage(char *progname)
 		" flows to insert, default is 4,000,000\n");
 	printf("  --dump-iterations: To print rates for each"
 		" iteration\n");
+	printf("  --deletion-rate: Enable deletion rate"
+		" calculations\n");
 
 	printf("To set flow attributes:\n");
 	printf("  --ingress: set ingress attribute in flows\n");
@@ -246,6 +249,7 @@ args_parse(int argc, char **argv)
 		{ "help",                       0, 0, 0 },
 		{ "flows-count",                1, 0, 0 },
 		{ "dump-iterations",            0, 0, 0 },
+		{ "deletion-rate",              0, 0, 0 },
 		/* Attributes */
 		{ "ingress",                    0, 0, 0 },
 		{ "egress",                     0, 0, 0 },
@@ -353,6 +357,9 @@ args_parse(int argc, char **argv)
 			if (strcmp(lgopts[opt_idx].name,
 					"dump-iterations") == 0)
 				dump_iterations = true;
+			if (strcmp(lgopts[opt_idx].name,
+					"deletion-rate") == 0)
+				delete_flag = true;
 			break;
 		default:
 			fprintf(stderr, "Invalid option: %s\n", argv[optind]);
@@ -372,9 +379,75 @@ print_flow_error(struct rte_flow_error error)
 		error.message ? error.message : "(no stated reason)");
 }
 
+/*
+ * Destroy the flows stored in flow_list on port_id and print the
+ * measured deletion rate.
+ *
+ * At most flows_count entries are visited and the walk stops at the
+ * first NULL entry. The CPU time (clock()) of every batch of
+ * iterations_number deletions is recorded; batches beyond
+ * MAX_ITERATIONS still count in the total time but have no slot in the
+ * per-iteration dump. A failing rte_flow_destroy() terminates the app
+ * through rte_exit().
+ *
+ * Note that the total rate is always computed from flows_count, even
+ * when the walk stopped early on a NULL entry.
+ */
+static inline void
+destroy_flows(int port_id, struct rte_flow **flow_list)
+{
+	struct rte_flow_error error;
+	clock_t start_iter, end_iter;
+	double cpu_time_used = 0;
+	double flows_rate;
+	double cpu_time_per_iter[MAX_ITERATIONS];
+	double delta;
+	uint32_t i;
+	int iter_id;
+
+	/* -1 marks a batch slot that has no measurement yet */
+	for (i = 0; i < MAX_ITERATIONS; i++)
+		cpu_time_per_iter[i] = -1;
+
+	/* A batch cannot be larger than the total number of flows */
+	if (iterations_number > flows_count)
+		iterations_number = flows_count;
+
+	/* Deletion Rate */
+	printf("Flows Deletion on port = %d\n", port_id);
+	start_iter = clock();
+	for (i = 0; i < flows_count; i++) {
+		if (flow_list[i] == NULL)
+			break;
+
+		memset(&error, 0x33, sizeof(error));
+		if (rte_flow_destroy(port_id, flow_list[i], &error)) {
+			print_flow_error(error);
+			rte_exit(EXIT_FAILURE, "Error in deleting flow");
+		}
+
+		if (i && !((i + 1) % iterations_number)) {
+			/* Save the deletion rate of each iter */
+			end_iter = clock();
+			delta = (double) (end_iter - start_iter);
+			delta /= CLOCKS_PER_SEC;
+			cpu_time_used += delta;
+			iter_id = ((i + 1) / iterations_number) - 1;
+			/*
+			 * flows_count is user supplied, so the number of
+			 * batches may exceed the size of the stats array.
+			 */
+			if (iter_id < MAX_ITERATIONS)
+				cpu_time_per_iter[iter_id] = delta;
+			start_iter = clock();
+		}
+	}
+
+	/* Deletion rate per iteration */
+	if (dump_iterations)
+		for (i = 0; i < MAX_ITERATIONS; i++) {
+			if (cpu_time_per_iter[i] == -1)
+				continue;
+			delta = (double)(iterations_number /
+				cpu_time_per_iter[i]);
+			flows_rate = delta / 1000;
+			printf(":: Iteration #%d: %d flows "
+				"in %f sec[ Rate = %f K/Sec ]\n",
+				i, iterations_number,
+				cpu_time_per_iter[i], flows_rate);
+		}
+
+	/* Deletion rate for all flows */
+	flows_rate = ((double) (flows_count / cpu_time_used) / 1000);
+	printf("\n:: Total flow deletion rate -> %f K/Sec\n",
+		flows_rate);
+	printf(":: The time for deleting %d in flows %f seconds\n",
+		flows_count, cpu_time_used);
+}
+
 static inline void
 flows_handler(void)
 {
+	struct rte_flow **flow_list;
 	struct rte_flow_error error;
 	clock_t start_iter, end_iter;
 	double cpu_time_used;
@@ -386,6 +459,7 @@ flows_handler(void)
 	int port_id;
 	int iter_id;
 	uint32_t eagain_counter = 0;
+	uint32_t flow_index;
 
 	nr_ports = rte_eth_dev_count_avail();
 
@@ -397,8 +471,14 @@ flows_handler(void)
 
 	printf(":: Flows Count per port: %d\n", flows_count);
 
+	flow_list = rte_zmalloc("flow_list",
+		(sizeof(struct rte_flow *) * flows_count) + 1, 0);
+	if (flow_list == NULL)
+		rte_exit(EXIT_FAILURE, "No Memory available!");
+
 	for (port_id = 0; port_id < nr_ports; port_id++) {
 		cpu_time_used = 0;
+		flow_index = 0;
 		if (flow_group > 0) {
 			/*
 			 * Create global rule to jump into flow_group,
@@ -417,6 +497,7 @@ flows_handler(void)
 				print_flow_error(error);
 				rte_exit(EXIT_FAILURE, "error in creating flow");
 			}
+			flow_list[flow_index++] = flow;
 		}
 
 		/* Insertion Rate */
@@ -440,6 +521,8 @@ flows_handler(void)
 				rte_exit(EXIT_FAILURE, "error in creating flow");
 			}
 
+			flow_list[flow_index++] = flow;
+
 			if (i && !((i + 1) % iterations_number)) {
 				/* Save the insertion rate of each iter */
 				end_iter = clock();
@@ -473,6 +556,9 @@ flows_handler(void)
 		printf(":: The time for creating %d in flows %f seconds\n",
 						flows_count, cpu_time_used);
 		printf(":: EAGAIN counter = %d\n", eagain_counter);
+
+		if (delete_flag)
+			destroy_flows(port_id, flow_list);
 	}
 }
 
@@ -631,6 +717,7 @@ main(int argc, char **argv)
 	dump_iterations = false;
 	flows_count = DEFAULT_RULES_COUNT;
 	iterations_number = DEFAULT_ITERATION;
+	delete_flag = false;
 	flow_group = 0;
 
 	signal(SIGINT, signal_handler);
diff --git a/doc/guides/rel_notes/release_20_05.rst b/doc/guides/rel_notes/release_20_05.rst
index 0e4dcf1ad..e511e9c1a 100644
--- a/doc/guides/rel_notes/release_20_05.rst
+++ b/doc/guides/rel_notes/release_20_05.rst
@@ -218,6 +218,7 @@ New Features
 
   Application features:
   * Measure rte_flow insertion rate.
+  * Measure rte_flow deletion rate.
 
 
 Removed Items
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
index 6f3f7dafb..1f8b1fa15 100644
--- a/doc/guides/tools/flow-perf.rst
+++ b/doc/guides/tools/flow-perf.rst
@@ -25,6 +25,8 @@ The current version has limitations which can be removed in future:
 * Support outer items up to tunnel layer only.
 * Single core insertion only.
 
+The application also provide the ability to measure rte flow deletion rate.
+
 
 Compiling the Application
 =========================
@@ -71,6 +73,8 @@ The command line options are:
 	Print rates for each iteration of flows.
 	Default iteration is 1,00,000.
 
+*	``--deletion-rate``
+	Enable deletion rate calculations.
 
 Attributes:
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v6 4/5] app/flow-perf: add memory dump to app
  2020-05-11 11:08                   ` [dpdk-dev] [PATCH v6 0/5] Introduce flow perf application Wisam Jaddo
                                       ` (2 preceding siblings ...)
  2020-05-11 11:08                     ` [dpdk-dev] [PATCH v6 3/5] app/flow-perf: add deletion " Wisam Jaddo
@ 2020-05-11 11:08                     ` Wisam Jaddo
  2020-05-11 11:08                     ` [dpdk-dev] [PATCH v6 5/5] app/flow-perf: add packet forwarding support Wisam Jaddo
  4 siblings, 0 replies; 102+ messages in thread
From: Wisam Jaddo @ 2020-05-11 11:08 UTC (permalink / raw)
  To: dev, jackmin, thomas, jerinjacobk, ajit.khaparde, arybchenko; +Cc: Suanming Mou

Introduce new feature to dump memory statistics of each socket
and a total for all before and after the creation.

This will give two main advantages:
1- Check the memory consumption for large number of flows
"insertion rate scenario alone"

2- Check that there is no memory leakage after doing insertion then
deletion.

Signed-off-by: Suanming Mou <suanmingm@mellanox.com>
Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 app/test-flow-perf/main.c              | 72 +++++++++++++++++++++++++-
 doc/guides/rel_notes/release_20_05.rst |  1 +
 doc/guides/tools/flow-perf.rst         |  6 ++-
 3 files changed, 77 insertions(+), 2 deletions(-)

diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index b177025fa..da54bbdef 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -49,7 +49,8 @@ static uint32_t flow_actions;
 static uint32_t flow_attrs;
 static volatile bool force_quit;
 static bool dump_iterations;
-static  bool delete_flag;
+static bool delete_flag;
+static bool dump_socket_mem_flag;
 static struct rte_mempool *mbuf_mp;
 static uint32_t nb_lcores;
 static uint32_t flows_count;
@@ -67,6 +68,7 @@ usage(char *progname)
 		" iteration\n");
 	printf("  --deletion-rate: Enable deletion rate"
 		" calculations\n");
+	printf("  --dump-socket-mem: To dump all socket memory\n");
 
 	printf("To set flow attributes:\n");
 	printf("  --ingress: set ingress attribute in flows\n");
@@ -250,6 +252,7 @@ args_parse(int argc, char **argv)
 		{ "flows-count",                1, 0, 0 },
 		{ "dump-iterations",            0, 0, 0 },
 		{ "deletion-rate",              0, 0, 0 },
+		{ "dump-socket-mem",            0, 0, 0 },
 		/* Attributes */
 		{ "ingress",                    0, 0, 0 },
 		{ "egress",                     0, 0, 0 },
@@ -360,6 +363,9 @@ args_parse(int argc, char **argv)
 			if (strcmp(lgopts[opt_idx].name,
 					"deletion-rate") == 0)
 				delete_flag = true;
+			if (strcmp(lgopts[opt_idx].name,
+					"dump-socket-mem") == 0)
+				dump_socket_mem_flag = true;
 			break;
 		default:
 			fprintf(stderr, "Invalid option: %s\n", argv[optind]);
@@ -371,6 +377,62 @@ args_parse(int argc, char **argv)
 	printf("end_flow\n");
 }
 
+/*
+ * Sum the malloc heap statistics of every NUMA socket that reports a
+ * non-empty heap and return the total number of allocated bytes.
+ *
+ * When dump_socket_mem_flag is set, the per-socket figures and, if at
+ * least one socket was counted, the grand total are also written to f.
+ * Sizes are printed in units of 10^6 bytes.
+ */
+static size_t
+dump_socket_mem(FILE *f)
+{
+	struct rte_malloc_socket_stats socket_stats;
+	unsigned int socket_id;
+	unsigned int nb_alloc = 0;
+	unsigned int nb_free = 0;
+	size_t total_sz = 0;
+	size_t alloc_sz = 0;
+	size_t free_sz = 0;
+	bool active_nodes = false;
+
+	for (socket_id = 0; socket_id < RTE_MAX_NUMA_NODES; socket_id++) {
+		/* Ignore sockets without stats or with an empty heap */
+		if (rte_malloc_get_socket_stats(socket_id, &socket_stats))
+			continue;
+		if (socket_stats.heap_totalsz_bytes == 0)
+			continue;
+
+		active_nodes = true;
+		total_sz += socket_stats.heap_totalsz_bytes;
+		alloc_sz += socket_stats.heap_allocsz_bytes;
+		free_sz += socket_stats.heap_freesz_bytes;
+		nb_alloc += socket_stats.alloc_count;
+		nb_free += socket_stats.free_count;
+
+		if (!dump_socket_mem_flag)
+			continue;
+
+		fprintf(f, "::::::::::::::::::::::::::::::::::::::::");
+		fprintf(f,
+			"\nSocket %u:\nsize(M) total: %.6lf\nalloc:"
+			" %.6lf(%.3lf%%)\nfree: %.6lf"
+			"\nmax: %.6lf"
+			"\ncount alloc: %u\nfree: %u\n",
+			socket_id,
+			socket_stats.heap_totalsz_bytes / 1.0e6,
+			socket_stats.heap_allocsz_bytes / 1.0e6,
+			(double)socket_stats.heap_allocsz_bytes * 100 /
+			(double)socket_stats.heap_totalsz_bytes,
+			socket_stats.heap_freesz_bytes / 1.0e6,
+			socket_stats.greatest_free_size / 1.0e6,
+			socket_stats.alloc_count,
+			socket_stats.free_count);
+		fprintf(f, "::::::::::::::::::::::::::::::::::::::::");
+	}
+
+	/* Grand total over all the sockets counted above */
+	if (dump_socket_mem_flag && active_nodes) {
+		fprintf(f,
+			"\nTotal: size(M)\ntotal: %.6lf"
+			"\nalloc: %.6lf(%.3lf%%)\nfree: %.6lf"
+			"\ncount alloc: %u\nfree: %u\n",
+			total_sz / 1.0e6, alloc_sz / 1.0e6,
+			(double)alloc_sz * 100 / (double)total_sz,
+			free_sz / 1.0e6,
+			nb_alloc, nb_free);
+		fprintf(f, "::::::::::::::::::::::::::::::::::::::::\n");
+	}
+
+	return alloc_sz;
+}
+
 static void
 print_flow_error(struct rte_flow_error error)
 {
@@ -708,6 +770,7 @@ main(int argc, char **argv)
 	int ret;
 	uint16_t port;
 	struct rte_flow_error error;
+	int64_t alloc, last_alloc;
 
 	ret = rte_eal_init(argc, argv);
 	if (ret < 0)
@@ -718,6 +781,7 @@ main(int argc, char **argv)
 	flows_count = DEFAULT_RULES_COUNT;
 	iterations_number = DEFAULT_ITERATION;
 	delete_flag = false;
+	dump_socket_mem_flag = false;
 	flow_group = 0;
 
 	signal(SIGINT, signal_handler);
@@ -734,7 +798,13 @@ main(int argc, char **argv)
 	if (nb_lcores <= 1)
 		rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
 
+	last_alloc = (int64_t)dump_socket_mem(stdout);
 	flows_handler();
+	alloc = (int64_t)dump_socket_mem(stdout);
+
+	if (last_alloc)
+		fprintf(stdout, ":: Memory allocation change(M): %.6lf\n",
+		(alloc - last_alloc) / 1.0e6);
 
 	RTE_ETH_FOREACH_DEV(port) {
 		rte_flow_flush(port, &error);
diff --git a/doc/guides/rel_notes/release_20_05.rst b/doc/guides/rel_notes/release_20_05.rst
index e511e9c1a..aae1c5492 100644
--- a/doc/guides/rel_notes/release_20_05.rst
+++ b/doc/guides/rel_notes/release_20_05.rst
@@ -219,6 +219,7 @@ New Features
   Application features:
   * Measure rte_flow insertion rate.
   * Measure rte_flow deletion rate.
+  * Dump rte_flow memory consumption.
 
 
 Removed Items
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
index 1f8b1fa15..c8cce187e 100644
--- a/doc/guides/tools/flow-perf.rst
+++ b/doc/guides/tools/flow-perf.rst
@@ -25,7 +25,8 @@ The current version has limitations which can be removed in future:
 * Support outer items up to tunnel layer only.
 * Single core insertion only.
 
-The application also provide the ability to measure rte flow deletion rate.
+The application also provide the ability to measure rte flow deletion rate,
+in addition to memory consumption before and after the flows creation.
 
 
 Compiling the Application
@@ -76,6 +77,9 @@ The command line options are:
 *	``--deletion-rate``
 	Enable deletion rate calculations.
 
+*	``--dump-socket-mem``
+	Dump the memory stats for each socket before the insertion and after.
+
 Attributes:
 
 *	``--ingress``
-- 
2.17.1


^ permalink raw reply	[flat|nested] 102+ messages in thread

* [dpdk-dev] [PATCH v6 5/5] app/flow-perf: add packet forwarding support
  2020-05-11 11:08                   ` [dpdk-dev] [PATCH v6 0/5] Introduce flow perf application Wisam Jaddo
                                       ` (3 preceding siblings ...)
  2020-05-11 11:08                     ` [dpdk-dev] [PATCH v6 4/5] app/flow-perf: add memory dump to app Wisam Jaddo
@ 2020-05-11 11:08                     ` Wisam Jaddo
  4 siblings, 0 replies; 102+ messages in thread
From: Wisam Jaddo @ 2020-05-11 11:08 UTC (permalink / raw)
  To: dev, jackmin, thomas, jerinjacobk, ajit.khaparde, arybchenko

Introduce packet forwarding support to the app to do
some performance measurements.

The measurements are reported in terms of packets per
second. The forwarding will start after the end
of insertion/deletion operations.

The support has single and multi core performance measurements.

Signed-off-by: Wisam Jaddo <wisamm@mellanox.com>
---
 app/test-flow-perf/main.c              | 301 +++++++++++++++++++++++++
 doc/guides/rel_notes/release_20_05.rst |   1 +
 doc/guides/tools/flow-perf.rst         |   6 +
 3 files changed, 308 insertions(+)

diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index da54bbdef..3918cc132 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -27,6 +27,7 @@
 #include <stdbool.h>
 #include <sys/time.h>
 #include <signal.h>
+#include <unistd.h>
 
 #include <rte_malloc.h>
 #include <rte_mempool.h>
@@ -47,15 +48,45 @@ static uint8_t flow_group;
 static uint32_t flow_items;
 static uint32_t flow_actions;
 static uint32_t flow_attrs;
+
 static volatile bool force_quit;
 static bool dump_iterations;
 static bool delete_flag;
 static bool dump_socket_mem_flag;
+static bool enable_fwd;
+
 static struct rte_mempool *mbuf_mp;
 static uint32_t nb_lcores;
 static uint32_t flows_count;
 static uint32_t iterations_number;
 static uint32_t hairpinq;
+static uint32_t nb_lcores;
+
+/* Size of lcore_info.pkts, also the burst size requested in do_rx() */
+#define MAX_PKT_BURST    32
+/* Presumably the possible values of lcore_info.mode - TODO confirm */
+#define LCORE_MODE_PKT    1
+#define LCORE_MODE_STATS  2
+/* Size of lcore_info.streams */
+#define MAX_STREAMS      64
+/* Size of the lcore_infos table */
+#define MAX_LCORES       64
+
+/*
+ * Port/queue pairs of one stream.
+ * NOTE(review): presumably packets received on rx_port/rx_queue are
+ * sent on tx_port/tx_queue - confirm against the forwarding code.
+ */
+struct stream {
+	int tx_port;
+	int tx_queue;
+	int rx_port;
+	int rx_queue;
+};
+
+/* Per-lcore state: streams, packet counters and burst buffer */
+struct lcore_info {
+	int mode;
+	/* presumably the number of used entries in streams - TODO confirm */
+	int streams_nb;
+	struct stream streams[MAX_STREAMS];
+	/* stats */
+	/* packets accepted by rte_eth_tx_burst(), updated in do_tx() */
+	uint64_t tx_pkts;
+	/* packets the tx burst did not take, updated in do_tx() */
+	uint64_t tx_drops;
+	/* packets returned by rte_eth_rx_burst(), updated in do_rx() */
+	uint64_t rx_pkts;
+	/* mbuf array filled by do_rx() and sent by do_tx() */
+	struct rte_mbuf *pkts[MAX_PKT_BURST];
+} __attribute__((__aligned__(64))); /* let it be cacheline aligned */
+
+static struct lcore_info lcore_infos[MAX_LCORES];
 
 static void
 usage(char *progname)
@@ -69,6 +100,8 @@ usage(char *progname)
 	printf("  --deletion-rate: Enable deletion rate"
 		" calculations\n");
 	printf("  --dump-socket-mem: To dump all socket memory\n");
+	printf("  --enable-fwd: To enable packets forwarding"
+		" after insertion\n");
 
 	printf("To set flow attributes:\n");
 	printf("  --ingress: set ingress attribute in flows\n");
@@ -253,6 +286,7 @@ args_parse(int argc, char **argv)
 		{ "dump-iterations",            0, 0, 0 },
 		{ "deletion-rate",              0, 0, 0 },
 		{ "dump-socket-mem",            0, 0, 0 },
+		{ "enable-fwd",                 0, 0, 0 },
 		/* Attributes */
 		{ "ingress",                    0, 0, 0 },
 		{ "egress",                     0, 0, 0 },
@@ -366,6 +400,9 @@ args_parse(int argc, char **argv)
 			if (strcmp(lgopts[opt_idx].name,
 					"dump-socket-mem") == 0)
 				dump_socket_mem_flag = true;
+			if (strcmp(lgopts[opt_idx].name,
+					"enable-fwd") == 0)
+				enable_fwd = true;
 			break;
 		default:
 			fprintf(stderr, "Invalid option: %s\n", argv[optind]);
@@ -635,6 +672,265 @@ signal_handler(int signum)
 	}
 }
 
+/*
+ * Receive one burst of at most MAX_PKT_BURST packets from
+ * rx_port/rx_queue into li->pkts and account it in li->rx_pkts.
+ * Returns the number of packets received.
+ */
+static inline uint16_t
+do_rx(struct lcore_info *li, uint16_t rx_port, uint16_t rx_queue)
+{
+	uint16_t nb_rx;
+
+	nb_rx = rte_eth_rx_burst(rx_port, rx_queue, li->pkts, MAX_PKT_BURST);
+	li->rx_pkts += nb_rx;
+
+	return nb_rx;
+}
+
+/*
+ * Send the first cnt packets of li->pkts on tx_port/tx_queue.
+ * Packets the burst did not take are counted in li->tx_drops and
+ * freed; the others are counted in li->tx_pkts.
+ */
+static inline void
+do_tx(struct lcore_info *li, uint16_t cnt, uint16_t tx_port,
+			uint16_t tx_queue)
+{
+	uint16_t sent;
+	uint16_t idx;
+
+	sent = rte_eth_tx_burst(tx_port, tx_queue, li->pkts, cnt);
+	li->tx_pkts  += sent;
+	li->tx_drops += cnt - sent;
+
+	/* Release the tail of the burst that was not sent */
+	idx = sent;
+	while (idx < cnt) {
+		rte_pktmbuf_free(li->pkts[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Method to convert numbers into pretty numbers that are easy
+ * to read. The design here is to add a comma after each three
+ * digits and set all of this inside buffer.
+ *
+ * For example if n = 1799321, the output will be
+ * 1,799,321 after this method which is easier to read.
+ *
+ * buf must have room for the longest result: 27 bytes for
+ * UINT64_MAX (20 digits, 6 commas and the terminating NUL).
+ * Returns buf.
+ */
+static char *
+pretty_number(uint64_t n, char *buf)
+{
+	/* UINT64_MAX has 20 digits, i.e. at most 7 groups of 3 digits */
+	char p[7][4];
+	int i = 0;
+	int off = 0;
+
+	/* ">=" so that 1000 itself is split into "1" and "000" */
+	while (n >= 1000) {
+		snprintf(p[i], sizeof(p[i]), "%03d", (int)(n % 1000));
+		n /= 1000;
+		i += 1;
+	}
+
+	/* Most significant group, printed without zero padding */
+	snprintf(p[i], sizeof(p[i]), "%d", (int)n);
+	i += 1;
+
+	while (i--)
+		off += sprintf(buf + off, "%s,", p[i]);
+	/* Drop the trailing comma appended by the loop above */
+	buf[off - 1] = '\0';
+
+	return buf;
+}
+
+static void
+packet_per_second_stats(void)
+{
+	struct lcore_info *old;
+	struct lcore_info *li, *oli;
+	int nr_lines = 0;
+	int i;
+
+	old = rte_zmalloc("old",
+		sizeof(struct lcore_info) * MAX_LCORES, 0);
+	if (old == NULL)
+		rte_exit(EXIT_FAILURE, "No Memory available!");
+
+	memcpy(old, lcore_infos,
+		sizeof(struct lcore_info) * MAX_LCORES);
+
+	while (!force_quit) {
+		uint64_t total_tx_pkts = 0;
+		uint64_t total_rx_pkts = 0;
+		uint64_t total_tx_drops = 0;
+		uint64_t tx_delta, rx_delta, drops_delta;
+		char buf[3][32];
+		int nr_valid_core = 0;