DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH] app/testpmd: flowgen support ip and udp fields
@ 2021-08-09  6:25 Zhihong Wang
  2021-08-09  6:52 ` [dpdk-dev] [PATCH v2] " Zhihong Wang
                   ` (3 more replies)
  0 siblings, 4 replies; 32+ messages in thread
From: Zhihong Wang @ 2021-08-09  6:25 UTC (permalink / raw)
  To: dev, ferruh.yigit, xiaoyun.li; +Cc: Zhihong Wang

This patch aims to:
 1. Add flexibility by supporting IP & UDP src/dst fields
 2. Improve multi-core performance by using per-core vars

Signed-off-by: Zhihong Wang <wangzhihong.wzh@bytedance.com>
---
 app/test-pmd/flowgen.c | 137 +++++++++++++++++++++++++++++++------------------
 1 file changed, 86 insertions(+), 51 deletions(-)

diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c
index 3bf6e1ce97..5b389165bc 100644
--- a/app/test-pmd/flowgen.c
+++ b/app/test-pmd/flowgen.c
@@ -40,41 +40,37 @@
 
 #include "testpmd.h"
 
-/* hardcoded configuration (for now) */
-static unsigned cfg_n_flows	= 1024;
-static uint32_t cfg_ip_src	= RTE_IPV4(10, 254, 0, 0);
-static uint32_t cfg_ip_dst	= RTE_IPV4(10, 253, 0, 0);
-static uint16_t cfg_udp_src	= 1000;
-static uint16_t cfg_udp_dst	= 1001;
+/*
+ * Hardcoded range for flow generation.
+ *
+ * Total number of flows =
+ *     cfg_n_ip_src * cfg_n_ip_dst * cfg_n_udp_src * cfg_n_udp_dst
+ */
+static uint32_t cfg_n_ip_src = 100;
+static uint32_t cfg_n_ip_dst = 100;
+static uint32_t cfg_n_udp_src = 10;
+static uint32_t cfg_n_udp_dst = 10;
+
+/* Base ip and port for flow generation. */
+static uint32_t cfg_ip_src_base = RTE_IPV4(10, 254, 0, 0);
+static uint32_t cfg_ip_dst_base = RTE_IPV4(10, 253, 0, 0);
+static uint16_t cfg_udp_src_base = 1000;
+static uint16_t cfg_udp_dst_base = 1001;
 static struct rte_ether_addr cfg_ether_src =
 	{{ 0x00, 0x01, 0x02, 0x03, 0x04, 0x00 }};
 static struct rte_ether_addr cfg_ether_dst =
 	{{ 0x00, 0x01, 0x02, 0x03, 0x04, 0x01 }};
 
+RTE_DEFINE_PER_LCORE(uint32_t, _next_ip_src);
+RTE_DEFINE_PER_LCORE(uint32_t, _next_ip_dst);
+RTE_DEFINE_PER_LCORE(uint32_t, _next_udp_src);
+RTE_DEFINE_PER_LCORE(uint32_t, _next_udp_dst);
+
 #define IP_DEFTTL  64   /* from RFC 1340. */
 
 /* Use this type to inform GCC that ip_sum violates aliasing rules. */
 typedef unaligned_uint16_t alias_int16_t __attribute__((__may_alias__));
 
-static inline uint16_t
-ip_sum(const alias_int16_t *hdr, int hdr_len)
-{
-	uint32_t sum = 0;
-
-	while (hdr_len > 1)
-	{
-		sum += *hdr++;
-		if (sum & 0x80000000)
-			sum = (sum & 0xFFFF) + (sum >> 16);
-		hdr_len -= 2;
-	}
-
-	while (sum >> 16)
-		sum = (sum & 0xFFFF) + (sum >> 16);
-
-	return ~sum;
-}
-
 /*
  * Multi-flow generation mode.
  *
@@ -85,7 +81,7 @@ ip_sum(const alias_int16_t *hdr, int hdr_len)
 static void
 pkt_burst_flow_gen(struct fwd_stream *fs)
 {
-	unsigned pkt_size = tx_pkt_length - 4;	/* Adjust FCS */
+	uint32_t pkt_size = tx_pkt_length - 4; /* Adjust FCS */
 	struct rte_mbuf  *pkts_burst[MAX_PKT_BURST];
 	struct rte_mempool *mbp;
 	struct rte_mbuf  *pkt = NULL;
@@ -102,15 +98,18 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 	uint32_t retry;
 	uint64_t tx_offloads;
 	uint64_t start_tsc = 0;
-	static int next_flow = 0;
+	uint32_t next_ip_src = RTE_PER_LCORE(_next_ip_src);
+	uint32_t next_ip_dst = RTE_PER_LCORE(_next_ip_dst);
+	uint32_t next_udp_src = RTE_PER_LCORE(_next_udp_src);
+	uint32_t next_udp_dst = RTE_PER_LCORE(_next_udp_dst);
 
 	get_start_cycles(&start_tsc);
 
 	/* Receive a burst of packets and discard them. */
 	nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst,
 				 nb_pkt_per_burst);
+	inc_rx_burst_stats(fs, nb_rx);
 	fs->rx_packets += nb_rx;
-
 	for (i = 0; i < nb_rx; i++)
 		rte_pktmbuf_free(pkts_burst[i]);
 
@@ -144,7 +143,8 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 			eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
 			rte_ether_addr_copy(&cfg_ether_dst, &eth_hdr->d_addr);
 			rte_ether_addr_copy(&cfg_ether_src, &eth_hdr->s_addr);
-			eth_hdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
+			eth_hdr->ether_type =
+				rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
 
 			/* Initialize IP header. */
 			ip_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
@@ -155,22 +155,30 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 			ip_hdr->time_to_live	= IP_DEFTTL;
 			ip_hdr->next_proto_id	= IPPROTO_UDP;
 			ip_hdr->packet_id	= 0;
-			ip_hdr->src_addr	= rte_cpu_to_be_32(cfg_ip_src);
-			ip_hdr->dst_addr	= rte_cpu_to_be_32(cfg_ip_dst +
-								   next_flow);
-			ip_hdr->total_length	= RTE_CPU_TO_BE_16(pkt_size -
-								   sizeof(*eth_hdr));
-			ip_hdr->hdr_checksum	= ip_sum((const alias_int16_t *)ip_hdr,
-							 sizeof(*ip_hdr));
+			ip_hdr->src_addr	=
+				rte_cpu_to_be_32(cfg_ip_src_base
+						+ next_ip_src);
+			ip_hdr->dst_addr	=
+				rte_cpu_to_be_32(cfg_ip_dst_base
+						+ next_ip_dst);
+			ip_hdr->total_length	=
+				RTE_CPU_TO_BE_16(pkt_size
+						- sizeof(*eth_hdr));
+			rte_ipv4_cksum(ip_hdr);
 
 			/* Initialize UDP header. */
 			udp_hdr = (struct rte_udp_hdr *)(ip_hdr + 1);
-			udp_hdr->src_port	= rte_cpu_to_be_16(cfg_udp_src);
-			udp_hdr->dst_port	= rte_cpu_to_be_16(cfg_udp_dst);
+			udp_hdr->src_port	=
+				rte_cpu_to_be_16(cfg_udp_src_base
+						+ next_udp_src);
+			udp_hdr->dst_port	=
+				rte_cpu_to_be_16(cfg_udp_dst_base
+						+ next_udp_dst);
+			udp_hdr->dgram_len	=
+				RTE_CPU_TO_BE_16(pkt_size
+						- sizeof(*eth_hdr)
+						- sizeof(*ip_hdr));
 			udp_hdr->dgram_cksum	= 0; /* No UDP checksum. */
-			udp_hdr->dgram_len	= RTE_CPU_TO_BE_16(pkt_size -
-								   sizeof(*eth_hdr) -
-								   sizeof(*ip_hdr));
 			pkt->nb_segs		= 1;
 			pkt->pkt_len		= pkt_size;
 			pkt->ol_flags		&= EXT_ATTACHED_MBUF;
@@ -185,30 +193,57 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 		}
 		pkts_burst[nb_pkt] = pkt;
 
-		next_flow = (next_flow + 1) % cfg_n_flows;
+		if (++next_udp_dst < cfg_n_udp_dst)
+			continue;
+		next_udp_dst = 0;
+		if (++next_udp_src < cfg_n_udp_src)
+			continue;
+		next_udp_src = 0;
+		if (++next_ip_dst < cfg_n_ip_dst)
+			continue;
+		next_ip_dst = 0;
+		if (++next_ip_src < cfg_n_ip_src)
+			continue;
+		next_ip_src = 0;
 	}
 
 	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt);
 	/*
 	 * Retry if necessary
 	 */
-	if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) {
+	if (unlikely(nb_tx < nb_pkt) && fs->retry_enabled) {
 		retry = 0;
-		while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) {
+		while (nb_tx < nb_pkt && retry++ < burst_tx_retry_num) {
 			rte_delay_us(burst_tx_delay_time);
 			nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue,
-					&pkts_burst[nb_tx], nb_rx - nb_tx);
+					&pkts_burst[nb_tx], nb_pkt - nb_tx);
 		}
 	}
-	fs->tx_packets += nb_tx;
 
 	inc_tx_burst_stats(fs, nb_tx);
-	if (unlikely(nb_tx < nb_pkt)) {
-		/* Back out the flow counter. */
-		next_flow -= (nb_pkt - nb_tx);
-		while (next_flow < 0)
-			next_flow += cfg_n_flows;
+	fs->tx_packets += nb_tx;
+	/* Catch up flow idx by actual sent. */
+	for (i = 0; i < nb_tx; ++i) {
+		RTE_PER_LCORE(_next_udp_dst) = RTE_PER_LCORE(_next_udp_dst) + 1;
+		if (RTE_PER_LCORE(_next_udp_dst) < cfg_n_udp_dst)
+			continue;
+		RTE_PER_LCORE(_next_udp_dst) = 0;
+		RTE_PER_LCORE(_next_udp_src) = RTE_PER_LCORE(_next_udp_src) + 1;
+		if (RTE_PER_LCORE(_next_udp_src) < cfg_n_udp_src)
+			continue;
+		RTE_PER_LCORE(_next_udp_src) = 0;
+		RTE_PER_LCORE(_next_ip_dst) = RTE_PER_LCORE(_next_ip_dst) + 1;
+		if (RTE_PER_LCORE(_next_ip_dst) < cfg_n_ip_dst)
+			continue;
+		RTE_PER_LCORE(_next_ip_dst) = 0;
+		RTE_PER_LCORE(_next_ip_src) = RTE_PER_LCORE(_next_ip_src) + 1;
+		if (RTE_PER_LCORE(_next_ip_src) < cfg_n_ip_src)
+			continue;
+		RTE_PER_LCORE(_next_ip_src) = 0;
+	}
 
+	if (unlikely(nb_tx < nb_pkt)) {
+		fs->fwd_dropped += nb_pkt - nb_tx;
 		do {
 			rte_pktmbuf_free(pkts_burst[nb_tx]);
 		} while (++nb_tx < nb_pkt);
-- 
2.11.0


^ permalink raw reply	[flat|nested] 32+ messages in thread

* [dpdk-dev] [PATCH v2] app/testpmd: flowgen support ip and udp fields
  2021-08-09  6:25 [dpdk-dev] [PATCH] app/testpmd: flowgen support ip and udp fields Zhihong Wang
@ 2021-08-09  6:52 ` Zhihong Wang
  2021-08-09 12:21   ` Singh, Aman Deep
  2021-08-09 15:18   ` [dpdk-dev] " Ferruh Yigit
  2021-08-12 11:11 ` [dpdk-dev] [PATCH v3 0/4] app/testpmd: flowgen fixes and improvements Zhihong Wang
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 32+ messages in thread
From: Zhihong Wang @ 2021-08-09  6:52 UTC (permalink / raw)
  To: dev, ferruh.yigit, xiaoyun.li; +Cc: Zhihong Wang

This patch aims to:
 1. Add flexibility by supporting IP & UDP src/dst fields
 2. Improve multi-core performance by using per-core vars

v2: fix assigning ip header cksum

Signed-off-by: Zhihong Wang <wangzhihong.wzh@bytedance.com>
---
 app/test-pmd/flowgen.c | 137 +++++++++++++++++++++++++++++++------------------
 1 file changed, 86 insertions(+), 51 deletions(-)

diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c
index 3bf6e1ce97..333c3b2cd2 100644
--- a/app/test-pmd/flowgen.c
+++ b/app/test-pmd/flowgen.c
@@ -40,41 +40,37 @@
 
 #include "testpmd.h"
 
-/* hardcoded configuration (for now) */
-static unsigned cfg_n_flows	= 1024;
-static uint32_t cfg_ip_src	= RTE_IPV4(10, 254, 0, 0);
-static uint32_t cfg_ip_dst	= RTE_IPV4(10, 253, 0, 0);
-static uint16_t cfg_udp_src	= 1000;
-static uint16_t cfg_udp_dst	= 1001;
+/*
+ * Hardcoded range for flow generation.
+ *
+ * Total number of flows =
+ *     cfg_n_ip_src * cfg_n_ip_dst * cfg_n_udp_src * cfg_n_udp_dst
+ */
+static uint32_t cfg_n_ip_src = 100;
+static uint32_t cfg_n_ip_dst = 100;
+static uint32_t cfg_n_udp_src = 10;
+static uint32_t cfg_n_udp_dst = 10;
+
+/* Base ip and port for flow generation. */
+static uint32_t cfg_ip_src_base = RTE_IPV4(10, 254, 0, 0);
+static uint32_t cfg_ip_dst_base = RTE_IPV4(10, 253, 0, 0);
+static uint16_t cfg_udp_src_base = 1000;
+static uint16_t cfg_udp_dst_base = 1001;
 static struct rte_ether_addr cfg_ether_src =
 	{{ 0x00, 0x01, 0x02, 0x03, 0x04, 0x00 }};
 static struct rte_ether_addr cfg_ether_dst =
 	{{ 0x00, 0x01, 0x02, 0x03, 0x04, 0x01 }};
 
+RTE_DEFINE_PER_LCORE(uint32_t, _next_ip_src);
+RTE_DEFINE_PER_LCORE(uint32_t, _next_ip_dst);
+RTE_DEFINE_PER_LCORE(uint32_t, _next_udp_src);
+RTE_DEFINE_PER_LCORE(uint32_t, _next_udp_dst);
+
 #define IP_DEFTTL  64   /* from RFC 1340. */
 
 /* Use this type to inform GCC that ip_sum violates aliasing rules. */
 typedef unaligned_uint16_t alias_int16_t __attribute__((__may_alias__));
 
-static inline uint16_t
-ip_sum(const alias_int16_t *hdr, int hdr_len)
-{
-	uint32_t sum = 0;
-
-	while (hdr_len > 1)
-	{
-		sum += *hdr++;
-		if (sum & 0x80000000)
-			sum = (sum & 0xFFFF) + (sum >> 16);
-		hdr_len -= 2;
-	}
-
-	while (sum >> 16)
-		sum = (sum & 0xFFFF) + (sum >> 16);
-
-	return ~sum;
-}
-
 /*
  * Multi-flow generation mode.
  *
@@ -85,7 +81,7 @@ ip_sum(const alias_int16_t *hdr, int hdr_len)
 static void
 pkt_burst_flow_gen(struct fwd_stream *fs)
 {
-	unsigned pkt_size = tx_pkt_length - 4;	/* Adjust FCS */
+	uint32_t pkt_size = tx_pkt_length - 4; /* Adjust FCS */
 	struct rte_mbuf  *pkts_burst[MAX_PKT_BURST];
 	struct rte_mempool *mbp;
 	struct rte_mbuf  *pkt = NULL;
@@ -102,15 +98,18 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 	uint32_t retry;
 	uint64_t tx_offloads;
 	uint64_t start_tsc = 0;
-	static int next_flow = 0;
+	uint32_t next_ip_src = RTE_PER_LCORE(_next_ip_src);
+	uint32_t next_ip_dst = RTE_PER_LCORE(_next_ip_dst);
+	uint32_t next_udp_src = RTE_PER_LCORE(_next_udp_src);
+	uint32_t next_udp_dst = RTE_PER_LCORE(_next_udp_dst);
 
 	get_start_cycles(&start_tsc);
 
 	/* Receive a burst of packets and discard them. */
 	nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst,
 				 nb_pkt_per_burst);
+	inc_rx_burst_stats(fs, nb_rx);
 	fs->rx_packets += nb_rx;
-
 	for (i = 0; i < nb_rx; i++)
 		rte_pktmbuf_free(pkts_burst[i]);
 
@@ -144,7 +143,8 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 			eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
 			rte_ether_addr_copy(&cfg_ether_dst, &eth_hdr->d_addr);
 			rte_ether_addr_copy(&cfg_ether_src, &eth_hdr->s_addr);
-			eth_hdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
+			eth_hdr->ether_type =
+				rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
 
 			/* Initialize IP header. */
 			ip_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
@@ -155,22 +155,30 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 			ip_hdr->time_to_live	= IP_DEFTTL;
 			ip_hdr->next_proto_id	= IPPROTO_UDP;
 			ip_hdr->packet_id	= 0;
-			ip_hdr->src_addr	= rte_cpu_to_be_32(cfg_ip_src);
-			ip_hdr->dst_addr	= rte_cpu_to_be_32(cfg_ip_dst +
-								   next_flow);
-			ip_hdr->total_length	= RTE_CPU_TO_BE_16(pkt_size -
-								   sizeof(*eth_hdr));
-			ip_hdr->hdr_checksum	= ip_sum((const alias_int16_t *)ip_hdr,
-							 sizeof(*ip_hdr));
+			ip_hdr->src_addr	=
+				rte_cpu_to_be_32(cfg_ip_src_base
+						+ next_ip_src);
+			ip_hdr->dst_addr	=
+				rte_cpu_to_be_32(cfg_ip_dst_base
+						+ next_ip_dst);
+			ip_hdr->total_length	=
+				RTE_CPU_TO_BE_16(pkt_size
+						- sizeof(*eth_hdr));
+			ip_hdr->hdr_checksum = rte_ipv4_cksum(ip_hdr);
 
 			/* Initialize UDP header. */
 			udp_hdr = (struct rte_udp_hdr *)(ip_hdr + 1);
-			udp_hdr->src_port	= rte_cpu_to_be_16(cfg_udp_src);
-			udp_hdr->dst_port	= rte_cpu_to_be_16(cfg_udp_dst);
+			udp_hdr->src_port	=
+				rte_cpu_to_be_16(cfg_udp_src_base
+						+ next_udp_src);
+			udp_hdr->dst_port	=
+				rte_cpu_to_be_16(cfg_udp_dst_base
+						+ next_udp_dst);
+			udp_hdr->dgram_len	=
+				RTE_CPU_TO_BE_16(pkt_size
+						- sizeof(*eth_hdr)
+						- sizeof(*ip_hdr));
 			udp_hdr->dgram_cksum	= 0; /* No UDP checksum. */
-			udp_hdr->dgram_len	= RTE_CPU_TO_BE_16(pkt_size -
-								   sizeof(*eth_hdr) -
-								   sizeof(*ip_hdr));
 			pkt->nb_segs		= 1;
 			pkt->pkt_len		= pkt_size;
 			pkt->ol_flags		&= EXT_ATTACHED_MBUF;
@@ -185,30 +193,57 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 		}
 		pkts_burst[nb_pkt] = pkt;
 
-		next_flow = (next_flow + 1) % cfg_n_flows;
+		if (++next_udp_dst < cfg_n_udp_dst)
+			continue;
+		next_udp_dst = 0;
+		if (++next_udp_src < cfg_n_udp_src)
+			continue;
+		next_udp_src = 0;
+		if (++next_ip_dst < cfg_n_ip_dst)
+			continue;
+		next_ip_dst = 0;
+		if (++next_ip_src < cfg_n_ip_src)
+			continue;
+		next_ip_src = 0;
 	}
 
 	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt);
 	/*
 	 * Retry if necessary
 	 */
-	if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) {
+	if (unlikely(nb_tx < nb_pkt) && fs->retry_enabled) {
 		retry = 0;
-		while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) {
+		while (nb_tx < nb_pkt && retry++ < burst_tx_retry_num) {
 			rte_delay_us(burst_tx_delay_time);
 			nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue,
-					&pkts_burst[nb_tx], nb_rx - nb_tx);
+					&pkts_burst[nb_tx], nb_pkt - nb_tx);
 		}
 	}
-	fs->tx_packets += nb_tx;
 
 	inc_tx_burst_stats(fs, nb_tx);
-	if (unlikely(nb_tx < nb_pkt)) {
-		/* Back out the flow counter. */
-		next_flow -= (nb_pkt - nb_tx);
-		while (next_flow < 0)
-			next_flow += cfg_n_flows;
+	fs->tx_packets += nb_tx;
+	/* Catch up flow idx by actual sent. */
+	for (i = 0; i < nb_tx; ++i) {
+		RTE_PER_LCORE(_next_udp_dst) = RTE_PER_LCORE(_next_udp_dst) + 1;
+		if (RTE_PER_LCORE(_next_udp_dst) < cfg_n_udp_dst)
+			continue;
+		RTE_PER_LCORE(_next_udp_dst) = 0;
+		RTE_PER_LCORE(_next_udp_src) = RTE_PER_LCORE(_next_udp_src) + 1;
+		if (RTE_PER_LCORE(_next_udp_src) < cfg_n_udp_src)
+			continue;
+		RTE_PER_LCORE(_next_udp_src) = 0;
+		RTE_PER_LCORE(_next_ip_dst) = RTE_PER_LCORE(_next_ip_dst) + 1;
+		if (RTE_PER_LCORE(_next_ip_dst) < cfg_n_ip_dst)
+			continue;
+		RTE_PER_LCORE(_next_ip_dst) = 0;
+		RTE_PER_LCORE(_next_ip_src) = RTE_PER_LCORE(_next_ip_src) + 1;
+		if (RTE_PER_LCORE(_next_ip_src) < cfg_n_ip_src)
+			continue;
+		RTE_PER_LCORE(_next_ip_src) = 0;
+	}
 
+	if (unlikely(nb_tx < nb_pkt)) {
+		fs->fwd_dropped += nb_pkt - nb_tx;
 		do {
 			rte_pktmbuf_free(pkts_burst[nb_tx]);
 		} while (++nb_tx < nb_pkt);
-- 
2.11.0


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [dpdk-dev] [PATCH v2] app/testpmd: flowgen support ip and udp fields
  2021-08-09  6:52 ` [dpdk-dev] [PATCH v2] " Zhihong Wang
@ 2021-08-09 12:21   ` Singh, Aman Deep
  2021-08-10  7:30     ` [dpdk-dev] [External] " 王志宏
  2021-08-09 15:18   ` [dpdk-dev] " Ferruh Yigit
  1 sibling, 1 reply; 32+ messages in thread
From: Singh, Aman Deep @ 2021-08-09 12:21 UTC (permalink / raw)
  To: Zhihong Wang, dev, ferruh.yigit, xiaoyun.li

Hi Wang,

On 8/9/2021 12:22 PM, Zhihong Wang wrote:
> This patch aims to:
>   1. Add flexibility by supporting IP & UDP src/dst fields
>   2. Improve multi-core performance by using per-core vars
>
> v2: fix assigning ip header cksum
>
> Signed-off-by: Zhihong Wang <wangzhihong.wzh@bytedance.com>
> ---
>   <snip>

 From defination of flowgen as per the documentation-

  *

    |flowgen|: Multi-flow generation mode. Originates a number of flows
    (with varying destination IP addresses), and terminate receive traffic.

So changing the src IP address may not fit the defination of a source 
generating real traffic.

I would like to check this part further.


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [dpdk-dev] [PATCH v2] app/testpmd: flowgen support ip and udp fields
  2021-08-09  6:52 ` [dpdk-dev] [PATCH v2] " Zhihong Wang
  2021-08-09 12:21   ` Singh, Aman Deep
@ 2021-08-09 15:18   ` Ferruh Yigit
  2021-08-10  7:57     ` [dpdk-dev] [External] " 王志宏
  1 sibling, 1 reply; 32+ messages in thread
From: Ferruh Yigit @ 2021-08-09 15:18 UTC (permalink / raw)
  To: Zhihong Wang, dev, xiaoyun.li
  Cc: Singh, Aman Deep, Igor Russkikh, Cyril Chemparathy

On 8/9/2021 7:52 AM, Zhihong Wang wrote:
> This patch aims to:
>  1. Add flexibility by supporting IP & UDP src/dst fields

What is the reason/"use case" of this flexibility?

>  2. Improve multi-core performance by using per-core vars>

On multi core this also has syncronization problem, OK to make it per-core. Do
you have any observed performance difference, if so how much is it?

And can you please separate this to its own patch? This can be before ip/udp update.

> v2: fix assigning ip header cksum
> 

+1 to update, can you please make it as seperate patch?

So overall this can be a patchset with 4 patches:
1- Fix retry logic (nb_rx -> nb_pkt)
2- Use 'rte_ipv4_cksum()' API (instead of static 'ip_sum()')
3- User per-core varible (for 'next_flow')
4- Support ip/udp src/dst variaty of packets

> Signed-off-by: Zhihong Wang <wangzhihong.wzh@bytedance.com>
> ---
>  app/test-pmd/flowgen.c | 137 +++++++++++++++++++++++++++++++------------------
>  1 file changed, 86 insertions(+), 51 deletions(-)
> 

<...>

> @@ -185,30 +193,57 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
>  		}
>  		pkts_burst[nb_pkt] = pkt;
>  
> -		next_flow = (next_flow + 1) % cfg_n_flows;
> +		if (++next_udp_dst < cfg_n_udp_dst)
> +			continue;
> +		next_udp_dst = 0;
> +		if (++next_udp_src < cfg_n_udp_src)
> +			continue;
> +		next_udp_src = 0;
> +		if (++next_ip_dst < cfg_n_ip_dst)
> +			continue;
> +		next_ip_dst = 0;
> +		if (++next_ip_src < cfg_n_ip_src)
> +			continue;
> +		next_ip_src = 0;

What is the logic here, can you please clarifiy the packet generation logic both
in a comment here and in the commit log?

>  	}
>  
>  	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt);
>  	/*
>  	 * Retry if necessary
>  	 */
> -	if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) {
> +	if (unlikely(nb_tx < nb_pkt) && fs->retry_enabled) {
>  		retry = 0;
> -		while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) {
> +		while (nb_tx < nb_pkt && retry++ < burst_tx_retry_num) {
>  			rte_delay_us(burst_tx_delay_time);
>  			nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue,
> -					&pkts_burst[nb_tx], nb_rx - nb_tx);
> +					&pkts_burst[nb_tx], nb_pkt - nb_tx);
>  		}

+1 to this fix, thanks for it. But can you please make a seperate patch for
this, with proper 'Fixes:' tag etc..

>  	}
> -	fs->tx_packets += nb_tx;
>  
>  	inc_tx_burst_stats(fs, nb_tx);
> -	if (unlikely(nb_tx < nb_pkt)) {
> -		/* Back out the flow counter. */
> -		next_flow -= (nb_pkt - nb_tx);
> -		while (next_flow < 0)
> -			next_flow += cfg_n_flows;
> +	fs->tx_packets += nb_tx;
> +	/* Catch up flow idx by actual sent. */
> +	for (i = 0; i < nb_tx; ++i) {
> +		RTE_PER_LCORE(_next_udp_dst) = RTE_PER_LCORE(_next_udp_dst) + 1;
> +		if (RTE_PER_LCORE(_next_udp_dst) < cfg_n_udp_dst)
> +			continue;
> +		RTE_PER_LCORE(_next_udp_dst) = 0;
> +		RTE_PER_LCORE(_next_udp_src) = RTE_PER_LCORE(_next_udp_src) + 1;
> +		if (RTE_PER_LCORE(_next_udp_src) < cfg_n_udp_src)
> +			continue;
> +		RTE_PER_LCORE(_next_udp_src) = 0;
> +		RTE_PER_LCORE(_next_ip_dst) = RTE_PER_LCORE(_next_ip_dst) + 1;
> +		if (RTE_PER_LCORE(_next_ip_dst) < cfg_n_ip_dst)
> +			continue;
> +		RTE_PER_LCORE(_next_ip_dst) = 0;
> +		RTE_PER_LCORE(_next_ip_src) = RTE_PER_LCORE(_next_ip_src) + 1;
> +		if (RTE_PER_LCORE(_next_ip_src) < cfg_n_ip_src)
> +			continue;
> +		RTE_PER_LCORE(_next_ip_src) = 0;
> +	}

Why per-core variables are not used in forward function, but local variables
(like 'next_ip_src' etc..) used? Is it for the performance, if so what is the
impact?

And why not directly assign from local variables to per-core variables, but have
above catch up loop?



^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [dpdk-dev] [External] Re: [PATCH v2] app/testpmd: flowgen support ip and udp fields
  2021-08-09 12:21   ` Singh, Aman Deep
@ 2021-08-10  7:30     ` 王志宏
  0 siblings, 0 replies; 32+ messages in thread
From: 王志宏 @ 2021-08-10  7:30 UTC (permalink / raw)
  To: Singh, Aman Deep; +Cc: dev, ferruh.yigit, xiaoyun.li

Hi Aman,

On Mon, Aug 9, 2021 at 8:21 PM Singh, Aman Deep
<aman.deep.singh@intel.com> wrote:
>
> Hi Wang,
>
> On 8/9/2021 12:22 PM, Zhihong Wang wrote:
>
> This patch aims to:
>  1. Add flexibility by supporting IP & UDP src/dst fields
>  2. Improve multi-core performance by using per-core vars
>
> v2: fix assigning ip header cksum
>
> Signed-off-by: Zhihong Wang <wangzhihong.wzh@bytedance.com>
> ---
>  <snip>
>
> From defination of flowgen as per the documentation-
>
> flowgen: Multi-flow generation mode. Originates a number of flows (with varying destination IP addresses), and terminate receive traffic.
>
> So changing the src IP address may not fit the defination of a source generating real traffic.
>
> I would like to check this part further.

Thanks for the review. The purpose of introducing them is to emulate
pkt generators behaviors.

Do you think keeping cfg_n_ip_src & cfg_n_udp_src while setting them =
1 by default makes sense? Or maybe update the documentation?

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [dpdk-dev] [External] Re: [PATCH v2] app/testpmd: flowgen support ip and udp fields
  2021-08-09 15:18   ` [dpdk-dev] " Ferruh Yigit
@ 2021-08-10  7:57     ` 王志宏
  2021-08-10  9:12       ` Ferruh Yigit
  0 siblings, 1 reply; 32+ messages in thread
From: 王志宏 @ 2021-08-10  7:57 UTC (permalink / raw)
  To: Ferruh Yigit
  Cc: dev, xiaoyun.li, Singh, Aman Deep, Igor Russkikh, Cyril Chemparathy

Thanks for the review Ferruh :)

On Mon, Aug 9, 2021 at 11:18 PM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
>
> On 8/9/2021 7:52 AM, Zhihong Wang wrote:
> > This patch aims to:
> >  1. Add flexibility by supporting IP & UDP src/dst fields
>
> What is the reason/"use case" of this flexibility?

The purpose is to emulate pkt generator behaviors.

>
> >  2. Improve multi-core performance by using per-core vars>
>
> On multi core this also has syncronization problem, OK to make it per-core. Do
> you have any observed performance difference, if so how much is it?

Huge difference, one example: 8 core flowgen -> rxonly results: 43
Mpps (per-core) vs. 9.3 Mpps (shared), of course the numbers "varies
depending on system configuration".

>
> And can you please separate this to its own patch? This can be before ip/udp update.

Will do.

>
> > v2: fix assigning ip header cksum
> >
>
> +1 to update, can you please make it as seperate patch?

Sure.

>
> So overall this can be a patchset with 4 patches:
> 1- Fix retry logic (nb_rx -> nb_pkt)
> 2- Use 'rte_ipv4_cksum()' API (instead of static 'ip_sum()')
> 3- User per-core varible (for 'next_flow')
> 4- Support ip/udp src/dst variaty of packets
>

Great summary. Thanks a lot.

> > Signed-off-by: Zhihong Wang <wangzhihong.wzh@bytedance.com>
> > ---
> >  app/test-pmd/flowgen.c | 137 +++++++++++++++++++++++++++++++------------------
> >  1 file changed, 86 insertions(+), 51 deletions(-)
> >
>
> <...>
>
> > @@ -185,30 +193,57 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
> >               }
> >               pkts_burst[nb_pkt] = pkt;
> >
> > -             next_flow = (next_flow + 1) % cfg_n_flows;
> > +             if (++next_udp_dst < cfg_n_udp_dst)
> > +                     continue;
> > +             next_udp_dst = 0;
> > +             if (++next_udp_src < cfg_n_udp_src)
> > +                     continue;
> > +             next_udp_src = 0;
> > +             if (++next_ip_dst < cfg_n_ip_dst)
> > +                     continue;
> > +             next_ip_dst = 0;
> > +             if (++next_ip_src < cfg_n_ip_src)
> > +                     continue;
> > +             next_ip_src = 0;
>
> What is the logic here, can you please clarifiy the packet generation logic both
> in a comment here and in the commit log?

It's round-robin field by field. Will add the comments.

>
> >       }
> >
> >       nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt);
> >       /*
> >        * Retry if necessary
> >        */
> > -     if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) {
> > +     if (unlikely(nb_tx < nb_pkt) && fs->retry_enabled) {
> >               retry = 0;
> > -             while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) {
> > +             while (nb_tx < nb_pkt && retry++ < burst_tx_retry_num) {
> >                       rte_delay_us(burst_tx_delay_time);
> >                       nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue,
> > -                                     &pkts_burst[nb_tx], nb_rx - nb_tx);
> > +                                     &pkts_burst[nb_tx], nb_pkt - nb_tx);
> >               }
>
> +1 to this fix, thanks for it. But can you please make a seperate patch for
> this, with proper 'Fixes:' tag etc..

Ok.

>
> >       }
> > -     fs->tx_packets += nb_tx;
> >
> >       inc_tx_burst_stats(fs, nb_tx);
> > -     if (unlikely(nb_tx < nb_pkt)) {
> > -             /* Back out the flow counter. */
> > -             next_flow -= (nb_pkt - nb_tx);
> > -             while (next_flow < 0)
> > -                     next_flow += cfg_n_flows;
> > +     fs->tx_packets += nb_tx;
> > +     /* Catch up flow idx by actual sent. */
> > +     for (i = 0; i < nb_tx; ++i) {
> > +             RTE_PER_LCORE(_next_udp_dst) = RTE_PER_LCORE(_next_udp_dst) + 1;
> > +             if (RTE_PER_LCORE(_next_udp_dst) < cfg_n_udp_dst)
> > +                     continue;
> > +             RTE_PER_LCORE(_next_udp_dst) = 0;
> > +             RTE_PER_LCORE(_next_udp_src) = RTE_PER_LCORE(_next_udp_src) + 1;
> > +             if (RTE_PER_LCORE(_next_udp_src) < cfg_n_udp_src)
> > +                     continue;
> > +             RTE_PER_LCORE(_next_udp_src) = 0;
> > +             RTE_PER_LCORE(_next_ip_dst) = RTE_PER_LCORE(_next_ip_dst) + 1;
> > +             if (RTE_PER_LCORE(_next_ip_dst) < cfg_n_ip_dst)
> > +                     continue;
> > +             RTE_PER_LCORE(_next_ip_dst) = 0;
> > +             RTE_PER_LCORE(_next_ip_src) = RTE_PER_LCORE(_next_ip_src) + 1;
> > +             if (RTE_PER_LCORE(_next_ip_src) < cfg_n_ip_src)
> > +                     continue;
> > +             RTE_PER_LCORE(_next_ip_src) = 0;
> > +     }
>
> Why per-core variables are not used in forward function, but local variables
> (like 'next_ip_src' etc..) used? Is it for the performance, if so what is the
> impact?
>
> And why not directly assign from local variables to per-core variables, but have
> above catch up loop?
>
>

Local vars are for generating pkts, global ones catch up finally when
nb_tx is clear.
So flow indexes only increase by actual sent pkt number.
It serves the same purpose of the original "/* backout the flow counter */".
My math isn't good enough to make it look more intelligent though.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [dpdk-dev] [External] Re: [PATCH v2] app/testpmd: flowgen support ip and udp fields
  2021-08-10  7:57     ` [dpdk-dev] [External] " 王志宏
@ 2021-08-10  9:12       ` Ferruh Yigit
  2021-08-11  2:48         ` 王志宏
  0 siblings, 1 reply; 32+ messages in thread
From: Ferruh Yigit @ 2021-08-10  9:12 UTC (permalink / raw)
  To: 王志宏
  Cc: dev, xiaoyun.li, Singh, Aman Deep, Igor Russkikh, Cyril Chemparathy

On 8/10/2021 8:57 AM, 王志宏 wrote:
> Thanks for the review Ferruh :)
> 
> On Mon, Aug 9, 2021 at 11:18 PM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
>>
>> On 8/9/2021 7:52 AM, Zhihong Wang wrote:
>>> This patch aims to:
>>>  1. Add flexibility by supporting IP & UDP src/dst fields
>>
>> What is the reason/"use case" of this flexibility?
> 
> The purpose is to emulate pkt generator behaviors.
> 

'flowgen' forwarding is already to emulate pkt generator, but it was only
changing destination IP.

What additional benefit does changing udp ports of the packets brings? What is
your usecase for this change?

>>
>>>  2. Improve multi-core performance by using per-core vars>
>>
>> On multi core this also has syncronization problem, OK to make it per-core. Do
>> you have any observed performance difference, if so how much is it?
> 
> Huge difference, one example: 8 core flowgen -> rxonly results: 43
> Mpps (per-core) vs. 9.3 Mpps (shared), of course the numbers "varies
> depending on system configuration".
> 

Thanks for clarification.

>>
>> And can you please separate this to its own patch? This can be before ip/udp update.
> 
> Will do.
> 
>>
>>> v2: fix assigning ip header cksum
>>>
>>
>> +1 to update, can you please make it as seperate patch?
> 
> Sure.
> 
>>
>> So overall this can be a patchset with 4 patches:
>> 1- Fix retry logic (nb_rx -> nb_pkt)
>> 2- Use 'rte_ipv4_cksum()' API (instead of static 'ip_sum()')
>> 3- User per-core varible (for 'next_flow')
>> 4- Support ip/udp src/dst variaty of packets
>>
> 
> Great summary. Thanks a lot.
> 
>>> Signed-off-by: Zhihong Wang <wangzhihong.wzh@bytedance.com>
>>> ---
>>>  app/test-pmd/flowgen.c | 137 +++++++++++++++++++++++++++++++------------------
>>>  1 file changed, 86 insertions(+), 51 deletions(-)
>>>
>>
>> <...>
>>
>>> @@ -185,30 +193,57 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
>>>               }
>>>               pkts_burst[nb_pkt] = pkt;
>>>
>>> -             next_flow = (next_flow + 1) % cfg_n_flows;
>>> +             if (++next_udp_dst < cfg_n_udp_dst)
>>> +                     continue;
>>> +             next_udp_dst = 0;
>>> +             if (++next_udp_src < cfg_n_udp_src)
>>> +                     continue;
>>> +             next_udp_src = 0;
>>> +             if (++next_ip_dst < cfg_n_ip_dst)
>>> +                     continue;
>>> +             next_ip_dst = 0;
>>> +             if (++next_ip_src < cfg_n_ip_src)
>>> +                     continue;
>>> +             next_ip_src = 0;
>>
>> What is the logic here, can you please clarifiy the packet generation logic both
>> in a comment here and in the commit log?
> 
> It's round-robin field by field. Will add the comments.
> 

Thanks. If the receiving end is doing RSS based on IP address, dst address will
change in every 100 packets and src will change in every 10000 packets. This is
a slight behavior change.

When it was only dst ip, it was simple to just increment it, not sure about it
in this case. I wonder if we should set all randomly for each packet. I don't
know what is the better logic here, we can discuss it more in the next version.

>>
>>>       }
>>>
>>>       nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt);
>>>       /*
>>>        * Retry if necessary
>>>        */
>>> -     if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) {
>>> +     if (unlikely(nb_tx < nb_pkt) && fs->retry_enabled) {
>>>               retry = 0;
>>> -             while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) {
>>> +             while (nb_tx < nb_pkt && retry++ < burst_tx_retry_num) {
>>>                       rte_delay_us(burst_tx_delay_time);
>>>                       nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue,
>>> -                                     &pkts_burst[nb_tx], nb_rx - nb_tx);
>>> +                                     &pkts_burst[nb_tx], nb_pkt - nb_tx);
>>>               }
>>
>> +1 to this fix, thanks for it. But can you please make a seperate patch for
>> this, with proper 'Fixes:' tag etc..
> 
> Ok.
> 
>>
>>>       }
>>> -     fs->tx_packets += nb_tx;
>>>
>>>       inc_tx_burst_stats(fs, nb_tx);
>>> -     if (unlikely(nb_tx < nb_pkt)) {
>>> -             /* Back out the flow counter. */
>>> -             next_flow -= (nb_pkt - nb_tx);
>>> -             while (next_flow < 0)
>>> -                     next_flow += cfg_n_flows;
>>> +     fs->tx_packets += nb_tx;
>>> +     /* Catch up flow idx by actual sent. */
>>> +     for (i = 0; i < nb_tx; ++i) {
>>> +             RTE_PER_LCORE(_next_udp_dst) = RTE_PER_LCORE(_next_udp_dst) + 1;
>>> +             if (RTE_PER_LCORE(_next_udp_dst) < cfg_n_udp_dst)
>>> +                     continue;
>>> +             RTE_PER_LCORE(_next_udp_dst) = 0;
>>> +             RTE_PER_LCORE(_next_udp_src) = RTE_PER_LCORE(_next_udp_src) + 1;
>>> +             if (RTE_PER_LCORE(_next_udp_src) < cfg_n_udp_src)
>>> +                     continue;
>>> +             RTE_PER_LCORE(_next_udp_src) = 0;
>>> +             RTE_PER_LCORE(_next_ip_dst) = RTE_PER_LCORE(_next_ip_dst) + 1;
>>> +             if (RTE_PER_LCORE(_next_ip_dst) < cfg_n_ip_dst)
>>> +                     continue;
>>> +             RTE_PER_LCORE(_next_ip_dst) = 0;
>>> +             RTE_PER_LCORE(_next_ip_src) = RTE_PER_LCORE(_next_ip_src) + 1;
>>> +             if (RTE_PER_LCORE(_next_ip_src) < cfg_n_ip_src)
>>> +                     continue;
>>> +             RTE_PER_LCORE(_next_ip_src) = 0;
>>> +     }
>>
>> Why per-core variables are not used in forward function, but local variables
>> (like 'next_ip_src' etc..) used? Is it for the performance, if so what is the
>> impact?
>>
>> And why not directly assign from local variables to per-core variables, but have
>> above catch up loop?
>>
>>
> 
> Local vars are for generating pkts, global ones catch up finally when
> nb_tx is clear.

Why you are not using global ones to generate packets? This removes the need for
catch up?

> So flow indexes only increase by actual sent pkt number.
> It serves the same purpose of the original "/* backout the flow counter */".
> My math isn't good enough to make it look more intelligent though.
> 

Maybe I am missing something, for this case why not just assign back from locals
to globals?

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [dpdk-dev] [External] Re: [PATCH v2] app/testpmd: flowgen support ip and udp fields
  2021-08-10  9:12       ` Ferruh Yigit
@ 2021-08-11  2:48         ` 王志宏
  2021-08-11 10:31           ` Ferruh Yigit
  0 siblings, 1 reply; 32+ messages in thread
From: 王志宏 @ 2021-08-11  2:48 UTC (permalink / raw)
  To: Ferruh Yigit
  Cc: dev, xiaoyun.li, Singh, Aman Deep, Igor Russkikh, Cyril Chemparathy

On Tue, Aug 10, 2021 at 5:12 PM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
>
> On 8/10/2021 8:57 AM, 王志宏 wrote:
> > Thanks for the review Ferruh :)
> >
> > On Mon, Aug 9, 2021 at 11:18 PM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
> >>
> >> On 8/9/2021 7:52 AM, Zhihong Wang wrote:
> >>> This patch aims to:
> >>>  1. Add flexibility by supporting IP & UDP src/dst fields
> >>
> >> What is the reason/"use case" of this flexibility?
> >
> > The purpose is to emulate pkt generator behaviors.
> >
>
> 'flowgen' forwarding is already to emulate pkt generator, but it was only
> changing destination IP.
>
> What additional benefit does changing udp ports of the packets brings? What is
> your usecase for this change?

Pkt generators like pktgen/trex/ixia/spirent can change various fields
including ip/udp src/dst.

Keeping the cfg_n_* while setting cfg_n_ip_dst = 1024 and others = 1
makes the default behavior exactly unchanged. Do you think it makes
sense?

>
> >>
> >>>  2. Improve multi-core performance by using per-core vars>
> >>
> >> On multi core this also has syncronization problem, OK to make it per-core. Do
> >> you have any observed performance difference, if so how much is it?
> >
> > Huge difference, one example: 8 core flowgen -> rxonly results: 43
> > Mpps (per-core) vs. 9.3 Mpps (shared), of course the numbers "varies
> > depending on system configuration".
> >
>
> Thanks for clarification.
>
> >>
> >> And can you please separate this to its own patch? This can be before ip/udp update.
> >
> > Will do.
> >
> >>
> >>> v2: fix assigning ip header cksum
> >>>
> >>
> >> +1 to update, can you please make it as seperate patch?
> >
> > Sure.
> >
> >>
> >> So overall this can be a patchset with 4 patches:
> >> 1- Fix retry logic (nb_rx -> nb_pkt)
> >> 2- Use 'rte_ipv4_cksum()' API (instead of static 'ip_sum()')
> >> 3- User per-core varible (for 'next_flow')
> >> 4- Support ip/udp src/dst variaty of packets
> >>
> >
> > Great summary. Thanks a lot.
> >
> >>> Signed-off-by: Zhihong Wang <wangzhihong.wzh@bytedance.com>
> >>> ---
> >>>  app/test-pmd/flowgen.c | 137 +++++++++++++++++++++++++++++++------------------
> >>>  1 file changed, 86 insertions(+), 51 deletions(-)
> >>>
> >>
> >> <...>
> >>
> >>> @@ -185,30 +193,57 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
> >>>               }
> >>>               pkts_burst[nb_pkt] = pkt;
> >>>
> >>> -             next_flow = (next_flow + 1) % cfg_n_flows;
> >>> +             if (++next_udp_dst < cfg_n_udp_dst)
> >>> +                     continue;
> >>> +             next_udp_dst = 0;
> >>> +             if (++next_udp_src < cfg_n_udp_src)
> >>> +                     continue;
> >>> +             next_udp_src = 0;
> >>> +             if (++next_ip_dst < cfg_n_ip_dst)
> >>> +                     continue;
> >>> +             next_ip_dst = 0;
> >>> +             if (++next_ip_src < cfg_n_ip_src)
> >>> +                     continue;
> >>> +             next_ip_src = 0;
> >>
> >> What is the logic here, can you please clarifiy the packet generation logic both
> >> in a comment here and in the commit log?
> >
> > It's round-robin field by field. Will add the comments.
> >
>
> Thanks. If the receiving end is doing RSS based on IP address, dst address will
> change in every 100 packets and src will change in every 10000 packets. This is
> a slight behavior change.
>
> When it was only dst ip, it was simple to just increment it, not sure about it
> in this case. I wonder if we should set all randomly for each packet. I don't
> know what is the better logic here, we can discuss it more in the next version.

A more sophisticated pkt generator provides various options among
"step-by-step" / "random" / etc.

But supporting multiple fields naturally brings this implicitly. It
won't be a problem as it can be configured by setting the cfg_n_* as
we discussed above.

I think rte_rand() is a good option, anyway this can be tweaked easily
once the framework becomes shaped.

>
> >>
> >>>       }
> >>>
> >>>       nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt);
> >>>       /*
> >>>        * Retry if necessary
> >>>        */
> >>> -     if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) {
> >>> +     if (unlikely(nb_tx < nb_pkt) && fs->retry_enabled) {
> >>>               retry = 0;
> >>> -             while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) {
> >>> +             while (nb_tx < nb_pkt && retry++ < burst_tx_retry_num) {
> >>>                       rte_delay_us(burst_tx_delay_time);
> >>>                       nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue,
> >>> -                                     &pkts_burst[nb_tx], nb_rx - nb_tx);
> >>> +                                     &pkts_burst[nb_tx], nb_pkt - nb_tx);
> >>>               }
> >>
> >> +1 to this fix, thanks for it. But can you please make a seperate patch for
> >> this, with proper 'Fixes:' tag etc..
> >
> > Ok.
> >
> >>
> >>>       }
> >>> -     fs->tx_packets += nb_tx;
> >>>
> >>>       inc_tx_burst_stats(fs, nb_tx);
> >>> -     if (unlikely(nb_tx < nb_pkt)) {
> >>> -             /* Back out the flow counter. */
> >>> -             next_flow -= (nb_pkt - nb_tx);
> >>> -             while (next_flow < 0)
> >>> -                     next_flow += cfg_n_flows;
> >>> +     fs->tx_packets += nb_tx;
> >>> +     /* Catch up flow idx by actual sent. */
> >>> +     for (i = 0; i < nb_tx; ++i) {
> >>> +             RTE_PER_LCORE(_next_udp_dst) = RTE_PER_LCORE(_next_udp_dst) + 1;
> >>> +             if (RTE_PER_LCORE(_next_udp_dst) < cfg_n_udp_dst)
> >>> +                     continue;
> >>> +             RTE_PER_LCORE(_next_udp_dst) = 0;
> >>> +             RTE_PER_LCORE(_next_udp_src) = RTE_PER_LCORE(_next_udp_src) + 1;
> >>> +             if (RTE_PER_LCORE(_next_udp_src) < cfg_n_udp_src)
> >>> +                     continue;
> >>> +             RTE_PER_LCORE(_next_udp_src) = 0;
> >>> +             RTE_PER_LCORE(_next_ip_dst) = RTE_PER_LCORE(_next_ip_dst) + 1;
> >>> +             if (RTE_PER_LCORE(_next_ip_dst) < cfg_n_ip_dst)
> >>> +                     continue;
> >>> +             RTE_PER_LCORE(_next_ip_dst) = 0;
> >>> +             RTE_PER_LCORE(_next_ip_src) = RTE_PER_LCORE(_next_ip_src) + 1;
> >>> +             if (RTE_PER_LCORE(_next_ip_src) < cfg_n_ip_src)
> >>> +                     continue;
> >>> +             RTE_PER_LCORE(_next_ip_src) = 0;
> >>> +     }
> >>
> >> Why per-core variables are not used in forward function, but local variables
> >> (like 'next_ip_src' etc..) used? Is it for the performance, if so what is the
> >> impact?
> >>
> >> And why not directly assign from local variables to per-core variables, but have
> >> above catch up loop?
> >>
> >>
> >
> > Local vars are for generating pkts, global ones catch up finally when
> > nb_tx is clear.
>
> Why you are not using global ones to generate packets? This removes the need for
> catch up?

When there are multiple fields, back out the overran index caused by
dropped packets is not that straightforward -- It's the "carry" issue
in adding.

>
> > So flow indexes only increase by actual sent pkt number.
> > It serves the same purpose of the original "/* backout the flow counter */".
> > My math isn't good enough to make it look more intelligent though.
> >
>
> Maybe I am missing something, for this case why not just assign back from locals
> to globals?

As above.

However, this can be simplified if we discard the "back out"
mechanism: generate 32 pkts and send 20 of them while the rest 12 are
dropped, the difference is that is the idx gonna start from 21 or 33
next time?

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [dpdk-dev] [External] Re: [PATCH v2] app/testpmd: flowgen support ip and udp fields
  2021-08-11  2:48         ` 王志宏
@ 2021-08-11 10:31           ` Ferruh Yigit
  2021-08-12  9:32             ` 王志宏
  0 siblings, 1 reply; 32+ messages in thread
From: Ferruh Yigit @ 2021-08-11 10:31 UTC (permalink / raw)
  To: 王志宏
  Cc: dev, xiaoyun.li, Singh, Aman Deep, Igor Russkikh, Cyril Chemparathy

On 8/11/2021 3:48 AM, 王志宏 wrote:
> On Tue, Aug 10, 2021 at 5:12 PM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
>>
>> On 8/10/2021 8:57 AM, 王志宏 wrote:
>>> Thanks for the review Ferruh :)
>>>
>>> On Mon, Aug 9, 2021 at 11:18 PM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
>>>>
>>>> On 8/9/2021 7:52 AM, Zhihong Wang wrote:
>>>>> This patch aims to:
>>>>>  1. Add flexibility by supporting IP & UDP src/dst fields
>>>>
>>>> What is the reason/"use case" of this flexibility?
>>>
>>> The purpose is to emulate pkt generator behaviors.
>>>
>>
>> 'flowgen' forwarding is already to emulate pkt generator, but it was only
>> changing destination IP.
>>
>> What additional benefit does changing udp ports of the packets brings? What is
>> your usecase for this change?
> 
> Pkt generators like pktgen/trex/ixia/spirent can change various fields
> including ip/udp src/dst.
> 

But testpmd is not packet generator, it has very simple 'flowgen' forwarding
engine, I would like to understand motivation to make it more complex.

> Keeping the cfg_n_* while setting cfg_n_ip_dst = 1024 and others = 1
> makes the default behavior exactly unchanged. Do you think it makes
> sense?
> 
>>
>>>>
>>>>>  2. Improve multi-core performance by using per-core vars>
>>>>
>>>> On multi core this also has syncronization problem, OK to make it per-core. Do
>>>> you have any observed performance difference, if so how much is it?
>>>
>>> Huge difference, one example: 8 core flowgen -> rxonly results: 43
>>> Mpps (per-core) vs. 9.3 Mpps (shared), of course the numbers "varies
>>> depending on system configuration".
>>>
>>
>> Thanks for clarification.
>>
>>>>
>>>> And can you please separate this to its own patch? This can be before ip/udp update.
>>>
>>> Will do.
>>>
>>>>
>>>>> v2: fix assigning ip header cksum
>>>>>
>>>>
>>>> +1 to update, can you please make it as seperate patch?
>>>
>>> Sure.
>>>
>>>>
>>>> So overall this can be a patchset with 4 patches:
>>>> 1- Fix retry logic (nb_rx -> nb_pkt)
>>>> 2- Use 'rte_ipv4_cksum()' API (instead of static 'ip_sum()')
>>>> 3- User per-core varible (for 'next_flow')
>>>> 4- Support ip/udp src/dst variaty of packets
>>>>
>>>
>>> Great summary. Thanks a lot.
>>>
>>>>> Signed-off-by: Zhihong Wang <wangzhihong.wzh@bytedance.com>
>>>>> ---
>>>>>  app/test-pmd/flowgen.c | 137 +++++++++++++++++++++++++++++++------------------
>>>>>  1 file changed, 86 insertions(+), 51 deletions(-)
>>>>>
>>>>
>>>> <...>
>>>>
>>>>> @@ -185,30 +193,57 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
>>>>>               }
>>>>>               pkts_burst[nb_pkt] = pkt;
>>>>>
>>>>> -             next_flow = (next_flow + 1) % cfg_n_flows;
>>>>> +             if (++next_udp_dst < cfg_n_udp_dst)
>>>>> +                     continue;
>>>>> +             next_udp_dst = 0;
>>>>> +             if (++next_udp_src < cfg_n_udp_src)
>>>>> +                     continue;
>>>>> +             next_udp_src = 0;
>>>>> +             if (++next_ip_dst < cfg_n_ip_dst)
>>>>> +                     continue;
>>>>> +             next_ip_dst = 0;
>>>>> +             if (++next_ip_src < cfg_n_ip_src)
>>>>> +                     continue;
>>>>> +             next_ip_src = 0;
>>>>
>>>> What is the logic here, can you please clarifiy the packet generation logic both
>>>> in a comment here and in the commit log?
>>>
>>> It's round-robin field by field. Will add the comments.
>>>
>>
>> Thanks. If the receiving end is doing RSS based on IP address, dst address will
>> change in every 100 packets and src will change in every 10000 packets. This is
>> a slight behavior change.
>>
>> When it was only dst ip, it was simple to just increment it, not sure about it
>> in this case. I wonder if we should set all randomly for each packet. I don't
>> know what is the better logic here, we can discuss it more in the next version.
> 
> A more sophisticated pkt generator provides various options among
> "step-by-step" / "random" / etc.
> 
> But supporting multiple fields naturally brings this implicitly. It
> won't be a problem as it can be configured by setting the cfg_n_* as
> we discussed above.
> 
> I think rte_rand() is a good option, anyway this can be tweaked easily
> once the framework becomes shaped.
> 

Can be done, but do we really want to add more packet generator capability to
testpmd?

>>
>>>>
>>>>>       }
>>>>>
>>>>>       nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt);
>>>>>       /*
>>>>>        * Retry if necessary
>>>>>        */
>>>>> -     if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) {
>>>>> +     if (unlikely(nb_tx < nb_pkt) && fs->retry_enabled) {
>>>>>               retry = 0;
>>>>> -             while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) {
>>>>> +             while (nb_tx < nb_pkt && retry++ < burst_tx_retry_num) {
>>>>>                       rte_delay_us(burst_tx_delay_time);
>>>>>                       nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue,
>>>>> -                                     &pkts_burst[nb_tx], nb_rx - nb_tx);
>>>>> +                                     &pkts_burst[nb_tx], nb_pkt - nb_tx);
>>>>>               }
>>>>
>>>> +1 to this fix, thanks for it. But can you please make a seperate patch for
>>>> this, with proper 'Fixes:' tag etc..
>>>
>>> Ok.
>>>
>>>>
>>>>>       }
>>>>> -     fs->tx_packets += nb_tx;
>>>>>
>>>>>       inc_tx_burst_stats(fs, nb_tx);
>>>>> -     if (unlikely(nb_tx < nb_pkt)) {
>>>>> -             /* Back out the flow counter. */
>>>>> -             next_flow -= (nb_pkt - nb_tx);
>>>>> -             while (next_flow < 0)
>>>>> -                     next_flow += cfg_n_flows;
>>>>> +     fs->tx_packets += nb_tx;
>>>>> +     /* Catch up flow idx by actual sent. */
>>>>> +     for (i = 0; i < nb_tx; ++i) {
>>>>> +             RTE_PER_LCORE(_next_udp_dst) = RTE_PER_LCORE(_next_udp_dst) + 1;
>>>>> +             if (RTE_PER_LCORE(_next_udp_dst) < cfg_n_udp_dst)
>>>>> +                     continue;
>>>>> +             RTE_PER_LCORE(_next_udp_dst) = 0;
>>>>> +             RTE_PER_LCORE(_next_udp_src) = RTE_PER_LCORE(_next_udp_src) + 1;
>>>>> +             if (RTE_PER_LCORE(_next_udp_src) < cfg_n_udp_src)
>>>>> +                     continue;
>>>>> +             RTE_PER_LCORE(_next_udp_src) = 0;
>>>>> +             RTE_PER_LCORE(_next_ip_dst) = RTE_PER_LCORE(_next_ip_dst) + 1;
>>>>> +             if (RTE_PER_LCORE(_next_ip_dst) < cfg_n_ip_dst)
>>>>> +                     continue;
>>>>> +             RTE_PER_LCORE(_next_ip_dst) = 0;
>>>>> +             RTE_PER_LCORE(_next_ip_src) = RTE_PER_LCORE(_next_ip_src) + 1;
>>>>> +             if (RTE_PER_LCORE(_next_ip_src) < cfg_n_ip_src)
>>>>> +                     continue;
>>>>> +             RTE_PER_LCORE(_next_ip_src) = 0;
>>>>> +     }
>>>>
>>>> Why per-core variables are not used in forward function, but local variables
>>>> (like 'next_ip_src' etc..) used? Is it for the performance, if so what is the
>>>> impact?
>>>>
>>>> And why not directly assign from local variables to per-core variables, but have
>>>> above catch up loop?
>>>>
>>>>
>>>
>>> Local vars are for generating pkts, global ones catch up finally when
>>> nb_tx is clear.
>>
>> Why you are not using global ones to generate packets? This removes the need for
>> catch up?
> 
> When there are multiple fields, back out the overran index caused by
> dropped packets is not that straightforward -- It's the "carry" issue
> in adding.
> 
>>
>>> So flow indexes only increase by actual sent pkt number.
>>> It serves the same purpose of the original "/* backout the flow counter */".
>>> My math isn't good enough to make it look more intelligent though.
>>>
>>
>> Maybe I am missing something, for this case why not just assign back from locals
>> to globals?
> 
> As above.
> 
> However, this can be simplified if we discard the "back out"
> mechanism: generate 32 pkts and send 20 of them while the rest 12 are
> dropped, the difference is that is the idx gonna start from 21 or 33
> next time?
> 

I am not sure point of "back out", I think we can remove it unless there is no
objection, so receiving end can recognize failed packets.


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [dpdk-dev] [External] Re: [PATCH v2] app/testpmd: flowgen support ip and udp fields
  2021-08-11 10:31           ` Ferruh Yigit
@ 2021-08-12  9:32             ` 王志宏
  0 siblings, 0 replies; 32+ messages in thread
From: 王志宏 @ 2021-08-12  9:32 UTC (permalink / raw)
  To: Ferruh Yigit
  Cc: dev, xiaoyun.li, Singh, Aman Deep, Igor Russkikh, Cyril Chemparathy

On Wed, Aug 11, 2021 at 6:31 PM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
>
> On 8/11/2021 3:48 AM, 王志宏 wrote:
> > On Tue, Aug 10, 2021 at 5:12 PM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
> >>
> >> On 8/10/2021 8:57 AM, 王志宏 wrote:
> >>> Thanks for the review Ferruh :)
> >>>
> >>> On Mon, Aug 9, 2021 at 11:18 PM Ferruh Yigit <ferruh.yigit@intel.com> wrote:
> >>>>
> >>>> On 8/9/2021 7:52 AM, Zhihong Wang wrote:
> >>>>> This patch aims to:
> >>>>>  1. Add flexibility by supporting IP & UDP src/dst fields
> >>>>
> >>>> What is the reason/"use case" of this flexibility?
> >>>
> >>> The purpose is to emulate pkt generator behaviors.
> >>>
> >>
> >> 'flowgen' forwarding is already to emulate pkt generator, but it was only
> >> changing destination IP.
> >>
> >> What additional benefit does changing udp ports of the packets brings? What is
> >> your usecase for this change?
> >
> > Pkt generators like pktgen/trex/ixia/spirent can change various fields
> > including ip/udp src/dst.
> >
>
> But testpmd is not packet generator, it has very simple 'flowgen' forwarding
> engine, I would like to understand motivation to make it more complex.

I agree this *simplicity* point. In fact my sole intention is to make
flowgen useable for multi-core test. I'll keep the original setup in
the next patch.

>
> > Keeping the cfg_n_* while setting cfg_n_ip_dst = 1024 and others = 1
> > makes the default behavior exactly unchanged. Do you think it makes
> > sense?
> >
> >>
> >>>>
> >>>>>  2. Improve multi-core performance by using per-core vars>
> >>>>
> >>>> On multi core this also has syncronization problem, OK to make it per-core. Do
> >>>> you have any observed performance difference, if so how much is it?
> >>>
> >>> Huge difference, one example: 8 core flowgen -> rxonly results: 43
> >>> Mpps (per-core) vs. 9.3 Mpps (shared), of course the numbers "varies
> >>> depending on system configuration".
> >>>
> >>
> >> Thanks for clarification.
> >>
> >>>>
> >>>> And can you please separate this to its own patch? This can be before ip/udp update.
> >>>
> >>> Will do.
> >>>
> >>>>
> >>>>> v2: fix assigning ip header cksum
> >>>>>
> >>>>
> >>>> +1 to update, can you please make it as seperate patch?
> >>>
> >>> Sure.
> >>>
> >>>>
> >>>> So overall this can be a patchset with 4 patches:
> >>>> 1- Fix retry logic (nb_rx -> nb_pkt)
> >>>> 2- Use 'rte_ipv4_cksum()' API (instead of static 'ip_sum()')
> >>>> 3- User per-core varible (for 'next_flow')
> >>>> 4- Support ip/udp src/dst variaty of packets
> >>>>
> >>>
> >>> Great summary. Thanks a lot.
> >>>
> >>>>> Signed-off-by: Zhihong Wang <wangzhihong.wzh@bytedance.com>
> >>>>> ---
> >>>>>  app/test-pmd/flowgen.c | 137 +++++++++++++++++++++++++++++++------------------
> >>>>>  1 file changed, 86 insertions(+), 51 deletions(-)
> >>>>>
> >>>>
> >>>> <...>
> >>>>
> >>>>> @@ -185,30 +193,57 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
> >>>>>               }
> >>>>>               pkts_burst[nb_pkt] = pkt;
> >>>>>
> >>>>> -             next_flow = (next_flow + 1) % cfg_n_flows;
> >>>>> +             if (++next_udp_dst < cfg_n_udp_dst)
> >>>>> +                     continue;
> >>>>> +             next_udp_dst = 0;
> >>>>> +             if (++next_udp_src < cfg_n_udp_src)
> >>>>> +                     continue;
> >>>>> +             next_udp_src = 0;
> >>>>> +             if (++next_ip_dst < cfg_n_ip_dst)
> >>>>> +                     continue;
> >>>>> +             next_ip_dst = 0;
> >>>>> +             if (++next_ip_src < cfg_n_ip_src)
> >>>>> +                     continue;
> >>>>> +             next_ip_src = 0;
> >>>>
> >>>> What is the logic here, can you please clarifiy the packet generation logic both
> >>>> in a comment here and in the commit log?
> >>>
> >>> It's round-robin field by field. Will add the comments.
> >>>
> >>
> >> Thanks. If the receiving end is doing RSS based on IP address, dst address will
> >> change in every 100 packets and src will change in every 10000 packets. This is
> >> a slight behavior change.
> >>
> >> When it was only dst ip, it was simple to just increment it, not sure about it
> >> in this case. I wonder if we should set all randomly for each packet. I don't
> >> know what is the better logic here, we can discuss it more in the next version.
> >
> > A more sophisticated pkt generator provides various options among
> > "step-by-step" / "random" / etc.
> >
> > But supporting multiple fields naturally brings this implicitly. It
> > won't be a problem as it can be configured by setting the cfg_n_* as
> > we discussed above.
> >
> > I think rte_rand() is a good option, anyway this can be tweaked easily
> > once the framework becomes shaped.
> >
>
> Can be done, but do we really want to add more packet generator capability to
> testpmd?
>
> >>
> >>>>
> >>>>>       }
> >>>>>
> >>>>>       nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt);
> >>>>>       /*
> >>>>>        * Retry if necessary
> >>>>>        */
> >>>>> -     if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) {
> >>>>> +     if (unlikely(nb_tx < nb_pkt) && fs->retry_enabled) {
> >>>>>               retry = 0;
> >>>>> -             while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) {
> >>>>> +             while (nb_tx < nb_pkt && retry++ < burst_tx_retry_num) {
> >>>>>                       rte_delay_us(burst_tx_delay_time);
> >>>>>                       nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue,
> >>>>> -                                     &pkts_burst[nb_tx], nb_rx - nb_tx);
> >>>>> +                                     &pkts_burst[nb_tx], nb_pkt - nb_tx);
> >>>>>               }
> >>>>
> >>>> +1 to this fix, thanks for it. But can you please make a seperate patch for
> >>>> this, with proper 'Fixes:' tag etc..
> >>>
> >>> Ok.
> >>>
> >>>>
> >>>>>       }
> >>>>> -     fs->tx_packets += nb_tx;
> >>>>>
> >>>>>       inc_tx_burst_stats(fs, nb_tx);
> >>>>> -     if (unlikely(nb_tx < nb_pkt)) {
> >>>>> -             /* Back out the flow counter. */
> >>>>> -             next_flow -= (nb_pkt - nb_tx);
> >>>>> -             while (next_flow < 0)
> >>>>> -                     next_flow += cfg_n_flows;
> >>>>> +     fs->tx_packets += nb_tx;
> >>>>> +     /* Catch up flow idx by actual sent. */
> >>>>> +     for (i = 0; i < nb_tx; ++i) {
> >>>>> +             RTE_PER_LCORE(_next_udp_dst) = RTE_PER_LCORE(_next_udp_dst) + 1;
> >>>>> +             if (RTE_PER_LCORE(_next_udp_dst) < cfg_n_udp_dst)
> >>>>> +                     continue;
> >>>>> +             RTE_PER_LCORE(_next_udp_dst) = 0;
> >>>>> +             RTE_PER_LCORE(_next_udp_src) = RTE_PER_LCORE(_next_udp_src) + 1;
> >>>>> +             if (RTE_PER_LCORE(_next_udp_src) < cfg_n_udp_src)
> >>>>> +                     continue;
> >>>>> +             RTE_PER_LCORE(_next_udp_src) = 0;
> >>>>> +             RTE_PER_LCORE(_next_ip_dst) = RTE_PER_LCORE(_next_ip_dst) + 1;
> >>>>> +             if (RTE_PER_LCORE(_next_ip_dst) < cfg_n_ip_dst)
> >>>>> +                     continue;
> >>>>> +             RTE_PER_LCORE(_next_ip_dst) = 0;
> >>>>> +             RTE_PER_LCORE(_next_ip_src) = RTE_PER_LCORE(_next_ip_src) + 1;
> >>>>> +             if (RTE_PER_LCORE(_next_ip_src) < cfg_n_ip_src)
> >>>>> +                     continue;
> >>>>> +             RTE_PER_LCORE(_next_ip_src) = 0;
> >>>>> +     }
> >>>>
> >>>> Why per-core variables are not used in forward function, but local variables
> >>>> (like 'next_ip_src' etc..) used? Is it for the performance, if so what is the
> >>>> impact?
> >>>>
> >>>> And why not directly assign from local variables to per-core variables, but have
> >>>> above catch up loop?
> >>>>
> >>>>
> >>>
> >>> Local vars are for generating pkts, global ones catch up finally when
> >>> nb_tx is clear.
> >>
> >> Why you are not using global ones to generate packets? This removes the need for
> >> catch up?
> >
> > When there are multiple fields, back out the overran index caused by
> > dropped packets is not that straightforward -- It's the "carry" issue
> > in adding.
> >
> >>
> >>> So flow indexes only increase by actual sent pkt number.
> >>> It serves the same purpose of the original "/* backout the flow counter */".
> >>> My math isn't good enough to make it look more intelligent though.
> >>>
> >>
> >> Maybe I am missing something, for this case why not just assign back from locals
> >> to globals?
> >
> > As above.
> >
> > However, this can be simplified if we discard the "back out"
> > mechanism: generate 32 pkts and send 20 of them while the rest 12 are
> > dropped, the difference is that is the idx gonna start from 21 or 33
> > next time?
> >
>
> I am not sure point of "back out", I think we can remove it unless there is no
> objection, so receiving end can recognize failed packets.
>

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [dpdk-dev] [PATCH v3 0/4] app/testpmd: flowgen fixes and improvements
  2021-08-09  6:25 [dpdk-dev] [PATCH] app/testpmd: flowgen support ip and udp fields Zhihong Wang
  2021-08-09  6:52 ` [dpdk-dev] [PATCH v2] " Zhihong Wang
@ 2021-08-12 11:11 ` Zhihong Wang
  2021-08-12 11:11   ` [dpdk-dev] [PATCH v3 1/4] app/testpmd: fix tx retry in flowgen Zhihong Wang
                     ` (3 more replies)
  2021-08-12 13:18 ` [dpdk-dev] [PATCH v4 0/4] app/testpmd: flowgen fixes and improvements Zhihong Wang
  2021-08-13  8:05 ` [dpdk-dev] [PATCH v5 0/4] app/testpmd: flowgen fixes and improvements Zhihong Wang
  3 siblings, 4 replies; 32+ messages in thread
From: Zhihong Wang @ 2021-08-12 11:11 UTC (permalink / raw)
  To: dev, ferruh.yigit, xiaoyun.li, aman.deep.singh, irusskikh, cchemparathy
  Cc: Zhihong Wang

This series fixes a tx retry defect and improves multi-core performance
by using per-core variable for flow indexing.

v3: split changes and keep original flow generation logic
v2: fix assigning ip header cksum

Zhihong Wang (4):
  app/testpmd: fix tx retry in flowgen
  app/testpmd: use rte_ipv4_cksum in flowgen
  app/testpmd: record rx_burst and fwd_dropped in flowgen
  app/testpmd: use per-core variable in flowgen

 app/test-pmd/flowgen.c | 47 ++++++++++++++---------------------------------
 1 file changed, 14 insertions(+), 33 deletions(-)

-- 
2.11.0


^ permalink raw reply	[flat|nested] 32+ messages in thread

* [dpdk-dev] [PATCH v3 1/4] app/testpmd: fix tx retry in flowgen
  2021-08-12 11:11 ` [dpdk-dev] [PATCH v3 0/4] app/testpmd: flowgen fixes and improvements Zhihong Wang
@ 2021-08-12 11:11   ` Zhihong Wang
  2021-08-12 11:11   ` [dpdk-dev] [PATCH v3 2/4] app/testpmd: use rte_ipv4_cksum " Zhihong Wang
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 32+ messages in thread
From: Zhihong Wang @ 2021-08-12 11:11 UTC (permalink / raw)
  To: dev, ferruh.yigit, xiaoyun.li, aman.deep.singh, irusskikh, cchemparathy
  Cc: Zhihong Wang, stable

Fix tx_pkt number in tx retry logic.

Fixes: bf56fce1fb4 ("app/testpmd: add retry option")
Cc: stable@dpdk.org

Signed-off-by: Zhihong Wang <wangzhihong.wzh@bytedance.com>
---
 app/test-pmd/flowgen.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c
index 3bf6e1ce97..f2e6255c36 100644
--- a/app/test-pmd/flowgen.c
+++ b/app/test-pmd/flowgen.c
@@ -192,12 +192,12 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 	/*
 	 * Retry if necessary
 	 */
-	if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) {
+	if (unlikely(nb_tx < nb_pkt) && fs->retry_enabled) {
 		retry = 0;
-		while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) {
+		while (nb_tx < nb_pkt && retry++ < burst_tx_retry_num) {
 			rte_delay_us(burst_tx_delay_time);
 			nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue,
-					&pkts_burst[nb_tx], nb_rx - nb_tx);
+					&pkts_burst[nb_tx], nb_pkt - nb_tx);
 		}
 	}
 	fs->tx_packets += nb_tx;
-- 
2.11.0


^ permalink raw reply	[flat|nested] 32+ messages in thread

* [dpdk-dev] [PATCH v3 2/4] app/testpmd: use rte_ipv4_cksum in flowgen
  2021-08-12 11:11 ` [dpdk-dev] [PATCH v3 0/4] app/testpmd: flowgen fixes and improvements Zhihong Wang
  2021-08-12 11:11   ` [dpdk-dev] [PATCH v3 1/4] app/testpmd: fix tx retry in flowgen Zhihong Wang
@ 2021-08-12 11:11   ` Zhihong Wang
  2021-08-12 11:11   ` [dpdk-dev] [PATCH v3 3/4] app/testpmd: record rx_burst and fwd_dropped " Zhihong Wang
  2021-08-12 11:11   ` [dpdk-dev] [PATCH v3 4/4] app/testpmd: use per-core variable " Zhihong Wang
  3 siblings, 0 replies; 32+ messages in thread
From: Zhihong Wang @ 2021-08-12 11:11 UTC (permalink / raw)
  To: dev, ferruh.yigit, xiaoyun.li, aman.deep.singh, irusskikh, cchemparathy
  Cc: Zhihong Wang

Use the rte_ipv4_cksum API to replace local ip_sum implementation.

Signed-off-by: Zhihong Wang <wangzhihong.wzh@bytedance.com>
---
 app/test-pmd/flowgen.c | 25 +------------------------
 1 file changed, 1 insertion(+), 24 deletions(-)

diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c
index f2e6255c36..96d0cc79df 100644
--- a/app/test-pmd/flowgen.c
+++ b/app/test-pmd/flowgen.c
@@ -53,28 +53,6 @@ static struct rte_ether_addr cfg_ether_dst =
 
 #define IP_DEFTTL  64   /* from RFC 1340. */
 
-/* Use this type to inform GCC that ip_sum violates aliasing rules. */
-typedef unaligned_uint16_t alias_int16_t __attribute__((__may_alias__));
-
-static inline uint16_t
-ip_sum(const alias_int16_t *hdr, int hdr_len)
-{
-	uint32_t sum = 0;
-
-	while (hdr_len > 1)
-	{
-		sum += *hdr++;
-		if (sum & 0x80000000)
-			sum = (sum & 0xFFFF) + (sum >> 16);
-		hdr_len -= 2;
-	}
-
-	while (sum >> 16)
-		sum = (sum & 0xFFFF) + (sum >> 16);
-
-	return ~sum;
-}
-
 /*
  * Multi-flow generation mode.
  *
@@ -160,8 +138,7 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 								   next_flow);
 			ip_hdr->total_length	= RTE_CPU_TO_BE_16(pkt_size -
 								   sizeof(*eth_hdr));
-			ip_hdr->hdr_checksum	= ip_sum((const alias_int16_t *)ip_hdr,
-							 sizeof(*ip_hdr));
+			ip_hdr->hdr_checksum	= rte_ipv4_cksum(ip_hdr);
 
 			/* Initialize UDP header. */
 			udp_hdr = (struct rte_udp_hdr *)(ip_hdr + 1);
-- 
2.11.0


^ permalink raw reply	[flat|nested] 32+ messages in thread

* [dpdk-dev] [PATCH v3 3/4] app/testpmd: record rx_burst and fwd_dropped in flowgen
  2021-08-12 11:11 ` [dpdk-dev] [PATCH v3 0/4] app/testpmd: flowgen fixes and improvements Zhihong Wang
  2021-08-12 11:11   ` [dpdk-dev] [PATCH v3 1/4] app/testpmd: fix tx retry in flowgen Zhihong Wang
  2021-08-12 11:11   ` [dpdk-dev] [PATCH v3 2/4] app/testpmd: use rte_ipv4_cksum " Zhihong Wang
@ 2021-08-12 11:11   ` Zhihong Wang
  2021-08-12 11:11   ` [dpdk-dev] [PATCH v3 4/4] app/testpmd: use per-core variable " Zhihong Wang
  3 siblings, 0 replies; 32+ messages in thread
From: Zhihong Wang @ 2021-08-12 11:11 UTC (permalink / raw)
  To: dev, ferruh.yigit, xiaoyun.li, aman.deep.singh, irusskikh, cchemparathy
  Cc: Zhihong Wang

Call inc_rx_burst_stats for rx operation, and record fwd_dropped.

Signed-off-by: Zhihong Wang <wangzhihong.wzh@bytedance.com>
---
 app/test-pmd/flowgen.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c
index 96d0cc79df..229794ee9c 100644
--- a/app/test-pmd/flowgen.c
+++ b/app/test-pmd/flowgen.c
@@ -87,6 +87,7 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 	/* Receive a burst of packets and discard them. */
 	nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst,
 				 nb_pkt_per_burst);
+	inc_rx_burst_stats(fs, nb_rx);
 	fs->rx_packets += nb_rx;
 
 	for (i = 0; i < nb_rx; i++)
@@ -186,6 +187,7 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 		while (next_flow < 0)
 			next_flow += cfg_n_flows;
 
+		fs->fwd_dropped += nb_pkt - nb_tx;
 		do {
 			rte_pktmbuf_free(pkts_burst[nb_tx]);
 		} while (++nb_tx < nb_pkt);
-- 
2.11.0


^ permalink raw reply	[flat|nested] 32+ messages in thread

* [dpdk-dev] [PATCH v3 4/4] app/testpmd: use per-core variable in flowgen
  2021-08-12 11:11 ` [dpdk-dev] [PATCH v3 0/4] app/testpmd: flowgen fixes and improvements Zhihong Wang
                     ` (2 preceding siblings ...)
  2021-08-12 11:11   ` [dpdk-dev] [PATCH v3 3/4] app/testpmd: record rx_burst and fwd_dropped " Zhihong Wang
@ 2021-08-12 11:11   ` Zhihong Wang
  3 siblings, 0 replies; 32+ messages in thread
From: Zhihong Wang @ 2021-08-12 11:11 UTC (permalink / raw)
  To: dev, ferruh.yigit, xiaoyun.li, aman.deep.singh, irusskikh, cchemparathy
  Cc: Zhihong Wang

Use per-core variable for flow indexing to solve cache contention in
multi-core scenarios.

Signed-off-by: Zhihong Wang <wangzhihong.wzh@bytedance.com>
---
 app/test-pmd/flowgen.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c
index 229794ee9c..fc9dae4ab3 100644
--- a/app/test-pmd/flowgen.c
+++ b/app/test-pmd/flowgen.c
@@ -51,6 +51,8 @@ static struct rte_ether_addr cfg_ether_src =
 static struct rte_ether_addr cfg_ether_dst =
 	{{ 0x00, 0x01, 0x02, 0x03, 0x04, 0x01 }};
 
+RTE_DEFINE_PER_LCORE(int, _next_flow);
+
 #define IP_DEFTTL  64   /* from RFC 1340. */
 
 /*
@@ -80,7 +82,6 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 	uint32_t retry;
 	uint64_t tx_offloads;
 	uint64_t start_tsc = 0;
-	static int next_flow = 0;
 
 	get_start_cycles(&start_tsc);
 
@@ -136,7 +137,7 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 			ip_hdr->packet_id	= 0;
 			ip_hdr->src_addr	= rte_cpu_to_be_32(cfg_ip_src);
 			ip_hdr->dst_addr	= rte_cpu_to_be_32(cfg_ip_dst +
-								   next_flow);
+					RTE_PER_LCORE(_next_flow));
 			ip_hdr->total_length	= RTE_CPU_TO_BE_16(pkt_size -
 								   sizeof(*eth_hdr));
 			ip_hdr->hdr_checksum	= rte_ipv4_cksum(ip_hdr);
@@ -163,7 +164,8 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 		}
 		pkts_burst[nb_pkt] = pkt;
 
-		next_flow = (next_flow + 1) % cfg_n_flows;
+		RTE_PER_LCORE(_next_flow) = (RTE_PER_LCORE(_next_flow) + 1) %
+				cfg_n_flows;
 	}
 
 	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt);
@@ -183,9 +185,9 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 	inc_tx_burst_stats(fs, nb_tx);
 	if (unlikely(nb_tx < nb_pkt)) {
 		/* Back out the flow counter. */
-		next_flow -= (nb_pkt - nb_tx);
-		while (next_flow < 0)
-			next_flow += cfg_n_flows;
+		RTE_PER_LCORE(_next_flow) -= (nb_pkt - nb_tx);
+		while (RTE_PER_LCORE(_next_flow) < 0)
+			RTE_PER_LCORE(_next_flow) += cfg_n_flows;
 
 		fs->fwd_dropped += nb_pkt - nb_tx;
 		do {
-- 
2.11.0


^ permalink raw reply	[flat|nested] 32+ messages in thread

* [dpdk-dev] [PATCH v4 0/4] app/testpmd: flowgen fixes and improvements
  2021-08-09  6:25 [dpdk-dev] [PATCH] app/testpmd: flowgen support ip and udp fields Zhihong Wang
  2021-08-09  6:52 ` [dpdk-dev] [PATCH v2] " Zhihong Wang
  2021-08-12 11:11 ` [dpdk-dev] [PATCH v3 0/4] app/testpmd: flowgen fixes and improvements Zhihong Wang
@ 2021-08-12 13:18 ` Zhihong Wang
  2021-08-12 13:18   ` [dpdk-dev] [PATCH v4 1/4] app/testpmd: fix tx retry in flowgen Zhihong Wang
                     ` (3 more replies)
  2021-08-13  8:05 ` [dpdk-dev] [PATCH v5 0/4] app/testpmd: flowgen fixes and improvements Zhihong Wang
  3 siblings, 4 replies; 32+ messages in thread
From: Zhihong Wang @ 2021-08-12 13:18 UTC (permalink / raw)
  To: dev, ferruh.yigit, xiaoyun.li, aman.deep.singh, irusskikh, cchemparathy
  Cc: Zhihong Wang

This series fixes a tx retry defect and improves multi-core performance
by using per-core variable for flow indexing.

v4: use loop local variable to improve performance
v3: split changes and keep original flow generation logic
v2: fix assigning ip header cksum

Zhihong Wang (4):
  app/testpmd: fix tx retry in flowgen
  app/testpmd: use rte_ipv4_cksum in flowgen
  app/testpmd: record rx_burst and fwd_dropped in flowgen
  app/testpmd: use per-core variable in flowgen

 app/test-pmd/flowgen.c | 47 ++++++++++++++---------------------------------
 1 file changed, 14 insertions(+), 33 deletions(-)

-- 
2.11.0


^ permalink raw reply	[flat|nested] 32+ messages in thread

* [dpdk-dev] [PATCH v4 1/4] app/testpmd: fix tx retry in flowgen
  2021-08-12 13:18 ` [dpdk-dev] [PATCH v4 0/4] app/testpmd: flowgen fixes and improvements Zhihong Wang
@ 2021-08-12 13:18   ` Zhihong Wang
  2021-08-13  1:33     ` Li, Xiaoyun
  2021-08-12 13:18   ` [dpdk-dev] [PATCH v4 2/4] app/testpmd: use rte_ipv4_cksum " Zhihong Wang
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 32+ messages in thread
From: Zhihong Wang @ 2021-08-12 13:18 UTC (permalink / raw)
  To: dev, ferruh.yigit, xiaoyun.li, aman.deep.singh, irusskikh, cchemparathy
  Cc: Zhihong Wang, stable

Fix tx_pkt number in tx retry logic.

Fixes: bf56fce1fb4 ("app/testpmd: add retry option")
Cc: stable@dpdk.org

Signed-off-by: Zhihong Wang <wangzhihong.wzh@bytedance.com>
---
 app/test-pmd/flowgen.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c
index 3bf6e1ce97..f2e6255c36 100644
--- a/app/test-pmd/flowgen.c
+++ b/app/test-pmd/flowgen.c
@@ -192,12 +192,12 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 	/*
 	 * Retry if necessary
 	 */
-	if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) {
+	if (unlikely(nb_tx < nb_pkt) && fs->retry_enabled) {
 		retry = 0;
-		while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) {
+		while (nb_tx < nb_pkt && retry++ < burst_tx_retry_num) {
 			rte_delay_us(burst_tx_delay_time);
 			nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue,
-					&pkts_burst[nb_tx], nb_rx - nb_tx);
+					&pkts_burst[nb_tx], nb_pkt - nb_tx);
 		}
 	}
 	fs->tx_packets += nb_tx;
-- 
2.11.0


^ permalink raw reply	[flat|nested] 32+ messages in thread

* [dpdk-dev] [PATCH v4 2/4] app/testpmd: use rte_ipv4_cksum in flowgen
  2021-08-12 13:18 ` [dpdk-dev] [PATCH v4 0/4] app/testpmd: flowgen fixes and improvements Zhihong Wang
  2021-08-12 13:18   ` [dpdk-dev] [PATCH v4 1/4] app/testpmd: fix tx retry in flowgen Zhihong Wang
@ 2021-08-12 13:18   ` Zhihong Wang
  2021-08-13  1:37     ` Li, Xiaoyun
  2021-08-12 13:19   ` [dpdk-dev] [PATCH v4 3/4] app/testpmd: record rx_burst and fwd_dropped " Zhihong Wang
  2021-08-12 13:19   ` [dpdk-dev] [PATCH v4 4/4] app/testpmd: use per-core variable " Zhihong Wang
  3 siblings, 1 reply; 32+ messages in thread
From: Zhihong Wang @ 2021-08-12 13:18 UTC (permalink / raw)
  To: dev, ferruh.yigit, xiaoyun.li, aman.deep.singh, irusskikh, cchemparathy
  Cc: Zhihong Wang

Use the rte_ipv4_cksum API to replace local ip_sum implementation.

Signed-off-by: Zhihong Wang <wangzhihong.wzh@bytedance.com>
---
 app/test-pmd/flowgen.c | 25 +------------------------
 1 file changed, 1 insertion(+), 24 deletions(-)

diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c
index f2e6255c36..96d0cc79df 100644
--- a/app/test-pmd/flowgen.c
+++ b/app/test-pmd/flowgen.c
@@ -53,28 +53,6 @@ static struct rte_ether_addr cfg_ether_dst =
 
 #define IP_DEFTTL  64   /* from RFC 1340. */
 
-/* Use this type to inform GCC that ip_sum violates aliasing rules. */
-typedef unaligned_uint16_t alias_int16_t __attribute__((__may_alias__));
-
-static inline uint16_t
-ip_sum(const alias_int16_t *hdr, int hdr_len)
-{
-	uint32_t sum = 0;
-
-	while (hdr_len > 1)
-	{
-		sum += *hdr++;
-		if (sum & 0x80000000)
-			sum = (sum & 0xFFFF) + (sum >> 16);
-		hdr_len -= 2;
-	}
-
-	while (sum >> 16)
-		sum = (sum & 0xFFFF) + (sum >> 16);
-
-	return ~sum;
-}
-
 /*
  * Multi-flow generation mode.
  *
@@ -160,8 +138,7 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 								   next_flow);
 			ip_hdr->total_length	= RTE_CPU_TO_BE_16(pkt_size -
 								   sizeof(*eth_hdr));
-			ip_hdr->hdr_checksum	= ip_sum((const alias_int16_t *)ip_hdr,
-							 sizeof(*ip_hdr));
+			ip_hdr->hdr_checksum	= rte_ipv4_cksum(ip_hdr);
 
 			/* Initialize UDP header. */
 			udp_hdr = (struct rte_udp_hdr *)(ip_hdr + 1);
-- 
2.11.0


^ permalink raw reply	[flat|nested] 32+ messages in thread

* [dpdk-dev] [PATCH v4 3/4] app/testpmd: record rx_burst and fwd_dropped in flowgen
  2021-08-12 13:18 ` [dpdk-dev] [PATCH v4 0/4] app/testpmd: flowgen fixes and improvements Zhihong Wang
  2021-08-12 13:18   ` [dpdk-dev] [PATCH v4 1/4] app/testpmd: fix tx retry in flowgen Zhihong Wang
  2021-08-12 13:18   ` [dpdk-dev] [PATCH v4 2/4] app/testpmd: use rte_ipv4_cksum " Zhihong Wang
@ 2021-08-12 13:19   ` Zhihong Wang
  2021-08-13  1:44     ` Li, Xiaoyun
  2021-08-12 13:19   ` [dpdk-dev] [PATCH v4 4/4] app/testpmd: use per-core variable " Zhihong Wang
  3 siblings, 1 reply; 32+ messages in thread
From: Zhihong Wang @ 2021-08-12 13:19 UTC (permalink / raw)
  To: dev, ferruh.yigit, xiaoyun.li, aman.deep.singh, irusskikh, cchemparathy
  Cc: Zhihong Wang

Call inc_rx_burst_stats for rx operation, and record fwd_dropped.

Signed-off-by: Zhihong Wang <wangzhihong.wzh@bytedance.com>
---
 app/test-pmd/flowgen.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c
index 96d0cc79df..229794ee9c 100644
--- a/app/test-pmd/flowgen.c
+++ b/app/test-pmd/flowgen.c
@@ -87,6 +87,7 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 	/* Receive a burst of packets and discard them. */
 	nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst,
 				 nb_pkt_per_burst);
+	inc_rx_burst_stats(fs, nb_rx);
 	fs->rx_packets += nb_rx;
 
 	for (i = 0; i < nb_rx; i++)
@@ -186,6 +187,7 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 		while (next_flow < 0)
 			next_flow += cfg_n_flows;
 
+		fs->fwd_dropped += nb_pkt - nb_tx;
 		do {
 			rte_pktmbuf_free(pkts_burst[nb_tx]);
 		} while (++nb_tx < nb_pkt);
-- 
2.11.0


^ permalink raw reply	[flat|nested] 32+ messages in thread

* [dpdk-dev] [PATCH v4 4/4] app/testpmd: use per-core variable in flowgen
  2021-08-12 13:18 ` [dpdk-dev] [PATCH v4 0/4] app/testpmd: flowgen fixes and improvements Zhihong Wang
                     ` (2 preceding siblings ...)
  2021-08-12 13:19   ` [dpdk-dev] [PATCH v4 3/4] app/testpmd: record rx_burst and fwd_dropped " Zhihong Wang
@ 2021-08-12 13:19   ` Zhihong Wang
  2021-08-13  1:56     ` Li, Xiaoyun
  3 siblings, 1 reply; 32+ messages in thread
From: Zhihong Wang @ 2021-08-12 13:19 UTC (permalink / raw)
  To: dev, ferruh.yigit, xiaoyun.li, aman.deep.singh, irusskikh, cchemparathy
  Cc: Zhihong Wang

Use per-core variable for flow indexing to solve cache contention in
multi-core scenarios.

v4: use loop local variable to improve performance

Signed-off-by: Zhihong Wang <wangzhihong.wzh@bytedance.com>
---
 app/test-pmd/flowgen.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c
index 229794ee9c..b541485304 100644
--- a/app/test-pmd/flowgen.c
+++ b/app/test-pmd/flowgen.c
@@ -53,6 +53,8 @@ static struct rte_ether_addr cfg_ether_dst =
 
 #define IP_DEFTTL  64   /* from RFC 1340. */
 
+RTE_DEFINE_PER_LCORE(int, _next_flow);
+
 /*
  * Multi-flow generation mode.
  *
@@ -80,7 +82,7 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 	uint32_t retry;
 	uint64_t tx_offloads;
 	uint64_t start_tsc = 0;
-	static int next_flow = 0;
+	int next_flow = RTE_PER_LCORE(_next_flow);
 
 	get_start_cycles(&start_tsc);
 
@@ -193,6 +195,8 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 		} while (++nb_tx < nb_pkt);
 	}
 
+	RTE_PER_LCORE(_next_flow) = next_flow;
+
 	get_end_cycles(fs, start_tsc);
 }
 
-- 
2.11.0


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [dpdk-dev] [PATCH v4 1/4] app/testpmd: fix tx retry in flowgen
  2021-08-12 13:18   ` [dpdk-dev] [PATCH v4 1/4] app/testpmd: fix tx retry in flowgen Zhihong Wang
@ 2021-08-13  1:33     ` Li, Xiaoyun
  2021-08-13  2:27       ` [dpdk-dev] [External] " 王志宏
  0 siblings, 1 reply; 32+ messages in thread
From: Li, Xiaoyun @ 2021-08-13  1:33 UTC (permalink / raw)
  To: Zhihong Wang, dev, Yigit, Ferruh, Singh, Aman Deep, irusskikh,
	cchemparathy
  Cc: stable

Hi

> -----Original Message-----
> From: Zhihong Wang <wangzhihong.wzh@bytedance.com>
> Sent: Thursday, August 12, 2021 21:19
> To: dev@dpdk.org; Yigit, Ferruh <ferruh.yigit@intel.com>; Li, Xiaoyun
> <xiaoyun.li@intel.com>; Singh, Aman Deep <aman.deep.singh@intel.com>;
> irusskikh@marvell.com; cchemparathy@tilera.com
> Cc: Zhihong Wang <wangzhihong.wzh@bytedance.com>; stable@dpdk.org
> Subject: [PATCH v4 1/4] app/testpmd: fix tx retry in flowgen
> 
> Fix tx_pkt number in tx retry logic.
> 
> Fixes: bf56fce1fb4 ("app/testpmd: add retry option")

Missing one character, it should be 12.
Fixes: bf56fce1fb45 ("app/testpmd: add retry option")

Except this, Acked-by: Xiaoyun Li <xiaoyun.li@intel.com>

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [dpdk-dev] [PATCH v4 2/4] app/testpmd: use rte_ipv4_cksum in flowgen
  2021-08-12 13:18   ` [dpdk-dev] [PATCH v4 2/4] app/testpmd: use rte_ipv4_cksum " Zhihong Wang
@ 2021-08-13  1:37     ` Li, Xiaoyun
  0 siblings, 0 replies; 32+ messages in thread
From: Li, Xiaoyun @ 2021-08-13  1:37 UTC (permalink / raw)
  To: Zhihong Wang, dev, Yigit, Ferruh, Singh, Aman Deep, irusskikh,
	cchemparathy

Hi

> -----Original Message-----
> From: Zhihong Wang <wangzhihong.wzh@bytedance.com>
> Sent: Thursday, August 12, 2021 21:19
> To: dev@dpdk.org; Yigit, Ferruh <ferruh.yigit@intel.com>; Li, Xiaoyun
> <xiaoyun.li@intel.com>; Singh, Aman Deep <aman.deep.singh@intel.com>;
> irusskikh@marvell.com; cchemparathy@tilera.com
> Cc: Zhihong Wang <wangzhihong.wzh@bytedance.com>
> Subject: [PATCH v4 2/4] app/testpmd: use rte_ipv4_cksum in flowgen
> 
> Use the rte_ipv4_cksum API to replace local ip_sum implementation.
> 
> Signed-off-by: Zhihong Wang <wangzhihong.wzh@bytedance.com>
> ---
>  app/test-pmd/flowgen.c | 25 +------------------------
>  1 file changed, 1 insertion(+), 24 deletions(-)
> 

Acked-by: Xiaoyun Li <xiaoyun.li@intel.com>

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [dpdk-dev] [PATCH v4 3/4] app/testpmd: record rx_burst and fwd_dropped in flowgen
  2021-08-12 13:19   ` [dpdk-dev] [PATCH v4 3/4] app/testpmd: record rx_burst and fwd_dropped " Zhihong Wang
@ 2021-08-13  1:44     ` Li, Xiaoyun
  0 siblings, 0 replies; 32+ messages in thread
From: Li, Xiaoyun @ 2021-08-13  1:44 UTC (permalink / raw)
  To: Zhihong Wang, dev, Yigit, Ferruh, Singh, Aman Deep, irusskikh,
	cchemparathy

> -----Original Message-----
> From: Zhihong Wang <wangzhihong.wzh@bytedance.com>
> Sent: Thursday, August 12, 2021 21:19
> To: dev@dpdk.org; Yigit, Ferruh <ferruh.yigit@intel.com>; Li, Xiaoyun
> <xiaoyun.li@intel.com>; Singh, Aman Deep <aman.deep.singh@intel.com>;
> irusskikh@marvell.com; cchemparathy@tilera.com
> Cc: Zhihong Wang <wangzhihong.wzh@bytedance.com>
> Subject: [PATCH v4 3/4] app/testpmd: record rx_burst and fwd_dropped in
> flowgen
> 
> Call inc_rx_burst_stats for rx operation, and record fwd_dropped.
> 
> Signed-off-by: Zhihong Wang <wangzhihong.wzh@bytedance.com>
> ---
>  app/test-pmd/flowgen.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
Acked-by: Xiaoyun Li <xiaoyun.li@intel.com>

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [dpdk-dev] [PATCH v4 4/4] app/testpmd: use per-core variable in flowgen
  2021-08-12 13:19   ` [dpdk-dev] [PATCH v4 4/4] app/testpmd: use per-core variable " Zhihong Wang
@ 2021-08-13  1:56     ` Li, Xiaoyun
  2021-08-13  2:35       ` [dpdk-dev] [External] " 王志宏
  0 siblings, 1 reply; 32+ messages in thread
From: Li, Xiaoyun @ 2021-08-13  1:56 UTC (permalink / raw)
  To: Zhihong Wang, dev, Yigit, Ferruh, Singh, Aman Deep, irusskikh,
	cchemparathy


> -----Original Message-----
> From: Zhihong Wang <wangzhihong.wzh@bytedance.com>
> Sent: Thursday, August 12, 2021 21:19
> To: dev@dpdk.org; Yigit, Ferruh <ferruh.yigit@intel.com>; Li, Xiaoyun
> <xiaoyun.li@intel.com>; Singh, Aman Deep <aman.deep.singh@intel.com>;
> irusskikh@marvell.com; cchemparathy@tilera.com
> Cc: Zhihong Wang <wangzhihong.wzh@bytedance.com>
> Subject: [PATCH v4 4/4] app/testpmd: use per-core variable in flowgen
> 
> Use per-core variable for flow indexing to solve cache contention in multi-core
> scenarios.
> 
> v4: use loop local variable to improve performance

Usually, the changes should be after sign-off and "---" not in commit log.

> 
> Signed-off-by: Zhihong Wang <wangzhihong.wzh@bytedance.com>
> ---
>  app/test-pmd/flowgen.c | 6 +++++-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 

Acked-by: Xiaoyun Li <xiaoyun.li@intel.com>

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [dpdk-dev] [External] RE: [PATCH v4 1/4] app/testpmd: fix tx retry in flowgen
  2021-08-13  1:33     ` Li, Xiaoyun
@ 2021-08-13  2:27       ` 王志宏
  0 siblings, 0 replies; 32+ messages in thread
From: 王志宏 @ 2021-08-13  2:27 UTC (permalink / raw)
  To: Li, Xiaoyun
  Cc: dev, Yigit, Ferruh, Singh, Aman Deep, irusskikh, cchemparathy, stable

On Fri, Aug 13, 2021 at 9:33 AM Li, Xiaoyun <xiaoyun.li@intel.com> wrote:
>
> Hi
>
> > -----Original Message-----
> > From: Zhihong Wang <wangzhihong.wzh@bytedance.com>
> > Sent: Thursday, August 12, 2021 21:19
> > To: dev@dpdk.org; Yigit, Ferruh <ferruh.yigit@intel.com>; Li, Xiaoyun
> > <xiaoyun.li@intel.com>; Singh, Aman Deep <aman.deep.singh@intel.com>;
> > irusskikh@marvell.com; cchemparathy@tilera.com
> > Cc: Zhihong Wang <wangzhihong.wzh@bytedance.com>; stable@dpdk.org
> > Subject: [PATCH v4 1/4] app/testpmd: fix tx retry in flowgen
> >
> > Fix tx_pkt number in tx retry logic.
> >
> > Fixes: bf56fce1fb4 ("app/testpmd: add retry option")
>
> Missing one character, it should be 12.

Got it. Thanks Xiaoyun.

> Fixes: bf56fce1fb45 ("app/testpmd: add retry option")
>
> Except this, Acked-by: Xiaoyun Li <xiaoyun.li@intel.com>

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [dpdk-dev] [External] RE: [PATCH v4 4/4] app/testpmd: use per-core variable in flowgen
  2021-08-13  1:56     ` Li, Xiaoyun
@ 2021-08-13  2:35       ` 王志宏
  0 siblings, 0 replies; 32+ messages in thread
From: 王志宏 @ 2021-08-13  2:35 UTC (permalink / raw)
  To: Li, Xiaoyun; +Cc: dev, Yigit, Ferruh, Singh, Aman Deep, irusskikh, cchemparathy

On Fri, Aug 13, 2021 at 9:56 AM Li, Xiaoyun <xiaoyun.li@intel.com> wrote:
>
>
> > -----Original Message-----
> > From: Zhihong Wang <wangzhihong.wzh@bytedance.com>
> > Sent: Thursday, August 12, 2021 21:19
> > To: dev@dpdk.org; Yigit, Ferruh <ferruh.yigit@intel.com>; Li, Xiaoyun
> > <xiaoyun.li@intel.com>; Singh, Aman Deep <aman.deep.singh@intel.com>;
> > irusskikh@marvell.com; cchemparathy@tilera.com
> > Cc: Zhihong Wang <wangzhihong.wzh@bytedance.com>
> > Subject: [PATCH v4 4/4] app/testpmd: use per-core variable in flowgen
> >
> > Use per-core variable for flow indexing to solve cache contention in multi-core
> > scenarios.
> >
> > v4: use loop local variable to improve performance
>
> Usually, the changes should be after sign-off and "---" not in commit log.

Ok. I'll wait to see if there's more comments and then send a v5 to
correct these. Thanks.

>
> >
> > Signed-off-by: Zhihong Wang <wangzhihong.wzh@bytedance.com>
> > ---
> >  app/test-pmd/flowgen.c | 6 +++++-
> >  1 file changed, 5 insertions(+), 1 deletion(-)
> >
>
> Acked-by: Xiaoyun Li <xiaoyun.li@intel.com>

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [dpdk-dev] [PATCH v5 0/4] app/testpmd: flowgen fixes and improvements
  2021-08-09  6:25 [dpdk-dev] [PATCH] app/testpmd: flowgen support ip and udp fields Zhihong Wang
                   ` (2 preceding siblings ...)
  2021-08-12 13:18 ` [dpdk-dev] [PATCH v4 0/4] app/testpmd: flowgen fixes and improvements Zhihong Wang
@ 2021-08-13  8:05 ` Zhihong Wang
  2021-08-13  8:05   ` [dpdk-dev] [PATCH v5 1/4] app/testpmd: fix tx retry in flowgen Zhihong Wang
                     ` (4 more replies)
  3 siblings, 5 replies; 32+ messages in thread
From: Zhihong Wang @ 2021-08-13  8:05 UTC (permalink / raw)
  To: dev, ferruh.yigit, xiaoyun.li, aman.deep.singh, irusskikh, cchemparathy
  Cc: Zhihong Wang

This series fixes a tx retry defect and improves multi-core performance
by using per-core variable for flow indexing.

v5: replace modulo operation to improve performance
v4: use loop local variable to improve performance
v3: split changes and keep original flow generation logic
v2: fix assigning ip header cksum

Zhihong Wang (4):
  app/testpmd: fix tx retry in flowgen
  app/testpmd: use rte_ipv4_cksum in flowgen
  app/testpmd: record rx_burst and fwd_dropped in flowgen
  app/testpmd: use per-core variable in flowgen

 app/test-pmd/flowgen.c | 47 ++++++++++++++---------------------------------
 1 file changed, 14 insertions(+), 33 deletions(-)

-- 
2.11.0


^ permalink raw reply	[flat|nested] 32+ messages in thread

* [dpdk-dev] [PATCH v5 1/4] app/testpmd: fix tx retry in flowgen
  2021-08-13  8:05 ` [dpdk-dev] [PATCH v5 0/4] app/testpmd: flowgen fixes and improvements Zhihong Wang
@ 2021-08-13  8:05   ` Zhihong Wang
  2021-08-13  8:05   ` [dpdk-dev] [PATCH v5 2/4] app/testpmd: use rte_ipv4_cksum " Zhihong Wang
                     ` (3 subsequent siblings)
  4 siblings, 0 replies; 32+ messages in thread
From: Zhihong Wang @ 2021-08-13  8:05 UTC (permalink / raw)
  To: dev, ferruh.yigit, xiaoyun.li, aman.deep.singh, irusskikh, cchemparathy
  Cc: Zhihong Wang, stable

Fix tx_pkt number in tx retry logic.

Fixes: bf56fce1fb45 ("app/testpmd: add retry option")
Cc: stable@dpdk.org

Signed-off-by: Zhihong Wang <wangzhihong.wzh@bytedance.com>
Acked-by: Xiaoyun Li <xiaoyun.li@intel.com>
---
 app/test-pmd/flowgen.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c
index 3bf6e1ce97..f2e6255c36 100644
--- a/app/test-pmd/flowgen.c
+++ b/app/test-pmd/flowgen.c
@@ -192,12 +192,12 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 	/*
 	 * Retry if necessary
 	 */
-	if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) {
+	if (unlikely(nb_tx < nb_pkt) && fs->retry_enabled) {
 		retry = 0;
-		while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) {
+		while (nb_tx < nb_pkt && retry++ < burst_tx_retry_num) {
 			rte_delay_us(burst_tx_delay_time);
 			nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue,
-					&pkts_burst[nb_tx], nb_rx - nb_tx);
+					&pkts_burst[nb_tx], nb_pkt - nb_tx);
 		}
 	}
 	fs->tx_packets += nb_tx;
-- 
2.11.0


^ permalink raw reply	[flat|nested] 32+ messages in thread

* [dpdk-dev] [PATCH v5 2/4] app/testpmd: use rte_ipv4_cksum in flowgen
  2021-08-13  8:05 ` [dpdk-dev] [PATCH v5 0/4] app/testpmd: flowgen fixes and improvements Zhihong Wang
  2021-08-13  8:05   ` [dpdk-dev] [PATCH v5 1/4] app/testpmd: fix tx retry in flowgen Zhihong Wang
@ 2021-08-13  8:05   ` Zhihong Wang
  2021-08-13  8:05   ` [dpdk-dev] [PATCH v5 3/4] app/testpmd: record rx_burst and fwd_dropped " Zhihong Wang
                     ` (2 subsequent siblings)
  4 siblings, 0 replies; 32+ messages in thread
From: Zhihong Wang @ 2021-08-13  8:05 UTC (permalink / raw)
  To: dev, ferruh.yigit, xiaoyun.li, aman.deep.singh, irusskikh, cchemparathy
  Cc: Zhihong Wang

Use the rte_ipv4_cksum API to replace local ip_sum implementation.

Signed-off-by: Zhihong Wang <wangzhihong.wzh@bytedance.com>
Acked-by: Xiaoyun Li <xiaoyun.li@intel.com>
---
 app/test-pmd/flowgen.c | 25 +------------------------
 1 file changed, 1 insertion(+), 24 deletions(-)

diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c
index f2e6255c36..96d0cc79df 100644
--- a/app/test-pmd/flowgen.c
+++ b/app/test-pmd/flowgen.c
@@ -53,28 +53,6 @@ static struct rte_ether_addr cfg_ether_dst =
 
 #define IP_DEFTTL  64   /* from RFC 1340. */
 
-/* Use this type to inform GCC that ip_sum violates aliasing rules. */
-typedef unaligned_uint16_t alias_int16_t __attribute__((__may_alias__));
-
-static inline uint16_t
-ip_sum(const alias_int16_t *hdr, int hdr_len)
-{
-	uint32_t sum = 0;
-
-	while (hdr_len > 1)
-	{
-		sum += *hdr++;
-		if (sum & 0x80000000)
-			sum = (sum & 0xFFFF) + (sum >> 16);
-		hdr_len -= 2;
-	}
-
-	while (sum >> 16)
-		sum = (sum & 0xFFFF) + (sum >> 16);
-
-	return ~sum;
-}
-
 /*
  * Multi-flow generation mode.
  *
@@ -160,8 +138,7 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 								   next_flow);
 			ip_hdr->total_length	= RTE_CPU_TO_BE_16(pkt_size -
 								   sizeof(*eth_hdr));
-			ip_hdr->hdr_checksum	= ip_sum((const alias_int16_t *)ip_hdr,
-							 sizeof(*ip_hdr));
+			ip_hdr->hdr_checksum	= rte_ipv4_cksum(ip_hdr);
 
 			/* Initialize UDP header. */
 			udp_hdr = (struct rte_udp_hdr *)(ip_hdr + 1);
-- 
2.11.0


^ permalink raw reply	[flat|nested] 32+ messages in thread

* [dpdk-dev] [PATCH v5 3/4] app/testpmd: record rx_burst and fwd_dropped in flowgen
  2021-08-13  8:05 ` [dpdk-dev] [PATCH v5 0/4] app/testpmd: flowgen fixes and improvements Zhihong Wang
  2021-08-13  8:05   ` [dpdk-dev] [PATCH v5 1/4] app/testpmd: fix tx retry in flowgen Zhihong Wang
  2021-08-13  8:05   ` [dpdk-dev] [PATCH v5 2/4] app/testpmd: use rte_ipv4_cksum " Zhihong Wang
@ 2021-08-13  8:05   ` Zhihong Wang
  2021-08-13  8:05   ` [dpdk-dev] [PATCH v5 4/4] app/testpmd: use per-core variable " Zhihong Wang
  2021-08-24 17:21   ` [dpdk-dev] [PATCH v5 0/4] app/testpmd: flowgen fixes and improvements Ferruh Yigit
  4 siblings, 0 replies; 32+ messages in thread
From: Zhihong Wang @ 2021-08-13  8:05 UTC (permalink / raw)
  To: dev, ferruh.yigit, xiaoyun.li, aman.deep.singh, irusskikh, cchemparathy
  Cc: Zhihong Wang

Call inc_rx_burst_stats for rx operation, and record fwd_dropped.

Signed-off-by: Zhihong Wang <wangzhihong.wzh@bytedance.com>
Acked-by: Xiaoyun Li <xiaoyun.li@intel.com>
---
 app/test-pmd/flowgen.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c
index 96d0cc79df..229794ee9c 100644
--- a/app/test-pmd/flowgen.c
+++ b/app/test-pmd/flowgen.c
@@ -87,6 +87,7 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 	/* Receive a burst of packets and discard them. */
 	nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst,
 				 nb_pkt_per_burst);
+	inc_rx_burst_stats(fs, nb_rx);
 	fs->rx_packets += nb_rx;
 
 	for (i = 0; i < nb_rx; i++)
@@ -186,6 +187,7 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 		while (next_flow < 0)
 			next_flow += cfg_n_flows;
 
+		fs->fwd_dropped += nb_pkt - nb_tx;
 		do {
 			rte_pktmbuf_free(pkts_burst[nb_tx]);
 		} while (++nb_tx < nb_pkt);
-- 
2.11.0


^ permalink raw reply	[flat|nested] 32+ messages in thread

* [dpdk-dev] [PATCH v5 4/4] app/testpmd: use per-core variable in flowgen
  2021-08-13  8:05 ` [dpdk-dev] [PATCH v5 0/4] app/testpmd: flowgen fixes and improvements Zhihong Wang
                     ` (2 preceding siblings ...)
  2021-08-13  8:05   ` [dpdk-dev] [PATCH v5 3/4] app/testpmd: record rx_burst and fwd_dropped " Zhihong Wang
@ 2021-08-13  8:05   ` Zhihong Wang
  2021-08-24 17:21   ` [dpdk-dev] [PATCH v5 0/4] app/testpmd: flowgen fixes and improvements Ferruh Yigit
  4 siblings, 0 replies; 32+ messages in thread
From: Zhihong Wang @ 2021-08-13  8:05 UTC (permalink / raw)
  To: dev, ferruh.yigit, xiaoyun.li, aman.deep.singh, irusskikh, cchemparathy
  Cc: Zhihong Wang

Use per-core variable for flow indexing to solve cache contention in
multi-core scenarios.

Signed-off-by: Zhihong Wang <wangzhihong.wzh@bytedance.com>
Acked-by: Xiaoyun Li <xiaoyun.li@intel.com>
---
v5: replace modulo operation to improve performance
v4: use loop local variable to improve performance

 app/test-pmd/flowgen.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c
index 229794ee9c..9348618d0f 100644
--- a/app/test-pmd/flowgen.c
+++ b/app/test-pmd/flowgen.c
@@ -53,6 +53,8 @@ static struct rte_ether_addr cfg_ether_dst =
 
 #define IP_DEFTTL  64   /* from RFC 1340. */
 
+RTE_DEFINE_PER_LCORE(int, _next_flow);
+
 /*
  * Multi-flow generation mode.
  *
@@ -80,7 +82,7 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 	uint32_t retry;
 	uint64_t tx_offloads;
 	uint64_t start_tsc = 0;
-	static int next_flow = 0;
+	int next_flow = RTE_PER_LCORE(_next_flow);
 
 	get_start_cycles(&start_tsc);
 
@@ -163,7 +165,8 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 		}
 		pkts_burst[nb_pkt] = pkt;
 
-		next_flow = (next_flow + 1) % cfg_n_flows;
+		if (++next_flow >= (int)cfg_n_flows)
+			next_flow = 0;
 	}
 
 	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt);
@@ -193,6 +196,8 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 		} while (++nb_tx < nb_pkt);
 	}
 
+	RTE_PER_LCORE(_next_flow) = next_flow;
+
 	get_end_cycles(fs, start_tsc);
 }
 
-- 
2.11.0


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [dpdk-dev] [PATCH v5 0/4] app/testpmd: flowgen fixes and improvements
  2021-08-13  8:05 ` [dpdk-dev] [PATCH v5 0/4] app/testpmd: flowgen fixes and improvements Zhihong Wang
                     ` (3 preceding siblings ...)
  2021-08-13  8:05   ` [dpdk-dev] [PATCH v5 4/4] app/testpmd: use per-core variable " Zhihong Wang
@ 2021-08-24 17:21   ` Ferruh Yigit
  4 siblings, 0 replies; 32+ messages in thread
From: Ferruh Yigit @ 2021-08-24 17:21 UTC (permalink / raw)
  To: Zhihong Wang, dev, xiaoyun.li, aman.deep.singh, irusskikh, cchemparathy

On 8/13/2021 9:05 AM, Zhihong Wang wrote:
> This series fixes a tx retry defect and improves multi-core performance
> by using per-core variable for flow indexing.
> 
> v5: replace modulo operation to improve performance
> v4: use loop local variable to improve performance
> v3: split changes and keep original flow generation logic
> v2: fix assigning ip header cksum
> 
> Zhihong Wang (4):
>   app/testpmd: fix tx retry in flowgen
>   app/testpmd: use rte_ipv4_cksum in flowgen
>   app/testpmd: record rx_burst and fwd_dropped in flowgen
>   app/testpmd: use per-core variable in flowgen
> 

Series applied to dpdk-next-net/main, thanks.

^ permalink raw reply	[flat|nested] 32+ messages in thread

end of thread, other threads:[~2021-08-24 17:22 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-09  6:25 [dpdk-dev] [PATCH] app/testpmd: flowgen support ip and udp fields Zhihong Wang
2021-08-09  6:52 ` [dpdk-dev] [PATCH v2] " Zhihong Wang
2021-08-09 12:21   ` Singh, Aman Deep
2021-08-10  7:30     ` [dpdk-dev] [External] " 王志宏
2021-08-09 15:18   ` [dpdk-dev] " Ferruh Yigit
2021-08-10  7:57     ` [dpdk-dev] [External] " 王志宏
2021-08-10  9:12       ` Ferruh Yigit
2021-08-11  2:48         ` 王志宏
2021-08-11 10:31           ` Ferruh Yigit
2021-08-12  9:32             ` 王志宏
2021-08-12 11:11 ` [dpdk-dev] [PATCH v3 0/4] app/testpmd: flowgen fixes and improvements Zhihong Wang
2021-08-12 11:11   ` [dpdk-dev] [PATCH v3 1/4] app/testpmd: fix tx retry in flowgen Zhihong Wang
2021-08-12 11:11   ` [dpdk-dev] [PATCH v3 2/4] app/testpmd: use rte_ipv4_cksum " Zhihong Wang
2021-08-12 11:11   ` [dpdk-dev] [PATCH v3 3/4] app/testpmd: record rx_burst and fwd_dropped " Zhihong Wang
2021-08-12 11:11   ` [dpdk-dev] [PATCH v3 4/4] app/testpmd: use per-core variable " Zhihong Wang
2021-08-12 13:18 ` [dpdk-dev] [PATCH v4 0/4] app/testpmd: flowgen fixes and improvements Zhihong Wang
2021-08-12 13:18   ` [dpdk-dev] [PATCH v4 1/4] app/testpmd: fix tx retry in flowgen Zhihong Wang
2021-08-13  1:33     ` Li, Xiaoyun
2021-08-13  2:27       ` [dpdk-dev] [External] " 王志宏
2021-08-12 13:18   ` [dpdk-dev] [PATCH v4 2/4] app/testpmd: use rte_ipv4_cksum " Zhihong Wang
2021-08-13  1:37     ` Li, Xiaoyun
2021-08-12 13:19   ` [dpdk-dev] [PATCH v4 3/4] app/testpmd: record rx_burst and fwd_dropped " Zhihong Wang
2021-08-13  1:44     ` Li, Xiaoyun
2021-08-12 13:19   ` [dpdk-dev] [PATCH v4 4/4] app/testpmd: use per-core variable " Zhihong Wang
2021-08-13  1:56     ` Li, Xiaoyun
2021-08-13  2:35       ` [dpdk-dev] [External] " 王志宏
2021-08-13  8:05 ` [dpdk-dev] [PATCH v5 0/4] app/testpmd: flowgen fixes and improvements Zhihong Wang
2021-08-13  8:05   ` [dpdk-dev] [PATCH v5 1/4] app/testpmd: fix tx retry in flowgen Zhihong Wang
2021-08-13  8:05   ` [dpdk-dev] [PATCH v5 2/4] app/testpmd: use rte_ipv4_cksum " Zhihong Wang
2021-08-13  8:05   ` [dpdk-dev] [PATCH v5 3/4] app/testpmd: record rx_burst and fwd_dropped " Zhihong Wang
2021-08-13  8:05   ` [dpdk-dev] [PATCH v5 4/4] app/testpmd: use per-core variable " Zhihong Wang
2021-08-24 17:21   ` [dpdk-dev] [PATCH v5 0/4] app/testpmd: flowgen fixes and improvements Ferruh Yigit

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).