DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [RFC PATCH v2] app/testpmd: tx pkt clones parameter in flowgen
@ 2020-09-25  9:07 Igor Russkikh
  2020-10-12 18:13 ` Ferruh Yigit
  0 siblings, 1 reply; 3+ messages in thread
From: Igor Russkikh @ 2020-09-25  9:07 UTC (permalink / raw)
  To: dev
  Cc: Rasesh Mody, Devendra Singh Rawat, Wenzhuo Lu, Beilei Xing,
	Bernard Iremonger, Stephen Hemminger, Igor Russkikh

When testing high performance numbers, it is often that CPU performance
limits the max values the device can reach (both in pps and in gbps)

Here instead of recreating each packet separately, we use clones counter
to resend the same mbuf to the line multiple times.

PMDs handle that transparently due to reference counting inside of mbuf.

Verified on Marvell qede and atlantic PMDs.

v2: increment ref counter for each mbuf pointer copy

Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
---
 app/test-pmd/flowgen.c                | 101 ++++++++++++++------------
 app/test-pmd/parameters.c             |  12 +++
 app/test-pmd/testpmd.c                |   1 +
 app/test-pmd/testpmd.h                |   1 +
 doc/guides/testpmd_app_ug/run_app.rst |   7 ++
 5 files changed, 75 insertions(+), 47 deletions(-)

diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c
index acf3e2460..f639155c7 100644
--- a/app/test-pmd/flowgen.c
+++ b/app/test-pmd/flowgen.c
@@ -94,6 +94,7 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 	uint16_t nb_rx;
 	uint16_t nb_tx;
 	uint16_t nb_pkt;
+	uint16_t nb_clones = nb_pkt_clones;
 	uint16_t i;
 	uint32_t retry;
 	uint64_t tx_offloads;
@@ -123,53 +124,59 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 		ol_flags |= PKT_TX_MACSEC;
 
 	for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
-		pkt = rte_mbuf_raw_alloc(mbp);
-		if (!pkt)
-			break;
-
-		pkt->data_len = pkt_size;
-		pkt->next = NULL;
-
-		/* Initialize Ethernet header. */
-		eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
-		rte_ether_addr_copy(&cfg_ether_dst, &eth_hdr->d_addr);
-		rte_ether_addr_copy(&cfg_ether_src, &eth_hdr->s_addr);
-		eth_hdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
-
-		/* Initialize IP header. */
-		ip_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
-		memset(ip_hdr, 0, sizeof(*ip_hdr));
-		ip_hdr->version_ihl	= RTE_IPV4_VHL_DEF;
-		ip_hdr->type_of_service	= 0;
-		ip_hdr->fragment_offset	= 0;
-		ip_hdr->time_to_live	= IP_DEFTTL;
-		ip_hdr->next_proto_id	= IPPROTO_UDP;
-		ip_hdr->packet_id	= 0;
-		ip_hdr->src_addr	= rte_cpu_to_be_32(cfg_ip_src);
-		ip_hdr->dst_addr	= rte_cpu_to_be_32(cfg_ip_dst +
-							   next_flow);
-		ip_hdr->total_length	= RTE_CPU_TO_BE_16(pkt_size -
-							   sizeof(*eth_hdr));
-		ip_hdr->hdr_checksum	= ip_sum((unaligned_uint16_t *)ip_hdr,
-						 sizeof(*ip_hdr));
-
-		/* Initialize UDP header. */
-		udp_hdr = (struct rte_udp_hdr *)(ip_hdr + 1);
-		udp_hdr->src_port	= rte_cpu_to_be_16(cfg_udp_src);
-		udp_hdr->dst_port	= rte_cpu_to_be_16(cfg_udp_dst);
-		udp_hdr->dgram_cksum	= 0; /* No UDP checksum. */
-		udp_hdr->dgram_len	= RTE_CPU_TO_BE_16(pkt_size -
-							   sizeof(*eth_hdr) -
-							   sizeof(*ip_hdr));
-		pkt->nb_segs		= 1;
-		pkt->pkt_len		= pkt_size;
-		pkt->ol_flags		&= EXT_ATTACHED_MBUF;
-		pkt->ol_flags		|= ol_flags;
-		pkt->vlan_tci		= vlan_tci;
-		pkt->vlan_tci_outer	= vlan_tci_outer;
-		pkt->l2_len		= sizeof(struct rte_ether_hdr);
-		pkt->l3_len		= sizeof(struct rte_ipv4_hdr);
-		pkts_burst[nb_pkt]	= pkt;
+		if (!nb_pkt || !nb_clones) {
+			nb_clones = nb_pkt_clones;
+			pkt = rte_mbuf_raw_alloc(mbp);
+			if (!pkt)
+				break;
+
+			pkt->data_len = pkt_size;
+			pkt->next = NULL;
+
+			/* Initialize Ethernet header. */
+			eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
+			rte_ether_addr_copy(&cfg_ether_dst, &eth_hdr->d_addr);
+			rte_ether_addr_copy(&cfg_ether_src, &eth_hdr->s_addr);
+			eth_hdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
+
+			/* Initialize IP header. */
+			ip_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
+			memset(ip_hdr, 0, sizeof(*ip_hdr));
+			ip_hdr->version_ihl	= RTE_IPV4_VHL_DEF;
+			ip_hdr->type_of_service	= 0;
+			ip_hdr->fragment_offset	= 0;
+			ip_hdr->time_to_live	= IP_DEFTTL;
+			ip_hdr->next_proto_id	= IPPROTO_UDP;
+			ip_hdr->packet_id	= 0;
+			ip_hdr->src_addr	= rte_cpu_to_be_32(cfg_ip_src);
+			ip_hdr->dst_addr	= rte_cpu_to_be_32(cfg_ip_dst +
+								next_flow);
+			ip_hdr->total_length	= RTE_CPU_TO_BE_16(pkt_size -
+								sizeof(*eth_hdr));
+			ip_hdr->hdr_checksum	= ip_sum((unaligned_uint16_t *)ip_hdr,
+							sizeof(*ip_hdr));
+
+			/* Initialize UDP header. */
+			udp_hdr = (struct rte_udp_hdr *)(ip_hdr + 1);
+			udp_hdr->src_port	= rte_cpu_to_be_16(cfg_udp_src);
+			udp_hdr->dst_port	= rte_cpu_to_be_16(cfg_udp_dst);
+			udp_hdr->dgram_cksum	= 0; /* No UDP checksum. */
+			udp_hdr->dgram_len	= RTE_CPU_TO_BE_16(pkt_size -
+								sizeof(*eth_hdr) -
+								sizeof(*ip_hdr));
+			pkt->nb_segs		= 1;
+			pkt->pkt_len		= pkt_size;
+			pkt->ol_flags		&= EXT_ATTACHED_MBUF;
+			pkt->ol_flags		|= ol_flags;
+			pkt->vlan_tci		= vlan_tci;
+			pkt->vlan_tci_outer	= vlan_tci_outer;
+			pkt->l2_len		= sizeof(struct rte_ether_hdr);
+			pkt->l3_len		= sizeof(struct rte_ipv4_hdr);
+		} else {
+			nb_clones--;
+			rte_mbuf_refcnt_update(pkt, 1);
+		}
+		pkts_burst[nb_pkt] = pkt;
 
 		next_flow = (next_flow + 1) % cfg_n_flows;
 	}
diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c
index 1ead59579..a2863bf8d 100644
--- a/app/test-pmd/parameters.c
+++ b/app/test-pmd/parameters.c
@@ -161,6 +161,7 @@ usage(char* progname)
 	printf("  --hairpinq=N: set the number of hairpin queues per port to "
 	       "N.\n");
 	printf("  --burst=N: set the number of packets per burst to N.\n");
+	printf("  --clones=N: set the number of single packet clones to send. Should be less than burst value.\n");
 	printf("  --mbcache=N: set the cache of mbuf memory pool to N.\n");
 	printf("  --rxpt=N: set prefetch threshold register of RX rings to N.\n");
 	printf("  --rxht=N: set the host threshold register of RX rings to N.\n");
@@ -645,6 +646,7 @@ launch_args_parse(int argc, char** argv)
 		{ "txd",			1, 0, 0 },
 		{ "hairpinq",			1, 0, 0 },
 		{ "burst",			1, 0, 0 },
+		{ "clones",			1, 0, 0 },
 		{ "mbcache",			1, 0, 0 },
 		{ "txpt",			1, 0, 0 },
 		{ "txht",			1, 0, 0 },
@@ -1151,6 +1153,16 @@ launch_args_parse(int argc, char** argv)
 				else
 					nb_pkt_per_burst = (uint16_t) n;
 			}
+			if (!strcmp(lgopts[opt_idx].name, "clones")) {
+				n = atoi(optarg);
+				if ((n >= 0) &&
+				    (n <= nb_pkt_per_burst))
+					nb_pkt_clones = (uint16_t) n;
+				else
+					rte_exit(EXIT_FAILURE,
+						 "clones must be >= 0 and <= %d (burst)\n",
+						 nb_pkt_per_burst);
+			}
 			if (!strcmp(lgopts[opt_idx].name, "mbcache")) {
 				n = atoi(optarg);
 				if ((n >= 0) &&
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index fe6450cc0..18b4b63d1 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -228,6 +228,7 @@ uint32_t tx_pkt_times_intra;
 /**< Timings for send scheduling in TXONLY mode, time between packets. */
 
 uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
+uint16_t nb_pkt_clones; /**< Number of tx packet clones to send. */
 uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */
 
 /* current configuration is in DCB or not,0 means it is not in DCB mode */
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index f139fe7a0..7337b5b94 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -431,6 +431,7 @@ extern enum tx_pkt_split tx_pkt_split;
 extern uint8_t txonly_multi_flow;
 
 extern uint16_t nb_pkt_per_burst;
+extern uint16_t nb_pkt_clones;
 extern uint16_t mb_mempool_cache;
 extern int8_t rx_pthresh;
 extern int8_t rx_hthresh;
diff --git a/doc/guides/testpmd_app_ug/run_app.rst b/doc/guides/testpmd_app_ug/run_app.rst
index e2539f693..42c2efb1f 100644
--- a/doc/guides/testpmd_app_ug/run_app.rst
+++ b/doc/guides/testpmd_app_ug/run_app.rst
@@ -296,6 +296,13 @@ The command line options are:
     If set to 0, driver default is used if defined. Else, if driver
     default is not defined, default of 32 is used.
 
+*   ``--clones=N``
+
+    Set the number of clones to be sent for each packet in `flowgen` mode.
+    Sending clones reduces host CPU load on creating packets and may help
+    in testing extreme speeds or maxing out tx packet performance.
+    N should be non-zero, but not greater than the 'burst' parameter.
+
 *   ``--mbcache=N``
 
     Set the cache of mbuf memory pools to N, where 0 <= N <= 512.
-- 
2.17.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [dpdk-dev] [RFC PATCH v2] app/testpmd: tx pkt clones parameter in flowgen
  2020-09-25  9:07 [dpdk-dev] [RFC PATCH v2] app/testpmd: tx pkt clones parameter in flowgen Igor Russkikh
@ 2020-10-12 18:13 ` Ferruh Yigit
  2020-10-12 19:37   ` [dpdk-dev] [EXT] " Igor Russkikh
  0 siblings, 1 reply; 3+ messages in thread
From: Ferruh Yigit @ 2020-10-12 18:13 UTC (permalink / raw)
  To: Igor Russkikh, dev
  Cc: Rasesh Mody, Devendra Singh Rawat, Wenzhuo Lu, Beilei Xing,
	Bernard Iremonger, Stephen Hemminger

On 9/25/2020 10:07 AM, Igor Russkikh wrote:
> When testing high performance numbers, it is often that CPU performance
> limits the max values device can reach (both in pps and in gbps)
> 
> Here instead of recreating each packet separately, we use clones counter
> to resend the same mbuf to the line multiple times.
> 
Do you have any numbers on how much performance improvement gained?

> PMDs handle that transparently due to reference counting inside of mbuf.
> 
> Verified on Marvell qede and atlantic PMDs.
> 
> v2: increment ref counter for each mbuf pointer copy
> 
> Signed-off-by: Igor Russkikh <irusskikh@marvell.com>

<...>

> @@ -1151,6 +1153,16 @@ launch_args_parse(int argc, char** argv)
>   				else
>   					nb_pkt_per_burst = (uint16_t) n;
>   			}
> +			if (!strcmp(lgopts[opt_idx].name, "clones")) {
> +				n = atoi(optarg);
> +				if ((n >= 0) &&
> +				    (n <= nb_pkt_per_burst))
> +					nb_pkt_clones = (uint16_t) n;
> +				else
> +					rte_exit(EXIT_FAILURE,
> +						 "clones must be >= 0 and <= %d (burst)\n",
> +						 nb_pkt_per_burst);

Do you need to enforce the "n <= nb_pkt_per_burst", burst value can be changed 
later and trying to keep 'clones' values in sync with it is additional work.

In the flowgen logic, with each burst a new packet is created anyway. So instead 
of enforcing the 'clones' number range, in documentation you can say the clone 
number can't exceed the burst number whatever it is set.

> +			}
>   			if (!strcmp(lgopts[opt_idx].name, "mbcache")) {
>   				n = atoi(optarg);
>   				if ((n >= 0) &&
> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
> index fe6450cc0..18b4b63d1 100644
> --- a/app/test-pmd/testpmd.c
> +++ b/app/test-pmd/testpmd.c
> @@ -228,6 +228,7 @@ uint32_t tx_pkt_times_intra;
>   /**< Timings for send scheduling in TXONLY mode, time between packets. */
>   
>   uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
> +uint16_t nb_pkt_clones; /**< Number of tx packet clones to send. */
>   uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */
>   

Both the parameter name, 'clones', and the variable name 'nb_pkt_clones' are too 
generic, and may mislead users. Please remember that testpmd usage is very wide.
You are updating very specifically the flowgen forwarding engine, can you please 
prefix the 'flowgen', like:
'flowgen-clones' & 'nb_pkt_flowgen_clones'.
Also explicitly mention in the description that this is for flowgen clones.

<...>

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [dpdk-dev] [EXT] Re: [RFC PATCH v2] app/testpmd: tx pkt clones parameter in flowgen
  2020-10-12 18:13 ` Ferruh Yigit
@ 2020-10-12 19:37   ` Igor Russkikh
  0 siblings, 0 replies; 3+ messages in thread
From: Igor Russkikh @ 2020-10-12 19:37 UTC (permalink / raw)
  To: Ferruh Yigit, dev
  Cc: Rasesh Mody, Devendra Singh Rawat, Wenzhuo Lu, Beilei Xing,
	Bernard Iremonger, Stephen Hemminger


>> Here instead of recreating each packet separately, we use clones counter
>> to resend the same mbuf to the line multiple times.
>>
> Do you have any numbers on how much performance improvement gained?

Hi Ferruh,

Yes, I'll post that in v1. In general, on our 100G device in default
configuration I had to use 8-16 queues (read cores) to reach line rate on 1400
packet size. Will give exact numbers.

With clones I was able to see linerate on 1-2 tx queues. That of course
depends on if HW itself allows to handle that amount of traffic.

Reaching max PPS on small packet sizes obviously helps here as well.

>> +					rte_exit(EXIT_FAILURE,
>> +						 "clones must be >= 0 and <= %d (burst)\n",
>> +						 nb_pkt_per_burst);
> 
> Do you need to enforce the "n <= nb_pkt_per_burst", burst value can be 
> changed
> later and trying to keep 'clones' values in sync with it is additional work.
> 
> In the flowgen logic, with each burst a new packet is created anyway. So 
> instead
> of enforcing the 'clones' number range, in documentation you can say the 
> clone
> number can't exceed the burst number whatever it is set.

Looks reasonable, thanks.

>>   uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per 
>> burst. */
>> +uint16_t nb_pkt_clones; /**< Number of tx packet clones to send. */
>>   uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool 
>> cache. */
>>
> 
> Both the parameter name, 'clones', and the variable name 'nb_pkt_clones' are 
> too
> generic, and may mislead users. Please remember that testpmd usage is very 
> wide.
> You are updating very specifically the flowgen forwarding engine, can you 
> please
> prefix the 'flowgen', like:
> 'flowgen-clones' & 'nb_pkt_flowgen_clones'.
> Also explicitly mention in the description that this is for flowgen clones.

Reasonable as well. Initially I was thinking about applying that to "tx-only"
as well, but then decided to stay with flowgen as more generic mode.

Thanks,
  Igor

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-10-12 19:37 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-25  9:07 [dpdk-dev] [RFC PATCH v2] app/testpmd: tx pkt clones parameter in flowgen Igor Russkikh
2020-10-12 18:13 ` Ferruh Yigit
2020-10-12 19:37   ` [dpdk-dev] [EXT] " Igor Russkikh

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).