DPDK patches and discussions
 help / color / mirror / Atom feed
* [PATCH] examples/l3fwd: fix Tx performance deteriorate
@ 2024-11-22  7:13 Jie Hai
  2024-11-22  9:14 ` lihuisong (C)
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Jie Hai @ 2024-11-22  7:13 UTC (permalink / raw)
  To: dev, thomas, ferruh.yigit, Morten Brørup, Chengwen Feng, Huisong Li
  Cc: haijie1, huangdengdui

The application sends packets only when the buffer is full, or when
the buffer is empty and the number of packets to be sent is at least
half of the buffer size.

The change to MAX_PKT_BURST increases the TX buffer size, while the
default size of the local cache on each lcore is 256, which is not
greater than the TX threshold. As a result, mbufs that are not in the
local cache are used frequently, and performance deteriorates.

This problem can be solved by making the TX threshold smaller than
the local cache size. For example, use the '--mbcache' parameter to
make the local cache larger. This patch optimizes the default
performance by lowering the TX threshold.

Fixes: d5c4897ecfb2 ("examples/l3fwd: add option to set Rx burst size")

Signed-off-by: Jie Hai <haijie1@huawei.com>
---
 examples/l3fwd/l3fwd.h        | 8 +++++---
 examples/l3fwd/l3fwd_common.h | 6 +++---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h
index 0cce3406ee7d..a01fecd51261 100644
--- a/examples/l3fwd/l3fwd.h
+++ b/examples/l3fwd/l3fwd.h
@@ -35,7 +35,7 @@
 /*
  * Try to avoid TX buffering if we have at least MAX_TX_BURST packets to send.
  */
-#define	MAX_TX_BURST	  (MAX_PKT_BURST / 2)
+#define	MAX_TX_BURST DEFAULT_PKT_BURST
 
 #define NB_SOCKETS        8
 
@@ -57,6 +57,8 @@
 #define L3FWD_HASH_ENTRIES		(1024*1024*1)
 #endif
 
+static_assert(MAX_TX_BURST <= MAX_PKT_BURST, "MAX_TX_BURST should be at most MAX_PKT_BURST");
+
 struct parm_cfg {
 	const char *rule_ipv4_name;
 	const char *rule_ipv6_name;
@@ -152,8 +154,8 @@ send_single_packet(struct lcore_conf *qconf,
 	len++;
 
 	/* enough pkts to be sent */
-	if (unlikely(len == MAX_PKT_BURST)) {
-		send_burst(qconf, MAX_PKT_BURST, port);
+	if (unlikely(len == MAX_TX_BURST)) {
+		send_burst(qconf, MAX_TX_BURST, port);
 		len = 0;
 	}
 
diff --git a/examples/l3fwd/l3fwd_common.h b/examples/l3fwd/l3fwd_common.h
index d94e5f135791..3f504dc0a552 100644
--- a/examples/l3fwd/l3fwd_common.h
+++ b/examples/l3fwd/l3fwd_common.h
@@ -71,7 +71,7 @@ send_packetsx4(struct lcore_conf *qconf, uint16_t port, struct rte_mbuf *m[],
 	 * If TX buffer for that queue is empty, and we have enough packets,
 	 * then send them straightway.
 	 */
-	if (num >= MAX_TX_BURST && len == 0) {
+	if (num >= MAX_TX_BURST / 2 && len == 0) {
 		n = rte_eth_tx_burst(port, qconf->tx_queue_id[port], m, num);
 		if (unlikely(n < num)) {
 			do {
@@ -112,9 +112,9 @@ send_packetsx4(struct lcore_conf *qconf, uint16_t port, struct rte_mbuf *m[],
 	len += n;
 
 	/* enough pkts to be sent */
-	if (unlikely(len == MAX_PKT_BURST)) {
+	if (unlikely(len > MAX_TX_BURST)) {
 
-		send_burst(qconf, MAX_PKT_BURST, port);
+		send_burst(qconf, len, port);
 
 		/* copy rest of the packets into the TX buffer. */
 		len = num - n;
-- 
2.22.0


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] examples/l3fwd: fix Tx performance deteriorate
  2024-11-22  7:13 [PATCH] examples/l3fwd: fix Tx performance deteriorate Jie Hai
@ 2024-11-22  9:14 ` lihuisong (C)
  2024-11-22 11:01 ` Konstantin Ananyev
  2024-11-25 19:44 ` Stephen Hemminger
  2 siblings, 0 replies; 5+ messages in thread
From: lihuisong (C) @ 2024-11-22  9:14 UTC (permalink / raw)
  To: Jie Hai, dev, thomas, ferruh.yigit, Morten Brørup, Chengwen Feng
  Cc: huangdengdui

LGTM, good job.
Acked-by: Huisong Li <lihuisong@huawei.com>

在 2024/11/22 15:13, Jie Hai 写道:
> The application send packets only when the buffer is full, or the
> buffer is empty and the number of packets to be sent extends half
> of the buffer.
>
> The change of MAX_PKT_BURST increases TX buffer size, while the
> default size of local cache on each lcore is 256, which not greater
> than the limit of transmitting. That would make the mbuf not on the
> local cache be frequently used and the performance deteriorates.
>
> This problem can be solved by making the TX threshold smaller than
> the local cache size. For example, use the '--mbcache' parameter to
> make the local cache greater. This patch optimizes the default
> performance by lowering TX threshold.
>
> Fixes: d5c4897ecfb2 ("examples/l3fwd: add option to set Rx burst size")
>
> Signed-off-by: Jie Hai <haijie1@huawei.com>
> ---
>   examples/l3fwd/l3fwd.h        | 8 +++++---
>   examples/l3fwd/l3fwd_common.h | 6 +++---
>   2 files changed, 8 insertions(+), 6 deletions(-)
>
> diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h
> index 0cce3406ee7d..a01fecd51261 100644
> --- a/examples/l3fwd/l3fwd.h
> +++ b/examples/l3fwd/l3fwd.h
> @@ -35,7 +35,7 @@
>   /*
>    * Try to avoid TX buffering if we have at least MAX_TX_BURST packets to send.
>    */
> -#define	MAX_TX_BURST	  (MAX_PKT_BURST / 2)
> +#define	MAX_TX_BURST DEFAULT_PKT_BURST
>   
>   #define NB_SOCKETS        8
>   
> @@ -57,6 +57,8 @@
>   #define L3FWD_HASH_ENTRIES		(1024*1024*1)
>   #endif
>   
> +static_assert(MAX_TX_BURST <= MAX_PKT_BURST, "MAX_TX_BURST should be at most MAX_PKT_BURST");
> +
>   struct parm_cfg {
>   	const char *rule_ipv4_name;
>   	const char *rule_ipv6_name;
> @@ -152,8 +154,8 @@ send_single_packet(struct lcore_conf *qconf,
>   	len++;
>   
>   	/* enough pkts to be sent */
> -	if (unlikely(len == MAX_PKT_BURST)) {
> -		send_burst(qconf, MAX_PKT_BURST, port);
> +	if (unlikely(len == MAX_TX_BURST)) {
> +		send_burst(qconf, MAX_TX_BURST, port);
>   		len = 0;
>   	}
>   
> diff --git a/examples/l3fwd/l3fwd_common.h b/examples/l3fwd/l3fwd_common.h
> index d94e5f135791..3f504dc0a552 100644
> --- a/examples/l3fwd/l3fwd_common.h
> +++ b/examples/l3fwd/l3fwd_common.h
> @@ -71,7 +71,7 @@ send_packetsx4(struct lcore_conf *qconf, uint16_t port, struct rte_mbuf *m[],
>   	 * If TX buffer for that queue is empty, and we have enough packets,
>   	 * then send them straightway.
>   	 */
> -	if (num >= MAX_TX_BURST && len == 0) {
> +	if (num >= MAX_TX_BURST / 2 && len == 0) {
>   		n = rte_eth_tx_burst(port, qconf->tx_queue_id[port], m, num);
>   		if (unlikely(n < num)) {
>   			do {
> @@ -112,9 +112,9 @@ send_packetsx4(struct lcore_conf *qconf, uint16_t port, struct rte_mbuf *m[],
>   	len += n;
>   
>   	/* enough pkts to be sent */
> -	if (unlikely(len == MAX_PKT_BURST)) {
> +	if (unlikely(len > MAX_TX_BURST)) {
>   
> -		send_burst(qconf, MAX_PKT_BURST, port);
> +		send_burst(qconf, len, port);
>   
>   		/* copy rest of the packets into the TX buffer. */
>   		len = num - n;

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [PATCH] examples/l3fwd: fix Tx performance deteriorate
  2024-11-22  7:13 [PATCH] examples/l3fwd: fix Tx performance deteriorate Jie Hai
  2024-11-22  9:14 ` lihuisong (C)
@ 2024-11-22 11:01 ` Konstantin Ananyev
  2024-11-26  7:39   ` Jie Hai
  2024-11-25 19:44 ` Stephen Hemminger
  2 siblings, 1 reply; 5+ messages in thread
From: Konstantin Ananyev @ 2024-11-22 11:01 UTC (permalink / raw)
  To: haijie, dev, thomas, ferruh.yigit, Morten Brørup,
	Fengchengwen, lihuisong (C)
  Cc: haijie, huangdengdui



> The application send packets only when the buffer is full, or the
> buffer is empty and the number of packets to be sent extends half
> of the buffer.
> 
> The change of MAX_PKT_BURST increases TX buffer size, while the
> default size of local cache on each lcore is 256, which not greater
> than the limit of transmitting. That would make the mbuf not on the
> local cache be frequently used and the performance deteriorates.
> 
> This problem can be solved by making the TX threshold smaller than
> the local cache size. For example, use the '--mbcache' parameter to
> make the local cache greater. This patch optimizes the default
> performance by lowering TX threshold.

In commit:
examples/l3fwd: add option to set Rx burst size
you introduced a new global
uint32_t nb_pkt_per_burst;
Why not use it for both (Rx and Tx) paths?
Or, if necessary, introduce another one for Tx, so we'll have:
uint32_t nb_rx_pkt_per_burst, nb_tx_pkt_per_burst;
To me that is much better than creating some hardcoded
and implicit thresholds.

> Fixes: d5c4897ecfb2 ("examples/l3fwd: add option to set Rx burst size")
> 
> Signed-off-by: Jie Hai <haijie1@huawei.com>
> ---
>  examples/l3fwd/l3fwd.h        | 8 +++++---
>  examples/l3fwd/l3fwd_common.h | 6 +++---
>  2 files changed, 8 insertions(+), 6 deletions(-)
> 
> diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h
> index 0cce3406ee7d..a01fecd51261 100644
> --- a/examples/l3fwd/l3fwd.h
> +++ b/examples/l3fwd/l3fwd.h
> @@ -35,7 +35,7 @@
>  /*
>   * Try to avoid TX buffering if we have at least MAX_TX_BURST packets to send.
>   */
> -#define	MAX_TX_BURST	  (MAX_PKT_BURST / 2)
> +#define	MAX_TX_BURST DEFAULT_PKT_BURST
> 
>  #define NB_SOCKETS        8
> 
> @@ -57,6 +57,8 @@
>  #define L3FWD_HASH_ENTRIES		(1024*1024*1)
>  #endif
> 
> +static_assert(MAX_TX_BURST <= MAX_PKT_BURST, "MAX_TX_BURST should be at most MAX_PKT_BURST");
> +
>  struct parm_cfg {
>  	const char *rule_ipv4_name;
>  	const char *rule_ipv6_name;
> @@ -152,8 +154,8 @@ send_single_packet(struct lcore_conf *qconf,
>  	len++;
> 
>  	/* enough pkts to be sent */
> -	if (unlikely(len == MAX_PKT_BURST)) {
> -		send_burst(qconf, MAX_PKT_BURST, port);
> +	if (unlikely(len == MAX_TX_BURST)) {
> +		send_burst(qconf, MAX_TX_BURST, port);
>  		len = 0;
>  	}
> 
> diff --git a/examples/l3fwd/l3fwd_common.h b/examples/l3fwd/l3fwd_common.h
> index d94e5f135791..3f504dc0a552 100644
> --- a/examples/l3fwd/l3fwd_common.h
> +++ b/examples/l3fwd/l3fwd_common.h
> @@ -71,7 +71,7 @@ send_packetsx4(struct lcore_conf *qconf, uint16_t port, struct rte_mbuf *m[],
>  	 * If TX buffer for that queue is empty, and we have enough packets,
>  	 * then send them straightway.
>  	 */
> -	if (num >= MAX_TX_BURST && len == 0) {
> +	if (num >= MAX_TX_BURST / 2 && len == 0) {
>  		n = rte_eth_tx_burst(port, qconf->tx_queue_id[port], m, num);
>  		if (unlikely(n < num)) {
>  			do {
> @@ -112,9 +112,9 @@ send_packetsx4(struct lcore_conf *qconf, uint16_t port, struct rte_mbuf *m[],
>  	len += n;
> 
>  	/* enough pkts to be sent */
> -	if (unlikely(len == MAX_PKT_BURST)) {
> +	if (unlikely(len > MAX_TX_BURST)) {
> 
> -		send_burst(qconf, MAX_PKT_BURST, port);
> +		send_burst(qconf, len, port);
> 
>  		/* copy rest of the packets into the TX buffer. */
>  		len = num - n;
> --
> 2.22.0


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] examples/l3fwd: fix Tx performance deteriorate
  2024-11-22  7:13 [PATCH] examples/l3fwd: fix Tx performance deteriorate Jie Hai
  2024-11-22  9:14 ` lihuisong (C)
  2024-11-22 11:01 ` Konstantin Ananyev
@ 2024-11-25 19:44 ` Stephen Hemminger
  2 siblings, 0 replies; 5+ messages in thread
From: Stephen Hemminger @ 2024-11-25 19:44 UTC (permalink / raw)
  To: Jie Hai
  Cc: dev, thomas, ferruh.yigit, Morten Brørup, Chengwen Feng,
	Huisong Li, huangdengdui

On Fri, 22 Nov 2024 15:13:36 +0800
Jie Hai <haijie1@huawei.com> wrote:

> The application send packets only when the buffer is full, or the
> buffer is empty and the number of packets to be sent extends half
> of the buffer.
> 
> The change of MAX_PKT_BURST increases TX buffer size, while the
> default size of local cache on each lcore is 256, which not greater
> than the limit of transmitting. That would make the mbuf not on the
> local cache be frequently used and the performance deteriorates.
> 
> This problem can be solved by making the TX threshold smaller than
> the local cache size. For example, use the '--mbcache' parameter to
> make the local cache greater. This patch optimizes the default
> performance by lowering TX threshold.
> 
> Fixes: d5c4897ecfb2 ("examples/l3fwd: add option to set Rx burst size")
> 
> Signed-off-by: Jie Hai <haijie1@huawei.com>

Do the other variants of l3fwd have the same problem?

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] examples/l3fwd: fix Tx performance deteriorate
  2024-11-22 11:01 ` Konstantin Ananyev
@ 2024-11-26  7:39   ` Jie Hai
  0 siblings, 0 replies; 5+ messages in thread
From: Jie Hai @ 2024-11-26  7:39 UTC (permalink / raw)
  To: Konstantin Ananyev, dev, thomas, ferruh.yigit,
	Morten Brørup, Fengchengwen, lihuisong (C)
  Cc: huangdengdui

Hi, konstantin.ananyev,

That sounds better; I will send a V2.

Thanks,
Jie Hai

> 
> In  commit:
> examples/l3fwd: add option to set Rx burst size
> you introduced new global
> uint32_t nb_pkt_per_burst;
> Why not to use it for both (rx and tx) paths?
> Or if necessary introduce another one for tx, so we'll have:
> uint32_t nb_rx_pkt_per_burst, nb_tx_pkt_per_burst,;
> To me that is much better then create some hardcoded
> and implicit thresholds.
> 
>> Fixes: d5c4897ecfb2 ("examples/l3fwd: add option to set Rx burst size")
>>
>> Signed-off-by: Jie Hai <haijie1@huawei.com>
>> ---
>>   examples/l3fwd/l3fwd.h        | 8 +++++---
>>   examples/l3fwd/l3fwd_common.h | 6 +++---
>>   2 files changed, 8 insertions(+), 6 deletions(-)
>>
>> diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h
>> index 0cce3406ee7d..a01fecd51261 100644
>> --- a/examples/l3fwd/l3fwd.h
>> +++ b/examples/l3fwd/l3fwd.h
>> @@ -35,7 +35,7 @@
>>   /*
>>    * Try to avoid TX buffering if we have at least MAX_TX_BURST packets to send.
>>    */
>> -#define	MAX_TX_BURST	  (MAX_PKT_BURST / 2)
>> +#define	MAX_TX_BURST DEFAULT_PKT_BURST
>>
>>   #define NB_SOCKETS        8
>>
>> @@ -57,6 +57,8 @@
>>   #define L3FWD_HASH_ENTRIES		(1024*1024*1)
>>   #endif
>>
>> +static_assert(MAX_TX_BURST <= MAX_PKT_BURST, "MAX_TX_BURST should be at most MAX_PKT_BURST");
>> +
>>   struct parm_cfg {
>>   	const char *rule_ipv4_name;
>>   	const char *rule_ipv6_name;
>> @@ -152,8 +154,8 @@ send_single_packet(struct lcore_conf *qconf,
>>   	len++;
>>
>>   	/* enough pkts to be sent */
>> -	if (unlikely(len == MAX_PKT_BURST)) {
>> -		send_burst(qconf, MAX_PKT_BURST, port);
>> +	if (unlikely(len == MAX_TX_BURST)) {
>> +		send_burst(qconf, MAX_TX_BURST, port);
>>   		len = 0;
>>   	}
>>
>> diff --git a/examples/l3fwd/l3fwd_common.h b/examples/l3fwd/l3fwd_common.h
>> index d94e5f135791..3f504dc0a552 100644
>> --- a/examples/l3fwd/l3fwd_common.h
>> +++ b/examples/l3fwd/l3fwd_common.h
>> @@ -71,7 +71,7 @@ send_packetsx4(struct lcore_conf *qconf, uint16_t port, struct rte_mbuf *m[],
>>   	 * If TX buffer for that queue is empty, and we have enough packets,
>>   	 * then send them straightway.
>>   	 */
>> -	if (num >= MAX_TX_BURST && len == 0) {
>> +	if (num >= MAX_TX_BURST / 2 && len == 0) {
>>   		n = rte_eth_tx_burst(port, qconf->tx_queue_id[port], m, num);
>>   		if (unlikely(n < num)) {
>>   			do {
>> @@ -112,9 +112,9 @@ send_packetsx4(struct lcore_conf *qconf, uint16_t port, struct rte_mbuf *m[],
>>   	len += n;
>>
>>   	/* enough pkts to be sent */
>> -	if (unlikely(len == MAX_PKT_BURST)) {
>> +	if (unlikely(len > MAX_TX_BURST)) {
>>
>> -		send_burst(qconf, MAX_PKT_BURST, port);
>> +		send_burst(qconf, len, port);
>>
>>   		/* copy rest of the packets into the TX buffer. */
>>   		len = num - n;
>> --
>> 2.22.0
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2024-11-26  7:39 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-11-22  7:13 [PATCH] examples/l3fwd: fix Tx performance deteriorate Jie Hai
2024-11-22  9:14 ` lihuisong (C)
2024-11-22 11:01 ` Konstantin Ananyev
2024-11-26  7:39   ` Jie Hai
2024-11-25 19:44 ` Stephen Hemminger

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).