* [PATCH] gro: fix gro with tcp push flag
@ 2022-10-14 2:37 Jun Qiu
0 siblings, 0 replies; 10+ messages in thread
From: Jun Qiu @ 2022-10-14 2:37 UTC (permalink / raw)
To: dev; +Cc: jiayu.hu, stable
TCP data packets sometimes carry a PUSH flag. Currently,
only the packets that do not have PUSH flag can be GROed.
The packets that have a PUSH flag cannot be GROed, the packets
that cannot be processed by GRO are placed last.
In this case, the received packets may be out of order.
For example, there are two packets mbuf1 and mbuf2. mbuf1
contains PUSH flag, mbuf2 does not contain PUSH flag.
After GRO processing, mbuf2 is sent for processing before mbuf1.
This out-of-order will affect TCP processing performance and
lead to unnecessary dup-ACK.
Like some hardware LRO implementations, a control flag is provided
for marking that TCP packets carrying PSH flags can be merged.
Fixes: 0d2cbe59b719 ("lib/gro: support TCP/IPv4")
Cc: stable@dpdk.org
Signed-off-by: Jun Qiu <jun.qiu@jaguarmicro.com>
---
lib/gro/gro_tcp4.c | 15 ++++++++++-----
lib/gro/gro_tcp4.h | 21 +++++++++++++++++----
lib/gro/gro_vxlan_tcp4.c | 15 ++++++++++-----
lib/gro/gro_vxlan_tcp4.h | 5 ++++-
lib/gro/rte_gro.c | 12 ++++++++----
lib/gro/rte_gro.h | 5 +++++
6 files changed, 54 insertions(+), 19 deletions(-)
diff --git a/lib/gro/gro_tcp4.c b/lib/gro/gro_tcp4.c
index 8f5e800250..0b8f86857e 100644
--- a/lib/gro/gro_tcp4.c
+++ b/lib/gro/gro_tcp4.c
@@ -6,6 +6,7 @@
#include <rte_mbuf.h>
#include <rte_ethdev.h>
+#include "rte_gro.h"
#include "gro_tcp4.h"
void *
@@ -191,7 +192,8 @@ update_header(struct gro_tcp4_item *item)
int32_t
gro_tcp4_reassemble(struct rte_mbuf *pkt,
struct gro_tcp4_tbl *tbl,
- uint64_t start_time)
+ uint64_t start_time,
+ uint16_t flags)
{
struct rte_ether_hdr *eth_hdr;
struct rte_ipv4_hdr *ipv4_hdr;
@@ -205,7 +207,7 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
uint32_t cur_idx, prev_idx, item_idx;
uint32_t i, max_flow_num, remaining_flow_num;
int cmp;
- uint8_t find;
+ uint8_t find, tcp_flags;
/*
* Don't process the packet whose TCP header length is greater
@@ -220,10 +222,13 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
hdr_len = pkt->l2_len + pkt->l3_len + pkt->l4_len;
/*
- * Don't process the packet which has FIN, SYN, RST, PSH, URG, ECE
- * or CWR set.
+ * Don't process the packet which has FIN, SYN, RST, URG, ECE
+ * or CWR set, the PSH flag is ignored at the user's discretion.
*/
- if (tcp_hdr->tcp_flags != RTE_TCP_ACK_FLAG)
+ tcp_flags = tcp_hdr->tcp_flags & (~(RTE_TCP_ACK_FLAG));
+ if (flags & RTE_GRO_TCP_PUSH_IGNORE)
+ tcp_flags = tcp_flags & (~(RTE_TCP_PSH_FLAG));
+ if (tcp_flags)
return -1;
/*
* Don't process the packet whose payload length is less than or
diff --git a/lib/gro/gro_tcp4.h b/lib/gro/gro_tcp4.h
index 212f97a042..b005bfe7a0 100644
--- a/lib/gro/gro_tcp4.h
+++ b/lib/gro/gro_tcp4.h
@@ -134,6 +134,8 @@ void gro_tcp4_tbl_destroy(void *tbl);
* Pointer pointing to the TCP/IPv4 reassembly table
* @start_time
* The time when the packet is inserted into the table
+ * @flags
+ * Functional flags for GRO
*
* @return
* - Return a positive value if the packet is merged.
@@ -143,7 +145,8 @@ void gro_tcp4_tbl_destroy(void *tbl);
*/
int32_t gro_tcp4_reassemble(struct rte_mbuf *pkt,
struct gro_tcp4_tbl *tbl,
- uint64_t start_time);
+ uint64_t start_time,
+ uint16_t flags);
/**
* This function flushes timeout packets in a TCP/IPv4 reassembly table,
@@ -210,7 +213,8 @@ merge_two_tcp4_packets(struct gro_tcp4_item *item,
uint16_t l2_offset)
{
struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
- uint16_t hdr_len, l2_len;
+ struct rte_tcp_hdr *head_tcp_hdr, *tail_tcp_hdr;
+ uint16_t hdr_len, l2_len, l3_offset;
if (cmp > 0) {
pkt_head = item->firstseg;
@@ -221,13 +225,22 @@ merge_two_tcp4_packets(struct gro_tcp4_item *item,
}
/* check if the IPv4 packet length is greater than the max value */
- hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len +
- pkt_head->l4_len;
+ l3_offset = l2_offset + pkt_head->l2_len + pkt_head->l3_len;
+ hdr_len = l3_offset + pkt_head->l4_len;
l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len;
if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len -
hdr_len > MAX_IPV4_PKT_LENGTH))
return 0;
+ /* merge push flag to pkt_head */
+ tail_tcp_hdr = rte_pktmbuf_mtod_offset(pkt_tail,
+ struct rte_tcp_hdr *, l3_offset);
+ if (tail_tcp_hdr->tcp_flags & RTE_TCP_PSH_FLAG) {
+ head_tcp_hdr = rte_pktmbuf_mtod_offset(pkt_head,
+ struct rte_tcp_hdr *, l3_offset);
+ head_tcp_hdr->tcp_flags |= RTE_TCP_PSH_FLAG;
+ }
+
/* remove the packet header for the tail packet */
rte_pktmbuf_adj(pkt_tail, hdr_len);
diff --git a/lib/gro/gro_vxlan_tcp4.c b/lib/gro/gro_vxlan_tcp4.c
index 3be4deb7c7..c7282139ee 100644
--- a/lib/gro/gro_vxlan_tcp4.c
+++ b/lib/gro/gro_vxlan_tcp4.c
@@ -7,6 +7,7 @@
#include <rte_ethdev.h>
#include <rte_udp.h>
+#include "rte_gro.h"
#include "gro_vxlan_tcp4.h"
void *
@@ -287,7 +288,8 @@ update_vxlan_header(struct gro_vxlan_tcp4_item *item)
int32_t
gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
struct gro_vxlan_tcp4_tbl *tbl,
- uint64_t start_time)
+ uint64_t start_time,
+ uint16_t flags)
{
struct rte_ether_hdr *outer_eth_hdr, *eth_hdr;
struct rte_ipv4_hdr *outer_ipv4_hdr, *ipv4_hdr;
@@ -304,7 +306,7 @@ gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
uint32_t i, max_flow_num, remaining_flow_num;
int cmp;
uint16_t hdr_len;
- uint8_t find;
+ uint8_t find, tcp_flags;
/*
* Don't process the packet whose TCP header length is greater
@@ -326,10 +328,13 @@ gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
/*
- * Don't process the packet which has FIN, SYN, RST, PSH, URG,
- * ECE or CWR set.
+ * Don't process the packet which has FIN, SYN, RST, URG, ECE
+ * or CWR set, the PSH flag is ignored at the user's discretion.
*/
- if (tcp_hdr->tcp_flags != RTE_TCP_ACK_FLAG)
+ tcp_flags = tcp_hdr->tcp_flags & (~(RTE_TCP_ACK_FLAG));
+ if (flags & RTE_GRO_TCP_PUSH_IGNORE)
+ tcp_flags = tcp_flags & (~(RTE_TCP_PSH_FLAG));
+ if (tcp_flags)
return -1;
hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len +
diff --git a/lib/gro/gro_vxlan_tcp4.h b/lib/gro/gro_vxlan_tcp4.h
index 7832942a68..b6d8b92599 100644
--- a/lib/gro/gro_vxlan_tcp4.h
+++ b/lib/gro/gro_vxlan_tcp4.h
@@ -108,6 +108,8 @@ void gro_vxlan_tcp4_tbl_destroy(void *tbl);
* Pointer pointing to the VxLAN reassembly table
* @start_time
* The time when the packet is inserted into the table
+ * @flags
+ * Functional flags for GRO
*
* @return
* - Return a positive value if the packet is merged.
@@ -117,7 +119,8 @@ void gro_vxlan_tcp4_tbl_destroy(void *tbl);
*/
int32_t gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
struct gro_vxlan_tcp4_tbl *tbl,
- uint64_t start_time);
+ uint64_t start_time,
+ uint16_t flags);
/**
* This function flushes timeout packets in the VxLAN reassembly table,
diff --git a/lib/gro/rte_gro.c b/lib/gro/rte_gro.c
index e35399fd42..ce9ac81142 100644
--- a/lib/gro/rte_gro.c
+++ b/lib/gro/rte_gro.c
@@ -77,6 +77,8 @@ struct gro_ctx {
uint64_t gro_types;
/* reassembly tables */
void *tbls[RTE_GRO_TYPE_MAX_NUM];
+ /**< Functional flags for GRO */
+ uint16_t flags;
};
void *
@@ -116,6 +118,7 @@ rte_gro_ctx_create(const struct rte_gro_param *param)
gro_types |= gro_type_flag;
}
gro_ctx->gro_types = param->gro_types;
+ gro_ctx->flags = param->flags;
return gro_ctx;
}
@@ -245,7 +248,8 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
do_vxlan_tcp_gro) {
ret = gro_vxlan_tcp4_reassemble(pkts[i],
- &vxlan_tcp_tbl, 0);
+ &vxlan_tcp_tbl, 0,
+ param->flags);
if (ret > 0)
/* Merge successfully */
nb_after_gro--;
@@ -262,7 +266,7 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
unprocess_pkts[unprocess_num++] = pkts[i];
} else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
do_tcp4_gro) {
- ret = gro_tcp4_reassemble(pkts[i], &tcp_tbl, 0);
+ ret = gro_tcp4_reassemble(pkts[i], &tcp_tbl, 0, param->flags);
if (ret > 0)
/* merge successfully */
nb_after_gro--;
@@ -354,7 +358,7 @@ rte_gro_reassemble(struct rte_mbuf **pkts,
if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
do_vxlan_tcp_gro) {
if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tcp_tbl,
- current_time) < 0)
+ current_time, gro_ctx->flags) < 0)
unprocess_pkts[unprocess_num++] = pkts[i];
} else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
do_vxlan_udp_gro) {
@@ -364,7 +368,7 @@ rte_gro_reassemble(struct rte_mbuf **pkts,
} else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
do_tcp4_gro) {
if (gro_tcp4_reassemble(pkts[i], tcp_tbl,
- current_time) < 0)
+ current_time, gro_ctx->flags) < 0)
unprocess_pkts[unprocess_num++] = pkts[i];
} else if (IS_IPV4_UDP_PKT(pkts[i]->packet_type) &&
do_udp4_gro) {
diff --git a/lib/gro/rte_gro.h b/lib/gro/rte_gro.h
index 9f9ed4935a..691b8a26cf 100644
--- a/lib/gro/rte_gro.h
+++ b/lib/gro/rte_gro.h
@@ -39,6 +39,7 @@ extern "C" {
#define RTE_GRO_IPV4_VXLAN_UDP_IPV4 (1ULL << RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX)
/**< VxLAN UDP/IPv4 GRO flag. */
+#define RTE_GRO_TCP_PUSH_IGNORE 0x01
/**
* Structure used to create GRO context objects or used to pass
* application-determined parameters to rte_gro_reassemble_burst().
@@ -55,6 +56,10 @@ struct rte_gro_param {
* like reassembly tables. When use rte_gro_reassemble_burst(),
* applications don't need to set this value.
*/
+ uint16_t flags;
+ /**< Functional flags for GRO, For example,
+ * merge TCP packets with push flag.
+ */
};
/**
--
2.25.1
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH] gro: fix gro with tcp push flag
@ 2022-07-26 6:17 Jun Qiu
2022-07-26 6:41 ` kumaraparameshwaran rathinavel
0 siblings, 1 reply; 10+ messages in thread
From: Jun Qiu @ 2022-07-26 6:17 UTC (permalink / raw)
To: dev; +Cc: jiayu.hu, stable
TCP data packets sometimes carry a PUSH flag. Currently,
only the packets that do not have PUSH flag can be GROed.
The packets that have a PUSH flag cannot be GROed, the packets
that cannot be processed by GRO are placed last.
In this case, the received packets may be out of order.
For example, there are two packets mbuf1 and mbuf2. mbuf1
contains PUSH flag, mbuf2 does not contain PUSH flag.
After GRO processing, mbuf2 is sent for processing before mbuf1.
This out-of-order will affect TCP processing performance and
lead to unnecessary dup-ACK.
Referring to the Linux kernel implementation, packets with PUSH
flag can also perform GRO. And if one of the packets containing
PUSH flag, the packets after GRO will carry PUSH flag.
Fixes: 0d2cbe59b719 ("lib/gro: support TCP/IPv4")
Cc: stable@dpdk.org
Signed-off-by: Jun Qiu <jun.qiu@jaguarmicro.com>
---
lib/gro/gro_tcp4.c | 4 ++--
lib/gro/gro_tcp4.h | 16 +++++++++++++---
lib/gro/gro_vxlan_tcp4.c | 4 ++--
3 files changed, 17 insertions(+), 7 deletions(-)
diff --git a/lib/gro/gro_tcp4.c b/lib/gro/gro_tcp4.c
index 7498c66141..7849a2bd1d 100644
--- a/lib/gro/gro_tcp4.c
+++ b/lib/gro/gro_tcp4.c
@@ -220,10 +220,10 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
hdr_len = pkt->l2_len + pkt->l3_len + pkt->l4_len;
/*
- * Don't process the packet which has FIN, SYN, RST, PSH, URG, ECE
+ * Don't process the packet which has FIN, SYN, RST, URG, ECE
* or CWR set.
*/
- if (tcp_hdr->tcp_flags != RTE_TCP_ACK_FLAG)
+ if (tcp_hdr->tcp_flags & (~(RTE_TCP_ACK_FLAG | RTE_TCP_PSH_FLAG)))
return -1;
/*
* Don't process the packet whose payload length is less than or
diff --git a/lib/gro/gro_tcp4.h b/lib/gro/gro_tcp4.h
index 212f97a042..2974faf228 100644
--- a/lib/gro/gro_tcp4.h
+++ b/lib/gro/gro_tcp4.h
@@ -210,7 +210,8 @@ merge_two_tcp4_packets(struct gro_tcp4_item *item,
uint16_t l2_offset)
{
struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
- uint16_t hdr_len, l2_len;
+ struct rte_tcp_hdr *head_tcp_hdr, *tail_tcp_hdr;
+ uint16_t hdr_len, l2_len, l3_offset;
if (cmp > 0) {
pkt_head = item->firstseg;
@@ -221,13 +222,22 @@ merge_two_tcp4_packets(struct gro_tcp4_item *item,
}
/* check if the IPv4 packet length is greater than the max value */
- hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len +
- pkt_head->l4_len;
+ l3_offset = l2_offset + pkt_head->l2_len + pkt_head->l3_len;
+ hdr_len = l3_offset + pkt_head->l4_len;
l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len;
if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len -
hdr_len > MAX_IPV4_PKT_LENGTH))
return 0;
+ /* merge push flag to pkt_head */
+ tail_tcp_hdr = rte_pktmbuf_mtod_offset(pkt_tail,
+ struct rte_tcp_hdr *, l3_offset);
+ if (tail_tcp_hdr->tcp_flags & RTE_TCP_PSH_FLAG) {
+ head_tcp_hdr = rte_pktmbuf_mtod_offset(pkt_head,
+ struct rte_tcp_hdr *, l3_offset);
+ head_tcp_hdr->tcp_flags |= RTE_TCP_PSH_FLAG;
+ }
+
/* remove the packet header for the tail packet */
rte_pktmbuf_adj(pkt_tail, hdr_len);
diff --git a/lib/gro/gro_vxlan_tcp4.c b/lib/gro/gro_vxlan_tcp4.c
index 3be4deb7c7..884802af0b 100644
--- a/lib/gro/gro_vxlan_tcp4.c
+++ b/lib/gro/gro_vxlan_tcp4.c
@@ -326,10 +326,10 @@ gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
/*
- * Don't process the packet which has FIN, SYN, RST, PSH, URG,
+ * Don't process the packet which has FIN, SYN, RST, URG,
* ECE or CWR set.
*/
- if (tcp_hdr->tcp_flags != RTE_TCP_ACK_FLAG)
+ if (tcp_hdr->tcp_flags & (~(RTE_TCP_ACK_FLAG | RTE_TCP_PSH_FLAG)))
return -1;
hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len +
--
2.25.1
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] gro: fix gro with tcp push flag
2022-07-26 6:17 Jun Qiu
@ 2022-07-26 6:41 ` kumaraparameshwaran rathinavel
2022-07-26 6:57 ` 答复: " Jun Qiu
0 siblings, 1 reply; 10+ messages in thread
From: kumaraparameshwaran rathinavel @ 2022-07-26 6:41 UTC (permalink / raw)
To: Jun Qiu; +Cc: dev, jiayu.hu, stable
[-- Attachment #1: Type: text/plain, Size: 4826 bytes --]
On Tue, Jul 26, 2022 at 11:48 AM Jun Qiu <jun.qiu@jaguarmicro.com> wrote:
> TCP data packets sometimes carry a PUSH flag. Currently,
> only the packets that do not have PUSH flag can be GROed.
> The packets that have a PUSH flag cannot be GROed, the packets
> that cannot be processed by GRO are placed last.
> In this case, the received packets may be out of order.
> For example, there are two packets mbuf1 and mbuf2. mbuf1
> contains PUSH flag, mbuf2 does not contain PUSH flag.
> After GRO processing, mbuf2 is sent for processing before mbuf1.
> This out-of-order will affect TCP processing performance and
> lead to unnecessary dup-ACK.
>
> Referring to the Linux kernel implementation, packets with PUSH
> flag can also perform GRO. And if one of the packets containing
> PUSH flag, the packets after GRO will carry PUSH flag.
>
>
In case of smaller transfers in which the TCP segment size is not more than
one MTU, it is a single TCP packet with PSH flag set, so in those cases we
are introducing unwanted delay. I think the better approach would be if
there are previous packets in the flow and the current packet received has
PSH flag then coalesce with the previous packet, if lookup is failure and
the current packet has PSH flag set then deliver it immediately.
Fixes: 0d2cbe59b719 ("lib/gro: support TCP/IPv4")
> Cc: stable@dpdk.org
>
> Signed-off-by: Jun Qiu <jun.qiu@jaguarmicro.com>
> ---
> lib/gro/gro_tcp4.c | 4 ++--
> lib/gro/gro_tcp4.h | 16 +++++++++++++---
> lib/gro/gro_vxlan_tcp4.c | 4 ++--
> 3 files changed, 17 insertions(+), 7 deletions(-)
>
> diff --git a/lib/gro/gro_tcp4.c b/lib/gro/gro_tcp4.c
> index 7498c66141..7849a2bd1d 100644
> --- a/lib/gro/gro_tcp4.c
> +++ b/lib/gro/gro_tcp4.c
> @@ -220,10 +220,10 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
> hdr_len = pkt->l2_len + pkt->l3_len + pkt->l4_len;
>
> /*
> - * Don't process the packet which has FIN, SYN, RST, PSH, URG, ECE
> + * Don't process the packet which has FIN, SYN, RST, URG, ECE
> * or CWR set.
> */
> - if (tcp_hdr->tcp_flags != RTE_TCP_ACK_FLAG)
> + if (tcp_hdr->tcp_flags & (~(RTE_TCP_ACK_FLAG | RTE_TCP_PSH_FLAG)))
> return -1;
> /*
> * Don't process the packet whose payload length is less than or
> diff --git a/lib/gro/gro_tcp4.h b/lib/gro/gro_tcp4.h
> index 212f97a042..2974faf228 100644
> --- a/lib/gro/gro_tcp4.h
> +++ b/lib/gro/gro_tcp4.h
> @@ -210,7 +210,8 @@ merge_two_tcp4_packets(struct gro_tcp4_item *item,
> uint16_t l2_offset)
> {
> struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
> - uint16_t hdr_len, l2_len;
> + struct rte_tcp_hdr *head_tcp_hdr, *tail_tcp_hdr;
> + uint16_t hdr_len, l2_len, l3_offset;
>
> if (cmp > 0) {
> pkt_head = item->firstseg;
> @@ -221,13 +222,22 @@ merge_two_tcp4_packets(struct gro_tcp4_item *item,
> }
>
> /* check if the IPv4 packet length is greater than the max value */
> - hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len +
> - pkt_head->l4_len;
> + l3_offset = l2_offset + pkt_head->l2_len + pkt_head->l3_len;
> + hdr_len = l3_offset + pkt_head->l4_len;
> l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len;
> if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len -
> hdr_len > MAX_IPV4_PKT_LENGTH))
> return 0;
>
> + /* merge push flag to pkt_head */
> + tail_tcp_hdr = rte_pktmbuf_mtod_offset(pkt_tail,
> + struct rte_tcp_hdr *, l3_offset);
> + if (tail_tcp_hdr->tcp_flags & RTE_TCP_PSH_FLAG) {
> + head_tcp_hdr = rte_pktmbuf_mtod_offset(pkt_head,
> + struct rte_tcp_hdr *, l3_offset);
> + head_tcp_hdr->tcp_flags |= RTE_TCP_PSH_FLAG;
> + }
> +
> /* remove the packet header for the tail packet */
> rte_pktmbuf_adj(pkt_tail, hdr_len);
>
> diff --git a/lib/gro/gro_vxlan_tcp4.c b/lib/gro/gro_vxlan_tcp4.c
> index 3be4deb7c7..884802af0b 100644
> --- a/lib/gro/gro_vxlan_tcp4.c
> +++ b/lib/gro/gro_vxlan_tcp4.c
> @@ -326,10 +326,10 @@ gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
> tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
>
> /*
> - * Don't process the packet which has FIN, SYN, RST, PSH, URG,
> + * Don't process the packet which has FIN, SYN, RST, URG,
> * ECE or CWR set.
> */
> - if (tcp_hdr->tcp_flags != RTE_TCP_ACK_FLAG)
> + if (tcp_hdr->tcp_flags & (~(RTE_TCP_ACK_FLAG | RTE_TCP_PSH_FLAG)))
> return -1;
>
> hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len +
> --
> 2.25.1
>
>
[-- Attachment #2: Type: text/html, Size: 6157 bytes --]
^ permalink raw reply [flat|nested] 10+ messages in thread
* 答复: [PATCH] gro: fix gro with tcp push flag
2022-07-26 6:41 ` kumaraparameshwaran rathinavel
@ 2022-07-26 6:57 ` Jun Qiu
2022-07-26 7:08 ` kumaraparameshwaran rathinavel
0 siblings, 1 reply; 10+ messages in thread
From: Jun Qiu @ 2022-07-26 6:57 UTC (permalink / raw)
To: kumaraparameshwaran rathinavel; +Cc: dev, jiayu.hu, stable
[-- Attachment #1: Type: text/plain, Size: 5193 bytes --]
May be in rte_gro_reassemble_burst, where no delay is introduced, PUSH packets can be merged
发件人: kumaraparameshwaran rathinavel <kumaraparamesh92@gmail.com>
发送时间: 2022年7月26日 14:41
收件人: Jun Qiu <jun.qiu@jaguarmicro.com>
抄送: dev@dpdk.org; jiayu.hu@intel.com; stable@dpdk.org
主题: Re: [PATCH] gro: fix gro with tcp push flag
On Tue, Jul 26, 2022 at 11:48 AM Jun Qiu <jun.qiu@jaguarmicro.com<mailto:jun.qiu@jaguarmicro.com>> wrote:
TCP data packets sometimes carry a PUSH flag. Currently,
only the packets that do not have PUSH flag can be GROed.
The packets that have a PUSH flag cannot be GROed, the packets
that cannot be processed by GRO are placed last.
In this case, the received packets may be out of order.
For example, there are two packets mbuf1 and mbuf2. mbuf1
contains PUSH flag, mbuf2 does not contain PUSH flag.
After GRO processing, mbuf2 is sent for processing before mbuf1.
This out-of-order will affect TCP processing performance and
lead to unnecessary dup-ACK.
Referring to the Linux kernel implementation, packets with PUSH
flag can also perform GRO. And if one of the packets containing
PUSH flag, the packets after GRO will carry PUSH flag.
In case of smaller transfers in which the TCP segment size is not more than one MTU, it is a single TCP packet with PSH flag set, so in those cases we are introducing unwanted delay. I think the better approach would be if there are previous packets in the flow and the current packet received has PSH flag then coalesce with the previous packet, if lookup is failure and the current packet has PSH flag set then deliver it immediately.
Fixes: 0d2cbe59b719 ("lib/gro: support TCP/IPv4")
Cc: stable@dpdk.org<mailto:stable@dpdk.org>
Signed-off-by: Jun Qiu <jun.qiu@jaguarmicro.com<mailto:jun.qiu@jaguarmicro.com>>
---
lib/gro/gro_tcp4.c | 4 ++--
lib/gro/gro_tcp4.h | 16 +++++++++++++---
lib/gro/gro_vxlan_tcp4.c | 4 ++--
3 files changed, 17 insertions(+), 7 deletions(-)
diff --git a/lib/gro/gro_tcp4.c b/lib/gro/gro_tcp4.c
index 7498c66141..7849a2bd1d 100644
--- a/lib/gro/gro_tcp4.c
+++ b/lib/gro/gro_tcp4.c
@@ -220,10 +220,10 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
hdr_len = pkt->l2_len + pkt->l3_len + pkt->l4_len;
/*
- * Don't process the packet which has FIN, SYN, RST, PSH, URG, ECE
+ * Don't process the packet which has FIN, SYN, RST, URG, ECE
* or CWR set.
*/
- if (tcp_hdr->tcp_flags != RTE_TCP_ACK_FLAG)
+ if (tcp_hdr->tcp_flags & (~(RTE_TCP_ACK_FLAG | RTE_TCP_PSH_FLAG)))
return -1;
/*
* Don't process the packet whose payload length is less than or
diff --git a/lib/gro/gro_tcp4.h b/lib/gro/gro_tcp4.h
index 212f97a042..2974faf228 100644
--- a/lib/gro/gro_tcp4.h
+++ b/lib/gro/gro_tcp4.h
@@ -210,7 +210,8 @@ merge_two_tcp4_packets(struct gro_tcp4_item *item,
uint16_t l2_offset)
{
struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
- uint16_t hdr_len, l2_len;
+ struct rte_tcp_hdr *head_tcp_hdr, *tail_tcp_hdr;
+ uint16_t hdr_len, l2_len, l3_offset;
if (cmp > 0) {
pkt_head = item->firstseg;
@@ -221,13 +222,22 @@ merge_two_tcp4_packets(struct gro_tcp4_item *item,
}
/* check if the IPv4 packet length is greater than the max value */
- hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len +
- pkt_head->l4_len;
+ l3_offset = l2_offset + pkt_head->l2_len + pkt_head->l3_len;
+ hdr_len = l3_offset + pkt_head->l4_len;
l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len;
if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len -
hdr_len > MAX_IPV4_PKT_LENGTH))
return 0;
+ /* merge push flag to pkt_head */
+ tail_tcp_hdr = rte_pktmbuf_mtod_offset(pkt_tail,
+ struct rte_tcp_hdr *, l3_offset);
+ if (tail_tcp_hdr->tcp_flags & RTE_TCP_PSH_FLAG) {
+ head_tcp_hdr = rte_pktmbuf_mtod_offset(pkt_head,
+ struct rte_tcp_hdr *, l3_offset);
+ head_tcp_hdr->tcp_flags |= RTE_TCP_PSH_FLAG;
+ }
+
/* remove the packet header for the tail packet */
rte_pktmbuf_adj(pkt_tail, hdr_len);
diff --git a/lib/gro/gro_vxlan_tcp4.c b/lib/gro/gro_vxlan_tcp4.c
index 3be4deb7c7..884802af0b 100644
--- a/lib/gro/gro_vxlan_tcp4.c
+++ b/lib/gro/gro_vxlan_tcp4.c
@@ -326,10 +326,10 @@ gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
/*
- * Don't process the packet which has FIN, SYN, RST, PSH, URG,
+ * Don't process the packet which has FIN, SYN, RST, URG,
* ECE or CWR set.
*/
- if (tcp_hdr->tcp_flags != RTE_TCP_ACK_FLAG)
+ if (tcp_hdr->tcp_flags & (~(RTE_TCP_ACK_FLAG | RTE_TCP_PSH_FLAG)))
return -1;
hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len +
--
2.25.1
[-- Attachment #2: Type: text/html, Size: 11288 bytes --]
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] gro: fix gro with tcp push flag
2022-07-26 6:57 ` 答复: " Jun Qiu
@ 2022-07-26 7:08 ` kumaraparameshwaran rathinavel
2022-07-27 8:44 ` Jun Qiu
0 siblings, 1 reply; 10+ messages in thread
From: kumaraparameshwaran rathinavel @ 2022-07-26 7:08 UTC (permalink / raw)
To: Jun Qiu; +Cc: dev, jiayu.hu, stable
[-- Attachment #1: Type: text/plain, Size: 5920 bytes --]
We should do it for the rte_gro_reassemble as well, with timer mode it
could lead to more duplicate ACKs. I had a proposal for the enhancement
which would handle both rte_gro_reassemble and rte_gro_reassemble_burst
but have not got any response yet.
I have a custom patch which is working fine for timer mode where there is
no packet reordering, earlier without the patch there were DUP-ACKs and
this could potentially affect the window scaling.
On Tue, Jul 26, 2022 at 12:27 PM Jun Qiu <jun.qiu@jaguarmicro.com> wrote:
> May be in rte_gro_reassemble_burst, where no delay is introduced, PUSH
> packets can be merged
>
>
>
> *发件人:* kumaraparameshwaran rathinavel <kumaraparamesh92@gmail.com>
> *发送时间:* 2022年7月26日 14:41
> *收件人:* Jun Qiu <jun.qiu@jaguarmicro.com>
> *抄送:* dev@dpdk.org; jiayu.hu@intel.com; stable@dpdk.org
> *主题:* Re: [PATCH] gro: fix gro with tcp push flag
>
>
>
>
>
>
>
> On Tue, Jul 26, 2022 at 11:48 AM Jun Qiu <jun.qiu@jaguarmicro.com> wrote:
>
> TCP data packets sometimes carry a PUSH flag. Currently,
> only the packets that do not have PUSH flag can be GROed.
> The packets that have a PUSH flag cannot be GROed, the packets
> that cannot be processed by GRO are placed last.
> In this case, the received packets may be out of order.
> For example, there are two packets mbuf1 and mbuf2. mbuf1
> contains PUSH flag, mbuf2 does not contain PUSH flag.
> After GRO processing, mbuf2 is sent for processing before mbuf1.
> This out-of-order will affect TCP processing performance and
> lead to unnecessary dup-ACK.
>
> Referring to the Linux kernel implementation, packets with PUSH
> flag can also perform GRO. And if one of the packets containing
> PUSH flag, the packets after GRO will carry PUSH flag.
>
>
>
> In case of smaller transfers in which the TCP segment size is not more
> than one MTU, it is a single TCP packet with PSH flag set, so in those
> cases we are introducing unwanted delay. I think the better approach
> would be if there are previous packets in the flow and the current packet
> received has PSH flag then coalesce with the previous packet, if lookup is
> failure and the current packet has PSH flag set then deliver it
> immediately.
>
>
>
> Fixes: 0d2cbe59b719 ("lib/gro: support TCP/IPv4")
> Cc: stable@dpdk.org
>
> Signed-off-by: Jun Qiu <jun.qiu@jaguarmicro.com>
> ---
> lib/gro/gro_tcp4.c | 4 ++--
> lib/gro/gro_tcp4.h | 16 +++++++++++++---
> lib/gro/gro_vxlan_tcp4.c | 4 ++--
> 3 files changed, 17 insertions(+), 7 deletions(-)
>
> diff --git a/lib/gro/gro_tcp4.c b/lib/gro/gro_tcp4.c
> index 7498c66141..7849a2bd1d 100644
> --- a/lib/gro/gro_tcp4.c
> +++ b/lib/gro/gro_tcp4.c
> @@ -220,10 +220,10 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
> hdr_len = pkt->l2_len + pkt->l3_len + pkt->l4_len;
>
> /*
> - * Don't process the packet which has FIN, SYN, RST, PSH, URG, ECE
> + * Don't process the packet which has FIN, SYN, RST, URG, ECE
> * or CWR set.
> */
> - if (tcp_hdr->tcp_flags != RTE_TCP_ACK_FLAG)
> + if (tcp_hdr->tcp_flags & (~(RTE_TCP_ACK_FLAG | RTE_TCP_PSH_FLAG)))
> return -1;
> /*
> * Don't process the packet whose payload length is less than or
> diff --git a/lib/gro/gro_tcp4.h b/lib/gro/gro_tcp4.h
> index 212f97a042..2974faf228 100644
> --- a/lib/gro/gro_tcp4.h
> +++ b/lib/gro/gro_tcp4.h
> @@ -210,7 +210,8 @@ merge_two_tcp4_packets(struct gro_tcp4_item *item,
> uint16_t l2_offset)
> {
> struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
> - uint16_t hdr_len, l2_len;
> + struct rte_tcp_hdr *head_tcp_hdr, *tail_tcp_hdr;
> + uint16_t hdr_len, l2_len, l3_offset;
>
> if (cmp > 0) {
> pkt_head = item->firstseg;
> @@ -221,13 +222,22 @@ merge_two_tcp4_packets(struct gro_tcp4_item *item,
> }
>
> /* check if the IPv4 packet length is greater than the max value */
> - hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len +
> - pkt_head->l4_len;
> + l3_offset = l2_offset + pkt_head->l2_len + pkt_head->l3_len;
> + hdr_len = l3_offset + pkt_head->l4_len;
> l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len;
> if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len -
> hdr_len > MAX_IPV4_PKT_LENGTH))
> return 0;
>
> + /* merge push flag to pkt_head */
> + tail_tcp_hdr = rte_pktmbuf_mtod_offset(pkt_tail,
> + struct rte_tcp_hdr *, l3_offset);
> + if (tail_tcp_hdr->tcp_flags & RTE_TCP_PSH_FLAG) {
> + head_tcp_hdr = rte_pktmbuf_mtod_offset(pkt_head,
> + struct rte_tcp_hdr *, l3_offset);
> + head_tcp_hdr->tcp_flags |= RTE_TCP_PSH_FLAG;
> + }
> +
> /* remove the packet header for the tail packet */
> rte_pktmbuf_adj(pkt_tail, hdr_len);
>
> diff --git a/lib/gro/gro_vxlan_tcp4.c b/lib/gro/gro_vxlan_tcp4.c
> index 3be4deb7c7..884802af0b 100644
> --- a/lib/gro/gro_vxlan_tcp4.c
> +++ b/lib/gro/gro_vxlan_tcp4.c
> @@ -326,10 +326,10 @@ gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
> tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
>
> /*
> - * Don't process the packet which has FIN, SYN, RST, PSH, URG,
> + * Don't process the packet which has FIN, SYN, RST, URG,
> * ECE or CWR set.
> */
> - if (tcp_hdr->tcp_flags != RTE_TCP_ACK_FLAG)
> + if (tcp_hdr->tcp_flags & (~(RTE_TCP_ACK_FLAG | RTE_TCP_PSH_FLAG)))
> return -1;
>
> hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len +
> --
> 2.25.1
>
>
[-- Attachment #2: Type: text/html, Size: 9578 bytes --]
^ permalink raw reply [flat|nested] 10+ messages in thread
* RE: [PATCH] gro: fix gro with tcp push flag
2022-07-26 7:08 ` kumaraparameshwaran rathinavel
@ 2022-07-27 8:44 ` Jun Qiu
2022-08-29 10:36 ` Thomas Monjalon
0 siblings, 1 reply; 10+ messages in thread
From: Jun Qiu @ 2022-07-27 8:44 UTC (permalink / raw)
To: kumaraparameshwaran rathinavel; +Cc: dev, jiayu.hu, stable
[-- Attachment #1: Type: text/plain, Size: 6423 bytes --]
I think this delay is tolerable.
Many TCP stacks do not take special care of PUSH packets when receiving them. All received packets with data will trigger Poll events.
The patch is simple to implement and easy to understand, similar to how the kernel stack is handled.
From: kumaraparameshwaran rathinavel <kumaraparamesh92@gmail.com>
Sent: Tuesday, July 26, 2022 3:08 PM
To: Jun Qiu <jun.qiu@jaguarmicro.com>
Cc: dev@dpdk.org; jiayu.hu@intel.com; stable@dpdk.org
Subject: Re: [PATCH] gro: fix gro with tcp push flag
We should do it for the rte_gro_reassemble as well, with timer mode it could lead to more duplicate ACKs. I had a proposal for the enhancement which would handle both rte_gro_reassemble and rte_gro_reassemble_burst but have not got any response yet.
I have a custom patch which is working fine for timer mode where there is no packet reordering, earlier without the patch there were DUP-ACKs and this could potentially affect the window scaling.
On Tue, Jul 26, 2022 at 12:27 PM Jun Qiu <jun.qiu@jaguarmicro.com<mailto:jun.qiu@jaguarmicro.com>> wrote:
May be in rte_gro_reassemble_burst, where no delay is introduced, PUSH packets can be merged
发件人: kumaraparameshwaran rathinavel <kumaraparamesh92@gmail.com<mailto:kumaraparamesh92@gmail.com>>
发送时间: 2022年7月26日 14:41
收件人: Jun Qiu <jun.qiu@jaguarmicro.com<mailto:jun.qiu@jaguarmicro.com>>
抄送: dev@dpdk.org<mailto:dev@dpdk.org>; jiayu.hu@intel.com<mailto:jiayu.hu@intel.com>; stable@dpdk.org<mailto:stable@dpdk.org>
主题: Re: [PATCH] gro: fix gro with tcp push flag
On Tue, Jul 26, 2022 at 11:48 AM Jun Qiu <jun.qiu@jaguarmicro.com<mailto:jun.qiu@jaguarmicro.com>> wrote:
TCP data packets sometimes carry a PUSH flag. Currently,
only the packets that do not have PUSH flag can be GROed.
The packets that have a PUSH flag cannot be GROed, the packets
that cannot be processed by GRO are placed last.
In this case, the received packets may be out of order.
For example, there are two packets mbuf1 and mbuf2. mbuf1
contains PUSH flag, mbuf2 does not contain PUSH flag.
After GRO processing, mbuf2 is sent for processing before mbuf1.
This out-of-order will affect TCP processing performance and
lead to unnecessary dup-ACK.
Referring to the Linux kernel implementation, packets with PUSH
flag can also perform GRO. And if one of the packets containing
PUSH flag, the packets after GRO will carry PUSH flag.
In case of smaller transfers in which the TCP segment size is not more than one MTU, it is a single TCP packet with PSH flag set, so in those cases we are introducing unwanted delay. I think the better approach would be if there are previous packets in the flow and the current packet received has PSH flag then coalesce with the previous packet, if lookup is failure and the current packet has PSH flag set then deliver it immediately.
Fixes: 0d2cbe59b719 ("lib/gro: support TCP/IPv4")
Cc: stable@dpdk.org<mailto:stable@dpdk.org>
Signed-off-by: Jun Qiu <jun.qiu@jaguarmicro.com<mailto:jun.qiu@jaguarmicro.com>>
---
lib/gro/gro_tcp4.c | 4 ++--
lib/gro/gro_tcp4.h | 16 +++++++++++++---
lib/gro/gro_vxlan_tcp4.c | 4 ++--
3 files changed, 17 insertions(+), 7 deletions(-)
diff --git a/lib/gro/gro_tcp4.c b/lib/gro/gro_tcp4.c
index 7498c66141..7849a2bd1d 100644
--- a/lib/gro/gro_tcp4.c
+++ b/lib/gro/gro_tcp4.c
@@ -220,10 +220,10 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
hdr_len = pkt->l2_len + pkt->l3_len + pkt->l4_len;
/*
- * Don't process the packet which has FIN, SYN, RST, PSH, URG, ECE
+ * Don't process the packet which has FIN, SYN, RST, URG, ECE
* or CWR set.
*/
- if (tcp_hdr->tcp_flags != RTE_TCP_ACK_FLAG)
+ if (tcp_hdr->tcp_flags & (~(RTE_TCP_ACK_FLAG | RTE_TCP_PSH_FLAG)))
return -1;
/*
* Don't process the packet whose payload length is less than or
diff --git a/lib/gro/gro_tcp4.h b/lib/gro/gro_tcp4.h
index 212f97a042..2974faf228 100644
--- a/lib/gro/gro_tcp4.h
+++ b/lib/gro/gro_tcp4.h
@@ -210,7 +210,8 @@ merge_two_tcp4_packets(struct gro_tcp4_item *item,
uint16_t l2_offset)
{
struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
- uint16_t hdr_len, l2_len;
+ struct rte_tcp_hdr *head_tcp_hdr, *tail_tcp_hdr;
+ uint16_t hdr_len, l2_len, l3_offset;
if (cmp > 0) {
pkt_head = item->firstseg;
@@ -221,13 +222,22 @@ merge_two_tcp4_packets(struct gro_tcp4_item *item,
}
/* check if the IPv4 packet length is greater than the max value */
- hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len +
- pkt_head->l4_len;
+ l3_offset = l2_offset + pkt_head->l2_len + pkt_head->l3_len;
+ hdr_len = l3_offset + pkt_head->l4_len;
l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len;
if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len -
hdr_len > MAX_IPV4_PKT_LENGTH))
return 0;
+ /* merge push flag to pkt_head */
+ tail_tcp_hdr = rte_pktmbuf_mtod_offset(pkt_tail,
+ struct rte_tcp_hdr *, l3_offset);
+ if (tail_tcp_hdr->tcp_flags & RTE_TCP_PSH_FLAG) {
+ head_tcp_hdr = rte_pktmbuf_mtod_offset(pkt_head,
+ struct rte_tcp_hdr *, l3_offset);
+ head_tcp_hdr->tcp_flags |= RTE_TCP_PSH_FLAG;
+ }
+
/* remove the packet header for the tail packet */
rte_pktmbuf_adj(pkt_tail, hdr_len);
diff --git a/lib/gro/gro_vxlan_tcp4.c b/lib/gro/gro_vxlan_tcp4.c
index 3be4deb7c7..884802af0b 100644
--- a/lib/gro/gro_vxlan_tcp4.c
+++ b/lib/gro/gro_vxlan_tcp4.c
@@ -326,10 +326,10 @@ gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
/*
- * Don't process the packet which has FIN, SYN, RST, PSH, URG,
+ * Don't process the packet which has FIN, SYN, RST, URG,
* ECE or CWR set.
*/
- if (tcp_hdr->tcp_flags != RTE_TCP_ACK_FLAG)
+ if (tcp_hdr->tcp_flags & (~(RTE_TCP_ACK_FLAG | RTE_TCP_PSH_FLAG)))
return -1;
hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len +
--
2.25.1
[-- Attachment #2: Type: text/html, Size: 15739 bytes --]
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] gro: fix gro with tcp push flag
2022-07-27 8:44 ` Jun Qiu
@ 2022-08-29 10:36 ` Thomas Monjalon
2022-08-30 8:43 ` Hu, Jiayu
0 siblings, 1 reply; 10+ messages in thread
From: Thomas Monjalon @ 2022-08-29 10:36 UTC (permalink / raw)
To: jiayu.hu; +Cc: kumaraparameshwaran rathinavel, dev, Jun Qiu
Jiayu, please could you help in this review?
27/07/2022 10:44, Jun Qiu:
> I think this delay is tolerable.
> Many TCP stacks do not take special care of PUSH packets when receiving them. All received packets with data will trigger Poll events.
>
> The patch is simple to implement and easy to understand, similar to how the kernel stack is handled.
>
> From: kumaraparameshwaran rathinavel <kumaraparamesh92@gmail.com>
> Sent: Tuesday, July 26, 2022 3:08 PM
> To: Jun Qiu <jun.qiu@jaguarmicro.com>
> Cc: dev@dpdk.org; jiayu.hu@intel.com; stable@dpdk.org
> Subject: Re: [PATCH] gro: fix gro with tcp push flag
>
> We should do it for the rte_gro_reassemble as well, with timer mode it could lead to more duplicate ACKs. I had a proposal for the enhancement which would handle both rte_gro_reassemble and rte_gro_reassemble_burst but have not got any response yet.
>
> I have a custom patch which is working fine for timer mode where there is no packet reordering, earlier without the patch there were DUP-ACKs and this could potentially affect the window scaling.
>
> On Tue, Jul 26, 2022 at 12:27 PM Jun Qiu <jun.qiu@jaguarmicro.com<mailto:jun.qiu@jaguarmicro.com>> wrote:
> May be in rte_gro_reassemble_burst, where no delay is introduced, PUSH packets can be merged
>
> 发件人: kumaraparameshwaran rathinavel <kumaraparamesh92@gmail.com<mailto:kumaraparamesh92@gmail.com>>
> 发送时间: 2022年7月26日 14:41
> 收件人: Jun Qiu <jun.qiu@jaguarmicro.com<mailto:jun.qiu@jaguarmicro.com>>
> 抄送: dev@dpdk.org<mailto:dev@dpdk.org>; jiayu.hu@intel.com<mailto:jiayu.hu@intel.com>; stable@dpdk.org<mailto:stable@dpdk.org>
> 主题: Re: [PATCH] gro: fix gro with tcp push flag
>
>
>
> On Tue, Jul 26, 2022 at 11:48 AM Jun Qiu <jun.qiu@jaguarmicro.com<mailto:jun.qiu@jaguarmicro.com>> wrote:
> TCP data packets sometimes carry a PUSH flag. Currently,
> only the packets that do not have PUSH flag can be GROed.
> The packets that have a PUSH flag cannot be GROed, the packets
> that cannot be processed by GRO are placed last.
> In this case, the received packets may be out of order.
> For example, there are two packets mbuf1 and mbuf2. mbuf1
> contains PUSH flag, mbuf2 does not contain PUSH flag.
> After GRO processing, mbuf2 is sent for processing before mbuf1.
> This out-of-order will affect TCP processing performance and
> lead to unnecessary dup-ACK.
>
> Referring to the Linux kernel implementation, packets with PUSH
> flag can also perform GRO. And if one of the packets containing
> PUSH flag, the packets after GRO will carry PUSH flag.
>
> In case of smaller transfers in which the TCP segment size is not more than one MTU, it is a single TCP packet with PSH flag set, so in those cases we are introducing unwanted delay. I think the better approach would be if there are previous packets in the flow and the current packet received has PSH flag then coalesce with the previous packet, if lookup is failure and the current packet has PSH flag set then deliver it immediately.
>
> Fixes: 0d2cbe59b719 ("lib/gro: support TCP/IPv4")
> Cc: stable@dpdk.org<mailto:stable@dpdk.org>
>
> Signed-off-by: Jun Qiu <jun.qiu@jaguarmicro.com<mailto:jun.qiu@jaguarmicro.com>>
> ---
> lib/gro/gro_tcp4.c | 4 ++--
> lib/gro/gro_tcp4.h | 16 +++++++++++++---
> lib/gro/gro_vxlan_tcp4.c | 4 ++--
> 3 files changed, 17 insertions(+), 7 deletions(-)
>
> diff --git a/lib/gro/gro_tcp4.c b/lib/gro/gro_tcp4.c
> index 7498c66141..7849a2bd1d 100644
> --- a/lib/gro/gro_tcp4.c
> +++ b/lib/gro/gro_tcp4.c
> @@ -220,10 +220,10 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
> hdr_len = pkt->l2_len + pkt->l3_len + pkt->l4_len;
>
> /*
> - * Don't process the packet which has FIN, SYN, RST, PSH, URG, ECE
> + * Don't process the packet which has FIN, SYN, RST, URG, ECE
> * or CWR set.
> */
> - if (tcp_hdr->tcp_flags != RTE_TCP_ACK_FLAG)
> + if (tcp_hdr->tcp_flags & (~(RTE_TCP_ACK_FLAG | RTE_TCP_PSH_FLAG)))
> return -1;
> /*
> * Don't process the packet whose payload length is less than or
> diff --git a/lib/gro/gro_tcp4.h b/lib/gro/gro_tcp4.h
> index 212f97a042..2974faf228 100644
> --- a/lib/gro/gro_tcp4.h
> +++ b/lib/gro/gro_tcp4.h
> @@ -210,7 +210,8 @@ merge_two_tcp4_packets(struct gro_tcp4_item *item,
> uint16_t l2_offset)
> {
> struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
> - uint16_t hdr_len, l2_len;
> + struct rte_tcp_hdr *head_tcp_hdr, *tail_tcp_hdr;
> + uint16_t hdr_len, l2_len, l3_offset;
>
> if (cmp > 0) {
> pkt_head = item->firstseg;
> @@ -221,13 +222,22 @@ merge_two_tcp4_packets(struct gro_tcp4_item *item,
> }
>
> /* check if the IPv4 packet length is greater than the max value */
> - hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len +
> - pkt_head->l4_len;
> + l3_offset = l2_offset + pkt_head->l2_len + pkt_head->l3_len;
> + hdr_len = l3_offset + pkt_head->l4_len;
> l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len;
> if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len -
> hdr_len > MAX_IPV4_PKT_LENGTH))
> return 0;
>
> + /* merge push flag to pkt_head */
> + tail_tcp_hdr = rte_pktmbuf_mtod_offset(pkt_tail,
> + struct rte_tcp_hdr *, l3_offset);
> + if (tail_tcp_hdr->tcp_flags & RTE_TCP_PSH_FLAG) {
> + head_tcp_hdr = rte_pktmbuf_mtod_offset(pkt_head,
> + struct rte_tcp_hdr *, l3_offset);
> + head_tcp_hdr->tcp_flags |= RTE_TCP_PSH_FLAG;
> + }
> +
> /* remove the packet header for the tail packet */
> rte_pktmbuf_adj(pkt_tail, hdr_len);
>
> diff --git a/lib/gro/gro_vxlan_tcp4.c b/lib/gro/gro_vxlan_tcp4.c
> index 3be4deb7c7..884802af0b 100644
> --- a/lib/gro/gro_vxlan_tcp4.c
> +++ b/lib/gro/gro_vxlan_tcp4.c
> @@ -326,10 +326,10 @@ gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
> tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
>
> /*
> - * Don't process the packet which has FIN, SYN, RST, PSH, URG,
> + * Don't process the packet which has FIN, SYN, RST, URG,
> * ECE or CWR set.
> */
> - if (tcp_hdr->tcp_flags != RTE_TCP_ACK_FLAG)
> + if (tcp_hdr->tcp_flags & (~(RTE_TCP_ACK_FLAG | RTE_TCP_PSH_FLAG)))
> return -1;
>
> hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len +
> --
> 2.25.1
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* RE: [PATCH] gro: fix gro with tcp push flag
2022-08-29 10:36 ` Thomas Monjalon
@ 2022-08-30 8:43 ` Hu, Jiayu
2022-09-05 4:02 ` Hu, Jiayu
0 siblings, 1 reply; 10+ messages in thread
From: Hu, Jiayu @ 2022-08-30 8:43 UTC (permalink / raw)
To: Thomas Monjalon; +Cc: kumaraparameshwaran rathinavel, dev, Jun Qiu
Sure, I will review it.
> -----Original Message-----
> From: Thomas Monjalon <thomas@monjalon.net>
> Sent: Monday, August 29, 2022 6:36 PM
> To: Hu, Jiayu <jiayu.hu@intel.com>
> Cc: kumaraparameshwaran rathinavel <kumaraparamesh92@gmail.com>;
> dev@dpdk.org; Jun Qiu <jun.qiu@jaguarmicro.com>
> Subject: Re: [PATCH] gro: fix gro with tcp push flag
>
> Jiayu, please could you help in this review?
>
>
> 27/07/2022 10:44, Jun Qiu:
> > I think this delay is tolerable.
> > Many TCP stacks do not take special care of PUSH packets when receiving
> them. All received packets with data will trigger Poll events.
> >
> > The patch is simple to implement and easy to understand, similar to how
> the kernel stack is handled.
> >
> > From: kumaraparameshwaran rathinavel <kumaraparamesh92@gmail.com>
> > Sent: Tuesday, July 26, 2022 3:08 PM
> > To: Jun Qiu <jun.qiu@jaguarmicro.com>
> > Cc: dev@dpdk.org; jiayu.hu@intel.com; stable@dpdk.org
> > Subject: Re: [PATCH] gro: fix gro with tcp push flag
> >
> > We should do it for the rte_gro_reassemble as well, with timer mode it
> could lead to more duplicate ACKs. I had a proposal for the enhancement
> which would handle both rte_gro_reassemble and
> rte_gro_reassemble_burst but have not got any response yet.
> >
> > I have a custom patch which is working fine for timer mode where there is
> no packet reordering, earlier without the patch there were DUP-ACKs and
> this could potentially affect the window scaling.
> >
> > On Tue, Jul 26, 2022 at 12:27 PM Jun Qiu
> <jun.qiu@jaguarmicro.com<mailto:jun.qiu@jaguarmicro.com>> wrote:
> > May be in rte_gro_reassemble_burst, where no delay is introduced, PUSH
> > packets can be merged
> >
> > 发件人: kumaraparameshwaran rathinavel
> > <kumaraparamesh92@gmail.com<mailto:kumaraparamesh92@gmail.com>>
> > 发送时间: 2022年7月26日 14:41
> > 收件人: Jun Qiu
> <jun.qiu@jaguarmicro.com<mailto:jun.qiu@jaguarmicro.com>>
> > 抄送: dev@dpdk.org<mailto:dev@dpdk.org>;
> > jiayu.hu@intel.com<mailto:jiayu.hu@intel.com>;
> > stable@dpdk.org<mailto:stable@dpdk.org>
> > 主题: Re: [PATCH] gro: fix gro with tcp push flag
> >
> >
> >
> > On Tue, Jul 26, 2022 at 11:48 AM Jun Qiu
> <jun.qiu@jaguarmicro.com<mailto:jun.qiu@jaguarmicro.com>> wrote:
> > TCP data packets sometimes carry a PUSH flag. Currently, only the
> > packets that do not have PUSH flag can be GROed.
> > The packets that have a PUSH flag cannot be GROed, the packets that
> > cannot be processed by GRO are placed last.
> > In this case, the received packets may be out of order.
> > For example, there are two packets mbuf1 and mbuf2. mbuf1 contains
> > PUSH flag, mbuf2 does not contain PUSH flag.
> > After GRO processing, mbuf2 is sent for processing before mbuf1.
> > This out-of-order will affect TCP processing performance and lead to
> > unnecessary dup-ACK.
> >
> > Referring to the Linux kernel implementation, packets with PUSH flag
> > can also perform GRO. And if one of the packets containing PUSH flag,
> > the packets after GRO will carry PUSH flag.
> >
> > In case of smaller transfers in which the TCP segment size is not more than
> one MTU, it is a single TCP packet with PSH flag set, so in those cases we are
> introducing unwanted delay. I think the better approach would be if there
> are previous packets in the flow and the current packet received has PSH flag
> then coalesce with the previous packet, if lookup is failure and the current
> packet has PSH flag set then deliver it immediately.
> >
> > Fixes: 0d2cbe59b719 ("lib/gro: support TCP/IPv4")
> > Cc: stable@dpdk.org<mailto:stable@dpdk.org>
> >
> > Signed-off-by: Jun Qiu
> > <jun.qiu@jaguarmicro.com<mailto:jun.qiu@jaguarmicro.com>>
> > ---
> > lib/gro/gro_tcp4.c | 4 ++--
> > lib/gro/gro_tcp4.h | 16 +++++++++++++---
> > lib/gro/gro_vxlan_tcp4.c | 4 ++--
> > 3 files changed, 17 insertions(+), 7 deletions(-)
> >
> > diff --git a/lib/gro/gro_tcp4.c b/lib/gro/gro_tcp4.c index
> > 7498c66141..7849a2bd1d 100644
> > --- a/lib/gro/gro_tcp4.c
> > +++ b/lib/gro/gro_tcp4.c
> > @@ -220,10 +220,10 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
> > hdr_len = pkt->l2_len + pkt->l3_len + pkt->l4_len;
> >
> > /*
> > - * Don't process the packet which has FIN, SYN, RST, PSH, URG, ECE
> > + * Don't process the packet which has FIN, SYN, RST, URG, ECE
> > * or CWR set.
> > */
> > - if (tcp_hdr->tcp_flags != RTE_TCP_ACK_FLAG)
> > + if (tcp_hdr->tcp_flags & (~(RTE_TCP_ACK_FLAG |
> > + RTE_TCP_PSH_FLAG)))
> > return -1;
> > /*
> > * Don't process the packet whose payload length is less than
> > or diff --git a/lib/gro/gro_tcp4.h b/lib/gro/gro_tcp4.h index
> > 212f97a042..2974faf228 100644
> > --- a/lib/gro/gro_tcp4.h
> > +++ b/lib/gro/gro_tcp4.h
> > @@ -210,7 +210,8 @@ merge_two_tcp4_packets(struct gro_tcp4_item
> *item,
> > uint16_t l2_offset)
> > {
> > struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
> > - uint16_t hdr_len, l2_len;
> > + struct rte_tcp_hdr *head_tcp_hdr, *tail_tcp_hdr;
> > + uint16_t hdr_len, l2_len, l3_offset;
> >
> > if (cmp > 0) {
> > pkt_head = item->firstseg; @@ -221,13 +222,22 @@
> > merge_two_tcp4_packets(struct gro_tcp4_item *item,
> > }
> >
> > /* check if the IPv4 packet length is greater than the max value */
> > - hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len +
> > - pkt_head->l4_len;
> > + l3_offset = l2_offset + pkt_head->l2_len + pkt_head->l3_len;
> > + hdr_len = l3_offset + pkt_head->l4_len;
> > l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len;
> > if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len -
> > hdr_len > MAX_IPV4_PKT_LENGTH))
> > return 0;
> >
> > + /* merge push flag to pkt_head */
> > + tail_tcp_hdr = rte_pktmbuf_mtod_offset(pkt_tail,
> > + struct rte_tcp_hdr *, l3_offset);
> > + if (tail_tcp_hdr->tcp_flags & RTE_TCP_PSH_FLAG) {
> > + head_tcp_hdr = rte_pktmbuf_mtod_offset(pkt_head,
> > + struct rte_tcp_hdr *, l3_offset);
> > + head_tcp_hdr->tcp_flags |= RTE_TCP_PSH_FLAG;
> > + }
> > +
> > /* remove the packet header for the tail packet */
> > rte_pktmbuf_adj(pkt_tail, hdr_len);
> >
> > diff --git a/lib/gro/gro_vxlan_tcp4.c b/lib/gro/gro_vxlan_tcp4.c index
> > 3be4deb7c7..884802af0b 100644
> > --- a/lib/gro/gro_vxlan_tcp4.c
> > +++ b/lib/gro/gro_vxlan_tcp4.c
> > @@ -326,10 +326,10 @@ gro_vxlan_tcp4_reassemble(struct rte_mbuf
> *pkt,
> > tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr +
> > pkt->l3_len);
> >
> > /*
> > - * Don't process the packet which has FIN, SYN, RST, PSH, URG,
> > + * Don't process the packet which has FIN, SYN, RST, URG,
> > * ECE or CWR set.
> > */
> > - if (tcp_hdr->tcp_flags != RTE_TCP_ACK_FLAG)
> > + if (tcp_hdr->tcp_flags & (~(RTE_TCP_ACK_FLAG |
> > + RTE_TCP_PSH_FLAG)))
> > return -1;
> >
> > hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len
> > +
> > --
> > 2.25.1
> >
>
>
>
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* RE: [PATCH] gro: fix gro with tcp push flag
2022-08-30 8:43 ` Hu, Jiayu
@ 2022-09-05 4:02 ` Hu, Jiayu
2022-09-05 7:07 ` Jun Qiu
0 siblings, 1 reply; 10+ messages in thread
From: Hu, Jiayu @ 2022-09-05 4:02 UTC (permalink / raw)
To: Thomas Monjalon; +Cc: kumaraparameshwaran rathinavel, dev, Jun Qiu
Hi Qiu,
I cannot find the original mail, and please see replies inline.
> -----Original Message-----
> From: Hu, Jiayu
> Sent: Tuesday, August 30, 2022 4:44 PM
> To: 'Thomas Monjalon' <thomas@monjalon.net>
> Cc: kumaraparameshwaran rathinavel <kumaraparamesh92@gmail.com>;
> dev@dpdk.org; Jun Qiu <jun.qiu@jaguarmicro.com>
> Subject: RE: [PATCH] gro: fix gro with tcp push flag
>
> Sure, I will review it.
>
> > -----Original Message-----
> > From: Thomas Monjalon <thomas@monjalon.net>
> > Sent: Monday, August 29, 2022 6:36 PM
> > To: Hu, Jiayu <jiayu.hu@intel.com>
> > Cc: kumaraparameshwaran rathinavel <kumaraparamesh92@gmail.com>;
> > dev@dpdk.org; Jun Qiu <jun.qiu@jaguarmicro.com>
> > Subject: Re: [PATCH] gro: fix gro with tcp push flag
> >
> > Jiayu, please could you help in this review?
> >
> >
> > 27/07/2022 10:44, Jun Qiu:
> > > I think this delay is tolerable.
> > > Many TCP stacks do not take special care of PUSH packets when
> > > receiving
> > them. All received packets with data will trigger Poll events.
> > >
> > > The patch is simple to implement and easy to understand, similar to
> > > how
> > the kernel stack is handled.
> > >
> > > From: kumaraparameshwaran rathinavel
> <kumaraparamesh92@gmail.com>
> > > Sent: Tuesday, July 26, 2022 3:08 PM
> > > To: Jun Qiu <jun.qiu@jaguarmicro.com>
> > > Cc: dev@dpdk.org; jiayu.hu@intel.com; stable@dpdk.org
> > > Subject: Re: [PATCH] gro: fix gro with tcp push flag
> > >
> > > We should do it for the rte_gro_reassemble as well, with timer mode
> > > it
> > could lead to more duplicate ACKs. I had a proposal for the
> > enhancement which would handle both rte_gro_reassemble and
> > rte_gro_reassemble_burst but have not got any response yet.
> > >
> > > I have a custom patch which is working fine for timer mode where
> > > there is
> > no packet reordering, earlier without the patch there were DUP-ACKs
> > and this could potentially affect the window scaling.
> > >
> > > On Tue, Jul 26, 2022 at 12:27 PM Jun Qiu
> > <jun.qiu@jaguarmicro.com<mailto:jun.qiu@jaguarmicro.com>> wrote:
> > > May be in rte_gro_reassemble_burst, where no delay is introduced,
> > > PUSH packets can be merged
> > >
> > > 发件人: kumaraparameshwaran rathinavel
> > >
> <kumaraparamesh92@gmail.com<mailto:kumaraparamesh92@gmail.com>>
> > > 发送时间: 2022年7月26日 14:41
> > > 收件人: Jun Qiu
> > <jun.qiu@jaguarmicro.com<mailto:jun.qiu@jaguarmicro.com>>
> > > 抄送: dev@dpdk.org<mailto:dev@dpdk.org>;
> > > jiayu.hu@intel.com<mailto:jiayu.hu@intel.com>;
> > > stable@dpdk.org<mailto:stable@dpdk.org>
> > > 主题: Re: [PATCH] gro: fix gro with tcp push flag
> > >
> > >
> > >
> > > On Tue, Jul 26, 2022 at 11:48 AM Jun Qiu
> > <jun.qiu@jaguarmicro.com<mailto:jun.qiu@jaguarmicro.com>> wrote:
> > > TCP data packets sometimes carry a PUSH flag. Currently, only the
> > > packets that do not have PUSH flag can be GROed.
> > > The packets that have a PUSH flag cannot be GROed, the packets that
> > > cannot be processed by GRO are placed last.
> > > In this case, the received packets may be out of order.
> > > For example, there are two packets mbuf1 and mbuf2. mbuf1 contains
> > > PUSH flag, mbuf2 does not contain PUSH flag.
> > > After GRO processing, mbuf2 is sent for processing before mbuf1.
> > > This out-of-order will affect TCP processing performance and lead to
> > > unnecessary dup-ACK.
> > >
> > > Referring to the Linux kernel implementation, packets with PUSH flag
> > > can also perform GRO. And if one of the packets containing PUSH
> > > flag, the packets after GRO will carry PUSH flag.
> > >
> > > In case of smaller transfers in which the TCP segment size is not
> > > more than
> > one MTU, it is a single TCP packet with PSH flag set, so in those
> > cases we are introducing unwanted delay. I think the better approach
> > would be if there are previous packets in the flow and the current
> > packet received has PSH flag then coalesce with the previous packet,
> > if lookup is failure and the current packet has PSH flag set then deliver it
> immediately.
I agree with Kuma. The implementation in the patch will make PSH packet
wait in the gro table. For a PSH packet, we need to try to merge it with
previous packets in the table first. If merge successfully, the merged packet
needs to mark with PSH flag and deliver it to app then; If not, this
PSH packet also needs to be delivered to app immediately.
Thanks,
Jiayu
> > >
> > > Fixes: 0d2cbe59b719 ("lib/gro: support TCP/IPv4")
> > > Cc: stable@dpdk.org<mailto:stable@dpdk.org>
> > >
> > > Signed-off-by: Jun Qiu
> > > <jun.qiu@jaguarmicro.com<mailto:jun.qiu@jaguarmicro.com>>
> > > ---
> > > lib/gro/gro_tcp4.c | 4 ++--
> > > lib/gro/gro_tcp4.h | 16 +++++++++++++---
> > > lib/gro/gro_vxlan_tcp4.c | 4 ++--
> > > 3 files changed, 17 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/lib/gro/gro_tcp4.c b/lib/gro/gro_tcp4.c index
> > > 7498c66141..7849a2bd1d 100644
> > > --- a/lib/gro/gro_tcp4.c
> > > +++ b/lib/gro/gro_tcp4.c
> > > @@ -220,10 +220,10 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
> > > hdr_len = pkt->l2_len + pkt->l3_len + pkt->l4_len;
> > >
> > > /*
> > > - * Don't process the packet which has FIN, SYN, RST, PSH, URG, ECE
> > > + * Don't process the packet which has FIN, SYN, RST, URG,
> > > + ECE
> > > * or CWR set.
> > > */
> > > - if (tcp_hdr->tcp_flags != RTE_TCP_ACK_FLAG)
> > > + if (tcp_hdr->tcp_flags & (~(RTE_TCP_ACK_FLAG |
> > > + RTE_TCP_PSH_FLAG)))
> > > return -1;
> > > /*
> > > * Don't process the packet whose payload length is less
> > > than or diff --git a/lib/gro/gro_tcp4.h b/lib/gro/gro_tcp4.h index
> > > 212f97a042..2974faf228 100644
> > > --- a/lib/gro/gro_tcp4.h
> > > +++ b/lib/gro/gro_tcp4.h
> > > @@ -210,7 +210,8 @@ merge_two_tcp4_packets(struct gro_tcp4_item
> > *item,
> > > uint16_t l2_offset)
> > > {
> > > struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
> > > - uint16_t hdr_len, l2_len;
> > > + struct rte_tcp_hdr *head_tcp_hdr, *tail_tcp_hdr;
> > > + uint16_t hdr_len, l2_len, l3_offset;
> > >
> > > if (cmp > 0) {
> > > pkt_head = item->firstseg; @@ -221,13 +222,22 @@
> > > merge_two_tcp4_packets(struct gro_tcp4_item *item,
> > > }
> > >
> > > /* check if the IPv4 packet length is greater than the max value */
> > > - hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len +
> > > - pkt_head->l4_len;
> > > + l3_offset = l2_offset + pkt_head->l2_len + pkt_head->l3_len;
> > > + hdr_len = l3_offset + pkt_head->l4_len;
> > > l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len;
> > > if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len -
> > > hdr_len > MAX_IPV4_PKT_LENGTH))
> > > return 0;
> > >
> > > + /* merge push flag to pkt_head */
> > > + tail_tcp_hdr = rte_pktmbuf_mtod_offset(pkt_tail,
> > > + struct rte_tcp_hdr *, l3_offset);
> > > + if (tail_tcp_hdr->tcp_flags & RTE_TCP_PSH_FLAG) {
> > > + head_tcp_hdr = rte_pktmbuf_mtod_offset(pkt_head,
> > > + struct rte_tcp_hdr *, l3_offset);
> > > + head_tcp_hdr->tcp_flags |= RTE_TCP_PSH_FLAG;
> > > + }
> > > +
> > > /* remove the packet header for the tail packet */
> > > rte_pktmbuf_adj(pkt_tail, hdr_len);
> > >
> > > diff --git a/lib/gro/gro_vxlan_tcp4.c b/lib/gro/gro_vxlan_tcp4.c
> > > index 3be4deb7c7..884802af0b 100644
> > > --- a/lib/gro/gro_vxlan_tcp4.c
> > > +++ b/lib/gro/gro_vxlan_tcp4.c
> > > @@ -326,10 +326,10 @@ gro_vxlan_tcp4_reassemble(struct rte_mbuf
> > *pkt,
> > > tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr +
> > > pkt->l3_len);
> > >
> > > /*
> > > - * Don't process the packet which has FIN, SYN, RST, PSH, URG,
> > > + * Don't process the packet which has FIN, SYN, RST, URG,
> > > * ECE or CWR set.
> > > */
> > > - if (tcp_hdr->tcp_flags != RTE_TCP_ACK_FLAG)
> > > + if (tcp_hdr->tcp_flags & (~(RTE_TCP_ACK_FLAG |
> > > + RTE_TCP_PSH_FLAG)))
> > > return -1;
> > >
> > > hdr_len = pkt->outer_l2_len + pkt->outer_l3_len +
> > > pkt->l2_len
> > > +
> > > --
> > > 2.25.1
> > >
> >
> >
> >
> >
^ permalink raw reply [flat|nested] 10+ messages in thread
* RE: [PATCH] gro: fix gro with tcp push flag
2022-09-05 4:02 ` Hu, Jiayu
@ 2022-09-05 7:07 ` Jun Qiu
0 siblings, 0 replies; 10+ messages in thread
From: Jun Qiu @ 2022-09-05 7:07 UTC (permalink / raw)
To: Hu, Jiayu, Thomas Monjalon; +Cc: kumaraparameshwaran rathinavel, dev
Of course, the method you mentioned can solve the delay problem perfectly, but GRO performance will be degraded.
In Mellanox's LRO implementation, there is an lro_psh_flag flag that, if set, allows merging packets with push flags regardless of latency.
My solution is a simple implementation with lro_psh_flag set by default.
-----Original Message-----
From: Hu, Jiayu <jiayu.hu@intel.com>
Sent: Monday, September 5, 2022 12:03 PM
To: Thomas Monjalon <thomas@monjalon.net>
Cc: kumaraparameshwaran rathinavel <kumaraparamesh92@gmail.com>; dev@dpdk.org; Jun Qiu <jun.qiu@jaguarmicro.com>
Subject: RE: [PATCH] gro: fix gro with tcp push flag
Hi Qiu,
I cannot find the original mail, and please see replies inline.
> -----Original Message-----
> From: Hu, Jiayu
> Sent: Tuesday, August 30, 2022 4:44 PM
> To: 'Thomas Monjalon' <thomas@monjalon.net>
> Cc: kumaraparameshwaran rathinavel <kumaraparamesh92@gmail.com>;
> dev@dpdk.org; Jun Qiu <jun.qiu@jaguarmicro.com>
> Subject: RE: [PATCH] gro: fix gro with tcp push flag
>
> Sure, I will review it.
>
> > -----Original Message-----
> > From: Thomas Monjalon <thomas@monjalon.net>
> > Sent: Monday, August 29, 2022 6:36 PM
> > To: Hu, Jiayu <jiayu.hu@intel.com>
> > Cc: kumaraparameshwaran rathinavel <kumaraparamesh92@gmail.com>;
> > dev@dpdk.org; Jun Qiu <jun.qiu@jaguarmicro.com>
> > Subject: Re: [PATCH] gro: fix gro with tcp push flag
> >
> > Jiayu, please could you help in this review?
> >
> >
> > 27/07/2022 10:44, Jun Qiu:
> > > I think this delay is tolerable.
> > > Many TCP stacks do not take special care of PUSH packets when
> > > receiving
> > them. All received packets with data will trigger Poll events.
> > >
> > > The patch is simple to implement and easy to understand, similar
> > > to how
> > the kernel stack is handled.
> > >
> > > From: kumaraparameshwaran rathinavel
> <kumaraparamesh92@gmail.com>
> > > Sent: Tuesday, July 26, 2022 3:08 PM
> > > To: Jun Qiu <jun.qiu@jaguarmicro.com>
> > > Cc: dev@dpdk.org; jiayu.hu@intel.com; stable@dpdk.org
> > > Subject: Re: [PATCH] gro: fix gro with tcp push flag
> > >
> > > We should do it for the rte_gro_reassemble as well, with timer
> > > mode it
> > could lead to more duplicate ACKs. I had a proposal for the
> > enhancement which would handle both rte_gro_reassemble and
> > rte_gro_reassemble_burst but have not got any response yet.
> > >
> > > I have a custom patch which is working fine for timer mode where
> > > there is
> > no packet reordering, earlier without the patch there were DUP-ACKs
> > and this could potentially affect the window scaling.
> > >
> > > On Tue, Jul 26, 2022 at 12:27 PM Jun Qiu
> > <jun.qiu@jaguarmicro.com<mailto:jun.qiu@jaguarmicro.com>> wrote:
> > > May be in rte_gro_reassemble_burst, where no delay is introduced,
> > > PUSH packets can be merged
> > >
> > > 发件人: kumaraparameshwaran rathinavel
> > >
> <kumaraparamesh92@gmail.com<mailto:kumaraparamesh92@gmail.com>>
> > > 发送时间: 2022年7月26日 14:41
> > > 收件人: Jun Qiu
> > <jun.qiu@jaguarmicro.com<mailto:jun.qiu@jaguarmicro.com>>
> > > 抄送: dev@dpdk.org<mailto:dev@dpdk.org>;
> > > jiayu.hu@intel.com<mailto:jiayu.hu@intel.com>;
> > > stable@dpdk.org<mailto:stable@dpdk.org>
> > > 主题: Re: [PATCH] gro: fix gro with tcp push flag
> > >
> > >
> > >
> > > On Tue, Jul 26, 2022 at 11:48 AM Jun Qiu
> > <jun.qiu@jaguarmicro.com<mailto:jun.qiu@jaguarmicro.com>> wrote:
> > > TCP data packets sometimes carry a PUSH flag. Currently, only the
> > > packets that do not have PUSH flag can be GROed.
> > > The packets that have a PUSH flag cannot be GROed, the packets
> > > that cannot be processed by GRO are placed last.
> > > In this case, the received packets may be out of order.
> > > For example, there are two packets mbuf1 and mbuf2. mbuf1 contains
> > > PUSH flag, mbuf2 does not contain PUSH flag.
> > > After GRO processing, mbuf2 is sent for processing before mbuf1.
> > > This out-of-order will affect TCP processing performance and lead
> > > to unnecessary dup-ACK.
> > >
> > > Referring to the Linux kernel implementation, packets with PUSH
> > > flag can also perform GRO. And if one of the packets containing
> > > PUSH flag, the packets after GRO will carry PUSH flag.
> > >
> > > In case of smaller transfers in which the TCP segment size is not
> > > more than
> > one MTU, it is a single TCP packet with PSH flag set, so in those
> > cases we are introducing unwanted delay. I think the better
> > approach would be if there are previous packets in the flow and the
> > current packet received has PSH flag then coalesce with the previous
> > packet, if lookup is failure and the current packet has PSH flag set
> > then deliver it
> immediately.
I agree with Kuma. The implementation in the patch will make PSH packet wait in the gro table. For a PSH packet, we need to try to merge it with previous packets in the table first. If merge successfully, the merged packet needs to mark with PSH flag and deliver it to app then; If not, this PSH packet also needs to be delivered to app immediately.
Thanks,
Jiayu
> > >
> > > Fixes: 0d2cbe59b719 ("lib/gro: support TCP/IPv4")
> > > Cc: stable@dpdk.org<mailto:stable@dpdk.org>
> > >
> > > Signed-off-by: Jun Qiu
> > > <jun.qiu@jaguarmicro.com<mailto:jun.qiu@jaguarmicro.com>>
> > > ---
> > > lib/gro/gro_tcp4.c | 4 ++--
> > > lib/gro/gro_tcp4.h | 16 +++++++++++++---
> > > lib/gro/gro_vxlan_tcp4.c | 4 ++--
> > > 3 files changed, 17 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/lib/gro/gro_tcp4.c b/lib/gro/gro_tcp4.c index
> > > 7498c66141..7849a2bd1d 100644
> > > --- a/lib/gro/gro_tcp4.c
> > > +++ b/lib/gro/gro_tcp4.c
> > > @@ -220,10 +220,10 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
> > > hdr_len = pkt->l2_len + pkt->l3_len + pkt->l4_len;
> > >
> > > /*
> > > - * Don't process the packet which has FIN, SYN, RST, PSH, URG, ECE
> > > + * Don't process the packet which has FIN, SYN, RST, URG,
> > > + ECE
> > > * or CWR set.
> > > */
> > > - if (tcp_hdr->tcp_flags != RTE_TCP_ACK_FLAG)
> > > + if (tcp_hdr->tcp_flags & (~(RTE_TCP_ACK_FLAG |
> > > + RTE_TCP_PSH_FLAG)))
> > > return -1;
> > > /*
> > > * Don't process the packet whose payload length is less
> > > than or diff --git a/lib/gro/gro_tcp4.h b/lib/gro/gro_tcp4.h index
> > > 212f97a042..2974faf228 100644
> > > --- a/lib/gro/gro_tcp4.h
> > > +++ b/lib/gro/gro_tcp4.h
> > > @@ -210,7 +210,8 @@ merge_two_tcp4_packets(struct gro_tcp4_item
> > *item,
> > > uint16_t l2_offset) {
> > > struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
> > > - uint16_t hdr_len, l2_len;
> > > + struct rte_tcp_hdr *head_tcp_hdr, *tail_tcp_hdr;
> > > + uint16_t hdr_len, l2_len, l3_offset;
> > >
> > > if (cmp > 0) {
> > > pkt_head = item->firstseg; @@ -221,13 +222,22 @@
> > > merge_two_tcp4_packets(struct gro_tcp4_item *item,
> > > }
> > >
> > > /* check if the IPv4 packet length is greater than the max value */
> > > - hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len +
> > > - pkt_head->l4_len;
> > > + l3_offset = l2_offset + pkt_head->l2_len + pkt_head->l3_len;
> > > + hdr_len = l3_offset + pkt_head->l4_len;
> > > l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len;
> > > if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len -
> > > hdr_len > MAX_IPV4_PKT_LENGTH))
> > > return 0;
> > >
> > > + /* merge push flag to pkt_head */
> > > + tail_tcp_hdr = rte_pktmbuf_mtod_offset(pkt_tail,
> > > + struct rte_tcp_hdr *, l3_offset);
> > > + if (tail_tcp_hdr->tcp_flags & RTE_TCP_PSH_FLAG) {
> > > + head_tcp_hdr = rte_pktmbuf_mtod_offset(pkt_head,
> > > + struct rte_tcp_hdr *, l3_offset);
> > > + head_tcp_hdr->tcp_flags |= RTE_TCP_PSH_FLAG;
> > > + }
> > > +
> > > /* remove the packet header for the tail packet */
> > > rte_pktmbuf_adj(pkt_tail, hdr_len);
> > >
> > > diff --git a/lib/gro/gro_vxlan_tcp4.c b/lib/gro/gro_vxlan_tcp4.c
> > > index 3be4deb7c7..884802af0b 100644
> > > --- a/lib/gro/gro_vxlan_tcp4.c
> > > +++ b/lib/gro/gro_vxlan_tcp4.c
> > > @@ -326,10 +326,10 @@ gro_vxlan_tcp4_reassemble(struct rte_mbuf
> > *pkt,
> > > tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr +
> > > pkt->l3_len);
> > >
> > > /*
> > > - * Don't process the packet which has FIN, SYN, RST, PSH, URG,
> > > + * Don't process the packet which has FIN, SYN, RST, URG,
> > > * ECE or CWR set.
> > > */
> > > - if (tcp_hdr->tcp_flags != RTE_TCP_ACK_FLAG)
> > > + if (tcp_hdr->tcp_flags & (~(RTE_TCP_ACK_FLAG |
> > > + RTE_TCP_PSH_FLAG)))
> > > return -1;
> > >
> > > hdr_len = pkt->outer_l2_len + pkt->outer_l3_len +
> > > pkt->l2_len
> > > +
> > > --
> > > 2.25.1
> > >
> >
> >
> >
> >
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2022-10-14 2:37 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-14 2:37 [PATCH] gro: fix gro with tcp push flag Jun Qiu
-- strict thread matches above, loose matches on Subject: below --
2022-07-26 6:17 Jun Qiu
2022-07-26 6:41 ` kumaraparameshwaran rathinavel
2022-07-26 6:57 ` 答复: " Jun Qiu
2022-07-26 7:08 ` kumaraparameshwaran rathinavel
2022-07-27 8:44 ` Jun Qiu
2022-08-29 10:36 ` Thomas Monjalon
2022-08-30 8:43 ` Hu, Jiayu
2022-09-05 4:02 ` Hu, Jiayu
2022-09-05 7:07 ` Jun Qiu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).