From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 582C4A04B5; Thu, 10 Sep 2020 04:47:50 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id CAB791BEB3; Thu, 10 Sep 2020 04:47:49 +0200 (CEST) Received: from mga11.intel.com (mga11.intel.com [192.55.52.93]) by dpdk.org (Postfix) with ESMTP id 43F6A1DB8 for ; Thu, 10 Sep 2020 04:47:47 +0200 (CEST) IronPort-SDR: rZoWon+zWvaf9ka2PIY0pT8wPYb3X2fVybWpC2JYFuLTBWjzHRskFhJgVc370tL+PFtfvrMN0I S6PBnM1o5dCA== X-IronPort-AV: E=McAfee;i="6000,8403,9739"; a="155920076" X-IronPort-AV: E=Sophos;i="5.76,411,1592895600"; d="scan'208,217";a="155920076" X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga004.fm.intel.com ([10.253.24.48]) by fmsmga102.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 09 Sep 2020 19:47:46 -0700 IronPort-SDR: tWWm+twwVrYKGkKyVj7Wb+7OSbxw1vSXnY631RuaqL/SB+q0tN92+SSkumDNWaisx6P4P4jqOT bzR2j9wnZryw== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.76,411,1592895600"; d="scan'208,217";a="329154616" Received: from fmsmsx603.amr.corp.intel.com ([10.18.126.83]) by fmsmga004.fm.intel.com with ESMTP; 09 Sep 2020 19:47:45 -0700 Received: from shsmsx606.ccr.corp.intel.com (10.109.6.216) by fmsmsx603.amr.corp.intel.com (10.18.126.83) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id 15.1.1713.5; Wed, 9 Sep 2020 19:47:45 -0700 Received: from shsmsx606.ccr.corp.intel.com (10.109.6.216) by SHSMSX606.ccr.corp.intel.com (10.109.6.216) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id 15.1.1713.5; Thu, 10 Sep 2020 10:47:43 +0800 Received: from shsmsx606.ccr.corp.intel.com ([10.109.6.216]) by SHSMSX606.ccr.corp.intel.com ([10.109.6.216]) with mapi id 15.01.1713.004; Thu, 10 Sep 2020 10:47:43 +0800 From: "Hu, Jiayu" To: yang_y_yi CC: 
"thomas@monjalon.net" , "dev@dpdk.org" , "yangyi01@inspur.com" Thread-Topic: Re:Re: [dpdk-dev] [PATCH v4 1/2] gro: add UDP GRO support Thread-Index: AQHWhoeynIRw5dNQwEOH9C1PqMFwcKlhLDgQ Date: Thu, 10 Sep 2020 02:47:43 +0000 Message-ID: <9dfa31fd84d847ccbd459e1b54eb32a5@intel.com> References: <20200904083740.71272-1-yang_y_yi@163.com> <20200904083740.71272-2-yang_y_yi@163.com> <20200908020516.GA68491@NPG_DPDK_VIRTIO_jiayuhu_15.sh.intel.com> <3d73e64d.45c4.17471c10896.Coremail.yang_y_yi@163.com> <42e99430.5aa5.17472180a8e.Coremail.yang_y_yi@163.com> In-Reply-To: <42e99430.5aa5.17472180a8e.Coremail.yang_y_yi@163.com> Accept-Language: en-US Content-Language: en-US X-MS-Has-Attach: X-MS-TNEF-Correlator: dlp-reaction: no-action dlp-version: 11.5.1.3 dlp-product: dlpe-windows x-originating-ip: [10.239.127.36] MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: quoted-printable X-Content-Filtered-By: Mailman/MimeDel 2.1.15 Subject: Re: [dpdk-dev] [PATCH v4 1/2] gro: add UDP GRO support X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" No more comments on this patch. Thanks, Jiayu From: yang_y_yi Sent: Wednesday, September 9, 2020 5:00 PM To: Hu, Jiayu Cc: thomas@monjalon.net; dev@dpdk.org; yangyi01@inspur.com Subject: Re:Re: [dpdk-dev] [PATCH v4 1/2] gro: add UDP GRO support Importance: High This is incremental patch to fix your new comments, I'll send out v5 if no = more comments here. 
diff --git a/lib/librte_gro/gro_udp4.c b/lib/librte_gro/gro_udp4.c index 25584fd..061e7b0 100644 --- a/lib/librte_gro/gro_udp4.c +++ b/lib/librte_gro/gro_udp4.c @@ -107,7 +107,7 @@ uint32_t item_idx; item_idx =3D find_an_empty_item(tbl); - if (item_idx =3D=3D INVALID_ARRAY_INDEX) + if (unlikely(item_idx =3D=3D INVALID_ARRAY_INDEX)) return INVALID_ARRAY_INDEX; tbl->items[item_idx].firstseg =3D pkt; @@ -163,7 +163,6 @@ dst->ip_src_addr =3D src->ip_src_addr; dst->ip_dst_addr =3D src->ip_dst_addr; dst->ip_id =3D src->ip_id; - dst->ip_id =3D src->ip_id; tbl->flows[flow_idx].start_index =3D item_idx; tbl->flow_num++; @@ -226,10 +225,14 @@ * Don't process the packet whose payload length is less than or * equal to 0. */ - if (pkt->pkt_len - hdr_len <=3D 0) + if (pkt->pkt_len <=3D hdr_len) + return -1; + + ip_dl =3D rte_be_to_cpu_16(ipv4_hdr->total_length); + if (ip_dl <=3D pkt->l3_len) return -1; - ip_dl =3D rte_be_to_cpu_16(ipv4_hdr->total_length) - pkt->l3_len; + ip_dl -=3D pkt->l3_len; ip_id =3D rte_be_to_cpu_16(ipv4_hdr->packet_id); frag_offset =3D rte_be_to_cpu_16(ipv4_hdr->fragment_offset); is_last_frag =3D ((frag_offset & RTE_IPV4_HDR_MF_FLAG) =3D=3D 0) ? 
= 1 : 0; @@ -263,7 +266,7 @@ item_idx =3D insert_new_item(tbl, pkt, start_time, INVALID_ARRAY_INDEX, frag_offset, is_last_frag); - if (item_idx =3D=3D INVALID_ARRAY_INDEX) + if (unlikely(item_idx =3D=3D INVALID_ARRAY_INDEX)) return -1; if (insert_new_flow(tbl, &key, item_idx) =3D=3D INVALID_ARRAY_INDEX) { @@ -284,7 +287,7 @@ cur_idx =3D tbl->flows[i].start_index; prev_idx =3D cur_idx; do { - cmp =3D udp_check_neighbor(&(tbl->items[cur_idx]), + cmp =3D udp4_check_neighbor(&(tbl->items[cur_idx]), frag_offset, ip_dl, 0); if (cmp) { if (merge_two_udp4_packets(&(tbl->items[cur_idx]), @@ -319,7 +322,7 @@ item_idx =3D insert_new_item(tbl, pkt, start_time, INVALID_ARRAY_INDEX, frag_offset, is_last_frag); - if (item_idx =3D=3D INVALID_ARRAY_INDEX) + if (unlikely(item_idx =3D=3D INVALID_ARRAY_INDEX)) return -1; tbl->items[item_idx].next_pkt_idx =3D cur_idx; tbl->flows[i].start_index =3D item_idx; @@ -348,12 +351,11 @@ item_idx =3D tbl->items[start_idx].next_pkt_idx; while (item_idx !=3D INVALID_ARRAY_INDEX) { pkt =3D tbl->items[item_idx].firstseg; - hdr_len =3D pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2= _len + - pkt->l3_len; + hdr_len =3D pkt->l2_len + pkt->l3_len; ip_dl =3D pkt->pkt_len - hdr_len; frag_offset =3D tbl->items[item_idx].frag_offset; is_last_frag =3D tbl->items[item_idx].is_last_frag; - cmp =3D udp_check_neighbor(&(tbl->items[start_idx]), + cmp =3D udp4_check_neighbor(&(tbl->items[start_idx]), frag_offset, ip_dl, 0); if (cmp) { if (merge_two_udp4_packets( @@ -364,12 +366,10 @@ INVALID_ARRAY_INDEX= ); tbl->items[start_idx].next_pkt_idx =3D item_idx; - } else { + } else return 0; - } - } else { + } else return 0; - } } return 0; diff --git a/lib/librte_gro/gro_udp4.h b/lib/librte_gro/gro_udp4.h index bc67eb1..0a078e4 100644 --- a/lib/librte_gro/gro_udp4.h +++ b/lib/librte_gro/gro_udp4.h @@ -246,7 +246,7 @@ uint16_t gro_udp4_tbl_timeout_flush(struct gro_udp4_tbl= *tbl, * Check if two UDP/IPv4 packets are neighbors. 
*/ static inline int -udp_check_neighbor(struct gro_udp4_item *item, +udp4_check_neighbor(struct gro_udp4_item *item, uint16_t frag_offset, uint16_t ip_dl, uint16_t l2_offset) At 2020-09-09 15:25:10, "yang_y_yi" > wrote: > > >At 2020-09-08 10:05:16, "Jiayu Hu" > wrote: >>Some comments are inline. > >Thanks, replies inline. > >> >>Thanks, >>Jiayu >> >>On Fri, Sep 04, 2020 at 04:37:39PM +0800, yang_y_yi@163.com wrote: >>> From: Yi Yang > >>> >>> UDP GRO can help improve VM-to-VM UDP performance when >>> VM is enabled UFO or GSO, GRO must be supported if GSO >>> or UFO is enabled, otherwise, performance gain will be >>> hurt. >>> >>> With this enabled in DPDK, OVS DPDK can leverage it >>> to improve VM-to-VM UDP performance, this will make >>> sure IP fragments will be reassembled once it is >>> received from physical NIC. It is very helpful in OVS >>> DPDK VLAN TSO case. >>> >>> Signed-off-by: Yi Yang = > >>> --- >>> lib/librte_gro/Makefile | 1 + >>> lib/librte_gro/gro_udp4.c | 430 +++++++++++++++++++++++++++++++++++++= ++++++++ >>> lib/librte_gro/gro_udp4.h | 281 +++++++++++++++++++++++++++++ >>> lib/librte_gro/meson.build | 2 +- >>> lib/librte_gro/rte_gro.c | 93 ++++++++-- >>> lib/librte_gro/rte_gro.h | 5 +- >>> 6 files changed, 796 insertions(+), 16 deletions(-) >>> create mode 100644 lib/librte_gro/gro_udp4.c >>> create mode 100644 lib/librte_gro/gro_udp4.h >>> >>> diff --git a/lib/librte_gro/Makefile b/lib/librte_gro/Makefile >>> index e848687..41ec29e 100644 >>> --- a/lib/librte_gro/Makefile >>> +++ b/lib/librte_gro/Makefile >>> @@ -15,6 +15,7 @@ EXPORT_MAP :=3D rte_gro_version.map >>> # source files >>> SRCS-$(CONFIG_RTE_LIBRTE_GRO) +=3D rte_gro.c >>> SRCS-$(CONFIG_RTE_LIBRTE_GRO) +=3D gro_tcp4.c >>> +SRCS-$(CONFIG_RTE_LIBRTE_GRO) +=3D gro_udp4.c >>> SRCS-$(CONFIG_RTE_LIBRTE_GRO) +=3D gro_vxlan_tcp4.c >>> >>> # install this header file >>> diff --git a/lib/librte_gro/gro_udp4.c b/lib/librte_gro/gro_udp4.c >>> new file mode 100644 >>> index 0000000..25584fd >>> 
--- /dev/null >>> +++ b/lib/librte_gro/gro_udp4.c >>> @@ -0,0 +1,430 @@ >>> +/* SPDX-License-Identifier: BSD-3-Clause >>> + * Copyright(c) 2020 Inspur Corporation >>> + */ >>> + >>> +#include >>> +#include >>> +#include >>> +#include >>> + >>> +#include "gro_udp4.h" >>> + >>> +void * >>> +gro_udp4_tbl_create(uint16_t socket_id, >>> + uint16_t max_flow_num, >>> + uint16_t max_item_per_flow) >>> +{ >>> + struct gro_udp4_tbl *tbl; >>> + size_t size; >>> + uint32_t entries_num, i; >>> + >>> + entries_num =3D max_flow_num * max_item_per_flow; >>> + entries_num =3D RTE_MIN(entries_num, GRO_UDP4_TBL_MAX_ITEM_NUM); >>> + >>> + if (entries_num =3D=3D 0) >>> + return NULL; >>> + >>> + tbl =3D rte_zmalloc_socket(__func__, >>> + sizeof(struct gro_udp4_tbl), >>> + RTE_CACHE_LINE_SIZE, >>> + socket_id); >>> + if (tbl =3D=3D NULL) >>> + return NULL; >>> + >>> + size =3D sizeof(struct gro_udp4_item) * entries_num; >>> + tbl->items =3D rte_zmalloc_socket(__func__, >>> + size, >>> + RTE_CACHE_LINE_SIZE, >>> + socket_id); >>> + if (tbl->items =3D=3D NULL) { >>> + rte_free(tbl); >>> + return NULL; >>> + } >>> + tbl->max_item_num =3D entries_num; >>> + >>> + size =3D sizeof(struct gro_udp4_flow) * entries_num; >>> + tbl->flows =3D rte_zmalloc_socket(__func__, >>> + size, >>> + RTE_CACHE_LINE_SIZE, >>> + socket_id); >>> + if (tbl->flows =3D=3D NULL) { >>> + rte_free(tbl->items); >>> + rte_free(tbl); >>> + return NULL; >>> + } >>> + /* INVALID_ARRAY_INDEX indicates an empty flow */ >>> + for (i =3D 0; i < entries_num; i++) >>> + tbl->flows[i].start_index =3D INVALID_ARRAY_INDEX; >>> + tbl->max_flow_num =3D entries_num; >>> + >>> + return tbl; >>> +} >>> + >>> +void >>> +gro_udp4_tbl_destroy(void *tbl) >>> +{ >>> + struct gro_udp4_tbl *udp_tbl =3D tbl; >>> + >>> + if (udp_tbl) { >>> + rte_free(udp_tbl->items); >>> + rte_free(udp_tbl->flows); >>> + } >>> + rte_free(udp_tbl); >>> +} >>> + >>> +static inline uint32_t >>> +find_an_empty_item(struct gro_udp4_tbl *tbl) >>> +{ >>> + uint32_t i; 
>>> + uint32_t max_item_num =3D tbl->max_item_num; >>> + >>> + for (i =3D 0; i < max_item_num; i++) >>> + if (tbl->items[i].firstseg =3D=3D NULL) >>> + return i; >>> + return INVALID_ARRAY_INDEX; >>> +} >>> + >>> +static inline uint32_t >>> +find_an_empty_flow(struct gro_udp4_tbl *tbl) >>> +{ >>> + uint32_t i; >>> + uint32_t max_flow_num =3D tbl->max_flow_num; >>> + >>> + for (i =3D 0; i < max_flow_num; i++) >>> + if (tbl->flows[i].start_index =3D=3D INVALID_ARRAY_INDEX) >>> + return i; >>> + return INVALID_ARRAY_INDEX; >>> +} >>> + >>> +static inline uint32_t >>> +insert_new_item(struct gro_udp4_tbl *tbl, >>> + struct rte_mbuf *pkt, >>> + uint64_t start_time, >>> + uint32_t prev_idx, >>> + uint16_t frag_offset, >>> + uint8_t is_last_frag) >>> +{ >>> + uint32_t item_idx; >>> + >>> + item_idx =3D find_an_empty_item(tbl); >>> + if (item_idx =3D=3D INVALID_ARRAY_INDEX) >>> + return INVALID_ARRAY_INDEX; >>> + >>> + tbl->items[item_idx].firstseg =3D pkt; >>> + tbl->items[item_idx].lastseg =3D rte_pktmbuf_lastseg(pkt); >>> + tbl->items[item_idx].start_time =3D start_time; >>> + tbl->items[item_idx].next_pkt_idx =3D INVALID_ARRAY_INDEX; >>> + tbl->items[item_idx].frag_offset =3D frag_offset; >>> + tbl->items[item_idx].is_last_frag =3D is_last_frag; >>> + tbl->items[item_idx].nb_merged =3D 1; >>> + tbl->item_num++; >>> + >>> + /* if the previous packet exists, chain them together. 
*/ >>> + if (prev_idx !=3D INVALID_ARRAY_INDEX) { >>> + tbl->items[item_idx].next_pkt_idx =3D >>> + tbl->items[prev_idx].next_pkt_idx; >>> + tbl->items[prev_idx].next_pkt_idx =3D item_idx; >>> + } >>> + >>> + return item_idx; >>> +} >>> + >>> +static inline uint32_t >>> +delete_item(struct gro_udp4_tbl *tbl, uint32_t item_idx, >>> + uint32_t prev_item_idx) >>> +{ >>> + uint32_t next_idx =3D tbl->items[item_idx].next_pkt_idx; >>> + >>> + /* NULL indicates an empty item */ >>> + tbl->items[item_idx].firstseg =3D NULL; >>> + tbl->item_num--; >>> + if (prev_item_idx !=3D INVALID_ARRAY_INDEX) >>> + tbl->items[prev_item_idx].next_pkt_idx =3D next_idx; >>> + >>> + return next_idx; >>> +} >>> + >>> +static inline uint32_t >>> +insert_new_flow(struct gro_udp4_tbl *tbl, >>> + struct udp4_flow_key *src, >>> + uint32_t item_idx) >>> +{ >>> + struct udp4_flow_key *dst; >>> + uint32_t flow_idx; >>> + >>> + flow_idx =3D find_an_empty_flow(tbl); >>> + if (unlikely(flow_idx =3D=3D INVALID_ARRAY_INDEX)) >>> + return INVALID_ARRAY_INDEX; >>> + >>> + dst =3D &(tbl->flows[flow_idx].key); >>> + >>> + rte_ether_addr_copy(&(src->eth_saddr), &(dst->eth_saddr)); >>> + rte_ether_addr_copy(&(src->eth_daddr), &(dst->eth_daddr)); >>> + dst->ip_src_addr =3D src->ip_src_addr; >>> + dst->ip_dst_addr =3D src->ip_dst_addr; >>> + dst->ip_id =3D src->ip_id; >>> + dst->ip_id =3D src->ip_id; >> >>Duplicated code. > >Thanks, good catch, will remove it. > >> >>> + >>> + tbl->flows[flow_idx].start_index =3D item_idx; >>> + tbl->flow_num++; >>> + >>> + return flow_idx; >>> +} >>> + >>> +/* >>> + * update the packet length for the flushed packet. 
>>> + */ >>> +static inline void >>> +update_header(struct gro_udp4_item *item) >>> +{ >>> + struct rte_ipv4_hdr *ipv4_hdr; >>> + struct rte_mbuf *pkt =3D item->firstseg; >>> + uint16_t frag_offset; >>> + >>> + ipv4_hdr =3D (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) + >>> + pkt->l2_len); >>> + ipv4_hdr->total_length =3D rte_cpu_to_be_16(pkt->pkt_len - >>> + pkt->l2_len); >>> + >>> + /* Clear MF bit if it is last fragment */ >>> + if (item->is_last_frag) { >>> + frag_offset =3D rte_be_to_cpu_16(ipv4_hdr->fragment_offset); >>> + ipv4_hdr->fragment_offset =3D >>> + rte_cpu_to_be_16(frag_offset & ~RTE_IPV4_HDR_MF_FLAG)= ; >>> + } >>> +} >>> + >>> +int32_t >>> +gro_udp4_reassemble(struct rte_mbuf *pkt, >>> + struct gro_udp4_tbl *tbl, >>> + uint64_t start_time) >>> +{ >>> + struct rte_ether_hdr *eth_hdr; >>> + struct rte_ipv4_hdr *ipv4_hdr; >>> + uint16_t ip_dl; >>> + uint16_t ip_id, hdr_len; >>> + uint16_t frag_offset =3D 0; >>> + uint8_t is_last_frag; >>> + >>> + struct udp4_flow_key key; >>> + uint32_t cur_idx, prev_idx, item_idx; >>> + uint32_t i, max_flow_num, remaining_flow_num; >>> + int cmp; >>> + uint8_t find; >>> + >>> + eth_hdr =3D rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *); >>> + ipv4_hdr =3D (struct rte_ipv4_hdr *)((char *)eth_hdr + pkt->l2_len); >>> + hdr_len =3D pkt->l2_len + pkt->l3_len; >>> + >>> + /* >>> + * Don't process non-fragment packet. >>> + */ >>> + if (!is_ipv4_fragment(ipv4_hdr)) >>> + return -1; >>> + >>> + /* >>> + * Don't process the packet whose payload length is less than or >>> + * equal to 0. >>> + */ >>> + if (pkt->pkt_len - hdr_len <=3D 0) >>> + return -1; >> >>If input packets are malicious, whose mbuf->pkt_len is smaller than >>(mbuf->l2_len+mbuf->l3_len), the above check won't work correctly, >>as its value is uint16_t, which is always positive. > >I tried the below code. 
> >$ cat a.c >#include >#include > >int main() >{ > uint16_t a =3D 10; > uint16_t b =3D 8; > > if ((b - a) < 0) { > printf("%u < %u\n", b , a); > } >} >$ gcc a.c >$ ./a.out >8 < 10 >$ > >It works, so I think it is ok, nevertheless, I'll change it to the below = to avoid your concern. > >if (pkt->pkt_len <=3D hdr_len) > >> >>> + >>> + ip_dl =3D rte_be_to_cpu_16(ipv4_hdr->total_length) - pkt->l3_len; >> >>Same as above. > >I think "if (pkt->pkt_len <=3D hdr_len)" can ensure ip_dl will be positive= , I can add one more >condition check here if you want > >if (rte_be_to_cpu_16(ipv4_hdr->total_length) <=3D l3_len) > return -1; > >But I think it is unnecessary here for a little bit performance as you're = caring. > >> >>> + ip_id =3D rte_be_to_cpu_16(ipv4_hdr->packet_id); >>> + frag_offset =3D rte_be_to_cpu_16(ipv4_hdr->fragment_offset); >>> + is_last_frag =3D ((frag_offset & RTE_IPV4_HDR_MF_FLAG) =3D=3D 0) ? 1= : 0; >>> + frag_offset =3D (uint16_t)(frag_offset & RTE_IPV4_HDR_OFFSET_MASK) <= < 3; >>> + >>> + rte_ether_addr_copy(&(eth_hdr->s_addr), &(key.eth_saddr)); >>> + rte_ether_addr_copy(&(eth_hdr->d_addr), &(key.eth_daddr)); >>> + key.ip_src_addr =3D ipv4_hdr->src_addr; >>> + key.ip_dst_addr =3D ipv4_hdr->dst_addr; >>> + key.ip_id =3D ip_id; >>> + >>> + /* Search for a matched flow. */ >>> + max_flow_num =3D tbl->max_flow_num; >>> + remaining_flow_num =3D tbl->flow_num; >>> + find =3D 0; >>> + for (i =3D 0; i < max_flow_num && remaining_flow_num; i++) { >>> + if (tbl->flows[i].start_index !=3D INVALID_ARRAY_INDEX) { >>> + if (is_same_udp4_flow(tbl->flows[i].key, key)) { >>> + find =3D 1; >>> + break; >>> + } >>> + remaining_flow_num--; >>> + } >>> + } >>> + >>> + /* >>> + * Fail to find a matched flow. Insert a new flow and store the >>> + * packet into the flow. 
>>> + */ >>> + if (find =3D=3D 0) { >>> + item_idx =3D insert_new_item(tbl, pkt, start_time, >>> + INVALID_ARRAY_INDEX, frag_offset, >>> + is_last_frag); >>> + if (item_idx =3D=3D INVALID_ARRAY_INDEX) >>> + return -1; >> >>Adding unlikely here could get better performance, IMO. > >No problem. > >> >>> + if (insert_new_flow(tbl, &key, item_idx) =3D=3D >>> + INVALID_ARRAY_INDEX) { >>> + /* >>> + * Fail to insert a new flow, so delete the >>> + * stored packet. >>> + */ >>> + delete_item(tbl, item_idx, INVALID_ARRAY_INDEX); >>> + return -1; >>> + } >>> + return 0; >>> + } >>> + >>> + /* >>> + * Check all packets in the flow and try to find a neighbor for >>> + * the input packet. >>> + */ >>> + cur_idx =3D tbl->flows[i].start_index; >>> + prev_idx =3D cur_idx; >>> + do { >>> + cmp =3D udp_check_neighbor(&(tbl->items[cur_idx]), >>> + frag_offset, ip_dl, 0); >>> + if (cmp) { >>> + if (merge_two_udp4_packets(&(tbl->items[cur_idx]), >>> + pkt, cmp, frag_offset, >>> + is_last_frag, 0)) >>> + return 1; >>> + /* >>> + * Fail to merge the two packets, as the packet >>> + * length is greater than the max value. Store >>> + * the packet into the flow. >>> + */ >>> + if (insert_new_item(tbl, pkt, start_time, prev_idx, >>> + frag_offset, is_last_frag) =3D= =3D >>> + INVALID_ARRAY_INDEX) >>> + return -1; >>> + return 0; >>> + } >>> + >>> + /* Ensure inserted items are ordered by frag_offset */ >>> + if (frag_offset >>> + < tbl->items[cur_idx].frag_offset) { >>> + break; >>> + } >>> + >>> + prev_idx =3D cur_idx; >>> + cur_idx =3D tbl->items[cur_idx].next_pkt_idx; >>> + } while (cur_idx !=3D INVALID_ARRAY_INDEX); >>> + >>> + /* Fail to find a neighbor, so store the packet into the flow. 
*/ >>> + if (cur_idx =3D=3D tbl->flows[i].start_index) { >>> + /* Insert it before the first packet of the flow */ >>> + item_idx =3D insert_new_item(tbl, pkt, start_time, >>> + INVALID_ARRAY_INDEX, frag_offset, >>> + is_last_frag); >>> + if (item_idx =3D=3D INVALID_ARRAY_INDEX) >>> + return -1; >>> + tbl->items[item_idx].next_pkt_idx =3D cur_idx; >>> + tbl->flows[i].start_index =3D item_idx; >>> + } else { >>> + if (insert_new_item(tbl, pkt, start_time, prev_idx, >>> + frag_offset, is_last_frag) >>> + =3D=3D INVALID_ARRAY_INDEX) >>> + return -1; >>> + } >>> + >>> + return 0; >>> +} >>> + >>> +static int >>> +gro_udp4_merge_items(struct gro_udp4_tbl *tbl, >>> + uint32_t start_idx) >>> +{ >>> + uint16_t frag_offset; >>> + uint8_t is_last_frag; >>> + int16_t ip_dl; >>> + struct rte_mbuf *pkt; >>> + int cmp; >>> + uint32_t item_idx; >>> + uint16_t hdr_len; >>> + >>> + item_idx =3D tbl->items[start_idx].next_pkt_idx; >>> + while (item_idx !=3D INVALID_ARRAY_INDEX) { >>> + pkt =3D tbl->items[item_idx].firstseg; >>> + hdr_len =3D pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_l= en + >>> + pkt->l3_len; >> >>For non-tunnel packets, mbuf->outer_l3/l2_len are not guaranteed 0. >>I think it's better to pass outer header length as a parameter of >>gro_udp4_merge_item(), like udp_check_neighbor(). > >For udp GRO, we can remove outer_l3/l2_len, they are nonsense. 
> >> >>> + ip_dl =3D pkt->pkt_len - hdr_len; >>> + frag_offset =3D tbl->items[item_idx].frag_offset; >>> + is_last_frag =3D tbl->items[item_idx].is_last_frag; >>> + cmp =3D udp_check_neighbor(&(tbl->items[start_idx]), >>> + frag_offset, ip_dl, 0); >>> + if (cmp) { >>> + if (merge_two_udp4_packets( >>> + &(tbl->items[start_idx]), >>> + pkt, cmp, frag_offset, >>> + is_last_frag, 0)) { >>> + item_idx =3D delete_item(tbl, item_idx, >>> + INVALID_ARRAY_INDEX); >>> + tbl->items[start_idx].next_pkt_idx >>> + =3D item_idx; >>> + } else { >>> + return 0; >>> + } >>> + } else { >>> + return 0; >>> + } >> >>A single line doesn't need braces. > >Got it, will remove { } > >> >>> + } >>> + >>> + return 0; >>> +} >>> + >>> +uint16_t >>> +gro_udp4_tbl_timeout_flush(struct gro_udp4_tbl *tbl, >>> + uint64_t flush_timestamp, >>> + struct rte_mbuf **out, >>> + uint16_t nb_out) >>> +{ >>> + uint16_t k =3D 0; >>> + uint32_t i, j; >>> + uint32_t max_flow_num =3D tbl->max_flow_num; >>> + >>> + for (i =3D 0; i < max_flow_num; i++) { >>> + if (unlikely(tbl->flow_num =3D=3D 0)) >>> + return k; >>> + >>> + j =3D tbl->flows[i].start_index; >>> + while (j !=3D INVALID_ARRAY_INDEX) { >>> + if (tbl->items[j].start_time <=3D flush_timestamp) { >>> + gro_udp4_merge_items(tbl, j); >>> + out[k++] =3D tbl->items[j].firstseg; >>> + if (tbl->items[j].nb_merged > 1) >>> + update_header(&(tbl->items[j])); >>> + /* >>> + * Delete the packet and get the next >>> + * packet in the flow. >>> + */ >>> + j =3D delete_item(tbl, j, INVALID_ARRAY_INDEX= ); >>> + tbl->flows[i].start_index =3D j; >>> + if (j =3D=3D INVALID_ARRAY_INDEX) >>> + tbl->flow_num--; >>> + >>> + if (unlikely(k =3D=3D nb_out)) >>> + return k; >>> + } else >>> + /* >>> + * The left packets in this flow won't be >>> + * timeout. Go to check other flows. 
>>> + */ >>> + break; >>> + } >>> + } >>> + return k; >>> +} >>> + >>> +uint32_t >>> +gro_udp4_tbl_pkt_count(void *tbl) >>> +{ >>> + struct gro_udp4_tbl *gro_tbl =3D tbl; >>> + >>> + if (gro_tbl) >>> + return gro_tbl->item_num; >>> + >>> + return 0; >>> +} >>> diff --git a/lib/librte_gro/gro_udp4.h b/lib/librte_gro/gro_udp4.h >>> new file mode 100644 >>> index 0000000..bc67eb1 >>> --- /dev/null >>> +++ b/lib/librte_gro/gro_udp4.h >>> @@ -0,0 +1,281 @@ >>> +/* SPDX-License-Identifier: BSD-3-Clause >>> + * Copyright(c) 2020 Inspur Corporation >>> + */ >>> + >>> +#ifndef _GRO_UDP4_H_ >>> +#define _GRO_UDP4_H_ >>> + >>> +#include >>> +#include >>> + >>> +#define INVALID_ARRAY_INDEX 0xffffffffUL >>> +#define GRO_UDP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL) >>> + >>> +/* >>> + * The max length of a IPv4 packet, which includes the length of the L= 3 >>> + * header, the L4 header and the data payload. >>> + */ >>> +#define MAX_IPV4_PKT_LENGTH UINT16_MAX >>> + >>> +/* Header fields representing a UDP/IPv4 flow */ >>> +struct udp4_flow_key { >>> + struct rte_ether_addr eth_saddr; >>> + struct rte_ether_addr eth_daddr; >>> + uint32_t ip_src_addr; >>> + uint32_t ip_dst_addr; >>> + >>> + /* IP fragment for UDP does not contain UDP header >>> + * except the first one. But IP ID must be same. >>> + */ >>> + uint16_t ip_id; >>> +}; >>> + >>> +struct gro_udp4_flow { >>> + struct udp4_flow_key key; >>> + /* >>> + * The index of the first packet in the flow. >>> + * INVALID_ARRAY_INDEX indicates an empty flow. >>> + */ >>> + uint32_t start_index; >>> +}; >>> + >>> +struct gro_udp4_item { >>> + /* >>> + * The first MBUF segment of the packet. If the value >>> + * is NULL, it means the item is empty. >>> + */ >>> + struct rte_mbuf *firstseg; >>> + /* The last MBUF segment of the packet */ >>> + struct rte_mbuf *lastseg; >>> + /* >>> + * The time when the first packet is inserted into the table. >>> + * This value won't be updated, even if the packet is merged >>> + * with other packets. 
>>> + */ >>> + uint64_t start_time; >>> + /* >>> + * next_pkt_idx is used to chain the packets that >>> + * are in the same flow but can't be merged together >>> + * (e.g. caused by packet reordering). >>> + */ >>> + uint32_t next_pkt_idx; >>> + /* offset of IP fragment packet */ >>> + uint16_t frag_offset; >>> + /* is last IP fragment? */ >>> + uint8_t is_last_frag; >>> + /* the number of merged packets */ >>> + uint16_t nb_merged; >>> +}; >>> + >>> +/* >>> + * UDP/IPv4 reassembly table structure. >>> + */ >>> +struct gro_udp4_tbl { >>> + /* item array */ >>> + struct gro_udp4_item *items; >>> + /* flow array */ >>> + struct gro_udp4_flow *flows; >>> + /* current item number */ >>> + uint32_t item_num; >>> + /* current flow num */ >>> + uint32_t flow_num; >>> + /* item array size */ >>> + uint32_t max_item_num; >>> + /* flow array size */ >>> + uint32_t max_flow_num; >>> +}; >>> + >>> +/** >>> + * This function creates a UDP/IPv4 reassembly table. >>> + * >>> + * @param socket_id >>> + * Socket index for allocating the UDP/IPv4 reassemble table >>> + * @param max_flow_num >>> + * The maximum number of flows in the UDP/IPv4 GRO table >>> + * @param max_item_per_flow >>> + * The maximum number of packets per flow >>> + * >>> + * @return >>> + * - Return the table pointer on success. >>> + * - Return NULL on failure. >>> + */ >>> +void *gro_udp4_tbl_create(uint16_t socket_id, >>> + uint16_t max_flow_num, >>> + uint16_t max_item_per_flow); >>> + >>> +/** >>> + * This function destroys a UDP/IPv4 reassembly table. >>> + * >>> + * @param tbl >>> + * Pointer pointing to the UDP/IPv4 reassembly table. >>> + */ >>> +void gro_udp4_tbl_destroy(void *tbl); >>> + >>> +/** >>> + * This function merges a UDP/IPv4 packet. >>> + * >>> + * This function does not check if the packet has correct checksums an= d >>> + * does not re-calculate checksums for the merged packet. It returns t= he >>> + * packet if it isn't UDP fragment or there is no available space in >>> + * the table. 
>>> + * >>> + * @param pkt >>> + * Packet to reassemble >>> + * @param tbl >>> + * Pointer pointing to the UDP/IPv4 reassembly table >>> + * @start_time >>> + * The time when the packet is inserted into the table >>> + * >>> + * @return >>> + * - Return a positive value if the packet is merged. >>> + * - Return zero if the packet isn't merged but stored in the table. >>> + * - Return a negative value for invalid parameters or no available >>> + * space in the table. >>> + */ >>> +int32_t gro_udp4_reassemble(struct rte_mbuf *pkt, >>> + struct gro_udp4_tbl *tbl, >>> + uint64_t start_time); >>> + >>> +/** >>> + * This function flushes timeout packets in a UDP/IPv4 reassembly tabl= e, >>> + * and without updating checksums. >>> + * >>> + * @param tbl >>> + * UDP/IPv4 reassembly table pointer >>> + * @param flush_timestamp >>> + * Flush packets which are inserted into the table before or at the >>> + * flush_timestamp. >>> + * @param out >>> + * Pointer array used to keep flushed packets >>> + * @param nb_out >>> + * The element number in 'out'. It also determines the maximum number= of >>> + * packets that can be flushed finally. >>> + * >>> + * @return >>> + * The number of flushed packets >>> + */ >>> +uint16_t gro_udp4_tbl_timeout_flush(struct gro_udp4_tbl *tbl, >>> + uint64_t flush_timestamp, >>> + struct rte_mbuf **out, >>> + uint16_t nb_out); >>> + >>> +/** >>> + * This function returns the number of the packets in a UDP/IPv4 >>> + * reassembly table. >>> + * >>> + * @param tbl >>> + * UDP/IPv4 reassembly table pointer >>> + * >>> + * @return >>> + * The number of packets in the table >>> + */ >>> +uint32_t gro_udp4_tbl_pkt_count(void *tbl); >>> + >>> +/* >>> + * Check if two UDP/IPv4 packets belong to the same flow. 
>>> + */ >>> +static inline int >>> +is_same_udp4_flow(struct udp4_flow_key k1, struct udp4_flow_key k2) >>> +{ >>> + return (rte_is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) && >>> + rte_is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) &= & >>> + (k1.ip_src_addr =3D=3D k2.ip_src_addr) && >>> + (k1.ip_dst_addr =3D=3D k2.ip_dst_addr) && >>> + (k1.ip_id =3D=3D k2.ip_id)); >>> +} >>> + >>> +/* >>> + * Merge two UDP/IPv4 packets without updating checksums. >>> + * If cmp is larger than 0, append the new packet to the >>> + * original packet. Otherwise, pre-pend the new packet to >>> + * the original packet. >>> + */ >>> +static inline int >>> +merge_two_udp4_packets(struct gro_udp4_item *item, >>> + struct rte_mbuf *pkt, >>> + int cmp, >>> + uint16_t frag_offset, >>> + uint8_t is_last_frag, >>> + uint16_t l2_offset) >>> +{ >>> + struct rte_mbuf *pkt_head, *pkt_tail, *lastseg; >>> + uint16_t hdr_len, l2_len; >>> + uint32_t ip_len; >>> + >>> + if (cmp > 0) { >>> + pkt_head =3D item->firstseg; >>> + pkt_tail =3D pkt; >>> + } else { >>> + pkt_head =3D pkt; >>> + pkt_tail =3D item->firstseg; >>> + } >>> + >>> + /* check if the IPv4 packet length is greater than the max value */ >>> + hdr_len =3D l2_offset + pkt_head->l2_len + pkt_head->l3_len; >>> + l2_len =3D l2_offset > 0 ? 
pkt_head->outer_l2_len : pkt_head->l2_len= ; >>> + ip_len =3D pkt_head->pkt_len - l2_len >>> + + pkt_tail->pkt_len - hdr_len; >>> + if (unlikely(ip_len > MAX_IPV4_PKT_LENGTH)) >>> + return 0; >>> + >>> + /* remove the packet header for the tail packet */ >>> + rte_pktmbuf_adj(pkt_tail, hdr_len); >>> + >>> + /* chain two packets together */ >>> + if (cmp > 0) { >>> + item->lastseg->next =3D pkt; >>> + item->lastseg =3D rte_pktmbuf_lastseg(pkt); >>> + } else { >>> + lastseg =3D rte_pktmbuf_lastseg(pkt); >>> + lastseg->next =3D item->firstseg; >>> + item->firstseg =3D pkt; >>> + item->frag_offset =3D frag_offset; >>> + } >>> + item->nb_merged++; >>> + if (is_last_frag) >>> + item->is_last_frag =3D is_last_frag; >>> + >>> + /* update MBUF metadata for the merged packet */ >>> + pkt_head->nb_segs +=3D pkt_tail->nb_segs; >>> + pkt_head->pkt_len +=3D pkt_tail->pkt_len; >>> + >>> + return 1; >>> +} >>> + >>> +/* >>> + * Check if two UDP/IPv4 packets are neighbors. >>> + */ >>> +static inline int >>> +udp_check_neighbor(struct gro_udp4_item *item, >>> + uint16_t frag_offset, >>> + uint16_t ip_dl, >>> + uint16_t l2_offset) >> >>It's better to rename the function as udp4_check_neighbor(), IMO. 
> >No problem > >> >>> +{ >>> + struct rte_mbuf *pkt_orig =3D item->firstseg; >>> + uint16_t len; >>> + >>> + /* check if the two packets are neighbors */ >>> + len =3D pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len - >>> + pkt_orig->l3_len; >>> + if (frag_offset =3D=3D item->frag_offset + len) >>> + /* append the new packet */ >>> + return 1; >>> + else if (frag_offset + ip_dl =3D=3D item->frag_offset) >>> + /* pre-pend the new packet */ >>> + return -1; >>> + >>> + return 0; >>> +} >>> + >>> +static inline int >>> +is_ipv4_fragment(const struct rte_ipv4_hdr *hdr) >>> +{ >>> + uint16_t flag_offset, ip_flag, ip_ofs; >>> + >>> + flag_offset =3D rte_be_to_cpu_16(hdr->fragment_offset); >>> + ip_ofs =3D (uint16_t)(flag_offset & RTE_IPV4_HDR_OFFSET_MASK); >>> + ip_flag =3D (uint16_t)(flag_offset & RTE_IPV4_HDR_MF_FLAG); >>> + >>> + return ip_flag !=3D 0 || ip_ofs !=3D 0; >>> +} >>> +#endif >>> diff --git a/lib/librte_gro/meson.build b/lib/librte_gro/meson.build >>> index 501668c..0d18dc2 100644 >>> --- a/lib/librte_gro/meson.build >>> +++ b/lib/librte_gro/meson.build >>> @@ -1,6 +1,6 @@ >>> # SPDX-License-Identifier: BSD-3-Clause >>> # Copyright(c) 2017 Intel Corporation >>> >>> -sources =3D files('rte_gro.c', 'gro_tcp4.c', 'gro_vxlan_tcp4.c') >>> +sources =3D files('rte_gro.c', 'gro_tcp4.c', 'gro_udp4.c', 'gro_vxlan_= tcp4.c') >>> headers =3D files('rte_gro.h') >>> deps +=3D ['ethdev'] >>> diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c >>> index 6618f4d..d094129 100644 >>> --- a/lib/librte_gro/rte_gro.c >>> +++ b/lib/librte_gro/rte_gro.c >>> @@ -9,6 +9,7 @@ >>> >>> #include "rte_gro.h" >>> #include "gro_tcp4.h" >>> +#include "gro_udp4.h" >>> #include "gro_vxlan_tcp4.h" >>> >>> typedef void *(*gro_tbl_create_fn)(uint16_t socket_id, >>> @@ -18,17 +19,23 @@ >>> typedef uint32_t (*gro_tbl_pkt_count_fn)(void *tbl); >>> >>> static gro_tbl_create_fn tbl_create_fn[RTE_GRO_TYPE_MAX_NUM] =3D { >>> - gro_tcp4_tbl_create, gro_vxlan_tcp4_tbl_create, NULL}; >>> + 
gro_tcp4_tbl_create, gro_vxlan_tcp4_tbl_create, >>> + gro_udp4_tbl_create, NULL}; >>> static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] =3D { >>> gro_tcp4_tbl_destroy, gro_vxlan_tcp4_tbl_destroy, >>> + gro_udp4_tbl_destroy, >>> NULL}; >>> static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] =3D= { >>> gro_tcp4_tbl_pkt_count, gro_vxlan_tcp4_tbl_pkt_count, >>> + gro_udp4_tbl_pkt_count, >>> NULL}; >>> >>> #define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \ >>> ((ptype & RTE_PTYPE_L4_TCP) =3D=3D RTE_PTYPE_L4_TCP)) >>> >>> +#define IS_IPV4_UDP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \ >>> + ((ptype & RTE_PTYPE_L4_UDP) =3D=3D RTE_PTYPE_L4_UDP)) >>> + >>> #define IS_IPV4_VXLAN_TCP4_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \ >>> ((ptype & RTE_PTYPE_L4_UDP) =3D=3D RTE_PTYPE_L4_UDP) && \ >>> ((ptype & RTE_PTYPE_TUNNEL_VXLAN) =3D=3D \ >>> @@ -40,6 +47,7 @@ >>> RTE_PTYPE_INNER_L3_IPV4_EXT | \ >>> RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN)) !=3D 0)) >>> >>> + >>> /* >>> * GRO context structure. It keeps the table structures, which are >>> * used to merge packets, for different GRO types. 
Before using >>> @@ -123,20 +131,26 @@ struct gro_ctx { >>> struct gro_tcp4_flow tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM]; >>> struct gro_tcp4_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] =3D {{0} = }; >>> >>> - /* Allocate a reassembly table for VXLAN GRO */ >>> + /* allocate a reassembly table for UDP/IPv4 GRO */ >>> + struct gro_udp4_tbl udp_tbl; >>> + struct gro_udp4_flow udp_flows[RTE_GRO_MAX_BURST_ITEM_NUM]; >>> + struct gro_udp4_item udp_items[RTE_GRO_MAX_BURST_ITEM_NUM] =3D {{0} = }; >>> + >>> + /* Allocate a reassembly table for VXLAN TCP GRO */ >>> struct gro_vxlan_tcp4_tbl vxlan_tbl; >>> struct gro_vxlan_tcp4_flow vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM]; >>> - struct gro_vxlan_tcp4_item vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM] = =3D { >>> - {{0}, 0, 0} }; >>> + struct gro_vxlan_tcp4_item vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM] >>> + =3D {{{0}, 0, 0} }; >>> >>> struct rte_mbuf *unprocess_pkts[nb_pkts]; >>> uint32_t item_num; >>> int32_t ret; >>> uint16_t i, unprocess_num =3D 0, nb_after_gro =3D nb_pkts; >>> - uint8_t do_tcp4_gro =3D 0, do_vxlan_gro =3D 0; >>> + uint8_t do_tcp4_gro =3D 0, do_vxlan_gro =3D 0, do_udp4_gro =3D 0; >>> >>> if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 | >>> - RTE_GRO_TCP_IPV4)) =3D=3D 0)) >>> + RTE_GRO_TCP_IPV4 | >>> + RTE_GRO_UDP_IPV4)) =3D=3D 0)) >>> return nb_pkts; >>> >>> /* Get the maximum number of packets */ >>> @@ -170,6 +184,20 @@ struct gro_ctx { >>> do_tcp4_gro =3D 1; >>> } >>> >>> + if (param->gro_types & RTE_GRO_UDP_IPV4) { >>> + for (i =3D 0; i < item_num; i++) >>> + udp_flows[i].start_index =3D INVALID_ARRAY_INDEX; >>> + >>> + udp_tbl.flows =3D udp_flows; >>> + udp_tbl.items =3D udp_items; >>> + udp_tbl.flow_num =3D 0; >>> + udp_tbl.item_num =3D 0; >>> + udp_tbl.max_flow_num =3D item_num; >>> + udp_tbl.max_item_num =3D item_num; >>> + do_udp4_gro =3D 1; >>> + } >>> + >>> + >>> for (i =3D 0; i < nb_pkts; i++) { >>> /* >>> * The timestamp is ignored, since all packets >>> @@ -177,7 +205,8 @@ struct 
gro_ctx { >>> */ >>> if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) && >>> do_vxlan_gro) { >>> - ret =3D gro_vxlan_tcp4_reassemble(pkts[i], &vxlan_tbl= , 0); >>> + ret =3D gro_vxlan_tcp4_reassemble(pkts[i], >>> + &vxlan_tbl, 0); >>> if (ret > 0) >>> /* Merge successfully */ >>> nb_after_gro--; >>> @@ -191,27 +220,43 @@ struct gro_ctx { >>> nb_after_gro--; >>> else if (ret < 0) >>> unprocess_pkts[unprocess_num++] =3D pkts[i]; >>> + } else if (IS_IPV4_UDP_PKT(pkts[i]->packet_type) && >>> + do_udp4_gro) { >>> + ret =3D gro_udp4_reassemble(pkts[i], &udp_tbl, 0); >>> + if (ret > 0) >>> + /* merge successfully */ >>> + nb_after_gro--; >>> + else if (ret < 0) >>> + unprocess_pkts[unprocess_num++] =3D pkts[i]; >>> } else >>> unprocess_pkts[unprocess_num++] =3D pkts[i]; >>> } >>> >>> - if (nb_after_gro < nb_pkts) { >>> + if ((nb_after_gro < nb_pkts) >>> + || (unprocess_num < nb_pkts)) { >> >>Why need to check unprocess_num here? > >In the case that a packet isn't merged, nb_after_gro won't be changed, so we can only use unprocess_num to check; it is possible to reassemble them successfully in the flush phase. > >> >>> i =3D 0; >>> /* Flush all packets from the tables */ >>> if (do_vxlan_gro) { >>> i =3D gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl, >>> 0, pkts, nb_pkts); >>> } >>> + >>> if (do_tcp4_gro) { >>> i +=3D gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0, >>> &pkts[i], nb_pkts - i); >>> } >>> + >>> + if (do_udp4_gro) { >>> + i +=3D gro_udp4_tbl_timeout_flush(&udp_tbl, 0, >>> + &pkts[i], nb_pkts - i); >>> + } >>> /* Copy unprocessed packets */ >>> if (unprocess_num > 0) { >>> memcpy(&pkts[i], unprocess_pkts, >>> sizeof(struct rte_mbuf *) * >>> unprocess_num); >>> } >>> + nb_after_gro =3D i + unprocess_num; >>> } >>> >>> return nb_after_gro; >>> 1.8.3.1 > >