From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 3116FA04AA; Tue, 8 Sep 2020 03:55:32 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id EE27E1C11A; Tue, 8 Sep 2020 03:55:31 +0200 (CEST) Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by dpdk.org (Postfix) with ESMTP id 2252A1C116 for ; Tue, 8 Sep 2020 03:55:29 +0200 (CEST) IronPort-SDR: 4mKCcTUmf1UhZfC2dQS9PQmHx2IhCuJwtZaUGGKcXO1cGbq6eOPyMjW1t1Ant/mAa0jhe/zU+M 7dnH+IPCuoVw== X-IronPort-AV: E=McAfee;i="6000,8403,9737"; a="145783510" X-IronPort-AV: E=Sophos;i="5.76,404,1592895600"; d="scan'208";a="145783510" X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga002.jf.intel.com ([10.7.209.21]) by orsmga101.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 07 Sep 2020 18:55:28 -0700 IronPort-SDR: Q0ndlv1BoQuTZp324Cg/QZKXNNSCcBpqopyDE5lYv/iRFOOaz4Klz2WQkmIYLX5lHaSUdNwOHD BHCDWKwdqtKA== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.76,404,1592895600"; d="scan'208";a="317013667" Received: from npg_dpdk_virtio_jiayuhu_15.sh.intel.com ([10.67.117.43]) by orsmga002.jf.intel.com with ESMTP; 07 Sep 2020 18:55:26 -0700 Date: Tue, 8 Sep 2020 10:05:16 +0800 From: Jiayu Hu To: yang_y_yi@163.com Cc: thomas@monjalon.net, dev@dpdk.org, yangyi01@inspur.com Message-ID: <20200908020516.GA68491@NPG_DPDK_VIRTIO_jiayuhu_15.sh.intel.com> References: <20200904083740.71272-1-yang_y_yi@163.com> <20200904083740.71272-2-yang_y_yi@163.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20200904083740.71272-2-yang_y_yi@163.com> User-Agent: Mutt/1.7.1 (2016-10-04) Subject: Re: [dpdk-dev] [PATCH v4 1/2] gro: add UDP GRO support X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: 
List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Some comments are inline. Thanks, Jiayu On Fri, Sep 04, 2020 at 04:37:39PM +0800, yang_y_yi@163.com wrote: > From: Yi Yang > > UDP GRO can help improve VM-to-VM UDP performance when > VM is enabled UFO or GSO, GRO must be supported if GSO > or UFO is enabled, otherwise, performance gain will be > hurt. > > With this enabled in DPDK, OVS DPDK can leverage it > to improve VM-to-VM UDP performance, this will make > sure IP fragments will be reassembled once it is > received from physical NIC. It is very helpful in OVS > DPDK VLAN TSO case. > > Signed-off-by: Yi Yang > --- > lib/librte_gro/Makefile | 1 + > lib/librte_gro/gro_udp4.c | 430 +++++++++++++++++++++++++++++++++++++++++++++ > lib/librte_gro/gro_udp4.h | 281 +++++++++++++++++++++++++++++ > lib/librte_gro/meson.build | 2 +- > lib/librte_gro/rte_gro.c | 93 ++++++++-- > lib/librte_gro/rte_gro.h | 5 +- > 6 files changed, 796 insertions(+), 16 deletions(-) > create mode 100644 lib/librte_gro/gro_udp4.c > create mode 100644 lib/librte_gro/gro_udp4.h > > diff --git a/lib/librte_gro/Makefile b/lib/librte_gro/Makefile > index e848687..41ec29e 100644 > --- a/lib/librte_gro/Makefile > +++ b/lib/librte_gro/Makefile > @@ -15,6 +15,7 @@ EXPORT_MAP := rte_gro_version.map > # source files > SRCS-$(CONFIG_RTE_LIBRTE_GRO) += rte_gro.c > SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_tcp4.c > +SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_udp4.c > SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_vxlan_tcp4.c > > # install this header file > diff --git a/lib/librte_gro/gro_udp4.c b/lib/librte_gro/gro_udp4.c > new file mode 100644 > index 0000000..25584fd > --- /dev/null > +++ b/lib/librte_gro/gro_udp4.c > @@ -0,0 +1,430 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2020 Inspur Corporation > + */ > + > +#include > +#include > +#include > +#include > + > +#include "gro_udp4.h" > + > +void * > +gro_udp4_tbl_create(uint16_t socket_id, > + uint16_t max_flow_num, > + 
uint16_t max_item_per_flow) > +{ > + struct gro_udp4_tbl *tbl; > + size_t size; > + uint32_t entries_num, i; > + > + entries_num = max_flow_num * max_item_per_flow; > + entries_num = RTE_MIN(entries_num, GRO_UDP4_TBL_MAX_ITEM_NUM); > + > + if (entries_num == 0) > + return NULL; > + > + tbl = rte_zmalloc_socket(__func__, > + sizeof(struct gro_udp4_tbl), > + RTE_CACHE_LINE_SIZE, > + socket_id); > + if (tbl == NULL) > + return NULL; > + > + size = sizeof(struct gro_udp4_item) * entries_num; > + tbl->items = rte_zmalloc_socket(__func__, > + size, > + RTE_CACHE_LINE_SIZE, > + socket_id); > + if (tbl->items == NULL) { > + rte_free(tbl); > + return NULL; > + } > + tbl->max_item_num = entries_num; > + > + size = sizeof(struct gro_udp4_flow) * entries_num; > + tbl->flows = rte_zmalloc_socket(__func__, > + size, > + RTE_CACHE_LINE_SIZE, > + socket_id); > + if (tbl->flows == NULL) { > + rte_free(tbl->items); > + rte_free(tbl); > + return NULL; > + } > + /* INVALID_ARRAY_INDEX indicates an empty flow */ > + for (i = 0; i < entries_num; i++) > + tbl->flows[i].start_index = INVALID_ARRAY_INDEX; > + tbl->max_flow_num = entries_num; > + > + return tbl; > +} > + > +void > +gro_udp4_tbl_destroy(void *tbl) > +{ > + struct gro_udp4_tbl *udp_tbl = tbl; > + > + if (udp_tbl) { > + rte_free(udp_tbl->items); > + rte_free(udp_tbl->flows); > + } > + rte_free(udp_tbl); > +} > + > +static inline uint32_t > +find_an_empty_item(struct gro_udp4_tbl *tbl) > +{ > + uint32_t i; > + uint32_t max_item_num = tbl->max_item_num; > + > + for (i = 0; i < max_item_num; i++) > + if (tbl->items[i].firstseg == NULL) > + return i; > + return INVALID_ARRAY_INDEX; > +} > + > +static inline uint32_t > +find_an_empty_flow(struct gro_udp4_tbl *tbl) > +{ > + uint32_t i; > + uint32_t max_flow_num = tbl->max_flow_num; > + > + for (i = 0; i < max_flow_num; i++) > + if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX) > + return i; > + return INVALID_ARRAY_INDEX; > +} > + > +static inline uint32_t > 
+insert_new_item(struct gro_udp4_tbl *tbl, > + struct rte_mbuf *pkt, > + uint64_t start_time, > + uint32_t prev_idx, > + uint16_t frag_offset, > + uint8_t is_last_frag) > +{ > + uint32_t item_idx; > + > + item_idx = find_an_empty_item(tbl); > + if (item_idx == INVALID_ARRAY_INDEX) > + return INVALID_ARRAY_INDEX; > + > + tbl->items[item_idx].firstseg = pkt; > + tbl->items[item_idx].lastseg = rte_pktmbuf_lastseg(pkt); > + tbl->items[item_idx].start_time = start_time; > + tbl->items[item_idx].next_pkt_idx = INVALID_ARRAY_INDEX; > + tbl->items[item_idx].frag_offset = frag_offset; > + tbl->items[item_idx].is_last_frag = is_last_frag; > + tbl->items[item_idx].nb_merged = 1; > + tbl->item_num++; > + > + /* if the previous packet exists, chain them together. */ > + if (prev_idx != INVALID_ARRAY_INDEX) { > + tbl->items[item_idx].next_pkt_idx = > + tbl->items[prev_idx].next_pkt_idx; > + tbl->items[prev_idx].next_pkt_idx = item_idx; > + } > + > + return item_idx; > +} > + > +static inline uint32_t > +delete_item(struct gro_udp4_tbl *tbl, uint32_t item_idx, > + uint32_t prev_item_idx) > +{ > + uint32_t next_idx = tbl->items[item_idx].next_pkt_idx; > + > + /* NULL indicates an empty item */ > + tbl->items[item_idx].firstseg = NULL; > + tbl->item_num--; > + if (prev_item_idx != INVALID_ARRAY_INDEX) > + tbl->items[prev_item_idx].next_pkt_idx = next_idx; > + > + return next_idx; > +} > + > +static inline uint32_t > +insert_new_flow(struct gro_udp4_tbl *tbl, > + struct udp4_flow_key *src, > + uint32_t item_idx) > +{ > + struct udp4_flow_key *dst; > + uint32_t flow_idx; > + > + flow_idx = find_an_empty_flow(tbl); > + if (unlikely(flow_idx == INVALID_ARRAY_INDEX)) > + return INVALID_ARRAY_INDEX; > + > + dst = &(tbl->flows[flow_idx].key); > + > + rte_ether_addr_copy(&(src->eth_saddr), &(dst->eth_saddr)); > + rte_ether_addr_copy(&(src->eth_daddr), &(dst->eth_daddr)); > + dst->ip_src_addr = src->ip_src_addr; > + dst->ip_dst_addr = src->ip_dst_addr; > + dst->ip_id = src->ip_id; > + 
dst->ip_id = src->ip_id; Duplicated code. > + > + tbl->flows[flow_idx].start_index = item_idx; > + tbl->flow_num++; > + > + return flow_idx; > +} > + > +/* > + * update the packet length for the flushed packet. > + */ > +static inline void > +update_header(struct gro_udp4_item *item) > +{ > + struct rte_ipv4_hdr *ipv4_hdr; > + struct rte_mbuf *pkt = item->firstseg; > + uint16_t frag_offset; > + > + ipv4_hdr = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) + > + pkt->l2_len); > + ipv4_hdr->total_length = rte_cpu_to_be_16(pkt->pkt_len - > + pkt->l2_len); > + > + /* Clear MF bit if it is last fragment */ > + if (item->is_last_frag) { > + frag_offset = rte_be_to_cpu_16(ipv4_hdr->fragment_offset); > + ipv4_hdr->fragment_offset = > + rte_cpu_to_be_16(frag_offset & ~RTE_IPV4_HDR_MF_FLAG); > + } > +} > + > +int32_t > +gro_udp4_reassemble(struct rte_mbuf *pkt, > + struct gro_udp4_tbl *tbl, > + uint64_t start_time) > +{ > + struct rte_ether_hdr *eth_hdr; > + struct rte_ipv4_hdr *ipv4_hdr; > + uint16_t ip_dl; > + uint16_t ip_id, hdr_len; > + uint16_t frag_offset = 0; > + uint8_t is_last_frag; > + > + struct udp4_flow_key key; > + uint32_t cur_idx, prev_idx, item_idx; > + uint32_t i, max_flow_num, remaining_flow_num; > + int cmp; > + uint8_t find; > + > + eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *); > + ipv4_hdr = (struct rte_ipv4_hdr *)((char *)eth_hdr + pkt->l2_len); > + hdr_len = pkt->l2_len + pkt->l3_len; > + > + /* > + * Don't process non-fragment packet. > + */ > + if (!is_ipv4_fragment(ipv4_hdr)) > + return -1; > + > + /* > + * Don't process the packet whose payload length is less than or > + * equal to 0. > + */ > + if (pkt->pkt_len - hdr_len <= 0) > + return -1; If an input packet is malicious and its mbuf->pkt_len is smaller than (mbuf->l2_len + mbuf->l3_len), the above check won't work correctly, because the result of the subtraction is an unsigned value, which can never be negative. > + > + ip_dl = rte_be_to_cpu_16(ipv4_hdr->total_length) - pkt->l3_len; Same as above.
> + ip_id = rte_be_to_cpu_16(ipv4_hdr->packet_id); > + frag_offset = rte_be_to_cpu_16(ipv4_hdr->fragment_offset); > + is_last_frag = ((frag_offset & RTE_IPV4_HDR_MF_FLAG) == 0) ? 1 : 0; > + frag_offset = (uint16_t)(frag_offset & RTE_IPV4_HDR_OFFSET_MASK) << 3; > + > + rte_ether_addr_copy(&(eth_hdr->s_addr), &(key.eth_saddr)); > + rte_ether_addr_copy(&(eth_hdr->d_addr), &(key.eth_daddr)); > + key.ip_src_addr = ipv4_hdr->src_addr; > + key.ip_dst_addr = ipv4_hdr->dst_addr; > + key.ip_id = ip_id; > + > + /* Search for a matched flow. */ > + max_flow_num = tbl->max_flow_num; > + remaining_flow_num = tbl->flow_num; > + find = 0; > + for (i = 0; i < max_flow_num && remaining_flow_num; i++) { > + if (tbl->flows[i].start_index != INVALID_ARRAY_INDEX) { > + if (is_same_udp4_flow(tbl->flows[i].key, key)) { > + find = 1; > + break; > + } > + remaining_flow_num--; > + } > + } > + > + /* > + * Fail to find a matched flow. Insert a new flow and store the > + * packet into the flow. > + */ > + if (find == 0) { > + item_idx = insert_new_item(tbl, pkt, start_time, > + INVALID_ARRAY_INDEX, frag_offset, > + is_last_frag); > + if (item_idx == INVALID_ARRAY_INDEX) > + return -1; Adding unlikely here could get better performance, IMO. > + if (insert_new_flow(tbl, &key, item_idx) == > + INVALID_ARRAY_INDEX) { > + /* > + * Fail to insert a new flow, so delete the > + * stored packet. > + */ > + delete_item(tbl, item_idx, INVALID_ARRAY_INDEX); > + return -1; > + } > + return 0; > + } > + > + /* > + * Check all packets in the flow and try to find a neighbor for > + * the input packet. > + */ > + cur_idx = tbl->flows[i].start_index; > + prev_idx = cur_idx; > + do { > + cmp = udp_check_neighbor(&(tbl->items[cur_idx]), > + frag_offset, ip_dl, 0); > + if (cmp) { > + if (merge_two_udp4_packets(&(tbl->items[cur_idx]), > + pkt, cmp, frag_offset, > + is_last_frag, 0)) > + return 1; > + /* > + * Fail to merge the two packets, as the packet > + * length is greater than the max value. 
Store > + * the packet into the flow. > + */ > + if (insert_new_item(tbl, pkt, start_time, prev_idx, > + frag_offset, is_last_frag) == > + INVALID_ARRAY_INDEX) > + return -1; > + return 0; > + } > + > + /* Ensure inserted items are ordered by frag_offset */ > + if (frag_offset > + < tbl->items[cur_idx].frag_offset) { > + break; > + } > + > + prev_idx = cur_idx; > + cur_idx = tbl->items[cur_idx].next_pkt_idx; > + } while (cur_idx != INVALID_ARRAY_INDEX); > + > + /* Fail to find a neighbor, so store the packet into the flow. */ > + if (cur_idx == tbl->flows[i].start_index) { > + /* Insert it before the first packet of the flow */ > + item_idx = insert_new_item(tbl, pkt, start_time, > + INVALID_ARRAY_INDEX, frag_offset, > + is_last_frag); > + if (item_idx == INVALID_ARRAY_INDEX) > + return -1; > + tbl->items[item_idx].next_pkt_idx = cur_idx; > + tbl->flows[i].start_index = item_idx; > + } else { > + if (insert_new_item(tbl, pkt, start_time, prev_idx, > + frag_offset, is_last_frag) > + == INVALID_ARRAY_INDEX) > + return -1; > + } > + > + return 0; > +} > + > +static int > +gro_udp4_merge_items(struct gro_udp4_tbl *tbl, > + uint32_t start_idx) > +{ > + uint16_t frag_offset; > + uint8_t is_last_frag; > + int16_t ip_dl; > + struct rte_mbuf *pkt; > + int cmp; > + uint32_t item_idx; > + uint16_t hdr_len; > + > + item_idx = tbl->items[start_idx].next_pkt_idx; > + while (item_idx != INVALID_ARRAY_INDEX) { > + pkt = tbl->items[item_idx].firstseg; > + hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len + > + pkt->l3_len; For non-tunnel packets, mbuf->outer_l2_len and mbuf->outer_l3_len are not guaranteed to be 0. I think it's better to pass the outer header length as a parameter to gro_udp4_merge_items(), as udp_check_neighbor() does.
> + ip_dl = pkt->pkt_len - hdr_len; > + frag_offset = tbl->items[item_idx].frag_offset; > + is_last_frag = tbl->items[item_idx].is_last_frag; > + cmp = udp_check_neighbor(&(tbl->items[start_idx]), > + frag_offset, ip_dl, 0); > + if (cmp) { > + if (merge_two_udp4_packets( > + &(tbl->items[start_idx]), > + pkt, cmp, frag_offset, > + is_last_frag, 0)) { > + item_idx = delete_item(tbl, item_idx, > + INVALID_ARRAY_INDEX); > + tbl->items[start_idx].next_pkt_idx > + = item_idx; > + } else { > + return 0; > + } > + } else { > + return 0; > + } A single line doesn't need braces. > + } > + > + return 0; > +} > + > +uint16_t > +gro_udp4_tbl_timeout_flush(struct gro_udp4_tbl *tbl, > + uint64_t flush_timestamp, > + struct rte_mbuf **out, > + uint16_t nb_out) > +{ > + uint16_t k = 0; > + uint32_t i, j; > + uint32_t max_flow_num = tbl->max_flow_num; > + > + for (i = 0; i < max_flow_num; i++) { > + if (unlikely(tbl->flow_num == 0)) > + return k; > + > + j = tbl->flows[i].start_index; > + while (j != INVALID_ARRAY_INDEX) { > + if (tbl->items[j].start_time <= flush_timestamp) { > + gro_udp4_merge_items(tbl, j); > + out[k++] = tbl->items[j].firstseg; > + if (tbl->items[j].nb_merged > 1) > + update_header(&(tbl->items[j])); > + /* > + * Delete the packet and get the next > + * packet in the flow. > + */ > + j = delete_item(tbl, j, INVALID_ARRAY_INDEX); > + tbl->flows[i].start_index = j; > + if (j == INVALID_ARRAY_INDEX) > + tbl->flow_num--; > + > + if (unlikely(k == nb_out)) > + return k; > + } else > + /* > + * The left packets in this flow won't be > + * timeout. Go to check other flows. 
> + */ > + break; > + } > + } > + return k; > +} > + > +uint32_t > +gro_udp4_tbl_pkt_count(void *tbl) > +{ > + struct gro_udp4_tbl *gro_tbl = tbl; > + > + if (gro_tbl) > + return gro_tbl->item_num; > + > + return 0; > +} > diff --git a/lib/librte_gro/gro_udp4.h b/lib/librte_gro/gro_udp4.h > new file mode 100644 > index 0000000..bc67eb1 > --- /dev/null > +++ b/lib/librte_gro/gro_udp4.h > @@ -0,0 +1,281 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2020 Inspur Corporation > + */ > + > +#ifndef _GRO_UDP4_H_ > +#define _GRO_UDP4_H_ > + > +#include > +#include > + > +#define INVALID_ARRAY_INDEX 0xffffffffUL > +#define GRO_UDP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL) > + > +/* > + * The max length of a IPv4 packet, which includes the length of the L3 > + * header, the L4 header and the data payload. > + */ > +#define MAX_IPV4_PKT_LENGTH UINT16_MAX > + > +/* Header fields representing a UDP/IPv4 flow */ > +struct udp4_flow_key { > + struct rte_ether_addr eth_saddr; > + struct rte_ether_addr eth_daddr; > + uint32_t ip_src_addr; > + uint32_t ip_dst_addr; > + > + /* IP fragment for UDP does not contain UDP header > + * except the first one. But IP ID must be same. > + */ > + uint16_t ip_id; > +}; > + > +struct gro_udp4_flow { > + struct udp4_flow_key key; > + /* > + * The index of the first packet in the flow. > + * INVALID_ARRAY_INDEX indicates an empty flow. > + */ > + uint32_t start_index; > +}; > + > +struct gro_udp4_item { > + /* > + * The first MBUF segment of the packet. If the value > + * is NULL, it means the item is empty. > + */ > + struct rte_mbuf *firstseg; > + /* The last MBUF segment of the packet */ > + struct rte_mbuf *lastseg; > + /* > + * The time when the first packet is inserted into the table. > + * This value won't be updated, even if the packet is merged > + * with other packets. > + */ > + uint64_t start_time; > + /* > + * next_pkt_idx is used to chain the packets that > + * are in the same flow but can't be merged together > + * (e.g. 
caused by packet reordering). > + */ > + uint32_t next_pkt_idx; > + /* offset of IP fragment packet */ > + uint16_t frag_offset; > + /* is last IP fragment? */ > + uint8_t is_last_frag; > + /* the number of merged packets */ > + uint16_t nb_merged; > +}; > + > +/* > + * UDP/IPv4 reassembly table structure. > + */ > +struct gro_udp4_tbl { > + /* item array */ > + struct gro_udp4_item *items; > + /* flow array */ > + struct gro_udp4_flow *flows; > + /* current item number */ > + uint32_t item_num; > + /* current flow num */ > + uint32_t flow_num; > + /* item array size */ > + uint32_t max_item_num; > + /* flow array size */ > + uint32_t max_flow_num; > +}; > + > +/** > + * This function creates a UDP/IPv4 reassembly table. > + * > + * @param socket_id > + * Socket index for allocating the UDP/IPv4 reassemble table > + * @param max_flow_num > + * The maximum number of flows in the UDP/IPv4 GRO table > + * @param max_item_per_flow > + * The maximum number of packets per flow > + * > + * @return > + * - Return the table pointer on success. > + * - Return NULL on failure. > + */ > +void *gro_udp4_tbl_create(uint16_t socket_id, > + uint16_t max_flow_num, > + uint16_t max_item_per_flow); > + > +/** > + * This function destroys a UDP/IPv4 reassembly table. > + * > + * @param tbl > + * Pointer pointing to the UDP/IPv4 reassembly table. > + */ > +void gro_udp4_tbl_destroy(void *tbl); > + > +/** > + * This function merges a UDP/IPv4 packet. > + * > + * This function does not check if the packet has correct checksums and > + * does not re-calculate checksums for the merged packet. It returns the > + * packet if it isn't UDP fragment or there is no available space in > + * the table. > + * > + * @param pkt > + * Packet to reassemble > + * @param tbl > + * Pointer pointing to the UDP/IPv4 reassembly table > + * @start_time > + * The time when the packet is inserted into the table > + * > + * @return > + * - Return a positive value if the packet is merged. 
> + * - Return zero if the packet isn't merged but stored in the table. > + * - Return a negative value for invalid parameters or no available > + * space in the table. > + */ > +int32_t gro_udp4_reassemble(struct rte_mbuf *pkt, > + struct gro_udp4_tbl *tbl, > + uint64_t start_time); > + > +/** > + * This function flushes timeout packets in a UDP/IPv4 reassembly table, > + * and without updating checksums. > + * > + * @param tbl > + * UDP/IPv4 reassembly table pointer > + * @param flush_timestamp > + * Flush packets which are inserted into the table before or at the > + * flush_timestamp. > + * @param out > + * Pointer array used to keep flushed packets > + * @param nb_out > + * The element number in 'out'. It also determines the maximum number of > + * packets that can be flushed finally. > + * > + * @return > + * The number of flushed packets > + */ > +uint16_t gro_udp4_tbl_timeout_flush(struct gro_udp4_tbl *tbl, > + uint64_t flush_timestamp, > + struct rte_mbuf **out, > + uint16_t nb_out); > + > +/** > + * This function returns the number of the packets in a UDP/IPv4 > + * reassembly table. > + * > + * @param tbl > + * UDP/IPv4 reassembly table pointer > + * > + * @return > + * The number of packets in the table > + */ > +uint32_t gro_udp4_tbl_pkt_count(void *tbl); > + > +/* > + * Check if two UDP/IPv4 packets belong to the same flow. > + */ > +static inline int > +is_same_udp4_flow(struct udp4_flow_key k1, struct udp4_flow_key k2) > +{ > + return (rte_is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) && > + rte_is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) && > + (k1.ip_src_addr == k2.ip_src_addr) && > + (k1.ip_dst_addr == k2.ip_dst_addr) && > + (k1.ip_id == k2.ip_id)); > +} > + > +/* > + * Merge two UDP/IPv4 packets without updating checksums. > + * If cmp is larger than 0, append the new packet to the > + * original packet. Otherwise, pre-pend the new packet to > + * the original packet. 
> + */ > +static inline int > +merge_two_udp4_packets(struct gro_udp4_item *item, > + struct rte_mbuf *pkt, > + int cmp, > + uint16_t frag_offset, > + uint8_t is_last_frag, > + uint16_t l2_offset) > +{ > + struct rte_mbuf *pkt_head, *pkt_tail, *lastseg; > + uint16_t hdr_len, l2_len; > + uint32_t ip_len; > + > + if (cmp > 0) { > + pkt_head = item->firstseg; > + pkt_tail = pkt; > + } else { > + pkt_head = pkt; > + pkt_tail = item->firstseg; > + } > + > + /* check if the IPv4 packet length is greater than the max value */ > + hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len; > + l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len; > + ip_len = pkt_head->pkt_len - l2_len > + + pkt_tail->pkt_len - hdr_len; > + if (unlikely(ip_len > MAX_IPV4_PKT_LENGTH)) > + return 0; > + > + /* remove the packet header for the tail packet */ > + rte_pktmbuf_adj(pkt_tail, hdr_len); > + > + /* chain two packets together */ > + if (cmp > 0) { > + item->lastseg->next = pkt; > + item->lastseg = rte_pktmbuf_lastseg(pkt); > + } else { > + lastseg = rte_pktmbuf_lastseg(pkt); > + lastseg->next = item->firstseg; > + item->firstseg = pkt; > + item->frag_offset = frag_offset; > + } > + item->nb_merged++; > + if (is_last_frag) > + item->is_last_frag = is_last_frag; > + > + /* update MBUF metadata for the merged packet */ > + pkt_head->nb_segs += pkt_tail->nb_segs; > + pkt_head->pkt_len += pkt_tail->pkt_len; > + > + return 1; > +} > + > +/* > + * Check if two UDP/IPv4 packets are neighbors. > + */ > +static inline int > +udp_check_neighbor(struct gro_udp4_item *item, > + uint16_t frag_offset, > + uint16_t ip_dl, > + uint16_t l2_offset) It's better to rename the function as udp4_check_neighbor(), IMO. 
> +{ > + struct rte_mbuf *pkt_orig = item->firstseg; > + uint16_t len; > + > + /* check if the two packets are neighbors */ > + len = pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len - > + pkt_orig->l3_len; > + if (frag_offset == item->frag_offset + len) > + /* append the new packet */ > + return 1; > + else if (frag_offset + ip_dl == item->frag_offset) > + /* pre-pend the new packet */ > + return -1; > + > + return 0; > +} > + > +static inline int > +is_ipv4_fragment(const struct rte_ipv4_hdr *hdr) > +{ > + uint16_t flag_offset, ip_flag, ip_ofs; > + > + flag_offset = rte_be_to_cpu_16(hdr->fragment_offset); > + ip_ofs = (uint16_t)(flag_offset & RTE_IPV4_HDR_OFFSET_MASK); > + ip_flag = (uint16_t)(flag_offset & RTE_IPV4_HDR_MF_FLAG); > + > + return ip_flag != 0 || ip_ofs != 0; > +} > +#endif > diff --git a/lib/librte_gro/meson.build b/lib/librte_gro/meson.build > index 501668c..0d18dc2 100644 > --- a/lib/librte_gro/meson.build > +++ b/lib/librte_gro/meson.build > @@ -1,6 +1,6 @@ > # SPDX-License-Identifier: BSD-3-Clause > # Copyright(c) 2017 Intel Corporation > > -sources = files('rte_gro.c', 'gro_tcp4.c', 'gro_vxlan_tcp4.c') > +sources = files('rte_gro.c', 'gro_tcp4.c', 'gro_udp4.c', 'gro_vxlan_tcp4.c') > headers = files('rte_gro.h') > deps += ['ethdev'] > diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c > index 6618f4d..d094129 100644 > --- a/lib/librte_gro/rte_gro.c > +++ b/lib/librte_gro/rte_gro.c > @@ -9,6 +9,7 @@ > > #include "rte_gro.h" > #include "gro_tcp4.h" > +#include "gro_udp4.h" > #include "gro_vxlan_tcp4.h" > > typedef void *(*gro_tbl_create_fn)(uint16_t socket_id, > @@ -18,17 +19,23 @@ > typedef uint32_t (*gro_tbl_pkt_count_fn)(void *tbl); > > static gro_tbl_create_fn tbl_create_fn[RTE_GRO_TYPE_MAX_NUM] = { > - gro_tcp4_tbl_create, gro_vxlan_tcp4_tbl_create, NULL}; > + gro_tcp4_tbl_create, gro_vxlan_tcp4_tbl_create, > + gro_udp4_tbl_create, NULL}; > static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = { > 
gro_tcp4_tbl_destroy, gro_vxlan_tcp4_tbl_destroy, > + gro_udp4_tbl_destroy, > NULL}; > static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = { > gro_tcp4_tbl_pkt_count, gro_vxlan_tcp4_tbl_pkt_count, > + gro_udp4_tbl_pkt_count, > NULL}; > > #define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \ > ((ptype & RTE_PTYPE_L4_TCP) == RTE_PTYPE_L4_TCP)) > > +#define IS_IPV4_UDP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \ > + ((ptype & RTE_PTYPE_L4_UDP) == RTE_PTYPE_L4_UDP)) > + > #define IS_IPV4_VXLAN_TCP4_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \ > ((ptype & RTE_PTYPE_L4_UDP) == RTE_PTYPE_L4_UDP) && \ > ((ptype & RTE_PTYPE_TUNNEL_VXLAN) == \ > @@ -40,6 +47,7 @@ > RTE_PTYPE_INNER_L3_IPV4_EXT | \ > RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN)) != 0)) > > + > /* > * GRO context structure. It keeps the table structures, which are > * used to merge packets, for different GRO types. Before using > @@ -123,20 +131,26 @@ struct gro_ctx { > struct gro_tcp4_flow tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM]; > struct gro_tcp4_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} }; > > - /* Allocate a reassembly table for VXLAN GRO */ > + /* allocate a reassembly table for UDP/IPv4 GRO */ > + struct gro_udp4_tbl udp_tbl; > + struct gro_udp4_flow udp_flows[RTE_GRO_MAX_BURST_ITEM_NUM]; > + struct gro_udp4_item udp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} }; > + > + /* Allocate a reassembly table for VXLAN TCP GRO */ > struct gro_vxlan_tcp4_tbl vxlan_tbl; > struct gro_vxlan_tcp4_flow vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM]; > - struct gro_vxlan_tcp4_item vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM] = { > - {{0}, 0, 0} }; > + struct gro_vxlan_tcp4_item vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM] > + = {{{0}, 0, 0} }; > > struct rte_mbuf *unprocess_pkts[nb_pkts]; > uint32_t item_num; > int32_t ret; > uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts; > - uint8_t do_tcp4_gro = 0, do_vxlan_gro = 0; > + uint8_t do_tcp4_gro = 0, do_vxlan_gro = 0, do_udp4_gro = 0; > > if 
(unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 | > - RTE_GRO_TCP_IPV4)) == 0)) > + RTE_GRO_TCP_IPV4 | > + RTE_GRO_UDP_IPV4)) == 0)) > return nb_pkts; > > /* Get the maximum number of packets */ > @@ -170,6 +184,20 @@ struct gro_ctx { > do_tcp4_gro = 1; > } > > + if (param->gro_types & RTE_GRO_UDP_IPV4) { > + for (i = 0; i < item_num; i++) > + udp_flows[i].start_index = INVALID_ARRAY_INDEX; > + > + udp_tbl.flows = udp_flows; > + udp_tbl.items = udp_items; > + udp_tbl.flow_num = 0; > + udp_tbl.item_num = 0; > + udp_tbl.max_flow_num = item_num; > + udp_tbl.max_item_num = item_num; > + do_udp4_gro = 1; > + } > + > + > for (i = 0; i < nb_pkts; i++) { > /* > * The timestamp is ignored, since all packets > @@ -177,7 +205,8 @@ struct gro_ctx { > */ > if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) && > do_vxlan_gro) { > - ret = gro_vxlan_tcp4_reassemble(pkts[i], &vxlan_tbl, 0); > + ret = gro_vxlan_tcp4_reassemble(pkts[i], > + &vxlan_tbl, 0); > if (ret > 0) > /* Merge successfully */ > nb_after_gro--; > @@ -191,27 +220,43 @@ struct gro_ctx { > nb_after_gro--; > else if (ret < 0) > unprocess_pkts[unprocess_num++] = pkts[i]; > + } else if (IS_IPV4_UDP_PKT(pkts[i]->packet_type) && > + do_udp4_gro) { > + ret = gro_udp4_reassemble(pkts[i], &udp_tbl, 0); > + if (ret > 0) > + /* merge successfully */ > + nb_after_gro--; > + else if (ret < 0) > + unprocess_pkts[unprocess_num++] = pkts[i]; > } else > unprocess_pkts[unprocess_num++] = pkts[i]; > } > > - if (nb_after_gro < nb_pkts) { > + if ((nb_after_gro < nb_pkts) > + || (unprocess_num < nb_pkts)) { Why do we need to check unprocess_num here?
> i = 0; > /* Flush all packets from the tables */ > if (do_vxlan_gro) { > i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl, > 0, pkts, nb_pkts); > } > + > if (do_tcp4_gro) { > i += gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0, > &pkts[i], nb_pkts - i); > } > + > + if (do_udp4_gro) { > + i += gro_udp4_tbl_timeout_flush(&udp_tbl, 0, > + &pkts[i], nb_pkts - i); > + } > /* Copy unprocessed packets */ > if (unprocess_num > 0) { > memcpy(&pkts[i], unprocess_pkts, > sizeof(struct rte_mbuf *) * > unprocess_num); > } > + nb_after_gro = i + unprocess_num; > } > > return nb_after_gro; > 1.8.3.1