From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 4AEAB42C97; Mon, 12 Jun 2023 13:23:44 +0200 (CEST) Received: from mails.dpdk.org (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id CD37640689; Mon, 12 Jun 2023 13:23:43 +0200 (CEST) Received: from mail-pf1-f172.google.com (mail-pf1-f172.google.com [209.85.210.172]) by mails.dpdk.org (Postfix) with ESMTP id 351194014F for ; Mon, 12 Jun 2023 13:23:42 +0200 (CEST) Received: by mail-pf1-f172.google.com with SMTP id d2e1a72fcca58-6642e056d7fso1151055b3a.1 for ; Mon, 12 Jun 2023 04:23:42 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20221208; t=1686569021; x=1689161021; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=Sx5wTcAgEn0CLITUDQsNHH5RcEMnUqnriNEbsKH+QQQ=; b=S2Jv/Lmvx9/Uv12haqRV5RU+z3Q5sf8f4EaodCekFcxvib9lc24ZcNnZzuoKL8WDCp LFFA+LAWtIXnqJWGvjy8246uDF/gbZZmEnpVfBZCPmXys2ZYEsKO2U8KAipeNY2Dx1Cw YG7yhrNeY46ZBTA0kfaaKJZ9JgzZcdDK0L6+EJTkU/ntU3QafjH3qiZyKPc+GE139Aow 871EsCQLw2tov0SokBWUZUJ8k7Vo6TDWY7YXPcH9BBOFClo0f/JbXKdNkrVEoPReRwNf E6MpMpVImdsyyeAzE5GQbX1rxJ2YCP67cH8ZK8SmTX1jvMtvF9umhrBQjNUpAeGCd5MA 3ehg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20221208; t=1686569021; x=1689161021; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=Sx5wTcAgEn0CLITUDQsNHH5RcEMnUqnriNEbsKH+QQQ=; b=RfI2F/OgF8uJJ0+JUiu7oyoiqvfWNSZGZTZNqd3Eo/6tX9lr4Tp/9WaXQ8t4nqPiRl LMj+SphJkut4Jn3MnsUnRSJM55cgRVaxKf4erL2yX2CSPRX3Kv1prj8lpejtSNNgcKdF MmB8IcaZomxWm0Ke2vl2tF/W8WVJDc69qkunyodov7xzMb4wBPH4VHehaTQnlxI+kTWG KOFUDQFXayzaFo1w44aj+wurL/mcUZxm78nYrmnjIr4E0oKLpiLIxhP7yToRpVheyeaR 
FZnVo5ZkC9z4rzDusQ3jzbc8pKIrK2MoF1sYDgm4zJFO6ZcSGvBfUiGjP4wE7vgfYOZ6 AxeA== X-Gm-Message-State: AC+VfDwPC+yQyyO4G+ADIJLMtRcn2Xhe3N6YGscZBWzuml01NxL8CfLo XyJi+GzLX382/grx7awD+qRDJ5yGiFI= X-Google-Smtp-Source: ACHHUZ71XAcwBti3gvvdlt5u1LQLih9vYiQCWj6dGm5v/M6vy88yTAC2yYdh/puIW1qdRaU1yJ7edg== X-Received: by 2002:a05:6a20:8412:b0:105:4206:7062 with SMTP id c18-20020a056a20841200b0010542067062mr9982938pzd.42.1686569020661; Mon, 12 Jun 2023 04:23:40 -0700 (PDT) Received: from kparameshw7KFWX.vmware.com.com ([49.207.243.222]) by smtp.gmail.com with ESMTPSA id u11-20020a170902714b00b001ae0b373382sm8110475plm.198.2023.06.12.04.23.38 (version=TLS1_3 cipher=TLS_CHACHA20_POLY1305_SHA256 bits=256/256); Mon, 12 Jun 2023 04:23:40 -0700 (PDT) From: Kumara Parameshwaran To: jiayu.hu@intel.com Cc: dev@dpdk.org, Kumara Parameshwaran Subject: [PATCH v6] gro : ipv6 changes to support GRO for TCP/ipv6 Date: Mon, 12 Jun 2023 16:53:34 +0530 Message-Id: <20230612112334.71914-1-kumaraparamesh92@gmail.com> X-Mailer: git-send-email 2.32.1 (Apple Git-133) In-Reply-To: <20221020181425.48006-1-kumaraparmesh92@gmail.com> References: <20221020181425.48006-1-kumaraparmesh92@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org The patch adds GRO support for TCP/ipv6 packets. This does not include the support for vxlan, udp ipv6 packets. Signed-off-by: Kumara Parameshwaran --- v1: * Changes to support GRO for TCP/ipv6 packets. This does not include vxlan changes. * The GRO is performed only for ipv6 packets that does not contain extension headers. * The logic for the TCP coalescing remains the same, in ipv6 header the source address, destination address, flow label, version fields are expected to be the same. 
* Re-organised the code to reuse certain tcp functions for both ipv4 and ipv6 flows. v2: * Fix comments in gro_tcp6.h header file. v3: * Address review comments to fix code duplication for v4 and v6 v4: * Addresses review comments for v3, do not use callbacks v5: * Address review comments v6: * Fix warning and coding style issues lib/gro/gro_tcp4.c | 178 ++++++------------------- lib/gro/gro_tcp4.h | 170 +----------------------- lib/gro/gro_tcp6.c | 266 +++++++++++++++++++++++++++++++++++++ lib/gro/gro_tcp6.h | 161 ++++++++++++++++++++++ lib/gro/gro_tcp_internal.c | 128 ++++++++++++++++++ lib/gro/gro_tcp_internal.h | 212 +++++++++++++++++++++++++++++ lib/gro/gro_vxlan_tcp4.c | 23 ++-- lib/gro/gro_vxlan_tcp4.h | 3 +- lib/gro/meson.build | 2 + lib/gro/rte_gro.c | 83 ++++++++++-- lib/gro/rte_gro.h | 3 + 11 files changed, 896 insertions(+), 333 deletions(-) create mode 100644 lib/gro/gro_tcp6.c create mode 100644 lib/gro/gro_tcp6.h create mode 100644 lib/gro/gro_tcp_internal.c create mode 100644 lib/gro/gro_tcp_internal.h diff --git a/lib/gro/gro_tcp4.c b/lib/gro/gro_tcp4.c index 0014096e63..42fee78f30 100644 --- a/lib/gro/gro_tcp4.c +++ b/lib/gro/gro_tcp4.c @@ -30,7 +30,7 @@ gro_tcp4_tbl_create(uint16_t socket_id, if (tbl == NULL) return NULL; - size = sizeof(struct gro_tcp4_item) * entries_num; + size = sizeof(struct gro_tcp_item) * entries_num; tbl->items = rte_zmalloc_socket(__func__, size, RTE_CACHE_LINE_SIZE, @@ -71,18 +71,6 @@ gro_tcp4_tbl_destroy(void *tbl) rte_free(tcp_tbl); } -static inline uint32_t -find_an_empty_item(struct gro_tcp4_tbl *tbl) -{ - uint32_t i; - uint32_t max_item_num = tbl->max_item_num; - - for (i = 0; i < max_item_num; i++) - if (tbl->items[i].firstseg == NULL) - return i; - return INVALID_ARRAY_INDEX; -} - static inline uint32_t find_an_empty_flow(struct gro_tcp4_tbl *tbl) { @@ -95,56 +83,6 @@ find_an_empty_flow(struct gro_tcp4_tbl *tbl) return INVALID_ARRAY_INDEX; } -static inline uint32_t -insert_new_item(struct gro_tcp4_tbl *tbl, - 
struct rte_mbuf *pkt, - uint64_t start_time, - uint32_t prev_idx, - uint32_t sent_seq, - uint16_t ip_id, - uint8_t is_atomic) -{ - uint32_t item_idx; - - item_idx = find_an_empty_item(tbl); - if (item_idx == INVALID_ARRAY_INDEX) - return INVALID_ARRAY_INDEX; - - tbl->items[item_idx].firstseg = pkt; - tbl->items[item_idx].lastseg = rte_pktmbuf_lastseg(pkt); - tbl->items[item_idx].start_time = start_time; - tbl->items[item_idx].next_pkt_idx = INVALID_ARRAY_INDEX; - tbl->items[item_idx].sent_seq = sent_seq; - tbl->items[item_idx].ip_id = ip_id; - tbl->items[item_idx].nb_merged = 1; - tbl->items[item_idx].is_atomic = is_atomic; - tbl->item_num++; - - /* if the previous packet exists, chain them together. */ - if (prev_idx != INVALID_ARRAY_INDEX) { - tbl->items[item_idx].next_pkt_idx = - tbl->items[prev_idx].next_pkt_idx; - tbl->items[prev_idx].next_pkt_idx = item_idx; - } - - return item_idx; -} - -static inline uint32_t -delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx, - uint32_t prev_item_idx) -{ - uint32_t next_idx = tbl->items[item_idx].next_pkt_idx; - - /* NULL indicates an empty item */ - tbl->items[item_idx].firstseg = NULL; - tbl->item_num--; - if (prev_item_idx != INVALID_ARRAY_INDEX) - tbl->items[prev_item_idx].next_pkt_idx = next_idx; - - return next_idx; -} - static inline uint32_t insert_new_flow(struct gro_tcp4_tbl *tbl, struct tcp4_flow_key *src, @@ -159,13 +97,10 @@ insert_new_flow(struct gro_tcp4_tbl *tbl, dst = &(tbl->flows[flow_idx].key); - rte_ether_addr_copy(&(src->eth_saddr), &(dst->eth_saddr)); - rte_ether_addr_copy(&(src->eth_daddr), &(dst->eth_daddr)); + ASSIGN_COMMON_TCP_KEY((&src->cmn_key), (&dst->cmn_key)); + dst->ip_src_addr = src->ip_src_addr; dst->ip_dst_addr = src->ip_dst_addr; - dst->recv_ack = src->recv_ack; - dst->src_port = src->src_port; - dst->dst_port = src->dst_port; tbl->flows[flow_idx].start_index = item_idx; tbl->flow_num++; @@ -173,21 +108,6 @@ insert_new_flow(struct gro_tcp4_tbl *tbl, return flow_idx; } -/* - * 
update the packet length for the flushed packet. - */ -static inline void -update_header(struct gro_tcp4_item *item) -{ - struct rte_ipv4_hdr *ipv4_hdr; - struct rte_mbuf *pkt = item->firstseg; - - ipv4_hdr = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) + - pkt->l2_len); - ipv4_hdr->total_length = rte_cpu_to_be_16(pkt->pkt_len - - pkt->l2_len); -} - int32_t gro_tcp4_reassemble(struct rte_mbuf *pkt, struct gro_tcp4_tbl *tbl, @@ -202,9 +122,8 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt, uint8_t is_atomic; struct tcp4_flow_key key; - uint32_t cur_idx, prev_idx, item_idx; + uint32_t item_idx; uint32_t i, max_flow_num, remaining_flow_num; - int cmp; uint8_t find; /* @@ -216,7 +135,7 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt, eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *); ipv4_hdr = (struct rte_ipv4_hdr *)((char *)eth_hdr + pkt->l2_len); - tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len); + tcp_hdr = rte_pktmbuf_mtod_offset(pkt, struct rte_tcp_hdr *, pkt->l2_len + pkt->l3_len); hdr_len = pkt->l2_len + pkt->l3_len + pkt->l4_len; /* @@ -230,7 +149,6 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt, ip_tlen = rte_be_to_cpu_16(ipv4_hdr->total_length); if (pkt->pkt_len > (uint32_t)(ip_tlen + pkt->l2_len)) rte_pktmbuf_trim(pkt, pkt->pkt_len - ip_tlen - pkt->l2_len); - /* * Don't process the packet whose payload length is less than or * equal to 0. @@ -239,6 +157,13 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt, if (tcp_dl <= 0) return -1; + rte_ether_addr_copy(&(eth_hdr->src_addr), &(key.cmn_key.eth_saddr)); + rte_ether_addr_copy(&(eth_hdr->dst_addr), &(key.cmn_key.eth_daddr)); + key.ip_src_addr = ipv4_hdr->src_addr; + key.ip_dst_addr = ipv4_hdr->dst_addr; + key.cmn_key.src_port = tcp_hdr->src_port; + key.cmn_key.dst_port = tcp_hdr->dst_port; + key.cmn_key.recv_ack = tcp_hdr->recv_ack; /* * Save IPv4 ID for the packet whose DF bit is 0. For the packet * whose DF bit is 1, IPv4 ID is ignored. 
@@ -246,15 +171,6 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt, frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset); is_atomic = (frag_off & RTE_IPV4_HDR_DF_FLAG) == RTE_IPV4_HDR_DF_FLAG; ip_id = is_atomic ? 0 : rte_be_to_cpu_16(ipv4_hdr->packet_id); - sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq); - - rte_ether_addr_copy(&(eth_hdr->src_addr), &(key.eth_saddr)); - rte_ether_addr_copy(&(eth_hdr->dst_addr), &(key.eth_daddr)); - key.ip_src_addr = ipv4_hdr->src_addr; - key.ip_dst_addr = ipv4_hdr->dst_addr; - key.src_port = tcp_hdr->src_port; - key.dst_port = tcp_hdr->dst_port; - key.recv_ack = tcp_hdr->recv_ack; /* Search for a matched flow. */ max_flow_num = tbl->max_flow_num; @@ -270,63 +186,44 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt, } } - /* - * Fail to find a matched flow. Insert a new flow and store the - * packet into the flow. - */ if (find == 0) { - item_idx = insert_new_item(tbl, pkt, start_time, - INVALID_ARRAY_INDEX, sent_seq, ip_id, - is_atomic); + sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq); + item_idx = insert_new_tcp_item(pkt, tbl->items, &tbl->item_num, + tbl->max_item_num, start_time, + INVALID_ARRAY_INDEX, sent_seq, ip_id, + is_atomic); if (item_idx == INVALID_ARRAY_INDEX) return -1; if (insert_new_flow(tbl, &key, item_idx) == - INVALID_ARRAY_INDEX) { + INVALID_ARRAY_INDEX) { /* * Fail to insert a new flow, so delete the * stored packet. - */ - delete_item(tbl, item_idx, INVALID_ARRAY_INDEX); + */ + delete_tcp_item(tbl->items, item_idx, &tbl->item_num, INVALID_ARRAY_INDEX); return -1; } return 0; } - /* - * Check all packets in the flow and try to find a neighbor for - * the input packet. 
- */ - cur_idx = tbl->flows[i].start_index; - prev_idx = cur_idx; - do { - cmp = check_seq_option(&(tbl->items[cur_idx]), tcp_hdr, - sent_seq, ip_id, pkt->l4_len, tcp_dl, 0, - is_atomic); - if (cmp) { - if (merge_two_tcp4_packets(&(tbl->items[cur_idx]), - pkt, cmp, sent_seq, ip_id, 0)) - return 1; - /* - * Fail to merge the two packets, as the packet - * length is greater than the max value. Store - * the packet into the flow. - */ - if (insert_new_item(tbl, pkt, start_time, cur_idx, - sent_seq, ip_id, is_atomic) == - INVALID_ARRAY_INDEX) - return -1; - return 0; - } - prev_idx = cur_idx; - cur_idx = tbl->items[cur_idx].next_pkt_idx; - } while (cur_idx != INVALID_ARRAY_INDEX); + return process_tcp_item(pkt, tcp_hdr, tcp_dl, tbl->items, tbl->flows[i].start_index, + &tbl->item_num, tbl->max_item_num, + ip_id, is_atomic, start_time); +} - /* Fail to find a neighbor, so store the packet into the flow. */ - if (insert_new_item(tbl, pkt, start_time, prev_idx, sent_seq, - ip_id, is_atomic) == INVALID_ARRAY_INDEX) - return -1; +/* + * update the packet length for the flushed packet. + */ +static inline void +update_header(struct gro_tcp_item *item) +{ + struct rte_ipv4_hdr *ipv4_hdr; + struct rte_mbuf *pkt = item->firstseg; - return 0; + ipv4_hdr = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) + + pkt->l2_len); + ipv4_hdr->total_length = rte_cpu_to_be_16(pkt->pkt_len - + pkt->l2_len); } uint16_t @@ -353,7 +250,8 @@ gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl, * Delete the packet and get the next * packet in the flow. 
*/ - j = delete_item(tbl, j, INVALID_ARRAY_INDEX); + j = delete_tcp_item(tbl->items, j, + &tbl->item_num, INVALID_ARRAY_INDEX); tbl->flows[i].start_index = j; if (j == INVALID_ARRAY_INDEX) tbl->flow_num--; diff --git a/lib/gro/gro_tcp4.h b/lib/gro/gro_tcp4.h index 212f97a042..c0154afa24 100644 --- a/lib/gro/gro_tcp4.h +++ b/lib/gro/gro_tcp4.h @@ -5,32 +5,15 @@ #ifndef _GRO_TCP4_H_ #define _GRO_TCP4_H_ -#include +#include -#define INVALID_ARRAY_INDEX 0xffffffffUL #define GRO_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL) -/* - * The max length of a IPv4 packet, which includes the length of the L3 - * header, the L4 header and the data payload. - */ -#define MAX_IPV4_PKT_LENGTH UINT16_MAX - -/* The maximum TCP header length */ -#define MAX_TCP_HLEN 60 -#define INVALID_TCP_HDRLEN(len) \ - (((len) < sizeof(struct rte_tcp_hdr)) || ((len) > MAX_TCP_HLEN)) - -/* Header fields representing a TCP/IPv4 flow */ +/* Header fields representing common fields in TCP flow */ struct tcp4_flow_key { - struct rte_ether_addr eth_saddr; - struct rte_ether_addr eth_daddr; + struct cmn_tcp_key cmn_key; uint32_t ip_src_addr; uint32_t ip_dst_addr; - - uint32_t recv_ack; - uint16_t src_port; - uint16_t dst_port; }; struct gro_tcp4_flow { @@ -42,42 +25,12 @@ struct gro_tcp4_flow { uint32_t start_index; }; -struct gro_tcp4_item { - /* - * The first MBUF segment of the packet. If the value - * is NULL, it means the item is empty. - */ - struct rte_mbuf *firstseg; - /* The last MBUF segment of the packet */ - struct rte_mbuf *lastseg; - /* - * The time when the first packet is inserted into the table. - * This value won't be updated, even if the packet is merged - * with other packets. - */ - uint64_t start_time; - /* - * next_pkt_idx is used to chain the packets that - * are in the same flow but can't be merged together - * (e.g. caused by packet reordering). 
- */ - uint32_t next_pkt_idx; - /* TCP sequence number of the packet */ - uint32_t sent_seq; - /* IPv4 ID of the packet */ - uint16_t ip_id; - /* the number of merged packets */ - uint16_t nb_merged; - /* Indicate if IPv4 ID can be ignored */ - uint8_t is_atomic; -}; - /* * TCP/IPv4 reassembly table structure. */ struct gro_tcp4_tbl { /* item array */ - struct gro_tcp4_item *items; + struct gro_tcp_item *items; /* flow array */ struct gro_tcp4_flow *flows; /* current item number */ @@ -186,120 +139,9 @@ uint32_t gro_tcp4_tbl_pkt_count(void *tbl); static inline int is_same_tcp4_flow(struct tcp4_flow_key k1, struct tcp4_flow_key k2) { - return (rte_is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) && - rte_is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) && - (k1.ip_src_addr == k2.ip_src_addr) && + return ((k1.ip_src_addr == k2.ip_src_addr) && (k1.ip_dst_addr == k2.ip_dst_addr) && - (k1.recv_ack == k2.recv_ack) && - (k1.src_port == k2.src_port) && - (k1.dst_port == k2.dst_port)); + is_common_tcp_key(&k1.cmn_key, &k2.cmn_key)); } -/* - * Merge two TCP/IPv4 packets without updating checksums. - * If cmp is larger than 0, append the new packet to the - * original packet. Otherwise, pre-pend the new packet to - * the original packet. - */ -static inline int -merge_two_tcp4_packets(struct gro_tcp4_item *item, - struct rte_mbuf *pkt, - int cmp, - uint32_t sent_seq, - uint16_t ip_id, - uint16_t l2_offset) -{ - struct rte_mbuf *pkt_head, *pkt_tail, *lastseg; - uint16_t hdr_len, l2_len; - - if (cmp > 0) { - pkt_head = item->firstseg; - pkt_tail = pkt; - } else { - pkt_head = pkt; - pkt_tail = item->firstseg; - } - - /* check if the IPv4 packet length is greater than the max value */ - hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len + - pkt_head->l4_len; - l2_len = l2_offset > 0 ? 
pkt_head->outer_l2_len : pkt_head->l2_len; - if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len - - hdr_len > MAX_IPV4_PKT_LENGTH)) - return 0; - - /* remove the packet header for the tail packet */ - rte_pktmbuf_adj(pkt_tail, hdr_len); - - /* chain two packets together */ - if (cmp > 0) { - item->lastseg->next = pkt; - item->lastseg = rte_pktmbuf_lastseg(pkt); - /* update IP ID to the larger value */ - item->ip_id = ip_id; - } else { - lastseg = rte_pktmbuf_lastseg(pkt); - lastseg->next = item->firstseg; - item->firstseg = pkt; - /* update sent_seq to the smaller value */ - item->sent_seq = sent_seq; - item->ip_id = ip_id; - } - item->nb_merged++; - - /* update MBUF metadata for the merged packet */ - pkt_head->nb_segs += pkt_tail->nb_segs; - pkt_head->pkt_len += pkt_tail->pkt_len; - - return 1; -} - -/* - * Check if two TCP/IPv4 packets are neighbors. - */ -static inline int -check_seq_option(struct gro_tcp4_item *item, - struct rte_tcp_hdr *tcph, - uint32_t sent_seq, - uint16_t ip_id, - uint16_t tcp_hl, - uint16_t tcp_dl, - uint16_t l2_offset, - uint8_t is_atomic) -{ - struct rte_mbuf *pkt_orig = item->firstseg; - struct rte_ipv4_hdr *iph_orig; - struct rte_tcp_hdr *tcph_orig; - uint16_t len, tcp_hl_orig; - - iph_orig = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt_orig, char *) + - l2_offset + pkt_orig->l2_len); - tcph_orig = (struct rte_tcp_hdr *)((char *)iph_orig + pkt_orig->l3_len); - tcp_hl_orig = pkt_orig->l4_len; - - /* Check if TCP option fields equal */ - len = RTE_MAX(tcp_hl, tcp_hl_orig) - sizeof(struct rte_tcp_hdr); - if ((tcp_hl != tcp_hl_orig) || ((len > 0) && - (memcmp(tcph + 1, tcph_orig + 1, - len) != 0))) - return 0; - - /* Don't merge packets whose DF bits are different */ - if (unlikely(item->is_atomic ^ is_atomic)) - return 0; - - /* check if the two packets are neighbors */ - len = pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len - - pkt_orig->l3_len - tcp_hl_orig; - if ((sent_seq == item->sent_seq + len) && (is_atomic || - (ip_id 
== item->ip_id + 1))) - /* append the new packet */ - return 1; - else if ((sent_seq + tcp_dl == item->sent_seq) && (is_atomic || - (ip_id + item->nb_merged == item->ip_id))) - /* pre-pend the new packet */ - return -1; - - return 0; -} #endif diff --git a/lib/gro/gro_tcp6.c b/lib/gro/gro_tcp6.c new file mode 100644 index 0000000000..e0b21c33c9 --- /dev/null +++ b/lib/gro/gro_tcp6.c @@ -0,0 +1,266 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#include +#include +#include + +#include "gro_tcp6.h" + +void * +gro_tcp6_tbl_create(uint16_t socket_id, + uint16_t max_flow_num, + uint16_t max_item_per_flow) +{ + struct gro_tcp6_tbl *tbl; + size_t size; + uint32_t entries_num, i; + + entries_num = max_flow_num * max_item_per_flow; + entries_num = RTE_MIN(entries_num, GRO_TCP6_TBL_MAX_ITEM_NUM); + + if (entries_num == 0) + return NULL; + + tbl = rte_zmalloc_socket(__func__, + sizeof(struct gro_tcp6_tbl), + RTE_CACHE_LINE_SIZE, + socket_id); + if (tbl == NULL) + return NULL; + + size = sizeof(struct gro_tcp_item) * entries_num; + tbl->items = rte_zmalloc_socket(__func__, + size, + RTE_CACHE_LINE_SIZE, + socket_id); + if (tbl->items == NULL) { + rte_free(tbl); + return NULL; + } + tbl->max_item_num = entries_num; + + size = sizeof(struct gro_tcp6_flow) * entries_num; + tbl->flows = rte_zmalloc_socket(__func__, + size, + RTE_CACHE_LINE_SIZE, + socket_id); + if (tbl->flows == NULL) { + rte_free(tbl->items); + rte_free(tbl); + return NULL; + } + /* INVALID_ARRAY_INDEX indicates an empty flow */ + for (i = 0; i < entries_num; i++) + tbl->flows[i].start_index = INVALID_ARRAY_INDEX; + tbl->max_flow_num = entries_num; + + return tbl; +} + +void +gro_tcp6_tbl_destroy(void *tbl) +{ + struct gro_tcp6_tbl *tcp_tbl = tbl; + + if (tcp_tbl) { + rte_free(tcp_tbl->items); + rte_free(tcp_tbl->flows); + } + rte_free(tcp_tbl); +} + +static inline uint32_t +find_an_empty_flow(struct gro_tcp6_tbl *tbl) +{ + uint32_t i; + uint32_t max_flow_num = 
tbl->max_flow_num; + + for (i = 0; i < max_flow_num; i++) + if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX) + return i; + return INVALID_ARRAY_INDEX; +} + +static inline uint32_t +insert_new_flow(struct gro_tcp6_tbl *tbl, + struct tcp6_flow_key *src, + uint32_t item_idx) +{ + struct tcp6_flow_key *dst; + uint32_t flow_idx; + + flow_idx = find_an_empty_flow(tbl); + if (unlikely(flow_idx == INVALID_ARRAY_INDEX)) + return INVALID_ARRAY_INDEX; + + dst = &(tbl->flows[flow_idx].key); + + ASSIGN_COMMON_TCP_KEY((&src->cmn_key), (&dst->cmn_key)); + memcpy(&dst->src_addr[0], &src->src_addr[0], sizeof(dst->src_addr)); + memcpy(&dst->dst_addr[0], &src->dst_addr[0], sizeof(dst->dst_addr)); + dst->vtc_flow = src->vtc_flow; + + tbl->flows[flow_idx].start_index = item_idx; + tbl->flow_num++; + + return flow_idx; +} + +/* + * update the packet length for the flushed packet. + */ +static inline void +update_header(struct gro_tcp_item *item) +{ + struct rte_ipv6_hdr *ipv6_hdr; + struct rte_mbuf *pkt = item->firstseg; + + ipv6_hdr = (struct rte_ipv6_hdr *)(rte_pktmbuf_mtod(pkt, char *) + + pkt->l2_len); + ipv6_hdr->payload_len = rte_cpu_to_be_16(pkt->pkt_len - + pkt->l2_len - pkt->l3_len); +} + +int32_t +gro_tcp6_reassemble(struct rte_mbuf *pkt, + struct gro_tcp6_tbl *tbl, + uint64_t start_time) +{ + struct rte_ether_hdr *eth_hdr; + struct rte_ipv6_hdr *ipv6_hdr; + int32_t tcp_dl; + uint16_t ip_tlen; + struct tcp6_flow_key key; + uint32_t i, max_flow_num, remaining_flow_num; + uint32_t sent_seq; + struct rte_tcp_hdr *tcp_hdr; + uint8_t find; + uint32_t item_idx; + /* + * Don't process the packet whose TCP header length is greater + * than 60 bytes or less than 20 bytes. 
+ */ + if (unlikely(INVALID_TCP_HDRLEN(pkt->l4_len))) + return -1; + + eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *); + ipv6_hdr = (struct rte_ipv6_hdr *)((char *)eth_hdr + pkt->l2_len); + tcp_hdr = rte_pktmbuf_mtod_offset(pkt, struct rte_tcp_hdr *, pkt->l2_len + pkt->l3_len); + + /* + * Don't process the packet which has FIN, SYN, RST, PSH, URG, ECE + * or CWR set. + */ + if (tcp_hdr->tcp_flags != RTE_TCP_ACK_FLAG) + return -1; + + ip_tlen = rte_be_to_cpu_16(ipv6_hdr->payload_len); + /* + * Don't process the packet whose payload length is less than or + * equal to 0. + */ + tcp_dl = ip_tlen - pkt->l4_len; + if (tcp_dl <= 0) + return -1; + + rte_ether_addr_copy(&(eth_hdr->src_addr), &(key.cmn_key.eth_saddr)); + rte_ether_addr_copy(&(eth_hdr->dst_addr), &(key.cmn_key.eth_daddr)); + memcpy(&key.src_addr[0], &ipv6_hdr->src_addr, sizeof(key.src_addr)); + memcpy(&key.dst_addr[0], &ipv6_hdr->dst_addr, sizeof(key.dst_addr)); + key.cmn_key.src_port = tcp_hdr->src_port; + key.cmn_key.dst_port = tcp_hdr->dst_port; + key.cmn_key.recv_ack = tcp_hdr->recv_ack; + key.vtc_flow = ipv6_hdr->vtc_flow; + + /* Search for a matched flow. */ + max_flow_num = tbl->max_flow_num; + remaining_flow_num = tbl->flow_num; + find = 0; + for (i = 0; i < max_flow_num && remaining_flow_num; i++) { + if (tbl->flows[i].start_index != INVALID_ARRAY_INDEX) { + if (is_same_tcp6_flow(&tbl->flows[i].key, &key)) { + find = 1; + break; + } + remaining_flow_num--; + } + } + + if (find == 0) { + sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq); + item_idx = insert_new_tcp_item(pkt, tbl->items, &tbl->item_num, tbl->max_item_num, start_time, + INVALID_ARRAY_INDEX, sent_seq, 0, true); + if (item_idx == INVALID_ARRAY_INDEX) + return -1; + if (insert_new_flow(tbl, &key, item_idx) == + INVALID_ARRAY_INDEX) { + /* + * Fail to insert a new flow, so delete the + * stored packet. 
+ */ + delete_tcp_item(tbl->items, item_idx, &tbl->item_num, INVALID_ARRAY_INDEX); + return -1; + } + return 0; + } + + return process_tcp_item(pkt, tcp_hdr, tcp_dl, tbl->items, tbl->flows[i].start_index, + &tbl->item_num, tbl->max_item_num, + 0, true, start_time); +} + +uint16_t +gro_tcp6_tbl_timeout_flush(struct gro_tcp6_tbl *tbl, + uint64_t flush_timestamp, + struct rte_mbuf **out, + uint16_t nb_out) +{ + uint16_t k = 0; + uint32_t i, j; + uint32_t max_flow_num = tbl->max_flow_num; + + for (i = 0; i < max_flow_num; i++) { + if (unlikely(tbl->flow_num == 0)) + return k; + + j = tbl->flows[i].start_index; + while (j != INVALID_ARRAY_INDEX) { + if (tbl->items[j].start_time <= flush_timestamp) { + out[k++] = tbl->items[j].firstseg; + if (tbl->items[j].nb_merged > 1) + update_header(&(tbl->items[j])); + /* + * Delete the packet and get the next + * packet in the flow. + */ + j = delete_tcp_item(tbl->items, j, + &tbl->item_num, INVALID_ARRAY_INDEX); + tbl->flows[i].start_index = j; + if (j == INVALID_ARRAY_INDEX) + tbl->flow_num--; + + if (unlikely(k == nb_out)) + return k; + } else + /* + * The left packets in this flow won't be + * timeout. Go to check other flows. 
+ */ + break; + } + } + return k; +} + +uint32_t +gro_tcp6_tbl_pkt_count(void *tbl) +{ + struct gro_tcp6_tbl *gro_tbl = tbl; + + if (gro_tbl) + return gro_tbl->item_num; + + return 0; +} diff --git a/lib/gro/gro_tcp6.h b/lib/gro/gro_tcp6.h new file mode 100644 index 0000000000..2de465ecd1 --- /dev/null +++ b/lib/gro/gro_tcp6.h @@ -0,0 +1,161 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#ifndef _GRO_TCP6_H_ +#define _GRO_TCP6_H_ + +#include + +#define INVALID_ARRAY_INDEX 0xffffffffUL +#define GRO_TCP6_TBL_MAX_ITEM_NUM (1024UL * 1024UL) + +/* Header fields representing a TCP/IPv6 flow */ +struct tcp6_flow_key { + struct cmn_tcp_key cmn_key; + uint8_t src_addr[16]; + uint8_t dst_addr[16]; + rte_be32_t vtc_flow; +}; + +struct gro_tcp6_flow { + struct tcp6_flow_key key; + /* + * The index of the first packet in the flow. + * INVALID_ARRAY_INDEX indicates an empty flow. + */ + uint32_t start_index; +}; + +/* + * TCP/IPv6 reassembly table structure. + */ +struct gro_tcp6_tbl { + /* item array */ + struct gro_tcp_item *items; + /* flow array */ + struct gro_tcp6_flow *flows; + /* current item number */ + uint32_t item_num; + /* current flow num */ + uint32_t flow_num; + /* item array size */ + uint32_t max_item_num; + /* flow array size */ + uint32_t max_flow_num; +}; + +/** + * This function creates a TCP/IPv6 reassembly table. + * + * @param socket_id + * Socket index for allocating the TCP/IPv6 reassemble table + * @param max_flow_num + * The maximum number of flows in the TCP/IPv6 GRO table + * @param max_item_per_flow + * The maximum number of packets per flow + * + * @return + * - Return the table pointer on success. + * - Return NULL on failure. + */ +void *gro_tcp6_tbl_create(uint16_t socket_id, + uint16_t max_flow_num, + uint16_t max_item_per_flow); + +/** + * This function destroys a TCP/IPv6 reassembly table. + * + * @param tbl + * Pointer pointing to the TCP/IPv6 reassembly table. 
+ */ +void gro_tcp6_tbl_destroy(void *tbl); + +/** + * This function merges a TCP/IPv6 packet. It doesn't process the packet, + * which has SYN, FIN, RST, PSH, CWR, ECE or URG set, or doesn't have + * payload. + * + * This function doesn't check if the packet has correct checksums and + * doesn't re-calculate checksums for the merged packet. Additionally, + * it assumes the packets are complete (i.e., MF==0 && frag_off==0), + * when IP fragmentation is possible (i.e., DF==0). It returns the + * packet, if the packet has invalid parameters (e.g. SYN bit is set) + * or there is no available space in the table. + * + * @param pkt + * Packet to reassemble + * @param tbl + * Pointer pointing to the TCP/IPv6 reassembly table + * @param start_time + * The time when the packet is inserted into the table + * + * @return + * - Return a positive value if the packet is merged. + * - Return zero if the packet isn't merged but stored in the table. + * - Return a negative value for invalid parameters or no available + * space in the table. + */ +int32_t gro_tcp6_reassemble(struct rte_mbuf *pkt, + struct gro_tcp6_tbl *tbl, + uint64_t start_time); + +/** + * This function flushes timeout packets in a TCP/IPv6 reassembly table, + * without updating checksums. + * + * @param tbl + * TCP/IPv6 reassembly table pointer + * @param flush_timestamp + * Flush packets which are inserted into the table before or at the + * flush_timestamp. + * @param out + * Pointer array used to keep flushed packets + * @param nb_out + * The element number in 'out'. It also determines the maximum number of + * packets that can be flushed finally. + * + * @return + * The number of flushed packets + */ +uint16_t gro_tcp6_tbl_timeout_flush(struct gro_tcp6_tbl *tbl, + uint64_t flush_timestamp, + struct rte_mbuf **out, + uint16_t nb_out); + +/** + * This function returns the number of the packets in a TCP/IPv6 + * reassembly table. 
+ * + * @param tbl + * TCP/IPv6 reassembly table pointer + * + * @return + * The number of packets in the table + */ +uint32_t gro_tcp6_tbl_pkt_count(void *tbl); + +/* + * Check if two TCP/IPv6 packets belong to the same flow. + */ +static inline int +is_same_tcp6_flow(struct tcp6_flow_key *k1, struct tcp6_flow_key *k2) +{ + rte_be32_t vtc_flow_diff; + + if (memcmp(&k1->src_addr, &k2->src_addr, 16)) + return 0; + if (memcmp(&k1->dst_addr, &k2->dst_addr, 16)) + return 0; + /* + * IP version (4) Traffic Class (8) Flow Label (20) + * All fields except Traffic class should be same + */ + vtc_flow_diff = (k1->vtc_flow ^ k2->vtc_flow); + if (vtc_flow_diff & htonl(0xF00FFFFF)) + return 0; + + return is_common_tcp_key(&k1->cmn_key, &k2->cmn_key); +} + +#endif diff --git a/lib/gro/gro_tcp_internal.c b/lib/gro/gro_tcp_internal.c new file mode 100644 index 0000000000..5a21bca7f8 --- /dev/null +++ b/lib/gro/gro_tcp_internal.c @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ +#include +#include +#include + +#include "gro_tcp_internal.h" + +static inline uint32_t +find_an_empty_item(struct gro_tcp_item *items, + uint32_t max_item_num) +{ + uint32_t i; + + for (i = 0; i < max_item_num; i++) + if (items[i].firstseg == NULL) + return i; + return INVALID_ARRAY_INDEX; +} + +inline uint32_t +insert_new_tcp_item(struct rte_mbuf *pkt, + struct gro_tcp_item *items, + uint32_t *item_num, + uint32_t max_item_num, + uint64_t start_time, + uint32_t prev_idx, + uint32_t sent_seq, + uint16_t ip_id, + uint8_t is_atomic) +{ + uint32_t item_idx; + + item_idx = find_an_empty_item(items, max_item_num); + if (item_idx == INVALID_ARRAY_INDEX) + return INVALID_ARRAY_INDEX; + + items[item_idx].firstseg = pkt; + items[item_idx].lastseg = rte_pktmbuf_lastseg(pkt); + items[item_idx].start_time = start_time; + items[item_idx].next_pkt_idx = INVALID_ARRAY_INDEX; + items[item_idx].sent_seq = sent_seq; + items[item_idx].l3.ip_id = ip_id; + 
items[item_idx].nb_merged = 1; + items[item_idx].is_atomic = is_atomic; + (*item_num) += 1; + + /* if the previous packet exists, chain them together. */ + if (prev_idx != INVALID_ARRAY_INDEX) { + items[item_idx].next_pkt_idx = + items[prev_idx].next_pkt_idx; + items[prev_idx].next_pkt_idx = item_idx; + } + + return item_idx; +} + +inline uint32_t +delete_tcp_item(struct gro_tcp_item *items, uint32_t item_idx, + uint32_t *item_num, + uint32_t prev_item_idx) +{ + uint32_t next_idx = items[item_idx].next_pkt_idx; + + /* NULL indicates an empty item */ + items[item_idx].firstseg = NULL; + (*item_num) -= 1; + if (prev_item_idx != INVALID_ARRAY_INDEX) + items[prev_item_idx].next_pkt_idx = next_idx; + + return next_idx; +} + +int32_t +process_tcp_item(struct rte_mbuf *pkt, + struct rte_tcp_hdr *tcp_hdr, + int32_t tcp_dl, + struct gro_tcp_item *items, + uint32_t item_idx, + uint32_t *item_num, + uint32_t max_item_num, + uint16_t ip_id, + uint8_t is_atomic, + uint64_t start_time) +{ + uint32_t cur_idx; + uint32_t prev_idx; + int cmp; + uint32_t sent_seq; + + sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq); + /* + * Check all packets in the flow and try to find a neighbor for + * the input packet. + */ + cur_idx = item_idx; + prev_idx = cur_idx; + do { + cmp = check_seq_option(&items[cur_idx], tcp_hdr, + sent_seq, ip_id, pkt->l4_len, tcp_dl, 0, + is_atomic); + if (cmp) { + if (merge_two_tcp_packets(&items[cur_idx], + pkt, cmp, sent_seq, ip_id, 0)) + return 1; + /* + * Fail to merge the two packets, as the packet + * length is greater than the max value. Store + * the packet into the flow. + */ + if (insert_new_tcp_item(pkt, items, item_num, max_item_num, + start_time, cur_idx, sent_seq, ip_id, is_atomic) == + INVALID_ARRAY_INDEX) + return -1; + return 0; + } + prev_idx = cur_idx; + cur_idx = items[cur_idx].next_pkt_idx; + } while (cur_idx != INVALID_ARRAY_INDEX); + + /* Fail to find a neighbor, so store the packet into the flow. 
*/ + if (insert_new_tcp_item(pkt, items, item_num, max_item_num, start_time, prev_idx, sent_seq, + ip_id, is_atomic) == INVALID_ARRAY_INDEX) + return -1; + + return 0; +} diff --git a/lib/gro/gro_tcp_internal.h b/lib/gro/gro_tcp_internal.h new file mode 100644 index 0000000000..072b7aea13 --- /dev/null +++ b/lib/gro/gro_tcp_internal.h @@ -0,0 +1,213 @@ +#ifndef _GRO_TCP_H_ +#define _GRO_TCP_H_ + +#define INVALID_ARRAY_INDEX 0xffffffffUL + +#include + +/* + * The max length of a IPv4 packet, which includes the length of the L3 + * header, the L4 header and the data payload. + */ +#define MAX_IP_PKT_LENGTH UINT16_MAX + +/* The maximum TCP header length */ +#define MAX_TCP_HLEN 60 +#define INVALID_TCP_HDRLEN(len) \ + (((len) < sizeof(struct rte_tcp_hdr)) || ((len) > MAX_TCP_HLEN)) + +struct cmn_tcp_key { + struct rte_ether_addr eth_saddr; + struct rte_ether_addr eth_daddr; + uint32_t recv_ack; + uint16_t src_port; + uint16_t dst_port; +}; + +/* Copy every cmn_tcp_key field from k1 (source) into k2 (destination); both are pointers. */ +#define ASSIGN_COMMON_TCP_KEY(k1, k2) \ + do {\ + rte_ether_addr_copy(&((k1)->eth_saddr), &((k2)->eth_saddr)); \ + rte_ether_addr_copy(&((k1)->eth_daddr), &((k2)->eth_daddr)); \ + (k2)->recv_ack = (k1)->recv_ack; \ + (k2)->src_port = (k1)->src_port; \ + (k2)->dst_port = (k1)->dst_port; \ + } while (0) + +struct gro_tcp_item { + /* + * The first MBUF segment of the packet. If the value + * is NULL, it means the item is empty. + */ + struct rte_mbuf *firstseg; + /* The last MBUF segment of the packet */ + struct rte_mbuf *lastseg; + /* + * The time when the first packet is inserted into the table. + * This value won't be updated, even if the packet is merged + * with other packets. + */ + uint64_t start_time; + /* + * next_pkt_idx is used to chain the packets that + * are in the same flow but can't be merged together + * (e.g. caused by packet reordering).
+ */ + uint32_t next_pkt_idx; + /* TCP sequence number of the packet */ + uint32_t sent_seq; + union { + /* IPv4 ID of the packet */ + uint16_t ip_id; + /* Unused field for IPv6 */ + uint16_t unused; + } l3; + /* the number of merged packets */ + uint16_t nb_merged; + /* Indicate if IPv4 ID can be ignored */ + uint8_t is_atomic; +}; + +uint32_t +insert_new_tcp_item(struct rte_mbuf *pkt, + struct gro_tcp_item *items, + uint32_t *item_num, + uint32_t max_item_num, + uint64_t start_time, + uint32_t prev_idx, + uint32_t sent_seq, + uint16_t ip_id, + uint8_t is_atomic); + +uint32_t +delete_tcp_item(struct gro_tcp_item *items, uint32_t item_idx, + uint32_t *item_num, + uint32_t prev_item_idx); + +int32_t +process_tcp_item(struct rte_mbuf *pkt, + struct rte_tcp_hdr *tcp_hdr, + int32_t tcp_dl, + struct gro_tcp_item *items, + uint32_t item_idx, + uint32_t *item_num, + uint32_t max_item_num, + uint16_t ip_id, + uint8_t is_atomic, + uint64_t start_time); + +/* + * Merge two TCP packets without updating checksums. + * If cmp is larger than 0, append the new packet to the + * original packet. Otherwise, pre-pend the new packet to + * the original packet. + */ +static inline int +merge_two_tcp_packets(struct gro_tcp_item *item, + struct rte_mbuf *pkt, + int cmp, + uint32_t sent_seq, + uint16_t ip_id, + uint16_t l2_offset) +{ + struct rte_mbuf *pkt_head, *pkt_tail, *lastseg; + uint16_t hdr_len, l2_len; + + if (cmp > 0) { + pkt_head = item->firstseg; + pkt_tail = pkt; + } else { + pkt_head = pkt; + pkt_tail = item->firstseg; + } + + /* check if the IPv4 packet length is greater than the max value */ + hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len + + pkt_head->l4_len; + l2_len = l2_offset > 0 ? 
pkt_head->outer_l2_len : pkt_head->l2_len; + if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len - + hdr_len > MAX_IP_PKT_LENGTH)) + return 0; + + /* remove the packet header for the tail packet */ + rte_pktmbuf_adj(pkt_tail, hdr_len); + + /* chain two packets together */ + if (cmp > 0) { + item->lastseg->next = pkt; + item->lastseg = rte_pktmbuf_lastseg(pkt); + /* update IP ID to the larger value */ + item->l3.ip_id = ip_id; + } else { + lastseg = rte_pktmbuf_lastseg(pkt); + lastseg->next = item->firstseg; + item->firstseg = pkt; + /* update sent_seq to the smaller value */ + item->sent_seq = sent_seq; + item->l3.ip_id = ip_id; + } + item->nb_merged++; + + /* update MBUF metadata for the merged packet */ + pkt_head->nb_segs += pkt_tail->nb_segs; + pkt_head->pkt_len += pkt_tail->pkt_len; + + return 1; +} + +/* + * Check if two TCP/IPv4 packets are neighbors. + */ +static inline int +check_seq_option(struct gro_tcp_item *item, + struct rte_tcp_hdr *tcph, + uint32_t sent_seq, + uint16_t ip_id, + uint16_t tcp_hl, + uint16_t tcp_dl, + uint16_t l2_offset, + uint8_t is_atomic) +{ + struct rte_mbuf *pkt_orig = item->firstseg; + char *iph_orig; + struct rte_tcp_hdr *tcph_orig; + uint16_t len, tcp_hl_orig; + + iph_orig = (char *)(rte_pktmbuf_mtod(pkt_orig, char *) + + l2_offset + pkt_orig->l2_len); + tcph_orig = (struct rte_tcp_hdr *)(iph_orig + pkt_orig->l3_len); + tcp_hl_orig = pkt_orig->l4_len; + + /* Check if TCP option fields equal */ + len = RTE_MAX(tcp_hl, tcp_hl_orig) - sizeof(struct rte_tcp_hdr); + if ((tcp_hl != tcp_hl_orig) || ((len > 0) && + (memcmp(tcph + 1, tcph_orig + 1, + len) != 0))) + return 0; + + /* Don't merge packets whose DF bits are different */ + if (unlikely(item->is_atomic ^ is_atomic)) + return 0; + + /* check if the two packets are neighbors */ + len = pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len - + pkt_orig->l3_len - tcp_hl_orig; + if ((sent_seq == item->sent_seq + len) && (is_atomic || + (ip_id == item->l3.ip_id + 1))) + /* append 
the new packet */ + return 1; + else if ((sent_seq + tcp_dl == item->sent_seq) && (is_atomic || + (ip_id + item->nb_merged == item->l3.ip_id))) + /* pre-pend the new packet */ + return -1; + + return 0; +} + +static inline int +is_common_tcp_key(struct cmn_tcp_key *k1, struct cmn_tcp_key *k2) +{ + return (!memcmp(k1, k2, sizeof(struct cmn_tcp_key))); +} + +#endif diff --git a/lib/gro/gro_vxlan_tcp4.c b/lib/gro/gro_vxlan_tcp4.c index 3be4deb7c7..81eebf0d2d 100644 --- a/lib/gro/gro_vxlan_tcp4.c +++ b/lib/gro/gro_vxlan_tcp4.c @@ -7,6 +7,7 @@ #include #include +#include "gro_tcp_internal.h" #include "gro_vxlan_tcp4.h" void * @@ -116,7 +117,7 @@ insert_new_item(struct gro_vxlan_tcp4_tbl *tbl, tbl->items[item_idx].inner_item.start_time = start_time; tbl->items[item_idx].inner_item.next_pkt_idx = INVALID_ARRAY_INDEX; tbl->items[item_idx].inner_item.sent_seq = sent_seq; - tbl->items[item_idx].inner_item.ip_id = ip_id; + tbl->items[item_idx].inner_item.l3.ip_id = ip_id; tbl->items[item_idx].inner_item.nb_merged = 1; tbl->items[item_idx].inner_item.is_atomic = is_atomic; tbl->items[item_idx].outer_ip_id = outer_ip_id; @@ -163,15 +164,9 @@ insert_new_flow(struct gro_vxlan_tcp4_tbl *tbl, dst = &(tbl->flows[flow_idx].key); - rte_ether_addr_copy(&(src->inner_key.eth_saddr), - &(dst->inner_key.eth_saddr)); - rte_ether_addr_copy(&(src->inner_key.eth_daddr), - &(dst->inner_key.eth_daddr)); + ASSIGN_COMMON_TCP_KEY((&(src->inner_key.cmn_key)), (&(dst->inner_key.cmn_key))); dst->inner_key.ip_src_addr = src->inner_key.ip_src_addr; dst->inner_key.ip_dst_addr = src->inner_key.ip_dst_addr; - dst->inner_key.recv_ack = src->inner_key.recv_ack; - dst->inner_key.src_port = src->inner_key.src_port; - dst->inner_key.dst_port = src->inner_key.dst_port; dst->vxlan_hdr.vx_flags = src->vxlan_hdr.vx_flags; dst->vxlan_hdr.vx_vni = src->vxlan_hdr.vx_vni; @@ -248,7 +243,7 @@ merge_two_vxlan_tcp4_packets(struct gro_vxlan_tcp4_item *item, uint16_t outer_ip_id, uint16_t ip_id) { - if 
(merge_two_tcp4_packets(&item->inner_item, pkt, cmp, sent_seq, + if (merge_two_tcp_packets(&item->inner_item, pkt, cmp, sent_seq, ip_id, pkt->outer_l2_len + pkt->outer_l3_len)) { /* Update the outer IPv4 ID to the large value. */ @@ -357,13 +352,13 @@ gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt, sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq); - rte_ether_addr_copy(&(eth_hdr->src_addr), &(key.inner_key.eth_saddr)); - rte_ether_addr_copy(&(eth_hdr->dst_addr), &(key.inner_key.eth_daddr)); + rte_ether_addr_copy(&(eth_hdr->src_addr), &(key.inner_key.cmn_key.eth_saddr)); + rte_ether_addr_copy(&(eth_hdr->dst_addr), &(key.inner_key.cmn_key.eth_daddr)); key.inner_key.ip_src_addr = ipv4_hdr->src_addr; key.inner_key.ip_dst_addr = ipv4_hdr->dst_addr; - key.inner_key.recv_ack = tcp_hdr->recv_ack; - key.inner_key.src_port = tcp_hdr->src_port; - key.inner_key.dst_port = tcp_hdr->dst_port; + key.inner_key.cmn_key.recv_ack = tcp_hdr->recv_ack; + key.inner_key.cmn_key.src_port = tcp_hdr->src_port; + key.inner_key.cmn_key.dst_port = tcp_hdr->dst_port; key.vxlan_hdr.vx_flags = vxlan_hdr->vx_flags; key.vxlan_hdr.vx_vni = vxlan_hdr->vx_vni; diff --git a/lib/gro/gro_vxlan_tcp4.h b/lib/gro/gro_vxlan_tcp4.h index 7832942a68..82eaaee11e 100644 --- a/lib/gro/gro_vxlan_tcp4.h +++ b/lib/gro/gro_vxlan_tcp4.h @@ -5,6 +5,7 @@ #ifndef _GRO_VXLAN_TCP4_H_ #define _GRO_VXLAN_TCP4_H_ +#include "gro_tcp_internal.h" #include "gro_tcp4.h" #define GRO_VXLAN_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL) @@ -36,7 +37,7 @@ struct gro_vxlan_tcp4_flow { }; struct gro_vxlan_tcp4_item { - struct gro_tcp4_item inner_item; + struct gro_tcp_item inner_item; /* IPv4 ID in the outer IPv4 header */ uint16_t outer_ip_id; /* Indicate if outer IPv4 ID can be ignored */ diff --git a/lib/gro/meson.build b/lib/gro/meson.build index e4fa2958bd..1640317890 100644 --- a/lib/gro/meson.build +++ b/lib/gro/meson.build @@ -3,7 +3,9 @@ sources = files( 'rte_gro.c', + 'gro_tcp_internal.c', 'gro_tcp4.c', + 'gro_tcp6.c', 'gro_udp4.c', 
'gro_vxlan_tcp4.c', 'gro_vxlan_udp4.c', diff --git a/lib/gro/rte_gro.c b/lib/gro/rte_gro.c index e35399fd42..d824eebd93 100644 --- a/lib/gro/rte_gro.c +++ b/lib/gro/rte_gro.c @@ -8,6 +8,7 @@ #include "rte_gro.h" #include "gro_tcp4.h" +#include "gro_tcp6.h" #include "gro_udp4.h" #include "gro_vxlan_tcp4.h" #include "gro_vxlan_udp4.h" @@ -20,14 +21,16 @@ typedef uint32_t (*gro_tbl_pkt_count_fn)(void *tbl); static gro_tbl_create_fn tbl_create_fn[RTE_GRO_TYPE_MAX_NUM] = { gro_tcp4_tbl_create, gro_vxlan_tcp4_tbl_create, - gro_udp4_tbl_create, gro_vxlan_udp4_tbl_create, NULL}; + gro_udp4_tbl_create, gro_vxlan_udp4_tbl_create, gro_tcp6_tbl_create, NULL}; static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = { gro_tcp4_tbl_destroy, gro_vxlan_tcp4_tbl_destroy, gro_udp4_tbl_destroy, gro_vxlan_udp4_tbl_destroy, + gro_tcp6_tbl_destroy, NULL}; static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = { gro_tcp4_tbl_pkt_count, gro_vxlan_tcp4_tbl_pkt_count, gro_udp4_tbl_pkt_count, gro_vxlan_udp4_tbl_pkt_count, + gro_tcp6_tbl_pkt_count, NULL}; #define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \ @@ -35,6 +38,12 @@ static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = { ((ptype & RTE_PTYPE_L4_FRAG) != RTE_PTYPE_L4_FRAG) && \ (RTE_ETH_IS_TUNNEL_PKT(ptype) == 0)) +/* GRO with extension headers is not supported */ +#define IS_IPV6_TCP_PKT(ptype) (RTE_ETH_IS_IPV6_HDR(ptype) && \ + ((ptype & RTE_PTYPE_L4_TCP) == RTE_PTYPE_L4_TCP) && \ + ((ptype & RTE_PTYPE_L4_FRAG) != RTE_PTYPE_L4_FRAG) && \ + (RTE_ETH_IS_TUNNEL_PKT(ptype) == 0)) + #define IS_IPV4_UDP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \ ((ptype & RTE_PTYPE_L4_UDP) == RTE_PTYPE_L4_UDP) && \ (RTE_ETH_IS_TUNNEL_PKT(ptype) == 0)) @@ -147,7 +156,11 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts, /* allocate a reassembly table for TCP/IPv4 GRO */ struct gro_tcp4_tbl tcp_tbl; struct gro_tcp4_flow tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM]; - struct gro_tcp4_item 
tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} }; + struct gro_tcp_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} }; + + struct gro_tcp6_tbl tcp6_tbl; + struct gro_tcp6_flow tcp6_flows[RTE_GRO_MAX_BURST_ITEM_NUM]; + struct gro_tcp_item tcp6_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} }; /* allocate a reassembly table for UDP/IPv4 GRO */ struct gro_udp4_tbl udp_tbl; @@ -171,10 +184,10 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts, int32_t ret; uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts; uint8_t do_tcp4_gro = 0, do_vxlan_tcp_gro = 0, do_udp4_gro = 0, - do_vxlan_udp_gro = 0; + do_vxlan_udp_gro = 0, do_tcp6_gro = 0; if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 | - RTE_GRO_TCP_IPV4 | + RTE_GRO_TCP_IPV4 | RTE_GRO_TCP_IPV6 | RTE_GRO_IPV4_VXLAN_UDP_IPV4 | RTE_GRO_UDP_IPV4)) == 0)) return nb_pkts; @@ -236,6 +249,18 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts, do_udp4_gro = 1; } + if (param->gro_types & RTE_GRO_TCP_IPV6) { + for (i = 0; i < item_num; i++) + tcp6_flows[i].start_index = INVALID_ARRAY_INDEX; + + tcp6_tbl.flows = tcp6_flows; + tcp6_tbl.items = tcp6_items; + tcp6_tbl.flow_num = 0; + tcp6_tbl.item_num = 0; + tcp6_tbl.max_flow_num = item_num; + tcp6_tbl.max_item_num = item_num; + do_tcp6_gro = 1; + } for (i = 0; i < nb_pkts; i++) { /* @@ -276,6 +301,14 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts, nb_after_gro--; else if (ret < 0) unprocess_pkts[unprocess_num++] = pkts[i]; + } else if (IS_IPV6_TCP_PKT(pkts[i]->packet_type) && + do_tcp6_gro) { + ret = gro_tcp6_reassemble(pkts[i], &tcp6_tbl, 0); + if (ret > 0) + /* merge successfully */ + nb_after_gro--; + else if (ret < 0) + unprocess_pkts[unprocess_num++] = pkts[i]; } else unprocess_pkts[unprocess_num++] = pkts[i]; } @@ -283,9 +316,17 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts, if ((nb_after_gro < nb_pkts) || (unprocess_num < nb_pkts)) { i = 0; + /* Copy unprocessed packets */ + if (unprocess_num > 0) { + memcpy(&pkts[i], unprocess_pkts, + sizeof(struct rte_mbuf 
*) * + unprocess_num); + i = unprocess_num; + } + /* Flush all packets from the tables */ if (do_vxlan_tcp_gro) { - i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tcp_tbl, - 0, pkts, nb_pkts); + i += gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tcp_tbl, + 0, &pkts[i], nb_pkts - i); } @@ -304,13 +345,11 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts, i += gro_udp4_tbl_timeout_flush(&udp_tbl, 0, &pkts[i], nb_pkts - i); } - /* Copy unprocessed packets */ - if (unprocess_num > 0) { - memcpy(&pkts[i], unprocess_pkts, - sizeof(struct rte_mbuf *) * - unprocess_num); + + if (do_tcp6_gro) { + i += gro_tcp6_tbl_timeout_flush(&tcp6_tbl, 0, + &pkts[i], nb_pkts - i); } - nb_after_gro = i + unprocess_num; } return nb_after_gro; @@ -323,13 +362,13 @@ rte_gro_reassemble(struct rte_mbuf **pkts, { struct rte_mbuf *unprocess_pkts[nb_pkts]; struct gro_ctx *gro_ctx = ctx; - void *tcp_tbl, *udp_tbl, *vxlan_tcp_tbl, *vxlan_udp_tbl; + void *tcp_tbl, *udp_tbl, *vxlan_tcp_tbl, *vxlan_udp_tbl, *tcp6_tbl; uint64_t current_time; uint16_t i, unprocess_num = 0; - uint8_t do_tcp4_gro, do_vxlan_tcp_gro, do_udp4_gro, do_vxlan_udp_gro; + uint8_t do_tcp4_gro, do_vxlan_tcp_gro, do_udp4_gro, do_vxlan_udp_gro, do_tcp6_gro; if (unlikely((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 | - RTE_GRO_TCP_IPV4 | + RTE_GRO_TCP_IPV4 | RTE_GRO_TCP_IPV6 | RTE_GRO_IPV4_VXLAN_UDP_IPV4 | RTE_GRO_UDP_IPV4)) == 0)) return nb_pkts; @@ -338,6 +377,7 @@ rte_gro_reassemble(struct rte_mbuf **pkts, vxlan_tcp_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX]; udp_tbl = gro_ctx->tbls[RTE_GRO_UDP_IPV4_INDEX]; vxlan_udp_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX]; + tcp6_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV6_INDEX]; do_tcp4_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == RTE_GRO_TCP_IPV4; @@ -347,6 +387,7 @@ rte_gro_reassemble(struct rte_mbuf **pkts, RTE_GRO_UDP_IPV4; do_vxlan_udp_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) == RTE_GRO_IPV4_VXLAN_UDP_IPV4; + do_tcp6_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV6) == RTE_GRO_TCP_IPV6;
current_time = rte_rdtsc(); @@ -371,6 +412,11 @@ rte_gro_reassemble(struct rte_mbuf **pkts, if (gro_udp4_reassemble(pkts[i], udp_tbl, current_time) < 0) unprocess_pkts[unprocess_num++] = pkts[i]; + } else if (IS_IPV6_TCP_PKT(pkts[i]->packet_type) && + do_tcp6_gro) { + if (gro_tcp6_reassemble(pkts[i], tcp6_tbl, + current_time) < 0) + unprocess_pkts[unprocess_num++] = pkts[i]; } else unprocess_pkts[unprocess_num++] = pkts[i]; } @@ -426,6 +472,15 @@ rte_gro_timeout_flush(void *ctx, gro_ctx->tbls[RTE_GRO_UDP_IPV4_INDEX], flush_timestamp, &out[num], left_nb_out); + left_nb_out = max_nb_out - num; + } + + if ((gro_types & RTE_GRO_TCP_IPV6) && left_nb_out > 0) { + num += gro_tcp6_tbl_timeout_flush( + gro_ctx->tbls[RTE_GRO_TCP_IPV6_INDEX], + flush_timestamp, + &out[num], left_nb_out); + } return num; diff --git a/lib/gro/rte_gro.h b/lib/gro/rte_gro.h index 9f9ed4935a..c83dfd9ad1 100644 --- a/lib/gro/rte_gro.h +++ b/lib/gro/rte_gro.h @@ -38,6 +38,9 @@ extern "C" { #define RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX 3 #define RTE_GRO_IPV4_VXLAN_UDP_IPV4 (1ULL << RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX) /**< VxLAN UDP/IPv4 GRO flag. */ +#define RTE_GRO_TCP_IPV6_INDEX 4 +#define RTE_GRO_TCP_IPV6 (1ULL << RTE_GRO_TCP_IPV6_INDEX) +/**< TCP/IPv6 GRO flag. */ /** * Structure used to create GRO context objects or used to pass -- 2.25.1