DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH] gso: add VXLAN UDP GSO support
@ 2020-07-01  6:46 yang_y_yi
  2020-09-27  5:57 ` yang_y_yi
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: yang_y_yi @ 2020-07-01  6:46 UTC (permalink / raw)
  To: dev; +Cc: jiayu.hu, thomas, yangyi01, yang_y_yi

From: Yi Yang <yangyi01@inspur.com>

Many NICs cannot offload VXLAN UFO (UDP fragmentation offload), so
it is important to do VXLAN UDP GSO in software to improve VM-to-VM
UDP performance, especially when the VM MTU is only 1500 rather
than 9000.

With this enabled in DPDK, OVS DPDK can leverage it to improve
VM-to-VM UDP performance; the gain is large, more than 2 times.

Signed-off-by: Yi Yang <yangyi01@inspur.com>
---
 lib/librte_gso/Makefile          |   1 +
 lib/librte_gso/gso_common.h      |   5 ++
 lib/librte_gso/gso_tunnel_udp4.c | 108 +++++++++++++++++++++++++++++++++++++++
 lib/librte_gso/gso_tunnel_udp4.h |  43 ++++++++++++++++
 lib/librte_gso/meson.build       |   2 +-
 lib/librte_gso/rte_gso.c         |   8 +++
 6 files changed, 166 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_gso/gso_tunnel_udp4.c
 create mode 100644 lib/librte_gso/gso_tunnel_udp4.h

diff --git a/lib/librte_gso/Makefile b/lib/librte_gso/Makefile
index a34846e..3005817 100644
--- a/lib/librte_gso/Makefile
+++ b/lib/librte_gso/Makefile
@@ -17,6 +17,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_GSO) += rte_gso.c
 SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_common.c
 SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tcp4.c
 SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tunnel_tcp4.c
+SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tunnel_udp4.c
 SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_udp4.c
 
 # install this header file
diff --git a/lib/librte_gso/gso_common.h b/lib/librte_gso/gso_common.h
index a0b8343..4d5f303 100644
--- a/lib/librte_gso/gso_common.h
+++ b/lib/librte_gso/gso_common.h
@@ -26,6 +26,11 @@
 		(PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \
 		 PKT_TX_TUNNEL_VXLAN))
 
+/* True when flags request UDP segmentation of IPv4 inside VxLAN over IPv4. */
+#define IS_IPV4_VXLAN_UDP4(flag) (((flag) & (PKT_TX_UDP_SEG | PKT_TX_IPV4 | \
+		PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_MASK)) == (PKT_TX_UDP_SEG | \
+		PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_VXLAN))
+
 #define IS_IPV4_GRE_TCP4(flag) (((flag) & (PKT_TX_TCP_SEG | PKT_TX_IPV4 | \
 				PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_MASK)) == \
 		(PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \
diff --git a/lib/librte_gso/gso_tunnel_udp4.c b/lib/librte_gso/gso_tunnel_udp4.c
new file mode 100644
index 0000000..1a018ee
--- /dev/null
+++ b/lib/librte_gso/gso_tunnel_udp4.c
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Inspur Corporation
+ */
+
+#include "gso_common.h"
+#include "gso_tunnel_udp4.h"
+
+#define IPV4_HDR_MF_BIT (1U << 13)
+
+/*
+ * Rewrite the outer IPv4/UDP and inner IPv4 headers of every output segment.
+ * Each segment is turned into a fragment of the inner IPv4 datagram.
+ */
+static void
+update_tunnel_ipv4_udp_headers(struct rte_mbuf *pkt, struct rte_mbuf **segs,
+			       uint16_t nb_segs)
+{
+	struct rte_ipv4_hdr *ipv4_hdr;
+	uint16_t outer_id, inner_id, tail_idx, i, length;
+	uint16_t outer_ipv4_offset, inner_ipv4_offset;
+	uint16_t outer_udp_offset;
+	uint16_t frag_offset = 0, is_mf;
+
+	outer_ipv4_offset = pkt->outer_l2_len;
+	outer_udp_offset = outer_ipv4_offset + pkt->outer_l3_len;
+	inner_ipv4_offset = outer_udp_offset + pkt->l2_len;
+
+	/* Outer IPv4 header. */
+	ipv4_hdr = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+			outer_ipv4_offset);
+	outer_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
+
+	/* Inner IPv4 header. */
+	ipv4_hdr = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+			inner_ipv4_offset);
+	inner_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
+
+	tail_idx = nb_segs - 1;
+
+	for (i = 0; i < nb_segs; i++) {
+		update_ipv4_header(segs[i], outer_ipv4_offset, outer_id);
+		update_udp_header(segs[i], outer_udp_offset);
+		update_ipv4_header(segs[i], inner_ipv4_offset, inner_id);
+		/* The inner UDP header is NOT touched: it is part of the
+		 * segmented payload, so it is present only in the first
+		 * fragment and must keep the length of the whole datagram.
+		 *
+		 * To keep the UDP datagram boundary, every segment is
+		 * emitted as a fragment of the inner IPv4 datagram: set
+		 * the MF bit (all but the last) and the fragment offset.
+		 */
+		ipv4_hdr = (struct rte_ipv4_hdr *)
+			(rte_pktmbuf_mtod(segs[i], char *) +
+				inner_ipv4_offset);
+		is_mf = i < tail_idx ? IPV4_HDR_MF_BIT : 0;
+		ipv4_hdr->fragment_offset =
+			rte_cpu_to_be_16(frag_offset | is_mf);
+		length = segs[i]->pkt_len - inner_ipv4_offset - pkt->l3_len;
+		frag_offset += (length >> 3);
+		outer_id++;
+	}
+}
+
+int
+gso_tunnel_udp4_segment(struct rte_mbuf *pkt,
+		uint16_t gso_size,
+		struct rte_mempool *direct_pool,
+		struct rte_mempool *indirect_pool,
+		struct rte_mbuf **pkts_out,
+		uint16_t nb_pkts_out)
+{
+	const struct rte_ipv4_hdr *in_ip;
+	uint16_t in_ip_off, pyld_off, unit_len, frag_field;
+	int nb_segs;
+
+	/* Locate the inner IPv4 header and read its fragment field. */
+	in_ip_off = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len;
+	in_ip = (const struct rte_ipv4_hdr *)
+		(rte_pktmbuf_mtod(pkt, char *) + in_ip_off);
+	frag_field = rte_be_to_cpu_16(in_ip->fragment_offset);
+
+	/* Everything past the inner IPv4 header is payload to segment. */
+	pyld_off = in_ip_off + pkt->l3_len;
+
+	/*
+	 * Hand the packet back unsegmented when the inner IPv4 header
+	 * already has the MF bit or a non-zero offset, or when no data
+	 * follows the L4 header.
+	 */
+	if (unlikely(IS_FRAGMENTED(frag_field)) ||
+			(pyld_off + pkt->l4_len) >= pkt->pkt_len) {
+		pkts_out[0] = pkt;
+		return 1;
+	}
+
+	/* Fragment offsets are counted in 8-byte units, so the payload
+	 * carried by each segment is rounded down to a multiple of 8.
+	 */
+	unit_len = (gso_size - pyld_off) & ~7U;
+
+	/* Headers are fixed up only when more than one segment results. */
+	nb_segs = gso_do_segment(pkt, pyld_off, unit_len, direct_pool,
+			indirect_pool, pkts_out, nb_pkts_out);
+	if (nb_segs > 1)
+		update_tunnel_ipv4_udp_headers(pkt, pkts_out, nb_segs);
+
+	return nb_segs;
+}
diff --git a/lib/librte_gso/gso_tunnel_udp4.h b/lib/librte_gso/gso_tunnel_udp4.h
new file mode 100644
index 0000000..d56e342
--- /dev/null
+++ b/lib/librte_gso/gso_tunnel_udp4.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Inspur Corporation
+ */
+
+#ifndef _GSO_TUNNEL_UDP4_H_
+#define _GSO_TUNNEL_UDP4_H_
+
+#include <stdint.h>
+#include <rte_mbuf.h>
+
+/**
+ * Segment a tunneling packet with inner UDP/IPv4 headers. This function
+ * does not check if the input packet has correct checksums, and does not
+ * update checksums for output GSO segments. Furthermore, it does not
+ * process IP fragment packets.
+ *
+ * @param pkt
+ *  The packet mbuf to segment.
+ * @param gso_size
+ *  The max length of a GSO segment, measured in bytes.
+ * @param direct_pool
+ *  MBUF pool used for allocating direct buffers for output segments.
+ * @param indirect_pool
+ *  MBUF pool used for allocating indirect buffers for output segments.
+ * @param pkts_out
+ *  Pointer array used to store the MBUF addresses of output GSO
+ *  segments, when it succeeds. If the memory space in pkts_out is
+ *  insufficient, it fails and returns -EINVAL.
+ * @param nb_pkts_out
+ *  The max number of items that 'pkts_out' can keep.
+ *
+ * @return
+ *   - The number of GSO segments filled in pkts_out on success.
+ *   - Return -ENOMEM if run out of memory in MBUF pools.
+ *   - Return -EINVAL for invalid parameters.
+ */
+int gso_tunnel_udp4_segment(struct rte_mbuf *pkt,
+		uint16_t gso_size,
+		struct rte_mempool *direct_pool,
+		struct rte_mempool *indirect_pool,
+		struct rte_mbuf **pkts_out,
+		uint16_t nb_pkts_out);
+#endif
diff --git a/lib/librte_gso/meson.build b/lib/librte_gso/meson.build
index ad8dd85..05904f2 100644
--- a/lib/librte_gso/meson.build
+++ b/lib/librte_gso/meson.build
@@ -2,6 +2,6 @@
 # Copyright(c) 2017 Intel Corporation
 
 sources = files('gso_common.c', 'gso_tcp4.c', 'gso_udp4.c',
- 		'gso_tunnel_tcp4.c', 'rte_gso.c')
+		'gso_tunnel_tcp4.c', 'gso_tunnel_udp4.c', 'rte_gso.c')
 headers = files('rte_gso.h')
 deps += ['ethdev']
diff --git a/lib/librte_gso/rte_gso.c b/lib/librte_gso/rte_gso.c
index 751b5b6..cf401b2 100644
--- a/lib/librte_gso/rte_gso.c
+++ b/lib/librte_gso/rte_gso.c
@@ -11,6 +11,7 @@
 #include "gso_common.h"
 #include "gso_tcp4.h"
 #include "gso_tunnel_tcp4.h"
+#include "gso_tunnel_udp4.h"
 #include "gso_udp4.h"
 
 #define ILLEGAL_UDP_GSO_CTX(ctx) \
@@ -62,6 +63,13 @@
 		ret = gso_tunnel_tcp4_segment(pkt, gso_size, ipid_delta,
 				direct_pool, indirect_pool,
 				pkts_out, nb_pkts_out);
+	} else if (IS_IPV4_VXLAN_UDP4(pkt->ol_flags) &&
+			(gso_ctx->gso_types & (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
+					       DEV_TX_OFFLOAD_UDP_TSO))) {
+		pkt->ol_flags &= (~PKT_TX_UDP_SEG);
+		ret = gso_tunnel_udp4_segment(pkt, gso_size,
+				direct_pool, indirect_pool,
+				pkts_out, nb_pkts_out);
 	} else if (IS_IPV4_TCP(pkt->ol_flags) &&
 			(gso_ctx->gso_types & DEV_TX_OFFLOAD_TCP_TSO)) {
 		pkt->ol_flags &= (~PKT_TX_TCP_SEG);
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2021-01-18  0:05 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-01  6:46 [dpdk-dev] [PATCH] gso: add VXLAN UDP GSO support yang_y_yi
2020-09-27  5:57 ` yang_y_yi
2020-10-29  6:47 ` [dpdk-dev] [PATCH v1] " yang_y_yi
2020-11-06  4:09   ` Jiayu Hu
2020-11-09  1:03     ` yang_y_yi
2020-11-10  2:21 ` [dpdk-dev] [PATCH v2] " yang_y_yi
2020-11-13 13:16   ` Ananyev, Konstantin
2020-11-16  0:50     ` yang_y_yi
2020-11-16  1:11   ` [dpdk-dev] [PATCH v3] " yang_y_yi
2020-11-19  5:37     ` Hu, Jiayu
2020-11-19  6:43     ` [dpdk-dev] [PATCH v4] gso: add VXLAN UDP/IPv4 support yang_y_yi
2020-11-19  6:49       ` Hu, Jiayu
2021-01-15  3:51         ` yang_y_yi
2021-01-15 10:24           ` Thomas Monjalon
2021-01-18  0:05             ` yang_y_yi

DPDK patches and discussions

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://inbox.dpdk.org/dev/0 dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dev dev/ https://inbox.dpdk.org/dev \
		dev@dpdk.org
	public-inbox-index dev

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git