DPDK patches and discussions
 help / color / mirror / Atom feed
From: Jijiang Liu <jijiang.liu@intel.com>
To: dev@dpdk.org
Subject: [dpdk-dev] [RFC PATCH 5/6] rte_ether: implement encap and decap APIs
Date: Wed, 23 Dec 2015 16:49:51 +0800	[thread overview]
Message-ID: <1450860592-12673-6-git-send-email-jijiang.liu@intel.com> (raw)
In-Reply-To: <1450860592-12673-1-git-send-email-jijiang.liu@intel.com>

Use SIMD instructions to accelerate the encapsulation operation.

Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
---
 lib/librte_ether/libtunnel/rte_vxlan_opt.c |  251 ++++++++++++++++++++++++++++
 1 files changed, 251 insertions(+), 0 deletions(-)
 create mode 100644 lib/librte_ether/libtunnel/rte_vxlan_opt.c

diff --git a/lib/librte_ether/libtunnel/rte_vxlan_opt.c b/lib/librte_ether/libtunnel/rte_vxlan_opt.c
new file mode 100644
index 0000000..e59ed2c
--- /dev/null
+++ b/lib/librte_ether/libtunnel/rte_vxlan_opt.c
@@ -0,0 +1,251 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdint.h>
+#include <rte_mbuf.h>
+#include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_udp.h>
+#include <rte_tcp.h>
+#include <rte_byteorder.h>
+#include <rte_prefetch.h>
+#include <rte_ethdev.h>
+
+#include <immintrin.h>
+#include <tmmintrin.h>
+#include <mmintrin.h>
+
+#include "vxlan_opt.h"
+
+/* Silence cast-qual warnings, except when building with the Intel compiler. */
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+
+/* Silence strict-aliasing warnings: this file writes packet bytes through casted pointers. */
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+
+/* Port range that the hashed outer UDP source port is scaled into. */
+#define PORT_MIN    49152
+#define PORT_MAX    65535
+#define PORT_RANGE ((PORT_MAX - PORT_MIN) + 1)
+
+#define DUMMY_FOR_TEST
+/* UDP destination port written into the outer UDP header. */
+#define RTE_DEFAULT_VXLAN_PORT 4789
+
+#define LOOP           4
+#define MAC_LEN        6
+/*
+ * Byte offset, from the start of the outer Ethernet header, of the 32-byte
+ * region written by the encapsulation store (Ethernet header plus the first
+ * 4 bytes of the IPv4 header). Parenthesized so that it expands safely
+ * inside larger expressions.
+ */
+#define PREFIX         (ETHER_HDR_LEN + 4)
+/* UDP header + VXLAN header */
+#define UDP_PRE_SZ     (sizeof(struct udp_hdr) + sizeof(struct vxlan_hdr))
+/* IPv4 header + UDP header + VXLAN header */
+#define IP_PRE_SZ      (UDP_PRE_SZ + sizeof(struct ipv4_hdr))
+/* Full outer header prepended on encapsulation: Ethernet + IPv4 + UDP + VXLAN */
+#define VXLAN_PKT_HDR_SIZE       (IP_PRE_SZ + ETHER_HDR_LEN)
+
+#define VXLAN_SIZE     sizeof(struct vxlan_hdr)
+#define INNER_PRE_SZ   (14 + 20 + 8 + 8)
+#define DECAP_OFFSET   (16 + 8 + 8)
+#define DETECT_OFFSET  12
+
+/* Header lengths and protocol identifiers filled in by parse_ethernet(). */
+struct eth_pkt_info {
+	uint8_t l2_len;      /* Ethernet header length, plus one VLAN header if present */
+	uint16_t ethertype;  /* ether type exactly as read from the packet (no byte swap) */
+	uint16_t l3_len;     /* IPv4/IPv6 header length in bytes; 0 for other ether types */
+	uint16_t l4_proto;   /* IP next-protocol value; 0 for other ether types */
+	uint16_t l4_len;     /* TCP header length in bytes; 0 when the L4 protocol is not TCP */
+};
+
+/*
+ * Tx metadata consumed by the encapsulation shuffle: 14 bytes of fields,
+ * padded to 16 bytes by the alignment attribute so that it can be loaded
+ * as one 128-bit value.
+ */
+struct vxlan_tx_meta {
+	uint32_t sip;    /* outer source IP (meta bytes 0-3) */
+	uint32_t dip;    /* outer destination IP (meta bytes 4-7) */
+	uint32_t vni;    /* VXLAN network identifier; only the low 24 bits are emitted */
+	uint16_t sport;  /* outer UDP source port (meta bytes 12-13) */
+} __attribute__((__aligned__(16)));
+
+
+/*
+ * Fill l3_len and l4_proto from an IPv4 header. l4_len is set to the TCP
+ * header length when the payload is TCP (useful for TSO), otherwise to 0.
+ */
+static void
+parse_ipv4(struct ipv4_hdr *ipv4_hdr, struct eth_pkt_info *info)
+{
+	const struct tcp_hdr *tcp;
+	/* IHL is the low nibble of version_ihl, counted in 32-bit words */
+	const uint16_t ip_hdr_bytes = (ipv4_hdr->version_ihl & 0x0f) * 4;
+
+	info->l3_len = ip_hdr_bytes;
+	info->l4_proto = ipv4_hdr->next_proto_id;
+
+	if (info->l4_proto != IPPROTO_TCP) {
+		info->l4_len = 0;
+		return;
+	}
+
+	/* TCP data offset is the high nibble, counted in 32-bit words */
+	tcp = (const struct tcp_hdr *)((const char *)ipv4_hdr + ip_hdr_bytes);
+	info->l4_len = (tcp->data_off & 0xf0) >> 2;
+}
+
+/*
+ * Fill l3_len and l4_proto from a fixed-size IPv6 header (extension headers
+ * are not walked). l4_len is set to the TCP header length when the next
+ * header is TCP (useful for TSO), otherwise to 0.
+ */
+static void
+parse_ipv6(struct ipv6_hdr *ipv6_hdr, struct eth_pkt_info *info)
+{
+	const struct tcp_hdr *tcp;
+
+	info->l4_proto = ipv6_hdr->proto;
+	info->l3_len = sizeof(struct ipv6_hdr);
+
+	if (info->l4_proto != IPPROTO_TCP) {
+		info->l4_len = 0;
+		return;
+	}
+
+	/* TCP data offset is the high nibble, counted in 32-bit words */
+	tcp = (const struct tcp_hdr *)((const char *)ipv6_hdr +
+			sizeof(struct ipv6_hdr));
+	info->l4_len = (tcp->data_off & 0xf0) >> 2;
+}
+
+/*
+ * Parse an Ethernet header to fill the ethertype, l2_len, l3_len and
+ * l4_proto. This function recognizes IPv4/IPv6 with one optional VLAN
+ * header. l4_len is only set for TCP (useful for TSO); for every other
+ * L4 protocol it is 0.
+ *
+ * The ether type is compared in network byte order, without swapping the
+ * value read from the packet.
+ * NOTE(review): _htons is not defined in this file; presumably it is a
+ * compile-time byte-swap macro from vxlan_opt.h - confirm.
+ */
+static void
+parse_ethernet(struct ether_hdr *eth_hdr, struct eth_pkt_info *info)
+{
+	struct ipv4_hdr *ipv4_hdr;
+	struct ipv6_hdr *ipv6_hdr;
+
+	info->l2_len = sizeof(struct ether_hdr);
+	info->ethertype = eth_hdr->ether_type;
+
+	/* Skip a single VLAN tag and use the encapsulated ether type instead */
+	if (info->ethertype == _htons(ETHER_TYPE_VLAN)) {
+		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
+
+		info->l2_len += sizeof(struct vlan_hdr);
+		info->ethertype = vlan_hdr->eth_proto;
+	}
+
+	switch (info->ethertype) {
+	case _htons(ETHER_TYPE_IPv4):
+		ipv4_hdr = (struct ipv4_hdr *)((char *)eth_hdr + info->l2_len);
+		/* use the file-local helper; no rte_parse_ipv4() exists here */
+		parse_ipv4(ipv4_hdr, info);
+		break;
+	case _htons(ETHER_TYPE_IPv6):
+		ipv6_hdr = (struct ipv6_hdr *)((char *)eth_hdr + info->l2_len);
+		/* use the file-local helper; no rte_parse_ipv6() exists here */
+		parse_ipv6(ipv6_hdr, info);
+		break;
+	default:
+		info->l4_len = 0;
+		info->l3_len = 0;
+		info->l4_proto = 0;
+		break;
+	}
+}
+
+/**
+ * Strip the outer Ethernet/IP/UDP/VXLAN headers from a burst of packets.
+ *
+ * Only the first packet of the burst is parsed; every packet is assumed to
+ * match the same rule on this queue and therefore to carry an outer header
+ * of the same length.
+ *
+ * @param port
+ *   Unused.
+ * @param queue
+ *   Unused.
+ * @param pkts
+ *   Packets to decapsulate in place.
+ * @param nb_pkts
+ *   Number of entries in pkts; nothing is done when it is 0.
+ * @param user_param
+ *   Opaque parameter; the burst is left untouched when it is NULL.
+ */
+extern void
+rte_vxlan_decap_burst(uint8_t port, uint16_t queue,
+                      struct rte_mbuf *pkts[], uint16_t nb_pkts,
+                      void *user_param)
+{
+	struct eth_pkt_info info;
+	struct ether_hdr *eth_hdr;
+	uint16_t outer_hdr_len;
+	uint16_t l4_len;
+	uint16_t i;
+
+	(void)port;
+	(void)queue;
+
+	/* Check before touching pkts[0]: an empty burst has no first packet */
+	if (user_param == NULL || nb_pkts == 0)
+		return;
+
+	eth_hdr = rte_pktmbuf_mtod(pkts[0], struct ether_hdr *);
+	parse_ethernet(eth_hdr, &info);
+
+	/*
+	 * parse_ethernet() only reports l4_len for TCP, so the outer UDP
+	 * header that carries VXLAN has to be accounted for explicitly.
+	 */
+	l4_len = info.l4_len;
+	if (info.l4_proto == IPPROTO_UDP)
+		l4_len = sizeof(struct udp_hdr);
+
+	outer_hdr_len = info.l2_len + info.l3_len + l4_len +
+			sizeof(struct vxlan_hdr);
+
+	for (i = 0; i < nb_pkts; i++)
+		rte_pktmbuf_adj(pkts[i], outer_hdr_len);
+}
+
+/* Encapsulation using SIMD and a flow rule to accelerate this operation */
+
+/**
+ * Prepend an outer Ethernet/IPv4/UDP/VXLAN header to every packet of a burst.
+ *
+ * The per-burst fields (IP addresses, VNI, UDP source port) are gathered in
+ * a 16-byte vxlan_tx_meta, widened into a 256-bit register and shuffled into
+ * their on-wire positions once. The per-packet fields (UDP length, UDP
+ * destination port, TTL, protocol, VXLAN flags) are OR-ed in for each packet
+ * before the 32-byte store.
+ *
+ * The shuffle mask byte-reverses every field it moves, so the values held in
+ * vxlan_tx_meta must be in host byte order.
+ * NOTE(review): this assumes tunnel_flow[0].src_ip/dst_ip and tunnel_id are
+ * host order - confirm against rte_eth_tunnel_conf.
+ *
+ * The IPv4 packet_id, fragment_offset and header checksum, and the UDP
+ * checksum, are written as 0.
+ * NOTE(review): presumably the IPv4 checksum is expected to be filled in
+ * later (e.g. by hardware offload) - confirm.
+ *
+ * @param port
+ *   Unused.
+ * @param queue
+ *   Unused.
+ * @param pkts
+ *   Packets to encapsulate in place.
+ * @param nb_pkts
+ *   Number of entries in pkts; nothing is done when it is 0.
+ * @param encap_param
+ *   Tunnel configuration (flow addresses, tunnel id, MAC addresses); nothing
+ *   is done when it is NULL.
+ */
+extern void
+rte_vxlan_encap_burst(uint8_t port, uint16_t queue,
+        struct rte_mbuf *pkts[], uint16_t nb_pkts,
+        rte_eth_tunnel_conf *encap_param)
+{
+	struct vxlan_tx_meta meta;
+	struct ether_hdr *phdr;
+	char *pkt;
+	uint16_t len;
+	uint16_t eth_type;
+	uint32_t ip_word;
+	uint32_t hash;
+	uint16_t i;
+	__m256i temp, base, hdr;
+	const __m256i shuf_msk = _mm256_set_epi8(
+		0xFF, 0, 1, 2,           /* high octet 0~2, 24 bits vni */
+		0xFF, 0xFF, 0xFF, 0xFF,  /* skip vx_flags */
+		0xFF, 0xFF, 0xFF, 0xFF,  /* skip udp len, cksum */
+		0xFF, 0xFF,              /* skip udp dst port */
+		8, 9,                    /* high octet 8~9, 16 bits udp src port */
+		8, 9, 10, 11,            /* low octet 8~11, 32 bits dst ip */
+		0, 1, 2, 3,              /* low octet 0~3, 32 bits src ip */
+		0xFF, 0xFF, 0xFF, 0xFF,  /* skip ttl, proto_id, hdr_csum */
+		0xFF, 0xFF, 0xFF, 0xFF   /* skip packet_id, fragment_offset */
+	);
+
+	(void)port;
+	(void)queue;
+
+	if (nb_pkts == 0 || encap_param == NULL)
+		return;
+
+	/*
+	 * One source port per burst, hashed from the first packet's (inner)
+	 * MAC addresses and ether type.
+	 * NOTE(review): the original never declared phdr; taking it from the
+	 * first packet is an assumption - confirm.
+	 */
+	phdr = rte_pktmbuf_mtod(pkts[0], struct ether_hdr *);
+	hash = rte_hash_crc(phdr, 2 * ETHER_ADDR_LEN, phdr->ether_type);
+
+	/*
+	 * NOTE(review): the outer source/destination are taken from the flow's
+	 * dst_ip/src_ip respectively, as in the original - confirm the swap is
+	 * intended.
+	 */
+	meta.sip = encap_param->tunnel_flow[0].dst_ip;
+	meta.dip = encap_param->tunnel_flow[0].src_ip;
+	meta.vni = encap_param->tunnel_id;
+	/*
+	 * Scale the 32-bit hash into [PORT_MIN, PORT_MAX]. The shift must be
+	 * parenthesized: ">> 32 + PORT_MIN" would shift by 49184. Kept in host
+	 * order because the shuffle below performs the byte swap.
+	 */
+	meta.sport = (uint16_t)((((uint64_t)hash * PORT_RANGE) >> 32) + PORT_MIN);
+
+	/*
+	 * Widen the four 32-bit meta words to 64 bits each so that sip/dip land
+	 * in the low 128-bit lane and vni/sport in the high lane, then move
+	 * them to their header offsets. _mm256_shuffle_epi8 works per lane.
+	 * This is loop-invariant, so it is done once per burst.
+	 */
+	base = _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i *)&meta));
+	base = _mm256_shuffle_epi8(base, shuf_msk);
+
+	for (i = 0; i < nb_pkts; i++) {
+		if (i + 1 < nb_pkts)
+			rte_prefetch1(pkts[i + 1]);
+
+		len = rte_pktmbuf_pkt_len(pkts[i]);
+		pkt = rte_pktmbuf_prepend(pkts[i], VXLAN_PKT_HDR_SIZE);
+		if (pkt == NULL)
+			continue; /* not enough headroom: leave the packet untouched */
+
+		/*
+		 * Per-packet constants, listed from 16-bit lane 15 down to 0:
+		 * VXLAN flags (0x08 = valid-VNI bit), UDP length, UDP dst port,
+		 * and TTL 0xFF / protocol 0x11 (UDP) in the IPv4 header.
+		 */
+		temp = _mm256_set_epi16(0, 0, 0, 0x0008,
+				0, rte_cpu_to_be_16((uint16_t)(len + UDP_PRE_SZ)),
+				rte_cpu_to_be_16(RTE_DEFAULT_VXLAN_PORT), 0,
+				0, 0, 0, 0,
+				0, 0x11FF, 0, 0);
+
+		/* merge BEFORE storing, otherwise the constants are never written */
+		hdr = _mm256_or_si256(base, temp);
+
+		/* write L2 header: 6B + 6B MAC addresses, 2B ether type */
+		rte_memcpy(pkt, encap_param->peer_mac, MAC_LEN);
+		rte_memcpy(pkt + MAC_LEN, encap_param->dst_mac, MAC_LEN);
+		eth_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+		rte_memcpy(pkt + MAC_LEN * 2, &eth_type, sizeof(eth_type));
+
+		/* write 4 Bytes, IP:4B (version/IHL 0x45, TOS 0, total length) */
+		ip_word = rte_cpu_to_be_32((UINT32_C(0x4500) << 16) |
+				(uint16_t)(len + IP_PRE_SZ));
+		rte_memcpy(pkt + ETHER_HDR_LEN, &ip_word, sizeof(ip_word));
+
+		/* write 32 Bytes, IP:16B UDP:8B VXLAN:8B */
+		_mm256_storeu_si256((__m256i *)(pkt + PREFIX), hdr);
+	}
+}
-- 
1.7.7.6

  parent reply	other threads:[~2015-12-23  8:50 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-12-23  8:49 [dpdk-dev] [RFC PATCH 0/6] General tunneling APIs Jijiang Liu
2015-12-23  8:49 ` [dpdk-dev] [RFC PATCH 1/6] rte_ether: extend rte_eth_tunnel_flow structure Jijiang Liu
2015-12-23  8:49 ` [dpdk-dev] [RFC PATCH 2/6] rte_ether: define tunnel flow structure and APIs Jijiang Liu
2015-12-23  8:49 ` [dpdk-dev] [RFC PATCH 3/6] rte_ether: implement tunnel config API Jijiang Liu
2015-12-23  8:49 ` [dpdk-dev] [RFC PATCH 4/6] rte_ether: define rte_eth_vxlan_decap and rte_eth_vxlan_encap Jijiang Liu
2015-12-23  8:49 ` Jijiang Liu [this message]
2015-12-23 18:32   ` [dpdk-dev] [RFC PATCH 5/6] rte_ether: implement encap and decap APIs Stephen Hemminger
2015-12-23  8:49 ` [dpdk-dev] [RFC PATCH 6/6] driver/i40e: tunnel configure in i40e Jijiang Liu
2015-12-23 11:17 ` [dpdk-dev] [RFC PATCH 0/6] General tunneling APIs Walukiewicz, Miroslaw
2015-12-28  5:54   ` Liu, Jijiang
2016-01-04 10:48     ` Walukiewicz, Miroslaw
2015-12-23 18:31 ` Stephen Hemminger
2015-12-28  1:46   ` Liu, Jijiang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1450860592-12673-6-git-send-email-jijiang.liu@intel.com \
    --to=jijiang.liu@intel.com \
    --cc=dev@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).