From: <Maciej.Czekaj@caviumnetworks.com>
To: <dev@dpdk.org>
Cc: <tomaszx.kulasek@intel.com>, <thomas.monjalon@6wind.com>,
Maciej Czekaj <Maciej.Czekaj@caviumnetworks.com>
Subject: [dpdk-dev] [PATCH v2] l3fwd: Fix compilation & enable exact match mode on ARM.
Date: Tue, 15 Mar 2016 21:05:43 +0100 [thread overview]
Message-ID: <1458072343-32012-2-git-send-email-Maciej.Czekaj@caviumnetworks.com> (raw)
In-Reply-To: <1458072343-32012-1-git-send-email-Maciej.Czekaj@caviumnetworks.com>
From: Maciej Czekaj <Maciej.Czekaj@caviumnetworks.com>
Enable NEON support in exact match mode.
The l3fwd example did not compile on ARM due to SSE2 intrinsics used
in the generic part.
Some intrinsics were used to initialize data structures and those were
replaced by ordinary structure initialization.
All SSE2 intrinsics used in forwarding, i.e. masking the IP/TCP header,
are moved to a single inline function and made arch-specific.
Signed-off-by: Maciej Czekaj <Maciej.Czekaj@caviumnetworks.com>
---
examples/l3fwd/l3fwd.h | 4 ++-
examples/l3fwd/l3fwd_em.c | 72 ++++++++++++++++++++++++---------------
examples/l3fwd/l3fwd_em_hlm_sse.h | 32 ++++++++---------
examples/l3fwd/main.c | 2 +-
4 files changed, 64 insertions(+), 46 deletions(-)
diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h
index da6d369..7dcc7e5 100644
--- a/examples/l3fwd/l3fwd.h
+++ b/examples/l3fwd/l3fwd.h
@@ -34,6 +34,8 @@
#ifndef __L3_FWD_H__
#define __L3_FWD_H__
+#include <rte_vect.h>
+
#define DO_RFC_1812_CHECKS
#define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1
@@ -103,7 +105,7 @@ extern uint32_t enabled_port_mask;
extern int ipv6; /**< ipv6 is false by default. */
extern uint32_t hash_entry_number;
-extern __m128i val_eth[RTE_MAX_ETHPORTS];
+extern xmm_t val_eth[RTE_MAX_ETHPORTS];
extern struct lcore_conf lcore_conf[RTE_MAX_LCORE];
diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index f6a65d8..0adf8f4 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -85,7 +85,7 @@ union ipv4_5tuple_host {
uint16_t port_src;
uint16_t port_dst;
};
- __m128i xmm;
+ xmm_t xmm;
};
#define XMM_NUM_IN_IPV6_5TUPLE 3
@@ -109,9 +109,11 @@ union ipv6_5tuple_host {
uint16_t port_dst;
uint64_t reserve;
};
- __m128i xmm[XMM_NUM_IN_IPV6_5TUPLE];
+ xmm_t xmm[XMM_NUM_IN_IPV6_5TUPLE];
};
+
+
struct ipv4_l3fwd_em_route {
struct ipv4_5tuple key;
uint8_t if_out;
@@ -236,9 +238,27 @@ ipv6_hash_crc(const void *data, __rte_unused uint32_t data_len,
static uint8_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
static uint8_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
-static __m128i mask0;
-static __m128i mask1;
-static __m128i mask2;
+static rte_xmm_t mask0;
+static rte_xmm_t mask1;
+static rte_xmm_t mask2;
+
+#if defined(__SSE2__)
+static inline xmm_t
+em_mask_key(void *key, xmm_t mask)
+{
+ __m128i data = _mm_loadu_si128((__m128i *)(key));
+
+ return _mm_and_si128(data, mask);
+}
+#elif defined(__ARM_NEON)
+static inline xmm_t
+em_mask_key(void *key, xmm_t mask)
+{
+ int32x4_t data = vld1q_s32((int32_t *)key);
+
+ return vandq_s32(data, mask);
+}
+#endif
static inline uint8_t
em_get_ipv4_dst_port(void *ipv4_hdr, uint8_t portid, void *lookup_struct)
@@ -249,13 +269,12 @@ em_get_ipv4_dst_port(void *ipv4_hdr, uint8_t portid, void *lookup_struct)
(struct rte_hash *)lookup_struct;
ipv4_hdr = (uint8_t *)ipv4_hdr + offsetof(struct ipv4_hdr, time_to_live);
- __m128i data = _mm_loadu_si128((__m128i *)(ipv4_hdr));
/*
* Get 5 tuple: dst port, src port, dst IP address,
* src IP address and protocol.
*/
- key.xmm = _mm_and_si128(data, mask0);
+ key.xmm = em_mask_key(ipv4_hdr, mask0.x);
/* Find destination port */
ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key);
@@ -271,35 +290,31 @@ em_get_ipv6_dst_port(void *ipv6_hdr, uint8_t portid, void *lookup_struct)
(struct rte_hash *)lookup_struct;
ipv6_hdr = (uint8_t *)ipv6_hdr + offsetof(struct ipv6_hdr, payload_len);
- __m128i data0 =
- _mm_loadu_si128((__m128i *)(ipv6_hdr));
- __m128i data1 =
- _mm_loadu_si128((__m128i *)(((uint8_t *)ipv6_hdr)+
- sizeof(__m128i)));
- __m128i data2 =
- _mm_loadu_si128((__m128i *)(((uint8_t *)ipv6_hdr)+
- sizeof(__m128i)+sizeof(__m128i)));
+ void *data0 = ipv6_hdr;
+ void *data1 = ((uint8_t *)ipv6_hdr) + sizeof(xmm_t);
+ void *data2 = ((uint8_t *)ipv6_hdr) + sizeof(xmm_t) + sizeof(xmm_t);
/* Get part of 5 tuple: src IP address lower 96 bits and protocol */
- key.xmm[0] = _mm_and_si128(data0, mask1);
+ key.xmm[0] = em_mask_key(data0, mask1.x);
/*
* Get part of 5 tuple: dst IP address lower 96 bits
* and src IP address higher 32 bits.
*/
- key.xmm[1] = data1;
+ key.xmm[1] = *(xmm_t *)data1;
/*
* Get part of 5 tuple: dst port and src port
* and dst IP address higher 32 bits.
*/
- key.xmm[2] = _mm_and_si128(data2, mask2);
+ key.xmm[2] = em_mask_key(data2, mask2.x);
/* Find destination port */
ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key);
return (uint8_t)((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]);
}
+
/*
* Include header file if SSE4_1 is enabled for
* buffer optimization i.e. ENABLE_MULTI_BUFFER_OPTIMIZE=1.
@@ -348,14 +363,15 @@ convert_ipv6_5tuple(struct ipv6_5tuple *key1,
#define BYTE_VALUE_MAX 256
#define ALL_32_BITS 0xffffffff
#define BIT_8_TO_15 0x0000ff00
+
static inline void
populate_ipv4_few_flow_into_table(const struct rte_hash *h)
{
uint32_t i;
int32_t ret;
- mask0 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS,
- ALL_32_BITS, BIT_8_TO_15);
+ mask0 = (rte_xmm_t){.u32 = {BIT_8_TO_15, ALL_32_BITS,
+ ALL_32_BITS, ALL_32_BITS} };
for (i = 0; i < IPV4_L3FWD_EM_NUM_ROUTES; i++) {
struct ipv4_l3fwd_em_route entry;
@@ -381,10 +397,10 @@ populate_ipv6_few_flow_into_table(const struct rte_hash *h)
uint32_t i;
int32_t ret;
- mask1 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS,
- ALL_32_BITS, BIT_16_TO_23);
+ mask1 = (rte_xmm_t){.u32 = {BIT_16_TO_23, ALL_32_BITS,
+ ALL_32_BITS, ALL_32_BITS} };
- mask2 = _mm_set_epi32(0, 0, ALL_32_BITS, ALL_32_BITS);
+ mask2 = (rte_xmm_t){.u32 = {ALL_32_BITS, ALL_32_BITS, 0, 0} };
for (i = 0; i < IPV6_L3FWD_EM_NUM_ROUTES; i++) {
struct ipv6_l3fwd_em_route entry;
@@ -410,8 +426,8 @@ populate_ipv4_many_flow_into_table(const struct rte_hash *h,
{
unsigned i;
- mask0 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS,
- ALL_32_BITS, BIT_8_TO_15);
+ mask0 = (rte_xmm_t){.u32 = {BIT_8_TO_15, ALL_32_BITS,
+ ALL_32_BITS, ALL_32_BITS} };
for (i = 0; i < nr_flow; i++) {
struct ipv4_l3fwd_em_route entry;
@@ -462,9 +478,9 @@ populate_ipv6_many_flow_into_table(const struct rte_hash *h,
{
unsigned i;
- mask1 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS,
- ALL_32_BITS, BIT_16_TO_23);
- mask2 = _mm_set_epi32(0, 0, ALL_32_BITS, ALL_32_BITS);
+ mask1 = (rte_xmm_t){.u32 = {BIT_16_TO_23, ALL_32_BITS,
+ ALL_32_BITS, ALL_32_BITS} };
+ mask2 = (rte_xmm_t){.u32 = {ALL_32_BITS, ALL_32_BITS, 0, 0} };
for (i = 0; i < nr_flow; i++) {
struct ipv6_l3fwd_em_route entry;
diff --git a/examples/l3fwd/l3fwd_em_hlm_sse.h b/examples/l3fwd/l3fwd_em_hlm_sse.h
index d3388da..eb23163 100644
--- a/examples/l3fwd/l3fwd_em_hlm_sse.h
+++ b/examples/l3fwd/l3fwd_em_hlm_sse.h
@@ -77,14 +77,14 @@ em_get_dst_port_ipv4x8(struct lcore_conf *qconf, struct rte_mbuf *m[8],
sizeof(struct ether_hdr) +
offsetof(struct ipv4_hdr, time_to_live)));
- key[0].xmm = _mm_and_si128(data[0], mask0);
- key[1].xmm = _mm_and_si128(data[1], mask0);
- key[2].xmm = _mm_and_si128(data[2], mask0);
- key[3].xmm = _mm_and_si128(data[3], mask0);
- key[4].xmm = _mm_and_si128(data[4], mask0);
- key[5].xmm = _mm_and_si128(data[5], mask0);
- key[6].xmm = _mm_and_si128(data[6], mask0);
- key[7].xmm = _mm_and_si128(data[7], mask0);
+ key[0].xmm = _mm_and_si128(data[0], mask0.x);
+ key[1].xmm = _mm_and_si128(data[1], mask0.x);
+ key[2].xmm = _mm_and_si128(data[2], mask0.x);
+ key[3].xmm = _mm_and_si128(data[3], mask0.x);
+ key[4].xmm = _mm_and_si128(data[4], mask0.x);
+ key[5].xmm = _mm_and_si128(data[5], mask0.x);
+ key[6].xmm = _mm_and_si128(data[6], mask0.x);
+ key[7].xmm = _mm_and_si128(data[7], mask0.x);
const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3],
&key[4], &key[5], &key[6], &key[7]};
@@ -175,14 +175,14 @@ em_get_dst_port_ipv6x8(struct lcore_conf *qconf, struct rte_mbuf *m[8],
int32_t ret[8];
union ipv6_5tuple_host key[8];
- get_ipv6_5tuple(m[0], mask1, mask2, &key[0]);
- get_ipv6_5tuple(m[1], mask1, mask2, &key[1]);
- get_ipv6_5tuple(m[2], mask1, mask2, &key[2]);
- get_ipv6_5tuple(m[3], mask1, mask2, &key[3]);
- get_ipv6_5tuple(m[4], mask1, mask2, &key[4]);
- get_ipv6_5tuple(m[5], mask1, mask2, &key[5]);
- get_ipv6_5tuple(m[6], mask1, mask2, &key[6]);
- get_ipv6_5tuple(m[7], mask1, mask2, &key[7]);
+ get_ipv6_5tuple(m[0], mask1.x, mask2.x, &key[0]);
+ get_ipv6_5tuple(m[1], mask1.x, mask2.x, &key[1]);
+ get_ipv6_5tuple(m[2], mask1.x, mask2.x, &key[2]);
+ get_ipv6_5tuple(m[3], mask1.x, mask2.x, &key[3]);
+ get_ipv6_5tuple(m[4], mask1.x, mask2.x, &key[4]);
+ get_ipv6_5tuple(m[5], mask1.x, mask2.x, &key[5]);
+ get_ipv6_5tuple(m[6], mask1.x, mask2.x, &key[6]);
+ get_ipv6_5tuple(m[7], mask1.x, mask2.x, &key[7]);
const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3],
&key[4], &key[5], &key[6], &key[7]};
diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c
index 0e33039..8520f71 100644
--- a/examples/l3fwd/main.c
+++ b/examples/l3fwd/main.c
@@ -112,7 +112,7 @@ volatile bool force_quit;
uint64_t dest_eth_addr[RTE_MAX_ETHPORTS];
struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
-__m128i val_eth[RTE_MAX_ETHPORTS];
+xmm_t val_eth[RTE_MAX_ETHPORTS];
/* mask of enabled ports */
uint32_t enabled_port_mask;
--
1.9.1
next prev parent reply other threads:[~2016-03-15 20:06 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-03-10 16:06 [dpdk-dev] [PATCH] " Maciej.Czekaj
2016-03-10 16:06 ` Maciej.Czekaj
2016-03-11 15:16 ` Thomas Monjalon
2016-03-15 20:05 ` [dpdk-dev] [PATCH v2] " Maciej.Czekaj
2016-03-15 20:05 ` Maciej.Czekaj [this message]
2016-03-15 20:49 ` Thomas Monjalon
2016-03-15 23:47 ` [dpdk-dev] Odp.: " Czekaj, Maciej
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1458072343-32012-2-git-send-email-Maciej.Czekaj@caviumnetworks.com \
--to=maciej.czekaj@caviumnetworks.com \
--cc=dev@dpdk.org \
--cc=thomas.monjalon@6wind.com \
--cc=tomaszx.kulasek@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).