* [dpdk-dev] [PATCH] examples/l3fwd: increase lookup burst size to 8
@ 2015-07-23 16:12 Pablo de Lara
2015-07-23 17:25 ` Qiu, Michael
2015-07-27 14:43 ` Thomas Monjalon
0 siblings, 2 replies; 6+ messages in thread
From: Pablo de Lara @ 2015-07-23 16:12 UTC (permalink / raw)
To: dev
With the new hash implementation, the minimum lookup burst size
to get good performance is 8, since its internal pipeline
consists of 4 stages of 2 entries each, so to avoid
duplication, burst size should be 8 or more entries.
Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
examples/l3fwd/main.c | 234 ++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 191 insertions(+), 43 deletions(-)
diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c
index 45676ba..c8a0f66 100644
--- a/examples/l3fwd/main.c
+++ b/examples/l3fwd/main.c
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -731,26 +731,34 @@ static inline void l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid,
#if ((APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) && \
(ENABLE_MULTI_BUFFER_OPTIMIZE == 1))
-#define MASK_ALL_PKTS 0xf
-#define EXECLUDE_1ST_PKT 0xe
-#define EXECLUDE_2ND_PKT 0xd
-#define EXECLUDE_3RD_PKT 0xb
-#define EXECLUDE_4TH_PKT 0x7
+#define MASK_ALL_PKTS 0xff
+#define EXCLUDE_1ST_PKT 0xfe
+#define EXCLUDE_2ND_PKT 0xfd
+#define EXCLUDE_3RD_PKT 0xfb
+#define EXCLUDE_4TH_PKT 0xf7
+#define EXCLUDE_5TH_PKT 0xef
+#define EXCLUDE_6TH_PKT 0xdf
+#define EXCLUDE_7TH_PKT 0xbf
+#define EXCLUDE_8TH_PKT 0x7f
static inline void
-simple_ipv4_fwd_4pkts(struct rte_mbuf* m[4], uint8_t portid, struct lcore_conf *qconf)
+simple_ipv4_fwd_8pkts(struct rte_mbuf *m[8], uint8_t portid, struct lcore_conf *qconf)
{
- struct ether_hdr *eth_hdr[4];
- struct ipv4_hdr *ipv4_hdr[4];
- uint8_t dst_port[4];
- int32_t ret[4];
- union ipv4_5tuple_host key[4];
- __m128i data[4];
+ struct ether_hdr *eth_hdr[8];
+ struct ipv4_hdr *ipv4_hdr[8];
+ uint8_t dst_port[8];
+ int32_t ret[8];
+ union ipv4_5tuple_host key[8];
+ __m128i data[8];
eth_hdr[0] = rte_pktmbuf_mtod(m[0], struct ether_hdr *);
eth_hdr[1] = rte_pktmbuf_mtod(m[1], struct ether_hdr *);
eth_hdr[2] = rte_pktmbuf_mtod(m[2], struct ether_hdr *);
eth_hdr[3] = rte_pktmbuf_mtod(m[3], struct ether_hdr *);
+ eth_hdr[4] = rte_pktmbuf_mtod(m[4], struct ether_hdr *);
+ eth_hdr[5] = rte_pktmbuf_mtod(m[5], struct ether_hdr *);
+ eth_hdr[6] = rte_pktmbuf_mtod(m[6], struct ether_hdr *);
+ eth_hdr[7] = rte_pktmbuf_mtod(m[7], struct ether_hdr *);
/* Handle IPv4 headers.*/
ipv4_hdr[0] = rte_pktmbuf_mtod_offset(m[0], struct ipv4_hdr *,
@@ -761,32 +769,56 @@ simple_ipv4_fwd_4pkts(struct rte_mbuf* m[4], uint8_t portid, struct lcore_conf *
sizeof(struct ether_hdr));
ipv4_hdr[3] = rte_pktmbuf_mtod_offset(m[3], struct ipv4_hdr *,
sizeof(struct ether_hdr));
+ ipv4_hdr[4] = rte_pktmbuf_mtod_offset(m[4], struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
+ ipv4_hdr[5] = rte_pktmbuf_mtod_offset(m[5], struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
+ ipv4_hdr[6] = rte_pktmbuf_mtod_offset(m[6], struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
+ ipv4_hdr[7] = rte_pktmbuf_mtod_offset(m[7], struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
#ifdef DO_RFC_1812_CHECKS
/* Check to make sure the packet is valid (RFC1812) */
uint8_t valid_mask = MASK_ALL_PKTS;
if (is_valid_ipv4_pkt(ipv4_hdr[0], m[0]->pkt_len) < 0) {
rte_pktmbuf_free(m[0]);
- valid_mask &= EXECLUDE_1ST_PKT;
+ valid_mask &= EXCLUDE_1ST_PKT;
}
if (is_valid_ipv4_pkt(ipv4_hdr[1], m[1]->pkt_len) < 0) {
rte_pktmbuf_free(m[1]);
- valid_mask &= EXECLUDE_2ND_PKT;
+ valid_mask &= EXCLUDE_2ND_PKT;
}
if (is_valid_ipv4_pkt(ipv4_hdr[2], m[2]->pkt_len) < 0) {
rte_pktmbuf_free(m[2]);
- valid_mask &= EXECLUDE_3RD_PKT;
+ valid_mask &= EXCLUDE_3RD_PKT;
}
if (is_valid_ipv4_pkt(ipv4_hdr[3], m[3]->pkt_len) < 0) {
rte_pktmbuf_free(m[3]);
- valid_mask &= EXECLUDE_4TH_PKT;
+ valid_mask &= EXCLUDE_4TH_PKT;
+ }
+ if (is_valid_ipv4_pkt(ipv4_hdr[4], m[4]->pkt_len) < 0) {
+ rte_pktmbuf_free(m[4]);
+ valid_mask &= EXCLUDE_5TH_PKT;
+ }
+ if (is_valid_ipv4_pkt(ipv4_hdr[5], m[5]->pkt_len) < 0) {
+ rte_pktmbuf_free(m[5]);
+ valid_mask &= EXCLUDE_6TH_PKT;
+ }
+ if (is_valid_ipv4_pkt(ipv4_hdr[6], m[6]->pkt_len) < 0) {
+ rte_pktmbuf_free(m[6]);
+ valid_mask &= EXCLUDE_7TH_PKT;
+ }
+ if (is_valid_ipv4_pkt(ipv4_hdr[7], m[7]->pkt_len) < 0) {
+ rte_pktmbuf_free(m[7]);
+ valid_mask &= EXCLUDE_8TH_PKT;
}
if (unlikely(valid_mask != MASK_ALL_PKTS)) {
if (valid_mask == 0){
return;
} else {
uint8_t i = 0;
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < 8; i++) {
if ((0x1 << i) & valid_mask) {
l3fwd_simple_forward(m[i], portid, qconf);
}
@@ -796,22 +828,52 @@ simple_ipv4_fwd_4pkts(struct rte_mbuf* m[4], uint8_t portid, struct lcore_conf *
}
#endif // End of #ifdef DO_RFC_1812_CHECKS
- data[0] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[0], __m128i *, sizeof(struct ether_hdr) + offsetof(struct ipv4_hdr, time_to_live)));
- data[1] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[1], __m128i *, sizeof(struct ether_hdr) + offsetof(struct ipv4_hdr, time_to_live)));
- data[2] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[2], __m128i *, sizeof(struct ether_hdr) + offsetof(struct ipv4_hdr, time_to_live)));
- data[3] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[3], __m128i *, sizeof(struct ether_hdr) + offsetof(struct ipv4_hdr, time_to_live)));
+ data[0] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[0], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[1] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[1], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[2] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[2], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[3] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[3], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[4] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[4], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[5] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[5], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[6] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[6], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[7] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[7], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
key[0].xmm = _mm_and_si128(data[0], mask0);
key[1].xmm = _mm_and_si128(data[1], mask0);
key[2].xmm = _mm_and_si128(data[2], mask0);
key[3].xmm = _mm_and_si128(data[3], mask0);
+ key[4].xmm = _mm_and_si128(data[4], mask0);
+ key[5].xmm = _mm_and_si128(data[5], mask0);
+ key[6].xmm = _mm_and_si128(data[6], mask0);
+ key[7].xmm = _mm_and_si128(data[7], mask0);
+
+ const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3],
+ &key[4], &key[5], &key[6], &key[7]};
- const void *key_array[4] = {&key[0], &key[1], &key[2],&key[3]};
- rte_hash_lookup_multi(qconf->ipv4_lookup_struct, &key_array[0], 4, ret);
+ rte_hash_lookup_multi(qconf->ipv4_lookup_struct, &key_array[0], 8, ret);
dst_port[0] = (uint8_t) ((ret[0] < 0) ? portid : ipv4_l3fwd_out_if[ret[0]]);
dst_port[1] = (uint8_t) ((ret[1] < 0) ? portid : ipv4_l3fwd_out_if[ret[1]]);
dst_port[2] = (uint8_t) ((ret[2] < 0) ? portid : ipv4_l3fwd_out_if[ret[2]]);
dst_port[3] = (uint8_t) ((ret[3] < 0) ? portid : ipv4_l3fwd_out_if[ret[3]]);
+ dst_port[4] = (uint8_t) ((ret[4] < 0) ? portid : ipv4_l3fwd_out_if[ret[4]]);
+ dst_port[5] = (uint8_t) ((ret[5] < 0) ? portid : ipv4_l3fwd_out_if[ret[5]]);
+ dst_port[6] = (uint8_t) ((ret[6] < 0) ? portid : ipv4_l3fwd_out_if[ret[6]]);
+ dst_port[7] = (uint8_t) ((ret[7] < 0) ? portid : ipv4_l3fwd_out_if[ret[7]]);
if (dst_port[0] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[0]) == 0)
dst_port[0] = portid;
@@ -821,6 +883,14 @@ simple_ipv4_fwd_4pkts(struct rte_mbuf* m[4], uint8_t portid, struct lcore_conf *
dst_port[2] = portid;
if (dst_port[3] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[3]) == 0)
dst_port[3] = portid;
+ if (dst_port[4] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[4]) == 0)
+ dst_port[4] = portid;
+ if (dst_port[5] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[5]) == 0)
+ dst_port[5] = portid;
+ if (dst_port[6] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[6]) == 0)
+ dst_port[6] = portid;
+ if (dst_port[7] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[7]) == 0)
+ dst_port[7] = portid;
#ifdef DO_RFC_1812_CHECKS
/* Update time to live and header checksum */
@@ -832,6 +902,14 @@ simple_ipv4_fwd_4pkts(struct rte_mbuf* m[4], uint8_t portid, struct lcore_conf *
++(ipv4_hdr[1]->hdr_checksum);
++(ipv4_hdr[2]->hdr_checksum);
++(ipv4_hdr[3]->hdr_checksum);
+ --(ipv4_hdr[4]->time_to_live);
+ --(ipv4_hdr[5]->time_to_live);
+ --(ipv4_hdr[6]->time_to_live);
+ --(ipv4_hdr[7]->time_to_live);
+ ++(ipv4_hdr[4]->hdr_checksum);
+ ++(ipv4_hdr[5]->hdr_checksum);
+ ++(ipv4_hdr[6]->hdr_checksum);
+ ++(ipv4_hdr[7]->hdr_checksum);
#endif
/* dst addr */
@@ -839,17 +917,29 @@ simple_ipv4_fwd_4pkts(struct rte_mbuf* m[4], uint8_t portid, struct lcore_conf *
*(uint64_t *)ð_hdr[1]->d_addr = dest_eth_addr[dst_port[1]];
*(uint64_t *)ð_hdr[2]->d_addr = dest_eth_addr[dst_port[2]];
*(uint64_t *)ð_hdr[3]->d_addr = dest_eth_addr[dst_port[3]];
+ *(uint64_t *)ð_hdr[4]->d_addr = dest_eth_addr[dst_port[4]];
+ *(uint64_t *)ð_hdr[5]->d_addr = dest_eth_addr[dst_port[5]];
+ *(uint64_t *)ð_hdr[6]->d_addr = dest_eth_addr[dst_port[6]];
+ *(uint64_t *)ð_hdr[7]->d_addr = dest_eth_addr[dst_port[7]];
/* src addr */
ether_addr_copy(&ports_eth_addr[dst_port[0]], ð_hdr[0]->s_addr);
ether_addr_copy(&ports_eth_addr[dst_port[1]], ð_hdr[1]->s_addr);
ether_addr_copy(&ports_eth_addr[dst_port[2]], ð_hdr[2]->s_addr);
ether_addr_copy(&ports_eth_addr[dst_port[3]], ð_hdr[3]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[4]], ð_hdr[4]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[5]], ð_hdr[5]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[6]], ð_hdr[6]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[7]], ð_hdr[7]->s_addr);
send_single_packet(m[0], (uint8_t)dst_port[0]);
send_single_packet(m[1], (uint8_t)dst_port[1]);
send_single_packet(m[2], (uint8_t)dst_port[2]);
send_single_packet(m[3], (uint8_t)dst_port[3]);
+ send_single_packet(m[4], (uint8_t)dst_port[4]);
+ send_single_packet(m[5], (uint8_t)dst_port[5]);
+ send_single_packet(m[6], (uint8_t)dst_port[6]);
+ send_single_packet(m[7], (uint8_t)dst_port[7]);
}
@@ -866,18 +956,22 @@ static inline void get_ipv6_5tuple(struct rte_mbuf* m0, __m128i mask0, __m128i m
}
static inline void
-simple_ipv6_fwd_4pkts(struct rte_mbuf* m[4], uint8_t portid, struct lcore_conf *qconf)
+simple_ipv6_fwd_8pkts(struct rte_mbuf *m[8], uint8_t portid, struct lcore_conf *qconf)
{
- struct ether_hdr *eth_hdr[4];
- __attribute__((unused)) struct ipv6_hdr *ipv6_hdr[4];
- uint8_t dst_port[4];
- int32_t ret[4];
- union ipv6_5tuple_host key[4];
+ struct ether_hdr *eth_hdr[8];
+ __attribute__((unused)) struct ipv6_hdr *ipv6_hdr[8];
+ uint8_t dst_port[8];
+ int32_t ret[8];
+ union ipv6_5tuple_host key[8];
eth_hdr[0] = rte_pktmbuf_mtod(m[0], struct ether_hdr *);
eth_hdr[1] = rte_pktmbuf_mtod(m[1], struct ether_hdr *);
eth_hdr[2] = rte_pktmbuf_mtod(m[2], struct ether_hdr *);
eth_hdr[3] = rte_pktmbuf_mtod(m[3], struct ether_hdr *);
+ eth_hdr[4] = rte_pktmbuf_mtod(m[4], struct ether_hdr *);
+ eth_hdr[5] = rte_pktmbuf_mtod(m[5], struct ether_hdr *);
+ eth_hdr[6] = rte_pktmbuf_mtod(m[6], struct ether_hdr *);
+ eth_hdr[7] = rte_pktmbuf_mtod(m[7], struct ether_hdr *);
/* Handle IPv6 headers.*/
ipv6_hdr[0] = rte_pktmbuf_mtod_offset(m[0], struct ipv6_hdr *,
@@ -888,18 +982,36 @@ simple_ipv6_fwd_4pkts(struct rte_mbuf* m[4], uint8_t portid, struct lcore_conf *
sizeof(struct ether_hdr));
ipv6_hdr[3] = rte_pktmbuf_mtod_offset(m[3], struct ipv6_hdr *,
sizeof(struct ether_hdr));
+ ipv6_hdr[4] = rte_pktmbuf_mtod_offset(m[4], struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
+ ipv6_hdr[5] = rte_pktmbuf_mtod_offset(m[5], struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
+ ipv6_hdr[6] = rte_pktmbuf_mtod_offset(m[6], struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
+ ipv6_hdr[7] = rte_pktmbuf_mtod_offset(m[7], struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
get_ipv6_5tuple(m[0], mask1, mask2, &key[0]);
get_ipv6_5tuple(m[1], mask1, mask2, &key[1]);
get_ipv6_5tuple(m[2], mask1, mask2, &key[2]);
get_ipv6_5tuple(m[3], mask1, mask2, &key[3]);
+ get_ipv6_5tuple(m[4], mask1, mask2, &key[4]);
+ get_ipv6_5tuple(m[5], mask1, mask2, &key[5]);
+ get_ipv6_5tuple(m[6], mask1, mask2, &key[6]);
+ get_ipv6_5tuple(m[7], mask1, mask2, &key[7]);
+
+ const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3],
+ &key[4], &key[5], &key[6], &key[7]};
- const void *key_array[4] = {&key[0], &key[1], &key[2],&key[3]};
- rte_hash_lookup_multi(qconf->ipv6_lookup_struct, &key_array[0], 4, ret);
+ rte_hash_lookup_multi(qconf->ipv6_lookup_struct, &key_array[0], 8, ret);
- dst_port[0] = (uint8_t) ((ret[0] < 0)? portid:ipv6_l3fwd_out_if[ret[0]]);
- dst_port[1] = (uint8_t) ((ret[1] < 0)? portid:ipv6_l3fwd_out_if[ret[1]]);
- dst_port[2] = (uint8_t) ((ret[2] < 0)? portid:ipv6_l3fwd_out_if[ret[2]]);
- dst_port[3] = (uint8_t) ((ret[3] < 0)? portid:ipv6_l3fwd_out_if[ret[3]]);
+ dst_port[0] = (uint8_t) ((ret[0] < 0) ? portid:ipv6_l3fwd_out_if[ret[0]]);
+ dst_port[1] = (uint8_t) ((ret[1] < 0) ? portid:ipv6_l3fwd_out_if[ret[1]]);
+ dst_port[2] = (uint8_t) ((ret[2] < 0) ? portid:ipv6_l3fwd_out_if[ret[2]]);
+ dst_port[3] = (uint8_t) ((ret[3] < 0) ? portid:ipv6_l3fwd_out_if[ret[3]]);
+ dst_port[4] = (uint8_t) ((ret[4] < 0) ? portid:ipv6_l3fwd_out_if[ret[4]]);
+ dst_port[5] = (uint8_t) ((ret[5] < 0) ? portid:ipv6_l3fwd_out_if[ret[5]]);
+ dst_port[6] = (uint8_t) ((ret[6] < 0) ? portid:ipv6_l3fwd_out_if[ret[6]]);
+ dst_port[7] = (uint8_t) ((ret[7] < 0) ? portid:ipv6_l3fwd_out_if[ret[7]]);
if (dst_port[0] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[0]) == 0)
dst_port[0] = portid;
@@ -909,23 +1021,43 @@ simple_ipv6_fwd_4pkts(struct rte_mbuf* m[4], uint8_t portid, struct lcore_conf *
dst_port[2] = portid;
if (dst_port[3] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[3]) == 0)
dst_port[3] = portid;
+ if (dst_port[4] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[4]) == 0)
+ dst_port[4] = portid;
+ if (dst_port[5] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[5]) == 0)
+ dst_port[5] = portid;
+ if (dst_port[6] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[6]) == 0)
+ dst_port[6] = portid;
+ if (dst_port[7] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[7]) == 0)
+ dst_port[7] = portid;
/* dst addr */
*(uint64_t *)ð_hdr[0]->d_addr = dest_eth_addr[dst_port[0]];
*(uint64_t *)ð_hdr[1]->d_addr = dest_eth_addr[dst_port[1]];
*(uint64_t *)ð_hdr[2]->d_addr = dest_eth_addr[dst_port[2]];
*(uint64_t *)ð_hdr[3]->d_addr = dest_eth_addr[dst_port[3]];
+ *(uint64_t *)ð_hdr[4]->d_addr = dest_eth_addr[dst_port[4]];
+ *(uint64_t *)ð_hdr[5]->d_addr = dest_eth_addr[dst_port[5]];
+ *(uint64_t *)ð_hdr[6]->d_addr = dest_eth_addr[dst_port[6]];
+ *(uint64_t *)ð_hdr[7]->d_addr = dest_eth_addr[dst_port[7]];
/* src addr */
ether_addr_copy(&ports_eth_addr[dst_port[0]], ð_hdr[0]->s_addr);
ether_addr_copy(&ports_eth_addr[dst_port[1]], ð_hdr[1]->s_addr);
ether_addr_copy(&ports_eth_addr[dst_port[2]], ð_hdr[2]->s_addr);
ether_addr_copy(&ports_eth_addr[dst_port[3]], ð_hdr[3]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[4]], ð_hdr[4]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[5]], ð_hdr[5]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[6]], ð_hdr[6]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[7]], ð_hdr[7]->s_addr);
send_single_packet(m[0], (uint8_t)dst_port[0]);
send_single_packet(m[1], (uint8_t)dst_port[1]);
send_single_packet(m[2], (uint8_t)dst_port[2]);
send_single_packet(m[3], (uint8_t)dst_port[3]);
+ send_single_packet(m[4], (uint8_t)dst_port[4]);
+ send_single_packet(m[5], (uint8_t)dst_port[5]);
+ send_single_packet(m[6], (uint8_t)dst_port[6]);
+ send_single_packet(m[7], (uint8_t)dst_port[7]);
}
#endif /* APP_LOOKUP_METHOD */
@@ -1548,19 +1680,23 @@ main_loop(__attribute__((unused)) void *dummy)
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
{
/*
- * Send nb_rx - nb_rx%4 packets
- * in groups of 4.
+ * Send nb_rx - nb_rx%8 packets
+ * in groups of 8.
*/
- int32_t n = RTE_ALIGN_FLOOR(nb_rx, 4);
- for (j = 0; j < n ; j+=4) {
+ int32_t n = RTE_ALIGN_FLOOR(nb_rx, 8);
+ for (j = 0; j < n; j+=8) {
#ifdef RTE_NEXT_ABI
uint32_t pkt_type =
pkts_burst[j]->packet_type &
pkts_burst[j+1]->packet_type &
pkts_burst[j+2]->packet_type &
- pkts_burst[j+3]->packet_type;
+ pkts_burst[j+3]->packet_type &
+ pkts_burst[j+4]->packet_type &
+ pkts_burst[j+5]->packet_type &
+ pkts_burst[j+6]->packet_type &
+ pkts_burst[j+7]->packet_type;
if (pkt_type & RTE_PTYPE_L3_IPV4) {
- simple_ipv4_fwd_4pkts(
+ simple_ipv4_fwd_8pkts(
&pkts_burst[j], portid, qconf);
} else if (pkt_type &
RTE_PTYPE_L3_IPV6) {
@@ -1568,9 +1704,13 @@ main_loop(__attribute__((unused)) void *dummy)
uint32_t ol_flag = pkts_burst[j]->ol_flags
& pkts_burst[j+1]->ol_flags
& pkts_burst[j+2]->ol_flags
- & pkts_burst[j+3]->ol_flags;
+ & pkts_burst[j+3]->ol_flags
+ & pkts_burst[j+4]->ol_flags
+ & pkts_burst[j+5]->ol_flags
+ & pkts_burst[j+6]->ol_flags
+ & pkts_burst[j+7]->ol_flags;
if (ol_flag & PKT_RX_IPV4_HDR ) {
- simple_ipv4_fwd_4pkts(&pkts_burst[j],
+ simple_ipv4_fwd_8pkts(&pkts_burst[j],
portid, qconf);
} else if (ol_flag & PKT_RX_IPV6_HDR) {
#endif /* RTE_NEXT_ABI */
@@ -1585,6 +1725,14 @@ main_loop(__attribute__((unused)) void *dummy)
portid, qconf);
l3fwd_simple_forward(pkts_burst[j+3],
portid, qconf);
+ l3fwd_simple_forward(pkts_burst[j+4],
+ portid, qconf);
+ l3fwd_simple_forward(pkts_burst[j+5],
+ portid, qconf);
+ l3fwd_simple_forward(pkts_burst[j+6],
+ portid, qconf);
+ l3fwd_simple_forward(pkts_burst[j+7],
+ portid, qconf);
}
}
for (; j < nb_rx ; j++) {
--
2.4.2
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [dpdk-dev] [PATCH] examples/l3fwd: increase lookup burst size to 8
2015-07-23 16:12 [dpdk-dev] [PATCH] examples/l3fwd: increase lookup burst size to 8 Pablo de Lara
@ 2015-07-23 17:25 ` Qiu, Michael
2015-07-24 0:28 ` De Lara Guarch, Pablo
2015-07-27 14:43 ` Thomas Monjalon
1 sibling, 1 reply; 6+ messages in thread
From: Qiu, Michael @ 2015-07-23 17:25 UTC (permalink / raw)
To: De Lara Guarch, Pablo, dev
Hi, Pablo
Is there any performance data for this change?
Thanks,
Michael
-----Original Message-----
From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Pablo de Lara
Sent: Thursday, July 23, 2015 9:12 AM
To: dev@dpdk.org
Subject: [dpdk-dev] [PATCH] examples/l3fwd: increase lookup burst size to 8
With the new hash implementation, the minimum lookup burst size to get good performance is 8, since its internal pipeline consists of 4 stages of 2 entries each, so to avoid duplication, burst size should be 8 or more entries.
Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
examples/l3fwd/main.c | 234 ++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 191 insertions(+), 43 deletions(-)
diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c index 45676ba..c8a0f66 100644
--- a/examples/l3fwd/main.c
+++ b/examples/l3fwd/main.c
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -731,26 +731,34 @@ static inline void l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, #if ((APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) && \
(ENABLE_MULTI_BUFFER_OPTIMIZE == 1))
-#define MASK_ALL_PKTS 0xf
-#define EXECLUDE_1ST_PKT 0xe
-#define EXECLUDE_2ND_PKT 0xd
-#define EXECLUDE_3RD_PKT 0xb
-#define EXECLUDE_4TH_PKT 0x7
+#define MASK_ALL_PKTS 0xff
+#define EXCLUDE_1ST_PKT 0xfe
+#define EXCLUDE_2ND_PKT 0xfd
+#define EXCLUDE_3RD_PKT 0xfb
+#define EXCLUDE_4TH_PKT 0xf7
+#define EXCLUDE_5TH_PKT 0xef
+#define EXCLUDE_6TH_PKT 0xdf
+#define EXCLUDE_7TH_PKT 0xbf
+#define EXCLUDE_8TH_PKT 0x7f
static inline void
-simple_ipv4_fwd_4pkts(struct rte_mbuf* m[4], uint8_t portid, struct lcore_conf *qconf)
+simple_ipv4_fwd_8pkts(struct rte_mbuf *m[8], uint8_t portid, struct
+lcore_conf *qconf)
{
- struct ether_hdr *eth_hdr[4];
- struct ipv4_hdr *ipv4_hdr[4];
- uint8_t dst_port[4];
- int32_t ret[4];
- union ipv4_5tuple_host key[4];
- __m128i data[4];
+ struct ether_hdr *eth_hdr[8];
+ struct ipv4_hdr *ipv4_hdr[8];
+ uint8_t dst_port[8];
+ int32_t ret[8];
+ union ipv4_5tuple_host key[8];
+ __m128i data[8];
eth_hdr[0] = rte_pktmbuf_mtod(m[0], struct ether_hdr *);
eth_hdr[1] = rte_pktmbuf_mtod(m[1], struct ether_hdr *);
eth_hdr[2] = rte_pktmbuf_mtod(m[2], struct ether_hdr *);
eth_hdr[3] = rte_pktmbuf_mtod(m[3], struct ether_hdr *);
+ eth_hdr[4] = rte_pktmbuf_mtod(m[4], struct ether_hdr *);
+ eth_hdr[5] = rte_pktmbuf_mtod(m[5], struct ether_hdr *);
+ eth_hdr[6] = rte_pktmbuf_mtod(m[6], struct ether_hdr *);
+ eth_hdr[7] = rte_pktmbuf_mtod(m[7], struct ether_hdr *);
/* Handle IPv4 headers.*/
ipv4_hdr[0] = rte_pktmbuf_mtod_offset(m[0], struct ipv4_hdr *, @@ -761,32 +769,56 @@ simple_ipv4_fwd_4pkts(struct rte_mbuf* m[4], uint8_t portid, struct lcore_conf *
sizeof(struct ether_hdr));
ipv4_hdr[3] = rte_pktmbuf_mtod_offset(m[3], struct ipv4_hdr *,
sizeof(struct ether_hdr));
+ ipv4_hdr[4] = rte_pktmbuf_mtod_offset(m[4], struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
+ ipv4_hdr[5] = rte_pktmbuf_mtod_offset(m[5], struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
+ ipv4_hdr[6] = rte_pktmbuf_mtod_offset(m[6], struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
+ ipv4_hdr[7] = rte_pktmbuf_mtod_offset(m[7], struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
#ifdef DO_RFC_1812_CHECKS
/* Check to make sure the packet is valid (RFC1812) */
uint8_t valid_mask = MASK_ALL_PKTS;
if (is_valid_ipv4_pkt(ipv4_hdr[0], m[0]->pkt_len) < 0) {
rte_pktmbuf_free(m[0]);
- valid_mask &= EXECLUDE_1ST_PKT;
+ valid_mask &= EXCLUDE_1ST_PKT;
}
if (is_valid_ipv4_pkt(ipv4_hdr[1], m[1]->pkt_len) < 0) {
rte_pktmbuf_free(m[1]);
- valid_mask &= EXECLUDE_2ND_PKT;
+ valid_mask &= EXCLUDE_2ND_PKT;
}
if (is_valid_ipv4_pkt(ipv4_hdr[2], m[2]->pkt_len) < 0) {
rte_pktmbuf_free(m[2]);
- valid_mask &= EXECLUDE_3RD_PKT;
+ valid_mask &= EXCLUDE_3RD_PKT;
}
if (is_valid_ipv4_pkt(ipv4_hdr[3], m[3]->pkt_len) < 0) {
rte_pktmbuf_free(m[3]);
- valid_mask &= EXECLUDE_4TH_PKT;
+ valid_mask &= EXCLUDE_4TH_PKT;
+ }
+ if (is_valid_ipv4_pkt(ipv4_hdr[4], m[4]->pkt_len) < 0) {
+ rte_pktmbuf_free(m[4]);
+ valid_mask &= EXCLUDE_5TH_PKT;
+ }
+ if (is_valid_ipv4_pkt(ipv4_hdr[5], m[5]->pkt_len) < 0) {
+ rte_pktmbuf_free(m[5]);
+ valid_mask &= EXCLUDE_6TH_PKT;
+ }
+ if (is_valid_ipv4_pkt(ipv4_hdr[6], m[6]->pkt_len) < 0) {
+ rte_pktmbuf_free(m[6]);
+ valid_mask &= EXCLUDE_7TH_PKT;
+ }
+ if (is_valid_ipv4_pkt(ipv4_hdr[7], m[7]->pkt_len) < 0) {
+ rte_pktmbuf_free(m[7]);
+ valid_mask &= EXCLUDE_8TH_PKT;
}
if (unlikely(valid_mask != MASK_ALL_PKTS)) {
if (valid_mask == 0){
return;
} else {
uint8_t i = 0;
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < 8; i++) {
if ((0x1 << i) & valid_mask) {
l3fwd_simple_forward(m[i], portid, qconf);
}
@@ -796,22 +828,52 @@ simple_ipv4_fwd_4pkts(struct rte_mbuf* m[4], uint8_t portid, struct lcore_conf *
}
#endif // End of #ifdef DO_RFC_1812_CHECKS
- data[0] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[0], __m128i *, sizeof(struct ether_hdr) + offsetof(struct ipv4_hdr, time_to_live)));
- data[1] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[1], __m128i *, sizeof(struct ether_hdr) + offsetof(struct ipv4_hdr, time_to_live)));
- data[2] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[2], __m128i *, sizeof(struct ether_hdr) + offsetof(struct ipv4_hdr, time_to_live)));
- data[3] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[3], __m128i *, sizeof(struct ether_hdr) + offsetof(struct ipv4_hdr, time_to_live)));
+ data[0] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[0], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[1] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[1], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[2] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[2], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[3] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[3], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[4] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[4], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[5] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[5], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[6] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[6], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[7] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[7], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
key[0].xmm = _mm_and_si128(data[0], mask0);
key[1].xmm = _mm_and_si128(data[1], mask0);
key[2].xmm = _mm_and_si128(data[2], mask0);
key[3].xmm = _mm_and_si128(data[3], mask0);
+ key[4].xmm = _mm_and_si128(data[4], mask0);
+ key[5].xmm = _mm_and_si128(data[5], mask0);
+ key[6].xmm = _mm_and_si128(data[6], mask0);
+ key[7].xmm = _mm_and_si128(data[7], mask0);
+
+ const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3],
+ &key[4], &key[5], &key[6], &key[7]};
- const void *key_array[4] = {&key[0], &key[1], &key[2],&key[3]};
- rte_hash_lookup_multi(qconf->ipv4_lookup_struct, &key_array[0], 4, ret);
+ rte_hash_lookup_multi(qconf->ipv4_lookup_struct, &key_array[0], 8,
+ret);
dst_port[0] = (uint8_t) ((ret[0] < 0) ? portid : ipv4_l3fwd_out_if[ret[0]]);
dst_port[1] = (uint8_t) ((ret[1] < 0) ? portid : ipv4_l3fwd_out_if[ret[1]]);
dst_port[2] = (uint8_t) ((ret[2] < 0) ? portid : ipv4_l3fwd_out_if[ret[2]]);
dst_port[3] = (uint8_t) ((ret[3] < 0) ? portid : ipv4_l3fwd_out_if[ret[3]]);
+ dst_port[4] = (uint8_t) ((ret[4] < 0) ? portid : ipv4_l3fwd_out_if[ret[4]]);
+ dst_port[5] = (uint8_t) ((ret[5] < 0) ? portid : ipv4_l3fwd_out_if[ret[5]]);
+ dst_port[6] = (uint8_t) ((ret[6] < 0) ? portid : ipv4_l3fwd_out_if[ret[6]]);
+ dst_port[7] = (uint8_t) ((ret[7] < 0) ? portid :
+ipv4_l3fwd_out_if[ret[7]]);
if (dst_port[0] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[0]) == 0)
dst_port[0] = portid;
@@ -821,6 +883,14 @@ simple_ipv4_fwd_4pkts(struct rte_mbuf* m[4], uint8_t portid, struct lcore_conf *
dst_port[2] = portid;
if (dst_port[3] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[3]) == 0)
dst_port[3] = portid;
+ if (dst_port[4] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[4]) == 0)
+ dst_port[4] = portid;
+ if (dst_port[5] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[5]) == 0)
+ dst_port[5] = portid;
+ if (dst_port[6] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[6]) == 0)
+ dst_port[6] = portid;
+ if (dst_port[7] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[7]) == 0)
+ dst_port[7] = portid;
#ifdef DO_RFC_1812_CHECKS
/* Update time to live and header checksum */ @@ -832,6 +902,14 @@ simple_ipv4_fwd_4pkts(struct rte_mbuf* m[4], uint8_t portid, struct lcore_conf *
++(ipv4_hdr[1]->hdr_checksum);
++(ipv4_hdr[2]->hdr_checksum);
++(ipv4_hdr[3]->hdr_checksum);
+ --(ipv4_hdr[4]->time_to_live);
+ --(ipv4_hdr[5]->time_to_live);
+ --(ipv4_hdr[6]->time_to_live);
+ --(ipv4_hdr[7]->time_to_live);
+ ++(ipv4_hdr[4]->hdr_checksum);
+ ++(ipv4_hdr[5]->hdr_checksum);
+ ++(ipv4_hdr[6]->hdr_checksum);
+ ++(ipv4_hdr[7]->hdr_checksum);
#endif
/* dst addr */
@@ -839,17 +917,29 @@ simple_ipv4_fwd_4pkts(struct rte_mbuf* m[4], uint8_t portid, struct lcore_conf *
*(uint64_t *)ð_hdr[1]->d_addr = dest_eth_addr[dst_port[1]];
*(uint64_t *)ð_hdr[2]->d_addr = dest_eth_addr[dst_port[2]];
*(uint64_t *)ð_hdr[3]->d_addr = dest_eth_addr[dst_port[3]];
+ *(uint64_t *)ð_hdr[4]->d_addr = dest_eth_addr[dst_port[4]];
+ *(uint64_t *)ð_hdr[5]->d_addr = dest_eth_addr[dst_port[5]];
+ *(uint64_t *)ð_hdr[6]->d_addr = dest_eth_addr[dst_port[6]];
+ *(uint64_t *)ð_hdr[7]->d_addr = dest_eth_addr[dst_port[7]];
/* src addr */
ether_addr_copy(&ports_eth_addr[dst_port[0]], ð_hdr[0]->s_addr);
ether_addr_copy(&ports_eth_addr[dst_port[1]], ð_hdr[1]->s_addr);
ether_addr_copy(&ports_eth_addr[dst_port[2]], ð_hdr[2]->s_addr);
ether_addr_copy(&ports_eth_addr[dst_port[3]], ð_hdr[3]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[4]], ð_hdr[4]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[5]], ð_hdr[5]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[6]], ð_hdr[6]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[7]], ð_hdr[7]->s_addr);
send_single_packet(m[0], (uint8_t)dst_port[0]);
send_single_packet(m[1], (uint8_t)dst_port[1]);
send_single_packet(m[2], (uint8_t)dst_port[2]);
send_single_packet(m[3], (uint8_t)dst_port[3]);
+ send_single_packet(m[4], (uint8_t)dst_port[4]);
+ send_single_packet(m[5], (uint8_t)dst_port[5]);
+ send_single_packet(m[6], (uint8_t)dst_port[6]);
+ send_single_packet(m[7], (uint8_t)dst_port[7]);
}
@@ -866,18 +956,22 @@ static inline void get_ipv6_5tuple(struct rte_mbuf* m0, __m128i mask0, __m128i m }
static inline void
-simple_ipv6_fwd_4pkts(struct rte_mbuf* m[4], uint8_t portid, struct lcore_conf *qconf)
+simple_ipv6_fwd_8pkts(struct rte_mbuf *m[8], uint8_t portid, struct
+lcore_conf *qconf)
{
- struct ether_hdr *eth_hdr[4];
- __attribute__((unused)) struct ipv6_hdr *ipv6_hdr[4];
- uint8_t dst_port[4];
- int32_t ret[4];
- union ipv6_5tuple_host key[4];
+ struct ether_hdr *eth_hdr[8];
+ __attribute__((unused)) struct ipv6_hdr *ipv6_hdr[8];
+ uint8_t dst_port[8];
+ int32_t ret[8];
+ union ipv6_5tuple_host key[8];
eth_hdr[0] = rte_pktmbuf_mtod(m[0], struct ether_hdr *);
eth_hdr[1] = rte_pktmbuf_mtod(m[1], struct ether_hdr *);
eth_hdr[2] = rte_pktmbuf_mtod(m[2], struct ether_hdr *);
eth_hdr[3] = rte_pktmbuf_mtod(m[3], struct ether_hdr *);
+ eth_hdr[4] = rte_pktmbuf_mtod(m[4], struct ether_hdr *);
+ eth_hdr[5] = rte_pktmbuf_mtod(m[5], struct ether_hdr *);
+ eth_hdr[6] = rte_pktmbuf_mtod(m[6], struct ether_hdr *);
+ eth_hdr[7] = rte_pktmbuf_mtod(m[7], struct ether_hdr *);
/* Handle IPv6 headers.*/
ipv6_hdr[0] = rte_pktmbuf_mtod_offset(m[0], struct ipv6_hdr *, @@ -888,18 +982,36 @@ simple_ipv6_fwd_4pkts(struct rte_mbuf* m[4], uint8_t portid, struct lcore_conf *
sizeof(struct ether_hdr));
ipv6_hdr[3] = rte_pktmbuf_mtod_offset(m[3], struct ipv6_hdr *,
sizeof(struct ether_hdr));
+ ipv6_hdr[4] = rte_pktmbuf_mtod_offset(m[4], struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
+ ipv6_hdr[5] = rte_pktmbuf_mtod_offset(m[5], struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
+ ipv6_hdr[6] = rte_pktmbuf_mtod_offset(m[6], struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
+ ipv6_hdr[7] = rte_pktmbuf_mtod_offset(m[7], struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
get_ipv6_5tuple(m[0], mask1, mask2, &key[0]);
get_ipv6_5tuple(m[1], mask1, mask2, &key[1]);
get_ipv6_5tuple(m[2], mask1, mask2, &key[2]);
get_ipv6_5tuple(m[3], mask1, mask2, &key[3]);
+ get_ipv6_5tuple(m[4], mask1, mask2, &key[4]);
+ get_ipv6_5tuple(m[5], mask1, mask2, &key[5]);
+ get_ipv6_5tuple(m[6], mask1, mask2, &key[6]);
+ get_ipv6_5tuple(m[7], mask1, mask2, &key[7]);
+
+ const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3],
+ &key[4], &key[5], &key[6], &key[7]};
- const void *key_array[4] = {&key[0], &key[1], &key[2],&key[3]};
- rte_hash_lookup_multi(qconf->ipv6_lookup_struct, &key_array[0], 4, ret);
+ rte_hash_lookup_multi(qconf->ipv6_lookup_struct, &key_array[0], 8, ret);
- dst_port[0] = (uint8_t) ((ret[0] < 0)? portid:ipv6_l3fwd_out_if[ret[0]]);
- dst_port[1] = (uint8_t) ((ret[1] < 0)? portid:ipv6_l3fwd_out_if[ret[1]]);
- dst_port[2] = (uint8_t) ((ret[2] < 0)? portid:ipv6_l3fwd_out_if[ret[2]]);
- dst_port[3] = (uint8_t) ((ret[3] < 0)? portid:ipv6_l3fwd_out_if[ret[3]]);
+ dst_port[0] = (uint8_t) ((ret[0] < 0) ? portid:ipv6_l3fwd_out_if[ret[0]]);
+ dst_port[1] = (uint8_t) ((ret[1] < 0) ? portid:ipv6_l3fwd_out_if[ret[1]]);
+ dst_port[2] = (uint8_t) ((ret[2] < 0) ? portid:ipv6_l3fwd_out_if[ret[2]]);
+ dst_port[3] = (uint8_t) ((ret[3] < 0) ? portid:ipv6_l3fwd_out_if[ret[3]]);
+ dst_port[4] = (uint8_t) ((ret[4] < 0) ? portid:ipv6_l3fwd_out_if[ret[4]]);
+ dst_port[5] = (uint8_t) ((ret[5] < 0) ? portid:ipv6_l3fwd_out_if[ret[5]]);
+ dst_port[6] = (uint8_t) ((ret[6] < 0) ? portid:ipv6_l3fwd_out_if[ret[6]]);
+ dst_port[7] = (uint8_t) ((ret[7] < 0) ?
+portid:ipv6_l3fwd_out_if[ret[7]]);
if (dst_port[0] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[0]) == 0)
dst_port[0] = portid;
@@ -909,23 +1021,43 @@ simple_ipv6_fwd_4pkts(struct rte_mbuf* m[4], uint8_t portid, struct lcore_conf *
dst_port[2] = portid;
if (dst_port[3] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[3]) == 0)
dst_port[3] = portid;
+ if (dst_port[4] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[4]) == 0)
+ dst_port[4] = portid;
+ if (dst_port[5] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[5]) == 0)
+ dst_port[5] = portid;
+ if (dst_port[6] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[6]) == 0)
+ dst_port[6] = portid;
+ if (dst_port[7] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[7]) == 0)
+ dst_port[7] = portid;
/* dst addr */
*(uint64_t *)&eth_hdr[0]->d_addr = dest_eth_addr[dst_port[0]];
*(uint64_t *)&eth_hdr[1]->d_addr = dest_eth_addr[dst_port[1]];
*(uint64_t *)&eth_hdr[2]->d_addr = dest_eth_addr[dst_port[2]];
*(uint64_t *)&eth_hdr[3]->d_addr = dest_eth_addr[dst_port[3]];
+ *(uint64_t *)&eth_hdr[4]->d_addr = dest_eth_addr[dst_port[4]];
+ *(uint64_t *)&eth_hdr[5]->d_addr = dest_eth_addr[dst_port[5]];
+ *(uint64_t *)&eth_hdr[6]->d_addr = dest_eth_addr[dst_port[6]];
+ *(uint64_t *)&eth_hdr[7]->d_addr = dest_eth_addr[dst_port[7]];
/* src addr */
ether_addr_copy(&ports_eth_addr[dst_port[0]], &eth_hdr[0]->s_addr);
ether_addr_copy(&ports_eth_addr[dst_port[1]], &eth_hdr[1]->s_addr);
ether_addr_copy(&ports_eth_addr[dst_port[2]], &eth_hdr[2]->s_addr);
ether_addr_copy(&ports_eth_addr[dst_port[3]], &eth_hdr[3]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[4]], &eth_hdr[4]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[5]], &eth_hdr[5]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[6]], &eth_hdr[6]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[7]], &eth_hdr[7]->s_addr);
send_single_packet(m[0], (uint8_t)dst_port[0]);
send_single_packet(m[1], (uint8_t)dst_port[1]);
send_single_packet(m[2], (uint8_t)dst_port[2]);
send_single_packet(m[3], (uint8_t)dst_port[3]);
+ send_single_packet(m[4], (uint8_t)dst_port[4]);
+ send_single_packet(m[5], (uint8_t)dst_port[5]);
+ send_single_packet(m[6], (uint8_t)dst_port[6]);
+ send_single_packet(m[7], (uint8_t)dst_port[7]);
}
#endif /* APP_LOOKUP_METHOD */
@@ -1548,19 +1680,23 @@ main_loop(__attribute__((unused)) void *dummy)
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
{
/*
- * Send nb_rx - nb_rx%4 packets
- * in groups of 4.
+ * Send nb_rx - nb_rx%8 packets
+ * in groups of 8.
*/
- int32_t n = RTE_ALIGN_FLOOR(nb_rx, 4);
- for (j = 0; j < n ; j+=4) {
+ int32_t n = RTE_ALIGN_FLOOR(nb_rx, 8);
+ for (j = 0; j < n; j+=8) {
#ifdef RTE_NEXT_ABI
uint32_t pkt_type =
pkts_burst[j]->packet_type &
pkts_burst[j+1]->packet_type &
pkts_burst[j+2]->packet_type &
- pkts_burst[j+3]->packet_type;
+ pkts_burst[j+3]->packet_type &
+ pkts_burst[j+4]->packet_type &
+ pkts_burst[j+5]->packet_type &
+ pkts_burst[j+6]->packet_type &
+ pkts_burst[j+7]->packet_type;
if (pkt_type & RTE_PTYPE_L3_IPV4) {
- simple_ipv4_fwd_4pkts(
+ simple_ipv4_fwd_8pkts(
&pkts_burst[j], portid, qconf);
} else if (pkt_type &
RTE_PTYPE_L3_IPV6) {
@@ -1568,9 +1704,13 @@ main_loop(__attribute__((unused)) void *dummy)
uint32_t ol_flag = pkts_burst[j]->ol_flags
& pkts_burst[j+1]->ol_flags
& pkts_burst[j+2]->ol_flags
- & pkts_burst[j+3]->ol_flags;
+ & pkts_burst[j+3]->ol_flags
+ & pkts_burst[j+4]->ol_flags
+ & pkts_burst[j+5]->ol_flags
+ & pkts_burst[j+6]->ol_flags
+ & pkts_burst[j+7]->ol_flags;
if (ol_flag & PKT_RX_IPV4_HDR ) {
- simple_ipv4_fwd_4pkts(&pkts_burst[j],
+ simple_ipv4_fwd_8pkts(&pkts_burst[j],
portid, qconf);
} else if (ol_flag & PKT_RX_IPV6_HDR) {
#endif /* RTE_NEXT_ABI */
@@ -1585,6 +1725,14 @@ main_loop(__attribute__((unused)) void *dummy)
portid, qconf);
l3fwd_simple_forward(pkts_burst[j+3],
portid, qconf);
+ l3fwd_simple_forward(pkts_burst[j+4],
+ portid, qconf);
+ l3fwd_simple_forward(pkts_burst[j+5],
+ portid, qconf);
+ l3fwd_simple_forward(pkts_burst[j+6],
+ portid, qconf);
+ l3fwd_simple_forward(pkts_burst[j+7],
+ portid, qconf);
}
}
for (; j < nb_rx ; j++) {
--
2.4.2
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [dpdk-dev] [PATCH] examples/l3fwd: increase lookup burst size to 8
2015-07-23 17:25 ` Qiu, Michael
@ 2015-07-24 0:28 ` De Lara Guarch, Pablo
2015-07-24 1:08 ` Xu, Qian Q
0 siblings, 1 reply; 6+ messages in thread
From: De Lara Guarch, Pablo @ 2015-07-24 0:28 UTC (permalink / raw)
To: Qiu, Michael, dev
Hi Michael,
> -----Original Message-----
> From: Qiu, Michael
> Sent: Thursday, July 23, 2015 6:26 PM
> To: De Lara Guarch, Pablo; dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH] examples/l3fwd: increase lookup burst size
> to 8
>
> Hi, Pablo
>
> Is there any performance data for this change?
With this change, performance increases around 12% compared to the version
before the patch.
Thanks,
Pablo
>
> Thanks,
> Michael
>
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Pablo de Lara
> Sent: Thursday, July 23, 2015 9:12 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH] examples/l3fwd: increase lookup burst size to 8
>
> With the new hash implementation, the minimum lookup burst size to get
> good performance is 8, since its internal pipeline consists of 4 stages of 2
> entries each, so to avoid duplication, burst size should be 8 or more entries.
>
> Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [dpdk-dev] [PATCH] examples/l3fwd: increase lookup burst size to 8
2015-07-24 0:28 ` De Lara Guarch, Pablo
@ 2015-07-24 1:08 ` Xu, Qian Q
2015-07-24 1:14 ` De Lara Guarch, Pablo
0 siblings, 1 reply; 6+ messages in thread
From: Xu, Qian Q @ 2015-07-24 1:08 UTC (permalink / raw)
To: De Lara Guarch, Pablo, Qiu, Michael, dev
And this patch will be only related to exact match, the common usage of lpm is not impacted.
Thanks
Qian
-----Original Message-----
From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of De Lara Guarch, Pablo
Sent: Friday, July 24, 2015 8:29 AM
To: Qiu, Michael; dev@dpdk.org
Subject: Re: [dpdk-dev] [PATCH] examples/l3fwd: increase lookup burst size to 8
Hi Michael,
> -----Original Message-----
> From: Qiu, Michael
> Sent: Thursday, July 23, 2015 6:26 PM
> To: De Lara Guarch, Pablo; dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH] examples/l3fwd: increase lookup burst
> size to 8
>
> Hi, Pablo
>
> Is there any performance data for this change?
With this change, performance increases around 12% compared to the version before the patch.
Thanks,
Pablo
>
> Thanks,
> Michael
>
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Pablo de Lara
> Sent: Thursday, July 23, 2015 9:12 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH] examples/l3fwd: increase lookup burst size
> to 8
>
> With the new hash implementation, the minimum lookup burst size to get
> good performance is 8, since its internal pipeline consists of 4
> stages of 2 entries each, so to avoid duplication, burst size should be 8 or more entries.
>
> Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [dpdk-dev] [PATCH] examples/l3fwd: increase lookup burst size to 8
2015-07-24 1:08 ` Xu, Qian Q
@ 2015-07-24 1:14 ` De Lara Guarch, Pablo
0 siblings, 0 replies; 6+ messages in thread
From: De Lara Guarch, Pablo @ 2015-07-24 1:14 UTC (permalink / raw)
To: Xu, Qian Q, Qiu, Michael, dev
> -----Original Message-----
> From: Xu, Qian Q
> Sent: Friday, July 24, 2015 2:09 AM
> To: De Lara Guarch, Pablo; Qiu, Michael; dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH] examples/l3fwd: increase lookup burst size
> to 8
>
> And this patch will be only related to exact match, the common usage of lpm
> is not impacted.
That is right, thanks for pointing it out, Qian!
Pablo
>
> Thanks
> Qian
>
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of De Lara Guarch,
> Pablo
> Sent: Friday, July 24, 2015 8:29 AM
> To: Qiu, Michael; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] examples/l3fwd: increase lookup burst size
> to 8
>
> Hi Michael,
>
> > -----Original Message-----
> > From: Qiu, Michael
> > Sent: Thursday, July 23, 2015 6:26 PM
> > To: De Lara Guarch, Pablo; dev@dpdk.org
> > Subject: RE: [dpdk-dev] [PATCH] examples/l3fwd: increase lookup burst
> > size to 8
> >
> > Hi, Pablo
> >
> > Is there any performance data for this change?
>
> With this change, performance increases around 12% compared to the
> version before the patch.
>
> Thanks,
> Pablo
>
> >
> > Thanks,
> > Michael
> >
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Pablo de Lara
> > Sent: Thursday, July 23, 2015 9:12 AM
> > To: dev@dpdk.org
> > Subject: [dpdk-dev] [PATCH] examples/l3fwd: increase lookup burst size
> > to 8
> >
> > With the new hash implementation, the minimum lookup burst size to get
> > good performance is 8, since its internal pipeline consists of 4
> > stages of 2 entries each, so to avoid duplication, burst size should be 8 or
> more entries.
> >
> > Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [dpdk-dev] [PATCH] examples/l3fwd: increase lookup burst size to 8
2015-07-23 16:12 [dpdk-dev] [PATCH] examples/l3fwd: increase lookup burst size to 8 Pablo de Lara
2015-07-23 17:25 ` Qiu, Michael
@ 2015-07-27 14:43 ` Thomas Monjalon
1 sibling, 0 replies; 6+ messages in thread
From: Thomas Monjalon @ 2015-07-27 14:43 UTC (permalink / raw)
To: Pablo de Lara; +Cc: dev
2015-07-23 17:12, Pablo de Lara:
> With the new hash implementation, the minimum lookup burst size
> to get good performance is 8, since its internal pipeline
> consists of 4 stages of 2 entries each, so to avoid
> duplication, burst size should be 8 or more entries.
>
> Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
Applied, thanks
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2015-07-27 14:45 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-23 16:12 [dpdk-dev] [PATCH] examples/l3fwd: increase lookup burst size to 8 Pablo de Lara
2015-07-23 17:25 ` Qiu, Michael
2015-07-24 0:28 ` De Lara Guarch, Pablo
2015-07-24 1:08 ` Xu, Qian Q
2015-07-24 1:14 ` De Lara Guarch, Pablo
2015-07-27 14:43 ` Thomas Monjalon
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).