From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga11.intel.com (mga11.intel.com [192.55.52.93]) by dpdk.org (Postfix) with ESMTP id B6242B0A5 for ; Wed, 28 May 2014 19:32:43 +0200 (CEST) Received: from fmsmga002.fm.intel.com ([10.253.24.26]) by fmsmga102.fm.intel.com with ESMTP; 28 May 2014 10:32:55 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="4.98,929,1392192000"; d="scan'208";a="546285476" Received: from irvmail001.ir.intel.com ([163.33.26.43]) by fmsmga002.fm.intel.com with ESMTP; 28 May 2014 10:32:50 -0700 Received: from sivswdev02.ir.intel.com (sivswdev02.ir.intel.com [10.237.217.46]) by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id s4SHWnaj004064; Wed, 28 May 2014 18:32:49 +0100 Received: from sivswdev02.ir.intel.com (localhost [127.0.0.1]) by sivswdev02.ir.intel.com with ESMTP id s4SHWnh8031749; Wed, 28 May 2014 18:32:49 +0100 Received: (from aburakov@localhost) by sivswdev02.ir.intel.com with id s4SHWnY8031745; Wed, 28 May 2014 18:32:49 +0100 From: Anatoly Burakov To: dev@dpdk.org Date: Wed, 28 May 2014 18:32:47 +0100 Message-Id: <392839b64d4044bd12048440f3056e6c7790c9e0.1401298292.git.anatoly.burakov@intel.com> X-Mailer: git-send-email 1.7.0.7 In-Reply-To: References: In-Reply-To: References: Subject: [dpdk-dev] [PATCH 13/13] examples: overhaul of ip_reassembly app X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 28 May 2014 17:32:46 -0000 New stuff: * Support for regular traffic as well as IPv4 and IPv6 * Simplified config * Routing table printed out on start * Uses LPM/LPM6 for lookup * Unmatched traffic is sent to the originating port Signed-off-by: Anatoly Burakov --- examples/ip_reassembly/Makefile | 1 - examples/ip_reassembly/main.c | 1344 +++++++++++++-------------------------- 2 files changed, 435 insertions(+), 910 deletions(-) diff --git 
a/examples/ip_reassembly/Makefile b/examples/ip_reassembly/Makefile index 3115b95..9c9e0fa 100644 --- a/examples/ip_reassembly/Makefile +++ b/examples/ip_reassembly/Makefile @@ -52,7 +52,6 @@ CFLAGS += $(WERROR_FLAGS) # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) CFLAGS_main.o += -Wno-return-type -CFLAGS_main.o += -DIPV4_FRAG_TBL_STAT endif include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/ip_reassembly/main.c b/examples/ip_reassembly/main.c index 6c40d76..da3a0db 100644 --- a/examples/ip_reassembly/main.c +++ b/examples/ip_reassembly/main.c @@ -1,13 +1,13 @@ /*- * BSD LICENSE - * + * * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: - * + * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright @@ -17,7 +17,7 @@ * * Neither the name of Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. 
- * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -73,54 +74,29 @@ #include #include #include -#include "main.h" - -#define APP_LOOKUP_EXACT_MATCH 0 -#define APP_LOOKUP_LPM 1 -#define DO_RFC_1812_CHECKS - -#ifndef APP_LOOKUP_METHOD -#define APP_LOOKUP_METHOD APP_LOOKUP_LPM -#endif - -#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) -#include -#elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) #include #include -#else -#error "APP_LOOKUP_METHOD set to incorrect value" -#endif -#define MAX_PKT_BURST 32 - -#include "rte_ip_frag.h" +#include -#ifndef IPv6_BYTES -#define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\ - "%02x%02x:%02x%02x:%02x%02x:%02x%02x" -#define IPv6_BYTES(addr) \ - addr[0], addr[1], addr[2], addr[3], \ - addr[4], addr[5], addr[6], addr[7], \ - addr[8], addr[9], addr[10], addr[11],\ - addr[12], addr[13],addr[14], addr[15] -#endif +#include "main.h" +#define MAX_PKT_BURST 32 -#define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1 -#define MAX_PORTS RTE_MAX_ETHPORTS +#define RTE_LOGTYPE_IP_RSMBL RTE_LOGTYPE_USER1 #define MAX_JUMBO_PKT_LEN 9600 -#define IPV6_ADDR_LEN 16 - -#define MEMPOOL_CACHE_SIZE 256 - #define BUF_SIZE 2048 #define MBUF_SIZE \ (BUF_SIZE + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM) +#define NB_MBUF 8192 + +/* allow max jumbo frame 9.5 KB */ +#define JUMBO_FRAME_MAX_SIZE 0x2600 + #define MAX_FLOW_NUM UINT16_MAX #define MIN_FLOW_NUM 1 #define DEF_FLOW_NUM 0x1000 @@ -130,10 +106,10 @@ #define MIN_FLOW_TTL 1 #define DEF_FLOW_TTL MS_PER_S -#define DEF_MBUF_NUM 0x400 +#define MAX_FRAG_NUM RTE_LIBRTE_IP_FRAG_MAX_FRAG /* Should be power of two. 
*/ -#define IPV4_FRAG_TBL_BUCKET_ENTRIES 2 +#define IP_FRAG_TBL_BUCKET_ENTRIES 16 static uint32_t max_flow_num = DEF_FLOW_NUM; static uint32_t max_flow_ttl = DEF_FLOW_TTL; @@ -174,12 +150,33 @@ static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; /* ethernet addresses of ports */ -static struct ether_addr ports_eth_addr[MAX_PORTS]; +static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; + +#ifndef IPv4_BYTES +#define IPv4_BYTES_FMT "%" PRIu8 ".%" PRIu8 ".%" PRIu8 ".%" PRIu8 +#define IPv4_BYTES(addr) \ + (uint8_t) (((addr) >> 24) & 0xFF),\ + (uint8_t) (((addr) >> 16) & 0xFF),\ + (uint8_t) (((addr) >> 8) & 0xFF),\ + (uint8_t) ((addr) & 0xFF) +#endif + +#ifndef IPv6_BYTES +#define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\ + "%02x%02x:%02x%02x:%02x%02x:%02x%02x" +#define IPv6_BYTES(addr) \ + addr[0], addr[1], addr[2], addr[3], \ + addr[4], addr[5], addr[6], addr[7], \ + addr[8], addr[9], addr[10], addr[11],\ + addr[12], addr[13],addr[14], addr[15] +#endif + +#define IPV6_ADDR_LEN 16 /* mask of enabled ports */ static uint32_t enabled_port_mask = 0; -static int promiscuous_on = 0; /**< Ports set in promiscuous mode off by default. */ -static int numa_on = 1; /**< NUMA is enabled by default. 
*/ + +static int rx_queue_per_lcore = 1; struct mbuf_table { uint32_t len; @@ -188,54 +185,50 @@ struct mbuf_table { struct rte_mbuf *m_table[0]; }; -struct lcore_rx_queue { - uint8_t port_id; - uint8_t queue_id; -} __rte_cache_aligned; +struct rx_queue { + struct rte_ip_frag_tbl * frag_tbl; + struct rte_mempool * pool; + struct rte_lpm * lpm; + struct rte_lpm6 * lpm6; + uint8_t portid; +}; + +struct tx_lcore_stat { + uint64_t call; + uint64_t drop; + uint64_t queue; + uint64_t send; +}; #define MAX_RX_QUEUE_PER_LCORE 16 -#define MAX_TX_QUEUE_PER_PORT MAX_PORTS +#define MAX_TX_QUEUE_PER_PORT 16 #define MAX_RX_QUEUE_PER_PORT 128 -#define MAX_LCORE_PARAMS 1024 -struct lcore_params { - uint8_t port_id; - uint8_t queue_id; - uint8_t lcore_id; +struct lcore_queue_conf { + uint16_t n_rx_queue; + struct rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; + uint16_t tx_queue_id[RTE_MAX_ETHPORTS]; + struct rte_ip_frag_death_row death_row; + struct mbuf_table *tx_mbufs[RTE_MAX_ETHPORTS]; + struct tx_lcore_stat tx_stat; } __rte_cache_aligned; - -static struct lcore_params lcore_params_array[MAX_LCORE_PARAMS]; -static struct lcore_params lcore_params_array_default[] = { - {0, 0, 2}, - {0, 1, 2}, - {0, 2, 2}, - {1, 0, 2}, - {1, 1, 2}, - {1, 2, 2}, - {2, 0, 2}, - {3, 0, 3}, - {3, 1, 3}, -}; - -static struct lcore_params * lcore_params = lcore_params_array_default; -static uint16_t nb_lcore_params = sizeof(lcore_params_array_default) / - sizeof(lcore_params_array_default[0]); +static struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE]; static struct rte_eth_conf port_conf = { .rxmode = { - .mq_mode = ETH_MQ_RX_RSS, - .max_rx_pkt_len = ETHER_MAX_LEN, + .mq_mode = ETH_MQ_RX_RSS, + .max_rx_pkt_len = JUMBO_FRAME_MAX_SIZE, .split_hdr_size = 0, .header_split = 0, /**< Header Split disabled */ .hw_ip_checksum = 1, /**< IP checksum offload enabled */ .hw_vlan_filter = 0, /**< VLAN filtering disabled */ - .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ + .jumbo_frame = 1, /**< 
Jumbo Frame Support enabled */ .hw_strip_crc = 0, /**< CRC stripped by hardware */ }, .rx_adv_conf = { - .rss_conf = { - .rss_key = NULL, - .rss_hf = ETH_RSS_IPV4 | ETH_RSS_IPV6, + .rss_conf = { + .rss_key = NULL, + .rss_hf = ETH_RSS_IPV4 | ETH_RSS_IPV6, }, }, .txmode = { @@ -263,102 +256,37 @@ static const struct rte_eth_txconf tx_conf = { .txq_flags = 0x0, }; -#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) - -#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 -#include -#define DEFAULT_HASH_FUNC rte_hash_crc -#else -#include -#define DEFAULT_HASH_FUNC rte_jhash -#endif - -struct ipv4_5tuple { - uint32_t ip_dst; - uint32_t ip_src; - uint16_t port_dst; - uint16_t port_src; - uint8_t proto; -} __attribute__((__packed__)); - -struct ipv6_5tuple { - uint8_t ip_dst[IPV6_ADDR_LEN]; - uint8_t ip_src[IPV6_ADDR_LEN]; - uint16_t port_dst; - uint16_t port_src; - uint8_t proto; -} __attribute__((__packed__)); - -struct ipv4_l3fwd_route { - struct ipv4_5tuple key; - uint8_t if_out; -}; - -struct ipv6_l3fwd_route { - struct ipv6_5tuple key; - uint8_t if_out; -}; - -static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { - {{IPv4(100,10,0,1), IPv4(200,10,0,1), 101, 11, IPPROTO_TCP}, 0}, - {{IPv4(100,20,0,2), IPv4(200,20,0,2), 102, 12, IPPROTO_TCP}, 1}, - {{IPv4(100,30,0,3), IPv4(200,30,0,3), 103, 13, IPPROTO_TCP}, 2}, - {{IPv4(100,40,0,4), IPv4(200,40,0,4), 104, 14, IPPROTO_TCP}, 3}, -}; - -static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = { - { - { - {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05}, - {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x02, 0x1e, 0x67, 0xff, 0xfe, 0x0d, 0xb6, 0x0a}, - 1, 10, IPPROTO_UDP - }, 4 - }, -}; - -typedef struct rte_hash lookup_struct_t; -static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; -static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS]; - -#define L3FWD_HASH_ENTRIES 1024 - -#define IPV4_L3FWD_NUM_ROUTES \ - (sizeof(ipv4_l3fwd_route_array) /
sizeof(ipv4_l3fwd_route_array[0])) - -#define IPV6_L3FWD_NUM_ROUTES \ - (sizeof(ipv6_l3fwd_route_array) / sizeof(ipv6_l3fwd_route_array[0])) - -static uint8_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; -static uint8_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; -#endif - -#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) -struct ipv4_l3fwd_route { +/* + * IPv4 forwarding table + */ +struct l3fwd_ipv4_route { uint32_t ip; uint8_t depth; uint8_t if_out; }; -struct ipv6_l3fwd_route { - uint8_t ip[16]; - uint8_t depth; - uint8_t if_out; +struct l3fwd_ipv4_route l3fwd_ipv4_route_array[] = { + {IPv4(100,10,0,0), 16, 0}, + {IPv4(100,20,0,0), 16, 1}, + {IPv4(100,30,0,0), 16, 2}, + {IPv4(100,40,0,0), 16, 3}, + {IPv4(100,50,0,0), 16, 4}, + {IPv4(100,60,0,0), 16, 5}, + {IPv4(100,70,0,0), 16, 6}, + {IPv4(100,80,0,0), 16, 7}, }; -static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = { - {IPv4(1,1,1,0), 24, 0}, - {IPv4(2,1,1,0), 24, 1}, - {IPv4(3,1,1,0), 24, 2}, - {IPv4(4,1,1,0), 24, 3}, - {IPv4(5,1,1,0), 24, 4}, - {IPv4(6,1,1,0), 24, 5}, - {IPv4(7,1,1,0), 24, 6}, - {IPv4(8,1,1,0), 24, 7}, +/* + * IPv6 forwarding table + */ + +struct l3fwd_ipv6_route { + uint8_t ip[IPV6_ADDR_LEN]; + uint8_t depth; + uint8_t if_out; }; -static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = { +static struct l3fwd_ipv6_route l3fwd_ipv6_route_array[] = { {{1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 0}, {{2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 1}, {{3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 2}, @@ -369,59 +297,31 @@ static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = { {{8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 7}, }; -#define IPV4_L3FWD_NUM_ROUTES \ - (sizeof(ipv4_l3fwd_route_array) / sizeof(ipv4_l3fwd_route_array[0])) -#define IPV6_L3FWD_NUM_ROUTES \ - (sizeof(ipv6_l3fwd_route_array) / sizeof(ipv6_l3fwd_route_array[0])) - -#define IPV4_L3FWD_LPM_MAX_RULES 1024 -#define IPV6_L3FWD_LPM_MAX_RULES 1024 -#define IPV6_L3FWD_LPM_NUMBER_TBL8S (1 << 16) - -typedef struct rte_lpm 
lookup_struct_t; -typedef struct rte_lpm6 lookup6_struct_t; -static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS]; -static lookup6_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS]; -#endif +#define LPM_MAX_RULES 1024 +#define LPM6_MAX_RULES 1024 +#define LPM6_NUMBER_TBL8S (1 << 16) -struct tx_lcore_stat { - uint64_t call; - uint64_t drop; - uint64_t queue; - uint64_t send; +struct rte_lpm6_config lpm6_config = { + .max_rules = LPM6_MAX_RULES, + .number_tbl8s = LPM6_NUMBER_TBL8S, + .flags = 0 }; -#ifdef IPV4_FRAG_TBL_STAT -#define TX_LCORE_STAT_UPDATE(s, f, v) ((s)->f += (v)) -#else -#define TX_LCORE_STAT_UPDATE(s, f, v) do {} while (0) -#endif /* IPV4_FRAG_TBL_STAT */ +static struct rte_lpm *socket_lpm[RTE_MAX_NUMA_NODES]; +static struct rte_lpm6 *socket_lpm6[RTE_MAX_NUMA_NODES]; -struct lcore_conf { - uint16_t n_rx_queue; - struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; - uint16_t tx_queue_id[MAX_PORTS]; - lookup_struct_t * ipv4_lookup_struct; -#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) - lookup6_struct_t * ipv6_lookup_struct; +#ifdef IPV6_FRAG_TBL_STAT +#define TX_LCORE_STAT_UPDATE(s, f, v) ((s)->f += (v)) #else - lookup_struct_t * ipv6_lookup_struct; -#endif - struct rte_ip_frag_tbl *frag_tbl[MAX_RX_QUEUE_PER_LCORE]; - struct rte_mempool *pool[MAX_RX_QUEUE_PER_LCORE]; - struct rte_ip_frag_death_row death_row; - struct mbuf_table *tx_mbufs[MAX_PORTS]; - struct tx_lcore_stat tx_stat; -} __rte_cache_aligned; - -static struct lcore_conf lcore_conf[RTE_MAX_LCORE]; +#define TX_LCORE_STAT_UPDATE(s, f, v) do {} while (0) +#endif /* IPV6_FRAG_TBL_STAT */ /* * If number of queued packets reached given threahold, then * send burst of packets on an output interface. 
*/ static inline uint32_t -send_burst(struct lcore_conf *qconf, uint32_t thresh, uint8_t port) +send_burst(struct lcore_queue_conf *qconf, uint32_t thresh, uint8_t port) { uint32_t fill, len, k, n; struct mbuf_table *txmb; @@ -434,7 +334,7 @@ send_burst(struct lcore_conf *qconf, uint32_t thresh, uint8_t port) if (fill >= thresh) { n = RTE_MIN(len - txmb->tail, fill); - + k = rte_eth_tx_burst(port, qconf->tx_queue_id[port], txmb->m_table + txmb->tail, (uint16_t)n); @@ -454,11 +354,11 @@ static inline int send_single_packet(struct rte_mbuf *m, uint8_t port) { uint32_t fill, lcore_id, len; - struct lcore_conf *qconf; + struct lcore_queue_conf *qconf; struct mbuf_table *txmb; lcore_id = rte_lcore_id(); - qconf = &lcore_conf[lcore_id]; + qconf = &lcore_queue_conf[lcore_id]; txmb = qconf->tx_mbufs[port]; len = txmb->len; @@ -471,7 +371,7 @@ send_single_packet(struct rte_mbuf *m, uint8_t port) if (++txmb->tail == len) txmb->tail = 0; } - + TX_LCORE_STAT_UPDATE(&qconf->tx_stat, queue, 1); txmb->m_table[txmb->head] = m; if(++txmb->head == len) @@ -480,207 +380,43 @@ send_single_packet(struct rte_mbuf *m, uint8_t port) return (0); } -#ifdef DO_RFC_1812_CHECKS -static inline int -is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len) -{ - /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */ - /* - * 1. The packet length reported by the Link Layer must be large - * enough to hold the minimum length legal IP datagram (20 bytes). - */ - if (link_len < sizeof(struct ipv4_hdr)) - return -1; - - /* 2. The IP checksum must be correct. */ - /* this is checked in H/W */ - - /* - * 3. The IP version number must be 4. If the version number is not 4 - * then the packet may be another version of IP, such as IPng or - * ST-II. - */ - if (((pkt->version_ihl) >> 4) != 4) - return -3; - /* - * 4. The IP header length field must be large enough to hold the - * minimum length legal IP datagram (20 bytes = 5 words). 
- */ - if ((pkt->version_ihl & 0xf) < 5) - return -4; - - /* - * 5. The IP total length field must be large enough to hold the IP - * datagram header, whose length is specified in the IP header length - * field. - */ - if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct ipv4_hdr)) - return -5; - - return 0; -} -#endif - -#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) -static void -print_ipv4_key(struct ipv4_5tuple key) -{ - printf("IP dst = %08x, IP src = %08x, port dst = %d, port src = %d, proto = %d\n", - (unsigned)key.ip_dst, (unsigned)key.ip_src, key.port_dst, key.port_src, key.proto); -} -static void -print_ipv6_key(struct ipv6_5tuple key) -{ - printf( "IP dst = " IPv6_BYTES_FMT ", IP src = " IPv6_BYTES_FMT ", " - "port dst = %d, port src = %d, proto = %d\n", - IPv6_BYTES(key.ip_dst), IPv6_BYTES(key.ip_src), - key.port_dst, key.port_src, key.proto); -} - -static inline uint8_t -get_ipv4_dst_port(struct ipv4_hdr *ipv4_hdr, uint8_t portid, lookup_struct_t * ipv4_l3fwd_lookup_struct) -{ - struct ipv4_5tuple key; - struct tcp_hdr *tcp; - struct udp_hdr *udp; - int ret = 0; - - key.ip_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr); - key.ip_src = rte_be_to_cpu_32(ipv4_hdr->src_addr); - key.proto = ipv4_hdr->next_proto_id; - - switch (ipv4_hdr->next_proto_id) { - case IPPROTO_TCP: - tcp = (struct tcp_hdr *)((unsigned char *) ipv4_hdr + - sizeof(struct ipv4_hdr)); - key.port_dst = rte_be_to_cpu_16(tcp->dst_port); - key.port_src = rte_be_to_cpu_16(tcp->src_port); - break; - - case IPPROTO_UDP: - udp = (struct udp_hdr *)((unsigned char *) ipv4_hdr + - sizeof(struct ipv4_hdr)); - key.port_dst = rte_be_to_cpu_16(udp->dst_port); - key.port_src = rte_be_to_cpu_16(udp->src_port); - break; - - default: - key.port_dst = 0; - key.port_src = 0; - break; - } - - /* Find destination port */ - ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key); - return (uint8_t)((ret < 0)? 
portid : ipv4_l3fwd_out_if[ret]); -} - -static inline uint8_t -get_ipv6_dst_port(struct ipv6_hdr *ipv6_hdr, uint8_t portid, lookup_struct_t * ipv6_l3fwd_lookup_struct) -{ - struct ipv6_5tuple key; - struct tcp_hdr *tcp; - struct udp_hdr *udp; - int ret = 0; - - memcpy(key.ip_dst, ipv6_hdr->dst_addr, IPV6_ADDR_LEN); - memcpy(key.ip_src, ipv6_hdr->src_addr, IPV6_ADDR_LEN); - - key.proto = ipv6_hdr->proto; - - switch (ipv6_hdr->proto) { - case IPPROTO_TCP: - tcp = (struct tcp_hdr *)((unsigned char *) ipv6_hdr + - sizeof(struct ipv6_hdr)); - key.port_dst = rte_be_to_cpu_16(tcp->dst_port); - key.port_src = rte_be_to_cpu_16(tcp->src_port); - break; - - case IPPROTO_UDP: - udp = (struct udp_hdr *)((unsigned char *) ipv6_hdr + - sizeof(struct ipv6_hdr)); - key.port_dst = rte_be_to_cpu_16(udp->dst_port); - key.port_src = rte_be_to_cpu_16(udp->src_port); - break; - - default: - key.port_dst = 0; - key.port_src = 0; - break; - } - - /* Find destination port */ - ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key); - return (uint8_t)((ret < 0)? portid : ipv6_l3fwd_out_if[ret]); -} -#endif - -#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) -static inline uint8_t -get_ipv4_dst_port(struct ipv4_hdr *ipv4_hdr, uint8_t portid, lookup_struct_t * ipv4_l3fwd_lookup_struct) -{ - uint8_t next_hop; - - return (uint8_t) ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct, - rte_be_to_cpu_32(ipv4_hdr->dst_addr), &next_hop) == 0)? - next_hop : portid); -} - -static inline uint8_t -get_ipv6_dst_port(struct ipv6_hdr *ipv6_hdr, uint8_t portid, lookup6_struct_t * ipv6_l3fwd_lookup_struct) -{ - uint8_t next_hop; - - return (uint8_t) ((rte_lpm6_lookup(ipv6_l3fwd_lookup_struct, - ipv6_hdr->dst_addr, &next_hop) == 0)? 
- next_hop : portid); -} -#endif - static inline void -l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, uint32_t queue, - struct lcore_conf *qconf, uint64_t tms) +reassemble(struct rte_mbuf *m, uint8_t portid, uint32_t queue, + struct lcore_queue_conf *qconf, uint64_t tms) { struct ether_hdr *eth_hdr; - struct ipv4_hdr *ipv4_hdr; + struct rte_ip_frag_tbl *tbl; + struct rte_ip_frag_death_row *dr; + struct rx_queue * rxq; void *d_addr_bytes; - uint8_t dst_port; + uint8_t next_hop, dst_port; + + rxq = &qconf->rx_queue_list[queue]; eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); - if (m->ol_flags & PKT_RX_IPV4_HDR) { - /* Handle IPv4 headers.*/ - ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); + dst_port = portid; -#ifdef DO_RFC_1812_CHECKS - /* Check to make sure the packet is valid (RFC1812) */ - if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt.pkt_len) < 0) { - rte_pktmbuf_free(m); - return; - } + /* if packet is IPv4 */ + if (m->ol_flags & (PKT_RX_IPV4_HDR)) { + struct ipv4_hdr *ip_hdr; + uint32_t ip_dst; - /* Update time to live and header checksum */ - --(ipv4_hdr->time_to_live); - ++(ipv4_hdr->hdr_checksum); -#endif + ip_hdr = (struct ipv4_hdr *)(eth_hdr + 1); /* if it is a fragmented packet, then try to reassemble. */ - if (rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)) { - + if (rte_ipv4_frag_pkt_is_fragmented(ip_hdr)) { struct rte_mbuf *mo; - struct rte_ip_frag_tbl *tbl; - struct rte_ip_frag_death_row *dr; - tbl = qconf->frag_tbl[queue]; + tbl = rxq->frag_tbl; dr = &qconf->death_row; /* prepare mbuf: setup l2_len/l3_len. */ m->pkt.vlan_macip.f.l2_len = sizeof(*eth_hdr); - m->pkt.vlan_macip.f.l3_len = sizeof(*ipv4_hdr); + m->pkt.vlan_macip.f.l3_len = sizeof(*ip_hdr); /* process this fragment. */ - if ((mo = rte_ipv4_frag_reassemble_packet(tbl, dr, m, tms, - ipv4_hdr)) == NULL) + if ((mo = rte_ipv4_frag_reassemble_packet(tbl, dr, m, tms, ip_hdr)) == NULL) /* no packet to send out. 
*/ return; @@ -689,47 +425,67 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, uint32_t queue, m = mo; eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); - ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); + ip_hdr = (struct ipv4_hdr *)(eth_hdr + 1); } } + ip_dst = rte_be_to_cpu_32(ip_hdr->dst_addr); - dst_port = get_ipv4_dst_port(ipv4_hdr, portid, - qconf->ipv4_lookup_struct); - if (dst_port >= MAX_PORTS || - (enabled_port_mask & 1 << dst_port) == 0) - dst_port = portid; + /* Find destination port */ + if (rte_lpm_lookup(rxq->lpm, ip_dst, &next_hop) == 0 && + (enabled_port_mask & 1 << next_hop) != 0) { + dst_port = next_hop; + } - /* 02:00:00:00:00:xx */ - d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0]; - *((uint64_t *)d_addr_bytes) = 0x000000000002 + ((uint64_t)dst_port << 40); + eth_hdr->ether_type = rte_be_to_cpu_16(ETHER_TYPE_IPv4); + } + /* if packet is IPv6 */ + else if (m->ol_flags & (PKT_RX_IPV6_HDR | PKT_RX_IPV6_HDR_EXT)) { + struct ipv6_extension_fragment *frag_hdr; + struct ipv6_hdr *ip_hdr; - /* src addr */ - ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr); + ip_hdr = (struct ipv6_hdr *)(eth_hdr + 1); - send_single_packet(m, dst_port); - } - else { - /* Handle IPv6 headers.*/ - struct ipv6_hdr *ipv6_hdr; + frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(ip_hdr); - ipv6_hdr = (struct ipv6_hdr *)(rte_pktmbuf_mtod(m, unsigned char *) + - sizeof(struct ether_hdr)); + if(frag_hdr != NULL) { + struct rte_mbuf *mo; - dst_port = get_ipv6_dst_port(ipv6_hdr, portid, qconf->ipv6_lookup_struct); + tbl = rxq->frag_tbl; + dr = &qconf->death_row; - if (dst_port >= MAX_PORTS || (enabled_port_mask & 1 << dst_port) == 0) - dst_port = portid; + /* prepare mbuf: setup l2_len/l3_len.
*/ + m->pkt.vlan_macip.f.l2_len = sizeof(*eth_hdr); + m->pkt.vlan_macip.f.l3_len = sizeof(*ip_hdr) + sizeof(*frag_hdr); - /* 02:00:00:00:00:xx */ - d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0]; - *((uint64_t *)d_addr_bytes) = 0x000000000002 + ((uint64_t)dst_port << 40); + if((mo = rte_ipv6_frag_reassemble_packet(tbl, dr, m, tms, ip_hdr, + frag_hdr)) == NULL) + return; - /* src addr */ - ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr); + if(mo != m) { + m = mo; + eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + ip_hdr = (struct ipv6_hdr *)(eth_hdr + 1); + } + } - send_single_packet(m, dst_port); + /* Find destination port */ + if (rte_lpm6_lookup(rxq->lpm6, ip_hdr->dst_addr, &next_hop) == 0 && + (enabled_port_mask & 1 << next_hop) != 0) { + dst_port = next_hop; + } + + eth_hdr->ether_type = rte_be_to_cpu_16(ETHER_TYPE_IPv6); } + /* if packet wasn't IPv4 or IPv6, it's forwarded to the port it came from */ + + /* 02:00:00:00:00:xx */ + d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0]; + *((uint64_t *)d_addr_bytes) = 0x000000000002 + ((uint64_t)dst_port << 40); + /* src addr */ + ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr); + + send_single_packet(m, dst_port); } /* main processing loop */ @@ -740,28 +496,27 @@ main_loop(__attribute__((unused)) void *dummy) unsigned lcore_id; uint64_t diff_tsc, cur_tsc, prev_tsc; int i, j, nb_rx; - uint8_t portid, queueid; - struct lcore_conf *qconf; + uint8_t portid; + struct lcore_queue_conf *qconf; const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US; prev_tsc = 0; lcore_id = rte_lcore_id(); - qconf = &lcore_conf[lcore_id]; + qconf = &lcore_queue_conf[lcore_id]; if (qconf->n_rx_queue == 0) { - RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", lcore_id); + RTE_LOG(INFO, IP_RSMBL, "lcore %u has nothing to do\n", lcore_id); return 0; } - RTE_LOG(INFO, L3FWD, "entering main loop on lcore %u\n", lcore_id); + RTE_LOG(INFO, IP_RSMBL, "entering main loop on lcore %u\n", lcore_id);
for (i = 0; i < qconf->n_rx_queue; i++) { - portid = qconf->rx_queue_list[i].port_id; - queueid = qconf->rx_queue_list[i].queue_id; - RTE_LOG(INFO, L3FWD, " -- lcoreid=%u portid=%hhu rxqueueid=%hhu\n", lcore_id, - portid, queueid); + portid = qconf->rx_queue_list[i].portid; + RTE_LOG(INFO, IP_RSMBL, " -- lcoreid=%u portid=%hhu\n", lcore_id, + portid); } while (1) { @@ -778,7 +533,7 @@ main_loop(__attribute__((unused)) void *dummy) * This could be optimized (use queueid instead of * portid), but it is not called so often */ - for (portid = 0; portid < MAX_PORTS; portid++) { + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) { if ((enabled_port_mask & (1 << portid)) != 0) send_burst(qconf, 1, portid); } @@ -791,10 +546,9 @@ main_loop(__attribute__((unused)) void *dummy) */ for (i = 0; i < qconf->n_rx_queue; ++i) { - portid = qconf->rx_queue_list[i].port_id; - queueid = qconf->rx_queue_list[i].queue_id; + portid = qconf->rx_queue_list[i].portid; - nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, + nb_rx = rte_eth_rx_burst(portid, 0, pkts_burst, MAX_PKT_BURST); /* Prefetch first packets */ @@ -807,13 +561,13 @@ main_loop(__attribute__((unused)) void *dummy) for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[ j + PREFETCH_OFFSET], void *)); - l3fwd_simple_forward(pkts_burst[j], portid, + reassemble(pkts_burst[j], portid, i, qconf, cur_tsc); } /* Forward remaining prefetched packets */ for (; j < nb_rx; j++) { - l3fwd_simple_forward(pkts_burst[j], portid, + reassemble(pkts_burst[j], portid, i, qconf, cur_tsc); } @@ -823,104 +577,15 @@ main_loop(__attribute__((unused)) void *dummy) } } -static int -check_lcore_params(void) -{ - uint8_t queue, lcore; - uint16_t i; - int socketid; - - for (i = 0; i < nb_lcore_params; ++i) { - queue = lcore_params[i].queue_id; - if (queue >= MAX_RX_QUEUE_PER_PORT) { - printf("invalid queue number: %hhu\n", queue); - return -1; - } - lcore = lcore_params[i].lcore_id; - if 
(!rte_lcore_is_enabled(lcore)) { - printf("error: lcore %hhu is not enabled in lcore mask\n", lcore); - return -1; - } - if ((socketid = rte_lcore_to_socket_id(lcore) != 0) && - (numa_on == 0)) { - printf("warning: lcore %hhu is on socket %d with numa off \n", - lcore, socketid); - } - } - return 0; -} - -static int -check_port_config(const unsigned nb_ports) -{ - unsigned portid; - uint16_t i; - - for (i = 0; i < nb_lcore_params; ++i) { - portid = lcore_params[i].port_id; - if ((enabled_port_mask & (1 << portid)) == 0) { - printf("port %u is not enabled in port mask\n", portid); - return -1; - } - if (portid >= nb_ports) { - printf("port %u is not present on the board\n", portid); - return -1; - } - } - return 0; -} - -static uint8_t -get_port_n_rx_queues(const uint8_t port) -{ - int queue = -1; - uint16_t i; - - for (i = 0; i < nb_lcore_params; ++i) { - if (lcore_params[i].port_id == port && lcore_params[i].queue_id > queue) - queue = lcore_params[i].queue_id; - } - return (uint8_t)(++queue); -} - -static int -init_lcore_rx_queues(void) -{ - uint16_t i, nb_rx_queue; - uint8_t lcore; - - for (i = 0; i < nb_lcore_params; ++i) { - lcore = lcore_params[i].lcore_id; - nb_rx_queue = lcore_conf[lcore].n_rx_queue; - if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) { - printf("error: too many queues (%u) for lcore: %u\n", - (unsigned)nb_rx_queue + 1, (unsigned)lcore); - return -1; - } else { - lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id = - lcore_params[i].port_id; - lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id = - lcore_params[i].queue_id; - lcore_conf[lcore].n_rx_queue++; - } - } - return 0; -} - /* display usage */ static void print_usage(const char *prgname) { - printf ("%s [EAL options] -- -p PORTMASK -P" - " [--config (port,queue,lcore)[,(port,queue,lcore]]" - " [--enable-jumbo [--max-pkt-len PKTLEN]]" + printf ("%s [EAL options] -- -p PORTMASK [-q NQ]" + " [--max-pkt-len PKTLEN]" " [--maxflows=] [--flowttl=[(s|ms)]]\n" " -p PORTMASK: hexadecimal bitmask 
of ports to configure\n" - " -P : enable promiscuous mode\n" - " --config (port,queue,lcore): rx queues configuration\n" - " --no-numa: optional, disable numa awareness\n" - " --enable-jumbo: enable jumbo frame" - " which max packet len is PKTLEN in decimal (64-9600)\n" + " -q NQ: number of RX queues per lcore\n" " --maxflows=: optional, maximum number of flows " "supported\n" " --flowttl=[(s|ms)]: optional, maximum TTL for each " @@ -953,8 +618,8 @@ parse_flow_ttl(const char *str, uint32_t min, uint32_t max, uint32_t *val) char *end; uint64_t v; - static const char frmt_sec[] = "s"; - static const char frmt_msec[] = "ms"; + static const char frmt_sec[] = "s"; + static const char frmt_msec[] = "ms"; /* parse decimal string */ errno = 0; @@ -976,23 +641,6 @@ parse_flow_ttl(const char *str, uint32_t min, uint32_t max, uint32_t *val) return (0); } - -static int parse_max_pkt_len(const char *pktlen) -{ - char *end = NULL; - unsigned long len; - - /* parse decimal string */ - len = strtoul(pktlen, &end, 10); - if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0')) - return -1; - - if (len == 0) - return -1; - - return len; -} - static int parse_portmask(const char *portmask) { @@ -1011,54 +659,23 @@ parse_portmask(const char *portmask) } static int -parse_config(const char *q_arg) +parse_nqueue(const char *q_arg) { - char s[256]; - const char *p, *p0 = q_arg; - char *end; - enum fieldnames { - FLD_PORT = 0, - FLD_QUEUE, - FLD_LCORE, - _NUM_FLD - }; - unsigned long int_fld[_NUM_FLD]; - char *str_fld[_NUM_FLD]; - int i; - unsigned size; - - nb_lcore_params = 0; + char *end = NULL; + unsigned long n; - while ((p = strchr(p0,'(')) != NULL) { - ++p; - if((p0 = strchr(p,')')) == NULL) - return -1; + printf("%p\n", q_arg); - size = p0 - p; - if(size >= sizeof(s)) - return -1; + /* parse decimal string */ + n = strtoul(q_arg, &end, 10); + if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + if (n == 0) + return -1; + if (n >=
MAX_RX_QUEUE_PER_LCORE) + return -1; - rte_snprintf(s, sizeof(s), "%.*s", size, p); - if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD) - return -1; - for (i = 0; i < _NUM_FLD; i++){ - errno = 0; - int_fld[i] = strtoul(str_fld[i], &end, 0); - if (errno != 0 || end == str_fld[i] || int_fld[i] > 255) - return -1; - } - if (nb_lcore_params >= MAX_LCORE_PARAMS) { - printf("exceeded max number of lcore params: %hu\n", - nb_lcore_params); - return -1; - } - lcore_params_array[nb_lcore_params].port_id = (uint8_t)int_fld[FLD_PORT]; - lcore_params_array[nb_lcore_params].queue_id = (uint8_t)int_fld[FLD_QUEUE]; - lcore_params_array[nb_lcore_params].lcore_id = (uint8_t)int_fld[FLD_LCORE]; - ++nb_lcore_params; - } - lcore_params = lcore_params_array; - return 0; + return n; } /* Parse the argument given in the command line of the application */ @@ -1070,9 +687,7 @@ parse_args(int argc, char **argv) int option_index; char *prgname = argv[0]; static struct option lgopts[] = { - {"config", 1, 0, 0}, - {"no-numa", 0, 0, 0}, - {"enable-jumbo", 0, 0, 0}, + {"max-pkt-len", 1, 0, 0}, {"maxflows", 1, 0, 0}, {"flowttl", 1, 0, 0}, {NULL, 0, 0, 0} @@ -1080,7 +695,7 @@ parse_args(int argc, char **argv) argvopt = argv; - while ((opt = getopt_long(argc, argvopt, "p:P", + while ((opt = getopt_long(argc, argvopt, "p:q:", lgopts, &option_index)) != EOF) { switch (opt) { @@ -1093,27 +708,19 @@ parse_args(int argc, char **argv) return -1; } break; - case 'P': - printf("Promiscuous mode selected\n"); - promiscuous_on = 1; + + /* nqueue */ + case 'q': + rx_queue_per_lcore = parse_nqueue(optarg); + if (rx_queue_per_lcore < 0) { + printf("invalid queue number\n"); + print_usage(prgname); + return -1; + } break; /* long options */ case 0: - if (!strncmp(lgopts[option_index].name, "config", 6)) { - ret = parse_config(optarg); - if (ret) { - printf("invalid config\n"); - print_usage(prgname); - return -1; - } - } - - if (!strncmp(lgopts[option_index].name, "no-numa", 7)) { - 
printf("numa is disabled \n"); - numa_on = 0; - } - if (!strncmp(lgopts[option_index].name, "maxflows", 8)) { if ((ret = parse_flow_num(optarg, MIN_FLOW_NUM, @@ -1127,7 +734,7 @@ parse_args(int argc, char **argv) return (ret); } } - + if (!strncmp(lgopts[option_index].name, "flowttl", 7)) { if ((ret = parse_flow_ttl(optarg, MIN_FLOW_TTL, MAX_FLOW_TTL, @@ -1141,26 +748,6 @@ parse_args(int argc, char **argv) } } - if (!strncmp(lgopts[option_index].name, "enable-jumbo", 12)) { - struct option lenopts = {"max-pkt-len", required_argument, 0, 0}; - - printf("jumbo frame is enabled \n"); - port_conf.rxmode.jumbo_frame = 1; - - /* if no max-pkt-len set, use the default value ETHER_MAX_LEN */ - if (0 == getopt_long(argc, argvopt, "", &lenopts, &option_index)) { - ret = parse_max_pkt_len(optarg); - if ((ret < 64) || (ret > MAX_JUMBO_PKT_LEN)){ - printf("invalid packet length\n"); - print_usage(prgname); - return -1; - } - port_conf.rxmode.max_rx_pkt_len = ret; - } - printf("set jumbo frame max packet length to %u\n", - (unsigned int)port_conf.rxmode.max_rx_pkt_len); - } - break; default: @@ -1189,182 +776,6 @@ print_ethaddr(const char *name, const struct ether_addr *eth_addr) eth_addr->addr_bytes[5]); } -#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) -static void -setup_hash(int socketid) -{ - struct rte_hash_parameters ipv4_l3fwd_hash_params = { - .name = NULL, - .entries = L3FWD_HASH_ENTRIES, - .bucket_entries = 4, - .key_len = sizeof(struct ipv4_5tuple), - .hash_func = DEFAULT_HASH_FUNC, - .hash_func_init_val = 0, - }; - - struct rte_hash_parameters ipv6_l3fwd_hash_params = { - .name = NULL, - .entries = L3FWD_HASH_ENTRIES, - .bucket_entries = 4, - .key_len = sizeof(struct ipv6_5tuple), - .hash_func = DEFAULT_HASH_FUNC, - .hash_func_init_val = 0, - }; - - unsigned i; - int ret; - char s[64]; - - /* create ipv4 hash */ - rte_snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); - ipv4_l3fwd_hash_params.name = s; - ipv4_l3fwd_hash_params.socket_id = socketid; - 
ipv4_l3fwd_lookup_struct[socketid] = rte_hash_create(&ipv4_l3fwd_hash_params); - if (ipv4_l3fwd_lookup_struct[socketid] == NULL) - rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " - "socket %d\n", socketid); - - /* create ipv6 hash */ - rte_snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); - ipv6_l3fwd_hash_params.name = s; - ipv6_l3fwd_hash_params.socket_id = socketid; - ipv6_l3fwd_lookup_struct[socketid] = rte_hash_create(&ipv6_l3fwd_hash_params); - if (ipv6_l3fwd_lookup_struct[socketid] == NULL) - rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on " - "socket %d\n", socketid); - - - /* populate the ipv4 hash */ - for (i = 0; i < IPV4_L3FWD_NUM_ROUTES; i++) { - ret = rte_hash_add_key (ipv4_l3fwd_lookup_struct[socketid], - (void *) &ipv4_l3fwd_route_array[i].key); - if (ret < 0) { - rte_exit(EXIT_FAILURE, "Unable to add entry %u to the" - "l3fwd hash on socket %d\n", i, socketid); - } - ipv4_l3fwd_out_if[ret] = ipv4_l3fwd_route_array[i].if_out; - printf("Hash: Adding key\n"); - print_ipv4_key(ipv4_l3fwd_route_array[i].key); - } - - /* populate the ipv6 hash */ - for (i = 0; i < IPV6_L3FWD_NUM_ROUTES; i++) { - ret = rte_hash_add_key (ipv6_l3fwd_lookup_struct[socketid], - (void *) &ipv6_l3fwd_route_array[i].key); - if (ret < 0) { - rte_exit(EXIT_FAILURE, "Unable to add entry %u to the" - "l3fwd hash on socket %d\n", i, socketid); - } - ipv6_l3fwd_out_if[ret] = ipv6_l3fwd_route_array[i].if_out; - printf("Hash: Adding key\n"); - print_ipv6_key(ipv6_l3fwd_route_array[i].key); - } -} -#endif - -#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) -static void -setup_lpm(int socketid) -{ - struct rte_lpm6_config config; - unsigned i; - int ret; - char s[64]; - - /* create the LPM table */ - rte_snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid); - ipv4_l3fwd_lookup_struct[socketid] = rte_lpm_create(s, socketid, - IPV4_L3FWD_LPM_MAX_RULES, 0); - if (ipv4_l3fwd_lookup_struct[socketid] == NULL) - rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM 
table" - " on socket %d\n", socketid); - - /* populate the LPM table */ - for (i = 0; i < IPV4_L3FWD_NUM_ROUTES; i++) { - ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid], - ipv4_l3fwd_route_array[i].ip, - ipv4_l3fwd_route_array[i].depth, - ipv4_l3fwd_route_array[i].if_out); - - if (ret < 0) { - rte_exit(EXIT_FAILURE, "Unable to add entry %u to the " - "l3fwd LPM table on socket %d\n", - i, socketid); - } - - printf("LPM: Adding route 0x%08x / %d (%d)\n", - (unsigned)ipv4_l3fwd_route_array[i].ip, - ipv4_l3fwd_route_array[i].depth, - ipv4_l3fwd_route_array[i].if_out); - } - - /* create the LPM6 table */ - rte_snprintf(s, sizeof(s), "IPV6_L3FWD_LPM_%d", socketid); - - config.max_rules = IPV6_L3FWD_LPM_MAX_RULES; - config.number_tbl8s = IPV6_L3FWD_LPM_NUMBER_TBL8S; - config.flags = 0; - ipv6_l3fwd_lookup_struct[socketid] = rte_lpm6_create(s, socketid, - &config); - if (ipv6_l3fwd_lookup_struct[socketid] == NULL) - rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table" - " on socket %d\n", socketid); - - /* populate the LPM table */ - for (i = 0; i < IPV6_L3FWD_NUM_ROUTES; i++) { - ret = rte_lpm6_add(ipv6_l3fwd_lookup_struct[socketid], - ipv6_l3fwd_route_array[i].ip, - ipv6_l3fwd_route_array[i].depth, - ipv6_l3fwd_route_array[i].if_out); - - if (ret < 0) { - rte_exit(EXIT_FAILURE, "Unable to add entry %u to the " - "l3fwd LPM table on socket %d\n", - i, socketid); - } - - printf("LPM: Adding route %s / %d (%d)\n", - "IPV6", - ipv6_l3fwd_route_array[i].depth, - ipv6_l3fwd_route_array[i].if_out); - } -} -#endif - -static int -init_mem(void) -{ - struct lcore_conf *qconf; - int socketid; - unsigned lcore_id; - - for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { - if (rte_lcore_is_enabled(lcore_id) == 0) - continue; - - if (numa_on) - socketid = rte_lcore_to_socket_id(lcore_id); - else - socketid = 0; - - if (socketid >= NB_SOCKETS) { - rte_exit(EXIT_FAILURE, - "Socket %d of lcore %u is out of range %d\n", - socketid, lcore_id, NB_SOCKETS); - } - -#if 
(APP_LOOKUP_METHOD == APP_LOOKUP_LPM) - setup_lpm(socketid); -#else - setup_hash(socketid); -#endif - qconf = &lcore_conf[lcore_id]; - qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid]; - qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid]; - } - return 0; -} - /* Check the link status of all ports in up to 9s, and print them finally */ static void check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) @@ -1415,12 +826,73 @@ check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) /* set the print_flag if all ports up or timeout */ if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { print_flag = 1; - printf("done\n"); + printf("\ndone\n"); } } } -static void -setup_port_tbl(struct lcore_conf *qconf, uint32_t lcore, int socket, + +static int +init_routing_table(void) +{ + struct rte_lpm * lpm; + struct rte_lpm6 * lpm6; + int socket, ret; + unsigned i; + + for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) { + if (socket_lpm[socket]) { + lpm = socket_lpm[socket]; + /* populate the LPM table */ + for (i = 0; i < RTE_DIM(l3fwd_ipv4_route_array); i++) { + ret = rte_lpm_add(lpm, + l3fwd_ipv4_route_array[i].ip, + l3fwd_ipv4_route_array[i].depth, + l3fwd_ipv4_route_array[i].if_out); + + if (ret < 0) { + RTE_LOG(ERR, IP_RSMBL, "Unable to add entry %i to the l3fwd " + "LPM table\n", i); + return -1; + } + + RTE_LOG(INFO, IP_RSMBL, "Socket %i: adding route " IPv4_BYTES_FMT + "/%d (port %d)\n", + socket, + IPv4_BYTES(l3fwd_ipv4_route_array[i].ip), + l3fwd_ipv4_route_array[i].depth, + l3fwd_ipv4_route_array[i].if_out); + } + } + + if (socket_lpm6[socket]) { + lpm6 = socket_lpm6[socket]; + /* populate the LPM6 table */ + for (i = 0; i < RTE_DIM(l3fwd_ipv6_route_array); i++) { + ret = rte_lpm6_add(lpm6, + l3fwd_ipv6_route_array[i].ip, + l3fwd_ipv6_route_array[i].depth, + l3fwd_ipv6_route_array[i].if_out); + + if (ret < 0) { + RTE_LOG(ERR, IP_RSMBL, "Unable to add entry %i to the l3fwd " + "LPM6 table\n", i); + return -1; + } 
+ + RTE_LOG(INFO, IP_RSMBL, "Socket %i: adding route " IPv6_BYTES_FMT + "/%d (port %d)\n", + socket, + IPv6_BYTES(l3fwd_ipv6_route_array[i].ip), + l3fwd_ipv6_route_array[i].depth, + l3fwd_ipv6_route_array[i].if_out); + } + } + } + return 0; +} + +static int +setup_port_tbl(struct lcore_queue_conf *qconf, uint32_t lcore, int socket, uint32_t port) { struct mbuf_table *mtb; @@ -1431,73 +903,136 @@ setup_port_tbl(struct lcore_conf *qconf, uint32_t lcore, int socket, sz = sizeof (*mtb) + sizeof (mtb->m_table[0]) * n; if ((mtb = rte_zmalloc_socket(__func__, sz, CACHE_LINE_SIZE, - socket)) == NULL) - rte_exit(EXIT_FAILURE, "%s() for lcore: %u, port: %u " + socket)) == NULL) { + RTE_LOG(ERR, IP_RSMBL, "%s() for lcore: %u, port: %u " "failed to allocate %zu bytes\n", __func__, lcore, port, sz); + return -1; + } mtb->len = n; qconf->tx_mbufs[port] = mtb; + + return 0; } -static void -setup_queue_tbl(struct lcore_conf *qconf, uint32_t lcore, int socket, - uint32_t queue) +static int +setup_queue_tbl(struct rx_queue *rxq, uint32_t lcore, uint32_t queue) { + int socket; uint32_t nb_mbuf; uint64_t frag_cycles; char buf[RTE_MEMPOOL_NAMESIZE]; + socket = rte_lcore_to_socket_id(lcore); + if (socket == SOCKET_ID_ANY) + socket = 0; + frag_cycles = (rte_get_tsc_hz() + MS_PER_S - 1) / MS_PER_S * max_flow_ttl; - if ((qconf->frag_tbl[queue] = rte_ip_frag_table_create(max_flow_num, - IPV4_FRAG_TBL_BUCKET_ENTRIES, max_flow_num, frag_cycles, - socket)) == NULL) - rte_exit(EXIT_FAILURE, "ipv4_frag_tbl_create(%u) on " + if ((rxq->frag_tbl = rte_ip_frag_table_create(max_flow_num, + IP_FRAG_TBL_BUCKET_ENTRIES, max_flow_num, frag_cycles, + socket)) == NULL) { + RTE_LOG(ERR, IP_RSMBL, "ip_frag_tbl_create(%u) on " "lcore: %u for queue: %u failed\n", max_flow_num, lcore, queue); + return -1; + } /* * At any given moment up to * mbufs could be stored int the fragment table. * Plus, each TX queue can hold up to packets. 
- */ + */ - nb_mbuf = 2 * RTE_MAX(max_flow_num, 2UL * MAX_PKT_BURST) * - RTE_LIBRTE_IP_FRAG_MAX_FRAG; + nb_mbuf = 2 * RTE_MAX(max_flow_num, 2UL * MAX_PKT_BURST) * MAX_FRAG_NUM; nb_mbuf *= (port_conf.rxmode.max_rx_pkt_len + BUF_SIZE - 1) / BUF_SIZE; nb_mbuf += RTE_TEST_RX_DESC_DEFAULT + RTE_TEST_TX_DESC_DEFAULT; + nb_mbuf *= 2; /* ipv4 and ipv6 */ + + nb_mbuf = RTE_MAX(nb_mbuf, (uint32_t)NB_MBUF); - nb_mbuf = RTE_MAX(nb_mbuf, (uint32_t)DEF_MBUF_NUM); - rte_snprintf(buf, sizeof(buf), "mbuf_pool_%u_%u", lcore, queue); - if ((qconf->pool[queue] = rte_mempool_create(buf, nb_mbuf, MBUF_SIZE, 0, + if ((rxq->pool = rte_mempool_create(buf, nb_mbuf, MBUF_SIZE, 0, sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, NULL, - socket, MEMPOOL_F_SP_PUT | MEMPOOL_F_SC_GET)) == NULL) - rte_exit(EXIT_FAILURE, "mempool_create(%s) failed", buf); + socket, MEMPOOL_F_SP_PUT | MEMPOOL_F_SC_GET)) == NULL) { + RTE_LOG(ERR, IP_RSMBL, "mempool_create(%s) failed", buf); + return -1; + } + + return 0; +} + +static int +init_mem(void) +{ + char buf[PATH_MAX]; + struct rte_lpm * lpm; + struct rte_lpm6 * lpm6; + int socket; + unsigned lcore_id; + + /* traverse through lcores and initialize structures on each socket */ + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + + if (rte_lcore_is_enabled(lcore_id) == 0) + continue; + + socket = rte_lcore_to_socket_id(lcore_id); + + if (socket == SOCKET_ID_ANY) + socket = 0; + + if (socket_lpm[socket] == NULL) { + RTE_LOG(INFO, IP_RSMBL, "Creating LPM table on socket %i\n", socket); + rte_snprintf(buf, sizeof(buf), "IP_RSMBL_LPM_%i", socket); + + lpm = rte_lpm_create(buf, socket, LPM_MAX_RULES, 0); + if (lpm == NULL) { + RTE_LOG(ERR, IP_RSMBL, "Cannot create LPM table\n"); + return -1; + } + socket_lpm[socket] = lpm; + } + + if (socket_lpm6[socket] == NULL) { + RTE_LOG(INFO, IP_RSMBL, "Creating LPM6 table on socket %i\n", socket); + rte_snprintf(buf, sizeof(buf), "IP_RSMBL_LPM_%i", socket); + + lpm6 = 
rte_lpm6_create("IP_RSMBL_LPM6", socket, &lpm6_config); + if (lpm6 == NULL) { + RTE_LOG(ERR, IP_RSMBL, "Cannot create LPM table\n"); + return -1; + } + socket_lpm6[socket] = lpm6; + } + } + + return 0; } static void queue_dump_stat(void) { uint32_t i, lcore; - const struct lcore_conf *qconf; + const struct lcore_queue_conf *qconf; for (lcore = 0; lcore < RTE_MAX_LCORE; lcore++) { if (rte_lcore_is_enabled(lcore) == 0) continue; - qconf = lcore_conf + lcore; + qconf = &lcore_queue_conf[lcore]; for (i = 0; i < qconf->n_rx_queue; i++) { fprintf(stdout, " -- lcoreid=%u portid=%hhu " - "rxqueueid=%hhu frag tbl stat:\n", - lcore, qconf->rx_queue_list[i].port_id, - qconf->rx_queue_list[i].queue_id); - rte_ip_frag_table_statistics_dump(stdout, qconf->frag_tbl[i]); + "frag tbl stat:\n", + lcore, qconf->rx_queue_list[i].portid); + rte_ip_frag_table_statistics_dump(stdout, + qconf->rx_queue_list[i].frag_tbl); fprintf(stdout, "TX bursts:\t%" PRIu64 "\n" "TX packets _queued:\t%" PRIu64 "\n" "TX packets dropped:\t%" PRIu64 "\n" @@ -1521,13 +1056,14 @@ signal_handler(int signum) int MAIN(int argc, char **argv) { - struct lcore_conf *qconf; - int ret; + struct lcore_queue_conf *qconf; + struct rx_queue * rxq; + int ret, socket; unsigned nb_ports; uint16_t queueid; - unsigned lcore_id; + unsigned lcore_id = 0, rx_lcore_id = 0; uint32_t n_tx_queue, nb_lcores; - uint8_t portid, nb_rx_queue, queue, socketid; + uint8_t portid; /* init EAL */ ret = rte_eal_init(argc, argv); @@ -1539,28 +1075,23 @@ MAIN(int argc, char **argv) /* parse application arguments (after the EAL ones) */ ret = parse_args(argc, argv); if (ret < 0) - rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n"); - - if (check_lcore_params() < 0) - rte_exit(EXIT_FAILURE, "check_lcore_params failed\n"); - - ret = init_lcore_rx_queues(); - if (ret < 0) - rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n"); - + rte_exit(EXIT_FAILURE, "Invalid IP reassembly parameters\n"); if (rte_eal_pci_probe() < 0) rte_exit(EXIT_FAILURE, 
"Cannot probe PCI\n"); nb_ports = rte_eth_dev_count(); - if (nb_ports > MAX_PORTS) - nb_ports = MAX_PORTS; - - if (check_port_config(nb_ports) < 0) - rte_exit(EXIT_FAILURE, "check_port_config failed\n"); + if (nb_ports > RTE_MAX_ETHPORTS) + nb_ports = RTE_MAX_ETHPORTS; + else if (nb_ports == 0) + rte_exit(EXIT_FAILURE, "No ports found!\n"); nb_lcores = rte_lcore_count(); + /* initialize structures (mempools, lpm etc.) */ + if (init_mem() < 0) + rte_panic("Cannot initialize memory structures!\n"); + /* initialize all ports */ for (portid = 0; portid < nb_ports; portid++) { /* skip ports that are not enabled */ @@ -1569,30 +1100,62 @@ MAIN(int argc, char **argv) continue; } + qconf = &lcore_queue_conf[rx_lcore_id]; + + /* get the lcore_id for this port */ + while (rte_lcore_is_enabled(rx_lcore_id) == 0 || + qconf->n_rx_queue == (unsigned)rx_queue_per_lcore) { + + rx_lcore_id ++; + if (rx_lcore_id >= RTE_MAX_LCORE) + rte_exit(EXIT_FAILURE, "Not enough cores\n"); + + qconf = &lcore_queue_conf[rx_lcore_id]; + } + + socket = rte_eth_dev_socket_id(portid); + if (socket == SOCKET_ID_ANY) + socket = 0; + + queueid = qconf->n_rx_queue; + rxq = &qconf->rx_queue_list[queueid]; + rxq->portid = portid; + rxq->lpm = socket_lpm[socket]; + rxq->lpm6 = socket_lpm6[socket]; + if (setup_queue_tbl(rxq, rx_lcore_id, queueid) < 0) + rte_exit(EXIT_FAILURE, "Failed to set up queue table\n"); + qconf->n_rx_queue++; + /* init port */ printf("Initializing port %d ... ", portid ); fflush(stdout); - nb_rx_queue = get_port_n_rx_queues(portid); n_tx_queue = nb_lcores; if (n_tx_queue > MAX_TX_QUEUE_PER_PORT) n_tx_queue = MAX_TX_QUEUE_PER_PORT; - printf("Creating queues: nb_rxq=%d nb_txq=%u... 
", - nb_rx_queue, (unsigned)n_tx_queue ); - ret = rte_eth_dev_configure(portid, nb_rx_queue, - (uint16_t)n_tx_queue, &port_conf); - if (ret < 0) - rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%d\n", + ret = rte_eth_dev_configure(portid, 1, (uint16_t)n_tx_queue, + &port_conf); + if (ret < 0) { + printf("\n"); + rte_exit(EXIT_FAILURE, "Cannot configure device: " + "err=%d, port=%d\n", ret, portid); + } + + /* init one RX queue */ + ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd, + socket, &rx_conf, + rxq->pool); + if (ret < 0) { + printf("\n"); + rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: " + "err=%d, port=%d\n", + ret, portid); + } rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); print_ethaddr(" Address:", &ports_eth_addr[portid]); - printf(", "); - - /* init memory */ - ret = init_mem(); - if (ret < 0) - rte_exit(EXIT_FAILURE, "init_mem failed\n"); + printf("\n"); /* init one TX queue per couple (lcore,port) */ queueid = 0; @@ -1600,57 +1163,24 @@ MAIN(int argc, char **argv) if (rte_lcore_is_enabled(lcore_id) == 0) continue; - if (numa_on) - socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id); - else - socketid = 0; + socket = (int) rte_lcore_to_socket_id(lcore_id); - printf("txq=%u,%d,%d ", lcore_id, queueid, socketid); + printf("txq=%u,%d,%d ", lcore_id, queueid, socket); fflush(stdout); ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd, - socketid, &tx_conf); + socket, &tx_conf); if (ret < 0) rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, " "port=%d\n", ret, portid); - qconf = &lcore_conf[lcore_id]; + qconf = &lcore_queue_conf[lcore_id]; qconf->tx_queue_id[portid] = queueid; - setup_port_tbl(qconf, lcore_id, socketid, portid); + setup_port_tbl(qconf, lcore_id, socket, portid); queueid++; } printf("\n"); } - for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { - if (rte_lcore_is_enabled(lcore_id) == 0) - continue; - qconf = &lcore_conf[lcore_id]; - printf("\nInitializing rx queues on lcore %u ... 
", lcore_id ); - fflush(stdout); - /* init RX queues */ - for(queue = 0; queue < qconf->n_rx_queue; ++queue) { - portid = qconf->rx_queue_list[queue].port_id; - queueid = qconf->rx_queue_list[queue].queue_id; - - if (numa_on) - socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id); - else - socketid = 0; - - printf("rxq=%d,%d,%d ", portid, queueid, socketid); - fflush(stdout); - - setup_queue_tbl(qconf, lcore_id, socketid, queue); - - ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd, - socketid, &rx_conf, qconf->pool[queue]); - if (ret < 0) - rte_exit(EXIT_FAILURE, - "rte_eth_rx_queue_setup: err=%d," - "port=%d\n", ret, portid); - } - } - printf("\n"); /* start ports */ @@ -1664,16 +1194,12 @@ MAIN(int argc, char **argv) rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n", ret, portid); - /* - * If enabled, put device in promiscuous mode. - * This allows IO forwarding mode to forward packets - * to itself through 2 cross-connected ports of the - * target machine. - */ - if (promiscuous_on) - rte_eth_promiscuous_enable(portid); + rte_eth_promiscuous_enable(portid); } + if (init_routing_table() < 0) + rte_exit(EXIT_FAILURE, "Cannot init routing table\n"); + check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask); signal(SIGUSR1, signal_handler); -- 1.8.1.4