From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga09.intel.com (mga09.intel.com [134.134.136.24]) by dpdk.org (Postfix) with ESMTP id BBD54B0C7 for ; Wed, 28 May 2014 19:32:42 +0200 (CEST) Received: from orsmga001.jf.intel.com ([10.7.209.18]) by orsmga102.jf.intel.com with ESMTP; 28 May 2014 10:27:43 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="4.98,929,1392192000"; d="scan'208";a="519165911" Received: from irvmail001.ir.intel.com ([163.33.26.43]) by orsmga001.jf.intel.com with ESMTP; 28 May 2014 10:32:50 -0700 Received: from sivswdev02.ir.intel.com (sivswdev02.ir.intel.com [10.237.217.46]) by irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id s4SHWnxx004044; Wed, 28 May 2014 18:32:49 +0100 Received: from sivswdev02.ir.intel.com (localhost [127.0.0.1]) by sivswdev02.ir.intel.com with ESMTP id s4SHWnKQ031704; Wed, 28 May 2014 18:32:49 +0100 Received: (from aburakov@localhost) by sivswdev02.ir.intel.com with id s4SHWnQA031700; Wed, 28 May 2014 18:32:49 +0100 From: Anatoly Burakov To: dev@dpdk.org Date: Wed, 28 May 2014 18:32:41 +0100 Message-Id: <1625c043bb49508dd2aab9877f86183ab7e403d7.1401298292.git.anatoly.burakov@intel.com> X-Mailer: git-send-email 1.7.0.7 In-Reply-To: References: In-Reply-To: References: Subject: [dpdk-dev] [PATCH 07/13] ip_frag: refactored reassembly code and made it a proper library X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 28 May 2014 17:32:45 -0000 Signed-off-by: Anatoly Burakov --- config/common_bsdapp | 2 + config/common_linuxapp | 2 + examples/ip_reassembly/main.c | 24 +- lib/librte_ip_frag/Makefile | 6 +- lib/librte_ip_frag/ip_frag_common.h | 134 +++++++++- lib/librte_ip_frag/ip_frag_internal.c | 337 ++++++++++++++++++++++++ lib/librte_ip_frag/ipv4_frag_tbl.h | 400 ----------------------------- lib/librte_ip_frag/rte_ip_frag.h | 223 
+++++++++++++++- lib/librte_ip_frag/rte_ip_frag_common.c | 142 ++++++++++ lib/librte_ip_frag/rte_ipv4_reassembly.c | 189 ++++++++++++++ lib/librte_ip_frag/rte_ipv4_rsmbl.h | 427 ------------------------------- 11 files changed, 1023 insertions(+), 863 deletions(-) create mode 100644 lib/librte_ip_frag/ip_frag_internal.c delete mode 100644 lib/librte_ip_frag/ipv4_frag_tbl.h create mode 100644 lib/librte_ip_frag/rte_ip_frag_common.c create mode 100644 lib/librte_ip_frag/rte_ipv4_reassembly.c delete mode 100644 lib/librte_ip_frag/rte_ipv4_rsmbl.h diff --git a/config/common_bsdapp b/config/common_bsdapp index d30802e..be56ca7 100644 --- a/config/common_bsdapp +++ b/config/common_bsdapp @@ -261,6 +261,8 @@ CONFIG_RTE_LIBRTE_NET=y # Compile librte_net # CONFIG_RTE_LIBRTE_IP_FRAG=y +CONFIG_RTE_LIBRTE_IP_FRAG_DEBUG=n +CONFIG_RTE_LIBRTE_IP_FRAG_MAX_FRAG=4 # # Compile librte_meter diff --git a/config/common_linuxapp b/config/common_linuxapp index 074d961..4d58496 100644 --- a/config/common_linuxapp +++ b/config/common_linuxapp @@ -288,6 +288,8 @@ CONFIG_RTE_LIBRTE_NET=y # Compile librte_net # CONFIG_RTE_LIBRTE_IP_FRAG=y +CONFIG_RTE_LIBRTE_IP_FRAG_DEBUG=n +CONFIG_RTE_LIBRTE_IP_FRAG_MAX_FRAG=4 # # Compile librte_meter diff --git a/examples/ip_reassembly/main.c b/examples/ip_reassembly/main.c index 23ec4be..6c40d76 100644 --- a/examples/ip_reassembly/main.c +++ b/examples/ip_reassembly/main.c @@ -94,7 +94,7 @@ #define MAX_PKT_BURST 32 -#include "rte_ipv4_rsmbl.h" +#include "rte_ip_frag.h" #ifndef IPv6_BYTES #define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\ @@ -407,9 +407,9 @@ struct lcore_conf { #else lookup_struct_t * ipv6_lookup_struct; #endif - struct ip_frag_tbl *frag_tbl[MAX_RX_QUEUE_PER_LCORE]; + struct rte_ip_frag_tbl *frag_tbl[MAX_RX_QUEUE_PER_LCORE]; struct rte_mempool *pool[MAX_RX_QUEUE_PER_LCORE]; - struct ip_frag_death_row death_row; + struct rte_ip_frag_death_row death_row; struct mbuf_table *tx_mbufs[MAX_PORTS]; struct tx_lcore_stat tx_stat; } 
__rte_cache_aligned; @@ -645,7 +645,6 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, uint32_t queue, struct ipv4_hdr *ipv4_hdr; void *d_addr_bytes; uint8_t dst_port; - uint16_t flag_offset, ip_flag, ip_ofs; eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); @@ -665,16 +664,12 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, uint32_t queue, ++(ipv4_hdr->hdr_checksum); #endif - flag_offset = rte_be_to_cpu_16(ipv4_hdr->fragment_offset); - ip_ofs = (uint16_t)(flag_offset & IPV4_HDR_OFFSET_MASK); - ip_flag = (uint16_t)(flag_offset & IPV4_HDR_MF_FLAG); - /* if it is a fragmented packet, then try to reassemble. */ - if (ip_flag != 0 || ip_ofs != 0) { + if (rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)) { struct rte_mbuf *mo; - struct ip_frag_tbl *tbl; - struct ip_frag_death_row *dr; + struct rte_ip_frag_tbl *tbl; + struct rte_ip_frag_death_row *dr; tbl = qconf->frag_tbl[queue]; dr = &qconf->death_row; @@ -684,8 +679,8 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, uint32_t queue, m->pkt.vlan_macip.f.l3_len = sizeof(*ipv4_hdr); /* process this fragment. */ - if ((mo = rte_ipv4_reassemble_packet(tbl, dr, m, tms, ipv4_hdr, - ip_ofs, ip_flag)) == NULL) + if ((mo = rte_ipv4_frag_reassemble_packet(tbl, dr, m, tms, + ipv4_hdr)) == NULL) /* no packet to send out. */ return; @@ -1469,7 +1464,8 @@ setup_queue_tbl(struct lcore_conf *qconf, uint32_t lcore, int socket, * Plus, each TX queue can hold up to packets. 
*/ - nb_mbuf = 2 * RTE_MAX(max_flow_num, 2UL * MAX_PKT_BURST) * MAX_FRAG_NUM; + nb_mbuf = 2 * RTE_MAX(max_flow_num, 2UL * MAX_PKT_BURST) * + RTE_LIBRTE_IP_FRAG_MAX_FRAG; nb_mbuf *= (port_conf.rxmode.max_rx_pkt_len + BUF_SIZE - 1) / BUF_SIZE; nb_mbuf += RTE_TEST_RX_DESC_DEFAULT + RTE_TEST_TX_DESC_DEFAULT; diff --git a/lib/librte_ip_frag/Makefile b/lib/librte_ip_frag/Makefile index 13a83b1..022092d 100644 --- a/lib/librte_ip_frag/Makefile +++ b/lib/librte_ip_frag/Makefile @@ -39,11 +39,13 @@ CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) #source files SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ipv4_fragmentation.c +SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ipv4_reassembly.c +SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ip_frag_common.c +SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += ip_frag_internal.c # install this header file SYMLINK-$(CONFIG_RTE_LIBRTE_IP_FRAG)-include += rte_ip_frag.h -SYMLINK-$(CONFIG_RTE_LIBRTE_IP_FRAG)-include += ipv4_frag_tbl.h -SYMLINK-$(CONFIG_RTE_LIBRTE_IP_FRAG)-include += rte_ipv4_rsmbl.h + # this library depends on rte_ether DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_mempool lib/librte_ether diff --git a/lib/librte_ip_frag/ip_frag_common.h b/lib/librte_ip_frag/ip_frag_common.h index 6d4706a..3e588a0 100644 --- a/lib/librte_ip_frag/ip_frag_common.h +++ b/lib/librte_ip_frag/ip_frag_common.h @@ -36,19 +36,141 @@ #include "rte_ip_frag.h" -/* Debug on/off */ -#ifdef RTE_IP_FRAG_DEBUG +/* logging macros. */ +#ifdef RTE_LIBRTE_IP_FRAG_DEBUG + +#define IP_FRAG_LOG(lvl, fmt, args...) RTE_LOG(lvl, USER1, fmt, ##args) #define RTE_IP_FRAG_ASSERT(exp) \ if (!(exp)) { \ rte_panic("function %s, line%d\tassert \"" #exp "\" failed\n", \ __func__, __LINE__); \ } +#else +#define IP_FRAG_LOG(lvl, fmt, args...) 
do {} while(0) +#define RTE_IP_FRAG_ASSERT(exp) do { } while(0) +#endif /* IP_FRAG_DEBUG */ + +/* helper macros */ +#define IP_FRAG_MBUF2DR(dr, mb) ((dr)->row[(dr)->cnt++] = (mb)) + +/* internal functions declarations */ +struct rte_mbuf * ip_frag_process(struct rte_ip_frag_pkt *fp, + struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, + uint16_t ofs, uint16_t len, uint16_t more_frags); + +struct rte_ip_frag_pkt * ip_frag_find(struct rte_ip_frag_tbl *tbl, + struct rte_ip_frag_death_row *dr, + const struct ip_frag_key *key, uint64_t tms); + +struct rte_ip_frag_pkt * ip_frag_lookup(struct rte_ip_frag_tbl *tbl, + const struct ip_frag_key *key, uint64_t tms, + struct rte_ip_frag_pkt **free, struct rte_ip_frag_pkt **stale); + +/* these functions need to be declared here as ip_frag_process relies on them */ +struct rte_mbuf * ipv4_frag_reassemble(const struct rte_ip_frag_pkt *fp); + + + +/* + * misc frag key functions + */ + +/* check if key is empty */ +static inline int +ip_frag_key_is_empty(const struct ip_frag_key * key) +{ + if (key->src_dst != 0) + return 0; + return 1; +} -#else /*RTE_IP_FRAG_DEBUG*/ +/* empty the key */ +static inline void +ip_frag_key_invalidate(struct ip_frag_key * key) +{ + key->src_dst = 0; +} + +/* compare two keys */ +static inline int +ip_frag_key_cmp(const struct ip_frag_key * k1, const struct ip_frag_key * k2) +{ + return k1->src_dst ^ k2->src_dst; +} -#define RTE_IP_FRAG_ASSERT(exp) do { } while (0) +/* + * misc fragment functions + */ + +/* put fragment on death row */ +static inline void +ip_frag_free(struct rte_ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr) +{ + uint32_t i, k; + + k = dr->cnt; + for (i = 0; i != fp->last_idx; i++) { + if (fp->frags[i].mb != NULL) { + dr->row[k++] = fp->frags[i].mb; + fp->frags[i].mb = NULL; + } + } + + fp->last_idx = 0; + dr->cnt = k; +} + +/* if key is empty, mark key as in use */ +static inline void +ip_frag_inuse(struct rte_ip_frag_tbl *tbl, const struct rte_ip_frag_pkt *fp) +{ + if 
(ip_frag_key_is_empty(&fp->key)) { + TAILQ_REMOVE(&tbl->lru, fp, lru); + tbl->use_entries--; + } +} + +/* reset the fragment */ +static inline void +ip_frag_reset(struct rte_ip_frag_pkt *fp, uint64_t tms) +{ + static const struct ip_frag zero_frag = { + .ofs = 0, + .len = 0, + .mb = NULL, + }; + + fp->start = tms; + fp->total_size = UINT32_MAX; + fp->frag_size = 0; + fp->last_idx = IP_MIN_FRAG_NUM; + fp->frags[IP_LAST_FRAG_IDX] = zero_frag; + fp->frags[IP_FIRST_FRAG_IDX] = zero_frag; +} + +/* chain two mbufs */ +static inline void +ip_frag_chain(struct rte_mbuf *mn, struct rte_mbuf *mp) +{ + struct rte_mbuf *ms; + + /* adjust start of the last fragment data. */ + rte_pktmbuf_adj(mp, (uint16_t)(mp->pkt.vlan_macip.f.l2_len + + mp->pkt.vlan_macip.f.l3_len)); + + /* chain two fragments. */ + ms = rte_pktmbuf_lastseg(mn); + ms->pkt.next = mp; + + /* accumulate number of segments and total length. */ + mn->pkt.nb_segs = (uint8_t)(mn->pkt.nb_segs + mp->pkt.nb_segs); + mn->pkt.pkt_len += mp->pkt.pkt_len; + + /* reset pkt_len and nb_segs for chained fragment. */ + mp->pkt.pkt_len = mp->pkt.data_len; + mp->pkt.nb_segs = 1; +} -#endif /*RTE_IP_FRAG_DEBUG*/ -#endif +#endif /* _IP_FRAG_COMMON_H_ */ diff --git a/lib/librte_ip_frag/ip_frag_internal.c b/lib/librte_ip_frag/ip_frag_internal.c new file mode 100644 index 0000000..2f5a4b8 --- /dev/null +++ b/lib/librte_ip_frag/ip_frag_internal.c @@ -0,0 +1,337 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +#include +#include +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 +#include +#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ + +#include "rte_ip_frag.h" +#include "ip_frag_common.h" + +#define PRIME_VALUE 0xeaad8405 + +#define IP_FRAG_TBL_POS(tbl, sig) \ + ((tbl)->pkt + ((sig) & (tbl)->entry_mask)) + +#ifdef RTE_LIBRTE_IP_FRAG_TBL_STAT +#define IP_FRAG_TBL_STAT_UPDATE(s, f, v) ((s)->f += (v)) +#else +#define IP_FRAG_TBL_STAT_UPDATE(s, f, v) do {} while (0) +#endif /* IP_FRAG_TBL_STAT */ + +/* local frag table helper functions */ +static inline void +ip_frag_tbl_del(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr, + struct rte_ip_frag_pkt *fp) +{ + ip_frag_free(fp, dr); + ip_frag_key_invalidate(&fp->key); + TAILQ_REMOVE(&tbl->lru, fp, lru); + tbl->use_entries--; + IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, del_num, 1); +} + +static inline void +ip_frag_tbl_add(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_pkt *fp, + const struct ip_frag_key *key, uint64_t tms) +{ + fp->key = key[0]; + ip_frag_reset(fp, tms); + TAILQ_INSERT_TAIL(&tbl->lru, fp, lru); + tbl->use_entries++; + IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, add_num, 1); +} + +static inline void +ip_frag_tbl_reuse(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr, + struct rte_ip_frag_pkt *fp, uint64_t tms) +{ + ip_frag_free(fp, dr); + ip_frag_reset(fp, tms); + TAILQ_REMOVE(&tbl->lru, fp, lru); + TAILQ_INSERT_TAIL(&tbl->lru, fp, lru); + IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, reuse_num, 1); +} + + +static inline void +ipv4_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2) +{ + uint32_t v; + const uint32_t *p; + + p = (const uint32_t *)&key->src_dst; + +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 + v = rte_hash_crc_4byte(p[0], PRIME_VALUE); + v = rte_hash_crc_4byte(p[1], v); + v = rte_hash_crc_4byte(key->id, v); +#else + + v = rte_jhash_3words(p[0], p[1], key->id, PRIME_VALUE); +#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ + + *v1 = v; + *v2 = (v << 7) + (v >> 14); +} + +struct rte_mbuf 
* +ip_frag_process(struct rte_ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr, + struct rte_mbuf *mb, uint16_t ofs, uint16_t len, uint16_t more_frags) +{ + uint32_t idx; + + fp->frag_size += len; + + /* this is the first fragment. */ + if (ofs == 0) { + idx = (fp->frags[IP_FIRST_FRAG_IDX].mb == NULL) ? + IP_FIRST_FRAG_IDX : UINT32_MAX; + + /* this is the last fragment. */ + } else if (more_frags == 0) { + fp->total_size = ofs + len; + idx = (fp->frags[IP_LAST_FRAG_IDX].mb == NULL) ? + IP_LAST_FRAG_IDX : UINT32_MAX; + + /* this is the intermediate fragment. */ + } else if ((idx = fp->last_idx) < + sizeof (fp->frags) / sizeof (fp->frags[0])) { + fp->last_idx++; + } + + /* + * erroneous packet: either exceeded max allowed number of fragments, + * or duplicate first/last fragment encountered. + */ + if (idx >= sizeof (fp->frags) / sizeof (fp->frags[0])) { + + /* report an error. */ + IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n" + "ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, " + "total_size: %u, frag_size: %u, last_idx: %u\n" + "first fragment: ofs: %u, len: %u\n" + "last fragment: ofs: %u, len: %u\n\n", + __func__, __LINE__, + fp, fp->key.src_dst[0], fp->key.id, + fp->total_size, fp->frag_size, fp->last_idx, + fp->frags[IP_FIRST_FRAG_IDX].ofs, + fp->frags[IP_FIRST_FRAG_IDX].len, + fp->frags[IP_LAST_FRAG_IDX].ofs, + fp->frags[IP_LAST_FRAG_IDX].len); + + /* free all fragments, invalidate the entry. */ + ip_frag_free(fp, dr); + ip_frag_key_invalidate(&fp->key); + IP_FRAG_MBUF2DR(dr, mb); + + return (NULL); + } + + fp->frags[idx].ofs = ofs; + fp->frags[idx].len = len; + fp->frags[idx].mb = mb; + + mb = NULL; + + /* not all fragments are collected yet. */ + if (likely (fp->frag_size < fp->total_size)) { + return (mb); + + /* if we collected all fragments, then try to reassemble. */ + } else if (fp->frag_size == fp->total_size && + fp->frags[IP_FIRST_FRAG_IDX].mb != NULL) + mb = ipv4_frag_reassemble(fp); + + /* erroneous set of fragments.
*/ + if (mb == NULL) { + + /* report an error. */ + IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n" + "ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, " + "total_size: %u, frag_size: %u, last_idx: %u\n" + "first fragment: ofs: %u, len: %u\n" + "last fragment: ofs: %u, len: %u\n\n", + __func__, __LINE__, + fp, fp->key.src_dst[0], fp->key.id, + fp->total_size, fp->frag_size, fp->last_idx, + fp->frags[IP_FIRST_FRAG_IDX].ofs, + fp->frags[IP_FIRST_FRAG_IDX].len, + fp->frags[IP_LAST_FRAG_IDX].ofs, + fp->frags[IP_LAST_FRAG_IDX].len); + + /* free associated resources. */ + ip_frag_free(fp, dr); + } + + /* we are done with that entry, invalidate it. */ + ip_frag_key_invalidate(&fp->key); + return (mb); +} + + +/* + * Find an entry in the table for the corresponding fragment. + * If such entry is not present, then allocate a new one. + * If the entry is stale, then free and reuse it. + */ +struct rte_ip_frag_pkt * +ip_frag_find(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr, + const struct ip_frag_key *key, uint64_t tms) +{ + struct rte_ip_frag_pkt *pkt, *free, *stale, *lru; + uint64_t max_cycles; + + /* + * Actually the two lines below are totally redundant. + * they are here, just to make gcc 4.6 happy. + */ + free = NULL; + stale = NULL; + max_cycles = tbl->max_cycles; + + IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, find_num, 1); + + if ((pkt = ip_frag_lookup(tbl, key, tms, &free, &stale)) == NULL) { + + /*timed-out entry, free and invalidate it*/ + if (stale != NULL) { + ip_frag_tbl_del(tbl, dr, stale); + free = stale; + + /* + * we found a free entry, check if we can use it. + * If we run out of free entries in the table, then + * check if we have a timed out entry to delete.
+ */ + } else if (free != NULL && + tbl->max_entries <= tbl->use_entries) { + lru = TAILQ_FIRST(&tbl->lru); + if (max_cycles + lru->start < tms) { + ip_frag_tbl_del(tbl, dr, lru); + } else { + free = NULL; + IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, + fail_nospace, 1); + } + } + + /* found a free entry to reuse. */ + if (free != NULL) { + ip_frag_tbl_add(tbl, free, key, tms); + pkt = free; + } + + /* + * we found the flow, but it is already timed out, + * so free associated resources, reposition it in the LRU list, + * and reuse it. + */ + } else if (max_cycles + pkt->start < tms) { + ip_frag_tbl_reuse(tbl, dr, pkt, tms); + } + + IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, fail_total, (pkt == NULL)); + + tbl->last = pkt; + return (pkt); +} + +struct rte_ip_frag_pkt * +ip_frag_lookup(struct rte_ip_frag_tbl *tbl, + const struct ip_frag_key *key, uint64_t tms, + struct rte_ip_frag_pkt **free, struct rte_ip_frag_pkt **stale) +{ + struct rte_ip_frag_pkt *p1, *p2; + struct rte_ip_frag_pkt *empty, *old; + uint64_t max_cycles; + uint32_t i, assoc, sig1, sig2; + + empty = NULL; + old = NULL; + + max_cycles = tbl->max_cycles; + assoc = tbl->bucket_entries; + + if (tbl->last != NULL && ip_frag_key_cmp(&tbl->last->key, key) == 0) + return (tbl->last); + + ipv4_frag_hash(key, &sig1, &sig2); + + p1 = IP_FRAG_TBL_POS(tbl, sig1); + p2 = IP_FRAG_TBL_POS(tbl, sig2); + + for (i = 0; i != assoc; i++) { + + IP_FRAG_LOG(DEBUG, "%s:%d:\n" + "tbl: %p, max_entries: %u, use_entries: %u\n" + "ipv6_frag_pkt line0: %p, index: %u from %u\n" + "key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n", + __func__, __LINE__, + tbl, tbl->max_entries, tbl->use_entries, + p1, i, assoc, + p1[i].key.src_dst[0], p1[i].key.id, p1[i].start); + + if (ip_frag_key_cmp(&p1[i].key, key) == 0) + return (p1 + i); + else if (ip_frag_key_is_empty(&p1[i].key)) + empty = (empty == NULL) ? (p1 + i) : empty; + else if (max_cycles + p1[i].start < tms) + old = (old == NULL) ? 
(p1 + i) : old; + + IP_FRAG_LOG(DEBUG, "%s:%d:\n" + "tbl: %p, max_entries: %u, use_entries: %u\n" + "ipv6_frag_pkt line1: %p, index: %u from %u\n" + "key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n", + __func__, __LINE__, + tbl, tbl->max_entries, tbl->use_entries, + p2, i, assoc, + p2[i].key.src_dst[0], p2[i].key.id, p2[i].start); + + if (ip_frag_key_cmp(&p2[i].key, key) == 0) + return (p2 + i); + else if (ip_frag_key_is_empty(&p2[i].key)) + empty = (empty == NULL) ?( p2 + i) : empty; + else if (max_cycles + p2[i].start < tms) + old = (old == NULL) ? (p2 + i) : old; + } + + *free = empty; + *stale = old; + return (NULL); +} diff --git a/lib/librte_ip_frag/ipv4_frag_tbl.h b/lib/librte_ip_frag/ipv4_frag_tbl.h deleted file mode 100644 index fa3291d..0000000 --- a/lib/librte_ip_frag/ipv4_frag_tbl.h +++ /dev/null @@ -1,400 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _IPV4_FRAG_TBL_H_ -#define _IPV4_FRAG_TBL_H_ - -/** - * @file - * IPv4 fragments table. - * - * Implementation of IPv4 fragment table create/destroy/find/update. - * - */ - -/* - * The ip_frag_tbl is a simple hash table: - * The basic idea is to use two hash functions and - * associativity. This provides 2 * possible locations in - * the hash table for each key. Sort of simplified Cuckoo hashing, - * when the collision occurs and all 2 * are occupied, - * instead of resinserting existing keys into alternative locations, we just - * return a faiure. - * Another thing timing: entries that resides in the table longer then - * are considered as invalid, and could be removed/replaced - * byt the new ones. - * pair is stored together, all add/update/lookup opearions are not - * MT safe. - */ - -#include -#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 -#include -#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ - -#define PRIME_VALUE 0xeaad8405 - -TAILQ_HEAD(ip_pkt_list, ip_frag_pkt); - -struct ip_frag_tbl_stat { - uint64_t find_num; /* total # of find/insert attempts. */ - uint64_t add_num; /* # of add ops. */ - uint64_t del_num; /* # of del ops. */ - uint64_t reuse_num; /* # of reuse (del/add) ops. */ - uint64_t fail_total; /* total # of add failures. */ - uint64_t fail_nospace; /* # of 'no space' add failures. */ -} __rte_cache_aligned; - -struct ip_frag_tbl { - uint64_t max_cycles; /* ttl for table entries. 
*/ - uint32_t entry_mask; /* hash value mask. */ - uint32_t max_entries; /* max entries allowed. */ - uint32_t use_entries; /* entries in use. */ - uint32_t bucket_entries; /* hash assocaitivity. */ - uint32_t nb_entries; /* total size of the table. */ - uint32_t nb_buckets; /* num of associativity lines. */ - struct ip_frag_pkt *last; /* last used entry. */ - struct ip_pkt_list lru; /* LRU list for table entries. */ - struct ip_frag_tbl_stat stat; /* statistics counters. */ - struct ip_frag_pkt pkt[0]; /* hash table. */ -}; - -#define IP_FRAG_TBL_POS(tbl, sig) \ - ((tbl)->pkt + ((sig) & (tbl)->entry_mask)) - -#define IP_FRAG_HASH_FNUM 2 - -#ifdef IP_FRAG_TBL_STAT -#define IP_FRAG_TBL_STAT_UPDATE(s, f, v) ((s)->f += (v)) -#else -#define IP_FRAG_TBL_STAT_UPDATE(s, f, v) do {} while (0) -#endif /* IPV4_FRAG_TBL_STAT */ - -static inline void -ipv4_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2) -{ - uint32_t v; - const uint32_t *p; - - p = (const uint32_t *)&key->src_dst; - -#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 - v = rte_hash_crc_4byte(p[0], PRIME_VALUE); - v = rte_hash_crc_4byte(p[1], v); - v = rte_hash_crc_4byte(key->id, v); -#else - - v = rte_jhash_3words(p[0], p[1], key->id, PRIME_VALUE); -#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ - - *v1 = v; - *v2 = (v << 7) + (v >> 14); -} - -/* - * Update the table, after we finish processing it's entry. - */ -static inline void -ip_frag_inuse(struct ip_frag_tbl *tbl, const struct ip_frag_pkt *fp) -{ - if (IP_FRAG_KEY_EMPTY(&fp->key)) { - TAILQ_REMOVE(&tbl->lru, fp, lru); - tbl->use_entries--; - } -} - -/* - * For the given key, try to find an existing entry. - * If such entry doesn't exist, will return free and/or timed-out entry, - * that can be used for that key. 
- */ -static inline struct ip_frag_pkt * -ip_frag_lookup(struct ip_frag_tbl *tbl, - const struct ip_frag_key *key, uint64_t tms, - struct ip_frag_pkt **free, struct ip_frag_pkt **stale) -{ - struct ip_frag_pkt *p1, *p2; - struct ip_frag_pkt *empty, *old; - uint64_t max_cycles; - uint32_t i, assoc, sig1, sig2; - - empty = NULL; - old = NULL; - - max_cycles = tbl->max_cycles; - assoc = tbl->bucket_entries; - - if (tbl->last != NULL && IP_FRAG_KEY_CMP(&tbl->last->key, key) == 0) - return (tbl->last); - - ipv4_frag_hash(key, &sig1, &sig2); - p1 = IP_FRAG_TBL_POS(tbl, sig1); - p2 = IP_FRAG_TBL_POS(tbl, sig2); - - for (i = 0; i != assoc; i++) { - - IP_FRAG_LOG(DEBUG, "%s:%d:\n" - "tbl: %p, max_entries: %u, use_entries: %u\n" - "ip_frag_pkt line0: %p, index: %u from %u\n" - "key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n", - __func__, __LINE__, - tbl, tbl->max_entries, tbl->use_entries, - p1, i, assoc, - p1[i].key.src_dst, p1[i].key.id, p1[i].start); - - if (IP_FRAG_KEY_CMP(&p1[i].key, key) == 0) - return (p1 + i); - else if (IP_FRAG_KEY_EMPTY(&p1[i].key)) - empty = (empty == NULL) ? (p1 + i) : empty; - else if (max_cycles + p1[i].start < tms) - old = (old == NULL) ? (p1 + i) : old; - - IP_FRAG_LOG(DEBUG, "%s:%d:\n" - "tbl: %p, max_entries: %u, use_entries: %u\n" - "ip_frag_pkt line1: %p, index: %u from %u\n" - "key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n", - __func__, __LINE__, - tbl, tbl->max_entries, tbl->use_entries, - p2, i, assoc, - p2[i].key.src_dst, p2[i].key.id, p2[i].start); - - if (IP_FRAG_KEY_CMP(&p2[i].key, key) == 0) - return (p2 + i); - else if (IP_FRAG_KEY_EMPTY(&p2[i].key)) - empty = (empty == NULL) ?( p2 + i) : empty; - else if (max_cycles + p2[i].start < tms) - old = (old == NULL) ? 
(p2 + i) : old; - } - - *free = empty; - *stale = old; - return (NULL); -} - -static inline void -ip_frag_tbl_del(struct ip_frag_tbl *tbl, struct ip_frag_death_row *dr, - struct ip_frag_pkt *fp) -{ - ip_frag_free(fp, dr); - IP_FRAG_KEY_INVALIDATE(&fp->key); - TAILQ_REMOVE(&tbl->lru, fp, lru); - tbl->use_entries--; - IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, del_num, 1); -} - -static inline void -ip_frag_tbl_add(struct ip_frag_tbl *tbl, struct ip_frag_pkt *fp, - const struct ip_frag_key *key, uint64_t tms) -{ - fp->key = key[0]; - ip_frag_reset(fp, tms); - TAILQ_INSERT_TAIL(&tbl->lru, fp, lru); - tbl->use_entries++; - IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, add_num, 1); -} - -static inline void -ip_frag_tbl_reuse(struct ip_frag_tbl *tbl, struct ip_frag_death_row *dr, - struct ip_frag_pkt *fp, uint64_t tms) -{ - ip_frag_free(fp, dr); - ip_frag_reset(fp, tms); - TAILQ_REMOVE(&tbl->lru, fp, lru); - TAILQ_INSERT_TAIL(&tbl->lru, fp, lru); - IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, reuse_num, 1); -} - -/* - * Find an entry in the table for the corresponding fragment. - * If such entry is not present, then allocate a new one. - * If the entry is stale, then free and reuse it. - */ -static inline struct ip_frag_pkt * -ip_frag_find(struct ip_frag_tbl *tbl, struct ip_frag_death_row *dr, - const struct ip_frag_key *key, uint64_t tms) -{ - struct ip_frag_pkt *pkt, *free, *stale, *lru; - uint64_t max_cycles; - - /* - * Actually the two line below are totally redundant. - * they are here, just to make gcc 4.6 happy. - */ - free = NULL; - stale = NULL; - max_cycles = tbl->max_cycles; - - IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, find_num, 1); - - if ((pkt = ip_frag_lookup(tbl, key, tms, &free, &stale)) == NULL) { - - /*timed-out entry, free and invalidate it*/ - if (stale != NULL) { - ip_frag_tbl_del(tbl, dr, stale); - free = stale; - - /* - * we found a free entry, check if we can use it. - * If we run out of free entries in the table, then - * check if we have a timed out entry to delete. 
- */ - } else if (free != NULL && - tbl->max_entries <= tbl->use_entries) { - lru = TAILQ_FIRST(&tbl->lru); - if (max_cycles + lru->start < tms) { - ip_frag_tbl_del(tbl, dr, lru); - } else { - free = NULL; - IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, - fail_nospace, 1); - } - } - - /* found a free entry to reuse. */ - if (free != NULL) { - ip_frag_tbl_add(tbl, free, key, tms); - pkt = free; - } - - /* - * we found the flow, but it is already timed out, - * so free associated resources, reposition it in the LRU list, - * and reuse it. - */ - } else if (max_cycles + pkt->start < tms) { - ip_frag_tbl_reuse(tbl, dr, pkt, tms); - } - - IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, fail_total, (pkt == NULL)); - - tbl->last = pkt; - return (pkt); -} - -/* - * Create a new IPV4 Frag table. - * @param bucket_num - * Number of buckets in the hash table. - * @param bucket_entries - * Number of entries per bucket (e.g. hash associativity). - * Should be power of two. - * @param max_entries - * Maximum number of entries that could be stored in the table. - * The value should be less or equal then bucket_num * bucket_entries. - * @param max_cycles - * Maximum TTL in cycles for each fragmented packet. - * @param socket_id - * The *socket_id* argument is the socket identifier in the case of - * NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA constraints. - * @return - * The pointer to the new allocated mempool, on success. NULL on error. - */ -static struct ip_frag_tbl * -rte_ip_frag_table_create(uint32_t bucket_num, uint32_t bucket_entries, - uint32_t max_entries, uint64_t max_cycles, int socket_id) -{ - struct ip_frag_tbl *tbl; - size_t sz; - uint64_t nb_entries; - - nb_entries = rte_align32pow2(bucket_num); - nb_entries *= bucket_entries; - nb_entries *= IP_FRAG_HASH_FNUM; - - /* check input parameters. 
*/ - if (rte_is_power_of_2(bucket_entries) == 0 || - nb_entries > UINT32_MAX || nb_entries == 0 || - nb_entries < max_entries) { - RTE_LOG(ERR, USER1, "%s: invalid input parameter\n", __func__); - return (NULL); - } - - sz = sizeof (*tbl) + nb_entries * sizeof (tbl->pkt[0]); - if ((tbl = rte_zmalloc_socket(__func__, sz, CACHE_LINE_SIZE, - socket_id)) == NULL) { - RTE_LOG(ERR, USER1, - "%s: allocation of %zu bytes at socket %d failed do\n", - __func__, sz, socket_id); - return (NULL); - } - - RTE_LOG(INFO, USER1, "%s: allocated of %zu bytes at socket %d\n", - __func__, sz, socket_id); - - tbl->max_cycles = max_cycles; - tbl->max_entries = max_entries; - tbl->nb_entries = (uint32_t)nb_entries; - tbl->nb_buckets = bucket_num; - tbl->bucket_entries = bucket_entries; - tbl->entry_mask = (tbl->nb_entries - 1) & ~(tbl->bucket_entries - 1); - - TAILQ_INIT(&(tbl->lru)); - return (tbl); -} - -static inline void -rte_ip_frag_table_destroy( struct ip_frag_tbl *tbl) -{ - rte_free(tbl); -} - -static void -rte_ip_frag_table_statistics_dump(FILE *f, const struct ip_frag_tbl *tbl) -{ - uint64_t fail_total, fail_nospace; - - fail_total = tbl->stat.fail_total; - fail_nospace = tbl->stat.fail_nospace; - - fprintf(f, "max entries:\t%u;\n" - "entries in use:\t%u;\n" - "finds/inserts:\t%" PRIu64 ";\n" - "entries added:\t%" PRIu64 ";\n" - "entries deleted by timeout:\t%" PRIu64 ";\n" - "entries reused by timeout:\t%" PRIu64 ";\n" - "total add failures:\t%" PRIu64 ";\n" - "add no-space failures:\t%" PRIu64 ";\n" - "add hash-collisions failures:\t%" PRIu64 ";\n", - tbl->max_entries, - tbl->use_entries, - tbl->stat.find_num, - tbl->stat.add_num, - tbl->stat.del_num, - tbl->stat.reuse_num, - fail_total, - fail_nospace, - fail_total - fail_nospace); -} - - -#endif /* _IPV4_FRAG_TBL_H_ */ diff --git a/lib/librte_ip_frag/rte_ip_frag.h b/lib/librte_ip_frag/rte_ip_frag.h index 0cf3878..327e1f1 100644 --- a/lib/librte_ip_frag/rte_ip_frag.h +++ b/lib/librte_ip_frag/rte_ip_frag.h @@ -1,13 +1,13 @@ 
/*- * BSD LICENSE - * + * * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: - * + * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright @@ -17,7 +17,7 @@ * * Neither the name of Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. - * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -31,16 +31,147 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef _RTE_IP_FRAG_H__ -#define _RTE_IP_FRAG_H__ +#ifndef _RTE_IP_FRAG_H_ +#define _RTE_IP_FRAG_H_ /** * @file - * RTE IPv4 Fragmentation + * RTE IPv4 Fragmentation and Reassembly + * + * Implementation of IPv4 packet fragmentation and reassembly. + */ + +#include +#include + +#include +#include +#include + +enum { + IP_LAST_FRAG_IDX, /**< index of last fragment in frags[] */ + IP_FIRST_FRAG_IDX, /**< index of first fragment in frags[] */ + IP_MIN_FRAG_NUM, /**< minimum number of fragments */ + IP_MAX_FRAG_NUM = RTE_LIBRTE_IP_FRAG_MAX_FRAG, + /**< maximum number of fragments per packet */ +}; + +/** @internal fragmented mbuf */ +struct ip_frag { + uint16_t ofs; /**< offset into the packet */ + uint16_t len; /**< length of fragment */ + struct rte_mbuf *mb; /**< fragment mbuf */ +}; + +/** @internal key used to uniquely identify a fragmented datagram. */ +struct ip_frag_key { + uint64_t src_dst; /**< src and dst addresses */ + uint32_t id; /**< packet id */ +}; + +/* + * @internal Fragmented packet to reassemble.
+ * First two entries in the frags[] array are for the last and first fragments. + */ +struct rte_ip_frag_pkt { + TAILQ_ENTRY(rte_ip_frag_pkt) lru; /**< LRU list */ + struct ip_frag_key key; /**< fragmentation key */ + uint64_t start; /**< creation timestamp */ + uint32_t total_size; /**< expected reassembled size */ + uint32_t frag_size; /**< size of fragments received */ + uint32_t last_idx; /**< index of next entry to fill */ + struct ip_frag frags[IP_MAX_FRAG_NUM]; /**< fragments */ +} __rte_cache_aligned; + +#define IP_FRAG_DEATH_ROW_LEN 32 /**< death row size (in packets) */ + +/** mbuf death row (packets to be freed) */ +struct rte_ip_frag_death_row { + uint32_t cnt; /**< number of mbufs currently on death row */ + struct rte_mbuf *row[IP_FRAG_DEATH_ROW_LEN * (IP_MAX_FRAG_NUM + 1)]; + /**< mbufs to be freed */ +}; + +TAILQ_HEAD(rte_ip_pkt_list, rte_ip_frag_pkt); /**< @internal fragments tailq */ + +/** fragmentation table statistics */ +struct rte_ip_frag_tbl_stat { + uint64_t find_num; /**< total # of find/insert attempts. */ + uint64_t add_num; /**< # of add ops. */ + uint64_t del_num; /**< # of del ops. */ + uint64_t reuse_num; /**< # of reuse (del/add) ops. */ + uint64_t fail_total; /**< total # of add failures. */ + uint64_t fail_nospace; /**< # of 'no space' add failures. */ +} __rte_cache_aligned; + +/** fragmentation table */ +struct rte_ip_frag_tbl { + uint64_t max_cycles; /**< ttl for table entries (in cycles). */ + uint32_t entry_mask; /**< hash value mask. */ + uint32_t max_entries; /**< max entries allowed. */ + uint32_t use_entries; /**< entries in use. */ + uint32_t bucket_entries; /**< hash associativity (entries per bucket). */ + uint32_t nb_entries; /**< total size of the table. */ + uint32_t nb_buckets; /**< num of associativity lines. */ + struct rte_ip_frag_pkt *last; /**< last used entry. */ + struct rte_ip_pkt_list lru; /**< LRU list for table entries. */ + struct rte_ip_frag_tbl_stat stat; /**< statistics counters. */ + struct rte_ip_frag_pkt pkt[0]; /**< hash table.
*/ +}; + +/** IPv6 fragment extension header */ +struct ipv6_extension_fragment { + uint8_t next_header; /**< Next header type */ + uint8_t reserved1; /**< Reserved */ + union { + struct { + uint16_t frag_offset:13; /**< Offset from the start of the packet */ + uint16_t reserved2:2; /**< Reserved */ + uint16_t more_frags:1; + /**< 1 if more fragments left, 0 if last fragment */ + }; + uint16_t frag_data; + /**< union of all fragmentation data */ + }; + uint32_t id; /**< Packet ID */ +} __attribute__((__packed__)); + + + +/* + * Create a new IP fragmentation table. * - * Implementation of IPv4 fragmentation. + * @param bucket_num + * Number of buckets in the hash table. + * @param bucket_entries + * Number of entries per bucket (i.e. hash associativity). + * Should be a power of two. + * @param max_entries + * Maximum number of entries that could be stored in the table. + * The value should be less than or equal to bucket_num * bucket_entries. + * @param max_cycles + * Maximum TTL in cycles for each fragmented packet. + * @param socket_id + * The *socket_id* argument is the socket identifier in the case of + * NUMA. The value can be *SOCKET_ID_ANY* if there are no NUMA constraints. + * @return + * The pointer to the newly allocated fragmentation table, on success. NULL on error. + */ +struct rte_ip_frag_tbl * rte_ip_frag_table_create(uint32_t bucket_num, + uint32_t bucket_entries, uint32_t max_entries, + uint64_t max_cycles, int socket_id); + +/* + * Free allocated IP fragmentation table. * + * @param tbl + * Fragmentation table to free. */ +static inline void +rte_ip_frag_table_destroy( struct rte_ip_frag_tbl *tbl) +{ + rte_free(tbl); +} /** * IPv4 fragmentation. @@ -64,10 +195,74 @@ * Otherwise - (-1) * .
*/ int32_t rte_ipv4_fragmentation(struct rte_mbuf *pkt_in, - struct rte_mbuf **pkts_out, - uint16_t nb_pkts_out, - uint16_t mtu_size, - struct rte_mempool *pool_direct, - struct rte_mempool *pool_indirect); + struct rte_mbuf **pkts_out, + uint16_t nb_pkts_out, uint16_t mtu_size, + struct rte_mempool *pool_direct, + struct rte_mempool *pool_indirect); + +/* + * This function implements reassembly of fragmented IPv4 packets. + * Incoming mbufs should have their l2_len/l3_len fields set up correctly. + * + * @param tbl + * Table where to lookup/add the fragmented packet. + * @param dr + * Death row that collects the mbufs to be freed. + * @param mb + * Incoming mbuf with IPv4 fragment. + * @param tms + * Fragment arrival timestamp. + * @param ip_hdr + * Pointer to the IPV4 header inside the fragment. + * @return + * Pointer to mbuf for reassembled packet, or NULL if: + * - an error occurred. + * - not all fragments of the packet are collected yet. + */ +struct rte_mbuf * rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, + struct rte_ip_frag_death_row *dr, + struct rte_mbuf *mb, uint64_t tms, struct ipv4_hdr *ip_hdr); + +/* + * Check if the IPv4 packet is fragmented. + * + * @param hdr + * IPv4 header of the packet + * @return + * 1 if the MF flag is set or the fragment offset is non-zero, + * 0 otherwise + */ +static inline int +rte_ipv4_frag_pkt_is_fragmented(const struct ipv4_hdr * hdr) { + uint16_t flag_offset, ip_flag, ip_ofs; + + flag_offset = rte_be_to_cpu_16(hdr->fragment_offset); + ip_ofs = (uint16_t)(flag_offset & IPV4_HDR_OFFSET_MASK); + ip_flag = (uint16_t)(flag_offset & IPV4_HDR_MF_FLAG); + + return ip_flag != 0 || ip_ofs != 0; +} + +/* + * Free mbufs on a given death row. + * + * @param dr + * Death row to free mbufs in. + * @param prefetch + * How many buffers to prefetch before freeing. + */ +void rte_ip_frag_free_death_row(struct rte_ip_frag_death_row *dr, + uint32_t prefetch); + + +/* + * Dump fragmentation table statistics to file.
+ * + * @param f + * File to dump statistics to + * @param tbl + * Fragmentation table to dump statistics from + */ +void +rte_ip_frag_table_statistics_dump(FILE * f, const struct rte_ip_frag_tbl *tbl); -#endif +#endif /* _RTE_IP_FRAG_H_ */ diff --git a/lib/librte_ip_frag/rte_ip_frag_common.c b/lib/librte_ip_frag/rte_ip_frag_common.c new file mode 100644 index 0000000..acd1864 --- /dev/null +++ b/lib/librte_ip_frag/rte_ip_frag_common.c @@ -0,0 +1,142 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#include +#include +#include + +#include "rte_ip_frag.h" +#include "ip_frag_common.h" + +#define IP_FRAG_HASH_FNUM 2 + +/* + * Free all mbufs on the death row and reset its count to zero. + * The first min(prefetch, dr->cnt) entries are prefetched up front; in the + * main loop each free is preceded by a prefetch of the entry that many + * slots ahead, and the remaining tail is freed without further prefetching. + */ +void +rte_ip_frag_free_death_row(struct rte_ip_frag_death_row *dr, + uint32_t prefetch) +{ + uint32_t i, k, n; + + k = RTE_MIN(prefetch, dr->cnt); + n = dr->cnt; + + for (i = 0; i != k; i++) + rte_prefetch0(dr->row[i]); + + for (i = 0; i != n - k; i++) { + rte_prefetch0(dr->row[i + k]); + rte_pktmbuf_free(dr->row[i]); + } + + for (; i != n; i++) + rte_pktmbuf_free(dr->row[i]); + + dr->cnt = 0; +} + +/* + * Create a fragmentation table. + * The entry count is the product of rte_align32pow2(bucket_num), + * bucket_entries and IP_FRAG_HASH_FNUM, held in a 64-bit variable. + */ +struct rte_ip_frag_tbl * +rte_ip_frag_table_create(uint32_t bucket_num, uint32_t bucket_entries, + uint32_t max_entries, uint64_t max_cycles, int socket_id) +{ + struct rte_ip_frag_tbl *tbl; + size_t sz; + uint64_t nb_entries; + + nb_entries = rte_align32pow2(bucket_num); + nb_entries *= bucket_entries; + nb_entries *= IP_FRAG_HASH_FNUM; + + /* check input parameters.
*/ + if (rte_is_power_of_2(bucket_entries) == 0 || + nb_entries > UINT32_MAX || nb_entries == 0 || + nb_entries < max_entries) { + RTE_LOG(ERR, USER1, "%s: invalid input parameter\n", __func__); + return (NULL); + } + + sz = sizeof (*tbl) + nb_entries * sizeof (tbl->pkt[0]); + if ((tbl = rte_zmalloc_socket(__func__, sz, CACHE_LINE_SIZE, + socket_id)) == NULL) { + RTE_LOG(ERR, USER1, + "%s: allocation of %zu bytes at socket %d failed do\n", + __func__, sz, socket_id); + return (NULL); + } + + RTE_LOG(INFO, USER1, "%s: allocated of %zu bytes at socket %d\n", + __func__, sz, socket_id); + + tbl->max_cycles = max_cycles; + tbl->max_entries = max_entries; + tbl->nb_entries = (uint32_t)nb_entries; + tbl->nb_buckets = bucket_num; /* stored as passed, not rounded up to a power of two */ + tbl->bucket_entries = bucket_entries; + tbl->entry_mask = (tbl->nb_entries - 1) & ~(tbl->bucket_entries - 1); /* low bits cleared: a masked index is a multiple of bucket_entries */ + + TAILQ_INIT(&(tbl->lru)); + return (tbl); +} + +/* + * Dump fragmentation table statistics to file f. + * The hash-collision failure count is not stored; it is derived as + * fail_total - fail_nospace. + */ +void +rte_ip_frag_table_statistics_dump(FILE *f, const struct rte_ip_frag_tbl *tbl) +{ + uint64_t fail_total, fail_nospace; + + fail_total = tbl->stat.fail_total; + fail_nospace = tbl->stat.fail_nospace; + + fprintf(f, "max entries:\t%u;\n" + "entries in use:\t%u;\n" + "finds/inserts:\t%" PRIu64 ";\n" + "entries added:\t%" PRIu64 ";\n" + "entries deleted by timeout:\t%" PRIu64 ";\n" + "entries reused by timeout:\t%" PRIu64 ";\n" + "total add failures:\t%" PRIu64 ";\n" + "add no-space failures:\t%" PRIu64 ";\n" + "add hash-collisions failures:\t%" PRIu64 ";\n", + tbl->max_entries, + tbl->use_entries, + tbl->stat.find_num, + tbl->stat.add_num, + tbl->stat.del_num, + tbl->stat.reuse_num, + fail_total, + fail_nospace, + fail_total - fail_nospace); +} diff --git a/lib/librte_ip_frag/rte_ipv4_reassembly.c b/lib/librte_ip_frag/rte_ipv4_reassembly.c new file mode 100644 index 0000000..483fb95 --- /dev/null +++ b/lib/librte_ip_frag/rte_ipv4_reassembly.c @@ -0,0 +1,189 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation.
All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "rte_ip_frag.h" +#include "ip_frag_common.h" + +/* + * Reassemble fragments into one packet. 
+ */ +struct rte_mbuf * +ipv4_frag_reassemble(const struct rte_ip_frag_pkt *fp) +{ + struct ipv4_hdr *ip_hdr; + struct rte_mbuf *m, *prev; + uint32_t i, n, ofs, first_len; + + first_len = fp->frags[IP_FIRST_FRAG_IDX].len; + n = fp->last_idx - 1; + + /* start from the last fragment. */ + m = fp->frags[IP_LAST_FRAG_IDX].mb; + ofs = fp->frags[IP_LAST_FRAG_IDX].ofs; + + while (ofs != first_len) { + + prev = m; + + for (i = n; i != IP_FIRST_FRAG_IDX && ofs != first_len; i--) { + + /* found the fragment that ends where the current one starts. */ + if(fp->frags[i].ofs + fp->frags[i].len == ofs) { + + ip_frag_chain(fp->frags[i].mb, m); + + /* update our last fragment and offset. */ + m = fp->frags[i].mb; + ofs = fp->frags[i].ofs; + } + } + + /* error - nothing was chained in this pass: hole in the packet. */ + if (m == prev) { + return (NULL); + } + } + + /* chain with the first fragment. */ + ip_frag_chain(fp->frags[IP_FIRST_FRAG_IDX].mb, m); + m = fp->frags[IP_FIRST_FRAG_IDX].mb; + + /* update mbuf fields for reassembled packet. */ + m->ol_flags |= PKT_TX_IP_CKSUM; + + /* update ipv4 header for the reassembled packet */ + ip_hdr = (struct ipv4_hdr*)(rte_pktmbuf_mtod(m, uint8_t *) + + m->pkt.vlan_macip.f.l2_len); + + ip_hdr->total_length = rte_cpu_to_be_16((uint16_t)(fp->total_size + + m->pkt.vlan_macip.f.l3_len)); + ip_hdr->fragment_offset = (uint16_t)(ip_hdr->fragment_offset & + rte_cpu_to_be_16(IPV4_HDR_DF_FLAG)); /* keep only the DF flag bit */ + ip_hdr->hdr_checksum = 0; + + return (m); +} + +/* + * Process new mbuf with fragment of IPV4 packet. + * Incoming mbuf should have its l2_len/l3_len fields set up correctly. + * @param tbl + * Table where to lookup/add the fragmented packet. + * @param dr + * Death row that collects the mbufs to be freed. + * @param mb + * Incoming mbuf with IPV4 fragment. + * @param tms + * Fragment arrival timestamp. + * @param ip_hdr + * Pointer to the IPV4 header inside the fragment. + * @return + * Pointer to mbuf for reassembled packet, or NULL if: + * - an error occurred. + * - not all fragments of the packet are collected yet.
+ */ +struct rte_mbuf * +rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, + struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms, + struct ipv4_hdr *ip_hdr) +{ + struct rte_ip_frag_pkt *fp; + struct ip_frag_key key; + const uint64_t *psd; + uint16_t ip_len; + uint16_t flag_offset, ip_ofs, ip_flag; + + flag_offset = rte_be_to_cpu_16(ip_hdr->fragment_offset); + ip_ofs = (uint16_t)(flag_offset & IPV4_HDR_OFFSET_MASK); + ip_flag = (uint16_t)(flag_offset & IPV4_HDR_MF_FLAG); + + psd = (uint64_t *)&ip_hdr->src_addr; + key.src_dst = *psd; /* 64 bits read starting at src_addr */ + key.id = ip_hdr->packet_id; /* copied as found in the header, no byte swap */ + + ip_ofs *= IPV4_HDR_OFFSET_UNITS; /* presumably converts the offset to bytes - confirm */ + ip_len = (uint16_t)(rte_be_to_cpu_16(ip_hdr->total_length) - + mb->pkt.vlan_macip.f.l3_len); /* total_length minus the l3_len recorded in the mbuf */ + + IP_FRAG_LOG(DEBUG, "%s:%d:\n" + "mbuf: %p, tms: %" PRIu64 + ", key: <%" PRIx64 ", %#x>, ofs: %u, len: %u, flags: %#x\n" + "tbl: %p, max_cycles: %" PRIu64 ", entry_mask: %#x, " + "max_entries: %u, use_entries: %u\n\n", + __func__, __LINE__, + mb, tms, key.src_dst, key.id, ip_ofs, ip_len, ip_flag, + tbl, tbl->max_cycles, tbl->entry_mask, tbl->max_entries, + tbl->use_entries); + + /* try to find/add entry into the fragment's table. */ + if ((fp = ip_frag_find(tbl, dr, &key, tms)) == NULL) { + IP_FRAG_MBUF2DR(dr, mb); /* no entry available: hand the fragment to the death row */ + return (NULL); + } + + IP_FRAG_LOG(DEBUG, "%s:%d:\n" + "tbl: %p, max_entries: %u, use_entries: %u\n" + "ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, start: %" PRIu64 + ", total_size: %u, frag_size: %u, last_idx: %u\n\n", + __func__, __LINE__, + tbl, tbl->max_entries, tbl->use_entries, + fp, fp->key.src_dst, fp->key.id, fp->start, + fp->total_size, fp->frag_size, fp->last_idx); + + + /* process the fragmented packet.
*/ + mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len, ip_flag); + ip_frag_inuse(tbl, fp); + + IP_FRAG_LOG(DEBUG, "%s:%d:\n" + "mbuf: %p\n" + "tbl: %p, max_entries: %u, use_entries: %u\n" + "ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, start: %" PRIu64 + ", total_size: %u, frag_size: %u, last_idx: %u\n\n", + __func__, __LINE__, mb, + tbl, tbl->max_entries, tbl->use_entries, + fp, fp->key.src_dst, fp->key.id, fp->start, + fp->total_size, fp->frag_size, fp->last_idx); + + return (mb); +} diff --git a/lib/librte_ip_frag/rte_ipv4_rsmbl.h b/lib/librte_ip_frag/rte_ipv4_rsmbl.h deleted file mode 100644 index 82cb9b5..0000000 --- a/lib/librte_ip_frag/rte_ipv4_rsmbl.h +++ /dev/null @@ -1,427 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _IPV4_RSMBL_H_ -#define _IPV4_RSMBL_H_ - -#include "ip_frag_common.h" - -/** - * @file - * IPv4 reassemble - * - * Implementation of IPv4 reassemble. - * - */ - -enum { - LAST_FRAG_IDX, - FIRST_FRAG_IDX, - MIN_FRAG_NUM, - MAX_FRAG_NUM = 4, -}; - -struct ip_frag { - uint16_t ofs; - uint16_t len; - struct rte_mbuf *mb; -}; - -/* - * Use to uniquely indetify fragmented datagram. - */ -struct ip_frag_key { - uint64_t src_dst; - uint32_t id; -}; - -#define IP_FRAG_KEY_INVALIDATE(k) ((k)->src_dst = 0) -#define IP_FRAG_KEY_EMPTY(k) ((k)->src_dst == 0) - -#define IP_FRAG_KEY_CMP(k1, k2) \ - (((k1)->src_dst ^ (k2)->src_dst) | ((k1)->id ^ (k2)->id)) - - -/* - * Fragmented packet to reassemble. - * First two entries in the frags[] array are for the last and first fragments. - */ -struct ip_frag_pkt { - TAILQ_ENTRY(ip_frag_pkt) lru; /* LRU list */ - struct ip_frag_key key; - uint64_t start; /* creation timestamp */ - uint32_t total_size; /* expected reassembled size */ - uint32_t frag_size; /* size of fragments received */ - uint32_t last_idx; /* index of next entry to fill */ - struct ip_frag frags[MAX_FRAG_NUM]; -} __rte_cache_aligned; - - -struct ip_frag_death_row { - uint32_t cnt; - struct rte_mbuf *row[MAX_PKT_BURST * (MAX_FRAG_NUM + 1)]; -}; - -#define IP_FRAG_MBUF2DR(dr, mb) ((dr)->row[(dr)->cnt++] = (mb)) - -/* logging macros. */ - -#ifdef IP_FRAG_DEBUG -#define IP_FRAG_LOG(lvl, fmt, args...) 
RTE_LOG(lvl, USER1, fmt, ##args) -#else -#define IP_FRAG_LOG(lvl, fmt, args...) do {} while(0) -#endif /* IP_FRAG_DEBUG */ - - -static inline void -ip_frag_reset(struct ip_frag_pkt *fp, uint64_t tms) -{ - static const struct ip_frag zero_frag = { - .ofs = 0, - .len = 0, - .mb = NULL, - }; - - fp->start = tms; - fp->total_size = UINT32_MAX; - fp->frag_size = 0; - fp->last_idx = MIN_FRAG_NUM; - fp->frags[LAST_FRAG_IDX] = zero_frag; - fp->frags[FIRST_FRAG_IDX] = zero_frag; -} - -static inline void -ip_frag_free(struct ip_frag_pkt *fp, struct ip_frag_death_row *dr) -{ - uint32_t i, k; - - k = dr->cnt; - for (i = 0; i != fp->last_idx; i++) { - if (fp->frags[i].mb != NULL) { - dr->row[k++] = fp->frags[i].mb; - fp->frags[i].mb = NULL; - } - } - - fp->last_idx = 0; - dr->cnt = k; -} - -static inline void -rte_ip_frag_free_death_row(struct ip_frag_death_row *dr, uint32_t prefetch) -{ - uint32_t i, k, n; - - k = RTE_MIN(prefetch, dr->cnt); - n = dr->cnt; - - for (i = 0; i != k; i++) - rte_prefetch0(dr->row[i]); - - for (i = 0; i != n - k; i++) { - rte_prefetch0(dr->row[i + k]); - rte_pktmbuf_free(dr->row[i]); - } - - for (; i != n; i++) - rte_pktmbuf_free(dr->row[i]); - - dr->cnt = 0; -} - -/* - * Helper function. - * Takes 2 mbufs that represents two framents of the same packet and - * chains them into one mbuf. - */ -static inline void -ip_frag_chain(struct rte_mbuf *mn, struct rte_mbuf *mp) -{ - struct rte_mbuf *ms; - - /* adjust start of the last fragment data. */ - rte_pktmbuf_adj(mp, (uint16_t)(mp->pkt.vlan_macip.f.l2_len + - mp->pkt.vlan_macip.f.l3_len)); - - /* chain two fragments. */ - ms = rte_pktmbuf_lastseg(mn); - ms->pkt.next = mp; - - /* accumulate number of segments and total length. */ - mn->pkt.nb_segs = (uint8_t)(mn->pkt.nb_segs + mp->pkt.nb_segs); - mn->pkt.pkt_len += mp->pkt.pkt_len; - - /* reset pkt_len and nb_segs for chained fragment. */ - mp->pkt.pkt_len = mp->pkt.data_len; - mp->pkt.nb_segs = 1; -} - -/* - * Reassemble fragments into one packet. 
- */ -static inline struct rte_mbuf * -ipv4_frag_reassemble(const struct ip_frag_pkt *fp) -{ - struct ipv4_hdr *ip_hdr; - struct rte_mbuf *m, *prev; - uint32_t i, n, ofs, first_len; - - first_len = fp->frags[FIRST_FRAG_IDX].len; - n = fp->last_idx - 1; - - /*start from the last fragment. */ - m = fp->frags[LAST_FRAG_IDX].mb; - ofs = fp->frags[LAST_FRAG_IDX].ofs; - - while (ofs != first_len) { - - prev = m; - - for (i = n; i != FIRST_FRAG_IDX && ofs != first_len; i--) { - - /* previous fragment found. */ - if(fp->frags[i].ofs + fp->frags[i].len == ofs) { - - ip_frag_chain(fp->frags[i].mb, m); - - /* update our last fragment and offset. */ - m = fp->frags[i].mb; - ofs = fp->frags[i].ofs; - } - } - - /* error - hole in the packet. */ - if (m == prev) { - return (NULL); - } - } - - /* chain with the first fragment. */ - ip_frag_chain(fp->frags[FIRST_FRAG_IDX].mb, m); - m = fp->frags[FIRST_FRAG_IDX].mb; - - /* update mbuf fields for reassembled packet. */ - m->ol_flags |= PKT_TX_IP_CKSUM; - - /* update ipv4 header for the reassmebled packet */ - ip_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(m, uint8_t *) + - m->pkt.vlan_macip.f.l2_len); - - ip_hdr->total_length = rte_cpu_to_be_16((uint16_t)(fp->total_size + - m->pkt.vlan_macip.f.l3_len)); - ip_hdr->fragment_offset = (uint16_t)(ip_hdr->fragment_offset & - rte_cpu_to_be_16(IPV4_HDR_DF_FLAG)); - ip_hdr->hdr_checksum = 0; - - return (m); -} - -static inline struct rte_mbuf * -ip_frag_process(struct ip_frag_pkt *fp, struct ip_frag_death_row *dr, - struct rte_mbuf *mb, uint16_t ofs, uint16_t len, uint16_t more_frags) -{ - uint32_t idx; - - fp->frag_size += len; - - /* this is the first fragment. */ - if (ofs == 0) { - idx = (fp->frags[FIRST_FRAG_IDX].mb == NULL) ? - FIRST_FRAG_IDX : UINT32_MAX; - - /* this is the last fragment. */ - } else if (more_frags == 0) { - fp->total_size = ofs + len; - idx = (fp->frags[LAST_FRAG_IDX].mb == NULL) ? - LAST_FRAG_IDX : UINT32_MAX; - - /* this is the intermediate fragment. 
*/ - } else if ((idx = fp->last_idx) < - sizeof (fp->frags) / sizeof (fp->frags[0])) { - fp->last_idx++; - } - - /* - * errorneous packet: either exceeed max allowed number of fragments, - * or duplicate first/last fragment encountered. - */ - if (idx >= sizeof (fp->frags) / sizeof (fp->frags[0])) { - - /* report an error. */ - IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n" - "ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, " - "total_size: %u, frag_size: %u, last_idx: %u\n" - "first fragment: ofs: %u, len: %u\n" - "last fragment: ofs: %u, len: %u\n\n", - __func__, __LINE__, - fp, fp->key.src_dst, fp->key.id, - fp->total_size, fp->frag_size, fp->last_idx, - fp->frags[FIRST_FRAG_IDX].ofs, - fp->frags[FIRST_FRAG_IDX].len, - fp->frags[LAST_FRAG_IDX].ofs, - fp->frags[LAST_FRAG_IDX].len); - - /* free all fragments, invalidate the entry. */ - ip_frag_free(fp, dr); - IP_FRAG_KEY_INVALIDATE(&fp->key); - IP_FRAG_MBUF2DR(dr, mb); - - return (NULL); - } - - fp->frags[idx].ofs = ofs; - fp->frags[idx].len = len; - fp->frags[idx].mb = mb; - - mb = NULL; - - /* not all fragments are collected yet. */ - if (likely (fp->frag_size < fp->total_size)) { - return (mb); - - /* if we collected all fragments, then try to reassemble. */ - } else if (fp->frag_size == fp->total_size && - fp->frags[FIRST_FRAG_IDX].mb != NULL) { - mb = ipv4_frag_reassemble(fp); - } - - /* errorenous set of fragments. */ - if (mb == NULL) { - - /* report an error. */ - IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n" - "ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, " - "total_size: %u, frag_size: %u, last_idx: %u\n" - "first fragment: ofs: %u, len: %u\n" - "last fragment: ofs: %u, len: %u\n\n", - __func__, __LINE__, - fp, fp->key.src_dst, fp->key.id, - fp->total_size, fp->frag_size, fp->last_idx, - fp->frags[FIRST_FRAG_IDX].ofs, - fp->frags[FIRST_FRAG_IDX].len, - fp->frags[LAST_FRAG_IDX].ofs, - fp->frags[LAST_FRAG_IDX].len); - - /* free associated resources. 
*/ - ip_frag_free(fp, dr); - } - - /* we are done with that entry, invalidate it. */ - IP_FRAG_KEY_INVALIDATE(&fp->key); - return (mb); -} - -#include "ipv4_frag_tbl.h" - -/* - * Process new mbuf with fragment of IPV4 packet. - * Incoming mbuf should have it's l2_len/l3_len fields setuped correclty. - * @param tbl - * Table where to lookup/add the fragmented packet. - * @param mb - * Incoming mbuf with IPV4 fragment. - * @param tms - * Fragment arrival timestamp. - * @param ip_hdr - * Pointer to the IPV4 header inside the fragment. - * @param ip_ofs - * Fragment's offset (as extracted from the header). - * @param ip_flag - * Fragment's MF flag. - * @return - * Pointer to mbuf for reassebled packet, or NULL if: - * - an error occured. - * - not all fragments of the packet are collected yet. - */ -static inline struct rte_mbuf * -rte_ipv4_reassemble_packet(struct ip_frag_tbl *tbl, - struct ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms, - struct ipv4_hdr *ip_hdr, uint16_t ip_ofs, uint16_t ip_flag) -{ - struct ip_frag_pkt *fp; - struct ip_frag_key key; - const uint64_t *psd; - uint16_t ip_len; - - psd = (uint64_t *)&ip_hdr->src_addr; - key.src_dst = psd[0]; - key.id = ip_hdr->packet_id; - - ip_ofs *= IPV4_HDR_OFFSET_UNITS; - ip_len = (uint16_t)(rte_be_to_cpu_16(ip_hdr->total_length) - - mb->pkt.vlan_macip.f.l3_len); - - IP_FRAG_LOG(DEBUG, "%s:%d:\n" - "mbuf: %p, tms: %" PRIu64 - ", key: <%" PRIx64 ", %#x>, ofs: %u, len: %u, flags: %#x\n" - "tbl: %p, max_cycles: %" PRIu64 ", entry_mask: %#x, " - "max_entries: %u, use_entries: %u\n\n", - __func__, __LINE__, - mb, tms, key.src_dst, key.id, ip_ofs, ip_len, ip_flag, - tbl, tbl->max_cycles, tbl->entry_mask, tbl->max_entries, - tbl->use_entries); - - /* try to find/add entry into the fragment's table. 
*/ - if ((fp = ip_frag_find(tbl, dr, &key, tms)) == NULL) { - IP_FRAG_MBUF2DR(dr, mb); - return NULL; - } - - IP_FRAG_LOG(DEBUG, "%s:%d:\n" - "tbl: %p, max_entries: %u, use_entries: %u\n" - "ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, start: %" PRIu64 - ", total_size: %u, frag_size: %u, last_idx: %u\n\n", - __func__, __LINE__, - tbl, tbl->max_entries, tbl->use_entries, - fp, fp->key.src_dst, fp->key.id, fp->start, - fp->total_size, fp->frag_size, fp->last_idx); - - - /* process the fragmented packet. */ - mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len, ip_flag); - ip_frag_inuse(tbl, fp); - - IP_FRAG_LOG(DEBUG, "%s:%d:\n" - "mbuf: %p\n" - "tbl: %p, max_entries: %u, use_entries: %u\n" - "ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, start: %" PRIu64 - ", total_size: %u, frag_size: %u, last_idx: %u\n\n", - __func__, __LINE__, mb, - tbl, tbl->max_entries, tbl->use_entries, - fp, fp->key.src_dst, fp->key.id, fp->start, - fp->total_size, fp->frag_size, fp->last_idx); - - return (mb); -} - -#endif /* _IPV4_RSMBL_H_ */ -- 1.8.1.4