DPDK patches and discussions
 help / color / mirror / Atom feed
From: Anatoly Burakov <anatoly.burakov@intel.com>
To: dev@dpdk.org
Subject: [dpdk-dev] [PATCH 07/13] ip_frag: refactored reassembly code and made it a proper library
Date: Wed, 28 May 2014 18:32:41 +0100	[thread overview]
Message-ID: <1625c043bb49508dd2aab9877f86183ab7e403d7.1401298292.git.anatoly.burakov@intel.com> (raw)
In-Reply-To: <cover.1401298292.git.anatoly.burakov@intel.com>
In-Reply-To: <cover.1401298292.git.anatoly.burakov@intel.com>


Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 config/common_bsdapp                     |   2 +
 config/common_linuxapp                   |   2 +
 examples/ip_reassembly/main.c            |  24 +-
 lib/librte_ip_frag/Makefile              |   6 +-
 lib/librte_ip_frag/ip_frag_common.h      | 134 +++++++++-
 lib/librte_ip_frag/ip_frag_internal.c    | 337 ++++++++++++++++++++++++
 lib/librte_ip_frag/ipv4_frag_tbl.h       | 400 -----------------------------
 lib/librte_ip_frag/rte_ip_frag.h         | 223 +++++++++++++++-
 lib/librte_ip_frag/rte_ip_frag_common.c  | 142 ++++++++++
 lib/librte_ip_frag/rte_ipv4_reassembly.c | 189 ++++++++++++++
 lib/librte_ip_frag/rte_ipv4_rsmbl.h      | 427 -------------------------------
 11 files changed, 1023 insertions(+), 863 deletions(-)
 create mode 100644 lib/librte_ip_frag/ip_frag_internal.c
 delete mode 100644 lib/librte_ip_frag/ipv4_frag_tbl.h
 create mode 100644 lib/librte_ip_frag/rte_ip_frag_common.c
 create mode 100644 lib/librte_ip_frag/rte_ipv4_reassembly.c
 delete mode 100644 lib/librte_ip_frag/rte_ipv4_rsmbl.h

diff --git a/config/common_bsdapp b/config/common_bsdapp
index d30802e..be56ca7 100644
--- a/config/common_bsdapp
+++ b/config/common_bsdapp
@@ -261,6 +261,8 @@ CONFIG_RTE_LIBRTE_NET=y
 # Compile librte_net
 #
 CONFIG_RTE_LIBRTE_IP_FRAG=y
+CONFIG_RTE_LIBRTE_IP_FRAG_DEBUG=n
+CONFIG_RTE_LIBRTE_IP_FRAG_MAX_FRAG=4
 
 #
 # Compile librte_meter
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 074d961..4d58496 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -288,6 +288,8 @@ CONFIG_RTE_LIBRTE_NET=y
 # Compile librte_net
 #
 CONFIG_RTE_LIBRTE_IP_FRAG=y
+CONFIG_RTE_LIBRTE_IP_FRAG_DEBUG=n
+CONFIG_RTE_LIBRTE_IP_FRAG_MAX_FRAG=4
 
 #
 # Compile librte_meter
diff --git a/examples/ip_reassembly/main.c b/examples/ip_reassembly/main.c
index 23ec4be..6c40d76 100644
--- a/examples/ip_reassembly/main.c
+++ b/examples/ip_reassembly/main.c
@@ -94,7 +94,7 @@
 
 #define MAX_PKT_BURST 32
 
-#include "rte_ipv4_rsmbl.h"
+#include "rte_ip_frag.h"
 
 #ifndef IPv6_BYTES
 #define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\
@@ -407,9 +407,9 @@ struct lcore_conf {
 #else
 	lookup_struct_t * ipv6_lookup_struct;
 #endif
-	struct ip_frag_tbl *frag_tbl[MAX_RX_QUEUE_PER_LCORE];
+	struct rte_ip_frag_tbl *frag_tbl[MAX_RX_QUEUE_PER_LCORE];
 	struct rte_mempool *pool[MAX_RX_QUEUE_PER_LCORE];
-	struct ip_frag_death_row death_row;
+	struct rte_ip_frag_death_row death_row;
 	struct mbuf_table *tx_mbufs[MAX_PORTS];
 	struct tx_lcore_stat tx_stat;
 } __rte_cache_aligned;
@@ -645,7 +645,6 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, uint32_t queue,
 	struct ipv4_hdr *ipv4_hdr;
 	void *d_addr_bytes;
 	uint8_t dst_port;
-	uint16_t flag_offset, ip_flag, ip_ofs;
 
 	eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
 
@@ -665,16 +664,12 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, uint32_t queue,
 		++(ipv4_hdr->hdr_checksum);
 #endif
 
-		flag_offset = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
-		ip_ofs = (uint16_t)(flag_offset & IPV4_HDR_OFFSET_MASK);
-		ip_flag = (uint16_t)(flag_offset & IPV4_HDR_MF_FLAG);
-
 		 /* if it is a fragmented packet, then try to reassemble. */
-		if (ip_flag != 0 || ip_ofs  != 0) {
+		if (rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)) {
 
 			struct rte_mbuf *mo;
-			struct ip_frag_tbl *tbl;
-			struct ip_frag_death_row *dr;
+			struct rte_ip_frag_tbl *tbl;
+			struct rte_ip_frag_death_row *dr;
 
 			tbl = qconf->frag_tbl[queue];
 			dr = &qconf->death_row;
@@ -684,8 +679,8 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, uint32_t queue,
 			m->pkt.vlan_macip.f.l3_len = sizeof(*ipv4_hdr);
 
 			/* process this fragment. */
-			if ((mo = rte_ipv4_reassemble_packet(tbl, dr, m, tms, ipv4_hdr,
-					ip_ofs, ip_flag)) == NULL) 
+			if ((mo = rte_ipv4_frag_reassemble_packet(tbl, dr, m, tms,
+					ipv4_hdr)) == NULL)
 				/* no packet to send out. */
 				return;
 
@@ -1469,7 +1464,8 @@ setup_queue_tbl(struct lcore_conf *qconf, uint32_t lcore, int socket,
 	 * Plus, each TX queue can hold up to <max_flow_num> packets.
 	 */ 
 
-	nb_mbuf = 2 * RTE_MAX(max_flow_num, 2UL * MAX_PKT_BURST) * MAX_FRAG_NUM;
+	nb_mbuf = 2 * RTE_MAX(max_flow_num, 2UL * MAX_PKT_BURST) *
+			RTE_LIBRTE_IP_FRAG_MAX_FRAG;
 	nb_mbuf *= (port_conf.rxmode.max_rx_pkt_len + BUF_SIZE - 1) / BUF_SIZE;
 	nb_mbuf += RTE_TEST_RX_DESC_DEFAULT + RTE_TEST_TX_DESC_DEFAULT;
 
diff --git a/lib/librte_ip_frag/Makefile b/lib/librte_ip_frag/Makefile
index 13a83b1..022092d 100644
--- a/lib/librte_ip_frag/Makefile
+++ b/lib/librte_ip_frag/Makefile
@@ -39,11 +39,13 @@ CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
 
 #source files
 SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ipv4_fragmentation.c
+SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ipv4_reassembly.c
+SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ip_frag_common.c
+SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += ip_frag_internal.c
 
 # install this header file
 SYMLINK-$(CONFIG_RTE_LIBRTE_IP_FRAG)-include += rte_ip_frag.h
-SYMLINK-$(CONFIG_RTE_LIBRTE_IP_FRAG)-include += ipv4_frag_tbl.h
-SYMLINK-$(CONFIG_RTE_LIBRTE_IP_FRAG)-include += rte_ipv4_rsmbl.h
+
 
 # this library depends on rte_ether
 DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_mempool lib/librte_ether
diff --git a/lib/librte_ip_frag/ip_frag_common.h b/lib/librte_ip_frag/ip_frag_common.h
index 6d4706a..3e588a0 100644
--- a/lib/librte_ip_frag/ip_frag_common.h
+++ b/lib/librte_ip_frag/ip_frag_common.h
@@ -36,19 +36,141 @@
 
 #include "rte_ip_frag.h"
 
-/* Debug on/off */
-#ifdef RTE_IP_FRAG_DEBUG
+/* logging macros. */
+#ifdef RTE_LIBRTE_IP_FRAG_DEBUG
+
+#define	IP_FRAG_LOG(lvl, fmt, args...)	RTE_LOG(lvl, USER1, fmt, ##args)
 
 #define	RTE_IP_FRAG_ASSERT(exp)					\
 if (!(exp))	{							\
 	rte_panic("function %s, line%d\tassert \"" #exp "\" failed\n",	\
 		__func__, __LINE__);					\
 }
+#else
+#define	IP_FRAG_LOG(lvl, fmt, args...)	do {} while(0)
+#define RTE_IP_FRAG_ASSERT(exp)	do { } while(0)
+#endif /* RTE_LIBRTE_IP_FRAG_DEBUG */
+
+/* helper macros */
+#define	IP_FRAG_MBUF2DR(dr, mb)	((dr)->row[(dr)->cnt++] = (mb))
+
+/* internal functions declarations */
+struct rte_mbuf * ip_frag_process(struct rte_ip_frag_pkt *fp,
+		struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb,
+		uint16_t ofs, uint16_t len, uint16_t more_frags);
+
+struct rte_ip_frag_pkt * ip_frag_find(struct rte_ip_frag_tbl *tbl,
+		struct rte_ip_frag_death_row *dr,
+		const struct ip_frag_key *key, uint64_t tms);
+
+struct rte_ip_frag_pkt * ip_frag_lookup(struct rte_ip_frag_tbl *tbl,
+	const struct ip_frag_key *key, uint64_t tms,
+	struct rte_ip_frag_pkt **free, struct rte_ip_frag_pkt **stale);
+
+/* these functions need to be declared here as ip_frag_process relies on them */
+struct rte_mbuf * ipv4_frag_reassemble(const struct rte_ip_frag_pkt *fp);
+
+
+
+/*
+ * misc frag key functions
+ */
+
+/* return 1 when the key is unused (src_dst is zero), 0 otherwise */
+static inline int
+ip_frag_key_is_empty(const struct ip_frag_key * key)
+{
+	const int in_use = (key->src_dst != 0);
+
+	return !in_use;
+}
 
-#else /*RTE_IP_FRAG_DEBUG*/
+/* empty the key: ip_frag_key_is_empty() tests for this zero src_dst */
+static inline void
+ip_frag_key_invalidate(struct ip_frag_key * key)
+{
+	key->src_dst = 0;
+}
+
+/* compare two keys: 0 if src_dst and id both match, non-zero otherwise */
+static inline int
+ip_frag_key_cmp(const struct ip_frag_key * k1, const struct ip_frag_key * k2)
+{
+	return (k1->src_dst != k2->src_dst) || (k1->id != k2->id);
+}
 
-#define RTE_IP_FRAG_ASSERT(exp)	do { } while (0)
+/*
+ * misc fragment functions
+ */
+
+/* move every mbuf still held by the entry onto the death row */
+static inline void
+ip_frag_free(struct rte_ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr)
+{
+	uint32_t idx, n;
+
+	n = dr->cnt;
+	for (idx = 0; idx != fp->last_idx; idx++) {
+		if (fp->frags[idx].mb == NULL)
+			continue;
+		/* hand the mbuf over and forget it in the entry */
+		dr->row[n++] = fp->frags[idx].mb;
+		fp->frags[idx].mb = NULL;
+	}
+	dr->cnt = n;
+	fp->last_idx = 0;
+}
+
+/* if the key was invalidated, unlink the entry from the LRU list */
+static inline void
+ip_frag_inuse(struct rte_ip_frag_tbl *tbl, const struct  rte_ip_frag_pkt *fp)
+{
+	if (ip_frag_key_is_empty(&fp->key)) {
+		TAILQ_REMOVE(&tbl->lru, fp, lru);
+		tbl->use_entries--;	/* one table entry fewer in use */
+	}
+}
+
+/* restart the reassembly state of the entry, stamping it with tms */
+static inline void
+ip_frag_reset(struct rte_ip_frag_pkt *fp, uint64_t tms)
+{
+	/* all-zero fragment slot */
+	static const struct ip_frag blank = { .ofs = 0, .len = 0, .mb = NULL };
+
+	/* clear the slots reserved for the first and last fragment */
+	fp->frags[IP_FIRST_FRAG_IDX] = blank;
+	fp->frags[IP_LAST_FRAG_IDX] = blank;
+
+	/* nothing collected yet; other fragments start after the two slots */
+	fp->total_size = UINT32_MAX;
+	fp->frag_size = 0;
+	fp->last_idx = IP_MIN_FRAG_NUM;
+	fp->start = tms;
+}
+
+/* append fragment mp, trimmed by l2_len + l3_len, to the chain of mn */
+static inline void
+ip_frag_chain(struct rte_mbuf *mn, struct rte_mbuf *mp)
+{
+	struct rte_mbuf *ms;
+
+	/* adjust start of the last fragment data. */
+	rte_pktmbuf_adj(mp, (uint16_t)(mp->pkt.vlan_macip.f.l2_len +
+		mp->pkt.vlan_macip.f.l3_len));
+
+	/* chain two fragments: link mp after the last segment of mn. */
+	ms = rte_pktmbuf_lastseg(mn);
+	ms->pkt.next = mp;
+
+	/* accumulate number of segments and total length in the head mn. */
+	mn->pkt.nb_segs = (uint8_t)(mn->pkt.nb_segs + mp->pkt.nb_segs);
+	mn->pkt.pkt_len += mp->pkt.pkt_len;
+
+	/* reset pkt_len and nb_segs for chained fragment (after the sums above). */
+	mp->pkt.pkt_len = mp->pkt.data_len;
+	mp->pkt.nb_segs = 1;
+}
 
-#endif /*RTE_IP_FRAG_DEBUG*/
 
-#endif
+#endif /* _IP_FRAG_COMMON_H_ */
diff --git a/lib/librte_ip_frag/ip_frag_internal.c b/lib/librte_ip_frag/ip_frag_internal.c
new file mode 100644
index 0000000..2f5a4b8
--- /dev/null
+++ b/lib/librte_ip_frag/ip_frag_internal.c
@@ -0,0 +1,337 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <rte_byteorder.h>
+#include <rte_jhash.h>
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+#include <rte_hash_crc.h>
+#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
+
+#include "rte_ip_frag.h"
+#include "ip_frag_common.h"
+
+#define	PRIME_VALUE	0xeaad8405
+
+#define	IP_FRAG_TBL_POS(tbl, sig)	\
+	((tbl)->pkt + ((sig) & (tbl)->entry_mask))
+
+#ifdef RTE_LIBRTE_IP_FRAG_TBL_STAT
+#define	IP_FRAG_TBL_STAT_UPDATE(s, f, v)	((s)->f += (v))
+#else
+#define	IP_FRAG_TBL_STAT_UPDATE(s, f, v)	do {} while (0)
+#endif /* RTE_LIBRTE_IP_FRAG_TBL_STAT */
+
+/* local frag table helper functions */
+static inline void
+ip_frag_tbl_del(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr,
+	struct rte_ip_frag_pkt *fp)
+{
+	TAILQ_REMOVE(&tbl->lru, fp, lru);	/* unlink from the LRU list */
+	tbl->use_entries--;
+	ip_frag_key_invalidate(&fp->key);	/* slot becomes empty */
+	ip_frag_free(fp, dr);			/* mbufs go to the death row */
+	IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, del_num, 1);
+}
+
+static inline void
+ip_frag_tbl_add(struct rte_ip_frag_tbl *tbl,  struct rte_ip_frag_pkt *fp,
+	const struct ip_frag_key *key, uint64_t tms)
+{
+	ip_frag_reset(fp, tms);			/* fresh reassembly state */
+	fp->key = *key;				/* claim the slot for this key */
+	tbl->use_entries++;
+	TAILQ_INSERT_TAIL(&tbl->lru, fp, lru);	/* newest entry goes last */
+	IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, add_num, 1);
+}
+
+static inline void
+ip_frag_tbl_reuse(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr,
+	struct rte_ip_frag_pkt *fp, uint64_t tms)
+{
+	TAILQ_REMOVE(&tbl->lru, fp, lru);	/* move the entry ... */
+	TAILQ_INSERT_TAIL(&tbl->lru, fp, lru);	/* ... to the LRU tail */
+	ip_frag_free(fp, dr);			/* old mbufs go to the death row */
+	ip_frag_reset(fp, tms);			/* then restart its state */
+	IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, reuse_num, 1);
+}
+
+
+static inline void
+ipv4_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2)
+{
+	/* read src_dst as two 32-bit words */
+	const uint32_t *word = (const uint32_t *)&key->src_dst;
+	uint32_t sig;
+
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+	/* chain the crc hash over both words and the id */
+	sig = rte_hash_crc_4byte(word[0], PRIME_VALUE);
+	sig = rte_hash_crc_4byte(word[1], sig);
+	sig = rte_hash_crc_4byte(key->id, sig);
+#else
+	sig = rte_jhash_3words(word[0], word[1], key->id, PRIME_VALUE);
+#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
+
+	/* the second signature is derived from the first one */
+	*v1 = sig;
+	*v2 = (sig << 7) + (sig >> 14);
+}
+
+/* Store fragment mb in fp. Returns the reassembled packet once all fragments
+ * are collected, NULL otherwise; an invalid set goes to the death row dr. */
+struct rte_mbuf *
+ip_frag_process(struct rte_ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr,
+	struct rte_mbuf *mb, uint16_t ofs, uint16_t len, uint16_t more_frags)
+{
+	uint32_t idx;
+
+	fp->frag_size += len;
+
+	/* this is the first fragment. */
+	if (ofs == 0) {
+		idx = (fp->frags[IP_FIRST_FRAG_IDX].mb == NULL) ?
+				IP_FIRST_FRAG_IDX : UINT32_MAX;
+
+	/* this is the last fragment. */
+	} else if (more_frags == 0) {
+		fp->total_size = ofs + len;
+		idx = (fp->frags[IP_LAST_FRAG_IDX].mb == NULL) ?
+				IP_LAST_FRAG_IDX : UINT32_MAX;
+
+	/* this is the intermediate fragment. */
+	} else if ((idx = fp->last_idx) <
+		sizeof (fp->frags) / sizeof (fp->frags[0])) {
+		fp->last_idx++;
+	}
+
+	/*
+	 * erroneous packet: either exceeded max allowed number of fragments,
+	 * or duplicate first/last fragment encountered.
+	 */
+	if (idx >= sizeof (fp->frags) / sizeof (fp->frags[0])) {
+		/* report an error. */
+		IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
+			"ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, "
+			"total_size: %u, frag_size: %u, last_idx: %u\n"
+			"first fragment: ofs: %u, len: %u\n"
+			"last fragment: ofs: %u, len: %u\n\n",
+			__func__, __LINE__,
+			fp, fp->key.src_dst, fp->key.id,
+			fp->total_size, fp->frag_size, fp->last_idx,
+			fp->frags[IP_FIRST_FRAG_IDX].ofs,
+			fp->frags[IP_FIRST_FRAG_IDX].len,
+			fp->frags[IP_LAST_FRAG_IDX].ofs,
+			fp->frags[IP_LAST_FRAG_IDX].len);
+
+		/* free all fragments, invalidate the entry. */
+		ip_frag_free(fp, dr);
+		ip_frag_key_invalidate(&fp->key);
+		IP_FRAG_MBUF2DR(dr, mb);
+
+		return (NULL);
+	}
+
+	fp->frags[idx].ofs = ofs;
+	fp->frags[idx].len = len;
+	fp->frags[idx].mb = mb;
+
+	mb = NULL;
+
+	/* not all fragments are collected yet. */
+	if (likely (fp->frag_size < fp->total_size)) {
+		return (mb);
+
+	/* if we collected all fragments, then try to reassemble. */
+	} else if (fp->frag_size == fp->total_size &&
+			fp->frags[IP_FIRST_FRAG_IDX].mb != NULL)
+		mb = ipv4_frag_reassemble(fp);
+
+	/* erroneous set of fragments. */
+	if (mb == NULL) {
+		/* report an error. */
+		IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
+			"ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, "
+			"total_size: %u, frag_size: %u, last_idx: %u\n"
+			"first fragment: ofs: %u, len: %u\n"
+			"last fragment: ofs: %u, len: %u\n\n",
+			__func__, __LINE__,
+			fp, fp->key.src_dst, fp->key.id,
+			fp->total_size, fp->frag_size, fp->last_idx,
+			fp->frags[IP_FIRST_FRAG_IDX].ofs,
+			fp->frags[IP_FIRST_FRAG_IDX].len,
+			fp->frags[IP_LAST_FRAG_IDX].ofs,
+			fp->frags[IP_LAST_FRAG_IDX].len);
+
+		/* free associated resources. */
+		ip_frag_free(fp, dr);
+	}
+
+	/* we are done with that entry, invalidate it. */
+	ip_frag_key_invalidate(&fp->key);
+	return (mb);
+}
+
+
+/*
+ * Find the table entry for the given key, allocating a new one if absent
+ * and recycling timed-out entries (their mbufs go to the death row).
+ * Returns NULL when no entry can be made available.
+ */
+struct rte_ip_frag_pkt *
+ip_frag_find(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr,
+	const struct ip_frag_key *key, uint64_t tms)
+{
+	struct rte_ip_frag_pkt *pkt, *free, *stale, *lru;
+	uint64_t max_cycles;
+
+	/*
+	 * Actually the two lines below are totally redundant.
+	 * They are here just to make gcc 4.6 happy.
+	 */
+	free = NULL;
+	stale = NULL;
+	max_cycles = tbl->max_cycles;
+
+	IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, find_num, 1);
+
+	if ((pkt = ip_frag_lookup(tbl, key, tms, &free, &stale)) == NULL) {
+
+		/* timed-out entry, free and invalidate it */
+		if (stale != NULL) {
+			ip_frag_tbl_del(tbl, dr, stale);
+			free = stale;
+
+		/*
+		 * we found a free entry, check if we can use it.
+		 * If we run out of free entries in the table, then
+		 * check if we have a timed out entry to delete.
+		 */
+		} else if (free != NULL &&
+				tbl->max_entries <= tbl->use_entries) {
+			lru = TAILQ_FIRST(&tbl->lru);
+			if (max_cycles + lru->start < tms) {
+				ip_frag_tbl_del(tbl, dr, lru);
+			} else {
+				free = NULL;
+				IP_FRAG_TBL_STAT_UPDATE(&tbl->stat,
+					fail_nospace, 1);
+			}
+		}
+
+		/* found a free entry to reuse. */
+		if (free != NULL) {
+			ip_frag_tbl_add(tbl,  free, key, tms);
+			pkt = free;
+		}
+
+	/*
+	 * we found the flow, but it is already timed out,
+	 * so free associated resources, reposition it in the LRU list,
+	 * and reuse it.
+	 */
+	} else if (max_cycles + pkt->start < tms) {
+		ip_frag_tbl_reuse(tbl, dr, pkt, tms);
+	}
+
+	IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, fail_total, (pkt == NULL));
+
+	tbl->last = pkt;
+	return (pkt);
+}
+
+/* Look the key up in both candidate buckets. Returns the matching entry,
+ * or NULL with *free / *stale set to the first empty / timed-out entry
+ * met on the way (each may be NULL). */
+struct rte_ip_frag_pkt *
+ip_frag_lookup(struct rte_ip_frag_tbl *tbl,
+	const struct ip_frag_key *key, uint64_t tms,
+	struct rte_ip_frag_pkt **free, struct rte_ip_frag_pkt **stale)
+{
+	struct rte_ip_frag_pkt *p1, *p2;
+	struct rte_ip_frag_pkt *empty, *old;
+	uint64_t max_cycles;
+	uint32_t i, assoc, sig1, sig2;
+
+	empty = NULL;
+	old = NULL;
+	max_cycles = tbl->max_cycles;
+	assoc = tbl->bucket_entries;
+
+	if (tbl->last != NULL && ip_frag_key_cmp(&tbl->last->key, key) == 0)
+		return (tbl->last);
+
+	ipv4_frag_hash(key, &sig1, &sig2);
+	p1 = IP_FRAG_TBL_POS(tbl, sig1);
+	p2 = IP_FRAG_TBL_POS(tbl, sig2);
+
+	for (i = 0; i != assoc; i++) {
+		IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+				"tbl: %p, max_entries: %u, use_entries: %u\n"
+				"ip_frag_pkt line0: %p, index: %u from %u\n"
+				"key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n",
+				__func__, __LINE__,
+				tbl, tbl->max_entries, tbl->use_entries,
+				p1, i, assoc,
+				p1[i].key.src_dst, p1[i].key.id, p1[i].start);
+
+		if (ip_frag_key_cmp(&p1[i].key, key) == 0)
+			return (p1 + i);
+		else if (ip_frag_key_is_empty(&p1[i].key))
+			empty = (empty == NULL) ? (p1 + i) : empty;
+		else if (max_cycles + p1[i].start < tms)
+			old = (old == NULL) ? (p1 + i) : old;
+
+		IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+				"tbl: %p, max_entries: %u, use_entries: %u\n"
+				"ip_frag_pkt line1: %p, index: %u from %u\n"
+				"key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n",
+				__func__, __LINE__,
+				tbl, tbl->max_entries, tbl->use_entries,
+				p2, i, assoc,
+				p2[i].key.src_dst, p2[i].key.id, p2[i].start);
+
+		if (ip_frag_key_cmp(&p2[i].key, key) == 0)
+			return (p2 + i);
+		else if (ip_frag_key_is_empty(&p2[i].key))
+			empty = (empty == NULL) ? (p2 + i) : empty;
+		else if (max_cycles + p2[i].start < tms)
+			old = (old == NULL) ? (p2 + i) : old;
+	}
+
+	*free = empty;
+	*stale = old;
+	return (NULL);
+}
diff --git a/lib/librte_ip_frag/ipv4_frag_tbl.h b/lib/librte_ip_frag/ipv4_frag_tbl.h
deleted file mode 100644
index fa3291d..0000000
--- a/lib/librte_ip_frag/ipv4_frag_tbl.h
+++ /dev/null
@@ -1,400 +0,0 @@
-/*-
- *   BSD LICENSE
- * 
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- * 
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- * 
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- * 
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _IPV4_FRAG_TBL_H_
-#define _IPV4_FRAG_TBL_H_
-
-/**
- * @file
- * IPv4 fragments table.
- *
- * Implementation of IPv4 fragment table create/destroy/find/update.
- *
- */
-
-/*
- * The ip_frag_tbl is a simple hash table:
- * The basic idea is to use two hash functions and <bucket_entries>
- * associativity. This provides 2 * <bucket_entries> possible locations in
- * the hash table for each key. Sort of simplified Cuckoo hashing,
- * when the collision occurs and all 2 * <bucket_entries> are occupied,
- * instead of resinserting existing keys into alternative locations, we just
- * return a faiure.
- * Another thing timing: entries that resides in the table longer then
- * <max_cycles> are considered as invalid, and could be removed/replaced
- * byt the new ones. 
- * <key, data> pair is stored together, all add/update/lookup opearions are not
- * MT safe.
- */
-
-#include <rte_jhash.h>
-#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
-#include <rte_hash_crc.h>
-#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
-
-#define	PRIME_VALUE	0xeaad8405
-
-TAILQ_HEAD(ip_pkt_list, ip_frag_pkt);
-
-struct ip_frag_tbl_stat {
-	uint64_t find_num;      /* total # of find/insert attempts. */
-	uint64_t add_num;       /* # of add ops. */
-	uint64_t del_num;       /* # of del ops. */
-	uint64_t reuse_num;     /* # of reuse (del/add) ops. */
-	uint64_t fail_total;    /* total # of add failures. */
-	uint64_t fail_nospace;  /* # of 'no space' add failures. */
-} __rte_cache_aligned;
-
-struct ip_frag_tbl {
-	uint64_t             max_cycles;      /* ttl for table entries. */
-	uint32_t             entry_mask;      /* hash value mask. */
-	uint32_t             max_entries;     /* max entries allowed. */
-	uint32_t             use_entries;     /* entries in use. */
-	uint32_t             bucket_entries;  /* hash assocaitivity. */
-	uint32_t             nb_entries;      /* total size of the table. */
-	uint32_t             nb_buckets;      /* num of associativity lines. */
-	struct ip_frag_pkt *last;           /* last used entry. */
-	struct ip_pkt_list lru;             /* LRU list for table entries. */
-	struct ip_frag_tbl_stat stat;       /* statistics counters. */
-	struct ip_frag_pkt pkt[0];          /* hash table. */
-};
-
-#define	IP_FRAG_TBL_POS(tbl, sig)	\
-	((tbl)->pkt + ((sig) & (tbl)->entry_mask))
-
-#define	IP_FRAG_HASH_FNUM	2
-
-#ifdef IP_FRAG_TBL_STAT
-#define	IP_FRAG_TBL_STAT_UPDATE(s, f, v)	((s)->f += (v))
-#else
-#define	IP_FRAG_TBL_STAT_UPDATE(s, f, v)	do {} while (0)
-#endif /* IPV4_FRAG_TBL_STAT */
-
-static inline void
-ipv4_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2)
-{
-	uint32_t v;
-	const uint32_t *p;
-
-	p = (const uint32_t *)&key->src_dst;
-
-#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
-	v = rte_hash_crc_4byte(p[0], PRIME_VALUE);
-	v = rte_hash_crc_4byte(p[1], v);
-	v = rte_hash_crc_4byte(key->id, v);
-#else
-
-	v = rte_jhash_3words(p[0], p[1], key->id, PRIME_VALUE);
-#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
-
-	*v1 =  v;
-	*v2 = (v << 7) + (v >> 14);
-}
-
-/*
- * Update the table, after we finish processing it's entry.
- */
-static inline void
-ip_frag_inuse(struct ip_frag_tbl *tbl, const struct  ip_frag_pkt *fp)
-{
-	if (IP_FRAG_KEY_EMPTY(&fp->key)) {
-		TAILQ_REMOVE(&tbl->lru, fp, lru);
-		tbl->use_entries--;
-	}
-}
-
-/*
- * For the given key, try to find an existing entry.
- * If such entry doesn't exist, will return free and/or timed-out entry,
- * that can be used for that key.
- */
-static inline struct  ip_frag_pkt *
-ip_frag_lookup(struct ip_frag_tbl *tbl,
-	const struct ip_frag_key *key, uint64_t tms,
-	struct ip_frag_pkt **free, struct ip_frag_pkt **stale)
-{
-	struct ip_frag_pkt *p1, *p2;
-	struct ip_frag_pkt *empty, *old;
-	uint64_t max_cycles;
-	uint32_t i, assoc, sig1, sig2;
-
-	empty = NULL;
-	old = NULL;
-
-	max_cycles = tbl->max_cycles;
-	assoc = tbl->bucket_entries;
-
-	if (tbl->last != NULL && IP_FRAG_KEY_CMP(&tbl->last->key, key) == 0)
-		return (tbl->last);
-
-	ipv4_frag_hash(key, &sig1, &sig2);
-	p1 = IP_FRAG_TBL_POS(tbl, sig1);
-	p2 = IP_FRAG_TBL_POS(tbl, sig2);
-
-	for (i = 0; i != assoc; i++) {
-
-		IP_FRAG_LOG(DEBUG, "%s:%d:\n"
-                "tbl: %p, max_entries: %u, use_entries: %u\n"
-                "ip_frag_pkt line0: %p, index: %u from %u\n"
-		"key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n",
-                __func__, __LINE__,
-                tbl, tbl->max_entries, tbl->use_entries,
-                p1, i, assoc,
-		p1[i].key.src_dst, p1[i].key.id, p1[i].start);
-
-		if (IP_FRAG_KEY_CMP(&p1[i].key, key) == 0)
-			return (p1 + i);
-		else if (IP_FRAG_KEY_EMPTY(&p1[i].key))
-			empty = (empty == NULL) ? (p1 + i) : empty;
-		else if (max_cycles + p1[i].start < tms)
-			old = (old == NULL) ? (p1 + i) : old;
-
-		IP_FRAG_LOG(DEBUG, "%s:%d:\n"
-                "tbl: %p, max_entries: %u, use_entries: %u\n"
-                "ip_frag_pkt line1: %p, index: %u from %u\n"
-		"key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n",
-                __func__, __LINE__,
-                tbl, tbl->max_entries, tbl->use_entries,
-                p2, i, assoc,
-		p2[i].key.src_dst, p2[i].key.id, p2[i].start);
-
-		if (IP_FRAG_KEY_CMP(&p2[i].key, key) == 0)
-			return (p2 + i);
-		else if (IP_FRAG_KEY_EMPTY(&p2[i].key))
-			empty = (empty == NULL) ?( p2 + i) : empty;
-		else if (max_cycles + p2[i].start < tms)
-			old = (old == NULL) ? (p2 + i) : old;
-	}
-
-	*free = empty;
-	*stale = old;
-	return (NULL);
-}
-
-static inline void
-ip_frag_tbl_del(struct ip_frag_tbl *tbl, struct ip_frag_death_row *dr,
-	struct ip_frag_pkt *fp)
-{
-	ip_frag_free(fp, dr);
-	IP_FRAG_KEY_INVALIDATE(&fp->key);
-	TAILQ_REMOVE(&tbl->lru, fp, lru);
-	tbl->use_entries--;
-	IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, del_num, 1);
-}
-
-static inline void
-ip_frag_tbl_add(struct ip_frag_tbl *tbl,  struct ip_frag_pkt *fp,
-	const struct ip_frag_key *key, uint64_t tms)
-{
-	fp->key = key[0];
-	ip_frag_reset(fp, tms);
-	TAILQ_INSERT_TAIL(&tbl->lru, fp, lru);
-	tbl->use_entries++;
-	IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, add_num, 1);
-}
-
-static inline void
-ip_frag_tbl_reuse(struct ip_frag_tbl *tbl, struct ip_frag_death_row *dr,
-	struct ip_frag_pkt *fp, uint64_t tms)
-{
-	ip_frag_free(fp, dr);
-	ip_frag_reset(fp, tms);
-	TAILQ_REMOVE(&tbl->lru, fp, lru);
-	TAILQ_INSERT_TAIL(&tbl->lru, fp, lru);
-	IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, reuse_num, 1);
-}
-
-/*
- * Find an entry in the table for the corresponding fragment.
- * If such entry is not present, then allocate a new one.
- * If the entry is stale, then free and reuse it.
- */
-static inline struct ip_frag_pkt *
-ip_frag_find(struct ip_frag_tbl *tbl, struct ip_frag_death_row *dr,
-	const struct ip_frag_key *key, uint64_t tms)
-{
-	struct ip_frag_pkt *pkt, *free, *stale, *lru;
-	uint64_t max_cycles;
-
-	/*
-	 * Actually the two line below are totally redundant.
-	 * they are here, just to make gcc 4.6 happy.
-	 */
-	free = NULL;
-	stale = NULL;
-	max_cycles = tbl->max_cycles;
-
-	IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, find_num, 1);
-
-	if ((pkt = ip_frag_lookup(tbl, key, tms, &free, &stale)) == NULL) {
-
-		/*timed-out entry, free and invalidate it*/
-		if (stale != NULL) {
-			ip_frag_tbl_del(tbl, dr, stale);
-			free = stale;
-
-		/*
-		 * we found a free entry, check if we can use it.
-		 * If we run out of free entries in the table, then
-		 * check if we have a timed out entry to delete. 
-		 */
-		} else if (free != NULL &&
-				tbl->max_entries <= tbl->use_entries) {
-			lru = TAILQ_FIRST(&tbl->lru);
-			if (max_cycles + lru->start < tms) {
-				ip_frag_tbl_del(tbl, dr, lru);
-			} else {
-				free = NULL;
-				IP_FRAG_TBL_STAT_UPDATE(&tbl->stat,
-					fail_nospace, 1);
-			}
-		}
-
-		/* found a free entry to reuse. */
-		if (free != NULL) {
-			ip_frag_tbl_add(tbl,  free, key, tms);
-			pkt = free;
-		}
-
-	/*
-	 * we found the flow, but it is already timed out,
-	 * so free associated resources, reposition it in the LRU list,
-	 * and reuse it.
-	 */
-	} else if (max_cycles + pkt->start < tms) {
-		ip_frag_tbl_reuse(tbl, dr, pkt, tms);
-	}
-
-	IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, fail_total, (pkt == NULL));
-
-	tbl->last = pkt;
-	return (pkt);
-}
-
-/*
- * Create a new IPV4 Frag table.
- * @param bucket_num
- *  Number of buckets in the hash table.
- * @param bucket_entries
- *  Number of entries per bucket (e.g. hash associativity).
- *  Should be power of two.
- * @param max_entries
- *   Maximum number of entries that could be stored in the table.
- *   The value should be less or equal then bucket_num * bucket_entries.
- * @param max_cycles
- *   Maximum TTL in cycles for each fragmented packet.
- * @param socket_id
- *  The *socket_id* argument is the socket identifier in the case of
- *  NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA constraints.
- * @return
- *   The pointer to the new allocated mempool, on success. NULL on error.
- */
-static struct ip_frag_tbl *
-rte_ip_frag_table_create(uint32_t bucket_num, uint32_t bucket_entries,
-	uint32_t max_entries, uint64_t max_cycles, int socket_id)
-{
-	struct ip_frag_tbl *tbl;
-	size_t sz;
-	uint64_t nb_entries;
-
-	nb_entries = rte_align32pow2(bucket_num);
-	nb_entries *= bucket_entries;
-	nb_entries *= IP_FRAG_HASH_FNUM;
-
-	/* check input parameters. */
-	if (rte_is_power_of_2(bucket_entries) == 0 ||
-			nb_entries > UINT32_MAX || nb_entries == 0 ||
-			nb_entries < max_entries) {
-		RTE_LOG(ERR, USER1, "%s: invalid input parameter\n", __func__);
-		return (NULL);
-	}
-
-	sz = sizeof (*tbl) + nb_entries * sizeof (tbl->pkt[0]);
-	if ((tbl = rte_zmalloc_socket(__func__, sz, CACHE_LINE_SIZE,
-			socket_id)) == NULL) {
-		RTE_LOG(ERR, USER1,
-			"%s: allocation of %zu bytes at socket %d failed do\n",
-			__func__, sz, socket_id);
-		return (NULL);
-	}
-
-	RTE_LOG(INFO, USER1, "%s: allocated of %zu bytes at socket %d\n",
-		__func__, sz, socket_id); 
-
-	tbl->max_cycles = max_cycles;
-	tbl->max_entries = max_entries;
-	tbl->nb_entries = (uint32_t)nb_entries;
-	tbl->nb_buckets = bucket_num;
-	tbl->bucket_entries = bucket_entries;
-	tbl->entry_mask = (tbl->nb_entries - 1) & ~(tbl->bucket_entries  - 1);
-
-	TAILQ_INIT(&(tbl->lru));
-	return (tbl);
-}
-
-static inline void
-rte_ip_frag_table_destroy( struct ip_frag_tbl *tbl)
-{
-	rte_free(tbl);
-}
-
-static void
-rte_ip_frag_table_statistics_dump(FILE *f, const struct ip_frag_tbl *tbl)
-{
-	uint64_t fail_total, fail_nospace;
-
-	fail_total = tbl->stat.fail_total;
-	fail_nospace = tbl->stat.fail_nospace;
-
-	fprintf(f, "max entries:\t%u;\n"
-		"entries in use:\t%u;\n"
-		"finds/inserts:\t%" PRIu64 ";\n"
-		"entries added:\t%" PRIu64 ";\n"
-		"entries deleted by timeout:\t%" PRIu64 ";\n"
-		"entries reused by timeout:\t%" PRIu64 ";\n"
-		"total add failures:\t%" PRIu64 ";\n"
-		"add no-space failures:\t%" PRIu64 ";\n"
-		"add hash-collisions failures:\t%" PRIu64 ";\n",
-		tbl->max_entries,
-		tbl->use_entries,
-		tbl->stat.find_num,
-		tbl->stat.add_num,
-		tbl->stat.del_num,
-		tbl->stat.reuse_num,
-		fail_total,
-		fail_nospace,
-		fail_total - fail_nospace);
-}
-
-
-#endif /* _IPV4_FRAG_TBL_H_ */
diff --git a/lib/librte_ip_frag/rte_ip_frag.h b/lib/librte_ip_frag/rte_ip_frag.h
index 0cf3878..327e1f1 100644
--- a/lib/librte_ip_frag/rte_ip_frag.h
+++ b/lib/librte_ip_frag/rte_ip_frag.h
@@ -1,13 +1,13 @@
 /*-
  *   BSD LICENSE
- * 
+ *
  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
  *   All rights reserved.
- * 
+ *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
  *   are met:
- * 
+ *
  *     * Redistributions of source code must retain the above copyright
  *       notice, this list of conditions and the following disclaimer.
  *     * Redistributions in binary form must reproduce the above copyright
@@ -17,7 +17,7 @@
  *     * Neither the name of Intel Corporation nor the names of its
  *       contributors may be used to endorse or promote products derived
  *       from this software without specific prior written permission.
- * 
+ *
  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -31,16 +31,147 @@
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#ifndef _RTE_IP_FRAG_H__
-#define _RTE_IP_FRAG_H__
+#ifndef _RTE_IP_FRAG_H_
+#define _RTE_IP_FRAG_H_
 
 /**
  * @file
- * RTE IPv4 Fragmentation
+ * RTE IPv4 Fragmentation and Reassembly
+ *
+ * Implementation of IPv4 packet fragmentation and reassembly.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_ip.h>
+
+enum {
+	IP_LAST_FRAG_IDX,    /**< index of last fragment */
+	IP_FIRST_FRAG_IDX,   /**< index of first fragment */
+	IP_MIN_FRAG_NUM,     /**< minimum number of fragments */
+	IP_MAX_FRAG_NUM = RTE_LIBRTE_IP_FRAG_MAX_FRAG,
+	/**< maximum number of fragments per packet */
+};
+
+/** @internal fragmented mbuf */
+struct ip_frag {
+	uint16_t ofs;          /**< offset into the packet */
+	uint16_t len;          /**< length of fragment */
+	struct rte_mbuf *mb;   /**< fragment mbuf */
+};
+
+/** @internal <src addr, dst_addr, id> to uniquely identify fragmented datagram. */
+struct ip_frag_key {
+	uint64_t src_dst;      /**< src and dst addresses */
+	uint32_t id;           /**< packet id */
+};
+
+/*
+ * @internal Fragmented packet to reassemble.
+ * First two entries in the frags[] array are for the last and first fragments.
+ */
+struct rte_ip_frag_pkt {
+	TAILQ_ENTRY(rte_ip_frag_pkt) lru;   /**< LRU list */
+	struct ip_frag_key key;           /**< fragmentation key */
+	uint64_t             start;       /**< creation timestamp */
+	uint32_t             total_size;  /**< expected reassembled size */
+	uint32_t             frag_size;   /**< size of fragments received */
+	uint32_t             last_idx;    /**< index of next entry to fill */
+	struct ip_frag       frags[IP_MAX_FRAG_NUM]; /**< fragments */
+} __rte_cache_aligned;
+
+#define IP_FRAG_DEATH_ROW_LEN 32 /**< death row size (in packets) */
+
+/** mbuf death row (packets to be freed) */
+struct rte_ip_frag_death_row {
+	uint32_t cnt;          /**< number of mbufs currently on death row */
+	struct rte_mbuf *row[IP_FRAG_DEATH_ROW_LEN * (IP_MAX_FRAG_NUM + 1)];
+	/**< mbufs to be freed */
+};
+
+TAILQ_HEAD(rte_ip_pkt_list, rte_ip_frag_pkt); /**< @internal fragments tailq */
+
+/** fragmentation table statistics */
+struct rte_ip_frag_tbl_stat {
+	uint64_t find_num;      /**< total # of find/insert attempts. */
+	uint64_t add_num;       /**< # of add ops. */
+	uint64_t del_num;       /**< # of del ops. */
+	uint64_t reuse_num;     /**< # of reuse (del/add) ops. */
+	uint64_t fail_total;    /**< total # of add failures. */
+	uint64_t fail_nospace;  /**< # of 'no space' add failures. */
+} __rte_cache_aligned;
+
+/** fragmentation table */
+struct rte_ip_frag_tbl {
+	uint64_t             max_cycles;      /**< ttl for table entries. */
+	uint32_t             entry_mask;      /**< hash value mask. */
+	uint32_t             max_entries;     /**< max entries allowed. */
+	uint32_t             use_entries;     /**< entries in use. */
+	uint32_t             bucket_entries;  /**< hash associativity. */
+	uint32_t             nb_entries;      /**< total size of the table. */
+	uint32_t             nb_buckets;      /**< num of associativity lines. */
+	struct rte_ip_frag_pkt *last;         /**< last used entry. */
+	struct rte_ip_pkt_list lru;           /**< LRU list for table entries. */
+	struct rte_ip_frag_tbl_stat stat;     /**< statistics counters. */
+	struct rte_ip_frag_pkt pkt[0];        /**< hash table. */
+};
+
+/** IPv6 fragment extension header */
+struct ipv6_extension_fragment {
+	uint8_t next_header;            /**< Next header type */
+	uint8_t reserved1;              /**< Reserved */
+	union {
+		struct {
+			uint16_t frag_offset:13; /**< Offset from the start of the packet */
+			uint16_t reserved2:2; /**< Reserved */
+			uint16_t more_frags:1;
+			/**< 1 if more fragments left, 0 if last fragment */
+		};
+		uint16_t frag_data;
+		/**< union of all fragmentation data */
+	};
+	uint32_t id;                    /**< Packet ID */
+} __attribute__((__packed__));
+
+
+
+/*
+ * Create a new IP fragmentation table.
  *
- * Implementation of IPv4 fragmentation.
+ * @param bucket_num
+ *   Number of buckets in the hash table.
+ * @param bucket_entries
+ *   Number of entries per bucket (e.g. hash associativity).
+ *   Should be power of two.
+ * @param max_entries
+ *   Maximum number of entries that could be stored in the table.
+ *   The value should be less than or equal to bucket_num * bucket_entries.
+ * @param max_cycles
+ *   Maximum TTL in cycles for each fragmented packet.
+ * @param socket_id
+ *   The *socket_id* argument is the socket identifier in the case of
+ *   NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA constraints.
+ * @return
+ *   The pointer to the new allocated fragmentation table, on success. NULL on error.
+ */
+struct rte_ip_frag_tbl * rte_ip_frag_table_create(uint32_t bucket_num,
+		uint32_t bucket_entries,  uint32_t max_entries,
+		uint64_t max_cycles, int socket_id);
+
+/*
+ * Free allocated IP fragmentation table.
  *
+ * @param tbl
+ *   Fragmentation table to free.
  */
+static inline void
+rte_ip_frag_table_destroy( struct rte_ip_frag_tbl *tbl)
+{
+	rte_free(tbl);
+}
 
 /**
  * IPv4 fragmentation.
@@ -64,10 +195,74 @@
  *   Otherwise - (-1) * <errno>.
  */
 int32_t rte_ipv4_fragmentation(struct rte_mbuf *pkt_in,
-	struct rte_mbuf **pkts_out,
-	uint16_t nb_pkts_out,
-	uint16_t mtu_size,
-	struct rte_mempool *pool_direct,
-	struct rte_mempool *pool_indirect);
+			struct rte_mbuf **pkts_out,
+			uint16_t nb_pkts_out, uint16_t mtu_size,
+			struct rte_mempool *pool_direct,
+			struct rte_mempool *pool_indirect);
+
+/*
+ * This function implements reassembly of fragmented IPv4 packets.
+ * Incoming mbufs should have their l2_len/l3_len fields set up correctly.
+ *
+ * @param tbl
+ *   Table where to lookup/add the fragmented packet.
+ * @param dr
+ *   Death row to free buffers to
+ * @param mb
+ *   Incoming mbuf with IPv4 fragment.
+ * @param tms
+ *   Fragment arrival timestamp.
+ * @param ip_hdr
+ *   Pointer to the IPV4 header inside the fragment.
+ * @return
+ *   Pointer to mbuf for reassembled packet, or NULL if:
+ *   - an error occurred.
+ *   - not all fragments of the packet are collected yet.
+ */
+struct rte_mbuf * rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
+		struct rte_ip_frag_death_row *dr,
+		struct rte_mbuf *mb, uint64_t tms, struct ipv4_hdr *ip_hdr);
+
+/*
+ * Check if the IPv4 packet is fragmented
+ *
+ * @param hdr
+ *   IPv4 header of the packet
+ * @return
+ *   1 if fragmented, 0 if not fragmented
+ */
+static inline int
+rte_ipv4_frag_pkt_is_fragmented(const struct ipv4_hdr * hdr) {
+	uint16_t flag_offset, ip_flag, ip_ofs;
+
+	flag_offset = rte_be_to_cpu_16(hdr->fragment_offset);
+	ip_ofs = (uint16_t)(flag_offset & IPV4_HDR_OFFSET_MASK);
+	ip_flag = (uint16_t)(flag_offset & IPV4_HDR_MF_FLAG);
+
+	return ip_flag != 0 || ip_ofs  != 0;
+}
+
+/*
+ * Free mbufs on a given death row.
+ *
+ * @param dr
+ *   Death row to free mbufs in.
+ * @param prefetch
+ *   How many buffers to prefetch before freeing.
+ */
+void rte_ip_frag_free_death_row(struct rte_ip_frag_death_row *dr,
+		uint32_t prefetch);
+
+
+/*
+ * Dump fragmentation table statistics to file.
+ *
+ * @param f
+ *   File to dump statistics to
+ * @param tbl
+ *   Fragmentation table to dump statistics from
+ */
+void
+rte_ip_frag_table_statistics_dump(FILE * f, const struct rte_ip_frag_tbl *tbl);
 
-#endif
+#endif /* _RTE_IP_FRAG_H_ */
diff --git a/lib/librte_ip_frag/rte_ip_frag_common.c b/lib/librte_ip_frag/rte_ip_frag_common.c
new file mode 100644
index 0000000..acd1864
--- /dev/null
+++ b/lib/librte_ip_frag/rte_ip_frag_common.c
@@ -0,0 +1,142 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <rte_memory.h>
+#include <rte_log.h>
+#include <rte_byteorder.h>
+
+#include "rte_ip_frag.h"
+#include "ip_frag_common.h"
+
+#define	IP_FRAG_HASH_FNUM	2
+
+/* free mbufs from death row */
+void
+rte_ip_frag_free_death_row(struct rte_ip_frag_death_row *dr,
+		uint32_t prefetch)
+{
+	uint32_t i, k, n;
+
+	k = RTE_MIN(prefetch, dr->cnt);
+	n = dr->cnt;
+
+	for (i = 0; i != k; i++)
+		rte_prefetch0(dr->row[i]);
+
+	for (i = 0; i != n - k; i++) {
+		rte_prefetch0(dr->row[i + k]);
+		rte_pktmbuf_free(dr->row[i]);
+	}
+
+	for (; i != n; i++)
+		rte_pktmbuf_free(dr->row[i]);
+
+	dr->cnt = 0;
+}
+
+/* create fragmentation table */
+struct rte_ip_frag_tbl *
+rte_ip_frag_table_create(uint32_t bucket_num, uint32_t bucket_entries,
+	uint32_t max_entries, uint64_t max_cycles, int socket_id)
+{
+	struct rte_ip_frag_tbl *tbl;
+	size_t sz;
+	uint64_t nb_entries;
+
+	nb_entries = rte_align32pow2(bucket_num);
+	nb_entries *= bucket_entries;
+	nb_entries *= IP_FRAG_HASH_FNUM;
+
+	/* check input parameters. */
+	if (rte_is_power_of_2(bucket_entries) == 0 ||
+			nb_entries > UINT32_MAX || nb_entries == 0 ||
+			nb_entries < max_entries) {
+		RTE_LOG(ERR, USER1, "%s: invalid input parameter\n", __func__);
+		return (NULL);
+	}
+
+	sz = sizeof (*tbl) + nb_entries * sizeof (tbl->pkt[0]);
+	if ((tbl = rte_zmalloc_socket(__func__, sz, CACHE_LINE_SIZE,
+			socket_id)) == NULL) {
+		RTE_LOG(ERR, USER1,
+			"%s: allocation of %zu bytes at socket %d failed do\n",
+			__func__, sz, socket_id);
+		return (NULL);
+	}
+
+	RTE_LOG(INFO, USER1, "%s: allocated of %zu bytes at socket %d\n",
+		__func__, sz, socket_id);
+
+	tbl->max_cycles = max_cycles;
+	tbl->max_entries = max_entries;
+	tbl->nb_entries = (uint32_t)nb_entries;
+	tbl->nb_buckets = bucket_num;
+	tbl->bucket_entries = bucket_entries;
+	tbl->entry_mask = (tbl->nb_entries - 1) & ~(tbl->bucket_entries  - 1);
+
+	TAILQ_INIT(&(tbl->lru));
+	return (tbl);
+}
+
+/* dump frag table statistics to file */
+void
+rte_ip_frag_table_statistics_dump(FILE *f, const struct rte_ip_frag_tbl *tbl)
+{
+	uint64_t fail_total, fail_nospace;
+
+	fail_total = tbl->stat.fail_total;
+	fail_nospace = tbl->stat.fail_nospace;
+
+	fprintf(f, "max entries:\t%u;\n"
+		"entries in use:\t%u;\n"
+		"finds/inserts:\t%" PRIu64 ";\n"
+		"entries added:\t%" PRIu64 ";\n"
+		"entries deleted by timeout:\t%" PRIu64 ";\n"
+		"entries reused by timeout:\t%" PRIu64 ";\n"
+		"total add failures:\t%" PRIu64 ";\n"
+		"add no-space failures:\t%" PRIu64 ";\n"
+		"add hash-collisions failures:\t%" PRIu64 ";\n",
+		tbl->max_entries,
+		tbl->use_entries,
+		tbl->stat.find_num,
+		tbl->stat.add_num,
+		tbl->stat.del_num,
+		tbl->stat.reuse_num,
+		fail_total,
+		fail_nospace,
+		fail_total - fail_nospace);
+}
diff --git a/lib/librte_ip_frag/rte_ipv4_reassembly.c b/lib/librte_ip_frag/rte_ipv4_reassembly.c
new file mode 100644
index 0000000..483fb95
--- /dev/null
+++ b/lib/librte_ip_frag/rte_ipv4_reassembly.c
@@ -0,0 +1,189 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <rte_byteorder.h>
+#include <rte_mbuf.h>
+#include <rte_debug.h>
+#include <rte_tailq.h>
+#include <rte_malloc.h>
+#include <rte_ip.h>
+
+#include "rte_ip_frag.h"
+#include "ip_frag_common.h"
+
+/*
+ * Reassemble fragments into one packet.
+ */
+struct rte_mbuf *
+ipv4_frag_reassemble(const struct rte_ip_frag_pkt *fp)
+{
+	struct ipv4_hdr *ip_hdr;
+	struct rte_mbuf *m, *prev;
+	uint32_t i, n, ofs, first_len;
+
+	first_len = fp->frags[IP_FIRST_FRAG_IDX].len;
+	n = fp->last_idx - 1;
+
+	/*start from the last fragment. */
+	m = fp->frags[IP_LAST_FRAG_IDX].mb;
+	ofs = fp->frags[IP_LAST_FRAG_IDX].ofs;
+
+	while (ofs != first_len) {
+
+		prev = m;
+
+		for (i = n; i != IP_FIRST_FRAG_IDX && ofs != first_len; i--) {
+
+			/* previous fragment found. */
+			if(fp->frags[i].ofs + fp->frags[i].len == ofs) {
+
+				ip_frag_chain(fp->frags[i].mb, m);
+
+				/* update our last fragment and offset. */
+				m = fp->frags[i].mb;
+				ofs = fp->frags[i].ofs;
+			}
+		}
+
+		/* error - hole in the packet. */
+		if (m == prev) {
+			return (NULL);
+		}
+	}
+
+	/* chain with the first fragment. */
+	ip_frag_chain(fp->frags[IP_FIRST_FRAG_IDX].mb, m);
+	m = fp->frags[IP_FIRST_FRAG_IDX].mb;
+
+	/* update mbuf fields for reassembled packet. */
+	m->ol_flags |= PKT_TX_IP_CKSUM;
+
+	/* update ipv4 header for the reassembled packet */
+	ip_hdr = (struct ipv4_hdr*)(rte_pktmbuf_mtod(m, uint8_t *) +
+		m->pkt.vlan_macip.f.l2_len);
+
+	ip_hdr->total_length = rte_cpu_to_be_16((uint16_t)(fp->total_size +
+		m->pkt.vlan_macip.f.l3_len));
+	ip_hdr->fragment_offset = (uint16_t)(ip_hdr->fragment_offset &
+		rte_cpu_to_be_16(IPV4_HDR_DF_FLAG));
+	ip_hdr->hdr_checksum = 0;
+
+	return (m);
+}
+
+/*
+ * Process new mbuf with fragment of IPV4 packet.
+ * Incoming mbuf should have its l2_len/l3_len fields set up correctly.
+ * @param tbl
+ *   Table where to lookup/add the fragmented packet.
+ * @param mb
+ *   Incoming mbuf with IPV4 fragment.
+ * @param tms
+ *   Fragment arrival timestamp.
+ * @param ip_hdr
+ *   Pointer to the IPV4 header inside the fragment.
+ * @return
+ *   Pointer to mbuf for reassembled packet, or NULL if:
+ *   - an error occurred.
+ *   - not all fragments of the packet are collected yet.
+ */
+struct rte_mbuf *
+rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
+		struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms,
+		struct ipv4_hdr *ip_hdr)
+{
+	struct rte_ip_frag_pkt *fp;
+	struct ip_frag_key key;
+	const uint64_t *psd;
+	uint16_t ip_len;
+	uint16_t flag_offset, ip_ofs, ip_flag;
+
+	flag_offset = rte_be_to_cpu_16(ip_hdr->fragment_offset);
+	ip_ofs = (uint16_t)(flag_offset & IPV4_HDR_OFFSET_MASK);
+	ip_flag = (uint16_t)(flag_offset & IPV4_HDR_MF_FLAG);
+
+	psd = (uint64_t *)&ip_hdr->src_addr;
+	key.src_dst = *psd;
+	key.id = ip_hdr->packet_id;
+
+	ip_ofs *= IPV4_HDR_OFFSET_UNITS;
+	ip_len = (uint16_t)(rte_be_to_cpu_16(ip_hdr->total_length) -
+		mb->pkt.vlan_macip.f.l3_len);
+
+	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+		"mbuf: %p, tms: %" PRIu64
+		", key: <%" PRIx64 ", %#x>, ofs: %u, len: %u, flags: %#x\n"
+		"tbl: %p, max_cycles: %" PRIu64 ", entry_mask: %#x, "
+		"max_entries: %u, use_entries: %u\n\n",
+		__func__, __LINE__,
+		mb, tms, key.src_dst, key.id, ip_ofs, ip_len, ip_flag,
+		tbl, tbl->max_cycles, tbl->entry_mask, tbl->max_entries,
+		tbl->use_entries);
+
+	/* try to find/add entry into the fragment's table. */
+	if ((fp = ip_frag_find(tbl, dr, &key, tms)) == NULL) {
+		IP_FRAG_MBUF2DR(dr, mb);
+		return (NULL);
+	}
+
+	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+		"tbl: %p, max_entries: %u, use_entries: %u\n"
+		"ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, start: %" PRIu64
+		", total_size: %u, frag_size: %u, last_idx: %u\n\n",
+		__func__, __LINE__,
+		tbl, tbl->max_entries, tbl->use_entries,
+		fp, fp->key.src_dst, fp->key.id, fp->start,
+		fp->total_size, fp->frag_size, fp->last_idx);
+
+
+	/* process the fragmented packet. */
+	mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len, ip_flag);
+	ip_frag_inuse(tbl, fp);
+
+	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+		"mbuf: %p\n"
+		"tbl: %p, max_entries: %u, use_entries: %u\n"
+		"ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, start: %" PRIu64
+		", total_size: %u, frag_size: %u, last_idx: %u\n\n",
+		__func__, __LINE__, mb,
+		tbl, tbl->max_entries, tbl->use_entries,
+		fp, fp->key.src_dst, fp->key.id, fp->start,
+		fp->total_size, fp->frag_size, fp->last_idx);
+
+	return (mb);
+}
diff --git a/lib/librte_ip_frag/rte_ipv4_rsmbl.h b/lib/librte_ip_frag/rte_ipv4_rsmbl.h
deleted file mode 100644
index 82cb9b5..0000000
--- a/lib/librte_ip_frag/rte_ipv4_rsmbl.h
+++ /dev/null
@@ -1,427 +0,0 @@
-/*-
- *   BSD LICENSE
- * 
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- * 
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- * 
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- * 
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _IPV4_RSMBL_H_
-#define _IPV4_RSMBL_H_
-
-#include "ip_frag_common.h"
-
-/**
- * @file
- * IPv4 reassemble
- *
- * Implementation of IPv4 reassemble.
- *
- */
-
-enum {
-	LAST_FRAG_IDX,
-	FIRST_FRAG_IDX,
-	MIN_FRAG_NUM,
-	MAX_FRAG_NUM = 4,
-};
-
-struct ip_frag {
-	uint16_t ofs;
-	uint16_t len;
-	struct rte_mbuf *mb;
-};
-
-/*
- * Use <src addr, dst_addr, id> to uniquely indetify fragmented datagram.
- */
-struct ip_frag_key {
-	uint64_t  src_dst;
-	uint32_t  id;
-};
-
-#define	IP_FRAG_KEY_INVALIDATE(k)	((k)->src_dst = 0)
-#define	IP_FRAG_KEY_EMPTY(k)		((k)->src_dst == 0)
-
-#define	IP_FRAG_KEY_CMP(k1, k2)	\
-	(((k1)->src_dst ^ (k2)->src_dst) | ((k1)->id ^ (k2)->id))
-
-
-/*
- * Fragmented packet to reassemble.
- * First two entries in the frags[] array are for the last and first fragments.
- */
-struct ip_frag_pkt {
-	TAILQ_ENTRY(ip_frag_pkt) lru;   /* LRU list */
-	struct ip_frag_key key;
-	uint64_t             start;       /* creation timestamp */
-	uint32_t             total_size;  /* expected reassembled size */
-	uint32_t             frag_size;   /* size of fragments received */
-	uint32_t             last_idx;    /* index of next entry to fill */
-	struct ip_frag     frags[MAX_FRAG_NUM];
-} __rte_cache_aligned;
-
-
-struct ip_frag_death_row {
-	uint32_t cnt;
-	struct rte_mbuf *row[MAX_PKT_BURST * (MAX_FRAG_NUM + 1)];
-};
-
-#define	IP_FRAG_MBUF2DR(dr, mb)	((dr)->row[(dr)->cnt++] = (mb))
-
-/* logging macros. */
-
-#ifdef IP_FRAG_DEBUG
-#define	IP_FRAG_LOG(lvl, fmt, args...)	RTE_LOG(lvl, USER1, fmt, ##args)
-#else
-#define	IP_FRAG_LOG(lvl, fmt, args...)	do {} while(0)
-#endif /* IP_FRAG_DEBUG */
-
-
-static inline void
-ip_frag_reset(struct ip_frag_pkt *fp, uint64_t tms)
-{
-	static const struct ip_frag zero_frag = {
-		.ofs = 0,
-		.len = 0,
-		.mb = NULL,
-	};
-
-	fp->start = tms;
-	fp->total_size = UINT32_MAX;
-	fp->frag_size = 0;
-	fp->last_idx = MIN_FRAG_NUM;
-	fp->frags[LAST_FRAG_IDX] = zero_frag;
-	fp->frags[FIRST_FRAG_IDX] = zero_frag;
-}
-
-static inline void
-ip_frag_free(struct ip_frag_pkt *fp, struct ip_frag_death_row *dr)
-{
-	uint32_t i, k;
-
-	k = dr->cnt;
-	for (i = 0; i != fp->last_idx; i++) {
-		if (fp->frags[i].mb != NULL) {
-			dr->row[k++] = fp->frags[i].mb;
-			fp->frags[i].mb = NULL;
-		}
-	}
-
-	fp->last_idx = 0;
-	dr->cnt = k;
-}
-
-static inline void
-rte_ip_frag_free_death_row(struct ip_frag_death_row *dr, uint32_t prefetch)
-{
-	uint32_t i, k, n;
-
-	k = RTE_MIN(prefetch, dr->cnt);
-	n = dr->cnt;
-
-	for (i = 0; i != k; i++) 
-		rte_prefetch0(dr->row[i]);
-
-	for (i = 0; i != n - k; i++) {
-		rte_prefetch0(dr->row[i + k]);
-		rte_pktmbuf_free(dr->row[i]);
-	}
-
-	for (; i != n; i++)
-		rte_pktmbuf_free(dr->row[i]);
-
-	dr->cnt = 0;
-}
-
-/*
- * Helper function.
- * Takes 2 mbufs that represents two framents of the same packet and
- * chains them into one mbuf.
- */
-static inline void
-ip_frag_chain(struct rte_mbuf *mn, struct rte_mbuf *mp)
-{
-	struct rte_mbuf *ms;
-
-	/* adjust start of the last fragment data. */
-	rte_pktmbuf_adj(mp, (uint16_t)(mp->pkt.vlan_macip.f.l2_len +
-		mp->pkt.vlan_macip.f.l3_len));
-				
-	/* chain two fragments. */
-	ms = rte_pktmbuf_lastseg(mn);
-	ms->pkt.next = mp;
-
-	/* accumulate number of segments and total length. */
-	mn->pkt.nb_segs = (uint8_t)(mn->pkt.nb_segs + mp->pkt.nb_segs);
-	mn->pkt.pkt_len += mp->pkt.pkt_len;
-					
-	/* reset pkt_len and nb_segs for chained fragment. */
-	mp->pkt.pkt_len = mp->pkt.data_len;
-	mp->pkt.nb_segs = 1;
-}
-
-/*
- * Reassemble fragments into one packet.
- */
-static inline struct rte_mbuf *
-ipv4_frag_reassemble(const struct ip_frag_pkt *fp)
-{
-	struct ipv4_hdr *ip_hdr;
-	struct rte_mbuf *m, *prev;
-	uint32_t i, n, ofs, first_len;
-
-	first_len = fp->frags[FIRST_FRAG_IDX].len;
-	n = fp->last_idx - 1;
-
-	/*start from the last fragment. */
-	m = fp->frags[LAST_FRAG_IDX].mb;
-	ofs = fp->frags[LAST_FRAG_IDX].ofs;
-
-	while (ofs != first_len) {
-
-		prev = m;
-
-		for (i = n; i != FIRST_FRAG_IDX && ofs != first_len; i--) {
-
-			/* previous fragment found. */
-			if(fp->frags[i].ofs + fp->frags[i].len == ofs) {
-
-				ip_frag_chain(fp->frags[i].mb, m);
-
-				/* update our last fragment and offset. */
-				m = fp->frags[i].mb;
-				ofs = fp->frags[i].ofs;
-			}
-		}
-
-		/* error - hole in the packet. */
-		if (m == prev) {
-			return (NULL);
-		}
-	}
-
-	/* chain with the first fragment. */
-	ip_frag_chain(fp->frags[FIRST_FRAG_IDX].mb, m);
-	m = fp->frags[FIRST_FRAG_IDX].mb;
-
-	/* update mbuf fields for reassembled packet. */
-	m->ol_flags |= PKT_TX_IP_CKSUM;
-
-	/* update ipv4 header for the reassmebled packet */
-	ip_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(m, uint8_t *) +
-		m->pkt.vlan_macip.f.l2_len);
-
-	ip_hdr->total_length = rte_cpu_to_be_16((uint16_t)(fp->total_size +
-		m->pkt.vlan_macip.f.l3_len));
-	ip_hdr->fragment_offset = (uint16_t)(ip_hdr->fragment_offset &
-		rte_cpu_to_be_16(IPV4_HDR_DF_FLAG));
-	ip_hdr->hdr_checksum = 0;
-
-	return (m);
-}
-
-static inline struct rte_mbuf *
-ip_frag_process(struct ip_frag_pkt *fp, struct ip_frag_death_row *dr,
-	struct rte_mbuf *mb, uint16_t ofs, uint16_t len, uint16_t more_frags)
-{
-	uint32_t idx;
-
-	fp->frag_size += len;
-
-	/* this is the first fragment. */
-	if (ofs == 0) {
-		idx = (fp->frags[FIRST_FRAG_IDX].mb == NULL) ?
-			FIRST_FRAG_IDX : UINT32_MAX;
-
-	/* this is the last fragment. */
-	} else if (more_frags == 0) {
-		fp->total_size = ofs + len;
-		idx = (fp->frags[LAST_FRAG_IDX].mb == NULL) ?
-			LAST_FRAG_IDX : UINT32_MAX;
-
-	/* this is the intermediate fragment. */
-	} else if ((idx = fp->last_idx) <
-		sizeof (fp->frags) / sizeof (fp->frags[0])) {
-		fp->last_idx++;
-	}
-
-	/*
-	 * errorneous packet: either exceeed max allowed number of fragments,
-	 * or duplicate first/last fragment encountered.
-	 */
-	if (idx >= sizeof (fp->frags) / sizeof (fp->frags[0])) {
-
-		/* report an error. */
-		IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
-			"ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, "
-			"total_size: %u, frag_size: %u, last_idx: %u\n"
-			"first fragment: ofs: %u, len: %u\n"
-			"last fragment: ofs: %u, len: %u\n\n",
-			__func__, __LINE__,
-			fp, fp->key.src_dst, fp->key.id,
-			fp->total_size, fp->frag_size, fp->last_idx,
-			fp->frags[FIRST_FRAG_IDX].ofs,
-			fp->frags[FIRST_FRAG_IDX].len,
-			fp->frags[LAST_FRAG_IDX].ofs,
-			fp->frags[LAST_FRAG_IDX].len);
-
-		/* free all fragments, invalidate the entry. */
-		ip_frag_free(fp, dr);
-		IP_FRAG_KEY_INVALIDATE(&fp->key);
-		IP_FRAG_MBUF2DR(dr, mb);
-
-		return (NULL);
-	}
-
-	fp->frags[idx].ofs = ofs;
-	fp->frags[idx].len = len;
-	fp->frags[idx].mb = mb;
-
-	mb = NULL;
-
-	/* not all fragments are collected yet. */
-	if (likely (fp->frag_size < fp->total_size)) {
-		return (mb);
-
-	/* if we collected all fragments, then try to reassemble. */
-	} else if (fp->frag_size == fp->total_size &&
-			fp->frags[FIRST_FRAG_IDX].mb != NULL) {
-		mb = ipv4_frag_reassemble(fp);
-	}
-
-	/* errorenous set of fragments. */
-	if (mb == NULL) {
-
-		/* report an error. */
-		IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
-			"ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, "
-			"total_size: %u, frag_size: %u, last_idx: %u\n"
-			"first fragment: ofs: %u, len: %u\n"
-			"last fragment: ofs: %u, len: %u\n\n",
-			__func__, __LINE__,
-			fp, fp->key.src_dst, fp->key.id,
-			fp->total_size, fp->frag_size, fp->last_idx,
-			fp->frags[FIRST_FRAG_IDX].ofs,
-			fp->frags[FIRST_FRAG_IDX].len,
-			fp->frags[LAST_FRAG_IDX].ofs,
-			fp->frags[LAST_FRAG_IDX].len);
-
-		/* free associated resources. */
-		ip_frag_free(fp, dr);
-	}
-
-	/* we are done with that entry, invalidate it. */
-	IP_FRAG_KEY_INVALIDATE(&fp->key);
-	return (mb);
-}
-
-#include "ipv4_frag_tbl.h"
-
-/*
- * Process new mbuf with fragment of IPV4 packet.
- * Incoming mbuf should have it's l2_len/l3_len fields setuped correclty.
- * @param tbl
- *   Table where to lookup/add the fragmented packet.
- * @param mb
- *   Incoming mbuf with IPV4 fragment.
- * @param tms
- *   Fragment arrival timestamp.
- * @param ip_hdr
- *   Pointer to the IPV4 header inside the fragment.
- * @param ip_ofs
- *   Fragment's offset (as extracted from the header).
- * @param ip_flag
- *   Fragment's MF flag.
- * @return  
- *   Pointer to mbuf for reassebled packet, or NULL if:
- *   - an error occured.
- *   - not all fragments of the packet are collected yet.
- */
-static inline struct rte_mbuf *
-rte_ipv4_reassemble_packet(struct ip_frag_tbl *tbl,
-		struct ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms,
-		struct ipv4_hdr *ip_hdr, uint16_t ip_ofs, uint16_t ip_flag)
-{
-	struct ip_frag_pkt *fp;
-	struct ip_frag_key key;
-	const uint64_t *psd;
-	uint16_t ip_len;
-
-	psd = (uint64_t *)&ip_hdr->src_addr;
-	key.src_dst = psd[0];
-	key.id = ip_hdr->packet_id;
-
-	ip_ofs *= IPV4_HDR_OFFSET_UNITS;
-	ip_len = (uint16_t)(rte_be_to_cpu_16(ip_hdr->total_length) -
-		mb->pkt.vlan_macip.f.l3_len);
-
-	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
-		"mbuf: %p, tms: %" PRIu64
-		", key: <%" PRIx64 ", %#x>, ofs: %u, len: %u, flags: %#x\n"
-		"tbl: %p, max_cycles: %" PRIu64 ", entry_mask: %#x, "
-		"max_entries: %u, use_entries: %u\n\n",
-		__func__, __LINE__,
-		mb, tms, key.src_dst, key.id, ip_ofs, ip_len, ip_flag,
-		tbl, tbl->max_cycles, tbl->entry_mask, tbl->max_entries,
-		tbl->use_entries);
-
-	/* try to find/add entry into the fragment's table. */
-	if ((fp = ip_frag_find(tbl, dr, &key, tms)) == NULL) {
-		IP_FRAG_MBUF2DR(dr, mb);
-		return NULL;
-	}
-
-	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
-		"tbl: %p, max_entries: %u, use_entries: %u\n"
-		"ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, start: %" PRIu64
-		", total_size: %u, frag_size: %u, last_idx: %u\n\n",
-		__func__, __LINE__,
-		tbl, tbl->max_entries, tbl->use_entries,
-		fp, fp->key.src_dst, fp->key.id, fp->start,
-		fp->total_size, fp->frag_size, fp->last_idx);
-		
-
-	/* process the fragmented packet. */
-	mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len, ip_flag);
-	ip_frag_inuse(tbl, fp);
-
-	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
-		"mbuf: %p\n"
-		"tbl: %p, max_entries: %u, use_entries: %u\n"
-		"ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, start: %" PRIu64
-		", total_size: %u, frag_size: %u, last_idx: %u\n\n",
-		__func__, __LINE__, mb,
-		tbl, tbl->max_entries, tbl->use_entries,
-		fp, fp->key.src_dst, fp->key.id, fp->start,
-		fp->total_size, fp->frag_size, fp->last_idx);
-
-	return (mb);
-}
-
-#endif /* _IPV4_RSMBL_H_ */
-- 
1.8.1.4

  parent reply	other threads:[~2014-05-28 17:32 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-05-28 17:32 [dpdk-dev] [PATCH 00/13] *** SUBJECT HERE *** Anatoly Burakov
2014-05-28 17:32 ` [dpdk-dev] [PATCH 00/13] IPv4/IPv6 fragmentation/reassembly library Anatoly Burakov
2014-05-28 17:32 ` [dpdk-dev] [PATCH 01/13] ip_frag: Moving fragmentation/reassembly headers into a separate library Anatoly Burakov
2014-05-28 17:32 ` [dpdk-dev] [PATCH 02/13] Refactored IPv4 fragmentation into a proper library Anatoly Burakov
2014-05-28 17:32 ` [dpdk-dev] [PATCH 03/13] Fixing issues reported by checkpatch Anatoly Burakov
2014-05-28 17:32 ` [dpdk-dev] [PATCH 04/13] ip_frag: new internal common header Anatoly Burakov
2014-05-28 17:32 ` [dpdk-dev] [PATCH 05/13] ip_frag: removed unneeded check and macro Anatoly Burakov
2014-05-28 17:32 ` [dpdk-dev] [PATCH 06/13] ip_frag: renaming structures in fragmentation table to be more generic Anatoly Burakov
2014-05-28 17:32 ` Anatoly Burakov [this message]
2014-05-28 17:32 ` [dpdk-dev] [PATCH 08/13] ip_frag: renamed ipv4 frag function Anatoly Burakov
2014-05-28 17:32 ` [dpdk-dev] [PATCH 09/13] ip_frag: added IPv6 fragmentation support Anatoly Burakov
2014-05-28 17:32 ` [dpdk-dev] [PATCH 10/13] examples: renamed ipv4_frag example app to ip_fragmentation Anatoly Burakov
2014-05-28 17:32 ` [dpdk-dev] [PATCH 11/13] example: overhaul of ip_fragmentation example app Anatoly Burakov
2014-05-28 17:32 ` [dpdk-dev] [PATCH 12/13] ip_frag: add support for IPv6 reassembly Anatoly Burakov
2014-05-28 17:32 ` [dpdk-dev] [PATCH 13/13] examples: overhaul of ip_reassembly app Anatoly Burakov
2014-05-28 17:34 ` [dpdk-dev] [PATCH 00/13] *** SUBJECT HERE *** Burakov, Anatoly
2014-06-06 15:58 ` [dpdk-dev] [PATCH 00/13] IPv4/IPv6 fragmentation/reassembly library Cao, Waterman
2014-06-16 16:59 ` [dpdk-dev] [PATCH 00/13] IP fragmentation and reassembly Thomas Monjalon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1625c043bb49508dd2aab9877f86183ab7e403d7.1401298292.git.anatoly.burakov@intel.com \
    --to=anatoly.burakov@intel.com \
    --cc=dev@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).