From: Medvedkin Vladimir <medvedkinv@gmail.com>
To: dev@dpdk.org
Cc: Medvedkin Vladimir <medvedkinv@gmail.com>
Subject: [dpdk-dev] [PATCH v2 1/2] Add RIB library
Date: Wed, 21 Feb 2018 21:44:54 +0000 [thread overview]
Message-ID: <1519249495-16594-2-git-send-email-medvedkinv@gmail.com> (raw)
In-Reply-To: <1519249495-16594-1-git-send-email-medvedkinv@gmail.com>
RIB is an alternative to current LPM library.
It solves the following problems
- Increases the speed of control plane operations against lpm such as
adding/deleting routes
- Adds abstraction from dataplane algorithms, so it is possible to add
different ip route lookup algorythms such as DXR/poptrie/lpc-trie/etc
in addition to current dir24_8
- It is possible to keep user defined application specific additional
information in struct rte_rib_node which represents route entry.
It can be next hop/set of next hops (i.e. active and feasible),
pointers to link rte_rib_node based on some criteria (i.e. next_hop),
plenty of additional control plane information.
- For dir24_8 implementation it is possible to remove rte_lpm_tbl_entry.depth
field that helps to save 6 bits.
- Also new dir24_8 implementation supports different next_hop sizes
(1/2/4/8 bytes per next hop)
- Removed RTE_LPM_LOOKUP_SUCCESS to save 1 bit and to eleminate ternary operator.
Instead it returns special default value if there is no route.
Signed-off-by: Medvedkin Vladimir <medvedkinv@gmail.com>
---
config/common_base | 6 +
doc/api/doxy-api.conf | 1 +
lib/Makefile | 2 +
lib/librte_rib/Makefile | 22 ++
lib/librte_rib/rte_dir24_8.c | 482 +++++++++++++++++++++++++++++++++
lib/librte_rib/rte_dir24_8.h | 116 ++++++++
lib/librte_rib/rte_rib.c | 526 +++++++++++++++++++++++++++++++++++++
lib/librte_rib/rte_rib.h | 322 +++++++++++++++++++++++
lib/librte_rib/rte_rib_version.map | 18 ++
mk/rte.app.mk | 1 +
10 files changed, 1496 insertions(+)
create mode 100644 lib/librte_rib/Makefile
create mode 100644 lib/librte_rib/rte_dir24_8.c
create mode 100644 lib/librte_rib/rte_dir24_8.h
create mode 100644 lib/librte_rib/rte_rib.c
create mode 100644 lib/librte_rib/rte_rib.h
create mode 100644 lib/librte_rib/rte_rib_version.map
diff --git a/config/common_base b/config/common_base
index ad03cf4..aceeff5 100644
--- a/config/common_base
+++ b/config/common_base
@@ -679,6 +679,12 @@ CONFIG_RTE_LIBRTE_LPM=y
CONFIG_RTE_LIBRTE_LPM_DEBUG=n
#
+# Compile librte_rib
+#
+CONFIG_RTE_LIBRTE_RIB=y
+CONFIG_RTE_LIBRTE_RIB_DEBUG=n
+
+#
# Compile librte_acl
#
CONFIG_RTE_LIBRTE_ACL=y
diff --git a/doc/api/doxy-api.conf b/doc/api/doxy-api.conf
index cda52fd..8e4f969 100644
--- a/doc/api/doxy-api.conf
+++ b/doc/api/doxy-api.conf
@@ -60,6 +60,7 @@ INPUT = doc/api/doxy-api-index.md \
lib/librte_kvargs \
lib/librte_latencystats \
lib/librte_lpm \
+ lib/librte_rib \
lib/librte_mbuf \
lib/librte_member \
lib/librte_mempool \
diff --git a/lib/Makefile b/lib/Makefile
index ec965a6..7f2323a 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -43,6 +43,8 @@ DIRS-$(CONFIG_RTE_LIBRTE_EFD) += librte_efd
DEPDIRS-librte_efd := librte_eal librte_ring librte_hash
DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm
DEPDIRS-librte_lpm := librte_eal
+DIRS-$(CONFIG_RTE_LIBRTE_RIB) += librte_rib
+DEPDIRS-librte_rib := librte_eal
DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl
DEPDIRS-librte_acl := librte_eal
DIRS-$(CONFIG_RTE_LIBRTE_MEMBER) += librte_member
diff --git a/lib/librte_rib/Makefile b/lib/librte_rib/Makefile
new file mode 100644
index 0000000..cb97f02
--- /dev/null
+++ b/lib/librte_rib/Makefile
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_rib.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+
+EXPORT_MAP := rte_rib_version.map
+
+LIBABIVER := 1
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_RIB) := rte_rib.c rte_dir24_8.c
+
+# install this header file
+SYMLINK-$(CONFIG_RTE_LIBRTE_RIB)-include := rte_rib.h rte_dir24_8.h
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_rib/rte_dir24_8.c b/lib/librte_rib/rte_dir24_8.c
new file mode 100644
index 0000000..a12f882
--- /dev/null
+++ b/lib/librte_rib/rte_dir24_8.c
@@ -0,0 +1,482 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <rte_debug.h>
+#include <rte_malloc.h>
+#include <rte_prefetch.h>
+#include <rte_errno.h>
+
+#include <inttypes.h>
+
+#include <rte_memory.h>
+#include <rte_branch_prediction.h>
+
+#include <rte_rib.h>
+#include <rte_dir24_8.h>
+
+#define BITMAP_SLAB_BIT_SIZE_LOG2 6
+#define BITMAP_SLAB_BIT_SIZE (1 << BITMAP_SLAB_BIT_SIZE_LOG2)
+#define BITMAP_SLAB_BITMASK (BITMAP_SLAB_BIT_SIZE - 1)
+
+#define ROUNDUP(x, y) RTE_ALIGN_CEIL(x, (1 << (32 - y)))
+
+static __rte_always_inline __attribute__((pure)) void *
+get_tbl24_p(struct rte_dir24_8_tbl *fib, uint32_t ip)
+{
+ return (void *)&((uint8_t *)fib->tbl24)[(ip &
+ RTE_DIR24_8_TBL24_MASK) >> (8 - fib->nh_sz)];
+}
+
+#define LOOKUP_FUNC(suffix, type, bulk_prefetch) \
+int rte_dir24_8_lookup_bulk_##suffix(void *fib_p, const uint32_t *ips, \
+ uint64_t *next_hops, const unsigned n) \
+{ \
+ struct rte_dir24_8_tbl *fib = (struct rte_dir24_8_tbl *)fib_p; \
+ uint64_t tmp; \
+ uint32_t i; \
+ uint32_t prefetch_offset = RTE_MIN((unsigned)bulk_prefetch, n); \
+ \
+ RTE_RIB_RETURN_IF_TRUE(((fib == NULL) || (ips == NULL) || \
+ (next_hops == NULL)), -EINVAL); \
+ \
+ for (i = 0; i < prefetch_offset; i++) \
+ rte_prefetch0(get_tbl24_p(fib, ips[i])); \
+ for (i = 0; i < (n - prefetch_offset); i++) { \
+ rte_prefetch0(get_tbl24_p(fib, ips[i + prefetch_offset])); \
+ tmp = ((type *)fib->tbl24)[ips[i] >> 8]; \
+ if (unlikely((tmp & RTE_DIR24_8_VALID_EXT_ENT) == \
+ RTE_DIR24_8_VALID_EXT_ENT)) { \
+ tmp = ((type *)fib->tbl8)[(uint8_t)ips[i] + \
+ ((tmp >> 1) * RTE_DIR24_8_TBL8_GRP_NUM_ENT)]; \
+ } \
+ next_hops[i] = tmp >> 1; \
+ } \
+ for (; i < n; i++) { \
+ tmp = ((type *)fib->tbl24)[ips[i] >> 8]; \
+ if (unlikely((tmp & RTE_DIR24_8_VALID_EXT_ENT) == \
+ RTE_DIR24_8_VALID_EXT_ENT)) { \
+ tmp = ((type *)fib->tbl8)[(uint8_t)ips[i] + \
+ ((tmp >> 1) * RTE_DIR24_8_TBL8_GRP_NUM_ENT)]; \
+ } \
+ next_hops[i] = tmp >> 1; \
+ } \
+ return 0; \
+} \
+
+static void
+write_to_fib(void *ptr, uint64_t val, enum rte_dir24_8_nh_sz size, int n)
+{
+ int i;
+ uint8_t *ptr8 = (uint8_t *)ptr;
+ uint16_t *ptr16 = (uint16_t *)ptr;
+ uint32_t *ptr32 = (uint32_t *)ptr;
+ uint64_t *ptr64 = (uint64_t *)ptr;
+
+ switch (size) {
+ case RTE_DIR24_8_1B:
+ for (i = 0; i < n; i++)
+ ptr8[i] = (uint8_t)val;
+ break;
+ case RTE_DIR24_8_2B:
+ for (i = 0; i < n; i++)
+ ptr16[i] = (uint16_t)val;
+ break;
+ case RTE_DIR24_8_4B:
+ for (i = 0; i < n; i++)
+ ptr32[i] = (uint32_t)val;
+ break;
+ case RTE_DIR24_8_8B:
+ for (i = 0; i < n; i++)
+ ptr64[i] = (uint64_t)val;
+ break;
+ }
+}
+
+static int
+tbl8_get_idx(struct rte_dir24_8_tbl *fib)
+{
+ uint32_t i;
+ int bit_idx;
+
+ for (i = 0; (i < (fib->number_tbl8s >> BITMAP_SLAB_BIT_SIZE_LOG2)) &&
+ (fib->tbl8_idxes[i] == UINT64_MAX); i++)
+ ;
+ if (i <= (fib->number_tbl8s >> BITMAP_SLAB_BIT_SIZE_LOG2)) {
+ bit_idx = __builtin_ctzll(~fib->tbl8_idxes[i]);
+ fib->tbl8_idxes[i] |= (1ULL << bit_idx);
+ return (i << BITMAP_SLAB_BIT_SIZE_LOG2) + bit_idx;
+ }
+ return -ENOSPC;
+}
+
+static inline void
+tbl8_free_idx(struct rte_dir24_8_tbl *fib, int idx)
+{
+ fib->tbl8_idxes[idx >> BITMAP_SLAB_BIT_SIZE_LOG2] &=
+ ~(1ULL << (idx & BITMAP_SLAB_BITMASK));
+}
+
+static int
+tbl8_alloc(struct rte_dir24_8_tbl *fib, uint64_t nh)
+{
+ int tbl8_idx;
+ uint8_t *tbl8_ptr;
+
+ tbl8_idx = tbl8_get_idx(fib);
+ if (tbl8_idx < 0)
+ return tbl8_idx;
+ tbl8_ptr = (uint8_t *)fib->tbl8 +
+ ((tbl8_idx * RTE_DIR24_8_TBL8_GRP_NUM_ENT) <<
+ fib->nh_sz);
+ /*Init tbl8 entries with nexthop from tbl24*/
+ write_to_fib((void *)tbl8_ptr, nh|
+ RTE_DIR24_8_VALID_EXT_ENT, fib->nh_sz,
+ RTE_DIR24_8_TBL8_GRP_NUM_ENT);
+ return tbl8_idx;
+}
+
+static void
+tbl8_recycle(struct rte_dir24_8_tbl *fib, uint32_t ip, uint64_t tbl8_idx)
+{
+ int i;
+ uint64_t nh;
+ uint8_t *ptr8;
+ uint16_t *ptr16;
+ uint32_t *ptr32;
+ uint64_t *ptr64;
+
+ switch (fib->nh_sz) {
+ case RTE_DIR24_8_1B:
+ ptr8 = &((uint8_t *)fib->tbl8)[tbl8_idx *
+ RTE_DIR24_8_TBL8_GRP_NUM_ENT];
+ nh = *ptr8;
+ for (i = 1; i < RTE_DIR24_8_TBL8_GRP_NUM_ENT; i++) {
+ if (nh != ptr8[i])
+ return;
+ }
+ ((uint8_t *)fib->tbl24)[ip >> 8] =
+ nh & ~RTE_DIR24_8_VALID_EXT_ENT;
+ for (i = 0; i < RTE_DIR24_8_TBL8_GRP_NUM_ENT; i++)
+ ptr8[i] = 0;
+ break;
+ case RTE_DIR24_8_2B:
+ ptr16 = &((uint16_t *)fib->tbl8)[tbl8_idx *
+ RTE_DIR24_8_TBL8_GRP_NUM_ENT];
+ nh = *ptr16;
+ for (i = 1; i < RTE_DIR24_8_TBL8_GRP_NUM_ENT; i++) {
+ if (nh != ptr16[i])
+ return;
+ }
+ ((uint16_t *)fib->tbl24)[ip >> 8] =
+ nh & ~RTE_DIR24_8_VALID_EXT_ENT;
+ for (i = 0; i < RTE_DIR24_8_TBL8_GRP_NUM_ENT; i++)
+ ptr16[i] = 0;
+ break;
+ case RTE_DIR24_8_4B:
+ ptr32 = &((uint32_t *)fib->tbl8)[tbl8_idx *
+ RTE_DIR24_8_TBL8_GRP_NUM_ENT];
+ nh = *ptr32;
+ for (i = 1; i < RTE_DIR24_8_TBL8_GRP_NUM_ENT; i++) {
+ if (nh != ptr32[i])
+ return;
+ }
+ ((uint32_t *)fib->tbl24)[ip >> 8] =
+ nh & ~RTE_DIR24_8_VALID_EXT_ENT;
+ for (i = 0; i < RTE_DIR24_8_TBL8_GRP_NUM_ENT; i++)
+ ptr32[i] = 0;
+ break;
+ case RTE_DIR24_8_8B:
+ ptr64 = &((uint64_t *)fib->tbl8)[tbl8_idx *
+ RTE_DIR24_8_TBL8_GRP_NUM_ENT];
+ nh = *ptr64;
+ for (i = 1; i < RTE_DIR24_8_TBL8_GRP_NUM_ENT; i++) {
+ if (nh != ptr64[i])
+ return;
+ }
+ ((uint64_t *)fib->tbl24)[ip >> 8] =
+ nh & ~RTE_DIR24_8_VALID_EXT_ENT;
+ for (i = 0; i < RTE_DIR24_8_TBL8_GRP_NUM_ENT; i++)
+ ptr64[i] = 0;
+ break;
+ }
+ tbl8_free_idx(fib, tbl8_idx);
+}
+
+static int
+install_to_fib(struct rte_dir24_8_tbl *fib, uint32_t ledge, uint32_t redge,
+ uint64_t next_hop)
+{
+ uint64_t tbl24_tmp;
+ int tbl8_idx;
+ int tmp_tbl8_idx;
+ uint8_t *tbl8_ptr;
+
+ /*case for 0.0.0.0/0*/
+ if (unlikely((ledge == 0) && (redge == 0))) {
+ write_to_fib(fib->tbl24, next_hop << 1, fib->nh_sz, 1 << 24);
+ return 0;
+ }
+ if (ROUNDUP(ledge, 24) <= redge) {
+ if (ledge < ROUNDUP(ledge, 24)) {
+ tbl24_tmp = RTE_DIR24_8_GET_TBL24(fib, ledge);
+ if ((tbl24_tmp & RTE_DIR24_8_VALID_EXT_ENT) !=
+ RTE_DIR24_8_VALID_EXT_ENT) {
+ tbl8_idx = tbl8_alloc(fib, tbl24_tmp);
+ tmp_tbl8_idx = tbl8_get_idx(fib);
+ if ((tbl8_idx < 0) || (tmp_tbl8_idx < 0))
+ return -ENOSPC;
+ tbl8_free_idx(fib, tmp_tbl8_idx);
+ /*update dir24 entry with tbl8 index*/
+ write_to_fib(get_tbl24_p(fib, ledge),
+ (tbl8_idx << 1)|
+ RTE_DIR24_8_VALID_EXT_ENT,
+ fib->nh_sz, 1);
+ } else
+ tbl8_idx = tbl24_tmp >> 1;
+ tbl8_ptr = (uint8_t *)fib->tbl8 +
+ (((tbl8_idx * RTE_DIR24_8_TBL8_GRP_NUM_ENT) +
+ (ledge & ~RTE_DIR24_8_TBL24_MASK)) <<
+ fib->nh_sz);
+ /*update tbl8 with new next hop*/
+ write_to_fib((void *)tbl8_ptr, (next_hop << 1)|
+ RTE_DIR24_8_VALID_EXT_ENT,
+ fib->nh_sz, ROUNDUP(ledge, 24) - ledge);
+ tbl8_recycle(fib, ledge, tbl8_idx);
+ }
+ if (ROUNDUP(ledge, 24) < (redge & RTE_DIR24_8_TBL24_MASK)) {
+ write_to_fib(get_tbl24_p(fib, ROUNDUP(ledge, 24)),
+ next_hop << 1, fib->nh_sz,
+ ((redge & RTE_DIR24_8_TBL24_MASK) -
+ ROUNDUP(ledge, 24)) >> 8);
+ }
+ if (redge & ~RTE_DIR24_8_TBL24_MASK) {
+ tbl24_tmp = RTE_DIR24_8_GET_TBL24(fib, redge);
+ if ((tbl24_tmp & RTE_DIR24_8_VALID_EXT_ENT) !=
+ RTE_DIR24_8_VALID_EXT_ENT) {
+ tbl8_idx = tbl8_alloc(fib, tbl24_tmp);
+ if (tbl8_idx < 0)
+ return -ENOSPC;
+ /*update dir24 entry with tbl8 index*/
+ write_to_fib(get_tbl24_p(fib, redge),
+ (tbl8_idx << 1)|
+ RTE_DIR24_8_VALID_EXT_ENT,
+ fib->nh_sz, 1);
+ } else
+ tbl8_idx = tbl24_tmp >> 1;
+ tbl8_ptr = (uint8_t *)fib->tbl8 +
+ ((tbl8_idx * RTE_DIR24_8_TBL8_GRP_NUM_ENT) <<
+ fib->nh_sz);
+ /*update tbl8 with new next hop*/
+ write_to_fib((void *)tbl8_ptr, (next_hop << 1)|
+ RTE_DIR24_8_VALID_EXT_ENT,
+ fib->nh_sz, redge & ~RTE_DIR24_8_TBL24_MASK);
+ tbl8_recycle(fib, redge, tbl8_idx);
+ }
+ } else {
+ tbl24_tmp = RTE_DIR24_8_GET_TBL24(fib, ledge);
+ if ((tbl24_tmp & RTE_DIR24_8_VALID_EXT_ENT) !=
+ RTE_DIR24_8_VALID_EXT_ENT) {
+ tbl8_idx = tbl8_alloc(fib, tbl24_tmp);
+ if (tbl8_idx < 0)
+ return -ENOSPC;
+ /*update dir24 entry with tbl8 index*/
+ write_to_fib(get_tbl24_p(fib, ledge),
+ (tbl8_idx << 1)|
+ RTE_DIR24_8_VALID_EXT_ENT,
+ fib->nh_sz, 1);
+ } else
+ tbl8_idx = tbl24_tmp >> 1;
+ tbl8_ptr = (uint8_t *)fib->tbl8 +
+ (((tbl8_idx * RTE_DIR24_8_TBL8_GRP_NUM_ENT) +
+ (ledge & ~RTE_DIR24_8_TBL24_MASK)) <<
+ fib->nh_sz);
+ /*update tbl8 with new next hop*/
+ write_to_fib((void *)tbl8_ptr, (next_hop << 1)|
+ RTE_DIR24_8_VALID_EXT_ENT,
+ fib->nh_sz, redge - ledge);
+ tbl8_recycle(fib, ledge, tbl8_idx);
+ }
+ return 0;
+}
+
+static int
+modify_fib(struct rte_rib *rib, uint32_t ip, uint8_t depth,
+ uint64_t next_hop)
+{
+ struct rte_rib_node *tmp = NULL;
+ struct rte_dir24_8_tbl *fib;
+ uint32_t ledge, redge;
+ int ret;
+
+ fib = rib->fib;
+
+ if (next_hop > DIR24_8_MAX_NH(fib))
+ return -EINVAL;
+
+ ledge = ip;
+ do {
+ tmp = rte_rib_tree_get_nxt(rib, ip, depth, tmp,
+ RTE_RIB_GET_NXT_COVER);
+ if (tmp != NULL) {
+ if (tmp->depth == depth)
+ continue;
+ redge = tmp->key;
+ if (ledge == redge) {
+ ledge = redge +
+ (uint32_t)(1ULL << (32 - tmp->depth));
+ continue;
+ }
+ ret = install_to_fib(fib, ledge, redge,
+ next_hop);
+ if (ret != 0)
+ return ret;
+ ledge = redge +
+ (uint32_t)(1ULL << (32 - tmp->depth));
+ } else {
+ redge = ip + (uint32_t)(1ULL << (32 - depth));
+ ret = install_to_fib(fib, ledge, redge,
+ next_hop);
+ if (ret != 0)
+ return ret;
+ }
+ } while (tmp);
+
+ return 0;
+}
+
+int
+rte_dir24_8_modify(struct rte_rib *rib, uint32_t ip, uint8_t depth,
+ uint64_t next_hop, enum rte_rib_op op)
+{
+ struct rte_dir24_8_tbl *fib;
+ struct rte_rib_node *tmp = NULL;
+ struct rte_rib_node *node;
+ struct rte_rib_node *parent;
+ int ret = 0;
+
+ if ((rib == NULL) || (depth > RTE_RIB_MAXDEPTH))
+ return -EINVAL;
+
+ fib = rib->fib;
+ RTE_ASSERT(fib);
+
+ ip &= (uint32_t)(UINT64_MAX << (32 - depth));
+
+ node = rte_rib_tree_lookup_exact(rib, ip, depth);
+ switch (op) {
+ case RTE_RIB_ADD:
+ if (node != NULL) {
+ if (node->nh == next_hop)
+ return 0;
+ ret = modify_fib(rib, ip, depth, next_hop);
+ if (ret == 0)
+ node->nh = next_hop;
+ return 0;
+ }
+ if (depth > 24) {
+ tmp = rte_rib_tree_get_nxt(rib, ip, 24, NULL,
+ RTE_RIB_GET_NXT_COVER);
+ if ((tmp == NULL) &&
+ (fib->cur_tbl8s >= fib->number_tbl8s))
+ return -ENOSPC;
+
+ }
+ node = rte_rib_tree_insert(rib, ip, depth);
+ if (node == NULL)
+ return -rte_errno;
+ node->nh = next_hop;
+ parent = rte_rib_tree_lookup_parent(node);
+ if ((parent != NULL) && (parent->nh == next_hop))
+ return 0;
+ ret = modify_fib(rib, ip, depth, next_hop);
+ if (ret) {
+ rte_rib_tree_remove(rib, ip, depth);
+ return ret;
+ }
+ if ((depth > 24) && (tmp == NULL))
+ fib->cur_tbl8s++;
+ return 0;
+ case RTE_RIB_DEL:
+ if (node == NULL)
+ return -ENOENT;
+
+ parent = rte_rib_tree_lookup_parent(node);
+ if (parent != NULL) {
+ if (parent->nh != node->nh)
+ ret = modify_fib(rib, ip, depth, parent->nh);
+ } else
+ ret = modify_fib(rib, ip, depth, fib->def_nh);
+ if (ret == 0) {
+ rte_rib_tree_remove(rib, ip, depth);
+ if (depth > 24) {
+ tmp = rte_rib_tree_get_nxt(rib, ip, 24, NULL,
+ RTE_RIB_GET_NXT_COVER);
+ if (tmp == NULL)
+ fib->cur_tbl8s--;
+ }
+ }
+ return ret;
+ default:
+ break;
+ }
+ return -EINVAL;
+}
+
+struct rte_dir24_8_tbl *rte_dir24_8_create(const char *name, int socket_id,
+ enum rte_dir24_8_nh_sz nh_sz, uint64_t def_nh)
+{
+ char mem_name[RTE_RIB_NAMESIZE];
+ struct rte_dir24_8_tbl *fib;
+
+ snprintf(mem_name, sizeof(mem_name), "FIB_%s", name);
+ fib = rte_zmalloc_socket(name, sizeof(struct rte_dir24_8_tbl) +
+ RTE_DIR24_8_TBL24_NUM_ENT * (1 << nh_sz), RTE_CACHE_LINE_SIZE,
+ socket_id);
+ if (fib == NULL)
+ return fib;
+
+ snprintf(mem_name, sizeof(mem_name), "TBL8_%s", name);
+ fib->tbl8 = rte_zmalloc_socket(mem_name, RTE_DIR24_8_TBL8_GRP_NUM_ENT *
+ (1 << nh_sz) * RTE_DIR24_8_TBL8_NUM_GROUPS,
+ RTE_CACHE_LINE_SIZE, socket_id);
+ if (fib->tbl8 == NULL) {
+ rte_free(fib);
+ return NULL;
+ }
+ fib->def_nh = def_nh;
+ fib->nh_sz = nh_sz;
+ fib->number_tbl8s = RTE_MIN((uint32_t)RTE_DIR24_8_TBL8_NUM_GROUPS,
+ DIR24_8_MAX_NH(fib));
+
+ snprintf(mem_name, sizeof(mem_name), "TBL8_idxes_%s", name);
+ fib->tbl8_idxes = rte_zmalloc_socket(mem_name,
+ RTE_ALIGN_CEIL(fib->number_tbl8s, 64) >> 3,
+ RTE_CACHE_LINE_SIZE, socket_id);
+ if (fib->tbl8_idxes == NULL) {
+ rte_free(fib->tbl8);
+ rte_free(fib);
+ return NULL;
+ }
+
+ return fib;
+}
+
+void
+rte_dir24_8_free(void *fib_p)
+{
+ struct rte_dir24_8_tbl *fib = (struct rte_dir24_8_tbl *)fib_p;
+
+ rte_free(fib->tbl8_idxes);
+ rte_free(fib->tbl8);
+ rte_free(fib);
+}
+
+LOOKUP_FUNC(1b, uint8_t, 5)
+LOOKUP_FUNC(2b, uint16_t, 6)
+LOOKUP_FUNC(4b, uint32_t, 15)
+LOOKUP_FUNC(8b, uint64_t, 12)
diff --git a/lib/librte_rib/rte_dir24_8.h b/lib/librte_rib/rte_dir24_8.h
new file mode 100644
index 0000000..f779409
--- /dev/null
+++ b/lib/librte_rib/rte_dir24_8.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
+ */
+
+#ifndef _RTE_DIR24_8_H_
+#define _RTE_DIR24_8_H_
+
+/**
+ * @file
+ * RTE Longest Prefix Match (LPM)
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @internal Total number of tbl24 entries. */
+#define RTE_DIR24_8_TBL24_NUM_ENT (1 << 24)
+
+/** Maximum depth value possible for IPv4 LPM. */
+#define RTE_DIR24_8_MAX_DEPTH 32
+
+/** @internal Number of entries in a tbl8 group. */
+#define RTE_DIR24_8_TBL8_GRP_NUM_ENT 256
+
+/** @internal Total number of tbl8 groups in the tbl8. */
+#define RTE_DIR24_8_TBL8_NUM_GROUPS 65536
+
+/** @internal bitmask with valid and valid_group fields set */
+#define RTE_DIR24_8_VALID_EXT_ENT 0x01
+
+#define RTE_DIR24_8_TBL24_MASK 0xffffff00
+
+/** Size of nexthop (1 << nh_sz) bits */
+enum rte_dir24_8_nh_sz {
+ RTE_DIR24_8_1B,
+ RTE_DIR24_8_2B,
+ RTE_DIR24_8_4B,
+ RTE_DIR24_8_8B
+};
+
+
+#define DIR24_8_BITS_IN_NH(fib) (8 * (1 << fib->nh_sz))
+#define DIR24_8_MAX_NH(fib) ((1ULL << (DIR24_8_BITS_IN_NH(fib) - 1)) - 1)
+
+#define DIR24_8_TBL_IDX(a, fib) ((a) >> (3 - fib->nh_sz))
+#define DIR24_8_PSD_IDX(a, fib) ((a) & ((1 << (3 - fib->nh_sz)) - 1))
+
+#define DIR24_8_TBL24_VAL(ip) (ip >> 8)
+#define DIR24_8_TBL8_VAL(res, ip) \
+ ((res >> 1) * RTE_DIR24_8_TBL8_GRP_NUM_ENT + (uint8_t)ip) \
+
+#define DIR24_8_LOOKUP_MSK \
+ (((1ULL << ((1 << (fib->nh_sz + 3)) - 1)) << 1) - 1) \
+
+#define RTE_DIR24_8_GET_TBL24(fib, ip) \
+ ((fib->tbl24[DIR24_8_TBL_IDX(DIR24_8_TBL24_VAL(ip), fib)] >> \
+ (DIR24_8_PSD_IDX(DIR24_8_TBL24_VAL(ip), fib) * \
+ DIR24_8_BITS_IN_NH(fib))) & DIR24_8_LOOKUP_MSK) \
+
+#define RTE_DIR24_8_GET_TBL8(fib, res, ip) \
+ ((fib->tbl8[DIR24_8_TBL_IDX(DIR24_8_TBL8_VAL(res, ip), fib)] >> \
+ (DIR24_8_PSD_IDX(DIR24_8_TBL8_VAL(res, ip), fib) * \
+ DIR24_8_BITS_IN_NH(fib))) & DIR24_8_LOOKUP_MSK) \
+
+
+struct rte_dir24_8_tbl {
+ uint32_t number_tbl8s; /**< Total number of tbl8s. */
+ uint32_t cur_tbl8s; /**< Current cumber of tbl8s. */
+ uint64_t def_nh;
+ enum rte_dir24_8_nh_sz nh_sz; /**< Size of nexthop entry */
+ uint64_t *tbl8; /**< LPM tbl8 table. */
+ uint64_t *tbl8_idxes;
+ uint64_t tbl24[0] __rte_cache_aligned; /**< LPM tbl24 table. */
+};
+
+struct rte_dir24_8_tbl *rte_dir24_8_create(const char *name, int socket_id,
+ enum rte_dir24_8_nh_sz nh_sz, uint64_t def_nh);
+void rte_dir24_8_free(void *fib_p);
+int rte_dir24_8_modify(struct rte_rib *rib, uint32_t key,
+ uint8_t depth, uint64_t next_hop, enum rte_rib_op op);
+int rte_dir24_8_lookup_bulk_1b(void *fib_p, const uint32_t *ips,
+ uint64_t *next_hops, const unsigned n);
+int rte_dir24_8_lookup_bulk_2b(void *fib_p, const uint32_t *ips,
+ uint64_t *next_hops, const unsigned n);
+int rte_dir24_8_lookup_bulk_4b(void *fib_p, const uint32_t *ips,
+ uint64_t *next_hops, const unsigned n);
+int rte_dir24_8_lookup_bulk_8b(void *fib_p, const uint32_t *ips,
+ uint64_t *next_hops, const unsigned n);
+
+
+static inline int
+rte_dir24_8_lookup(void *fib_p, uint32_t ip, uint64_t *next_hop)
+{
+ uint64_t res;
+ struct rte_dir24_8_tbl *fib = (struct rte_dir24_8_tbl *)fib_p;
+
+ RTE_RIB_RETURN_IF_TRUE(((fib == NULL) || (ip == NULL) ||
+ (next_hop == NULL)), -EINVAL);
+
+ res = RTE_DIR24_8_GET_TBL24(fib, ip);
+ if (unlikely((res & RTE_DIR24_8_VALID_EXT_ENT) ==
+ RTE_DIR24_8_VALID_EXT_ENT)) {
+ res = RTE_DIR24_8_GET_TBL8(fib, res, ip);
+ }
+ *next_hop = res >> 1;
+ return 0;
+}
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_DIR24_8_H_ */
+
diff --git a/lib/librte_rib/rte_rib.c b/lib/librte_rib/rte_rib.c
new file mode 100644
index 0000000..7783b23
--- /dev/null
+++ b/lib/librte_rib/rte_rib.c
@@ -0,0 +1,526 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/queue.h>
+
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_common.h>
+#include <rte_tailq.h>
+#include <rte_errno.h>
+#include <rte_rwlock.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_mempool.h>
+#include <rte_malloc.h>
+#include <rte_log.h>
+
+#include <rte_rib.h>
+#include <rte_dir24_8.h>
+
+TAILQ_HEAD(rte_rib_list, rte_tailq_entry);
+static struct rte_tailq_elem rte_rib_tailq = {
+ .name = "RTE_RIB",
+};
+EAL_REGISTER_TAILQ(rte_rib_tailq)
+
+static struct rte_rib_node *
+new_node_malloc(struct rte_rib *rib)
+{
+ struct rte_rib_node *ent;
+
+ ent = malloc(rib->node_sz);
+ if (unlikely(ent == NULL))
+ return NULL;
+ ++rib->cur_nodes;
+ return ent;
+}
+
+static void
+free_node_malloc(__rte_unused struct rte_rib *rib, struct rte_rib_node *ent)
+{
+ --rib->cur_nodes;
+ free(ent);
+}
+
+static struct rte_rib_node *
+new_node_mempool(struct rte_rib *rib)
+{
+ struct rte_rib_node *ent;
+ int ret;
+
+ ret = rte_mempool_get(rib->node_pool, (void *)&ent);
+ if (unlikely(ret != 0))
+ return NULL;
+ ++rib->cur_nodes;
+ return ent;
+}
+
+static void
+free_node_mempool(struct rte_rib *rib, struct rte_rib_node *ent)
+{
+ --rib->cur_nodes;
+ rte_mempool_put(rib->node_pool, ent);
+}
+
+struct rte_rib_node *
+rte_rib_tree_lookup(struct rte_rib *rib, uint32_t key)
+{
+ struct rte_rib_node *cur = rib->trie;
+ struct rte_rib_node *prev = NULL;
+
+ while ((cur != NULL) && (((cur->key ^ key) &
+ (uint32_t)(UINT64_MAX << (32 - cur->depth))) == 0)) {
+ if ((cur->flag & RTE_RIB_VALID_NODE) == RTE_RIB_VALID_NODE)
+ prev = cur;
+ cur = RTE_RIB_GET_NXT_NODE(cur, key);
+ }
+ return prev;
+}
+
+struct rte_rib_node *
+rte_rib_tree_lookup_parent(struct rte_rib_node *ent)
+{
+ struct rte_rib_node *tmp;
+
+ if (ent == NULL)
+ return NULL;
+ tmp = ent->parent;
+ while ((tmp != NULL) &&
+ (tmp->flag & RTE_RIB_VALID_NODE) != RTE_RIB_VALID_NODE) {
+ tmp = tmp->parent;
+}
+ return tmp;
+}
+
+struct rte_rib_node *
+rte_rib_tree_lookup_exact(struct rte_rib *rib, uint32_t key, uint8_t depth)
+{
+ struct rte_rib_node *cur = rib->trie;
+
+ key &= (uint32_t)(UINT64_MAX << (32 - depth));
+ while (cur != NULL) {
+ if ((cur->key == key) && (cur->depth == depth) &&
+ (cur->flag & RTE_RIB_VALID_NODE))
+ return cur;
+ if ((cur->depth > depth) ||
+ (((uint64_t)key >> (32 - cur->depth)) !=
+ ((uint64_t)cur->key >> (32 - cur->depth))))
+ break;
+ cur = RTE_RIB_GET_NXT_NODE(cur, key);
+ }
+ return NULL;
+}
+
+struct rte_rib_node *
+rte_rib_tree_get_nxt(struct rte_rib *rib, uint32_t key,
+ uint8_t depth, struct rte_rib_node *cur, int flag)
+{
+ struct rte_rib_node *tmp, *prev = NULL;
+
+ if (cur == NULL) {
+ tmp = rib->trie;
+ while ((tmp) && (tmp->depth < depth))
+ tmp = RTE_RIB_GET_NXT_NODE(tmp, key);
+ } else {
+ tmp = cur;
+ while ((tmp->parent != NULL) && (RTE_RIB_IS_RIGHT_NODE(tmp) ||
+ (tmp->parent->right == NULL))) {
+ tmp = tmp->parent;
+ if ((tmp->flag & RTE_RIB_VALID_NODE) &&
+ (RTE_RIB_IS_COVERED(tmp->key, tmp->depth,
+ key, depth)))
+ return tmp;
+ }
+ tmp = (tmp->parent) ? tmp->parent->right : NULL;
+ }
+ while (tmp) {
+ if ((tmp->flag & RTE_RIB_VALID_NODE) &&
+ (RTE_RIB_IS_COVERED(tmp->key, tmp->depth,
+ key, depth))) {
+ prev = tmp;
+ if (flag == RTE_RIB_GET_NXT_COVER)
+ return prev;
+ }
+ tmp = (tmp->left) ? tmp->left : tmp->right;
+ }
+ return prev;
+}
+
+void
+rte_rib_tree_remove(struct rte_rib *rib, uint32_t key, uint8_t depth)
+{
+ struct rte_rib_node *cur, *prev, *child;
+
+ cur = rte_rib_tree_lookup_exact(rib, key, depth);
+ if (cur == NULL)
+ return;
+
+ --rib->cur_routes;
+ cur->flag &= ~RTE_RIB_VALID_NODE;
+ while ((cur->flag & RTE_RIB_VALID_NODE) != RTE_RIB_VALID_NODE) {
+ if ((cur->left != NULL) && (cur->right != NULL))
+ return;
+ child = (cur->left == NULL) ? cur->right : cur->left;
+ if (child != NULL)
+ child->parent = cur->parent;
+ if (cur->parent == NULL) {
+ rib->trie = child;
+ rib->free_node(rib, cur);
+ return;
+ }
+ if (cur->parent->left == cur)
+ cur->parent->left = child;
+ else
+ cur->parent->right = child;
+ prev = cur;
+ cur = cur->parent;
+ rib->free_node(rib, prev);
+ }
+}
+
+struct rte_rib_node *
+rte_rib_tree_insert(struct rte_rib *rib, uint32_t key, uint8_t depth)
+{
+ struct rte_rib_node **tmp = &rib->trie;
+ struct rte_rib_node *prev = NULL;
+ struct rte_rib_node *new_node = NULL;
+ struct rte_rib_node *common_node = NULL;
+ int i = 0;
+ uint32_t common_prefix;
+ uint8_t common_depth;
+
+ if (depth > 32) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ key &= (uint32_t)(UINT64_MAX << (32 - depth));
+ new_node = rte_rib_tree_lookup_exact(rib, key, depth);
+ if (new_node != NULL) {
+ rte_errno = EEXIST;
+ return NULL;
+ }
+
+ new_node = rib->alloc_node(rib);
+ if (new_node == NULL) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+ new_node->left = NULL;
+ new_node->right = NULL;
+ new_node->parent = NULL;
+ new_node->key = key;
+ new_node->depth = depth;
+ new_node->flag = RTE_RIB_VALID_NODE;
+
+ while (1) {
+ if (*tmp == NULL) {
+ *tmp = new_node;
+ new_node->parent = prev;
+ }
+ if ((key == (*tmp)->key) && (depth == (*tmp)->depth)) {
+ if (new_node != *tmp) {
+ rib->free_node(rib, new_node);
+ (*tmp)->flag |= RTE_RIB_VALID_NODE;
+ }
+ ++rib->cur_routes;
+ return *tmp;
+ }
+ i = (*tmp)->depth;
+ if ((i >= depth) || (((uint64_t)key >> (32 - i)) !=
+ ((uint64_t)(*tmp)->key >> (32 - i))))
+ break;
+ prev = *tmp;
+ tmp = (key & (1 << (31 - i))) ? &(*tmp)->right : &(*tmp)->left;
+ }
+ common_depth = RTE_MIN(depth, (*tmp)->depth);
+ common_prefix = key ^ (*tmp)->key;
+ i = __builtin_clz(common_prefix);
+
+ common_depth = RTE_MIN(i, common_depth);
+ common_prefix = key & (uint32_t)(UINT64_MAX << (32 - common_depth));
+ if ((common_prefix == key) && (common_depth == depth)) {
+ if ((*tmp)->key & (1 << (31 - depth)))
+ new_node->right = *tmp;
+ else
+ new_node->left = *tmp;
+ new_node->parent = (*tmp)->parent;
+ (*tmp)->parent = new_node;
+ *tmp = new_node;
+ } else {
+ common_node = rib->alloc_node(rib);
+ if (common_node == NULL) {
+ rib->free_node(rib, new_node);
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+ common_node->key = common_prefix;
+ common_node->depth = common_depth;
+ common_node->flag = 0;
+ common_node->parent = (*tmp)->parent;
+ new_node->parent = common_node;
+ (*tmp)->parent = common_node;
+ if ((new_node->key & (1 << (31 - common_depth))) == 0) {
+ common_node->left = new_node;
+ common_node->right = *tmp;
+ } else {
+ common_node->left = *tmp;
+ common_node->right = new_node;
+ }
+ *tmp = common_node;
+ }
+ ++rib->cur_routes;
+ return new_node;
+}
+
+struct rte_rib *
+rte_rib_create(const char *name, int socket_id, struct rte_rib_conf *conf)
+{
+ char mem_name[RTE_RIB_NAMESIZE];
+ struct rte_rib *rib = NULL;
+ struct rte_tailq_entry *te;
+ struct rte_rib_list *rib_list;
+ struct rte_mempool *node_pool = NULL;
+ enum rte_dir24_8_nh_sz dir24_8_nh_size;
+
+ /* Check user arguments. */
+ if ((name == NULL) || (conf == NULL) || (socket_id < -1) ||
+ (conf->type >= RTE_RIB_TYPE_MAX) ||
+ (conf->alloc_type >= RTE_RIB_ALLOC_MAX) ||
+ (conf->max_nodes == 0) ||
+ (conf->node_sz < sizeof(struct rte_rib_node))) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ if (conf->alloc_type == RTE_RIB_MEMPOOL) {
+ snprintf(mem_name, sizeof(mem_name), "MP_%s", name);
+ node_pool = rte_mempool_create(mem_name, conf->max_nodes,
+ conf->node_sz, 0, 0, NULL, NULL, NULL, NULL,
+ socket_id, 0);
+
+ if (node_pool == NULL) {
+ RTE_LOG(ERR, LPM,
+ "Can not allocate mempool for RIB %s\n", name);
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ }
+
+ snprintf(mem_name, sizeof(mem_name), "RIB_%s", name);
+
+ rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);
+
+ rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+ /* guarantee there's no existing */
+ TAILQ_FOREACH(te, rib_list, next) {
+ rib = (struct rte_rib *)te->data;
+ if (strncmp(name, rib->name, RTE_RIB_NAMESIZE) == 0)
+ break;
+ }
+ rib = NULL;
+ if (te != NULL) {
+ rte_errno = EEXIST;
+ goto exit;
+ }
+
+ /* allocate tailq entry */
+ te = rte_zmalloc("RIB_TAILQ_ENTRY", sizeof(*te), 0);
+ if (te == NULL) {
+ RTE_LOG(ERR, LPM,
+ "Can not allocate tailq entry for RIB %s\n", name);
+ rte_errno = ENOMEM;
+ goto exit;
+ }
+
+ /* Allocate memory to store the LPM data structures. */
+ rib = (struct rte_rib *)rte_zmalloc_socket(mem_name,
+ sizeof(struct rte_rib), RTE_CACHE_LINE_SIZE, socket_id);
+ if (rib == NULL) {
+ RTE_LOG(ERR, LPM, "RIB %s memory allocation failed\n", name);
+ rte_errno = ENOMEM;
+ goto free_te;
+ }
+ snprintf(rib->name, sizeof(rib->name), "%s", name);
+ rib->trie = NULL;
+ rib->max_nodes = conf->max_nodes;
+ rib->node_sz = conf->node_sz;
+ rib->type = conf->type;
+ rib->alloc_type = conf->alloc_type;
+
+ if (conf->type <= RTE_RIB_DIR24_8_8B) {
+ switch (conf->type) {
+ case RTE_RIB_DIR24_8_1B:
+ dir24_8_nh_size = RTE_DIR24_8_1B;
+ rib->lookup = rte_dir24_8_lookup_bulk_1b;
+ break;
+ case RTE_RIB_DIR24_8_2B:
+ dir24_8_nh_size = RTE_DIR24_8_2B;
+ rib->lookup = rte_dir24_8_lookup_bulk_2b;
+ break;
+ case RTE_RIB_DIR24_8_4B:
+ dir24_8_nh_size = RTE_DIR24_8_4B;
+ rib->lookup = rte_dir24_8_lookup_bulk_4b;
+ break;
+ case RTE_RIB_DIR24_8_8B:
+ dir24_8_nh_size = RTE_DIR24_8_8B;
+ rib->lookup = rte_dir24_8_lookup_bulk_8b;
+ break;
+ case RTE_RIB_TYPE_MAX:
+ default:
+ RTE_LOG(ERR, LPM, "Bad RIB %s type\n", name);
+ rte_errno = EINVAL;
+ goto free_rib;
+ }
+ rib->fib = (void *)rte_dir24_8_create(name, socket_id,
+ dir24_8_nh_size, conf->def_nh);
+ if (rib->fib == NULL) {
+ RTE_LOG(ERR, LPM, "Failed to allocate FIB %s\n", name);
+ rte_errno = ENOMEM;
+ goto free_rib;
+ }
+ rib->modify = rte_dir24_8_modify;
+ }
+
+ switch (conf->alloc_type) {
+ case RTE_RIB_MALLOC:
+ rib->alloc_node = new_node_malloc;
+ rib->free_node = free_node_malloc;
+ break;
+ case RTE_RIB_MEMPOOL:
+ rib->node_pool = node_pool;
+ rib->alloc_node = new_node_mempool;
+ rib->free_node = free_node_mempool;
+ break;
+ case RTE_RIB_ALLOC_MAX:
+ default:
+ RTE_LOG(ERR, LPM, "Bad RIB %s alloc type\n", name);
+ rte_errno = EINVAL;
+ goto free_fib;
+ }
+
+ te->data = (void *)rib;
+ TAILQ_INSERT_TAIL(rib_list, te, next);
+
+ rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+ return rib;
+
+free_fib:
+ switch (conf->type) {
+ case RTE_RIB_DIR24_8_1B:
+ case RTE_RIB_DIR24_8_2B:
+ case RTE_RIB_DIR24_8_4B:
+ case RTE_RIB_DIR24_8_8B:
+ rte_dir24_8_free(rib->fib);
+ break;
+ default:
+ break;
+ }
+free_rib:
+ rte_free(rib);
+free_te:
+ rte_free(te);
+exit:
+ if (conf->alloc_type == RTE_RIB_MEMPOOL)
+ rte_mempool_free(node_pool);
+ rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+ return NULL;
+}
+
+struct rte_rib *
+rte_rib_find_existing(const char *name)
+{
+ struct rte_rib *rib = NULL;
+ struct rte_tailq_entry *te;
+ struct rte_rib_list *rib_list;
+
+ rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);
+
+ rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
+ TAILQ_FOREACH(te, rib_list, next) {
+ rib = (struct rte_rib *) te->data;
+ if (strncmp(name, rib->name, RTE_RIB_NAMESIZE) == 0)
+ break;
+ }
+ rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+ if (te == NULL) {
+ rte_errno = ENOENT;
+ return NULL;
+ }
+
+ return rib;
+}
+
+void
+rte_rib_free(struct rte_rib *rib)
+{
+ struct rte_tailq_entry *te;
+ struct rte_rib_list *rib_list;
+ struct rte_rib_node *tmp = NULL;
+
+ if (rib == NULL)
+ return;
+
+ rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);
+
+ rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+ /* find our tailq entry */
+ TAILQ_FOREACH(te, rib_list, next) {
+ if (te->data == (void *)rib)
+ break;
+ }
+ if (te != NULL)
+ TAILQ_REMOVE(rib_list, te, next);
+
+ rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+ while ((tmp = rte_rib_tree_get_nxt(rib, 0, 0, tmp,
+ RTE_RIB_GET_NXT_ALL)) != NULL)
+ rte_rib_tree_remove(rib, tmp->key, tmp->depth);
+
+ if (rib->alloc_type == RTE_RIB_MEMPOOL)
+ rte_mempool_free(rib->node_pool);
+
+ switch (rib->type) {
+ case RTE_RIB_DIR24_8_1B:
+ case RTE_RIB_DIR24_8_2B:
+ case RTE_RIB_DIR24_8_4B:
+ case RTE_RIB_DIR24_8_8B:
+ rte_dir24_8_free(rib->fib);
+ break;
+ default:
+ break;
+ }
+
+ rte_free(rib);
+ rte_free(te);
+}
+
+int
+rte_rib_add(struct rte_rib *rib, uint32_t ip, uint8_t depth, uint64_t next_hop)
+{
+ if ((rib == NULL) || (depth > RTE_RIB_MAXDEPTH))
+ return -EINVAL;
+
+ return rib->modify(rib, ip, depth, next_hop, RTE_RIB_ADD);
+}
+
+int
+rte_rib_delete(struct rte_rib *rib, uint32_t ip, uint8_t depth)
+{
+ if ((rib == NULL) || (depth > RTE_RIB_MAXDEPTH))
+ return -EINVAL;
+
+ return rib->modify(rib, ip, depth, 0, RTE_RIB_DEL);
+}
diff --git a/lib/librte_rib/rte_rib.h b/lib/librte_rib/rte_rib.h
new file mode 100644
index 0000000..6eac8fb
--- /dev/null
+++ b/lib/librte_rib/rte_rib.h
@@ -0,0 +1,322 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
+ */
+
+#ifndef _RTE_RIB_H_
+#define _RTE_RIB_H_
+
+/**
+ * @file
+ * Compressed trie implementation for Longest Prefix Match
+ */
+
+/** @internal Macro to enable/disable run-time checks. */
+#if defined(RTE_LIBRTE_RIB_DEBUG)
+#define RTE_RIB_RETURN_IF_TRUE(cond, retval) do { \
+ if (cond) \
+ return retval; \
+} while (0)
+#else
+#define RTE_RIB_RETURN_IF_TRUE(cond, retval)
+#endif
+
+#define RTE_RIB_VALID_NODE 1
+#define RTE_RIB_GET_NXT_ALL 0
+#define RTE_RIB_GET_NXT_COVER 1
+
+#define RTE_RIB_INVALID_ROUTE 0
+#define RTE_RIB_VALID_ROUTE 1
+
+/** Max number of characters in RIB name. */
+#define RTE_RIB_NAMESIZE 64
+
+/** Maximum depth value possible for IPv4 RIB. */
+#define RTE_RIB_MAXDEPTH 32
+
+/**
+ * Macro to check if prefix1 {key1/depth1}
+ * is covered by prefix2 {key2/depth2}
+ */
+#define RTE_RIB_IS_COVERED(key1, depth1, key2, depth2) \
+ ((((key1 ^ key2) & (uint32_t)(UINT64_MAX << (32 - depth2))) == 0)\
+ && (depth1 > depth2))
+
+/** @internal Macro to get next node in tree*/
+#define RTE_RIB_GET_NXT_NODE(node, key) \
+ ((key & (1 << (31 - node->depth))) ? node->right : node->left)
+/** @internal Macro to check if node is right child*/
+#define RTE_RIB_IS_RIGHT_NODE(node) (node->parent->right == node)
+
+
+struct rte_rib_node {
+ struct rte_rib_node *left;
+ struct rte_rib_node *right;
+ struct rte_rib_node *parent;
+ uint32_t key;
+ uint8_t depth;
+ uint8_t flag;
+ uint64_t nh;
+ uint64_t ext[0];
+};
+
+struct rte_rib;
+
+/** Type of FIB struct*/
+enum rte_rib_type {
+ RTE_RIB_DIR24_8_1B,
+ RTE_RIB_DIR24_8_2B,
+ RTE_RIB_DIR24_8_4B,
+ RTE_RIB_DIR24_8_8B,
+ RTE_RIB_TYPE_MAX
+};
+
+enum rte_rib_op {
+ RTE_RIB_ADD,
+ RTE_RIB_DEL
+};
+
+/** RIB nodes allocation type */
+enum rte_rib_alloc_type {
+ RTE_RIB_MALLOC,
+ RTE_RIB_MEMPOOL,
+ RTE_RIB_ALLOC_MAX
+};
+
+typedef int (*rte_rib_modify_fn_t)(struct rte_rib *rib, uint32_t key,
+ uint8_t depth, uint64_t next_hop, enum rte_rib_op op);
+typedef int (*rte_rib_tree_lookup_fn_t)(void *fib, const uint32_t *ips,
+ uint64_t *next_hops, const unsigned n);
+typedef struct rte_rib_node *(*rte_rib_alloc_node_fn_t)(struct rte_rib *rib);
+typedef void (*rte_rib_free_node_fn_t)(struct rte_rib *rib,
+ struct rte_rib_node *node);
+
+struct rte_rib {
+ char name[RTE_RIB_NAMESIZE];
+ /*pointer to rib trie*/
+ struct rte_rib_node *trie;
+ /*pointer to dataplane struct*/
+ void *fib;
+ /*prefix modification*/
+ rte_rib_modify_fn_t modify;
+ /* Bulk lookup fn*/
+ rte_rib_tree_lookup_fn_t lookup;
+ /*alloc trie element*/
+ rte_rib_alloc_node_fn_t alloc_node;
+ /*free trie element*/
+ rte_rib_free_node_fn_t free_node;
+ struct rte_mempool *node_pool;
+ uint32_t cur_nodes;
+ uint32_t cur_routes;
+ int max_nodes;
+ int node_sz;
+ enum rte_rib_type type;
+ enum rte_rib_alloc_type alloc_type;
+};
+
+/** RIB configuration structure */
+struct rte_rib_conf {
+ enum rte_rib_type type;
+ enum rte_rib_alloc_type alloc_type;
+ int max_nodes;
+ size_t node_sz;
+ uint64_t def_nh;
+};
+
+/**
+ * Lookup an IP into the RIB structure
+ *
+ * @param rib
+ * RIB object handle
+ * @param key
+ * IP to be looked up in the RIB
+ * @return
+ * pointer to struct rte_rib_node on success,
+ * NULL otherwise
+ */
+struct rte_rib_node *
+rte_rib_tree_lookup(struct rte_rib *rib, uint32_t key);
+
+/**
+ * Lookup less specific route into the RIB structure
+ *
+ * @param ent
+ * Pointer to struct rte_rib_node that represents target route
+ * @return
+ * pointer to struct rte_rib_node that represents
+ * less specific route on success,
+ * NULL otherwise
+ */
+struct rte_rib_node *
+rte_rib_tree_lookup_parent(struct rte_rib_node *ent);
+
+/**
+ * Lookup prefix into the RIB structure
+ *
+ * @param rib
+ * RIB object handle
+ * @param key
+ * net to be looked up in the RIB
+ * @param depth
+ * prefix length
+ * @return
+ * pointer to struct rte_rib_node on success,
+ * NULL otherwise
+ */
+struct rte_rib_node *
+rte_rib_tree_lookup_exact(struct rte_rib *rib, uint32_t key, uint8_t depth);
+
+/**
+ * Retrieve next more specific prefix from the RIB
+ * that is covered by key/depth supernet
+ *
+ * @param rib
+ * RIB object handle
+ * @param key
+ * net address of supernet prefix that covers returned more specific prefixes
+ * @param depth
+ * supernet prefix length
+ * @param cur
+ * pointer to the last returned prefix to get next prefix
+ * or
+ * NULL to get first more specific prefix
+ * @param flag
+ * -RTE_RIB_GET_NXT_ALL
+ * get all prefixes from subtrie
+ * -RTE_RIB_GET_NXT_COVER
+ * get only first more specific prefix even if it have more specifics
+ * @return
+ * pointer to the next more specific prefix
+ * or
+ * NULL if there is no prefixes left
+ */
+struct rte_rib_node *
+rte_rib_tree_get_nxt(struct rte_rib *rib, uint32_t key, uint8_t depth,
+ struct rte_rib_node *cur, int flag);
+
+/**
+ * Remove prefix from the RIB
+ *
+ * @param rib
+ * RIB object handle
+ * @param key
+ * net to be removed from the RIB
+ * @param depth
+ * prefix length
+ */
+void
+rte_rib_tree_remove(struct rte_rib *rib, uint32_t key, uint8_t depth);
+
+/**
+ * Insert prefix into the RIB
+ *
+ * @param rib
+ * RIB object handle
+ * @param key
+ * net to be inserted to the RIB
+ * @param depth
+ * prefix length
+ * @return
+ * pointer to new rte_rib_node on success
+ * NULL otherwise
+ */
+struct rte_rib_node *
+rte_rib_tree_insert(struct rte_rib *rib, uint32_t key, uint8_t depth);
+
+/**
+ * Create RIB
+ *
+ * @param name
+ * RIB name
+ * @param socket_id
+ * NUMA socket ID for RIB table memory allocation
+ * @param conf
+ * Structure containing the configuration
+ * @return
+ * Handle to RIB object on success
+ * NULL otherwise with rte_errno set to an appropriate values.
+ */
+struct rte_rib *
+rte_rib_create(const char *name, int socket_id, struct rte_rib_conf *conf);
+
+/**
+ * Find an existing RIB object and return a pointer to it.
+ *
+ * @param name
+ * Name of the rib object as passed to rte_rib_create()
+ * @return
+ * Pointer to rib object or NULL if object not found with rte_errno
+ * set appropriately. Possible rte_errno values include:
+ * - ENOENT - required entry not available to return.
+ */
+struct rte_rib *
+rte_rib_find_existing(const char *name);
+
+/**
+ * Free an RIB object.
+ *
+ * @param rib
+ * RIB object handle
+ * @return
+ * None
+ */
+void
+rte_rib_free(struct rte_rib *rib);
+
+/**
+ * Add a rule to the RIB.
+ *
+ * @param rib
+ * RIB object handle
+ * @param ip
+ * IP of the rule to be added to the RIB
+ * @param depth
+ * Depth of the rule to be added to the RIB
+ * @param next_hop
+ * Next hop of the rule to be added to the RIB
+ * @return
+ * 0 on success, negative value otherwise
+ */
+int
+rte_rib_add(struct rte_rib *rib, uint32_t ip, uint8_t depth, uint64_t next_hop);
+
+/**
+ * Delete a rule from the RIB.
+ *
+ * @param rib
+ * RIB object handle
+ * @param ip
+ * IP of the rule to be deleted from the RIB
+ * @param depth
+ * Depth of the rule to be deleted from the RIB
+ * @return
+ * 0 on success, negative value otherwise
+ */
+int
+rte_rib_delete(struct rte_rib *rib, uint32_t ip, uint8_t depth);
+
+/**
+ * Lookup multiple IP addresses in an FIB. This may be implemented as a
+ * macro, so the address of the function should not be used.
+ *
+ * @param RIB
+ * RIB object handle
+ * @param ips
+ * Array of IPs to be looked up in the FIB
+ * @param next_hops
+ * Next hop of the most specific rule found for IP.
+ * This is an array of eight byte values.
+ * If the lookup for the given IP failed, then corresponding element would
+ * contain default value, see description of then next parameter.
+ * @param n
+ * Number of elements in ips (and next_hops) array to lookup. This should be a
+ * compile time constant, and divisible by 8 for best performance.
+ * @param defv
+ * Default value to populate into corresponding element of hop[] array,
+ * if lookup would fail.
+ * @return
+ * -EINVAL for incorrect arguments, otherwise 0
+ */
+#define rte_rib_fib_lookup_bulk(rib, ips, next_hops, n) \
+ rib->lookup(rib->fib, ips, next_hops, n)
+
+#endif /* _RTE_RIB_H_ */
diff --git a/lib/librte_rib/rte_rib_version.map b/lib/librte_rib/rte_rib_version.map
new file mode 100644
index 0000000..b193d6c
--- /dev/null
+++ b/lib/librte_rib/rte_rib_version.map
@@ -0,0 +1,18 @@
+DPDK_17.08 {
+ global:
+
+ rte_rib_create;
+ rte_rib_free;
+ rte_rib_tree_lookup;
+ rte_rib_tree_lookup_parent;
+ rte_rib_tree_lookup_exact;
+ rte_rib_tree_get_nxt;
+ rte_rib_tree_remove;
+ rte_rib_tree_insert;
+ rte_rib_find_existing;
+ rte_rib_add;
+ rte_rib_delete;
+ rte_rib_delete_all;
+
+ local: *;
+};
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 3eb41d1..4708aa4 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -70,6 +70,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_GRO) += -lrte_gro
_LDLIBS-$(CONFIG_RTE_LIBRTE_GSO) += -lrte_gso
_LDLIBS-$(CONFIG_RTE_LIBRTE_METER) += -lrte_meter
_LDLIBS-$(CONFIG_RTE_LIBRTE_LPM) += -lrte_lpm
+_LDLIBS-$(CONFIG_RTE_LIBRTE_RIB) += -lrte_rib
# librte_acl needs --whole-archive because of weak functions
_LDLIBS-$(CONFIG_RTE_LIBRTE_ACL) += --whole-archive
_LDLIBS-$(CONFIG_RTE_LIBRTE_ACL) += -lrte_acl
--
1.9.1
next prev parent reply other threads:[~2018-02-21 21:09 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-02-21 21:44 [dpdk-dev] [PATCH v2 0/2] lib/rib: Add Routing Information Base library Medvedkin Vladimir
2018-02-21 21:44 ` Medvedkin Vladimir [this message]
2018-03-14 11:09 ` [dpdk-dev] [PATCH v2 1/2] Add RIB library Bruce Richardson
2018-03-14 12:05 ` Richardson, Bruce
2018-03-25 18:17 ` Vladimir Medvedkin
2018-03-26 9:50 ` Bruce Richardson
2018-03-29 19:59 ` Vladimir Medvedkin
2018-03-29 10:27 ` Bruce Richardson
2018-03-29 20:11 ` Vladimir Medvedkin
2018-03-29 20:41 ` Bruce Richardson
2018-02-21 21:44 ` [dpdk-dev] [PATCH v2 2/2] Add autotests for " Medvedkin Vladimir
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1519249495-16594-2-git-send-email-medvedkinv@gmail.com \
--to=medvedkinv@gmail.com \
--cc=dev@dpdk.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).