From: Michal Kobylinski <michalx.kobylinski@intel.com>
To: dev@dpdk.org
Date: Tue, 8 Sep 2015 15:21:11 +0200
Message-Id: <1441718471-18941-1-git-send-email-michalx.kobylinski@intel.com>
X-Mailer: git-send-email 1.9.1
Subject: [dpdk-dev] [RFC PATCH] lpm: increase number of next hops for lpm (ipv4)

The current DPDK LPM implementation for IPv4 and IPv6 limits the number
of next hops to 256, as the next hop ID is an 8-bit field.

The proposed extension increases the number of next hops for IPv4 to
2^24 and lets tbl24/tbl8 entries be read and written as single 32-bit
operations.

Signed-off-by: Michal Kobylinski <michalx.kobylinski@intel.com>
---
Two illustrative sketches (API usage and entry bit layout) follow the
patch.

 app/test/test_lpm.c              |  75 +++++++++--------
 lib/librte_lpm/rte_lpm.c         |  51 ++++++------
 lib/librte_lpm/rte_lpm.h         | 168 ++++++++++++++++++------------
 lib/librte_table/rte_table_lpm.c |   6 +-
 4 files changed, 160 insertions(+), 140 deletions(-)

diff --git a/app/test/test_lpm.c b/app/test/test_lpm.c
index 8b4ded9..e7af796 100644
--- a/app/test/test_lpm.c
+++ b/app/test/test_lpm.c
@@ -278,7 +278,8 @@ test6(void)
 {
 	struct rte_lpm *lpm = NULL;
 	uint32_t ip = IPv4(0, 0, 0, 0);
-	uint8_t depth = 24, next_hop_add = 100, next_hop_return = 0;
+	uint8_t depth = 24;
+	uint32_t next_hop_add = 100, next_hop_return = 0;
 	int32_t status = 0;
 
 	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, MAX_RULES, 0);
@@ -309,10 +310,11 @@ int32_t
 test7(void)
 {
 	__m128i ipx4;
-	uint16_t hop[4];
+	uint32_t hop[4];
 	struct rte_lpm *lpm = NULL;
 	uint32_t ip = IPv4(0, 0, 0, 0);
-	uint8_t depth = 32, next_hop_add = 100, next_hop_return = 0;
+	uint8_t depth = 32;
+	uint32_t next_hop_add = 100, next_hop_return = 0;
 	int32_t status = 0;
 
 	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, MAX_RULES, 0);
@@ -325,10 +327,10 @@ test7(void)
 	TEST_LPM_ASSERT((status == 0) && (next_hop_return == next_hop_add));
 
 	ipx4 = _mm_set_epi32(ip, ip + 0x100, ip - 0x100, ip);
-	rte_lpm_lookupx4(lpm, ipx4, hop, UINT16_MAX);
+	rte_lpm_lookupx4(lpm, ipx4, hop, UINT32_MAX);
 	TEST_LPM_ASSERT(hop[0] == next_hop_add);
-	TEST_LPM_ASSERT(hop[1] == UINT16_MAX);
-	TEST_LPM_ASSERT(hop[2] == UINT16_MAX);
+	TEST_LPM_ASSERT(hop[1] == UINT32_MAX);
+	TEST_LPM_ASSERT(hop[2] == UINT32_MAX);
 	TEST_LPM_ASSERT(hop[3] == next_hop_add);
 
 	status = rte_lpm_delete(lpm, ip, depth);
@@ -355,10 +357,11 @@ int32_t
 test8(void)
 {
 	__m128i ipx4;
-	uint16_t hop[4];
+	uint32_t hop[4];
 	struct rte_lpm *lpm = NULL;
 	uint32_t ip1 = IPv4(127, 255, 255, 255), ip2 = IPv4(128, 0, 0, 0);
-	uint8_t depth, next_hop_add, next_hop_return;
+	uint8_t depth;
+	uint32_t next_hop_add, next_hop_return;
 	int32_t status = 0;
 
 	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, MAX_RULES, 0);
@@ -381,10 +384,10 @@ test8(void)
 			(next_hop_return == next_hop_add));
 
 		ipx4 = _mm_set_epi32(ip2, ip1, ip2, ip1);
-		rte_lpm_lookupx4(lpm, ipx4, hop, UINT16_MAX);
-		TEST_LPM_ASSERT(hop[0] == UINT16_MAX);
+		rte_lpm_lookupx4(lpm, ipx4, hop, UINT32_MAX);
+		TEST_LPM_ASSERT(hop[0] == UINT32_MAX);
 		TEST_LPM_ASSERT(hop[1] == next_hop_add);
-		TEST_LPM_ASSERT(hop[2] == UINT16_MAX);
+		TEST_LPM_ASSERT(hop[2] == UINT32_MAX);
 		TEST_LPM_ASSERT(hop[3] == next_hop_add);
 	}
 
@@ -409,16 +412,16 @@ test8(void)
 		TEST_LPM_ASSERT(status == -ENOENT);
 
 		ipx4 = _mm_set_epi32(ip1, ip1, ip2, ip2);
-		rte_lpm_lookupx4(lpm, ipx4, hop, UINT16_MAX);
+		rte_lpm_lookupx4(lpm, ipx4, hop, UINT32_MAX);
 		if (depth != 1) {
 			TEST_LPM_ASSERT(hop[0] == next_hop_add);
 			TEST_LPM_ASSERT(hop[1] == next_hop_add);
 		} else {
-			TEST_LPM_ASSERT(hop[0] == UINT16_MAX);
-			TEST_LPM_ASSERT(hop[1] == UINT16_MAX);
+			TEST_LPM_ASSERT(hop[0] == UINT32_MAX);
+			TEST_LPM_ASSERT(hop[1] == UINT32_MAX);
 		}
-		TEST_LPM_ASSERT(hop[2] == UINT16_MAX);
-		TEST_LPM_ASSERT(hop[3] == UINT16_MAX);
+		TEST_LPM_ASSERT(hop[2] == UINT32_MAX);
+		TEST_LPM_ASSERT(hop[3] == UINT32_MAX);
 	}
 
 	rte_lpm_free(lpm);
@@ -438,7 +441,8 @@ test9(void)
 {
 	struct rte_lpm *lpm = NULL;
 	uint32_t ip, ip_1, ip_2;
-	uint8_t depth, depth_1, depth_2, next_hop_add, next_hop_add_1,
+	uint8_t depth, depth_1, depth_2;
+	uint32_t next_hop_add, next_hop_add_1,
 		next_hop_add_2, next_hop_return;
 	int32_t status = 0;
 
@@ -602,7 +606,8 @@ test10(void)
 
 	struct rte_lpm *lpm = NULL;
 	uint32_t ip;
-	uint8_t depth, next_hop_add, next_hop_return;
+	uint8_t depth;
+	uint32_t next_hop_add, next_hop_return;
 	int32_t status = 0;
 
 	/* Add rule that covers a TBL24 range previously invalid & lookup
@@ -788,7 +793,8 @@ test11(void)
 
 	struct rte_lpm *lpm = NULL;
 	uint32_t ip;
-	uint8_t depth, next_hop_add, next_hop_return;
+	uint8_t depth;
+	uint32_t next_hop_add, next_hop_return;
 	int32_t status = 0;
 
 	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, MAX_RULES, 0);
@@ -851,10 +857,11 @@ int32_t
 test12(void)
 {
 	__m128i ipx4;
-	uint16_t hop[4];
+	uint32_t hop[4];
 	struct rte_lpm *lpm = NULL;
 	uint32_t ip, i;
-	uint8_t depth, next_hop_add, next_hop_return;
+	uint8_t depth;
+	uint32_t next_hop_add, next_hop_return;
 	int32_t status = 0;
 
 	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, MAX_RULES, 0);
@@ -873,10 +880,10 @@ test12(void)
 			(next_hop_return == next_hop_add));
 
 		ipx4 = _mm_set_epi32(ip, ip + 1, ip, ip - 1);
-		rte_lpm_lookupx4(lpm, ipx4, hop, UINT16_MAX);
-		TEST_LPM_ASSERT(hop[0] == UINT16_MAX);
+		rte_lpm_lookupx4(lpm, ipx4, hop, UINT32_MAX);
+		TEST_LPM_ASSERT(hop[0] == UINT32_MAX);
 		TEST_LPM_ASSERT(hop[1] == next_hop_add);
-		TEST_LPM_ASSERT(hop[2] == UINT16_MAX);
+		TEST_LPM_ASSERT(hop[2] == UINT32_MAX);
 		TEST_LPM_ASSERT(hop[3] == next_hop_add);
 
 		status = rte_lpm_delete(lpm, ip, depth);
@@ -904,7 +911,8 @@ test13(void)
 {
 	struct rte_lpm *lpm = NULL;
 	uint32_t ip, i;
-	uint8_t depth, next_hop_add_1, next_hop_add_2, next_hop_return;
+	uint8_t depth;
+	uint32_t next_hop_add_1, next_hop_add_2, next_hop_return;
 	int32_t status = 0;
 
 	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, MAX_RULES, 0);
@@ -966,7 +974,8 @@ test14(void)
 
 	struct rte_lpm *lpm = NULL;
 	uint32_t ip;
-	uint8_t depth, next_hop_add, next_hop_return;
+	uint8_t depth;
+	uint32_t next_hop_add, next_hop_return;
 	int32_t status = 0;
 
 	/* Add enough space for 256 rules for every depth */
@@ -1078,10 +1087,10 @@ test17(void)
 	const uint8_t d_ip_10_32 = 32, d_ip_10_24 = 24, d_ip_20_25 = 25;
-	const uint8_t next_hop_ip_10_32 = 100,
+	const uint32_t next_hop_ip_10_32 = 100,
 			next_hop_ip_10_24 = 105,
 			next_hop_ip_20_25 = 111;
-	uint8_t next_hop_return = 0;
+	uint32_t next_hop_return = 0;
 	int32_t status = 0;
 
 	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY,
 			MAX_RULES, 0);
@@ -1175,7 +1184,7 @@ perf_test(void)
 	struct rte_lpm *lpm = NULL;
 	uint64_t begin, total_time, lpm_used_entries = 0;
 	unsigned i, j;
-	uint8_t next_hop_add = 0xAA, next_hop_return = 0;
+	uint32_t next_hop_add = 0xAA, next_hop_return = 0;
 	int status = 0;
 	uint64_t cache_line_counter = 0;
 	int64_t count = 0;
@@ -1252,7 +1261,7 @@ perf_test(void)
 	count = 0;
 	for (i = 0; i < ITERATIONS; i ++) {
 		static uint32_t ip_batch[BATCH_SIZE];
-		uint16_t next_hops[BULK_SIZE];
+		uint32_t next_hops[BULK_SIZE];
 
 		/* Create array of random IP addresses */
 		for (j = 0; j < BATCH_SIZE; j ++)
@@ -1279,7 +1288,7 @@ perf_test(void)
 	count = 0;
 	for (i = 0; i < ITERATIONS; i++) {
 		static uint32_t ip_batch[BATCH_SIZE];
-		uint16_t next_hops[4];
+		uint32_t next_hops[4];
 
 		/* Create array of random IP addresses */
 		for (j = 0; j < BATCH_SIZE; j++)
@@ -1293,9 +1302,9 @@ perf_test(void)
 			ipx4 = _mm_loadu_si128((__m128i *)(ip_batch + j));
 			ipx4 = *(__m128i *)(ip_batch + j);
-			rte_lpm_lookupx4(lpm, ipx4, next_hops, UINT16_MAX);
+			rte_lpm_lookupx4(lpm, ipx4, next_hops, UINT32_MAX);
 			for (k = 0; k < RTE_DIM(next_hops); k++)
-				if (unlikely(next_hops[k] == UINT16_MAX))
+				if (unlikely(next_hops[k] == UINT32_MAX))
 					count++;
 		}
 
diff --git a/lib/librte_lpm/rte_lpm.c b/lib/librte_lpm/rte_lpm.c
index 163ba3c..d662a4d 100644
--- a/lib/librte_lpm/rte_lpm.c
+++ b/lib/librte_lpm/rte_lpm.c
@@ -159,8 +159,8 @@ rte_lpm_create(const char *name, int socket_id, int max_rules,
 
 	lpm_list = RTE_TAILQ_CAST(rte_lpm_tailq.head, rte_lpm_list);
 
-	RTE_BUILD_BUG_ON(sizeof(struct rte_lpm_tbl24_entry) != 2);
-	RTE_BUILD_BUG_ON(sizeof(struct rte_lpm_tbl8_entry) != 2);
+	RTE_BUILD_BUG_ON(sizeof(union rte_lpm_tbl24_entry) != 4);
+	RTE_BUILD_BUG_ON(sizeof(union rte_lpm_tbl8_entry) != 4);
 
 	/* Check user arguments. */
 	if ((name == NULL) || (socket_id < -1) || (max_rules == 0)){
@@ -261,7 +261,7 @@ rte_lpm_free(struct rte_lpm *lpm)
  */
 static inline int32_t
 rule_add(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
-	uint8_t next_hop)
+	uint32_t next_hop)
 {
 	uint32_t rule_gindex, rule_index, last_rule;
 	int i;
@@ -382,10 +382,10 @@ rule_find(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth)
  * Find, clean and allocate a tbl8.
  */
 static inline int32_t
-tbl8_alloc(struct rte_lpm_tbl8_entry *tbl8)
+tbl8_alloc(union rte_lpm_tbl8_entry *tbl8)
 {
 	uint32_t tbl8_gindex; /* tbl8 group index. */
-	struct rte_lpm_tbl8_entry *tbl8_entry;
+	union rte_lpm_tbl8_entry *tbl8_entry;
 
 	/* Scan through tbl8 to find a free (i.e. INVALID) tbl8 group.
 	 */
 	for (tbl8_gindex = 0; tbl8_gindex < RTE_LPM_TBL8_NUM_GROUPS;
@@ -410,7 +410,7 @@ tbl8_alloc(struct rte_lpm_tbl8_entry *tbl8)
 }
 
 static inline void
-tbl8_free(struct rte_lpm_tbl8_entry *tbl8, uint32_t tbl8_group_start)
+tbl8_free(union rte_lpm_tbl8_entry *tbl8, uint32_t tbl8_group_start)
 {
 	/* Set tbl8 group invalid*/
 	tbl8[tbl8_group_start].valid_group = INVALID;
@@ -418,7 +418,7 @@ tbl8_free(struct rte_lpm_tbl8_entry *tbl8, uint32_t tbl8_group_start)
 
 static inline int32_t
 add_depth_small(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
-		uint8_t next_hop)
+		uint32_t next_hop)
 {
 	uint32_t tbl24_index, tbl24_range, tbl8_index, tbl8_group_end, i, j;
@@ -434,8 +434,8 @@ add_depth_small(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
 		if (!lpm->tbl24[i].valid || (lpm->tbl24[i].ext_entry == 0 &&
 				lpm->tbl24[i].depth <= depth)) {
 
-			struct rte_lpm_tbl24_entry new_tbl24_entry = {
-				{ .next_hop = next_hop, },
+			union rte_lpm_tbl24_entry new_tbl24_entry = {
+				.next_hop = next_hop,
 				.valid = VALID,
 				.ext_entry = 0,
 				.depth = depth,
@@ -461,8 +461,7 @@ add_depth_small(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
 		for (j = tbl8_index; j < tbl8_group_end; j++) {
 			if (!lpm->tbl8[j].valid ||
 					lpm->tbl8[j].depth <= depth) {
-				struct rte_lpm_tbl8_entry
-					new_tbl8_entry = {
+				union rte_lpm_tbl8_entry new_tbl8_entry = {
 					.valid = VALID,
 					.valid_group = VALID,
 					.depth = depth,
@@ -486,7 +485,7 @@ add_depth_small(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
 
 static inline int32_t
 add_depth_big(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
-		uint8_t next_hop)
+		uint32_t next_hop)
 {
 	uint32_t tbl24_index;
 	int32_t tbl8_group_index, tbl8_group_start, tbl8_group_end, tbl8_index,
@@ -522,8 +521,8 @@ add_depth_big(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
 		 * so assign whole structure in one go
 		 */
 
-		struct rte_lpm_tbl24_entry new_tbl24_entry = {
-			{ .tbl8_gindex = (uint8_t)tbl8_group_index, },
+		union rte_lpm_tbl24_entry new_tbl24_entry = {
+			.next_hop = (uint8_t)tbl8_group_index,
 			.valid = VALID,
 			.ext_entry = 1,
 			.depth = 0,
@@ -573,8 +572,8 @@ add_depth_big(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
 		 * so assign whole structure in one go.
 		 */
 
-		struct rte_lpm_tbl24_entry new_tbl24_entry = {
-			{ .tbl8_gindex = (uint8_t)tbl8_group_index, },
+		union rte_lpm_tbl24_entry new_tbl24_entry = {
+			.next_hop = (uint8_t)tbl8_group_index,
 			.valid = VALID,
 			.ext_entry = 1,
 			.depth = 0,
@@ -595,7 +594,7 @@ add_depth_big(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
 		if (!lpm->tbl8[i].valid ||
 				lpm->tbl8[i].depth <= depth) {
-			struct rte_lpm_tbl8_entry new_tbl8_entry = {
+			union rte_lpm_tbl8_entry new_tbl8_entry = {
 				.valid = VALID,
 				.depth = depth,
 				.next_hop = next_hop,
@@ -621,7 +620,7 @@ add_depth_big(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
  */
 int
 rte_lpm_add(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
-		uint8_t next_hop)
+		uint32_t next_hop)
 {
 	int32_t rule_index, status = 0;
 	uint32_t ip_masked;
@@ -665,7 +664,7 @@ rte_lpm_add(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
  */
 int
 rte_lpm_is_rule_present(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
-uint8_t *next_hop)
+uint32_t *next_hop)
 {
 	uint32_t ip_masked;
 	int32_t rule_index;
@@ -761,14 +760,14 @@ delete_depth_small(struct rte_lpm *lpm, uint32_t ip_masked,
 		 * associated with this rule.
 		 */
 
-		struct rte_lpm_tbl24_entry new_tbl24_entry = {
-			{.next_hop = lpm->rules_tbl[sub_rule_index].next_hop,},
+		union rte_lpm_tbl24_entry new_tbl24_entry = {
+			.next_hop = lpm->rules_tbl[sub_rule_index].next_hop,
 			.valid = VALID,
 			.ext_entry = 0,
 			.depth = sub_rule_depth,
 		};
 
-		struct rte_lpm_tbl8_entry new_tbl8_entry = {
+		union rte_lpm_tbl8_entry new_tbl8_entry = {
 			.valid = VALID,
 			.depth = sub_rule_depth,
 			.next_hop = lpm->rules_tbl
@@ -814,7 +813,7 @@ delete_depth_small(struct rte_lpm *lpm, uint32_t ip_masked,
  * thus can be recycled
  */
 static inline int32_t
-tbl8_recycle_check(struct rte_lpm_tbl8_entry *tbl8, uint32_t tbl8_group_start)
+tbl8_recycle_check(union rte_lpm_tbl8_entry *tbl8, uint32_t tbl8_group_start)
 {
 	uint32_t tbl8_group_end, i;
 	tbl8_group_end = tbl8_group_start + RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
@@ -891,7 +890,7 @@ delete_depth_big(struct rte_lpm *lpm, uint32_t ip_masked,
 	}
 	else {
 		/* Set new tbl8 entry. */
-		struct rte_lpm_tbl8_entry new_tbl8_entry = {
+		union rte_lpm_tbl8_entry new_tbl8_entry = {
 			.valid = VALID,
 			.depth = sub_rule_depth,
 			.valid_group = lpm->tbl8[tbl8_group_start].valid_group,
@@ -923,8 +922,8 @@ delete_depth_big(struct rte_lpm *lpm, uint32_t ip_masked,
 	}
 	else if (tbl8_recycle_index > -1) {
 		/* Update tbl24 entry. */
-		struct rte_lpm_tbl24_entry new_tbl24_entry = {
-			{ .next_hop = lpm->tbl8[tbl8_recycle_index].next_hop, },
+		union rte_lpm_tbl24_entry new_tbl24_entry = {
+			.next_hop = lpm->tbl8[tbl8_recycle_index].next_hop,
 			.valid = VALID,
 			.ext_entry = 0,
 			.depth = lpm->tbl8[tbl8_recycle_index].depth,
diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
index c299ce2..c134539 100644
--- a/lib/librte_lpm/rte_lpm.h
+++ b/lib/librte_lpm/rte_lpm.h
@@ -82,56 +82,66 @@ extern "C" {
 #endif
 
 /** @internal bitmask with valid and ext_entry/valid_group fields set */
-#define RTE_LPM_VALID_EXT_ENTRY_BITMASK 0x0300
+#define RTE_LPM_VALID_EXT_ENTRY_BITMASK 0x03000000
 
 /** Bitmask used to indicate successful lookup */
-#define RTE_LPM_LOOKUP_SUCCESS 0x0100
+#define RTE_LPM_LOOKUP_SUCCESS 0x01000000
+
+/** Bitmask used to get the 24-bit next hop value from a uint32_t entry */
+#define RTE_LPM_NEXT_HOP_MASK 0x00ffffff
 
 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 /** @internal Tbl24 entry structure. */
-struct rte_lpm_tbl24_entry {
-	/* Stores Next hop or group index (i.e. gindex)into tbl8. */
-	union {
-		uint8_t next_hop;
-		uint8_t tbl8_gindex;
+union rte_lpm_tbl24_entry {
+	uint32_t entry;
+	struct {
+		uint32_t next_hop  :24; /**< next hop. */
+		uint32_t valid     :1;  /**< Validation flag. */
+		uint32_t ext_entry :1;  /**< External entry. */
+		uint32_t depth     :6;  /**< Rule depth. */
 	};
-	/* Using single uint8_t to store 3 values. */
-	uint8_t valid :1; /**< Validation flag. */
-	uint8_t ext_entry :1; /**< External entry. */
-	uint8_t depth :6; /**< Rule depth. */
 };
 
+/* Store group index (i.e. gindex) into tbl8. */
+#define tbl8_gindex next_hop
+
 /** @internal Tbl8 entry structure. */
-struct rte_lpm_tbl8_entry {
-	uint8_t next_hop; /**< next hop. */
-	/* Using single uint8_t to store 3 values. */
-	uint8_t valid :1; /**< Validation flag. */
-	uint8_t valid_group :1; /**< Group validation flag. */
-	uint8_t depth :6; /**< Rule depth. */
+union rte_lpm_tbl8_entry {
+	uint32_t entry;
+	struct {
+		uint32_t next_hop    :24; /**< next hop. */
+		uint32_t valid       :1;  /**< Validation flag. */
+		uint32_t valid_group :1;  /**< Group validation flag. */
+		uint32_t depth       :6;  /**< Rule depth. */
+	};
 };
 #else
-struct rte_lpm_tbl24_entry {
-	uint8_t depth :6;
-	uint8_t ext_entry :1;
-	uint8_t valid :1;
-	union {
-		uint8_t tbl8_gindex;
-		uint8_t next_hop;
+union rte_lpm_tbl24_entry {
+	struct {
+		uint32_t depth     :6;
+		uint32_t ext_entry :1;
+		uint32_t valid     :1;
+		uint32_t next_hop  :24;
 	};
+	uint32_t entry;
 };
-
-struct rte_lpm_tbl8_entry {
-	uint8_t depth :6;
-	uint8_t valid_group :1;
-	uint8_t valid :1;
-	uint8_t next_hop;
+#define tbl8_gindex next_hop
+
+union rte_lpm_tbl8_entry {
+	struct {
+		uint32_t depth       :6;
+		uint32_t valid_group :1;
+		uint32_t valid       :1;
+		uint32_t next_hop    :24;
+	};
+	uint32_t entry;
 };
 #endif
 
 /** @internal Rule structure. */
 struct rte_lpm_rule {
 	uint32_t ip; /**< Rule IP address. */
-	uint8_t next_hop; /**< Rule next hop. */
+	uint32_t next_hop; /**< Rule next hop. */
 };
 
 /** @internal Contains metadata about the rules table. */
@@ -148,9 +158,9 @@ struct rte_lpm {
 	struct rte_lpm_rule_info rule_info[RTE_LPM_MAX_DEPTH]; /**< Rule info table. */
 
 	/* LPM Tables. */
-	struct rte_lpm_tbl24_entry tbl24[RTE_LPM_TBL24_NUM_ENTRIES] \
+	union rte_lpm_tbl24_entry tbl24[RTE_LPM_TBL24_NUM_ENTRIES] \
 			__rte_cache_aligned; /**< LPM tbl24 table. */
-	struct rte_lpm_tbl8_entry tbl8[RTE_LPM_TBL8_NUM_ENTRIES] \
+	union rte_lpm_tbl8_entry tbl8[RTE_LPM_TBL8_NUM_ENTRIES] \
 			__rte_cache_aligned; /**< LPM tbl8 table. */
 	struct rte_lpm_rule rules_tbl[0] \
 			__rte_cache_aligned; /**< LPM rules. */
@@ -219,7 +229,7 @@ rte_lpm_free(struct rte_lpm *lpm);
 *   0 on success, negative value otherwise
 */
 int
-rte_lpm_add(struct rte_lpm *lpm, uint32_t ip, uint8_t depth, uint8_t next_hop);
+rte_lpm_add(struct rte_lpm *lpm, uint32_t ip, uint8_t depth, uint32_t next_hop);
 
 /**
  * Check if a rule is present in the LPM table,
@@ -238,7 +248,7 @@ rte_lpm_add(struct rte_lpm *lpm, uint32_t ip, uint8_t depth, uint8_t next_hop);
  */
 int
 rte_lpm_is_rule_present(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
-uint8_t *next_hop);
+uint32_t *next_hop);
 
 /**
  * Delete a rule from the LPM table.
@@ -277,16 +287,16 @@ rte_lpm_delete_all(struct rte_lpm *lpm);
 *   -EINVAL for incorrect arguments, -ENOENT on lookup miss, 0 on lookup hit
 */
 static inline int
-rte_lpm_lookup(struct rte_lpm *lpm, uint32_t ip, uint8_t *next_hop)
+rte_lpm_lookup(struct rte_lpm *lpm, uint32_t ip, uint32_t *next_hop)
 {
 	unsigned tbl24_index = (ip >> 8);
-	uint16_t tbl_entry;
+	uint32_t tbl_entry;
 
 	/* DEBUG: Check user input arguments. */
 	RTE_LPM_RETURN_IF_TRUE(((lpm == NULL) || (next_hop == NULL)), -EINVAL);
 
 	/* Copy tbl24 entry */
-	tbl_entry = *(const uint16_t *)&lpm->tbl24[tbl24_index];
+	tbl_entry = lpm->tbl24[tbl24_index].entry;
 
 	/* Copy tbl8 entry (only if needed) */
 	if (unlikely((tbl_entry & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
 			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
 
 		unsigned tbl8_index = (uint8_t)ip +
 				((uint8_t)tbl_entry * RTE_LPM_TBL8_GROUP_NUM_ENTRIES);
 
-		tbl_entry = *(const uint16_t *)&lpm->tbl8[tbl8_index];
+		tbl_entry = lpm->tbl8[tbl8_index].entry;
 	}
 
-	*next_hop = (uint8_t)tbl_entry;
+	*next_hop = tbl_entry & RTE_LPM_NEXT_HOP_MASK;
 	return (tbl_entry & RTE_LPM_LOOKUP_SUCCESS) ?
 			0 : -ENOENT;
 }
 
@@ -327,7 +337,7 @@ rte_lpm_lookup(struct rte_lpm *lpm, uint32_t ip, uint8_t *next_hop)
 static inline int
 rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t * ips,
-		uint16_t * next_hops, const unsigned n)
+		uint32_t *next_hops, const unsigned n)
 {
 	unsigned i;
 	unsigned tbl24_indexes[n];
@@ -342,7 +352,7 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t * ips,
 	for (i = 0; i < n; i++) {
 		/* Simply copy tbl24 entry to output */
-		next_hops[i] = *(const uint16_t *)&lpm->tbl24[tbl24_indexes[i]];
+		next_hops[i] = lpm->tbl24[tbl24_indexes[i]].entry;
 
 		/* Overwrite output with tbl8 entry if needed */
 		if (unlikely((next_hops[i] & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
@@ -352,14 +362,14 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t * ips,
 					((uint8_t)next_hops[i] *
 					RTE_LPM_TBL8_GROUP_NUM_ENTRIES);
 
-			next_hops[i] = *(const uint16_t *)&lpm->tbl8[tbl8_index];
+			next_hops[i] = lpm->tbl8[tbl8_index].entry;
 		}
 	}
 	return 0;
 }
 
 /* Mask four results. */
-#define RTE_LPM_MASKX4_RES	UINT64_C(0x00ff00ff00ff00ff)
+#define RTE_LPM_MASKX2_RES	UINT64_C(0x00ffffff00ffffff)
 
 /**
  * Lookup four IP addresses in an LPM table.
 *
@@ -381,36 +391,32 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t * ips,
 *   if lookup would fail.
 */
 static inline void
-rte_lpm_lookupx4(const struct rte_lpm *lpm, __m128i ip, uint16_t hop[4],
-	uint16_t defv)
+rte_lpm_lookupx4(const struct rte_lpm *lpm, __m128i ip, uint32_t hop[4],
+	uint32_t defv)
 {
 	__m128i i24;
 	rte_xmm_t i8;
-	uint16_t tbl[4];
-	uint64_t idx, pt;
+	uint32_t tbl[4];
+	uint64_t idx, pt, pt2;
 
 	const __m128i mask8 =
 		_mm_set_epi32(UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX);
 
 	/*
-	 * RTE_LPM_VALID_EXT_ENTRY_BITMASK for 4 LPM entries
-	 * as one 64-bit value (0x0300030003000300).
+	 * RTE_LPM_VALID_EXT_ENTRY_BITMASK for 2 LPM entries
+	 * as one 64-bit value (0x0300000003000000).
 	 */
 	const uint64_t mask_xv =
 		((uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK |
-		(uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK << 16 |
-		(uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK << 32 |
-		(uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK << 48);
+		(uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK << 32);
 
 	/*
-	 * RTE_LPM_LOOKUP_SUCCESS for 4 LPM entries
-	 * as one 64-bit value (0x0100010001000100).
+	 * RTE_LPM_LOOKUP_SUCCESS for 2 LPM entries
+	 * as one 64-bit value (0x0100000001000000).
 	 */
 	const uint64_t mask_v =
 		((uint64_t)RTE_LPM_LOOKUP_SUCCESS |
-		(uint64_t)RTE_LPM_LOOKUP_SUCCESS << 16 |
-		(uint64_t)RTE_LPM_LOOKUP_SUCCESS << 32 |
-		(uint64_t)RTE_LPM_LOOKUP_SUCCESS << 48);
+		(uint64_t)RTE_LPM_LOOKUP_SUCCESS << 32);
 
 	/* get 4 indexes for tbl24[]. */
 	i24 = _mm_srli_epi32(ip, CHAR_BIT);
 
@@ -419,26 +425,28 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, __m128i ip, uint16_t hop[4],
 	idx = _mm_cvtsi128_si64(i24);
 	i24 = _mm_srli_si128(i24, sizeof(uint64_t));
 
-	tbl[0] = *(const uint16_t *)&lpm->tbl24[(uint32_t)idx];
-	tbl[1] = *(const uint16_t *)&lpm->tbl24[idx >> 32];
+	tbl[0] = lpm->tbl24[(uint32_t)idx].entry;
+	tbl[1] = lpm->tbl24[idx >> 32].entry;
 
 	idx = _mm_cvtsi128_si64(i24);
 
-	tbl[2] = *(const uint16_t *)&lpm->tbl24[(uint32_t)idx];
-	tbl[3] = *(const uint16_t *)&lpm->tbl24[idx >> 32];
+	tbl[2] = lpm->tbl24[(uint32_t)idx].entry;
+	tbl[3] = lpm->tbl24[idx >> 32].entry;
 
 	/* get 4 indexes for tbl8[]. */
 	i8.x = _mm_and_si128(ip, mask8);
 
 	pt = (uint64_t)tbl[0] |
-		(uint64_t)tbl[1] << 16 |
-		(uint64_t)tbl[2] << 32 |
-		(uint64_t)tbl[3] << 48;
+		(uint64_t)tbl[1] << 32;
+	pt2 = (uint64_t)tbl[2] |
+		(uint64_t)tbl[3] << 32;
 
 	/* search successfully finished for all 4 IP addresses. */
-	if (likely((pt & mask_xv) == mask_v)) {
+	if (likely((pt & mask_xv) == mask_v) &&
+			likely((pt2 & mask_xv) == mask_v)) {
 		uintptr_t ph = (uintptr_t)hop;
-		*(uint64_t *)ph = pt & RTE_LPM_MASKX4_RES;
+		*(uint64_t *)ph = pt & RTE_LPM_MASKX2_RES;
+		*(uint64_t *)(ph + sizeof(uint64_t)) = pt2 & RTE_LPM_MASKX2_RES;
 		return;
 	}
 
@@ -446,31 +454,35 @@
 			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
 		i8.u32[0] = i8.u32[0] +
 			(uint8_t)tbl[0] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
-		tbl[0] = *(const uint16_t *)&lpm->tbl8[i8.u32[0]];
+		tbl[0] = lpm->tbl8[i8.u32[0]].entry;
 	}
-	if (unlikely((pt >> 16 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+	if (unlikely((pt >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
 			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
 		i8.u32[1] = i8.u32[1] +
 			(uint8_t)tbl[1] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
-		tbl[1] = *(const uint16_t *)&lpm->tbl8[i8.u32[1]];
+		tbl[1] = lpm->tbl8[i8.u32[1]].entry;
 	}
-	if (unlikely((pt >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+	if (unlikely((pt2 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
 			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
 		i8.u32[2] = i8.u32[2] +
 			(uint8_t)tbl[2] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
-		tbl[2] = *(const uint16_t *)&lpm->tbl8[i8.u32[2]];
+		tbl[2] = lpm->tbl8[i8.u32[2]].entry;
 	}
-	if (unlikely((pt >> 48 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+	if (unlikely((pt2 >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
 			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
 		i8.u32[3] = i8.u32[3] +
 			(uint8_t)tbl[3] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
-		tbl[3] = *(const uint16_t *)&lpm->tbl8[i8.u32[3]];
+		tbl[3] = lpm->tbl8[i8.u32[3]].entry;
 	}
 
-	hop[0] = (tbl[0] & RTE_LPM_LOOKUP_SUCCESS) ? (uint8_t)tbl[0] : defv;
-	hop[1] = (tbl[1] & RTE_LPM_LOOKUP_SUCCESS) ? (uint8_t)tbl[1] : defv;
-	hop[2] = (tbl[2] & RTE_LPM_LOOKUP_SUCCESS) ? (uint8_t)tbl[2] : defv;
-	hop[3] = (tbl[3] & RTE_LPM_LOOKUP_SUCCESS) ? (uint8_t)tbl[3] : defv;
+	hop[0] = (tbl[0] & RTE_LPM_LOOKUP_SUCCESS) ?
+		tbl[0] & RTE_LPM_NEXT_HOP_MASK : defv;
+	hop[1] = (tbl[1] & RTE_LPM_LOOKUP_SUCCESS) ?
+		tbl[1] & RTE_LPM_NEXT_HOP_MASK : defv;
+	hop[2] = (tbl[2] & RTE_LPM_LOOKUP_SUCCESS) ?
+		tbl[2] & RTE_LPM_NEXT_HOP_MASK : defv;
+	hop[3] = (tbl[3] & RTE_LPM_LOOKUP_SUCCESS) ?
+		tbl[3] & RTE_LPM_NEXT_HOP_MASK : defv;
 }
 
 #ifdef __cplusplus
diff --git a/lib/librte_table/rte_table_lpm.c b/lib/librte_table/rte_table_lpm.c
index b218d64..3f7f4f6 100644
--- a/lib/librte_table/rte_table_lpm.c
+++ b/lib/librte_table/rte_table_lpm.c
@@ -198,7 +198,7 @@ rte_table_lpm_entry_add(
 	struct rte_table_lpm_key *ip_prefix = (struct rte_table_lpm_key *) key;
 	uint32_t nht_pos, nht_pos0_valid;
 	int status;
-	uint8_t nht_pos0 = 0;
+	uint32_t nht_pos0 = 0;
 
 	/* Check input parameters */
 	if (lpm == NULL) {
@@ -264,7 +264,7 @@ rte_table_lpm_entry_delete(
 {
 	struct rte_table_lpm *lpm = (struct rte_table_lpm *) table;
 	struct rte_table_lpm_key *ip_prefix = (struct rte_table_lpm_key *) key;
-	uint8_t nht_pos;
+	uint32_t nht_pos;
 	int status;
 
 	/* Check input parameters */
@@ -338,7 +338,7 @@ rte_table_lpm_lookup(
 			uint32_t ip = rte_bswap32(
 				RTE_MBUF_METADATA_UINT32(pkt, lpm->offset));
 			int status;
-			uint8_t nht_pos;
+			uint32_t nht_pos;
 
 			status = rte_lpm_lookup(lpm->lpm, ip, &nht_pos);
 			if (status == 0) {
-- 
1.7.9.5
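
Sketch 1: for reviewers who want to exercise the widened field, a minimal
usage sketch against the patched API (not part of the patch). It assumes a
DPDK 2.x-era tree with this patch applied and an already-initialised EAL;
the function name, table name, rule count, and route values are made up for
illustration.

    #include <rte_ip.h>
    #include <rte_lpm.h>

    static int
    lpm_next_hop_demo(void)
    {
    	struct rte_lpm *lpm;
    	uint32_t next_hop = 0;
    	/* A next hop ID that does not fit the old 8-bit field; the old
    	 * uint8_t API would have silently truncated it. */
    	const uint32_t big_next_hop = 70000;

    	lpm = rte_lpm_create("demo", SOCKET_ID_ANY, 1024, 0);
    	if (lpm == NULL)
    		return -1;

    	/* 10.0.0.0/24 -> next hop 70000 */
    	if (rte_lpm_add(lpm, IPv4(10, 0, 0, 0), 24, big_next_hop) < 0) {
    		rte_lpm_free(lpm);
    		return -1;
    	}

    	/* The lookup now returns the 24-bit value through a uint32_t. */
    	if (rte_lpm_lookup(lpm, IPv4(10, 0, 0, 1), &next_hop) != 0 ||
    			next_hop != big_next_hop) {
    		rte_lpm_free(lpm);
    		return -1;
    	}

    	rte_lpm_free(lpm);
    	return 0;
    }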
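
Sketch 2: the new masks in rte_lpm.h (RTE_LPM_LOOKUP_SUCCESS = 0x01000000,
RTE_LPM_VALID_EXT_ENTRY_BITMASK = 0x03000000, RTE_LPM_NEXT_HOP_MASK =
0x00ffffff) only line up with the new unions if next_hop occupies bits 0-23
and valid/ext_entry bits 24-25 of the 32-bit entry. Bitfield allocation
order is implementation-defined, which is what the RTE_BYTE_ORDER
conditional compensates for; the standalone check below (not part of the
patch; assumes a little-endian GCC/Clang target and C11 anonymous structs)
mirrors the little-endian tbl24 union and asserts that correspondence.

    #include <assert.h>
    #include <stdint.h>

    /* Mirror of the patched little-endian tbl24 entry layout. */
    union tbl24_entry_le {
    	uint32_t entry;
    	struct {
    		uint32_t next_hop  :24;
    		uint32_t valid     :1;
    		uint32_t ext_entry :1;
    		uint32_t depth     :6;
    	};
    };

    int
    main(void)
    {
    	union tbl24_entry_le e = { .entry = 0 };

    	e.valid = 1;
    	assert(e.entry == 0x01000000);	/* RTE_LPM_LOOKUP_SUCCESS */

    	e.ext_entry = 1;
    	assert(e.entry == 0x03000000);	/* RTE_LPM_VALID_EXT_ENTRY_BITMASK */

    	e.next_hop = 0xabcdef;
    	assert((e.entry & 0x00ffffff) == 0xabcdef); /* RTE_LPM_NEXT_HOP_MASK */

    	return 0;
    }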