DPDK patches and discussions
 help / color / mirror / Atom feed
* [PATCH v2] fib: network byte order IPv4 lookup
@ 2024-10-08 17:16 Vladimir Medvedkin
  2024-10-08 21:26 ` Stephen Hemminger
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Vladimir Medvedkin @ 2024-10-08 17:16 UTC (permalink / raw)
  To: dev; +Cc: rjarry, mb, david.marchand

Previously when running rte_fib_lookup IPv4 addresses must have been in
host byte order.

This patch adds a new flag RTE_FIB_FLAG_LOOKUP_BE that can be passed on
fib create, which will allow to have IPv4 in network byte order on
lookup.

Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
---
 app/test/test_fib.c      |  2 +-
 lib/fib/dir24_8.c        | 62 +++++++++++++++++++-----------
 lib/fib/dir24_8.h        | 44 ++++++++++++++++++++-
 lib/fib/dir24_8_avx512.c | 82 +++++++++++++++++++++++-----------------
 lib/fib/dir24_8_avx512.h | 15 ++++++++
 lib/fib/meson.build      | 38 +++++++------------
 lib/fib/rte_fib.c        |  7 +++-
 lib/fib/rte_fib.h        |  4 ++
 8 files changed, 169 insertions(+), 85 deletions(-)

diff --git a/app/test/test_fib.c b/app/test/test_fib.c
index 45dccca1f6..b0e53dbe01 100644
--- a/app/test/test_fib.c
+++ b/app/test/test_fib.c
@@ -319,7 +319,7 @@ int32_t
 test_lookup(void)
 {
 	struct rte_fib *fib = NULL;
-	struct rte_fib_conf config;
+	struct rte_fib_conf config = { 0 };
 	uint64_t def_nh = 100;
 	int ret;
 
diff --git a/lib/fib/dir24_8.c b/lib/fib/dir24_8.c
index c739e92304..5520f0f519 100644
--- a/lib/fib/dir24_8.c
+++ b/lib/fib/dir24_8.c
@@ -26,56 +26,72 @@
 #define ROUNDUP(x, y)	 RTE_ALIGN_CEIL(x, (1 << (32 - y)))
 
 static inline rte_fib_lookup_fn_t
-get_scalar_fn(enum rte_fib_dir24_8_nh_sz nh_sz)
+get_scalar_fn(enum rte_fib_dir24_8_nh_sz nh_sz, bool be_addr)
 {
 	switch (nh_sz) {
 	case RTE_FIB_DIR24_8_1B:
-		return dir24_8_lookup_bulk_1b;
+		return (be_addr) ? dir24_8_lookup_bulk_1b_be :
+					dir24_8_lookup_bulk_1b;
 	case RTE_FIB_DIR24_8_2B:
-		return dir24_8_lookup_bulk_2b;
+		return (be_addr) ? dir24_8_lookup_bulk_2b_be :
+					dir24_8_lookup_bulk_2b;
 	case RTE_FIB_DIR24_8_4B:
-		return dir24_8_lookup_bulk_4b;
+		return (be_addr) ? dir24_8_lookup_bulk_4b_be :
+					dir24_8_lookup_bulk_4b;
 	case RTE_FIB_DIR24_8_8B:
-		return dir24_8_lookup_bulk_8b;
+		return (be_addr) ? dir24_8_lookup_bulk_8b_be :
+					dir24_8_lookup_bulk_8b;
 	default:
 		return NULL;
 	}
 }
 
 static inline rte_fib_lookup_fn_t
-get_scalar_fn_inlined(enum rte_fib_dir24_8_nh_sz nh_sz)
+get_scalar_fn_inlined(enum rte_fib_dir24_8_nh_sz nh_sz, bool be_addr)
 {
 	switch (nh_sz) {
 	case RTE_FIB_DIR24_8_1B:
-		return dir24_8_lookup_bulk_0;
+		return (be_addr) ? dir24_8_lookup_bulk_0_be :
+					dir24_8_lookup_bulk_0;
 	case RTE_FIB_DIR24_8_2B:
-		return dir24_8_lookup_bulk_1;
+		return (be_addr) ? dir24_8_lookup_bulk_1_be :
+					dir24_8_lookup_bulk_1;
 	case RTE_FIB_DIR24_8_4B:
-		return dir24_8_lookup_bulk_2;
+		return (be_addr) ? dir24_8_lookup_bulk_2_be :
+					dir24_8_lookup_bulk_2;
 	case RTE_FIB_DIR24_8_8B:
-		return dir24_8_lookup_bulk_3;
+		return (be_addr) ? dir24_8_lookup_bulk_3_be :
+					dir24_8_lookup_bulk_3;
 	default:
 		return NULL;
 	}
 }
 
 static inline rte_fib_lookup_fn_t
-get_vector_fn(enum rte_fib_dir24_8_nh_sz nh_sz)
+get_vector_fn(enum rte_fib_dir24_8_nh_sz nh_sz, bool be_addr)
 {
 #ifdef CC_DIR24_8_AVX512_SUPPORT
 	if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) <= 0) ||
+		(rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512DQ) <= 0) ||
 			(rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_512))
 		return NULL;
 
+	if (be_addr && (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) <= 0))
+		return NULL;
+
 	switch (nh_sz) {
 	case RTE_FIB_DIR24_8_1B:
-		return rte_dir24_8_vec_lookup_bulk_1b;
+		return (be_addr) ? rte_dir24_8_vec_lookup_bulk_1b_be :
+					rte_dir24_8_vec_lookup_bulk_1b;
 	case RTE_FIB_DIR24_8_2B:
-		return rte_dir24_8_vec_lookup_bulk_2b;
+		return (be_addr) ? rte_dir24_8_vec_lookup_bulk_2b_be :
+					rte_dir24_8_vec_lookup_bulk_2b;
 	case RTE_FIB_DIR24_8_4B:
-		return rte_dir24_8_vec_lookup_bulk_4b;
+		return (be_addr) ? rte_dir24_8_vec_lookup_bulk_4b_be :
+					rte_dir24_8_vec_lookup_bulk_4b;
 	case RTE_FIB_DIR24_8_8B:
-		return rte_dir24_8_vec_lookup_bulk_8b;
+		return (be_addr) ? rte_dir24_8_vec_lookup_bulk_8b_be :
+					rte_dir24_8_vec_lookup_bulk_8b;
 	default:
 		return NULL;
 	}
@@ -86,7 +102,7 @@ get_vector_fn(enum rte_fib_dir24_8_nh_sz nh_sz)
 }
 
 rte_fib_lookup_fn_t
-dir24_8_get_lookup_fn(void *p, enum rte_fib_lookup_type type)
+dir24_8_get_lookup_fn(void *p, enum rte_fib_lookup_type type, bool be_addr)
 {
 	enum rte_fib_dir24_8_nh_sz nh_sz;
 	rte_fib_lookup_fn_t ret_fn;
@@ -99,16 +115,18 @@ dir24_8_get_lookup_fn(void *p, enum rte_fib_lookup_type type)
 
 	switch (type) {
 	case RTE_FIB_LOOKUP_DIR24_8_SCALAR_MACRO:
-		return get_scalar_fn(nh_sz);
+		return get_scalar_fn(nh_sz, be_addr);
 	case RTE_FIB_LOOKUP_DIR24_8_SCALAR_INLINE:
-		return get_scalar_fn_inlined(nh_sz);
+		return get_scalar_fn_inlined(nh_sz, be_addr);
 	case RTE_FIB_LOOKUP_DIR24_8_SCALAR_UNI:
-		return dir24_8_lookup_bulk_uni;
+		return (be_addr) ? dir24_8_lookup_bulk_uni_be :
+						dir24_8_lookup_bulk_uni;
 	case RTE_FIB_LOOKUP_DIR24_8_VECTOR_AVX512:
-		return get_vector_fn(nh_sz);
+		return get_vector_fn(nh_sz, be_addr);
 	case RTE_FIB_LOOKUP_DEFAULT:
-		ret_fn = get_vector_fn(nh_sz);
-		return (ret_fn != NULL) ? ret_fn : get_scalar_fn(nh_sz);
+		ret_fn = get_vector_fn(nh_sz, be_addr);
+		return (ret_fn != NULL) ? ret_fn :
+			get_scalar_fn(nh_sz, be_addr);
 	default:
 		return NULL;
 	}
diff --git a/lib/fib/dir24_8.h b/lib/fib/dir24_8.h
index 7125049f15..2c776e118f 100644
--- a/lib/fib/dir24_8.h
+++ b/lib/fib/dir24_8.h
@@ -7,7 +7,9 @@
 #define _DIR24_8_H_
 
 #include <stdalign.h>
+#include <stdbool.h>
 
+#include <rte_byteorder.h>
 #include <rte_prefetch.h>
 #include <rte_branch_prediction.h>
 
@@ -237,6 +239,46 @@ dir24_8_lookup_bulk_uni(void *p, const uint32_t *ips,
 	}
 }
 
+#define BSWAP_MAX_LENGTH	64
+
+typedef void (*dir24_8_lookup_bulk_be_cb)(void *p, const uint32_t *ips,
+	uint64_t *next_hops, const unsigned int n);
+
+static inline void
+dir24_8_lookup_bulk_be(void *p, const uint32_t *ips,
+	uint64_t *next_hops, const unsigned int n,
+	dir24_8_lookup_bulk_be_cb cb)
+{
+	uint32_t le_ips[BSWAP_MAX_LENGTH];
+	unsigned int i;
+
+	for (i = 0; i < n; i += BSWAP_MAX_LENGTH) {
+		int j;
+		for (j = 0; j < BSWAP_MAX_LENGTH && i + j < n; j++)
+			le_ips[j] = rte_be_to_cpu_32(ips[i + j]);
+
+		cb(p, le_ips, next_hops + i, j);
+	}
+}
+
+#define DECLARE_BE_LOOKUP_FN(name)					\
+static inline void							\
+name##_be(void *p, const uint32_t *ips,					\
+	uint64_t *next_hops, const unsigned int n)			\
+{									\
+	dir24_8_lookup_bulk_be(p, ips, next_hops, n, name);		\
+}
+
+DECLARE_BE_LOOKUP_FN(dir24_8_lookup_bulk_1b)
+DECLARE_BE_LOOKUP_FN(dir24_8_lookup_bulk_2b)
+DECLARE_BE_LOOKUP_FN(dir24_8_lookup_bulk_4b)
+DECLARE_BE_LOOKUP_FN(dir24_8_lookup_bulk_8b)
+DECLARE_BE_LOOKUP_FN(dir24_8_lookup_bulk_0)
+DECLARE_BE_LOOKUP_FN(dir24_8_lookup_bulk_1)
+DECLARE_BE_LOOKUP_FN(dir24_8_lookup_bulk_2)
+DECLARE_BE_LOOKUP_FN(dir24_8_lookup_bulk_3)
+DECLARE_BE_LOOKUP_FN(dir24_8_lookup_bulk_uni)
+
 void *
 dir24_8_create(const char *name, int socket_id, struct rte_fib_conf *conf);
 
@@ -244,7 +286,7 @@ void
 dir24_8_free(void *p);
 
 rte_fib_lookup_fn_t
-dir24_8_get_lookup_fn(void *p, enum rte_fib_lookup_type type);
+dir24_8_get_lookup_fn(void *p, enum rte_fib_lookup_type type, bool be_addr);
 
 int
 dir24_8_modify(struct rte_fib *fib, uint32_t ip, uint8_t depth,
diff --git a/lib/fib/dir24_8_avx512.c b/lib/fib/dir24_8_avx512.c
index 43dba28cfb..edd802abe4 100644
--- a/lib/fib/dir24_8_avx512.c
+++ b/lib/fib/dir24_8_avx512.c
@@ -10,7 +10,7 @@
 
 static __rte_always_inline void
 dir24_8_vec_lookup_x16(void *p, const uint32_t *ips,
-	uint64_t *next_hops, int size)
+	uint64_t *next_hops, int size, bool be_addr)
 {
 	struct dir24_8_tbl *dp = (struct dir24_8_tbl *)p;
 	__mmask16 msk_ext;
@@ -28,6 +28,16 @@ dir24_8_vec_lookup_x16(void *p, const uint32_t *ips,
 		res_msk = _mm512_set1_epi32(UINT16_MAX);
 
 	ip_vec = _mm512_loadu_si512(ips);
+	if (be_addr) {
+		const __m512i bswap32 = _mm512_set_epi8(
+			12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3,
+			12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3,
+			12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3,
+			12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3
+		);
+		ip_vec = _mm512_shuffle_epi8(ip_vec, bswap32);
+	}
+
 	/* mask 24 most significant bits */
 	idxes = _mm512_srli_epi32(ip_vec, 8);
 
@@ -78,7 +88,7 @@ dir24_8_vec_lookup_x16(void *p, const uint32_t *ips,
 
 static __rte_always_inline void
 dir24_8_vec_lookup_x8_8b(void *p, const uint32_t *ips,
-	uint64_t *next_hops)
+	uint64_t *next_hops, bool be_addr)
 {
 	struct dir24_8_tbl *dp = (struct dir24_8_tbl *)p;
 	const __m512i zero = _mm512_set1_epi32(0);
@@ -89,6 +99,13 @@ dir24_8_vec_lookup_x8_8b(void *p, const uint32_t *ips,
 	__mmask8 msk_ext;
 
 	ip_vec = _mm256_loadu_si256((const void *)ips);
+	if (be_addr) {
+		const __m256i bswap32 = _mm256_set_epi8(
+			12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3,
+			12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3
+		);
+		ip_vec = _mm256_shuffle_epi8(ip_vec, bswap32);
+	}
 	/* mask 24 most significant bits */
 	idxes_256 = _mm256_srli_epi32(ip_vec, 8);
 
@@ -114,52 +131,49 @@ dir24_8_vec_lookup_x8_8b(void *p, const uint32_t *ips,
 	_mm512_storeu_si512(next_hops, res);
 }
 
-void
-rte_dir24_8_vec_lookup_bulk_1b(void *p, const uint32_t *ips,
-	uint64_t *next_hops, const unsigned int n)
-{
-	uint32_t i;
-	for (i = 0; i < (n / 16); i++)
-		dir24_8_vec_lookup_x16(p, ips + i * 16, next_hops + i * 16,
-			sizeof(uint8_t));
-
-	dir24_8_lookup_bulk_1b(p, ips + i * 16, next_hops + i * 16,
-		n - i * 16);
+#define DECLARE_VECTOR_FN(suffix, nh_type, be_addr)			\
+void									\
+rte_dir24_8_vec_lookup_bulk_##suffix(void *p, const uint32_t *ips,	\
+	uint64_t *next_hops, const unsigned int n)			\
+{									\
+	uint32_t i;							\
+									\
+	for (i = 0; i < (n / 16); i++)					\
+		dir24_8_vec_lookup_x16(p, ips + i * 16, next_hops + i * 16, \
+			sizeof(nh_type), be_addr);			\
+									\
+	dir24_8_lookup_bulk_##suffix(p, ips + i * 16, next_hops + i * 16, \
+		n - i * 16);						\
 }
 
-void
-rte_dir24_8_vec_lookup_bulk_2b(void *p, const uint32_t *ips,
-	uint64_t *next_hops, const unsigned int n)
-{
-	uint32_t i;
-	for (i = 0; i < (n / 16); i++)
-		dir24_8_vec_lookup_x16(p, ips + i * 16, next_hops + i * 16,
-			sizeof(uint16_t));
-
-	dir24_8_lookup_bulk_2b(p, ips + i * 16, next_hops + i * 16,
-		n - i * 16);
-}
+DECLARE_VECTOR_FN(1b, uint8_t, false)
+DECLARE_VECTOR_FN(2b, uint16_t, false)
+DECLARE_VECTOR_FN(4b, uint32_t, false)
+DECLARE_VECTOR_FN(1b_be, uint8_t, true)
+DECLARE_VECTOR_FN(2b_be, uint16_t, true)
+DECLARE_VECTOR_FN(4b_be, uint32_t, true)
 
 void
-rte_dir24_8_vec_lookup_bulk_4b(void *p, const uint32_t *ips,
+rte_dir24_8_vec_lookup_bulk_8b(void *p, const uint32_t *ips,
 	uint64_t *next_hops, const unsigned int n)
 {
 	uint32_t i;
-	for (i = 0; i < (n / 16); i++)
-		dir24_8_vec_lookup_x16(p, ips + i * 16, next_hops + i * 16,
-			sizeof(uint32_t));
+	for (i = 0; i < (n / 8); i++)
+		dir24_8_vec_lookup_x8_8b(p, ips + i * 8,
+			next_hops + i * 8, false);
 
-	dir24_8_lookup_bulk_4b(p, ips + i * 16, next_hops + i * 16,
-		n - i * 16);
+	dir24_8_lookup_bulk_8b(p, ips + i * 8, next_hops + i * 8, n - i * 8);
 }
 
 void
-rte_dir24_8_vec_lookup_bulk_8b(void *p, const uint32_t *ips,
+rte_dir24_8_vec_lookup_bulk_8b_be(void *p, const uint32_t *ips,
 	uint64_t *next_hops, const unsigned int n)
 {
 	uint32_t i;
 	for (i = 0; i < (n / 8); i++)
-		dir24_8_vec_lookup_x8_8b(p, ips + i * 8, next_hops + i * 8);
+		dir24_8_vec_lookup_x8_8b(p, ips + i * 8,
+			next_hops + i * 8, true);
 
-	dir24_8_lookup_bulk_8b(p, ips + i * 8, next_hops + i * 8, n - i * 8);
+	dir24_8_lookup_bulk_8b_be(p, ips + i * 8,
+		next_hops + i * 8, n - i * 8);
 }
diff --git a/lib/fib/dir24_8_avx512.h b/lib/fib/dir24_8_avx512.h
index 1d3c2b9317..e9f7b72519 100644
--- a/lib/fib/dir24_8_avx512.h
+++ b/lib/fib/dir24_8_avx512.h
@@ -21,4 +21,19 @@ void
 rte_dir24_8_vec_lookup_bulk_8b(void *p, const uint32_t *ips,
 	uint64_t *next_hops, const unsigned int n);
 
+void
+rte_dir24_8_vec_lookup_bulk_1b_be(void *p, const uint32_t *ips,
+	uint64_t *next_hops, const unsigned int n);
+
+void
+rte_dir24_8_vec_lookup_bulk_2b_be(void *p, const uint32_t *ips,
+	uint64_t *next_hops, const unsigned int n);
+
+void
+rte_dir24_8_vec_lookup_bulk_4b_be(void *p, const uint32_t *ips,
+	uint64_t *next_hops, const unsigned int n);
+
+void
+rte_dir24_8_vec_lookup_bulk_8b_be(void *p, const uint32_t *ips,
+	uint64_t *next_hops, const unsigned int n);
 #endif /* _DIR248_AVX512_H_ */
diff --git a/lib/fib/meson.build b/lib/fib/meson.build
index 6795f41a0a..8c03496cdc 100644
--- a/lib/fib/meson.build
+++ b/lib/fib/meson.build
@@ -25,40 +25,28 @@ if dpdk_conf.has('RTE_ARCH_X86_64') and binutils_ok
     # linked into main lib.
 
     # check if all required flags already enabled (variant a).
-    acl_avx512_flags = ['__AVX512F__','__AVX512DQ__']
-    acl_avx512_on = true
-    foreach f:acl_avx512_flags
+    fib_avx512_flags = ['__AVX512F__','__AVX512DQ__', '__AVX512BW__']
+    fib_avx512_on = true
+    foreach f:fib_avx512_flags
         if cc.get_define(f, args: machine_args) == ''
-            acl_avx512_on = false
+            fib_avx512_on = false
         endif
     endforeach
 
-    if acl_avx512_on == true
-        cflags += ['-DCC_DIR24_8_AVX512_SUPPORT']
-        sources += files('dir24_8_avx512.c')
-        # TRIE AVX512 implementation uses avx512bw intrinsics along with
-        # avx512f and avx512dq
-        if cc.get_define('__AVX512BW__', args: machine_args) != ''
-            cflags += ['-DCC_TRIE_AVX512_SUPPORT']
-            sources += files('trie_avx512.c')
-        endif
-    elif cc.has_multi_arguments('-mavx512f', '-mavx512dq')
+    if fib_avx512_on == true
+        cflags += ['-DCC_DIR24_8_AVX512_SUPPORT', '-DCC_TRIE_AVX512_SUPPORT']
+        sources += files('dir24_8_avx512.c', 'trie_avx512.c')
+    elif cc.has_multi_arguments('-mavx512f', '-mavx512dq', '-mavx512bw')
         dir24_8_avx512_tmp = static_library('dir24_8_avx512_tmp',
                 'dir24_8_avx512.c',
                 dependencies: static_rte_eal,
-                c_args: cflags + ['-mavx512f', '-mavx512dq'])
+                c_args: cflags + ['-mavx512f', '-mavx512dq', '-mavx512bw'])
         objs += dir24_8_avx512_tmp.extract_objects('dir24_8_avx512.c')
-        cflags += ['-DCC_DIR24_8_AVX512_SUPPORT']
-        # TRIE AVX512 implementation uses avx512bw intrinsics along with
-        # avx512f and avx512dq
-        if cc.has_argument('-mavx512bw')
-            trie_avx512_tmp = static_library('trie_avx512_tmp',
+        trie_avx512_tmp = static_library('trie_avx512_tmp',
                 'trie_avx512.c',
                 dependencies: static_rte_eal,
-                c_args: cflags + ['-mavx512f', \
-                    '-mavx512dq', '-mavx512bw'])
-            objs += trie_avx512_tmp.extract_objects('trie_avx512.c')
-            cflags += ['-DCC_TRIE_AVX512_SUPPORT']
-        endif
+                c_args: cflags + ['-mavx512f', '-mavx512dq', '-mavx512bw'])
+        objs += trie_avx512_tmp.extract_objects('trie_avx512.c')
+        cflags += ['-DCC_DIR24_8_AVX512_SUPPORT', '-DCC_TRIE_AVX512_SUPPORT']
     endif
 endif
diff --git a/lib/fib/rte_fib.c b/lib/fib/rte_fib.c
index 4f9fba5a4f..991e48b5ea 100644
--- a/lib/fib/rte_fib.c
+++ b/lib/fib/rte_fib.c
@@ -42,6 +42,7 @@ EAL_REGISTER_TAILQ(rte_fib_tailq)
 struct rte_fib {
 	char			name[RTE_FIB_NAMESIZE];
 	enum rte_fib_type	type;	/**< Type of FIB struct */
+	int flags;					/**< Flags */
 	struct rte_rib		*rib;	/**< RIB helper datastructure */
 	void			*dp;	/**< pointer to the dataplane struct*/
 	rte_fib_lookup_fn_t	lookup;	/**< FIB lookup function */
@@ -110,7 +111,7 @@ init_dataplane(struct rte_fib *fib, __rte_unused int socket_id,
 		if (fib->dp == NULL)
 			return -rte_errno;
 		fib->lookup = dir24_8_get_lookup_fn(fib->dp,
-			RTE_FIB_LOOKUP_DEFAULT);
+			RTE_FIB_LOOKUP_DEFAULT, !!(fib->flags & RTE_FIB_FLAG_LOOKUP_BE));
 		fib->modify = dir24_8_modify;
 		return 0;
 	default:
@@ -214,6 +215,7 @@ rte_fib_create(const char *name, int socket_id, struct rte_fib_conf *conf)
 	rte_strlcpy(fib->name, name, sizeof(fib->name));
 	fib->rib = rib;
 	fib->type = conf->type;
+	fib->flags = conf->flags;
 	fib->def_nh = conf->default_nh;
 	ret = init_dataplane(fib, socket_id, conf);
 	if (ret < 0) {
@@ -329,7 +331,8 @@ rte_fib_select_lookup(struct rte_fib *fib,
 
 	switch (fib->type) {
 	case RTE_FIB_DIR24_8:
-		fn = dir24_8_get_lookup_fn(fib->dp, type);
+		fn = dir24_8_get_lookup_fn(fib->dp, type,
+			!!(fib->flags & RTE_FIB_FLAG_LOOKUP_BE));
 		if (fn == NULL)
 			return -EINVAL;
 		fib->lookup = fn;
diff --git a/lib/fib/rte_fib.h b/lib/fib/rte_fib.h
index d7a5aafe53..1617235e85 100644
--- a/lib/fib/rte_fib.h
+++ b/lib/fib/rte_fib.h
@@ -28,6 +28,9 @@ struct rte_rib;
 /** Maximum depth value possible for IPv4 FIB. */
 #define RTE_FIB_MAXDEPTH	32
 
+/** If set fib lookup is expecting ipv4 in network byte order */
+#define RTE_FIB_FLAG_LOOKUP_BE	1
+
 /** Type of FIB struct */
 enum rte_fib_type {
 	RTE_FIB_DUMMY,		/**< RIB tree based FIB */
@@ -76,6 +79,7 @@ enum rte_fib_lookup_type {
 /** FIB configuration structure */
 struct rte_fib_conf {
 	enum rte_fib_type type; /**< Type of FIB struct */
+	unsigned int flags;
 	/** Default value returned on lookup if there is no route */
 	uint64_t default_nh;
 	int	max_routes;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] fib: network byte order IPv4 lookup
  2024-10-08 17:16 [PATCH v2] fib: network byte order IPv4 lookup Vladimir Medvedkin
@ 2024-10-08 21:26 ` Stephen Hemminger
  2024-10-09  9:51 ` David Marchand
  2024-10-09 18:31 ` Stephen Hemminger
  2 siblings, 0 replies; 7+ messages in thread
From: Stephen Hemminger @ 2024-10-08 21:26 UTC (permalink / raw)
  To: Vladimir Medvedkin; +Cc: dev, rjarry, mb, david.marchand

On Tue,  8 Oct 2024 17:16:05 +0000
Vladimir Medvedkin <vladimir.medvedkin@intel.com> wrote:

> Previously when running rte_fib_lookup IPv4 addresses must have been in
> host byte order.
> 
> This patch adds a new flag RTE_FIB_FLAG_LOOKUP_BE that can be passed on
> fib create, which will allow to have IPv4 in network byte order on
> lookup.
> 
> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
> ---

github build failed with this.

FAILED: lib/76b5a35@@rte_fib at sta/fib_dir24_8.c.o 
ccache gcc -Ilib/76b5a35@@rte_fib at sta -Ilib -I../lib -Ilib/fib -I../lib/fib -I. -I../ -Iconfig -I../config -Ilib/eal/include -I../lib/eal/include -Ilib/eal/linux/include -I../lib/eal/linux/include -Ilib/eal/x86/include -I../lib/eal/x86/include -Ilib/eal/common -I../lib/eal/common -Ilib/eal -I../lib/eal -Ilib/kvargs -I../lib/kvargs -Ilib/log -I../lib/log -Ilib/telemetry/../metrics -I../lib/telemetry/../metrics -Ilib/telemetry -I../lib/telemetry -Ilib/rib -I../lib/rib -Ilib/mempool -I../lib/mempool -Ilib/ring -I../lib/ring -fdiagnostics-color=always -pipe -D_FILE_OFFSET_BITS=64 -Wall -Winvalid-pch -Wextra -Werror -std=c11 -O2 -g -include rte_config.h -Wcast-qual -Wdeprecated -Wformat -Wformat-nonliteral -Wformat-security -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wold-style-definition -Wpointer-arith -Wsign-compare -Wstrict-prototypes -Wundef -Wwrite-strings -Wno-address-of-packed-member -Wno-packed-not-aligned -Wno-missing-field-initializers -Wno-zero-length-bounds -Wno-pointer-to-int-cast -D_GNU_SOURCE -m32 -fPIC -march=corei7 -mrtm -DALLOW_EXPERIMENTAL_API -DALLOW_INTERNAL_API -Wno-format-truncation -DRTE_LOG_DEFAULT_LOGTYPE=lib.fib -MD -MQ 'lib/76b5a35@@rte_fib at sta/fib_dir24_8.c.o' -MF 'lib/76b5a35@@rte_fib at sta/fib_dir24_8.c.o.d' -o 'lib/76b5a35@@rte_fib at sta/fib_dir24_8.c.o' -c ../lib/fib/dir24_8.c
../lib/fib/dir24_8.c: In function ‘get_vector_fn’:
../lib/fib/dir24_8.c:71:54: error: unused parameter ‘be_addr’ [-Werror=unused-parameter]
   71 | get_vector_fn(enum rte_fib_dir24_8_nh_sz nh_sz, bool be_addr)
      |    

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] fib: network byte order IPv4 lookup
  2024-10-08 17:16 [PATCH v2] fib: network byte order IPv4 lookup Vladimir Medvedkin
  2024-10-08 21:26 ` Stephen Hemminger
@ 2024-10-09  9:51 ` David Marchand
  2024-10-09 10:56   ` David Marchand
  2024-10-09 18:31 ` Stephen Hemminger
  2 siblings, 1 reply; 7+ messages in thread
From: David Marchand @ 2024-10-09  9:51 UTC (permalink / raw)
  To: Vladimir Medvedkin; +Cc: dev, rjarry, mb, Stephen Hemminger

Hi Vladimir,

On Tue, Oct 8, 2024 at 7:16 PM Vladimir Medvedkin
<vladimir.medvedkin@intel.com> wrote:
> diff --git a/lib/fib/dir24_8_avx512.c b/lib/fib/dir24_8_avx512.c
> index 43dba28cfb..edd802abe4 100644
> --- a/lib/fib/dir24_8_avx512.c
> +++ b/lib/fib/dir24_8_avx512.c
> @@ -10,7 +10,7 @@
>
>  static __rte_always_inline void
>  dir24_8_vec_lookup_x16(void *p, const uint32_t *ips,
> -       uint64_t *next_hops, int size)
> +       uint64_t *next_hops, int size, bool be_addr)
>  {
>         struct dir24_8_tbl *dp = (struct dir24_8_tbl *)p;
>         __mmask16 msk_ext;
> @@ -28,6 +28,16 @@ dir24_8_vec_lookup_x16(void *p, const uint32_t *ips,
>                 res_msk = _mm512_set1_epi32(UINT16_MAX);
>
>         ip_vec = _mm512_loadu_si512(ips);
> +       if (be_addr) {
> +               const __m512i bswap32 = _mm512_set_epi8(

Some toolchains do not like _mm512_set_epi8 (reported by the CI).


> +                       12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3,
> +                       12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3,
> +                       12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3,
> +                       12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3
> +               );
> +               ip_vec = _mm512_shuffle_epi8(ip_vec, bswap32);
> +       }
> +
>         /* mask 24 most significant bits */
>         idxes = _mm512_srli_epi32(ip_vec, 8);
>

[snip]

> diff --git a/lib/fib/meson.build b/lib/fib/meson.build
> index 6795f41a0a..8c03496cdc 100644
> --- a/lib/fib/meson.build
> +++ b/lib/fib/meson.build
> @@ -25,40 +25,28 @@ if dpdk_conf.has('RTE_ARCH_X86_64') and binutils_ok
>      # linked into main lib.
>
>      # check if all required flags already enabled (variant a).
> -    acl_avx512_flags = ['__AVX512F__','__AVX512DQ__']
> -    acl_avx512_on = true
> -    foreach f:acl_avx512_flags
> +    fib_avx512_flags = ['__AVX512F__','__AVX512DQ__', '__AVX512BW__']
> +    fib_avx512_on = true
> +    foreach f:fib_avx512_flags
>          if cc.get_define(f, args: machine_args) == ''
> -            acl_avx512_on = false
> +            fib_avx512_on = false
>          endif
>      endforeach

Please reuse the common checks recently merged, see for example:
https://git.dpdk.org/dpdk/diff/drivers/event/dlb2/meson.build?id=ef7a4025cd714189dc333bb19ea60c2abdeffb7d


>
> -    if acl_avx512_on == true
> -        cflags += ['-DCC_DIR24_8_AVX512_SUPPORT']
> -        sources += files('dir24_8_avx512.c')
> -        # TRIE AVX512 implementation uses avx512bw intrinsics along with
> -        # avx512f and avx512dq
> -        if cc.get_define('__AVX512BW__', args: machine_args) != ''
> -            cflags += ['-DCC_TRIE_AVX512_SUPPORT']
> -            sources += files('trie_avx512.c')
> -        endif
> -    elif cc.has_multi_arguments('-mavx512f', '-mavx512dq')
> +    if fib_avx512_on == true
> +        cflags += ['-DCC_DIR24_8_AVX512_SUPPORT', '-DCC_TRIE_AVX512_SUPPORT']

Nit: now that both dir24_8 and trie share the same requirement, can we
go with a simple CC_AVX512_SUPPORT?


-- 
David Marchand


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] fib: network byte order IPv4 lookup
  2024-10-09  9:51 ` David Marchand
@ 2024-10-09 10:56   ` David Marchand
  2024-10-10 11:19     ` Medvedkin, Vladimir
  0 siblings, 1 reply; 7+ messages in thread
From: David Marchand @ 2024-10-09 10:56 UTC (permalink / raw)
  To: Vladimir Medvedkin; +Cc: dev, rjarry, mb, Stephen Hemminger

On Wed, Oct 9, 2024 at 11:51 AM David Marchand
<david.marchand@redhat.com> wrote:
>
> Hi Vladimir,
>
> On Tue, Oct 8, 2024 at 7:16 PM Vladimir Medvedkin
> <vladimir.medvedkin@intel.com> wrote:
> > diff --git a/lib/fib/dir24_8_avx512.c b/lib/fib/dir24_8_avx512.c
> > index 43dba28cfb..edd802abe4 100644
> > --- a/lib/fib/dir24_8_avx512.c
> > +++ b/lib/fib/dir24_8_avx512.c
> > @@ -10,7 +10,7 @@
> >
> >  static __rte_always_inline void
> >  dir24_8_vec_lookup_x16(void *p, const uint32_t *ips,
> > -       uint64_t *next_hops, int size)
> > +       uint64_t *next_hops, int size, bool be_addr)
> >  {
> >         struct dir24_8_tbl *dp = (struct dir24_8_tbl *)p;
> >         __mmask16 msk_ext;
> > @@ -28,6 +28,16 @@ dir24_8_vec_lookup_x16(void *p, const uint32_t *ips,
> >                 res_msk = _mm512_set1_epi32(UINT16_MAX);
> >
> >         ip_vec = _mm512_loadu_si512(ips);
> > +       if (be_addr) {
> > +               const __m512i bswap32 = _mm512_set_epi8(
>
> Some toolchains do not like _mm512_set_epi8 (reported by the CI).

Probably related to gcc commit 4e6a811fad69 ("avx512fintrin.h
(_mm512_set_epi16, [...]): New intrinsics.") introduced in gcc 9.
Some distributions in the CI come with a gcc < 9.

We had a similar situation in the hash library in the past,
fba335b4b204 ("hash: fix Toeplitz hash implementation").


-- 
David Marchand


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] fib: network byte order IPv4 lookup
  2024-10-08 17:16 [PATCH v2] fib: network byte order IPv4 lookup Vladimir Medvedkin
  2024-10-08 21:26 ` Stephen Hemminger
  2024-10-09  9:51 ` David Marchand
@ 2024-10-09 18:31 ` Stephen Hemminger
  2 siblings, 0 replies; 7+ messages in thread
From: Stephen Hemminger @ 2024-10-09 18:31 UTC (permalink / raw)
  To: Vladimir Medvedkin; +Cc: dev, rjarry, mb, david.marchand

On Tue,  8 Oct 2024 17:16:05 +0000
Vladimir Medvedkin <vladimir.medvedkin@intel.com> wrote:

> Previously when running rte_fib_lookup IPv4 addresses must have been in
> host byte order.
> 
> This patch adds a new flag RTE_FIB_FLAG_LOOKUP_BE that can be passed on
> fib create, which will allow to have IPv4 in network byte order on
> lookup.
> 
> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>

Need to address David's comment see the build failures.

Build URL: https://github.com/ovsrobot/dpdk/actions/runs/11240928255
Build Logs:
-----------------------Summary of failed steps-----------------------
"ubuntu-22.04-gcc-static-i386" failed at step Build and test
"ubuntu-22.04-gcc-shared-aarch64" failed at step Build and test
"ubuntu-22.04-gcc-ppc64le" failed at step Build and test
"ubuntu-22.04-gcc-riscv64" failed at step Build and test
----------------------End summary of failed steps--------------------

-------------------------------BEGIN LOGS----------------------------
####################################################################################
#### [Begin job log] "ubuntu-22.04-gcc-static-i386" at step Build and test
####################################################################################
[423/3186] Compiling C object 'lib/76b5a35@@rte_fib at sta/fib_rte_fib.c.o'.
[424/3186] Compiling C object 'lib/76b5a35@@rte_pdcp at sta/pdcp_pdcp_ctrl_pdu.c.o'.
[425/3186] Compiling C object 'lib/76b5a35@@rte_pdcp at sta/pdcp_pdcp_reorder.c.o'.
[426/3186] Generating ipsec.sym_chk with a meson_exe.py custom command.
[427/3186] Linking target lib/librte_ipsec.so.25.0.
[428/3186] Compiling C object 'lib/76b5a35@@rte_pdcp at sta/pdcp_rte_pdcp.c.o'.
[429/3186] Generating symbol file 'lib/76b5a35@@rte_ipsec at sha/librte_ipsec.so.25.0.symbols'.
[430/3186] Compiling C object 'lib/76b5a35@@rte_fib at sta/fib_rte_fib6.c.o'.
[431/3186] Compiling C object 'lib/76b5a35@@rte_port at sta/port_port_log.c.o'.
[432/3186] Compiling C object 'lib/76b5a35@@rte_fib at sta/fib_trie.c.o'.
[433/3186] Compiling C object 'lib/76b5a35@@rte_fib at sta/fib_dir24_8.c.o'.
FAILED: lib/76b5a35@@rte_fib at sta/fib_dir24_8.c.o 
ccache gcc -Ilib/76b5a35@@rte_fib at sta -Ilib -I../lib -Ilib/fib -I../lib/fib -I. -I../ -Iconfig -I../config -Ilib/eal/include -I../lib/eal/include -Ilib/eal/linux/include -I../lib/eal/linux/include -Ilib/eal/x86/include -I../lib/eal/x86/include -Ilib/eal/common -I../lib/eal/common -Ilib/eal -I../lib/eal -Ilib/kvargs -I../lib/kvargs -Ilib/log -I../lib/log -Ilib/telemetry/../metrics -I../lib/telemetry/../metrics -Ilib/telemetry -I../lib/telemetry -Ilib/rib -I../lib/rib -Ilib/mempool -I../lib/mempool -Ilib/ring -I../lib/ring -fdiagnostics-color=always -pipe -D_FILE_OFFSET_BITS=64 -Wall -Winvalid-pch -Wextra -Werror -std=c11 -O2 -g -include rte_config.h -Wcast-qual -Wdeprecated -Wformat -Wformat-nonliteral -Wformat-security -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wold-style-definition -Wpointer-arith -Wsign-compare -Wstrict-prototypes -Wundef -Wwrite-strings -Wno-address-of-packed-member -Wno-packed-not-aligned -Wno-missing-field-initializers -Wno-zero-length-bounds -Wno-pointer-to-int-cast -D_GNU_SOURCE -m32 -fPIC -march=corei7 -mrtm -DALLOW_EXPERIMENTAL_API -DALLOW_INTERNAL_API -Wno-format-truncation -DRTE_LOG_DEFAULT_LOGTYPE=lib.fib -MD -MQ 'lib/76b5a35@@rte_fib at sta/fib_dir24_8.c.o' -MF 'lib/76b5a35@@rte_fib at sta/fib_dir24_8.c.o.d' -o 'lib/76b5a35@@rte_fib at sta/fib_dir24_8.c.o' -c ../lib/fib/dir24_8.c
../lib/fib/dir24_8.c: In function ‘get_vector_fn’:
../lib/fib/dir24_8.c:71:54: error: unused parameter ‘be_addr’ [-Werror=unused-parameter]
   71 | get_vector_fn(enum rte_fib_dir24_8_nh_sz nh_sz, bool be_addr)
      |                                                      ^
cc1: all warnings being treated as errors
[434/3186] Compiling C object 'lib/76b5a35@@rte_port at sta/port_rte_port_ethdev.c.o'.
[435/3186] Compiling C object 'lib/76b5a35@@rte_port at sta/port_rte_port_fd.c.o'.
[436/3186] Compiling C object 'lib/76b5a35@@rte_pdcp at sta/pdcp_pdcp_process.c.o'.
[437/3186] Compiling C object 'lib/76b5a35@@rte_vhost at sta/vhost_vhost_crypto.c.o'.
[438/3186] Compiling C object 'lib/76b5a35@@rte_vhost at sta/vhost_virtio_net.c.o'.
ninja: build stopped: subcommand failed.
##[error]Process completed with exit code 1.
####################################################################################
#### [End job log] "ubuntu-22.04-gcc-static-i386" at step Build and test
####################################################################################

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] fib: network byte order IPv4 lookup
  2024-10-09 10:56   ` David Marchand
@ 2024-10-10 11:19     ` Medvedkin, Vladimir
  0 siblings, 0 replies; 7+ messages in thread
From: Medvedkin, Vladimir @ 2024-10-10 11:19 UTC (permalink / raw)
  To: David Marchand; +Cc: dev, rjarry, mb, Stephen Hemminger

Hi David,

On 09/10/2024 11:56, David Marchand wrote:
> On Wed, Oct 9, 2024 at 11:51 AM David Marchand
> <david.marchand@redhat.com> wrote:
>> Hi Vladimir,
>>
>> On Tue, Oct 8, 2024 at 7:16 PM Vladimir Medvedkin
>> <vladimir.medvedkin@intel.com> wrote:
>>> diff --git a/lib/fib/dir24_8_avx512.c b/lib/fib/dir24_8_avx512.c
>>> index 43dba28cfb..edd802abe4 100644
>>> --- a/lib/fib/dir24_8_avx512.c
>>> +++ b/lib/fib/dir24_8_avx512.c
>>> @@ -10,7 +10,7 @@
>>>
>>>   static __rte_always_inline void
>>>   dir24_8_vec_lookup_x16(void *p, const uint32_t *ips,
>>> -       uint64_t *next_hops, int size)
>>> +       uint64_t *next_hops, int size, bool be_addr)
>>>   {
>>>          struct dir24_8_tbl *dp = (struct dir24_8_tbl *)p;
>>>          __mmask16 msk_ext;
>>> @@ -28,6 +28,16 @@ dir24_8_vec_lookup_x16(void *p, const uint32_t *ips,
>>>                  res_msk = _mm512_set1_epi32(UINT16_MAX);
>>>
>>>          ip_vec = _mm512_loadu_si512(ips);
>>> +       if (be_addr) {
>>> +               const __m512i bswap32 = _mm512_set_epi8(
>> Some toolchains do not like _mm512_set_epi8 (reported by the CI).
> Probably related to gcc commit 4e6a811fad69 ("avx512fintrin.h
> (_mm512_set_epi16, [...]): New intrinsics.") introduced in gcc 9.
> Some distributions in the CI come with a gcc < 9.
>
> We had a similar situation in the hash library in the past,
> fba335b4b204 ("hash: fix Toeplitz hash implementation").
>
Thanks, I'll change it with _mm512_set_epi32(), afaik it should be 
supported by an old gcc

-- 
Regards,
Vladimir


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v2] fib: network byte order IPv4 lookup
  2024-09-06 17:06 [PATCH] " Vladimir Medvedkin
@ 2024-10-08 17:33 ` Vladimir Medvedkin
  0 siblings, 0 replies; 7+ messages in thread
From: Vladimir Medvedkin @ 2024-10-08 17:33 UTC (permalink / raw)
  To: dev; +Cc: rjarry, mb, david.marchand

Previously when running rte_fib_lookup IPv4 addresses must have been in
host byte order.

This patch adds a new flag RTE_FIB_FLAG_LOOKUP_BE that can be passed on
fib create, which will allow to have IPv4 in network byte order on
lookup.

Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
---
 app/test/test_fib.c      |  2 +-
 lib/fib/dir24_8.c        | 62 +++++++++++++++++++-----------
 lib/fib/dir24_8.h        | 44 ++++++++++++++++++++-
 lib/fib/dir24_8_avx512.c | 82 +++++++++++++++++++++++-----------------
 lib/fib/dir24_8_avx512.h | 15 ++++++++
 lib/fib/meson.build      | 38 +++++++------------
 lib/fib/rte_fib.c        |  7 +++-
 lib/fib/rte_fib.h        |  4 ++
 8 files changed, 169 insertions(+), 85 deletions(-)

diff --git a/app/test/test_fib.c b/app/test/test_fib.c
index 45dccca1f6..b0e53dbe01 100644
--- a/app/test/test_fib.c
+++ b/app/test/test_fib.c
@@ -319,7 +319,7 @@ int32_t
 test_lookup(void)
 {
 	struct rte_fib *fib = NULL;
-	struct rte_fib_conf config;
+	struct rte_fib_conf config = { 0 };
 	uint64_t def_nh = 100;
 	int ret;
 
diff --git a/lib/fib/dir24_8.c b/lib/fib/dir24_8.c
index c739e92304..5520f0f519 100644
--- a/lib/fib/dir24_8.c
+++ b/lib/fib/dir24_8.c
@@ -26,56 +26,72 @@
 #define ROUNDUP(x, y)	 RTE_ALIGN_CEIL(x, (1 << (32 - y)))
 
 static inline rte_fib_lookup_fn_t
-get_scalar_fn(enum rte_fib_dir24_8_nh_sz nh_sz)
+get_scalar_fn(enum rte_fib_dir24_8_nh_sz nh_sz, bool be_addr)
 {
 	switch (nh_sz) {
 	case RTE_FIB_DIR24_8_1B:
-		return dir24_8_lookup_bulk_1b;
+		return (be_addr) ? dir24_8_lookup_bulk_1b_be :
+					dir24_8_lookup_bulk_1b;
 	case RTE_FIB_DIR24_8_2B:
-		return dir24_8_lookup_bulk_2b;
+		return (be_addr) ? dir24_8_lookup_bulk_2b_be :
+					dir24_8_lookup_bulk_2b;
 	case RTE_FIB_DIR24_8_4B:
-		return dir24_8_lookup_bulk_4b;
+		return (be_addr) ? dir24_8_lookup_bulk_4b_be :
+					dir24_8_lookup_bulk_4b;
 	case RTE_FIB_DIR24_8_8B:
-		return dir24_8_lookup_bulk_8b;
+		return (be_addr) ? dir24_8_lookup_bulk_8b_be :
+					dir24_8_lookup_bulk_8b;
 	default:
 		return NULL;
 	}
 }
 
 static inline rte_fib_lookup_fn_t
-get_scalar_fn_inlined(enum rte_fib_dir24_8_nh_sz nh_sz)
+get_scalar_fn_inlined(enum rte_fib_dir24_8_nh_sz nh_sz, bool be_addr)
 {
 	switch (nh_sz) {
 	case RTE_FIB_DIR24_8_1B:
-		return dir24_8_lookup_bulk_0;
+		return (be_addr) ? dir24_8_lookup_bulk_0_be :
+					dir24_8_lookup_bulk_0;
 	case RTE_FIB_DIR24_8_2B:
-		return dir24_8_lookup_bulk_1;
+		return (be_addr) ? dir24_8_lookup_bulk_1_be :
+					dir24_8_lookup_bulk_1;
 	case RTE_FIB_DIR24_8_4B:
-		return dir24_8_lookup_bulk_2;
+		return (be_addr) ? dir24_8_lookup_bulk_2_be :
+					dir24_8_lookup_bulk_2;
 	case RTE_FIB_DIR24_8_8B:
-		return dir24_8_lookup_bulk_3;
+		return (be_addr) ? dir24_8_lookup_bulk_3_be :
+					dir24_8_lookup_bulk_3;
 	default:
 		return NULL;
 	}
 }
 
 static inline rte_fib_lookup_fn_t
-get_vector_fn(enum rte_fib_dir24_8_nh_sz nh_sz)
+get_vector_fn(enum rte_fib_dir24_8_nh_sz nh_sz, bool be_addr)
 {
 #ifdef CC_DIR24_8_AVX512_SUPPORT
 	if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) <= 0) ||
+		(rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512DQ) <= 0) ||
 			(rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_512))
 		return NULL;
 
+	if (be_addr && (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) <= 0))
+		return NULL;
+
 	switch (nh_sz) {
 	case RTE_FIB_DIR24_8_1B:
-		return rte_dir24_8_vec_lookup_bulk_1b;
+		return (be_addr) ? rte_dir24_8_vec_lookup_bulk_1b_be :
+					rte_dir24_8_vec_lookup_bulk_1b;
 	case RTE_FIB_DIR24_8_2B:
-		return rte_dir24_8_vec_lookup_bulk_2b;
+		return (be_addr) ? rte_dir24_8_vec_lookup_bulk_2b_be :
+					rte_dir24_8_vec_lookup_bulk_2b;
 	case RTE_FIB_DIR24_8_4B:
-		return rte_dir24_8_vec_lookup_bulk_4b;
+		return (be_addr) ? rte_dir24_8_vec_lookup_bulk_4b_be :
+					rte_dir24_8_vec_lookup_bulk_4b;
 	case RTE_FIB_DIR24_8_8B:
-		return rte_dir24_8_vec_lookup_bulk_8b;
+		return (be_addr) ? rte_dir24_8_vec_lookup_bulk_8b_be :
+					rte_dir24_8_vec_lookup_bulk_8b;
 	default:
 		return NULL;
 	}
@@ -86,7 +102,7 @@ get_vector_fn(enum rte_fib_dir24_8_nh_sz nh_sz)
 }
 
 rte_fib_lookup_fn_t
-dir24_8_get_lookup_fn(void *p, enum rte_fib_lookup_type type)
+dir24_8_get_lookup_fn(void *p, enum rte_fib_lookup_type type, bool be_addr)
 {
 	enum rte_fib_dir24_8_nh_sz nh_sz;
 	rte_fib_lookup_fn_t ret_fn;
@@ -99,16 +115,18 @@ dir24_8_get_lookup_fn(void *p, enum rte_fib_lookup_type type)
 
 	switch (type) {
 	case RTE_FIB_LOOKUP_DIR24_8_SCALAR_MACRO:
-		return get_scalar_fn(nh_sz);
+		return get_scalar_fn(nh_sz, be_addr);
 	case RTE_FIB_LOOKUP_DIR24_8_SCALAR_INLINE:
-		return get_scalar_fn_inlined(nh_sz);
+		return get_scalar_fn_inlined(nh_sz, be_addr);
 	case RTE_FIB_LOOKUP_DIR24_8_SCALAR_UNI:
-		return dir24_8_lookup_bulk_uni;
+		return (be_addr) ? dir24_8_lookup_bulk_uni_be :
+						dir24_8_lookup_bulk_uni;
 	case RTE_FIB_LOOKUP_DIR24_8_VECTOR_AVX512:
-		return get_vector_fn(nh_sz);
+		return get_vector_fn(nh_sz, be_addr);
 	case RTE_FIB_LOOKUP_DEFAULT:
-		ret_fn = get_vector_fn(nh_sz);
-		return (ret_fn != NULL) ? ret_fn : get_scalar_fn(nh_sz);
+		ret_fn = get_vector_fn(nh_sz, be_addr);
+		return (ret_fn != NULL) ? ret_fn :
+			get_scalar_fn(nh_sz, be_addr);
 	default:
 		return NULL;
 	}
diff --git a/lib/fib/dir24_8.h b/lib/fib/dir24_8.h
index 7125049f15..2c776e118f 100644
--- a/lib/fib/dir24_8.h
+++ b/lib/fib/dir24_8.h
@@ -7,7 +7,9 @@
 #define _DIR24_8_H_
 
 #include <stdalign.h>
+#include <stdbool.h>
 
+#include <rte_byteorder.h>
 #include <rte_prefetch.h>
 #include <rte_branch_prediction.h>
 
@@ -237,6 +239,46 @@ dir24_8_lookup_bulk_uni(void *p, const uint32_t *ips,
 	}
 }
 
+#define BSWAP_MAX_LENGTH	64
+
+typedef void (*dir24_8_lookup_bulk_be_cb)(void *p, const uint32_t *ips,
+	uint64_t *next_hops, const unsigned int n);
+
+static inline void
+dir24_8_lookup_bulk_be(void *p, const uint32_t *ips,
+	uint64_t *next_hops, const unsigned int n,
+	dir24_8_lookup_bulk_be_cb cb)
+{
+	uint32_t le_ips[BSWAP_MAX_LENGTH];
+	unsigned int i;
+
+	for (i = 0; i < n; i += BSWAP_MAX_LENGTH) {
+		int j;
+		for (j = 0; j < BSWAP_MAX_LENGTH && i + j < n; j++)
+			le_ips[j] = rte_be_to_cpu_32(ips[i + j]);
+
+		cb(p, le_ips, next_hops + i, j);
+	}
+}
+
+#define DECLARE_BE_LOOKUP_FN(name)					\
+static inline void							\
+name##_be(void *p, const uint32_t *ips,					\
+	uint64_t *next_hops, const unsigned int n)			\
+{									\
+	dir24_8_lookup_bulk_be(p, ips, next_hops, n, name);		\
+}
+
+DECLARE_BE_LOOKUP_FN(dir24_8_lookup_bulk_1b)
+DECLARE_BE_LOOKUP_FN(dir24_8_lookup_bulk_2b)
+DECLARE_BE_LOOKUP_FN(dir24_8_lookup_bulk_4b)
+DECLARE_BE_LOOKUP_FN(dir24_8_lookup_bulk_8b)
+DECLARE_BE_LOOKUP_FN(dir24_8_lookup_bulk_0)
+DECLARE_BE_LOOKUP_FN(dir24_8_lookup_bulk_1)
+DECLARE_BE_LOOKUP_FN(dir24_8_lookup_bulk_2)
+DECLARE_BE_LOOKUP_FN(dir24_8_lookup_bulk_3)
+DECLARE_BE_LOOKUP_FN(dir24_8_lookup_bulk_uni)
+
 void *
 dir24_8_create(const char *name, int socket_id, struct rte_fib_conf *conf);
 
@@ -244,7 +286,7 @@ void
 dir24_8_free(void *p);
 
 rte_fib_lookup_fn_t
-dir24_8_get_lookup_fn(void *p, enum rte_fib_lookup_type type);
+dir24_8_get_lookup_fn(void *p, enum rte_fib_lookup_type type, bool be_addr);
 
 int
 dir24_8_modify(struct rte_fib *fib, uint32_t ip, uint8_t depth,
diff --git a/lib/fib/dir24_8_avx512.c b/lib/fib/dir24_8_avx512.c
index 43dba28cfb..edd802abe4 100644
--- a/lib/fib/dir24_8_avx512.c
+++ b/lib/fib/dir24_8_avx512.c
@@ -10,7 +10,7 @@
 
 static __rte_always_inline void
 dir24_8_vec_lookup_x16(void *p, const uint32_t *ips,
-	uint64_t *next_hops, int size)
+	uint64_t *next_hops, int size, bool be_addr)
 {
 	struct dir24_8_tbl *dp = (struct dir24_8_tbl *)p;
 	__mmask16 msk_ext;
@@ -28,6 +28,16 @@ dir24_8_vec_lookup_x16(void *p, const uint32_t *ips,
 		res_msk = _mm512_set1_epi32(UINT16_MAX);
 
 	ip_vec = _mm512_loadu_si512(ips);
+	if (be_addr) {
+		const __m512i bswap32 = _mm512_set_epi8(
+			12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3,
+			12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3,
+			12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3,
+			12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3
+		);
+		ip_vec = _mm512_shuffle_epi8(ip_vec, bswap32);
+	}
+
 	/* mask 24 most significant bits */
 	idxes = _mm512_srli_epi32(ip_vec, 8);
 
@@ -78,7 +88,7 @@ dir24_8_vec_lookup_x16(void *p, const uint32_t *ips,
 
 static __rte_always_inline void
 dir24_8_vec_lookup_x8_8b(void *p, const uint32_t *ips,
-	uint64_t *next_hops)
+	uint64_t *next_hops, bool be_addr)
 {
 	struct dir24_8_tbl *dp = (struct dir24_8_tbl *)p;
 	const __m512i zero = _mm512_set1_epi32(0);
@@ -89,6 +99,13 @@ dir24_8_vec_lookup_x8_8b(void *p, const uint32_t *ips,
 	__mmask8 msk_ext;
 
 	ip_vec = _mm256_loadu_si256((const void *)ips);
+	if (be_addr) {
+		const __m256i bswap32 = _mm256_set_epi8(
+			12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3,
+			12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3
+		);
+		ip_vec = _mm256_shuffle_epi8(ip_vec, bswap32);
+	}
 	/* mask 24 most significant bits */
 	idxes_256 = _mm256_srli_epi32(ip_vec, 8);
 
@@ -114,52 +131,49 @@ dir24_8_vec_lookup_x8_8b(void *p, const uint32_t *ips,
 	_mm512_storeu_si512(next_hops, res);
 }
 
-void
-rte_dir24_8_vec_lookup_bulk_1b(void *p, const uint32_t *ips,
-	uint64_t *next_hops, const unsigned int n)
-{
-	uint32_t i;
-	for (i = 0; i < (n / 16); i++)
-		dir24_8_vec_lookup_x16(p, ips + i * 16, next_hops + i * 16,
-			sizeof(uint8_t));
-
-	dir24_8_lookup_bulk_1b(p, ips + i * 16, next_hops + i * 16,
-		n - i * 16);
+#define DECLARE_VECTOR_FN(suffix, nh_type, be_addr)			\
+void									\
+rte_dir24_8_vec_lookup_bulk_##suffix(void *p, const uint32_t *ips,	\
+	uint64_t *next_hops, const unsigned int n)			\
+{									\
+	uint32_t i;							\
+									\
+	for (i = 0; i < (n / 16); i++)					\
+		dir24_8_vec_lookup_x16(p, ips + i * 16, next_hops + i * 16, \
+			sizeof(nh_type), be_addr);			\
+									\
+	dir24_8_lookup_bulk_##suffix(p, ips + i * 16, next_hops + i * 16, \
+		n - i * 16);						\
 }
 
-void
-rte_dir24_8_vec_lookup_bulk_2b(void *p, const uint32_t *ips,
-	uint64_t *next_hops, const unsigned int n)
-{
-	uint32_t i;
-	for (i = 0; i < (n / 16); i++)
-		dir24_8_vec_lookup_x16(p, ips + i * 16, next_hops + i * 16,
-			sizeof(uint16_t));
-
-	dir24_8_lookup_bulk_2b(p, ips + i * 16, next_hops + i * 16,
-		n - i * 16);
-}
+DECLARE_VECTOR_FN(1b, uint8_t, false)
+DECLARE_VECTOR_FN(2b, uint16_t, false)
+DECLARE_VECTOR_FN(4b, uint32_t, false)
+DECLARE_VECTOR_FN(1b_be, uint8_t, true)
+DECLARE_VECTOR_FN(2b_be, uint16_t, true)
+DECLARE_VECTOR_FN(4b_be, uint32_t, true)
 
 void
-rte_dir24_8_vec_lookup_bulk_4b(void *p, const uint32_t *ips,
+rte_dir24_8_vec_lookup_bulk_8b(void *p, const uint32_t *ips,
 	uint64_t *next_hops, const unsigned int n)
 {
 	uint32_t i;
-	for (i = 0; i < (n / 16); i++)
-		dir24_8_vec_lookup_x16(p, ips + i * 16, next_hops + i * 16,
-			sizeof(uint32_t));
+	for (i = 0; i < (n / 8); i++)
+		dir24_8_vec_lookup_x8_8b(p, ips + i * 8,
+			next_hops + i * 8, false);
 
-	dir24_8_lookup_bulk_4b(p, ips + i * 16, next_hops + i * 16,
-		n - i * 16);
+	dir24_8_lookup_bulk_8b(p, ips + i * 8, next_hops + i * 8, n - i * 8);
 }
 
 void
-rte_dir24_8_vec_lookup_bulk_8b(void *p, const uint32_t *ips,
+rte_dir24_8_vec_lookup_bulk_8b_be(void *p, const uint32_t *ips,
 	uint64_t *next_hops, const unsigned int n)
 {
 	uint32_t i;
 	for (i = 0; i < (n / 8); i++)
-		dir24_8_vec_lookup_x8_8b(p, ips + i * 8, next_hops + i * 8);
+		dir24_8_vec_lookup_x8_8b(p, ips + i * 8,
+			next_hops + i * 8, true);
 
-	dir24_8_lookup_bulk_8b(p, ips + i * 8, next_hops + i * 8, n - i * 8);
+	dir24_8_lookup_bulk_8b_be(p, ips + i * 8,
+		next_hops + i * 8, n - i * 8);
 }
diff --git a/lib/fib/dir24_8_avx512.h b/lib/fib/dir24_8_avx512.h
index 1d3c2b9317..e9f7b72519 100644
--- a/lib/fib/dir24_8_avx512.h
+++ b/lib/fib/dir24_8_avx512.h
@@ -21,4 +21,19 @@ void
 rte_dir24_8_vec_lookup_bulk_8b(void *p, const uint32_t *ips,
 	uint64_t *next_hops, const unsigned int n);
 
+void
+rte_dir24_8_vec_lookup_bulk_1b_be(void *p, const uint32_t *ips,
+	uint64_t *next_hops, const unsigned int n);
+
+void
+rte_dir24_8_vec_lookup_bulk_2b_be(void *p, const uint32_t *ips,
+	uint64_t *next_hops, const unsigned int n);
+
+void
+rte_dir24_8_vec_lookup_bulk_4b_be(void *p, const uint32_t *ips,
+	uint64_t *next_hops, const unsigned int n);
+
+void
+rte_dir24_8_vec_lookup_bulk_8b_be(void *p, const uint32_t *ips,
+	uint64_t *next_hops, const unsigned int n);
 #endif /* _DIR248_AVX512_H_ */
diff --git a/lib/fib/meson.build b/lib/fib/meson.build
index 6795f41a0a..8c03496cdc 100644
--- a/lib/fib/meson.build
+++ b/lib/fib/meson.build
@@ -25,40 +25,28 @@ if dpdk_conf.has('RTE_ARCH_X86_64') and binutils_ok
     # linked into main lib.
 
     # check if all required flags already enabled (variant a).
-    acl_avx512_flags = ['__AVX512F__','__AVX512DQ__']
-    acl_avx512_on = true
-    foreach f:acl_avx512_flags
+    fib_avx512_flags = ['__AVX512F__','__AVX512DQ__', '__AVX512BW__']
+    fib_avx512_on = true
+    foreach f:fib_avx512_flags
         if cc.get_define(f, args: machine_args) == ''
-            acl_avx512_on = false
+            fib_avx512_on = false
         endif
     endforeach
 
-    if acl_avx512_on == true
-        cflags += ['-DCC_DIR24_8_AVX512_SUPPORT']
-        sources += files('dir24_8_avx512.c')
-        # TRIE AVX512 implementation uses avx512bw intrinsics along with
-        # avx512f and avx512dq
-        if cc.get_define('__AVX512BW__', args: machine_args) != ''
-            cflags += ['-DCC_TRIE_AVX512_SUPPORT']
-            sources += files('trie_avx512.c')
-        endif
-    elif cc.has_multi_arguments('-mavx512f', '-mavx512dq')
+    if fib_avx512_on == true
+        cflags += ['-DCC_DIR24_8_AVX512_SUPPORT', '-DCC_TRIE_AVX512_SUPPORT']
+        sources += files('dir24_8_avx512.c', 'trie_avx512.c')
+    elif cc.has_multi_arguments('-mavx512f', '-mavx512dq', '-mavx512bw')
         dir24_8_avx512_tmp = static_library('dir24_8_avx512_tmp',
                 'dir24_8_avx512.c',
                 dependencies: static_rte_eal,
-                c_args: cflags + ['-mavx512f', '-mavx512dq'])
+                c_args: cflags + ['-mavx512f', '-mavx512dq', '-mavx512bw'])
         objs += dir24_8_avx512_tmp.extract_objects('dir24_8_avx512.c')
-        cflags += ['-DCC_DIR24_8_AVX512_SUPPORT']
-        # TRIE AVX512 implementation uses avx512bw intrinsics along with
-        # avx512f and avx512dq
-        if cc.has_argument('-mavx512bw')
-            trie_avx512_tmp = static_library('trie_avx512_tmp',
+        trie_avx512_tmp = static_library('trie_avx512_tmp',
                 'trie_avx512.c',
                 dependencies: static_rte_eal,
-                c_args: cflags + ['-mavx512f', \
-                    '-mavx512dq', '-mavx512bw'])
-            objs += trie_avx512_tmp.extract_objects('trie_avx512.c')
-            cflags += ['-DCC_TRIE_AVX512_SUPPORT']
-        endif
+                c_args: cflags + ['-mavx512f', '-mavx512dq', '-mavx512bw'])
+        objs += trie_avx512_tmp.extract_objects('trie_avx512.c')
+        cflags += ['-DCC_DIR24_8_AVX512_SUPPORT', '-DCC_TRIE_AVX512_SUPPORT']
     endif
 endif
diff --git a/lib/fib/rte_fib.c b/lib/fib/rte_fib.c
index 4f9fba5a4f..991e48b5ea 100644
--- a/lib/fib/rte_fib.c
+++ b/lib/fib/rte_fib.c
@@ -42,6 +42,7 @@ EAL_REGISTER_TAILQ(rte_fib_tailq)
 struct rte_fib {
 	char			name[RTE_FIB_NAMESIZE];
 	enum rte_fib_type	type;	/**< Type of FIB struct */
+	int flags;					/**< Flags */
 	struct rte_rib		*rib;	/**< RIB helper datastructure */
 	void			*dp;	/**< pointer to the dataplane struct*/
 	rte_fib_lookup_fn_t	lookup;	/**< FIB lookup function */
@@ -110,7 +111,7 @@ init_dataplane(struct rte_fib *fib, __rte_unused int socket_id,
 		if (fib->dp == NULL)
 			return -rte_errno;
 		fib->lookup = dir24_8_get_lookup_fn(fib->dp,
-			RTE_FIB_LOOKUP_DEFAULT);
+			RTE_FIB_LOOKUP_DEFAULT, !!(fib->flags & RTE_FIB_FLAG_LOOKUP_BE));
 		fib->modify = dir24_8_modify;
 		return 0;
 	default:
@@ -214,6 +215,7 @@ rte_fib_create(const char *name, int socket_id, struct rte_fib_conf *conf)
 	rte_strlcpy(fib->name, name, sizeof(fib->name));
 	fib->rib = rib;
 	fib->type = conf->type;
+	fib->flags = conf->flags;
 	fib->def_nh = conf->default_nh;
 	ret = init_dataplane(fib, socket_id, conf);
 	if (ret < 0) {
@@ -329,7 +331,8 @@ rte_fib_select_lookup(struct rte_fib *fib,
 
 	switch (fib->type) {
 	case RTE_FIB_DIR24_8:
-		fn = dir24_8_get_lookup_fn(fib->dp, type);
+		fn = dir24_8_get_lookup_fn(fib->dp, type,
+			!!(fib->flags & RTE_FIB_FLAG_LOOKUP_BE));
 		if (fn == NULL)
 			return -EINVAL;
 		fib->lookup = fn;
diff --git a/lib/fib/rte_fib.h b/lib/fib/rte_fib.h
index d7a5aafe53..1617235e85 100644
--- a/lib/fib/rte_fib.h
+++ b/lib/fib/rte_fib.h
@@ -28,6 +28,9 @@ struct rte_rib;
 /** Maximum depth value possible for IPv4 FIB. */
 #define RTE_FIB_MAXDEPTH	32
 
+/** If set fib lookup is expecting ipv4 in network byte order */
+#define RTE_FIB_FLAG_LOOKUP_BE	1
+
 /** Type of FIB struct */
 enum rte_fib_type {
 	RTE_FIB_DUMMY,		/**< RIB tree based FIB */
@@ -76,6 +79,7 @@ enum rte_fib_lookup_type {
 /** FIB configuration structure */
 struct rte_fib_conf {
 	enum rte_fib_type type; /**< Type of FIB struct */
+	unsigned int flags;
 	/** Default value returned on lookup if there is no route */
 	uint64_t default_nh;
 	int	max_routes;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2024-10-10 11:19 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-10-08 17:16 [PATCH v2] fib: network byte order IPv4 lookup Vladimir Medvedkin
2024-10-08 21:26 ` Stephen Hemminger
2024-10-09  9:51 ` David Marchand
2024-10-09 10:56   ` David Marchand
2024-10-10 11:19     ` Medvedkin, Vladimir
2024-10-09 18:31 ` Stephen Hemminger
  -- strict thread matches above, loose matches on Subject: below --
2024-09-06 17:06 [PATCH] " Vladimir Medvedkin
2024-10-08 17:33 ` [PATCH v2] " Vladimir Medvedkin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).