DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev]  [RFC] hash: unify crc32 API header for x86 and ARM
@ 2020-04-29 18:05 pbhagavatula
  2020-04-30  9:14 ` Van Haaren, Harry
                   ` (3 more replies)
  0 siblings, 4 replies; 16+ messages in thread
From: pbhagavatula @ 2020-04-29 18:05 UTC (permalink / raw)
  To: jerinj, thomas, Yipeng Wang, Sameh Gobriel, Bruce Richardson,
	Ruifeng Wang
  Cc: dev, Pavan Nikhilesh

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Merge crc32 hash calculation public API headers for x86 and ARM,
split implementations of x86 and ARM into their respective private
headers.
This reduces the ifdef code clutter while keeping current ABI intact.

Although we install `rte_crc_arm64.h` it is not used in any of the lib or
drivers layers. All the libs and drivers use `rte_hash_crc.h` which falls
back to SW crc32 calculation for ARM platform.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---

 Currently, if application incorrectly sets CRC32_ARM64 as crc32 algorithm
 through `rte_hash_crc_set_alg()` on x86 or vice-versa we fallback to algorithm
 set previously via `rte_hash_crc_set_alg()` instead of setting the best
 available.
 This behaviour should probably change to setting the best available algorithm
 and is up for discussion.

 app/test/test_hash.c            |   6 +
 lib/librte_hash/Makefile        |   5 -
 lib/librte_hash/crc_arm64.h     |  67 +++++++++++
 lib/librte_hash/crc_x86.h       |  68 +++++++++++
 lib/librte_hash/meson.build     |   3 +-
 lib/librte_hash/rte_crc_arm64.h | 183 ------------------------------
 lib/librte_hash/rte_hash_crc.h  | 193 +++++++++++++-------------------
 7 files changed, 219 insertions(+), 306 deletions(-)
 create mode 100644 lib/librte_hash/crc_arm64.h
 create mode 100644 lib/librte_hash/crc_x86.h
 delete mode 100644 lib/librte_hash/rte_crc_arm64.h

diff --git a/app/test/test_hash.c b/app/test/test_hash.c
index afa3a1a3c..7bd457dac 100644
--- a/app/test/test_hash.c
+++ b/app/test/test_hash.c
@@ -195,7 +195,13 @@ test_crc32_hash_alg_equiv(void)
 	}

 	/* Resetting to best available algorithm */
+#if defined RTE_ARCH_X86
 	rte_hash_crc_set_alg(CRC32_SSE42_x64);
+#elif defined RTE_ARCH_ARM64
+	rte_hash_crc_set_alg(CRC32_ARM64);
+#else
+	rte_hash_crc_set_alg(CRC32_SW);
+#endif

 	if (i == CRC32_ITERATIONS)
 		return 0;
diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile
index ec9f86499..f640afc42 100644
--- a/lib/librte_hash/Makefile
+++ b/lib/librte_hash/Makefile
@@ -19,11 +19,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_fbk_hash.c
 # install this header file
 SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include := rte_hash.h
 SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_hash_crc.h
-ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
-ifneq ($(findstring RTE_MACHINE_CPUFLAG_CRC32,$(CFLAGS)),)
-SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_crc_arm64.h
-endif
-endif
 SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_jhash.h
 SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_thash.h
 SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_fbk_hash.h
diff --git a/lib/librte_hash/crc_arm64.h b/lib/librte_hash/crc_arm64.h
new file mode 100644
index 000000000..8e75f8297
--- /dev/null
+++ b/lib/librte_hash/crc_arm64.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 Cavium, Inc
+ */
+
+#ifndef _CRC_ARM64_H_
+#define _CRC_ARM64_H_
+
+/**
+ * @file
+ *
+ * CRC arm64 Hash
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <rte_cpuflags.h>
+#include <rte_branch_prediction.h>
+#include <rte_common.h>
+
+static inline uint32_t
+crc32c_arm64_u8(uint8_t data, uint32_t init_val)
+{
+	__asm__ volatile(
+			"crc32cb %w[crc], %w[crc], %w[value]"
+			: [crc] "+r" (init_val)
+			: [value] "r" (data));
+	return init_val;
+}
+
+static inline uint32_t
+crc32c_arm64_u16(uint16_t data, uint32_t init_val)
+{
+	__asm__ volatile(
+			"crc32ch %w[crc], %w[crc], %w[value]"
+			: [crc] "+r" (init_val)
+			: [value] "r" (data));
+	return init_val;
+}
+
+static inline uint32_t
+crc32c_arm64_u32(uint32_t data, uint32_t init_val)
+{
+	__asm__ volatile(
+			"crc32cw %w[crc], %w[crc], %w[value]"
+			: [crc] "+r" (init_val)
+			: [value] "r" (data));
+	return init_val;
+}
+
+static inline uint32_t
+crc32c_arm64_u64(uint64_t data, uint32_t init_val)
+{
+	__asm__ volatile(
+			"crc32cx %w[crc], %w[crc], %x[value]"
+			: [crc] "+r" (init_val)
+			: [value] "r" (data));
+	return init_val;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _CRC_ARM64_H_ */
diff --git a/lib/librte_hash/crc_x86.h b/lib/librte_hash/crc_x86.h
new file mode 100644
index 000000000..fb45f2e98
--- /dev/null
+++ b/lib/librte_hash/crc_x86.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _CRC_X86_H_
+#define _CRC_X86_H_
+
+#if defined(RTE_ARCH_X86)
+static inline uint32_t
+crc32c_sse42_u8(uint8_t data, uint32_t init_val)
+{
+	__asm__ volatile(
+			"crc32b %[data], %[init_val];"
+			: [init_val] "+r" (init_val)
+			: [data] "rm" (data));
+	return init_val;
+}
+
+static inline uint32_t
+crc32c_sse42_u16(uint16_t data, uint32_t init_val)
+{
+	__asm__ volatile(
+			"crc32w %[data], %[init_val];"
+			: [init_val] "+r" (init_val)
+			: [data] "rm" (data));
+	return init_val;
+}
+
+static inline uint32_t
+crc32c_sse42_u32(uint32_t data, uint32_t init_val)
+{
+	__asm__ volatile(
+			"crc32l %[data], %[init_val];"
+			: [init_val] "+r" (init_val)
+			: [data] "rm" (data));
+	return init_val;
+}
+
+static inline uint32_t
+crc32c_sse42_u64_mimic(uint64_t data, uint32_t init_val)
+{
+	union {
+		uint32_t u32[2];
+		uint64_t u64;
+	} d;
+
+	d.u64 = data;
+	init_val = crc32c_sse42_u32(d.u32[0], init_val);
+	init_val = crc32c_sse42_u32(d.u32[1], init_val);
+	return init_val;
+}
+#endif
+
+#ifdef RTE_ARCH_X86_64
+static inline uint32_t
+crc32c_sse42_u64(uint64_t data, uint32_t init_val)
+{
+	uint64_t val = init_val;
+
+	__asm__ volatile(
+			"crc32q %[data], %[init_val];"
+			: [init_val] "+r" (val)
+			: [data] "rm" (data));
+	return (uint32_t)val;
+}
+#endif
+
+#endif
diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build
index 6ab46ae9d..90a180bc8 100644
--- a/lib/librte_hash/meson.build
+++ b/lib/librte_hash/meson.build
@@ -1,8 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation

-headers = files('rte_crc_arm64.h',
-	'rte_fbk_hash.h',
+headers = files('rte_fbk_hash.h',
 	'rte_hash_crc.h',
 	'rte_hash.h',
 	'rte_jhash.h',
diff --git a/lib/librte_hash/rte_crc_arm64.h b/lib/librte_hash/rte_crc_arm64.h
deleted file mode 100644
index b4628cfc0..000000000
--- a/lib/librte_hash/rte_crc_arm64.h
+++ /dev/null
@@ -1,183 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2015 Cavium, Inc
- */
-
-#ifndef _RTE_CRC_ARM64_H_
-#define _RTE_CRC_ARM64_H_
-
-/**
- * @file
- *
- * RTE CRC arm64 Hash
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <stdint.h>
-#include <rte_cpuflags.h>
-#include <rte_branch_prediction.h>
-#include <rte_common.h>
-
-static inline uint32_t
-crc32c_arm64_u8(uint8_t data, uint32_t init_val)
-{
-	__asm__ volatile(
-			"crc32cb %w[crc], %w[crc], %w[value]"
-			: [crc] "+r" (init_val)
-			: [value] "r" (data));
-	return init_val;
-}
-
-static inline uint32_t
-crc32c_arm64_u16(uint16_t data, uint32_t init_val)
-{
-	__asm__ volatile(
-			"crc32ch %w[crc], %w[crc], %w[value]"
-			: [crc] "+r" (init_val)
-			: [value] "r" (data));
-	return init_val;
-}
-
-static inline uint32_t
-crc32c_arm64_u32(uint32_t data, uint32_t init_val)
-{
-	__asm__ volatile(
-			"crc32cw %w[crc], %w[crc], %w[value]"
-			: [crc] "+r" (init_val)
-			: [value] "r" (data));
-	return init_val;
-}
-
-static inline uint32_t
-crc32c_arm64_u64(uint64_t data, uint32_t init_val)
-{
-	__asm__ volatile(
-			"crc32cx %w[crc], %w[crc], %x[value]"
-			: [crc] "+r" (init_val)
-			: [value] "r" (data));
-	return init_val;
-}
-
-/**
- * Allow or disallow use of arm64 SIMD instrinsics for CRC32 hash
- * calculation.
- *
- * @param alg
- *   An OR of following flags:
- *   - (CRC32_SW) Don't use arm64 crc intrinsics
- *   - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available
- *
- */
-static inline void
-rte_hash_crc_set_alg(uint8_t alg)
-{
-	switch (alg) {
-	case CRC32_ARM64:
-		if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
-			alg = CRC32_SW;
-		/* fall-through */
-	case CRC32_SW:
-		crc32_alg = alg;
-		/* fall-through */
-	default:
-		break;
-	}
-}
-
-/* Setting the best available algorithm */
-RTE_INIT(rte_hash_crc_init_alg)
-{
-	rte_hash_crc_set_alg(CRC32_ARM64);
-}
-
-/**
- * Use single crc32 instruction to perform a hash on a 1 byte value.
- * Fall back to software crc32 implementation in case arm64 crc intrinsics is
- * not supported
- *
- * @param data
- *   Data to perform hash on.
- * @param init_val
- *   Value to initialise hash generator.
- * @return
- *   32bit calculated hash value.
- */
-static inline uint32_t
-rte_hash_crc_1byte(uint8_t data, uint32_t init_val)
-{
-	if (likely(crc32_alg & CRC32_ARM64))
-		return crc32c_arm64_u8(data, init_val);
-
-	return crc32c_1byte(data, init_val);
-}
-
-/**
- * Use single crc32 instruction to perform a hash on a 2 bytes value.
- * Fall back to software crc32 implementation in case arm64 crc intrinsics is
- * not supported
- *
- * @param data
- *   Data to perform hash on.
- * @param init_val
- *   Value to initialise hash generator.
- * @return
- *   32bit calculated hash value.
- */
-static inline uint32_t
-rte_hash_crc_2byte(uint16_t data, uint32_t init_val)
-{
-	if (likely(crc32_alg & CRC32_ARM64))
-		return crc32c_arm64_u16(data, init_val);
-
-	return crc32c_2bytes(data, init_val);
-}
-
-/**
- * Use single crc32 instruction to perform a hash on a 4 byte value.
- * Fall back to software crc32 implementation in case arm64 crc intrinsics is
- * not supported
- *
- * @param data
- *   Data to perform hash on.
- * @param init_val
- *   Value to initialise hash generator.
- * @return
- *   32bit calculated hash value.
- */
-static inline uint32_t
-rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
-{
-	if (likely(crc32_alg & CRC32_ARM64))
-		return crc32c_arm64_u32(data, init_val);
-
-	return crc32c_1word(data, init_val);
-}
-
-/**
- * Use single crc32 instruction to perform a hash on a 8 byte value.
- * Fall back to software crc32 implementation in case arm64 crc intrinsics is
- * not supported
- *
- * @param data
- *   Data to perform hash on.
- * @param init_val
- *   Value to initialise hash generator.
- * @return
- *   32bit calculated hash value.
- */
-static inline uint32_t
-rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
-{
-	if (likely(crc32_alg == CRC32_ARM64))
-		return crc32c_arm64_u64(data, init_val);
-
-	return crc32c_2words(data, init_val);
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _RTE_CRC_ARM64_H_ */
diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h
index cf28031b3..292697db1 100644
--- a/lib/librte_hash/rte_hash_crc.h
+++ b/lib/librte_hash/rte_hash_crc.h
@@ -21,6 +21,15 @@ extern "C" {
 #include <rte_branch_prediction.h>
 #include <rte_common.h>

+typedef uint32_t
+(*crc32_handler_1b)(uint8_t data, uint32_t init_val);
+typedef uint32_t
+(*crc32_handler_2b)(uint16_t data, uint32_t init_val);
+typedef uint32_t
+(*crc32_handler_4b)(uint32_t data, uint32_t init_val);
+typedef uint32_t
+(*crc32_handler_8b)(uint64_t data, uint32_t init_val);
+
 /* Lookup tables for software implementation of CRC32C */
 static const uint32_t crc32c_tables[8][256] = {{
  0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
@@ -322,7 +331,7 @@ crc32c_2bytes(uint16_t data, uint32_t init_val)
 }

 static inline uint32_t
-crc32c_1word(uint32_t data, uint32_t init_val)
+crc32c_4bytes(uint32_t data, uint32_t init_val)
 {
 	uint32_t crc, term1, term2;
 	crc = init_val;
@@ -336,7 +345,7 @@ crc32c_1word(uint32_t data, uint32_t init_val)
 }

 static inline uint32_t
-crc32c_2words(uint64_t data, uint32_t init_val)
+crc32c_8bytes(uint64_t data, uint32_t init_val)
 {
 	uint32_t crc, term1, term2;
 	union {
@@ -357,109 +366,94 @@ crc32c_2words(uint64_t data, uint32_t init_val)

 	return crc;
 }
-
-#if defined(RTE_ARCH_X86)
-static inline uint32_t
-crc32c_sse42_u8(uint8_t data, uint32_t init_val)
-{
-	__asm__ volatile(
-			"crc32b %[data], %[init_val];"
-			: [init_val] "+r" (init_val)
-			: [data] "rm" (data));
-	return init_val;
-}
-
-static inline uint32_t
-crc32c_sse42_u16(uint16_t data, uint32_t init_val)
-{
-	__asm__ volatile(
-			"crc32w %[data], %[init_val];"
-			: [init_val] "+r" (init_val)
-			: [data] "rm" (data));
-	return init_val;
-}
-
-static inline uint32_t
-crc32c_sse42_u32(uint32_t data, uint32_t init_val)
-{
-	__asm__ volatile(
-			"crc32l %[data], %[init_val];"
-			: [init_val] "+r" (init_val)
-			: [data] "rm" (data));
-	return init_val;
-}
-
-static inline uint32_t
-crc32c_sse42_u64_mimic(uint64_t data, uint64_t init_val)
-{
-	union {
-		uint32_t u32[2];
-		uint64_t u64;
-	} d;
-
-	d.u64 = data;
-	init_val = crc32c_sse42_u32(d.u32[0], (uint32_t)init_val);
-	init_val = crc32c_sse42_u32(d.u32[1], (uint32_t)init_val);
-	return (uint32_t)init_val;
-}
-#endif
-
-#ifdef RTE_ARCH_X86_64
-static inline uint32_t
-crc32c_sse42_u64(uint64_t data, uint64_t init_val)
-{
-	__asm__ volatile(
-			"crc32q %[data], %[init_val];"
-			: [init_val] "+r" (init_val)
-			: [data] "rm" (data));
-	return (uint32_t)init_val;
-}
-#endif
-
 #define CRC32_SW            (1U << 0)
 #define CRC32_SSE42         (1U << 1)
 #define CRC32_x64           (1U << 2)
 #define CRC32_SSE42_x64     (CRC32_x64|CRC32_SSE42)
 #define CRC32_ARM64         (1U << 3)

-static uint8_t crc32_alg = CRC32_SW;
+static crc32_handler_1b crc32_1b = crc32c_1byte;
+static crc32_handler_2b crc32_2b = crc32c_2bytes;
+static crc32_handler_4b crc32_4b = crc32c_4bytes;
+static crc32_handler_8b crc32_8b = crc32c_8bytes;

 #if defined(RTE_ARCH_ARM64) && defined(RTE_MACHINE_CPUFLAG_CRC32)
-#include "rte_crc_arm64.h"
-#else
+#include "crc_arm64.h"
+#endif
+
+#if defined(RTE_ARCH_X86)
+#include "crc_x86.h"
+#endif

 /**
- * Allow or disallow use of SSE4.2 instrinsics for CRC32 hash
+ * Allow or disallow use of SSE4.2/ARMv8 instrinsics for CRC32 hash
  * calculation.
  *
  * @param alg
  *   An OR of following flags:
- *   - (CRC32_SW) Don't use SSE4.2 intrinsics
+ *   - (CRC32_SW) Don't use SSE4.2 intrinsics (default non-[x86/ARMv8])
  *   - (CRC32_SSE42) Use SSE4.2 intrinsics if available
- *   - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default)
- *
+ *   - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default x86)
+ *   - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available
  */
 static inline void
 rte_hash_crc_set_alg(uint8_t alg)
 {
-#if defined(RTE_ARCH_X86)
-	if (alg == CRC32_SSE42_x64 &&
-			!rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T))
-		alg = CRC32_SSE42;
+	switch (alg) {
+	case CRC32_SSE42_x64:
+#if defined RTE_ARCH_X86
+		crc32_1b = crc32c_sse42_u8;
+		crc32_2b = crc32c_sse42_u16;
+		crc32_4b = crc32c_sse42_u32;
+
+	if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T))
+		crc32_8b = crc32c_sse42_u64_mimic;
+	else
+		crc32_8b = crc32c_sse42_u64;
 #endif
-	crc32_alg = alg;
+		break;
+	case CRC32_SSE42:
+#if defined RTE_ARCH_X86
+		crc32_1b = crc32c_sse42_u8;
+		crc32_2b = crc32c_sse42_u16;
+		crc32_4b = crc32c_sse42_u32;
+		crc32_8b = crc32c_sse42_u64_mimic;
+#endif
+		break;
+	case CRC32_ARM64:
+#if defined RTE_ARCH_ARM64
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32)) {
+		crc32_1b = crc32c_arm64_u8;
+		crc32_2b = crc32c_arm64_u16;
+		crc32_4b = crc32c_arm64_u32;
+		crc32_8b = crc32c_arm64_u64;
+	}
+#endif
+		break;
+	case CRC32_SW:
+	default:
+		crc32_1b = crc32c_1byte;
+		crc32_2b = crc32c_2bytes;
+		crc32_4b = crc32c_4bytes;
+		crc32_8b = crc32c_8bytes;
+	break;
+	}
 }

 /* Setting the best available algorithm */
 RTE_INIT(rte_hash_crc_init_alg)
 {
+#if defined RTE_ARCH_X86
 	rte_hash_crc_set_alg(CRC32_SSE42_x64);
+#elif defined RTE_ARCH_ARM64
+	rte_hash_crc_set_alg(CRC32_ARM64);
+#else
+	rte_hash_crc_set_alg(CRC32_SW);
+#endif
 }

 /**
- * Use single crc32 instruction to perform a hash on a byte value.
- * Fall back to software crc32 implementation in case SSE4.2 is
- * not supported
+ * Calculate crc32 hash value of 1bytes.
  *
  * @param data
  *   Data to perform hash on.
@@ -471,18 +465,11 @@ RTE_INIT(rte_hash_crc_init_alg)
 static inline uint32_t
 rte_hash_crc_1byte(uint8_t data, uint32_t init_val)
 {
-#if defined RTE_ARCH_X86
-	if (likely(crc32_alg & CRC32_SSE42))
-		return crc32c_sse42_u8(data, init_val);
-#endif
-
-	return crc32c_1byte(data, init_val);
+	return (crc32_1b)(data, init_val);
 }

 /**
- * Use single crc32 instruction to perform a hash on a 2 bytes value.
- * Fall back to software crc32 implementation in case SSE4.2 is
- * not supported
+ * Calculate crc32 hash value of 2bytes.
  *
  * @param data
  *   Data to perform hash on.
@@ -494,18 +481,11 @@ rte_hash_crc_1byte(uint8_t data, uint32_t init_val)
 static inline uint32_t
 rte_hash_crc_2byte(uint16_t data, uint32_t init_val)
 {
-#if defined RTE_ARCH_X86
-	if (likely(crc32_alg & CRC32_SSE42))
-		return crc32c_sse42_u16(data, init_val);
-#endif
-
-	return crc32c_2bytes(data, init_val);
+	return (crc32_2b)(data, init_val);
 }

 /**
- * Use single crc32 instruction to perform a hash on a 4 byte value.
- * Fall back to software crc32 implementation in case SSE4.2 is
- * not supported
+ * Calculate crc32 hash value of 4bytes.
  *
  * @param data
  *   Data to perform hash on.
@@ -517,18 +497,11 @@ rte_hash_crc_2byte(uint16_t data, uint32_t init_val)
 static inline uint32_t
 rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
 {
-#if defined RTE_ARCH_X86
-	if (likely(crc32_alg & CRC32_SSE42))
-		return crc32c_sse42_u32(data, init_val);
-#endif
-
-	return crc32c_1word(data, init_val);
+	return (crc32_4b)(data, init_val);
 }

 /**
- * Use single crc32 instruction to perform a hash on a 8 byte value.
- * Fall back to software crc32 implementation in case SSE4.2 is
- * not supported
+ * Calculate crc32 hash value of 8bytes.
  *
  * @param data
  *   Data to perform hash on.
@@ -540,21 +513,9 @@ rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
 static inline uint32_t
 rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
 {
-#ifdef RTE_ARCH_X86_64
-	if (likely(crc32_alg == CRC32_SSE42_x64))
-		return crc32c_sse42_u64(data, init_val);
-#endif
-
-#if defined RTE_ARCH_X86
-	if (likely(crc32_alg & CRC32_SSE42))
-		return crc32c_sse42_u64_mimic(data, init_val);
-#endif
-
-	return crc32c_2words(data, init_val);
+	return (crc32_8b)(data, init_val);
 }

-#endif
-
 /**
  * Calculate CRC32 hash on user-supplied byte array.
  *
--
2.17.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [RFC] hash: unify crc32 API header for x86 and ARM
  2020-04-29 18:05 [dpdk-dev] [RFC] hash: unify crc32 API header for x86 and ARM pbhagavatula
@ 2020-04-30  9:14 ` Van Haaren, Harry
  2020-04-30  9:27   ` Pavan Nikhilesh Bhagavatula
  2020-05-06 22:02 ` Wang, Yipeng1
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 16+ messages in thread
From: Van Haaren, Harry @ 2020-04-30  9:14 UTC (permalink / raw)
  To: pbhagavatula, jerinj, thomas, Wang, Yipeng1, Gobriel, Sameh,
	Richardson, Bruce, Ruifeng Wang
  Cc: dev

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of pbhagavatula@marvell.com
> Sent: Wednesday, April 29, 2020 7:05 PM
> To: jerinj@marvell.com; thomas@monjalon.net; Wang, Yipeng1
> <yipeng1.wang@intel.com>; Gobriel, Sameh <sameh.gobriel@intel.com>;
> Richardson, Bruce <bruce.richardson@intel.com>; Ruifeng Wang
> <ruifeng.wang@arm.com>
> Cc: dev@dpdk.org; Pavan Nikhilesh <pbhagavatula@marvell.com>
> Subject: [dpdk-dev] [RFC] hash: unify crc32 API header for x86 and ARM
> 
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> 
> Merge crc32 hash calculation public API headers for x86 and ARM,
> split implementations of x86 and ARM into their respective private
> headers.
> This reduces the ifdef code clutter while keeping current ABI intact.
> 
> Although we install `rte_crc_arm64.h` it is not used in any of the lib or
> drivers layers. All the libs and drivers use `rte_hash_crc.h` which falls
> back to SW crc32 calculation for ARM platform.

<snip lots of stuff, to focus on meson install header change>

> diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build
> index 6ab46ae9d..90a180bc8 100644
> --- a/lib/librte_hash/meson.build
> +++ b/lib/librte_hash/meson.build
> @@ -1,8 +1,7 @@
>  # SPDX-License-Identifier: BSD-3-Clause
>  # Copyright(c) 2017 Intel Corporation
> 
> -headers = files('rte_crc_arm64.h',
> -	'rte_fbk_hash.h',
> +headers = files('rte_fbk_hash.h',
>  	'rte_hash_crc.h',
>  	'rte_hash.h',
>  	'rte_jhash.h',

Am I right in that previously an application could #include <rte_crc_arm64.h>  and hence if we no
longer install that file, this will cause a compilation failure on that application? Applications shouldn't
include arch specific headers... but we shouldn't knowingly remove publicly accessible includes either.

Perhaps consider just installing a dummy header file if the code cleanup in the rest of the patch is desired?

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [RFC] hash: unify crc32 API header for x86 and ARM
  2020-04-30  9:14 ` Van Haaren, Harry
@ 2020-04-30  9:27   ` Pavan Nikhilesh Bhagavatula
  0 siblings, 0 replies; 16+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2020-04-30  9:27 UTC (permalink / raw)
  To: Van Haaren, Harry, Jerin Jacob Kollanukkaran, thomas, Wang,
	Yipeng1, Gobriel, Sameh, Richardson, Bruce, Ruifeng Wang
  Cc: dev

>> -----Original Message-----
>> From: dev <dev-bounces@dpdk.org> On Behalf Of
>pbhagavatula@marvell.com
>> Sent: Wednesday, April 29, 2020 7:05 PM
>> To: jerinj@marvell.com; thomas@monjalon.net; Wang, Yipeng1
>> <yipeng1.wang@intel.com>; Gobriel, Sameh
><sameh.gobriel@intel.com>;
>> Richardson, Bruce <bruce.richardson@intel.com>; Ruifeng Wang
>> <ruifeng.wang@arm.com>
>> Cc: dev@dpdk.org; Pavan Nikhilesh <pbhagavatula@marvell.com>
>> Subject: [dpdk-dev] [RFC] hash: unify crc32 API header for x86 and
>ARM
>>
>> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>>
>> Merge crc32 hash calculation public API headers for x86 and ARM,
>> split implementations of x86 and ARM into their respective private
>> headers.
>> This reduces the ifdef code clutter while keeping current ABI intact.
>>
>> Although we install `rte_crc_arm64.h` it is not used in any of the lib or
>> drivers layers. All the libs and drivers use `rte_hash_crc.h` which falls
>> back to SW crc32 calculation for ARM platform.
>
><snip lots of stuff, to focus on meson install header change>
>
>> diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build
>> index 6ab46ae9d..90a180bc8 100644
>> --- a/lib/librte_hash/meson.build
>> +++ b/lib/librte_hash/meson.build
>> @@ -1,8 +1,7 @@
>>  # SPDX-License-Identifier: BSD-3-Clause
>>  # Copyright(c) 2017 Intel Corporation
>>
>> -headers = files('rte_crc_arm64.h',
>> -	'rte_fbk_hash.h',
>> +headers = files('rte_fbk_hash.h',
>>  	'rte_hash_crc.h',
>>  	'rte_hash.h',
>>  	'rte_jhash.h',
>
>Am I right in that previously an application could #include
><rte_crc_arm64.h>  and hence if we no
>longer install that file, this will cause a compilation failure on that
>application? Applications shouldn't
>include arch specific headers... but we shouldn't knowingly remove
>publicly accessible includes either.
>
>Perhaps consider just installing a dummy header file if the code cleanup
>in the rest of the patch is desired?

Sure we could either symlink `rte_hash_crc.h` as `rte_crc_arm64.h` or
Just include rte_hash_crc.h in rte_crc_arm64.h for now and remove
rte_crc_arm64.h later with a deprecation notice? 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [RFC] hash: unify crc32 API header for x86 and ARM
  2020-04-29 18:05 [dpdk-dev] [RFC] hash: unify crc32 API header for x86 and ARM pbhagavatula
  2020-04-30  9:14 ` Van Haaren, Harry
@ 2020-05-06 22:02 ` Wang, Yipeng1
  2020-05-10 22:49   ` Pavan Nikhilesh Bhagavatula
  2020-05-08 12:55 ` Ananyev, Konstantin
  2020-05-12 20:40 ` [dpdk-dev] [RFC v2] " pbhagavatula
  3 siblings, 1 reply; 16+ messages in thread
From: Wang, Yipeng1 @ 2020-05-06 22:02 UTC (permalink / raw)
  To: Pavan Nikhilesh Bhagavatula, Van Haaren, Harry,
	Jerin Jacob Kollanukkaran, thomas, Gobriel, Sameh, Richardson,
	Bruce, Ruifeng Wang
  Cc: dev

> -----Original Message-----
> From: pbhagavatula@marvell.com <pbhagavatula@marvell.com>
> Sent: Wednesday, April 29, 2020 11:05 AM
> To: jerinj@marvell.com; thomas@monjalon.net; Wang, Yipeng1
> <yipeng1.wang@intel.com>; Gobriel, Sameh <sameh.gobriel@intel.com>;
> Richardson, Bruce <bruce.richardson@intel.com>; Ruifeng Wang
> <ruifeng.wang@arm.com>
> Cc: dev@dpdk.org; Pavan Nikhilesh <pbhagavatula@marvell.com>
> Subject: [dpdk-dev] [RFC] hash: unify crc32 API header for x86 and ARM
> 
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> 
> Merge crc32 hash calculation public API headers for x86 and ARM, split
> implementations of x86 and ARM into their respective private headers.
> This reduces the ifdef code clutter while keeping current ABI intact.
> 
> Although we install `rte_crc_arm64.h` it is not used in any of the lib or drivers
> layers. All the libs and drivers use `rte_hash_crc.h` which falls back to SW
> crc32 calculation for ARM platform.
> 
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> ---
> 
>  Currently, if application incorrectly sets CRC32_ARM64 as crc32 algorithm
> through `rte_hash_crc_set_alg()` on x86 or vice-versa we fallback to
> algorithm  set previously via `rte_hash_crc_set_alg()` instead of setting the
> best  available.
>  This behaviour should probably change to setting the best available
> algorithm  and is up for discussion.

[Wang, Yipeng] Should it be an illegal flag if I set ARM64
On x86? I am thinking we should generate a warning message into logs if this happens.
> 
>  app/test/test_hash.c            |   6 +
>  lib/librte_hash/Makefile        |   5 -
>  lib/librte_hash/crc_arm64.h     |  67 +++++++++++
>  lib/librte_hash/crc_x86.h       |  68 +++++++++++
>  lib/librte_hash/meson.build     |   3 +-
>  lib/librte_hash/rte_crc_arm64.h | 183 ------------------------------
> lib/librte_hash/rte_hash_crc.h  | 193 +++++++++++++-------------------
>  7 files changed, 219 insertions(+), 306 deletions(-)  create mode 100644
> lib/librte_hash/crc_arm64.h  create mode 100644 lib/librte_hash/crc_x86.h
> delete mode 100644 lib/librte_hash/rte_crc_arm64.h
> 
> diff --git a/app/test/test_hash.c b/app/test/test_hash.c index
> afa3a1a3c..7bd457dac 100644
> --- a/app/test/test_hash.c
> +++ b/app/test/test_hash.c
> @@ -195,7 +195,13 @@ test_crc32_hash_alg_equiv(void)
>  	}
> 
>  	/* Resetting to best available algorithm */
> +#if defined RTE_ARCH_X86
>  	rte_hash_crc_set_alg(CRC32_SSE42_x64);
> +#elif defined RTE_ARCH_ARM64
> +	rte_hash_crc_set_alg(CRC32_ARM64);
> +#else
> +	rte_hash_crc_set_alg(CRC32_SW);
> +#endif
> 
>  	if (i == CRC32_ITERATIONS)
>  		return 0;
> diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile index
> ec9f86499..f640afc42 100644
> --- a/lib/librte_hash/Makefile
> +++ b/lib/librte_hash/Makefile
> @@ -19,11 +19,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_fbk_hash.c
> # install this header file  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include :=
> rte_hash.h  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include +=
> rte_hash_crc.h -ifeq ($(CONFIG_RTE_ARCH_ARM64),y) -ifneq ($(findstring
> RTE_MACHINE_CPUFLAG_CRC32,$(CFLAGS)),)
> -SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_crc_arm64.h -endif
> -endif  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_jhash.h
> SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_thash.h  SYMLINK-
> $(CONFIG_RTE_LIBRTE_HASH)-include += rte_fbk_hash.h diff --git
> a/lib/librte_hash/crc_arm64.h b/lib/librte_hash/crc_arm64.h new file mode
> 100644 index 000000000..8e75f8297
> --- /dev/null
> +++ b/lib/librte_hash/crc_arm64.h
> @@ -0,0 +1,67 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2015 Cavium, Inc
> + */
> +
> +#ifndef _CRC_ARM64_H_
> +#define _CRC_ARM64_H_
> +
> +/**
> + * @file
> + *
> + * CRC arm64 Hash
> + */
[Wang, Yipeng] It is hidden now so do we still need this doxygen region above?
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include <stdint.h>
> +#include <rte_cpuflags.h>
> +#include <rte_branch_prediction.h>
> +#include <rte_common.h>
[Wang, Yipeng] I don't think we need all these headers in this file.
> +
> +static inline uint32_t
> +crc32c_arm64_u8(uint8_t data, uint32_t init_val) {
> +	__asm__ volatile(
> +			"crc32cb %w[crc], %w[crc], %w[value]"
> +			: [crc] "+r" (init_val)
> +			: [value] "r" (data));
> +	return init_val;
> +}
> +
> +static inline uint32_t
> +crc32c_arm64_u16(uint16_t data, uint32_t init_val) {
> +	__asm__ volatile(
> +			"crc32ch %w[crc], %w[crc], %w[value]"
> +			: [crc] "+r" (init_val)
> +			: [value] "r" (data));
> +	return init_val;
> +}
> +
> +static inline uint32_t
> +crc32c_arm64_u32(uint32_t data, uint32_t init_val) {
> +	__asm__ volatile(
> +			"crc32cw %w[crc], %w[crc], %w[value]"
> +			: [crc] "+r" (init_val)
> +			: [value] "r" (data));
> +	return init_val;
> +}
> +
> +static inline uint32_t
> +crc32c_arm64_u64(uint64_t data, uint32_t init_val) {
> +	__asm__ volatile(
> +			"crc32cx %w[crc], %w[crc], %x[value]"
> +			: [crc] "+r" (init_val)
> +			: [value] "r" (data));
> +	return init_val;
> +}
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _CRC_ARM64_H_ */
> diff --git a/lib/librte_hash/crc_x86.h b/lib/librte_hash/crc_x86.h new file
> mode 100644 index 000000000..fb45f2e98
> --- /dev/null
> +++ b/lib/librte_hash/crc_x86.h
> @@ -0,0 +1,68 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2010-2014 Intel Corporation  */
> +
> +#ifndef _CRC_X86_H_
> +#define _CRC_X86_H_
> +
> +#if defined(RTE_ARCH_X86)
> +static inline uint32_t
> +crc32c_sse42_u8(uint8_t data, uint32_t init_val) {
> +	__asm__ volatile(
> +			"crc32b %[data], %[init_val];"
> +			: [init_val] "+r" (init_val)
> +			: [data] "rm" (data));
> +	return init_val;
> +}
> +
> +static inline uint32_t
> +crc32c_sse42_u16(uint16_t data, uint32_t init_val) {
> +	__asm__ volatile(
> +			"crc32w %[data], %[init_val];"
> +			: [init_val] "+r" (init_val)
> +			: [data] "rm" (data));
> +	return init_val;
> +}
> +
> +static inline uint32_t
> +crc32c_sse42_u32(uint32_t data, uint32_t init_val) {
> +	__asm__ volatile(
> +			"crc32l %[data], %[init_val];"
> +			: [init_val] "+r" (init_val)
> +			: [data] "rm" (data));
> +	return init_val;
> +}
> +
> +static inline uint32_t
> +crc32c_sse42_u64_mimic(uint64_t data, uint32_t init_val) {
> +	union {
> +		uint32_t u32[2];
> +		uint64_t u64;
> +	} d;
> +
> +	d.u64 = data;
> +	init_val = crc32c_sse42_u32(d.u32[0], init_val);
> +	init_val = crc32c_sse42_u32(d.u32[1], init_val);
> +	return init_val;
> +}
> +#endif
> +
> +#ifdef RTE_ARCH_X86_64
> +static inline uint32_t
> +crc32c_sse42_u64(uint64_t data, uint32_t init_val) {
> +	uint64_t val = init_val;
> +
> +	__asm__ volatile(
> +			"crc32q %[data], %[init_val];"
> +			: [init_val] "+r" (val)
> +			: [data] "rm" (data));
> +	return (uint32_t)val;
> +}
> +#endif
> +
> +#endif
> diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build index
> 6ab46ae9d..90a180bc8 100644
> --- a/lib/librte_hash/meson.build
> +++ b/lib/librte_hash/meson.build
> @@ -1,8 +1,7 @@
>  # SPDX-License-Identifier: BSD-3-Clause  # Copyright(c) 2017 Intel
> Corporation
> 
> -headers = files('rte_crc_arm64.h',
> -	'rte_fbk_hash.h',
> +headers = files('rte_fbk_hash.h',
>  	'rte_hash_crc.h',
>  	'rte_hash.h',
>  	'rte_jhash.h',
> diff --git a/lib/librte_hash/rte_crc_arm64.h
> b/lib/librte_hash/rte_crc_arm64.h deleted file mode 100644 index
> b4628cfc0..000000000
> --- a/lib/librte_hash/rte_crc_arm64.h
> +++ /dev/null
> @@ -1,183 +0,0 @@
> -/* SPDX-License-Identifier: BSD-3-Clause
> - * Copyright(c) 2015 Cavium, Inc
> - */
> -
> -#ifndef _RTE_CRC_ARM64_H_
> -#define _RTE_CRC_ARM64_H_
> -
> -/**
> - * @file
> - *
> - * RTE CRC arm64 Hash
> - */
> -
> -#ifdef __cplusplus
> -extern "C" {
> -#endif
> -
> -#include <stdint.h>
> -#include <rte_cpuflags.h>
> -#include <rte_branch_prediction.h>
> -#include <rte_common.h>
> -
> -static inline uint32_t
> -crc32c_arm64_u8(uint8_t data, uint32_t init_val) -{
> -	__asm__ volatile(
> -			"crc32cb %w[crc], %w[crc], %w[value]"
> -			: [crc] "+r" (init_val)
> -			: [value] "r" (data));
> -	return init_val;
> -}
> -
> -static inline uint32_t
> -crc32c_arm64_u16(uint16_t data, uint32_t init_val) -{
> -	__asm__ volatile(
> -			"crc32ch %w[crc], %w[crc], %w[value]"
> -			: [crc] "+r" (init_val)
> -			: [value] "r" (data));
> -	return init_val;
> -}
> -
> -static inline uint32_t
> -crc32c_arm64_u32(uint32_t data, uint32_t init_val) -{
> -	__asm__ volatile(
> -			"crc32cw %w[crc], %w[crc], %w[value]"
> -			: [crc] "+r" (init_val)
> -			: [value] "r" (data));
> -	return init_val;
> -}
> -
> -static inline uint32_t
> -crc32c_arm64_u64(uint64_t data, uint32_t init_val) -{
> -	__asm__ volatile(
> -			"crc32cx %w[crc], %w[crc], %x[value]"
> -			: [crc] "+r" (init_val)
> -			: [value] "r" (data));
> -	return init_val;
> -}
> -
> -/**
> - * Allow or disallow use of arm64 SIMD instrinsics for CRC32 hash
> - * calculation.
> - *
> - * @param alg
> - *   An OR of following flags:
> - *   - (CRC32_SW) Don't use arm64 crc intrinsics
> - *   - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available
> - *
> - */
> -static inline void
> -rte_hash_crc_set_alg(uint8_t alg)
> -{
> -	switch (alg) {
> -	case CRC32_ARM64:
> -		if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
> -			alg = CRC32_SW;
> -		/* fall-through */
> -	case CRC32_SW:
> -		crc32_alg = alg;
> -		/* fall-through */
> -	default:
> -		break;
> -	}
> -}
> -
> -/* Setting the best available algorithm */
> -RTE_INIT(rte_hash_crc_init_alg)
> -{
> -	rte_hash_crc_set_alg(CRC32_ARM64);
> -}
> -
> -/**
> - * Use single crc32 instruction to perform a hash on a 1 byte value.
> - * Fall back to software crc32 implementation in case arm64 crc intrinsics is
> - * not supported
> - *
> - * @param data
> - *   Data to perform hash on.
> - * @param init_val
> - *   Value to initialise hash generator.
> - * @return
> - *   32bit calculated hash value.
> - */
> -static inline uint32_t
> -rte_hash_crc_1byte(uint8_t data, uint32_t init_val) -{
> -	if (likely(crc32_alg & CRC32_ARM64))
> -		return crc32c_arm64_u8(data, init_val);
> -
> -	return crc32c_1byte(data, init_val);
> -}
> -
> -/**
> - * Use single crc32 instruction to perform a hash on a 2 bytes value.
> - * Fall back to software crc32 implementation in case arm64 crc intrinsics is
> - * not supported
> - *
> - * @param data
> - *   Data to perform hash on.
> - * @param init_val
> - *   Value to initialise hash generator.
> - * @return
> - *   32bit calculated hash value.
> - */
> -static inline uint32_t
> -rte_hash_crc_2byte(uint16_t data, uint32_t init_val) -{
> -	if (likely(crc32_alg & CRC32_ARM64))
> -		return crc32c_arm64_u16(data, init_val);
> -
> -	return crc32c_2bytes(data, init_val);
> -}
> -
> -/**
> - * Use single crc32 instruction to perform a hash on a 4 byte value.
> - * Fall back to software crc32 implementation in case arm64 crc intrinsics is
> - * not supported
> - *
> - * @param data
> - *   Data to perform hash on.
> - * @param init_val
> - *   Value to initialise hash generator.
> - * @return
> - *   32bit calculated hash value.
> - */
> -static inline uint32_t
> -rte_hash_crc_4byte(uint32_t data, uint32_t init_val) -{
> -	if (likely(crc32_alg & CRC32_ARM64))
> -		return crc32c_arm64_u32(data, init_val);
> -
> -	return crc32c_1word(data, init_val);
> -}
> -
> -/**
> - * Use single crc32 instruction to perform a hash on a 8 byte value.
> - * Fall back to software crc32 implementation in case arm64 crc intrinsics is
> - * not supported
> - *
> - * @param data
> - *   Data to perform hash on.
> - * @param init_val
> - *   Value to initialise hash generator.
> - * @return
> - *   32bit calculated hash value.
> - */
> -static inline uint32_t
> -rte_hash_crc_8byte(uint64_t data, uint32_t init_val) -{
> -	if (likely(crc32_alg == CRC32_ARM64))
> -		return crc32c_arm64_u64(data, init_val);
> -
> -	return crc32c_2words(data, init_val);
> -}
> -
> -#ifdef __cplusplus
> -}
> -#endif
> -
> -#endif /* _RTE_CRC_ARM64_H_ */
> diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h
> index cf28031b3..292697db1 100644
> --- a/lib/librte_hash/rte_hash_crc.h
> +++ b/lib/librte_hash/rte_hash_crc.h
> @@ -21,6 +21,15 @@ extern "C" {
>  #include <rte_branch_prediction.h>
>  #include <rte_common.h>
> 
> +typedef uint32_t
> +(*crc32_handler_1b)(uint8_t data, uint32_t init_val); typedef uint32_t
> +(*crc32_handler_2b)(uint16_t data, uint32_t init_val); typedef uint32_t
> +(*crc32_handler_4b)(uint32_t data, uint32_t init_val); typedef uint32_t
> +(*crc32_handler_8b)(uint64_t data, uint32_t init_val);

[Wang, Yipeng] I guess functions pointers makes the code cleaner.
I am just not sure on performance implications for indirect call.
@Harry for his comment.

> +
>  /* Lookup tables for software implementation of CRC32C */  static const
> uint32_t crc32c_tables[8][256] = {{
>   0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C,
> 0x26A1E7E8, 0xD4CA64EB, @@ -322,7 +331,7 @@ crc32c_2bytes(uint16_t
> data, uint32_t init_val)  }
> 
>  static inline uint32_t
> -crc32c_1word(uint32_t data, uint32_t init_val)
> +crc32c_4bytes(uint32_t data, uint32_t init_val)
>  {
>  	uint32_t crc, term1, term2;
>  	crc = init_val;
> @@ -336,7 +345,7 @@ crc32c_1word(uint32_t data, uint32_t init_val)  }
> 
>  static inline uint32_t
> -crc32c_2words(uint64_t data, uint32_t init_val)
> +crc32c_8bytes(uint64_t data, uint32_t init_val)
>  {
>  	uint32_t crc, term1, term2;
>  	union {
> @@ -357,109 +366,94 @@ crc32c_2words(uint64_t data, uint32_t init_val)
> 
>  	return crc;
>  }
> -
> -#if defined(RTE_ARCH_X86)
> -static inline uint32_t
> -crc32c_sse42_u8(uint8_t data, uint32_t init_val) -{
> -	__asm__ volatile(
> -			"crc32b %[data], %[init_val];"
> -			: [init_val] "+r" (init_val)
> -			: [data] "rm" (data));
> -	return init_val;
> -}
> -
> -static inline uint32_t
> -crc32c_sse42_u16(uint16_t data, uint32_t init_val) -{
> -	__asm__ volatile(
> -			"crc32w %[data], %[init_val];"
> -			: [init_val] "+r" (init_val)
> -			: [data] "rm" (data));
> -	return init_val;
> -}
> -
> -static inline uint32_t
> -crc32c_sse42_u32(uint32_t data, uint32_t init_val) -{
> -	__asm__ volatile(
> -			"crc32l %[data], %[init_val];"
> -			: [init_val] "+r" (init_val)
> -			: [data] "rm" (data));
> -	return init_val;
> -}
> -
> -static inline uint32_t
> -crc32c_sse42_u64_mimic(uint64_t data, uint64_t init_val) -{
> -	union {
> -		uint32_t u32[2];
> -		uint64_t u64;
> -	} d;
> -
> -	d.u64 = data;
> -	init_val = crc32c_sse42_u32(d.u32[0], (uint32_t)init_val);
> -	init_val = crc32c_sse42_u32(d.u32[1], (uint32_t)init_val);
> -	return (uint32_t)init_val;
> -}
> -#endif
> -
> -#ifdef RTE_ARCH_X86_64
> -static inline uint32_t
> -crc32c_sse42_u64(uint64_t data, uint64_t init_val) -{
> -	__asm__ volatile(
> -			"crc32q %[data], %[init_val];"
> -			: [init_val] "+r" (init_val)
> -			: [data] "rm" (data));
> -	return (uint32_t)init_val;
> -}
> -#endif
> -
[Wang, Yipeng] keep the blank line before define.
>  #define CRC32_SW            (1U << 0)
>  #define CRC32_SSE42         (1U << 1)
>  #define CRC32_x64           (1U << 2)
>  #define CRC32_SSE42_x64     (CRC32_x64|CRC32_SSE42)
>  #define CRC32_ARM64         (1U << 3)
> 
> -static uint8_t crc32_alg = CRC32_SW;
> +static crc32_handler_1b crc32_1b = crc32c_1byte; static
> +crc32_handler_2b crc32_2b = crc32c_2bytes; static crc32_handler_4b
> +crc32_4b = crc32c_4bytes; static crc32_handler_8b crc32_8b =
> +crc32c_8bytes;
> 
>  #if defined(RTE_ARCH_ARM64) &&
> defined(RTE_MACHINE_CPUFLAG_CRC32)
> -#include "rte_crc_arm64.h"
> -#else
> +#include "crc_arm64.h"
> +#endif
> +
> +#if defined(RTE_ARCH_X86)
> +#include "crc_x86.h"
> +#endif
> 
>  /**
> - * Allow or disallow use of SSE4.2 instrinsics for CRC32 hash
> + * Allow or disallow use of SSE4.2/ARMv8 instrinsics for CRC32 hash
>   * calculation.
>   *
>   * @param alg
>   *   An OR of following flags:
> - *   - (CRC32_SW) Don't use SSE4.2 intrinsics
> + *   - (CRC32_SW) Don't use SSE4.2 intrinsics (default non-[x86/ARMv8])
>   *   - (CRC32_SSE42) Use SSE4.2 intrinsics if available
> - *   - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default)
> - *
> + *   - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default x86)
> + *   - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available
>   */
>  static inline void
>  rte_hash_crc_set_alg(uint8_t alg)
>  {
> -#if defined(RTE_ARCH_X86)
> -	if (alg == CRC32_SSE42_x64 &&
> -			!rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T))
> -		alg = CRC32_SSE42;
> +	switch (alg) {
> +	case CRC32_SSE42_x64:
> +#if defined RTE_ARCH_X86
> +		crc32_1b = crc32c_sse42_u8;
> +		crc32_2b = crc32c_sse42_u16;
> +		crc32_4b = crc32c_sse42_u32;
> +
> +	if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T))
> +		crc32_8b = crc32c_sse42_u64_mimic;
> +	else
> +		crc32_8b = crc32c_sse42_u64;
>  #endif
> -	crc32_alg = alg;
> +		break;
> +	case CRC32_SSE42:
> +#if defined RTE_ARCH_X86
> +		crc32_1b = crc32c_sse42_u8;
> +		crc32_2b = crc32c_sse42_u16;
> +		crc32_4b = crc32c_sse42_u32;
> +		crc32_8b = crc32c_sse42_u64_mimic;
> +#endif
> +		break;
> +	case CRC32_ARM64:
> +#if defined RTE_ARCH_ARM64
> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32)) {
> +		crc32_1b = crc32c_arm64_u8;
> +		crc32_2b = crc32c_arm64_u16;
> +		crc32_4b = crc32c_arm64_u32;
> +		crc32_8b = crc32c_arm64_u64;
> +	}
> +#endif
> +		break;
> +	case CRC32_SW:
> +	default:
> +		crc32_1b = crc32c_1byte;
> +		crc32_2b = crc32c_2bytes;
> +		crc32_4b = crc32c_4bytes;
> +		crc32_8b = crc32c_8bytes;
> +	break;
> +	}
>  }
> 
>  /* Setting the best available algorithm */
>  RTE_INIT(rte_hash_crc_init_alg)
>  {
> +#if defined RTE_ARCH_X86
>  	rte_hash_crc_set_alg(CRC32_SSE42_x64);
> +#elif defined RTE_ARCH_ARM64
> +	rte_hash_crc_set_alg(CRC32_ARM64);
> +#else
> +	rte_hash_crc_set_alg(CRC32_SW);
> +#endif
>  }
> 
>  /**
> - * Use single crc32 instruction to perform a hash on a byte value.
> - * Fall back to software crc32 implementation in case SSE4.2 is
> - * not supported
> + * Calculate crc32 hash value of 1bytes.
>   *
>   * @param data
>   *   Data to perform hash on.
> @@ -471,18 +465,11 @@ RTE_INIT(rte_hash_crc_init_alg)  static inline
> uint32_t  rte_hash_crc_1byte(uint8_t data, uint32_t init_val)  { -#if defined
> RTE_ARCH_X86
> -	if (likely(crc32_alg & CRC32_SSE42))
> -		return crc32c_sse42_u8(data, init_val);
> -#endif
> -
> -	return crc32c_1byte(data, init_val);
> +	return (crc32_1b)(data, init_val);
>  }
> 
>  /**
> - * Use single crc32 instruction to perform a hash on a 2 bytes value.
> - * Fall back to software crc32 implementation in case SSE4.2 is
> - * not supported
> + * Calculate crc32 hash value of 2bytes.
>   *
>   * @param data
>   *   Data to perform hash on.
> @@ -494,18 +481,11 @@ rte_hash_crc_1byte(uint8_t data, uint32_t init_val)
> static inline uint32_t  rte_hash_crc_2byte(uint16_t data, uint32_t init_val)  { -
> #if defined RTE_ARCH_X86
> -	if (likely(crc32_alg & CRC32_SSE42))
> -		return crc32c_sse42_u16(data, init_val);
> -#endif
> -
> -	return crc32c_2bytes(data, init_val);
> +	return (crc32_2b)(data, init_val);
>  }
> 
>  /**
> - * Use single crc32 instruction to perform a hash on a 4 byte value.
> - * Fall back to software crc32 implementation in case SSE4.2 is
> - * not supported
> + * Calculate crc32 hash value of 4bytes.
>   *
>   * @param data
>   *   Data to perform hash on.
> @@ -517,18 +497,11 @@ rte_hash_crc_2byte(uint16_t data, uint32_t init_val)
> static inline uint32_t  rte_hash_crc_4byte(uint32_t data, uint32_t init_val)  { -
> #if defined RTE_ARCH_X86
> -	if (likely(crc32_alg & CRC32_SSE42))
> -		return crc32c_sse42_u32(data, init_val);
> -#endif
> -
> -	return crc32c_1word(data, init_val);
> +	return (crc32_4b)(data, init_val);
>  }
> 
>  /**
> - * Use single crc32 instruction to perform a hash on a 8 byte value.
> - * Fall back to software crc32 implementation in case SSE4.2 is
> - * not supported
> + * Calculate crc32 hash value of 8bytes.
>   *
>   * @param data
>   *   Data to perform hash on.
> @@ -540,21 +513,9 @@ rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
> static inline uint32_t  rte_hash_crc_8byte(uint64_t data, uint32_t init_val)  { -
> #ifdef RTE_ARCH_X86_64
> -	if (likely(crc32_alg == CRC32_SSE42_x64))
> -		return crc32c_sse42_u64(data, init_val);
> -#endif
> -
> -#if defined RTE_ARCH_X86
> -	if (likely(crc32_alg & CRC32_SSE42))
> -		return crc32c_sse42_u64_mimic(data, init_val);
> -#endif
> -
> -	return crc32c_2words(data, init_val);
> +	return (crc32_8b)(data, init_val);
>  }
> 
> -#endif
> -
>  /**
>   * Calculate CRC32 hash on user-supplied byte array.
>   *
> --
> 2.17.1
[Wang, Yipeng] 
Thanks for the patch, please see my comment inlined.

I think it is good to hide the architecture specific headers.
And I agree with Harry that we shouldn't silently remove a public header.
I don't know the best practice on handling this though (e.g. symlink or dummyfile), either way looks fine to me.
Bruce/Thomas may comment.




^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [RFC] hash: unify crc32 API header for x86 and ARM
  2020-04-29 18:05 [dpdk-dev] [RFC] hash: unify crc32 API header for x86 and ARM pbhagavatula
  2020-04-30  9:14 ` Van Haaren, Harry
  2020-05-06 22:02 ` Wang, Yipeng1
@ 2020-05-08 12:55 ` Ananyev, Konstantin
  2020-05-10 22:53   ` Pavan Nikhilesh Bhagavatula
  2020-05-12 20:40 ` [dpdk-dev] [RFC v2] " pbhagavatula
  3 siblings, 1 reply; 16+ messages in thread
From: Ananyev, Konstantin @ 2020-05-08 12:55 UTC (permalink / raw)
  To: pbhagavatula, jerinj, thomas, Wang, Yipeng1, Gobriel, Sameh,
	Richardson, Bruce, Ruifeng Wang
  Cc: dev


> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> 
> Merge crc32 hash calculation public API headers for x86 and ARM,
> split implementations of x86 and ARM into their respective private
> headers.
> This reduces the ifdef code clutter while keeping current ABI intact.
> 
> Although we install `rte_crc_arm64.h` it is not used in any of the lib or
> drivers layers. All the libs and drivers use `rte_hash_crc.h` which falls
> back to SW crc32 calculation for ARM platform.
> 
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> ---
> 
>  Currently, if application incorrectly sets CRC32_ARM64 as crc32 algorithm
>  through `rte_hash_crc_set_alg()` on x86 or vice-versa we fallback to algorithm
>  set previously via `rte_hash_crc_set_alg()` instead of setting the best
>  available.
>  This behaviour should probably change to setting the best available algorithm
>  and is up for discussion.
> 
>  app/test/test_hash.c            |   6 +
>  lib/librte_hash/Makefile        |   5 -
>  lib/librte_hash/crc_arm64.h     |  67 +++++++++++
>  lib/librte_hash/crc_x86.h       |  68 +++++++++++
>  lib/librte_hash/meson.build     |   3 +-
>  lib/librte_hash/rte_crc_arm64.h | 183 ------------------------------
>  lib/librte_hash/rte_hash_crc.h  | 193 +++++++++++++-------------------
>  7 files changed, 219 insertions(+), 306 deletions(-)
>  create mode 100644 lib/librte_hash/crc_arm64.h
>  create mode 100644 lib/librte_hash/crc_x86.h
>  delete mode 100644 lib/librte_hash/rte_crc_arm64.h
> 
> diff --git a/app/test/test_hash.c b/app/test/test_hash.c
> index afa3a1a3c..7bd457dac 100644
> --- a/app/test/test_hash.c
> +++ b/app/test/test_hash.c
> @@ -195,7 +195,13 @@ test_crc32_hash_alg_equiv(void)
>  	}
> 
>  	/* Resetting to best available algorithm */
> +#if defined RTE_ARCH_X86
>  	rte_hash_crc_set_alg(CRC32_SSE42_x64);
> +#elif defined RTE_ARCH_ARM64
> +	rte_hash_crc_set_alg(CRC32_ARM64);
> +#else
> +	rte_hash_crc_set_alg(CRC32_SW);
> +#endif
> 
>  	if (i == CRC32_ITERATIONS)
>  		return 0;
> diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile
> index ec9f86499..f640afc42 100644
> --- a/lib/librte_hash/Makefile
> +++ b/lib/librte_hash/Makefile
> @@ -19,11 +19,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_fbk_hash.c
>  # install this header file
>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include := rte_hash.h
>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_hash_crc.h
> -ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
> -ifneq ($(findstring RTE_MACHINE_CPUFLAG_CRC32,$(CFLAGS)),)
> -SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_crc_arm64.h
> -endif
> -endif
>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_jhash.h
>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_thash.h
>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_fbk_hash.h
> diff --git a/lib/librte_hash/crc_arm64.h b/lib/librte_hash/crc_arm64.h
> new file mode 100644
> index 000000000..8e75f8297

Wouldn't that break 'make  install T=...'?
As now rte_hash_crc.h includes not public headers (crc_x86.h, etc.).
Same question about external apps, where they would get from these headers?

> --- /dev/null
> +++ b/lib/librte_hash/crc_arm64.h
> @@ -0,0 +1,67 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2015 Cavium, Inc
> + */
> +
> +#ifndef _CRC_ARM64_H_
> +#define _CRC_ARM64_H_
> +
> +/**
> + * @file
> + *
> + * CRC arm64 Hash
> + */
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include <stdint.h>
> +#include <rte_cpuflags.h>
> +#include <rte_branch_prediction.h>
> +#include <rte_common.h>
> +
> +static inline uint32_t
> +crc32c_arm64_u8(uint8_t data, uint32_t init_val)
> +{
> +	__asm__ volatile(
> +			"crc32cb %w[crc], %w[crc], %w[value]"
> +			: [crc] "+r" (init_val)
> +			: [value] "r" (data));
> +	return init_val;
> +}
> +
> +static inline uint32_t
> +crc32c_arm64_u16(uint16_t data, uint32_t init_val)
> +{
> +	__asm__ volatile(
> +			"crc32ch %w[crc], %w[crc], %w[value]"
> +			: [crc] "+r" (init_val)
> +			: [value] "r" (data));
> +	return init_val;
> +}
> +
> +static inline uint32_t
> +crc32c_arm64_u32(uint32_t data, uint32_t init_val)
> +{
> +	__asm__ volatile(
> +			"crc32cw %w[crc], %w[crc], %w[value]"
> +			: [crc] "+r" (init_val)
> +			: [value] "r" (data));
> +	return init_val;
> +}
> +
> +static inline uint32_t
> +crc32c_arm64_u64(uint64_t data, uint32_t init_val)
> +{
> +	__asm__ volatile(
> +			"crc32cx %w[crc], %w[crc], %x[value]"
> +			: [crc] "+r" (init_val)
> +			: [value] "r" (data));
> +	return init_val;
> +}
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _CRC_ARM64_H_ */
> diff --git a/lib/librte_hash/crc_x86.h b/lib/librte_hash/crc_x86.h
> new file mode 100644
> index 000000000..fb45f2e98
> --- /dev/null
> +++ b/lib/librte_hash/crc_x86.h
> @@ -0,0 +1,68 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2010-2014 Intel Corporation
> + */
> +
> +#ifndef _CRC_X86_H_
> +#define _CRC_X86_H_
> +
> +#if defined(RTE_ARCH_X86)
> +static inline uint32_t
> +crc32c_sse42_u8(uint8_t data, uint32_t init_val)
> +{
> +	__asm__ volatile(
> +			"crc32b %[data], %[init_val];"
> +			: [init_val] "+r" (init_val)
> +			: [data] "rm" (data));
> +	return init_val;
> +}
> +
> +static inline uint32_t
> +crc32c_sse42_u16(uint16_t data, uint32_t init_val)
> +{
> +	__asm__ volatile(
> +			"crc32w %[data], %[init_val];"
> +			: [init_val] "+r" (init_val)
> +			: [data] "rm" (data));
> +	return init_val;
> +}
> +
> +static inline uint32_t
> +crc32c_sse42_u32(uint32_t data, uint32_t init_val)
> +{
> +	__asm__ volatile(
> +			"crc32l %[data], %[init_val];"
> +			: [init_val] "+r" (init_val)
> +			: [data] "rm" (data));
> +	return init_val;
> +}
> +
> +static inline uint32_t
> +crc32c_sse42_u64_mimic(uint64_t data, uint32_t init_val)
> +{
> +	union {
> +		uint32_t u32[2];
> +		uint64_t u64;
> +	} d;
> +
> +	d.u64 = data;
> +	init_val = crc32c_sse42_u32(d.u32[0], init_val);
> +	init_val = crc32c_sse42_u32(d.u32[1], init_val);
> +	return init_val;
> +}
> +#endif
> +
> +#ifdef RTE_ARCH_X86_64
> +static inline uint32_t
> +crc32c_sse42_u64(uint64_t data, uint32_t init_val)
> +{
> +	uint64_t val = init_val;
> +
> +	__asm__ volatile(
> +			"crc32q %[data], %[init_val];"
> +			: [init_val] "+r" (val)
> +			: [data] "rm" (data));
> +	return (uint32_t)val;
> +}
> +#endif
> +
> +#endif
> diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build
> index 6ab46ae9d..90a180bc8 100644
> --- a/lib/librte_hash/meson.build
> +++ b/lib/librte_hash/meson.build
> @@ -1,8 +1,7 @@
>  # SPDX-License-Identifier: BSD-3-Clause
>  # Copyright(c) 2017 Intel Corporation
> 
> -headers = files('rte_crc_arm64.h',
> -	'rte_fbk_hash.h',
> +headers = files('rte_fbk_hash.h',
>  	'rte_hash_crc.h',
>  	'rte_hash.h',
>  	'rte_jhash.h',
> diff --git a/lib/librte_hash/rte_crc_arm64.h b/lib/librte_hash/rte_crc_arm64.h
> deleted file mode 100644
> index b4628cfc0..000000000
> --- a/lib/librte_hash/rte_crc_arm64.h
> +++ /dev/null
> @@ -1,183 +0,0 @@
> -/* SPDX-License-Identifier: BSD-3-Clause
> - * Copyright(c) 2015 Cavium, Inc
> - */
> -
> -#ifndef _RTE_CRC_ARM64_H_
> -#define _RTE_CRC_ARM64_H_
> -
> -/**
> - * @file
> - *
> - * RTE CRC arm64 Hash
> - */
> -
> -#ifdef __cplusplus
> -extern "C" {
> -#endif
> -
> -#include <stdint.h>
> -#include <rte_cpuflags.h>
> -#include <rte_branch_prediction.h>
> -#include <rte_common.h>
> -
> -static inline uint32_t
> -crc32c_arm64_u8(uint8_t data, uint32_t init_val)
> -{
> -	__asm__ volatile(
> -			"crc32cb %w[crc], %w[crc], %w[value]"
> -			: [crc] "+r" (init_val)
> -			: [value] "r" (data));
> -	return init_val;
> -}
> -
> -static inline uint32_t
> -crc32c_arm64_u16(uint16_t data, uint32_t init_val)
> -{
> -	__asm__ volatile(
> -			"crc32ch %w[crc], %w[crc], %w[value]"
> -			: [crc] "+r" (init_val)
> -			: [value] "r" (data));
> -	return init_val;
> -}
> -
> -static inline uint32_t
> -crc32c_arm64_u32(uint32_t data, uint32_t init_val)
> -{
> -	__asm__ volatile(
> -			"crc32cw %w[crc], %w[crc], %w[value]"
> -			: [crc] "+r" (init_val)
> -			: [value] "r" (data));
> -	return init_val;
> -}
> -
> -static inline uint32_t
> -crc32c_arm64_u64(uint64_t data, uint32_t init_val)
> -{
> -	__asm__ volatile(
> -			"crc32cx %w[crc], %w[crc], %x[value]"
> -			: [crc] "+r" (init_val)
> -			: [value] "r" (data));
> -	return init_val;
> -}
> -
> -/**
> - * Allow or disallow use of arm64 SIMD instrinsics for CRC32 hash
> - * calculation.
> - *
> - * @param alg
> - *   An OR of following flags:
> - *   - (CRC32_SW) Don't use arm64 crc intrinsics
> - *   - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available
> - *
> - */
> -static inline void
> -rte_hash_crc_set_alg(uint8_t alg)
> -{
> -	switch (alg) {
> -	case CRC32_ARM64:
> -		if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
> -			alg = CRC32_SW;
> -		/* fall-through */
> -	case CRC32_SW:
> -		crc32_alg = alg;
> -		/* fall-through */
> -	default:
> -		break;
> -	}
> -}
> -
> -/* Setting the best available algorithm */
> -RTE_INIT(rte_hash_crc_init_alg)
> -{
> -	rte_hash_crc_set_alg(CRC32_ARM64);
> -}
> -
> -/**
> - * Use single crc32 instruction to perform a hash on a 1 byte value.
> - * Fall back to software crc32 implementation in case arm64 crc intrinsics is
> - * not supported
> - *
> - * @param data
> - *   Data to perform hash on.
> - * @param init_val
> - *   Value to initialise hash generator.
> - * @return
> - *   32bit calculated hash value.
> - */
> -static inline uint32_t
> -rte_hash_crc_1byte(uint8_t data, uint32_t init_val)
> -{
> -	if (likely(crc32_alg & CRC32_ARM64))
> -		return crc32c_arm64_u8(data, init_val);
> -
> -	return crc32c_1byte(data, init_val);
> -}
> -
> -/**
> - * Use single crc32 instruction to perform a hash on a 2 bytes value.
> - * Fall back to software crc32 implementation in case arm64 crc intrinsics is
> - * not supported
> - *
> - * @param data
> - *   Data to perform hash on.
> - * @param init_val
> - *   Value to initialise hash generator.
> - * @return
> - *   32bit calculated hash value.
> - */
> -static inline uint32_t
> -rte_hash_crc_2byte(uint16_t data, uint32_t init_val)
> -{
> -	if (likely(crc32_alg & CRC32_ARM64))
> -		return crc32c_arm64_u16(data, init_val);
> -
> -	return crc32c_2bytes(data, init_val);
> -}
> -
> -/**
> - * Use single crc32 instruction to perform a hash on a 4 byte value.
> - * Fall back to software crc32 implementation in case arm64 crc intrinsics is
> - * not supported
> - *
> - * @param data
> - *   Data to perform hash on.
> - * @param init_val
> - *   Value to initialise hash generator.
> - * @return
> - *   32bit calculated hash value.
> - */
> -static inline uint32_t
> -rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
> -{
> -	if (likely(crc32_alg & CRC32_ARM64))
> -		return crc32c_arm64_u32(data, init_val);
> -
> -	return crc32c_1word(data, init_val);
> -}
> -
> -/**
> - * Use single crc32 instruction to perform a hash on a 8 byte value.
> - * Fall back to software crc32 implementation in case arm64 crc intrinsics is
> - * not supported
> - *
> - * @param data
> - *   Data to perform hash on.
> - * @param init_val
> - *   Value to initialise hash generator.
> - * @return
> - *   32bit calculated hash value.
> - */
> -static inline uint32_t
> -rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
> -{
> -	if (likely(crc32_alg == CRC32_ARM64))
> -		return crc32c_arm64_u64(data, init_val);
> -
> -	return crc32c_2words(data, init_val);
> -}
> -
> -#ifdef __cplusplus
> -}
> -#endif
> -
> -#endif /* _RTE_CRC_ARM64_H_ */
> diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h
> index cf28031b3..292697db1 100644
> --- a/lib/librte_hash/rte_hash_crc.h
> +++ b/lib/librte_hash/rte_hash_crc.h
> @@ -21,6 +21,15 @@ extern "C" {
>  #include <rte_branch_prediction.h>
>  #include <rte_common.h>
> 
> +typedef uint32_t
> +(*crc32_handler_1b)(uint8_t data, uint32_t init_val);
> +typedef uint32_t
> +(*crc32_handler_2b)(uint16_t data, uint32_t init_val);
> +typedef uint32_t
> +(*crc32_handler_4b)(uint32_t data, uint32_t init_val);
> +typedef uint32_t
> +(*crc32_handler_8b)(uint64_t data, uint32_t init_val);
> +
>  /* Lookup tables for software implementation of CRC32C */
>  static const uint32_t crc32c_tables[8][256] = {{
>   0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
> @@ -322,7 +331,7 @@ crc32c_2bytes(uint16_t data, uint32_t init_val)
>  }
> 
>  static inline uint32_t
> -crc32c_1word(uint32_t data, uint32_t init_val)
> +crc32c_4bytes(uint32_t data, uint32_t init_val)
>  {
>  	uint32_t crc, term1, term2;
>  	crc = init_val;
> @@ -336,7 +345,7 @@ crc32c_1word(uint32_t data, uint32_t init_val)
>  }
> 
>  static inline uint32_t
> -crc32c_2words(uint64_t data, uint32_t init_val)
> +crc32c_8bytes(uint64_t data, uint32_t init_val)
>  {
>  	uint32_t crc, term1, term2;
>  	union {
> @@ -357,109 +366,94 @@ crc32c_2words(uint64_t data, uint32_t init_val)
> 
>  	return crc;
>  }
> -
> -#if defined(RTE_ARCH_X86)
> -static inline uint32_t
> -crc32c_sse42_u8(uint8_t data, uint32_t init_val)
> -{
> -	__asm__ volatile(
> -			"crc32b %[data], %[init_val];"
> -			: [init_val] "+r" (init_val)
> -			: [data] "rm" (data));
> -	return init_val;
> -}
> -
> -static inline uint32_t
> -crc32c_sse42_u16(uint16_t data, uint32_t init_val)
> -{
> -	__asm__ volatile(
> -			"crc32w %[data], %[init_val];"
> -			: [init_val] "+r" (init_val)
> -			: [data] "rm" (data));
> -	return init_val;
> -}
> -
> -static inline uint32_t
> -crc32c_sse42_u32(uint32_t data, uint32_t init_val)
> -{
> -	__asm__ volatile(
> -			"crc32l %[data], %[init_val];"
> -			: [init_val] "+r" (init_val)
> -			: [data] "rm" (data));
> -	return init_val;
> -}
> -
> -static inline uint32_t
> -crc32c_sse42_u64_mimic(uint64_t data, uint64_t init_val)
> -{
> -	union {
> -		uint32_t u32[2];
> -		uint64_t u64;
> -	} d;
> -
> -	d.u64 = data;
> -	init_val = crc32c_sse42_u32(d.u32[0], (uint32_t)init_val);
> -	init_val = crc32c_sse42_u32(d.u32[1], (uint32_t)init_val);
> -	return (uint32_t)init_val;
> -}
> -#endif
> -
> -#ifdef RTE_ARCH_X86_64
> -static inline uint32_t
> -crc32c_sse42_u64(uint64_t data, uint64_t init_val)
> -{
> -	__asm__ volatile(
> -			"crc32q %[data], %[init_val];"
> -			: [init_val] "+r" (init_val)
> -			: [data] "rm" (data));
> -	return (uint32_t)init_val;
> -}
> -#endif
> -
>  #define CRC32_SW            (1U << 0)
>  #define CRC32_SSE42         (1U << 1)
>  #define CRC32_x64           (1U << 2)
>  #define CRC32_SSE42_x64     (CRC32_x64|CRC32_SSE42)
>  #define CRC32_ARM64         (1U << 3)
> 
> -static uint8_t crc32_alg = CRC32_SW;
> +static crc32_handler_1b crc32_1b = crc32c_1byte;
> +static crc32_handler_2b crc32_2b = crc32c_2bytes;
> +static crc32_handler_4b crc32_4b = crc32c_4bytes;
> +static crc32_handler_8b crc32_8b = crc32c_8bytes;
> 
>  #if defined(RTE_ARCH_ARM64) && defined(RTE_MACHINE_CPUFLAG_CRC32)
> -#include "rte_crc_arm64.h"
> -#else
> +#include "crc_arm64.h"
> +#endif
> +
> +#if defined(RTE_ARCH_X86)
> +#include "crc_x86.h"
> +#endif
> 
>  /**
> - * Allow or disallow use of SSE4.2 instrinsics for CRC32 hash
> + * Allow or disallow use of SSE4.2/ARMv8 instrinsics for CRC32 hash
>   * calculation.
>   *
>   * @param alg
>   *   An OR of following flags:
> - *   - (CRC32_SW) Don't use SSE4.2 intrinsics
> + *   - (CRC32_SW) Don't use SSE4.2 intrinsics (default non-[x86/ARMv8])
>   *   - (CRC32_SSE42) Use SSE4.2 intrinsics if available
> - *   - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default)
> - *
> + *   - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default x86)
> + *   - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available
>   */
>  static inline void
>  rte_hash_crc_set_alg(uint8_t alg)
>  {
> -#if defined(RTE_ARCH_X86)
> -	if (alg == CRC32_SSE42_x64 &&
> -			!rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T))
> -		alg = CRC32_SSE42;
> +	switch (alg) {
> +	case CRC32_SSE42_x64:
> +#if defined RTE_ARCH_X86
> +		crc32_1b = crc32c_sse42_u8;
> +		crc32_2b = crc32c_sse42_u16;
> +		crc32_4b = crc32c_sse42_u32;
> +
> +	if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T))
> +		crc32_8b = crc32c_sse42_u64_mimic;
> +	else
> +		crc32_8b = crc32c_sse42_u64;
>  #endif
> -	crc32_alg = alg;
> +		break;
> +	case CRC32_SSE42:
> +#if defined RTE_ARCH_X86
> +		crc32_1b = crc32c_sse42_u8;
> +		crc32_2b = crc32c_sse42_u16;
> +		crc32_4b = crc32c_sse42_u32;
> +		crc32_8b = crc32c_sse42_u64_mimic;
> +#endif
> +		break;
> +	case CRC32_ARM64:
> +#if defined RTE_ARCH_ARM64
> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32)) {
> +		crc32_1b = crc32c_arm64_u8;
> +		crc32_2b = crc32c_arm64_u16;
> +		crc32_4b = crc32c_arm64_u32;
> +		crc32_8b = crc32c_arm64_u64;
> +	}
> +#endif
> +		break;
> +	case CRC32_SW:
> +	default:
> +		crc32_1b = crc32c_1byte;
> +		crc32_2b = crc32c_2bytes;
> +		crc32_4b = crc32c_4bytes;
> +		crc32_8b = crc32c_8bytes;
> +	break;
> +	}
>  }
> 
>  /* Setting the best available algorithm */
>  RTE_INIT(rte_hash_crc_init_alg)
>  {
> +#if defined RTE_ARCH_X86
>  	rte_hash_crc_set_alg(CRC32_SSE42_x64);
> +#elif defined RTE_ARCH_ARM64
> +	rte_hash_crc_set_alg(CRC32_ARM64);
> +#else
> +	rte_hash_crc_set_alg(CRC32_SW);
> +#endif
>  }
> 
>  /**
> - * Use single crc32 instruction to perform a hash on a byte value.
> - * Fall back to software crc32 implementation in case SSE4.2 is
> - * not supported
> + * Calculate crc32 hash value of 1bytes.
>   *
>   * @param data
>   *   Data to perform hash on.
> @@ -471,18 +465,11 @@ RTE_INIT(rte_hash_crc_init_alg)
>  static inline uint32_t
>  rte_hash_crc_1byte(uint8_t data, uint32_t init_val)
>  {
> -#if defined RTE_ARCH_X86
> -	if (likely(crc32_alg & CRC32_SSE42))
> -		return crc32c_sse42_u8(data, init_val);
> -#endif
> -
> -	return crc32c_1byte(data, init_val);
> +	return (crc32_1b)(data, init_val);
>  }
> 
>  /**
> - * Use single crc32 instruction to perform a hash on a 2 bytes value.
> - * Fall back to software crc32 implementation in case SSE4.2 is
> - * not supported
> + * Calculate crc32 hash value of 2bytes.
>   *
>   * @param data
>   *   Data to perform hash on.
> @@ -494,18 +481,11 @@ rte_hash_crc_1byte(uint8_t data, uint32_t init_val)
>  static inline uint32_t
>  rte_hash_crc_2byte(uint16_t data, uint32_t init_val)
>  {
> -#if defined RTE_ARCH_X86
> -	if (likely(crc32_alg & CRC32_SSE42))
> -		return crc32c_sse42_u16(data, init_val);
> -#endif
> -
> -	return crc32c_2bytes(data, init_val);
> +	return (crc32_2b)(data, init_val);
>  }
> 
>  /**
> - * Use single crc32 instruction to perform a hash on a 4 byte value.
> - * Fall back to software crc32 implementation in case SSE4.2 is
> - * not supported
> + * Calculate crc32 hash value of 4bytes.
>   *
>   * @param data
>   *   Data to perform hash on.
> @@ -517,18 +497,11 @@ rte_hash_crc_2byte(uint16_t data, uint32_t init_val)
>  static inline uint32_t
>  rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
>  {
> -#if defined RTE_ARCH_X86
> -	if (likely(crc32_alg & CRC32_SSE42))
> -		return crc32c_sse42_u32(data, init_val);
> -#endif
> -
> -	return crc32c_1word(data, init_val);
> +	return (crc32_4b)(data, init_val);
>  }
> 
>  /**
> - * Use single crc32 instruction to perform a hash on a 8 byte value.
> - * Fall back to software crc32 implementation in case SSE4.2 is
> - * not supported
> + * Calculate crc32 hash value of 8bytes.
>   *
>   * @param data
>   *   Data to perform hash on.
> @@ -540,21 +513,9 @@ rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
>  static inline uint32_t
>  rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
>  {
> -#ifdef RTE_ARCH_X86_64
> -	if (likely(crc32_alg == CRC32_SSE42_x64))
> -		return crc32c_sse42_u64(data, init_val);
> -#endif
> -
> -#if defined RTE_ARCH_X86
> -	if (likely(crc32_alg & CRC32_SSE42))
> -		return crc32c_sse42_u64_mimic(data, init_val);
> -#endif
> -
> -	return crc32c_2words(data, init_val);
> +	return (crc32_8b)(data, init_val);
>  }
> 
> -#endif
> -
>  /**
>   * Calculate CRC32 hash on user-supplied byte array.
>   *
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [RFC] hash: unify crc32 API header for x86 and ARM
  2020-05-06 22:02 ` Wang, Yipeng1
@ 2020-05-10 22:49   ` Pavan Nikhilesh Bhagavatula
  0 siblings, 0 replies; 16+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2020-05-10 22:49 UTC (permalink / raw)
  To: Wang, Yipeng1, Van Haaren, Harry, Jerin Jacob Kollanukkaran,
	thomas, Gobriel, Sameh, Richardson, Bruce, Ruifeng Wang
  Cc: dev

>> Subject: [dpdk-dev] [RFC] hash: unify crc32 API header for x86 and
>ARM
>>
>> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>>
>> Merge crc32 hash calculation public API headers for x86 and ARM, split
>> implementations of x86 and ARM into their respective private
>headers.
>> This reduces the ifdef code clutter while keeping current ABI intact.
>>
>> Although we install `rte_crc_arm64.h` it is not used in any of the lib or
>drivers
>> layers. All the libs and drivers use `rte_hash_crc.h` which falls back to
>SW
>> crc32 calculation for ARM platform.
>>
>> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
>> ---
>>
>>  Currently, if application incorrectly sets CRC32_ARM64 as crc32
>algorithm
>> through `rte_hash_crc_set_alg()` on x86 or vice-versa we fallback to
>> algorithm  set previously via `rte_hash_crc_set_alg()` instead of
>setting the
>> best  available.
>>  This behaviour should probably change to setting the best available
>> algorithm  and is up for discussion.
>
>[Wang, Yipeng] Should it be an illegal flag if I set ARM64
>On x86? I am thinking we should generate a warning message into logs
>if this happens.

The current behavior (with and without this patch) is to fallback to software CRC.
Warning log sounds good. Maybe we can fallback to best available CRC mode on 
the platform too.

>>
>>  app/test/test_hash.c            |   6 +
>>  lib/librte_hash/Makefile        |   5 -
>>  lib/librte_hash/crc_arm64.h     |  67 +++++++++++
>>  lib/librte_hash/crc_x86.h       |  68 +++++++++++
>>  lib/librte_hash/meson.build     |   3 +-
>>  lib/librte_hash/rte_crc_arm64.h | 183 ------------------------------
>> lib/librte_hash/rte_hash_crc.h  | 193 +++++++++++++-------------------
>>  7 files changed, 219 insertions(+), 306 deletions(-)  create mode
>100644
>> lib/librte_hash/crc_arm64.h  create mode 100644
>lib/librte_hash/crc_x86.h
>> delete mode 100644 lib/librte_hash/rte_crc_arm64.h
>>
>> diff --git a/app/test/test_hash.c b/app/test/test_hash.c index
>> afa3a1a3c..7bd457dac 100644
>> --- a/app/test/test_hash.c
>> +++ b/app/test/test_hash.c
>> @@ -195,7 +195,13 @@ test_crc32_hash_alg_equiv(void)
>>  	}
>>
>>  	/* Resetting to best available algorithm */
>> +#if defined RTE_ARCH_X86
>>  	rte_hash_crc_set_alg(CRC32_SSE42_x64);
>> +#elif defined RTE_ARCH_ARM64
>> +	rte_hash_crc_set_alg(CRC32_ARM64);
>> +#else
>> +	rte_hash_crc_set_alg(CRC32_SW);
>> +#endif
>>
>>  	if (i == CRC32_ITERATIONS)
>>  		return 0;
>> diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile index
>> ec9f86499..f640afc42 100644
>> --- a/lib/librte_hash/Makefile
>> +++ b/lib/librte_hash/Makefile
>> @@ -19,11 +19,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_HASH) +=
>rte_fbk_hash.c
>> # install this header file  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-
>include :=
>> rte_hash.h  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include +=
>> rte_hash_crc.h -ifeq ($(CONFIG_RTE_ARCH_ARM64),y) -ifneq
>($(findstring
>> RTE_MACHINE_CPUFLAG_CRC32,$(CFLAGS)),)
>> -SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_crc_arm64.h
>-endif
>> -endif  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include +=
>rte_jhash.h
>> SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_thash.h
>SYMLINK-
>> $(CONFIG_RTE_LIBRTE_HASH)-include += rte_fbk_hash.h diff --git
>> a/lib/librte_hash/crc_arm64.h b/lib/librte_hash/crc_arm64.h new file
>mode
>> 100644 index 000000000..8e75f8297
>> --- /dev/null
>> +++ b/lib/librte_hash/crc_arm64.h
>> @@ -0,0 +1,67 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright(c) 2015 Cavium, Inc
>> + */
>> +
>> +#ifndef _CRC_ARM64_H_
>> +#define _CRC_ARM64_H_
>> +
>> +/**
>> + * @file
>> + *
>> + * CRC arm64 Hash
>> + */
>[Wang, Yipeng] It is hidden now so do we still need this doxygen region
>above?

I will remove in next version.

>> +
>> +#ifdef __cplusplus
>> +extern "C" {
>> +#endif
>> +
>> +#include <stdint.h>
>> +#include <rte_cpuflags.h>
>> +#include <rte_branch_prediction.h>
>> +#include <rte_common.h>
>[Wang, Yipeng] I don't think we need all these headers in this file.
>> +

Will remove in next version.

>> +static inline uint32_t
>> +crc32c_arm64_u8(uint8_t data, uint32_t init_val) {
>> +	__asm__ volatile(
>> +			"crc32cb %w[crc], %w[crc], %w[value]"
>> +			: [crc] "+r" (init_val)
>> +			: [value] "r" (data));
>> +	return init_val;
>> +}
>> +
>> +static inline uint32_t
>> +crc32c_arm64_u16(uint16_t data, uint32_t init_val) {
>> +	__asm__ volatile(
>> +			"crc32ch %w[crc], %w[crc], %w[value]"
>> +			: [crc] "+r" (init_val)
>> +			: [value] "r" (data));
>> +	return init_val;
>> +}
>> +
>> +static inline uint32_t
>> +crc32c_arm64_u32(uint32_t data, uint32_t init_val) {
>> +	__asm__ volatile(
>> +			"crc32cw %w[crc], %w[crc], %w[value]"
>> +			: [crc] "+r" (init_val)
>> +			: [value] "r" (data));
>> +	return init_val;
>> +}
>> +
>> +static inline uint32_t
>> +crc32c_arm64_u64(uint64_t data, uint32_t init_val) {
>> +	__asm__ volatile(
>> +			"crc32cx %w[crc], %w[crc], %x[value]"
>> +			: [crc] "+r" (init_val)
>> +			: [value] "r" (data));
>> +	return init_val;
>> +}
>> +
>> +#ifdef __cplusplus
>> +}
>> +#endif
>> +
>> +#endif /* _CRC_ARM64_H_ */
>> diff --git a/lib/librte_hash/crc_x86.h b/lib/librte_hash/crc_x86.h new
>file
>> mode 100644 index 000000000..fb45f2e98
>> --- /dev/null
>> +++ b/lib/librte_hash/crc_x86.h
>> @@ -0,0 +1,68 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright(c) 2010-2014 Intel Corporation  */
>> +
>> +#ifndef _CRC_X86_H_
>> +#define _CRC_X86_H_
>> +
>> +#if defined(RTE_ARCH_X86)
>> +static inline uint32_t
>> +crc32c_sse42_u8(uint8_t data, uint32_t init_val) {
>> +	__asm__ volatile(
>> +			"crc32b %[data], %[init_val];"
>> +			: [init_val] "+r" (init_val)
>> +			: [data] "rm" (data));
>> +	return init_val;
>> +}
>> +
>> +static inline uint32_t
>> +crc32c_sse42_u16(uint16_t data, uint32_t init_val) {
>> +	__asm__ volatile(
>> +			"crc32w %[data], %[init_val];"
>> +			: [init_val] "+r" (init_val)
>> +			: [data] "rm" (data));
>> +	return init_val;
>> +}
>> +
>> +static inline uint32_t
>> +crc32c_sse42_u32(uint32_t data, uint32_t init_val) {
>> +	__asm__ volatile(
>> +			"crc32l %[data], %[init_val];"
>> +			: [init_val] "+r" (init_val)
>> +			: [data] "rm" (data));
>> +	return init_val;
>> +}
>> +
>> +static inline uint32_t
>> +crc32c_sse42_u64_mimic(uint64_t data, uint32_t init_val) {
>> +	union {
>> +		uint32_t u32[2];
>> +		uint64_t u64;
>> +	} d;
>> +
>> +	d.u64 = data;
>> +	init_val = crc32c_sse42_u32(d.u32[0], init_val);
>> +	init_val = crc32c_sse42_u32(d.u32[1], init_val);
>> +	return init_val;
>> +}
>> +#endif
>> +
>> +#ifdef RTE_ARCH_X86_64
>> +static inline uint32_t
>> +crc32c_sse42_u64(uint64_t data, uint32_t init_val) {
>> +	uint64_t val = init_val;
>> +
>> +	__asm__ volatile(
>> +			"crc32q %[data], %[init_val];"
>> +			: [init_val] "+r" (val)
>> +			: [data] "rm" (data));
>> +	return (uint32_t)val;
>> +}
>> +#endif
>> +
>> +#endif
>> diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build
>index
>> 6ab46ae9d..90a180bc8 100644
>> --- a/lib/librte_hash/meson.build
>> +++ b/lib/librte_hash/meson.build
>> @@ -1,8 +1,7 @@
>>  # SPDX-License-Identifier: BSD-3-Clause  # Copyright(c) 2017 Intel
>> Corporation
>>
>> -headers = files('rte_crc_arm64.h',
>> -	'rte_fbk_hash.h',
>> +headers = files('rte_fbk_hash.h',
>>  	'rte_hash_crc.h',
>>  	'rte_hash.h',
>>  	'rte_jhash.h',
>> diff --git a/lib/librte_hash/rte_crc_arm64.h
>> b/lib/librte_hash/rte_crc_arm64.h deleted file mode 100644 index
>> b4628cfc0..000000000
>> --- a/lib/librte_hash/rte_crc_arm64.h
>> +++ /dev/null
>> @@ -1,183 +0,0 @@
>> -/* SPDX-License-Identifier: BSD-3-Clause
>> - * Copyright(c) 2015 Cavium, Inc
>> - */
>> -
>> -#ifndef _RTE_CRC_ARM64_H_
>> -#define _RTE_CRC_ARM64_H_
>> -
>> -/**
>> - * @file
>> - *
>> - * RTE CRC arm64 Hash
>> - */
>> -
>> -#ifdef __cplusplus
>> -extern "C" {
>> -#endif
>> -
>> -#include <stdint.h>
>> -#include <rte_cpuflags.h>
>> -#include <rte_branch_prediction.h>
>> -#include <rte_common.h>
>> -
>> -static inline uint32_t
>> -crc32c_arm64_u8(uint8_t data, uint32_t init_val) -{
>> -	__asm__ volatile(
>> -			"crc32cb %w[crc], %w[crc], %w[value]"
>> -			: [crc] "+r" (init_val)
>> -			: [value] "r" (data));
>> -	return init_val;
>> -}
>> -
>> -static inline uint32_t
>> -crc32c_arm64_u16(uint16_t data, uint32_t init_val) -{
>> -	__asm__ volatile(
>> -			"crc32ch %w[crc], %w[crc], %w[value]"
>> -			: [crc] "+r" (init_val)
>> -			: [value] "r" (data));
>> -	return init_val;
>> -}
>> -
>> -static inline uint32_t
>> -crc32c_arm64_u32(uint32_t data, uint32_t init_val) -{
>> -	__asm__ volatile(
>> -			"crc32cw %w[crc], %w[crc], %w[value]"
>> -			: [crc] "+r" (init_val)
>> -			: [value] "r" (data));
>> -	return init_val;
>> -}
>> -
>> -static inline uint32_t
>> -crc32c_arm64_u64(uint64_t data, uint32_t init_val) -{
>> -	__asm__ volatile(
>> -			"crc32cx %w[crc], %w[crc], %x[value]"
>> -			: [crc] "+r" (init_val)
>> -			: [value] "r" (data));
>> -	return init_val;
>> -}
>> -
>> -/**
>> - * Allow or disallow use of arm64 SIMD instrinsics for CRC32 hash
>> - * calculation.
>> - *
>> - * @param alg
>> - *   An OR of following flags:
>> - *   - (CRC32_SW) Don't use arm64 crc intrinsics
>> - *   - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available
>> - *
>> - */
>> -static inline void
>> -rte_hash_crc_set_alg(uint8_t alg)
>> -{
>> -	switch (alg) {
>> -	case CRC32_ARM64:
>> -		if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
>> -			alg = CRC32_SW;
>> -		/* fall-through */
>> -	case CRC32_SW:
>> -		crc32_alg = alg;
>> -		/* fall-through */
>> -	default:
>> -		break;
>> -	}
>> -}
>> -
>> -/* Setting the best available algorithm */
>> -RTE_INIT(rte_hash_crc_init_alg)
>> -{
>> -	rte_hash_crc_set_alg(CRC32_ARM64);
>> -}
>> -
>> -/**
>> - * Use single crc32 instruction to perform a hash on a 1 byte value.
>> - * Fall back to software crc32 implementation in case arm64 crc
>intrinsics is
>> - * not supported
>> - *
>> - * @param data
>> - *   Data to perform hash on.
>> - * @param init_val
>> - *   Value to initialise hash generator.
>> - * @return
>> - *   32bit calculated hash value.
>> - */
>> -static inline uint32_t
>> -rte_hash_crc_1byte(uint8_t data, uint32_t init_val) -{
>> -	if (likely(crc32_alg & CRC32_ARM64))
>> -		return crc32c_arm64_u8(data, init_val);
>> -
>> -	return crc32c_1byte(data, init_val);
>> -}
>> -
>> -/**
>> - * Use single crc32 instruction to perform a hash on a 2 bytes value.
>> - * Fall back to software crc32 implementation in case arm64 crc
>intrinsics is
>> - * not supported
>> - *
>> - * @param data
>> - *   Data to perform hash on.
>> - * @param init_val
>> - *   Value to initialise hash generator.
>> - * @return
>> - *   32bit calculated hash value.
>> - */
>> -static inline uint32_t
>> -rte_hash_crc_2byte(uint16_t data, uint32_t init_val) -{
>> -	if (likely(crc32_alg & CRC32_ARM64))
>> -		return crc32c_arm64_u16(data, init_val);
>> -
>> -	return crc32c_2bytes(data, init_val);
>> -}
>> -
>> -/**
>> - * Use single crc32 instruction to perform a hash on a 4 byte value.
>> - * Fall back to software crc32 implementation in case arm64 crc
>intrinsics is
>> - * not supported
>> - *
>> - * @param data
>> - *   Data to perform hash on.
>> - * @param init_val
>> - *   Value to initialise hash generator.
>> - * @return
>> - *   32bit calculated hash value.
>> - */
>> -static inline uint32_t
>> -rte_hash_crc_4byte(uint32_t data, uint32_t init_val) -{
>> -	if (likely(crc32_alg & CRC32_ARM64))
>> -		return crc32c_arm64_u32(data, init_val);
>> -
>> -	return crc32c_1word(data, init_val);
>> -}
>> -
>> -/**
>> - * Use single crc32 instruction to perform a hash on a 8 byte value.
>> - * Fall back to software crc32 implementation in case arm64 crc
>intrinsics is
>> - * not supported
>> - *
>> - * @param data
>> - *   Data to perform hash on.
>> - * @param init_val
>> - *   Value to initialise hash generator.
>> - * @return
>> - *   32bit calculated hash value.
>> - */
>> -static inline uint32_t
>> -rte_hash_crc_8byte(uint64_t data, uint32_t init_val) -{
>> -	if (likely(crc32_alg == CRC32_ARM64))
>> -		return crc32c_arm64_u64(data, init_val);
>> -
>> -	return crc32c_2words(data, init_val);
>> -}
>> -
>> -#ifdef __cplusplus
>> -}
>> -#endif
>> -
>> -#endif /* _RTE_CRC_ARM64_H_ */
>> diff --git a/lib/librte_hash/rte_hash_crc.h
>b/lib/librte_hash/rte_hash_crc.h
>> index cf28031b3..292697db1 100644
>> --- a/lib/librte_hash/rte_hash_crc.h
>> +++ b/lib/librte_hash/rte_hash_crc.h
>> @@ -21,6 +21,15 @@ extern "C" {
>>  #include <rte_branch_prediction.h>
>>  #include <rte_common.h>
>>
>> +typedef uint32_t
>> +(*crc32_handler_1b)(uint8_t data, uint32_t init_val); typedef
>uint32_t
>> +(*crc32_handler_2b)(uint16_t data, uint32_t init_val); typedef
>uint32_t
>> +(*crc32_handler_4b)(uint32_t data, uint32_t init_val); typedef
>uint32_t
>> +(*crc32_handler_8b)(uint64_t data, uint32_t init_val);
>
>[Wang, Yipeng] I guess functions pointers makes the code cleaner.
>I am just not sure on performance implications for indirect call.
>@Harry for his comment.
>

Sadly there is no perf testcase for crc hash computation. Maybe we could
have a simple testcase in hash_perf_autotest in next version.

>> +
>>  /* Lookup tables for software implementation of CRC32C */  static
>const
>> uint32_t crc32c_tables[8][256] = {{
>>   0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F,
>0x35F1141C,
>> 0x26A1E7E8, 0xD4CA64EB, @@ -322,7 +331,7 @@
>crc32c_2bytes(uint16_t
>> data, uint32_t init_val)  }
>>
>>  static inline uint32_t
>> -crc32c_1word(uint32_t data, uint32_t init_val)
>> +crc32c_4bytes(uint32_t data, uint32_t init_val)
>>  {
>>  	uint32_t crc, term1, term2;
>>  	crc = init_val;
>> @@ -336,7 +345,7 @@ crc32c_1word(uint32_t data, uint32_t init_val)
>}
>>
>>  static inline uint32_t
>> -crc32c_2words(uint64_t data, uint32_t init_val)
>> +crc32c_8bytes(uint64_t data, uint32_t init_val)
>>  {
>>  	uint32_t crc, term1, term2;
>>  	union {
>> @@ -357,109 +366,94 @@ crc32c_2words(uint64_t data, uint32_t
>init_val)
>>
>>  	return crc;
>>  }
>> -
>> -#if defined(RTE_ARCH_X86)
>> -static inline uint32_t
>> -crc32c_sse42_u8(uint8_t data, uint32_t init_val) -{
>> -	__asm__ volatile(
>> -			"crc32b %[data], %[init_val];"
>> -			: [init_val] "+r" (init_val)
>> -			: [data] "rm" (data));
>> -	return init_val;
>> -}
>> -
>> -static inline uint32_t
>> -crc32c_sse42_u16(uint16_t data, uint32_t init_val) -{
>> -	__asm__ volatile(
>> -			"crc32w %[data], %[init_val];"
>> -			: [init_val] "+r" (init_val)
>> -			: [data] "rm" (data));
>> -	return init_val;
>> -}
>> -
>> -static inline uint32_t
>> -crc32c_sse42_u32(uint32_t data, uint32_t init_val) -{
>> -	__asm__ volatile(
>> -			"crc32l %[data], %[init_val];"
>> -			: [init_val] "+r" (init_val)
>> -			: [data] "rm" (data));
>> -	return init_val;
>> -}
>> -
>> -static inline uint32_t
>> -crc32c_sse42_u64_mimic(uint64_t data, uint64_t init_val) -{
>> -	union {
>> -		uint32_t u32[2];
>> -		uint64_t u64;
>> -	} d;
>> -
>> -	d.u64 = data;
>> -	init_val = crc32c_sse42_u32(d.u32[0], (uint32_t)init_val);
>> -	init_val = crc32c_sse42_u32(d.u32[1], (uint32_t)init_val);
>> -	return (uint32_t)init_val;
>> -}
>> -#endif
>> -
>> -#ifdef RTE_ARCH_X86_64
>> -static inline uint32_t
>> -crc32c_sse42_u64(uint64_t data, uint64_t init_val) -{
>> -	__asm__ volatile(
>> -			"crc32q %[data], %[init_val];"
>> -			: [init_val] "+r" (init_val)
>> -			: [data] "rm" (data));
>> -	return (uint32_t)init_val;
>> -}
>> -#endif
>> -
>[Wang, Yipeng] keep the blank line before define.

Will add in next version.

>>  #define CRC32_SW            (1U << 0)
>>  #define CRC32_SSE42         (1U << 1)
>>  #define CRC32_x64           (1U << 2)
>>  #define CRC32_SSE42_x64     (CRC32_x64|CRC32_SSE42)
>>  #define CRC32_ARM64         (1U << 3)
>>
>> -static uint8_t crc32_alg = CRC32_SW;
>> +static crc32_handler_1b crc32_1b = crc32c_1byte; static
>> +crc32_handler_2b crc32_2b = crc32c_2bytes; static crc32_handler_4b
>> +crc32_4b = crc32c_4bytes; static crc32_handler_8b crc32_8b =
>> +crc32c_8bytes;
>>
>>  #if defined(RTE_ARCH_ARM64) &&
>> defined(RTE_MACHINE_CPUFLAG_CRC32)
>> -#include "rte_crc_arm64.h"
>> -#else
>> +#include "crc_arm64.h"
>> +#endif
>> +
>> +#if defined(RTE_ARCH_X86)
>> +#include "crc_x86.h"
>> +#endif
>>
>>  /**
>> - * Allow or disallow use of SSE4.2 instrinsics for CRC32 hash
>> + * Allow or disallow use of SSE4.2/ARMv8 instrinsics for CRC32 hash
>>   * calculation.
>>   *
>>   * @param alg
>>   *   An OR of following flags:
>> - *   - (CRC32_SW) Don't use SSE4.2 intrinsics
>> + *   - (CRC32_SW) Don't use SSE4.2 intrinsics (default non-
>[x86/ARMv8])
>>   *   - (CRC32_SSE42) Use SSE4.2 intrinsics if available
>> - *   - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available
>(default)
>> - *
>> + *   - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available
>(default x86)
>> + *   - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available
>>   */
>>  static inline void
>>  rte_hash_crc_set_alg(uint8_t alg)
>>  {
>> -#if defined(RTE_ARCH_X86)
>> -	if (alg == CRC32_SSE42_x64 &&
>> -
>	!rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T))
>> -		alg = CRC32_SSE42;
>> +	switch (alg) {
>> +	case CRC32_SSE42_x64:
>> +#if defined RTE_ARCH_X86
>> +		crc32_1b = crc32c_sse42_u8;
>> +		crc32_2b = crc32c_sse42_u16;
>> +		crc32_4b = crc32c_sse42_u32;
>> +
>> +	if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T))
>> +		crc32_8b = crc32c_sse42_u64_mimic;
>> +	else
>> +		crc32_8b = crc32c_sse42_u64;
>>  #endif
>> -	crc32_alg = alg;
>> +		break;
>> +	case CRC32_SSE42:
>> +#if defined RTE_ARCH_X86
>> +		crc32_1b = crc32c_sse42_u8;
>> +		crc32_2b = crc32c_sse42_u16;
>> +		crc32_4b = crc32c_sse42_u32;
>> +		crc32_8b = crc32c_sse42_u64_mimic;
>> +#endif
>> +		break;
>> +	case CRC32_ARM64:
>> +#if defined RTE_ARCH_ARM64
>> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32)) {
>> +		crc32_1b = crc32c_arm64_u8;
>> +		crc32_2b = crc32c_arm64_u16;
>> +		crc32_4b = crc32c_arm64_u32;
>> +		crc32_8b = crc32c_arm64_u64;
>> +	}
>> +#endif
>> +		break;
>> +	case CRC32_SW:
>> +	default:
>> +		crc32_1b = crc32c_1byte;
>> +		crc32_2b = crc32c_2bytes;
>> +		crc32_4b = crc32c_4bytes;
>> +		crc32_8b = crc32c_8bytes;
>> +	break;
>> +	}
>>  }
>>
>>  /* Setting the best available algorithm */
>>  RTE_INIT(rte_hash_crc_init_alg)
>>  {
>> +#if defined RTE_ARCH_X86
>>  	rte_hash_crc_set_alg(CRC32_SSE42_x64);
>> +#elif defined RTE_ARCH_ARM64
>> +	rte_hash_crc_set_alg(CRC32_ARM64);
>> +#else
>> +	rte_hash_crc_set_alg(CRC32_SW);
>> +#endif
>>  }
>>
>>  /**
>> - * Use single crc32 instruction to perform a hash on a byte value.
>> - * Fall back to software crc32 implementation in case SSE4.2 is
>> - * not supported
>> + * Calculate crc32 hash value of 1bytes.
>>   *
>>   * @param data
>>   *   Data to perform hash on.
>> @@ -471,18 +465,11 @@ RTE_INIT(rte_hash_crc_init_alg)  static inline
>> uint32_t  rte_hash_crc_1byte(uint8_t data, uint32_t init_val)  { -#if
>defined
>> RTE_ARCH_X86
>> -	if (likely(crc32_alg & CRC32_SSE42))
>> -		return crc32c_sse42_u8(data, init_val);
>> -#endif
>> -
>> -	return crc32c_1byte(data, init_val);
>> +	return (crc32_1b)(data, init_val);
>>  }
>>
>>  /**
>> - * Use single crc32 instruction to perform a hash on a 2 bytes value.
>> - * Fall back to software crc32 implementation in case SSE4.2 is
>> - * not supported
>> + * Calculate crc32 hash value of 2bytes.
>>   *
>>   * @param data
>>   *   Data to perform hash on.
>> @@ -494,18 +481,11 @@ rte_hash_crc_1byte(uint8_t data, uint32_t
>init_val)
>> static inline uint32_t  rte_hash_crc_2byte(uint16_t data, uint32_t
>init_val)  { -
>> #if defined RTE_ARCH_X86
>> -	if (likely(crc32_alg & CRC32_SSE42))
>> -		return crc32c_sse42_u16(data, init_val);
>> -#endif
>> -
>> -	return crc32c_2bytes(data, init_val);
>> +	return (crc32_2b)(data, init_val);
>>  }
>>
>>  /**
>> - * Use single crc32 instruction to perform a hash on a 4 byte value.
>> - * Fall back to software crc32 implementation in case SSE4.2 is
>> - * not supported
>> + * Calculate crc32 hash value of 4bytes.
>>   *
>>   * @param data
>>   *   Data to perform hash on.
>> @@ -517,18 +497,11 @@ rte_hash_crc_2byte(uint16_t data, uint32_t
>init_val)
>> static inline uint32_t  rte_hash_crc_4byte(uint32_t data, uint32_t
>init_val)  { -
>> #if defined RTE_ARCH_X86
>> -	if (likely(crc32_alg & CRC32_SSE42))
>> -		return crc32c_sse42_u32(data, init_val);
>> -#endif
>> -
>> -	return crc32c_1word(data, init_val);
>> +	return (crc32_4b)(data, init_val);
>>  }
>>
>>  /**
>> - * Use single crc32 instruction to perform a hash on a 8 byte value.
>> - * Fall back to software crc32 implementation in case SSE4.2 is
>> - * not supported
>> + * Calculate crc32 hash value of 8bytes.
>>   *
>>   * @param data
>>   *   Data to perform hash on.
>> @@ -540,21 +513,9 @@ rte_hash_crc_4byte(uint32_t data, uint32_t
>init_val)
>> static inline uint32_t  rte_hash_crc_8byte(uint64_t data, uint32_t
>init_val)  { -
>> #ifdef RTE_ARCH_X86_64
>> -	if (likely(crc32_alg == CRC32_SSE42_x64))
>> -		return crc32c_sse42_u64(data, init_val);
>> -#endif
>> -
>> -#if defined RTE_ARCH_X86
>> -	if (likely(crc32_alg & CRC32_SSE42))
>> -		return crc32c_sse42_u64_mimic(data, init_val);
>> -#endif
>> -
>> -	return crc32c_2words(data, init_val);
>> +	return (crc32_8b)(data, init_val);
>>  }
>>
>> -#endif
>> -
>>  /**
>>   * Calculate CRC32 hash on user-supplied byte array.
>>   *
>> --
>> 2.17.1
>[Wang, Yipeng]
>Thanks for the patch, please see my comment inlined.
>

Thanks for the review.

>I think it is good to hide the architecture specific headers.
>And I agree with Harry that we shouldn't silently remove a public
>header.
>I don't know the best practice on handling this though (e.g. symlink or
>dummyfile), either way looks fine to me.
>Bruce/Thomas may comment.
>
>


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [RFC] hash: unify crc32 API header for x86 and ARM
  2020-05-08 12:55 ` Ananyev, Konstantin
@ 2020-05-10 22:53   ` Pavan Nikhilesh Bhagavatula
  2020-05-11  9:46     ` Ananyev, Konstantin
  0 siblings, 1 reply; 16+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2020-05-10 22:53 UTC (permalink / raw)
  To: Ananyev, Konstantin, Jerin Jacob Kollanukkaran, thomas, Wang,
	Yipeng1, Gobriel, Sameh, Richardson, Bruce, Ruifeng Wang
  Cc: dev

>> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>>
>> Merge crc32 hash calculation public API headers for x86 and ARM,
>> split implementations of x86 and ARM into their respective private
>> headers.
>> This reduces the ifdef code clutter while keeping current ABI intact.
>>
>> Although we install `rte_crc_arm64.h` it is not used in any of the lib or
>> drivers layers. All the libs and drivers use `rte_hash_crc.h` which falls
>> back to SW crc32 calculation for ARM platform.
>>
>> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
>> ---
>>
>>  Currently, if application incorrectly sets CRC32_ARM64 as crc32
>algorithm
>>  through `rte_hash_crc_set_alg()` on x86 or vice-versa we fallback to
>algorithm
>>  set previously via `rte_hash_crc_set_alg()` instead of setting the best
>>  available.
>>  This behaviour should probably change to setting the best available
>algorithm
>>  and is up for discussion.
>>
>>  app/test/test_hash.c            |   6 +
>>  lib/librte_hash/Makefile        |   5 -
>>  lib/librte_hash/crc_arm64.h     |  67 +++++++++++
>>  lib/librte_hash/crc_x86.h       |  68 +++++++++++
>>  lib/librte_hash/meson.build     |   3 +-
>>  lib/librte_hash/rte_crc_arm64.h | 183 ------------------------------
>>  lib/librte_hash/rte_hash_crc.h  | 193 +++++++++++++------------------
>-
>>  7 files changed, 219 insertions(+), 306 deletions(-)
>>  create mode 100644 lib/librte_hash/crc_arm64.h
>>  create mode 100644 lib/librte_hash/crc_x86.h
>>  delete mode 100644 lib/librte_hash/rte_crc_arm64.h
>>
>> diff --git a/app/test/test_hash.c b/app/test/test_hash.c
>> index afa3a1a3c..7bd457dac 100644
>> --- a/app/test/test_hash.c
>> +++ b/app/test/test_hash.c
>> @@ -195,7 +195,13 @@ test_crc32_hash_alg_equiv(void)
>>  	}
>>
>>  	/* Resetting to best available algorithm */
>> +#if defined RTE_ARCH_X86
>>  	rte_hash_crc_set_alg(CRC32_SSE42_x64);
>> +#elif defined RTE_ARCH_ARM64
>> +	rte_hash_crc_set_alg(CRC32_ARM64);
>> +#else
>> +	rte_hash_crc_set_alg(CRC32_SW);
>> +#endif
>>
>>  	if (i == CRC32_ITERATIONS)
>>  		return 0;
>> diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile
>> index ec9f86499..f640afc42 100644
>> --- a/lib/librte_hash/Makefile
>> +++ b/lib/librte_hash/Makefile
>> @@ -19,11 +19,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_HASH) +=
>rte_fbk_hash.c
>>  # install this header file
>>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include := rte_hash.h
>>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_hash_crc.h
>> -ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
>> -ifneq ($(findstring RTE_MACHINE_CPUFLAG_CRC32,$(CFLAGS)),)
>> -SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_crc_arm64.h
>> -endif
>> -endif
>>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_jhash.h
>>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_thash.h
>>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_fbk_hash.h
>> diff --git a/lib/librte_hash/crc_arm64.h b/lib/librte_hash/crc_arm64.h
>> new file mode 100644
>> index 000000000..8e75f8297
>
>Wouldn't that break 'make  install T=...'?

My bad I verified with meson and it was building fine.

>As now rte_hash_crc.h includes not public headers (crc_x86.h, etc.).
>Same question about external apps, where they would get from these
>headers?

I think in the next version we can directly have the arch specific functions
Implemented in rte_hash_crc.h. Since its pretty stable code and overhead of extra 
~120 lines.

>
>> --- /dev/null
>> +++ b/lib/librte_hash/crc_arm64.h
>> @@ -0,0 +1,67 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright(c) 2015 Cavium, Inc
>> + */
>> +


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [RFC] hash: unify crc32 API header for x86 and ARM
  2020-05-10 22:53   ` Pavan Nikhilesh Bhagavatula
@ 2020-05-11  9:46     ` Ananyev, Konstantin
  2020-05-11 10:23       ` Pavan Nikhilesh Bhagavatula
  0 siblings, 1 reply; 16+ messages in thread
From: Ananyev, Konstantin @ 2020-05-11  9:46 UTC (permalink / raw)
  To: Pavan Nikhilesh Bhagavatula, Jerin Jacob Kollanukkaran, thomas,
	Wang, Yipeng1, Gobriel, Sameh, Richardson, Bruce, Ruifeng Wang
  Cc: dev

> >> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> >>
> >> Merge crc32 hash calculation public API headers for x86 and ARM,
> >> split implementations of x86 and ARM into their respective private
> >> headers.
> >> This reduces the ifdef code clutter while keeping current ABI intact.
> >>
> >> Although we install `rte_crc_arm64.h` it is not used in any of the lib or
> >> drivers layers. All the libs and drivers use `rte_hash_crc.h` which falls
> >> back to SW crc32 calculation for ARM platform.
> >>
> >> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> >> ---
> >>
> >>  Currently, if application incorrectly sets CRC32_ARM64 as crc32
> >algorithm
> >>  through `rte_hash_crc_set_alg()` on x86 or vice-versa we fallback to
> >algorithm
> >>  set previously via `rte_hash_crc_set_alg()` instead of setting the best
> >>  available.
> >>  This behaviour should probably change to setting the best available
> >algorithm
> >>  and is up for discussion.
> >>
> >>  app/test/test_hash.c            |   6 +
> >>  lib/librte_hash/Makefile        |   5 -
> >>  lib/librte_hash/crc_arm64.h     |  67 +++++++++++
> >>  lib/librte_hash/crc_x86.h       |  68 +++++++++++
> >>  lib/librte_hash/meson.build     |   3 +-
> >>  lib/librte_hash/rte_crc_arm64.h | 183 ------------------------------
> >>  lib/librte_hash/rte_hash_crc.h  | 193 +++++++++++++------------------
> >-
> >>  7 files changed, 219 insertions(+), 306 deletions(-)
> >>  create mode 100644 lib/librte_hash/crc_arm64.h
> >>  create mode 100644 lib/librte_hash/crc_x86.h
> >>  delete mode 100644 lib/librte_hash/rte_crc_arm64.h
> >>
> >> diff --git a/app/test/test_hash.c b/app/test/test_hash.c
> >> index afa3a1a3c..7bd457dac 100644
> >> --- a/app/test/test_hash.c
> >> +++ b/app/test/test_hash.c
> >> @@ -195,7 +195,13 @@ test_crc32_hash_alg_equiv(void)
> >>  	}
> >>
> >>  	/* Resetting to best available algorithm */
> >> +#if defined RTE_ARCH_X86
> >>  	rte_hash_crc_set_alg(CRC32_SSE42_x64);
> >> +#elif defined RTE_ARCH_ARM64
> >> +	rte_hash_crc_set_alg(CRC32_ARM64);
> >> +#else
> >> +	rte_hash_crc_set_alg(CRC32_SW);
> >> +#endif
> >>
> >>  	if (i == CRC32_ITERATIONS)
> >>  		return 0;
> >> diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile
> >> index ec9f86499..f640afc42 100644
> >> --- a/lib/librte_hash/Makefile
> >> +++ b/lib/librte_hash/Makefile
> >> @@ -19,11 +19,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_HASH) +=
> >rte_fbk_hash.c
> >>  # install this header file
> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include := rte_hash.h
> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_hash_crc.h
> >> -ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
> >> -ifneq ($(findstring RTE_MACHINE_CPUFLAG_CRC32,$(CFLAGS)),)
> >> -SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_crc_arm64.h
> >> -endif
> >> -endif
> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_jhash.h
> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_thash.h
> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_fbk_hash.h
> >> diff --git a/lib/librte_hash/crc_arm64.h b/lib/librte_hash/crc_arm64.h
> >> new file mode 100644
> >> index 000000000..8e75f8297
> >
> >Wouldn't that break 'make  install T=...'?
> 
> My bad I verified with meson and it was building fine.
> 
> >As now rte_hash_crc.h includes not public headers (crc_x86.h, etc.).
> >Same question about external apps, where they would get from these
> >headers?
> 
> I think in the next version we can directly have the arch specific functions
> Implemented in rte_hash_crc.h. Since its pretty stable code and overhead of extra
> ~120 lines.

Ok... but why not then just leave arch specific headers, as they are right now?
What is wrong with current approach?

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [RFC] hash: unify crc32 API header for x86 and ARM
  2020-05-11  9:46     ` Ananyev, Konstantin
@ 2020-05-11 10:23       ` Pavan Nikhilesh Bhagavatula
  2020-05-11 10:27         ` Ananyev, Konstantin
  0 siblings, 1 reply; 16+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2020-05-11 10:23 UTC (permalink / raw)
  To: Ananyev, Konstantin, Jerin Jacob Kollanukkaran, thomas, Wang,
	Yipeng1, Gobriel, Sameh, Richardson, Bruce, Ruifeng Wang
  Cc: dev

>> >> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>> >>
>> >> Merge crc32 hash calculation public API headers for x86 and ARM,
>> >> split implementations of x86 and ARM into their respective private
>> >> headers.
>> >> This reduces the ifdef code clutter while keeping current ABI
>intact.
>> >>
>> >> Although we install `rte_crc_arm64.h` it is not used in any of the lib
>or
>> >> drivers layers. All the libs and drivers use `rte_hash_crc.h` which
>falls
>> >> back to SW crc32 calculation for ARM platform.
>> >>
>> >> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
>> >> ---
>> >>
>> >>  Currently, if application incorrectly sets CRC32_ARM64 as crc32
>> >algorithm
>> >>  through `rte_hash_crc_set_alg()` on x86 or vice-versa we fallback
>to
>> >algorithm
>> >>  set previously via `rte_hash_crc_set_alg()` instead of setting the
>best
>> >>  available.
>> >>  This behaviour should probably change to setting the best
>available
>> >algorithm
>> >>  and is up for discussion.
>> >>
>> >>  app/test/test_hash.c            |   6 +
>> >>  lib/librte_hash/Makefile        |   5 -
>> >>  lib/librte_hash/crc_arm64.h     |  67 +++++++++++
>> >>  lib/librte_hash/crc_x86.h       |  68 +++++++++++
>> >>  lib/librte_hash/meson.build     |   3 +-
>> >>  lib/librte_hash/rte_crc_arm64.h | 183 ------------------------------
>> >>  lib/librte_hash/rte_hash_crc.h  | 193 +++++++++++++--------------
>----
>> >-
>> >>  7 files changed, 219 insertions(+), 306 deletions(-)
>> >>  create mode 100644 lib/librte_hash/crc_arm64.h
>> >>  create mode 100644 lib/librte_hash/crc_x86.h
>> >>  delete mode 100644 lib/librte_hash/rte_crc_arm64.h
>> >>
>> >> diff --git a/app/test/test_hash.c b/app/test/test_hash.c
>> >> index afa3a1a3c..7bd457dac 100644
>> >> --- a/app/test/test_hash.c
>> >> +++ b/app/test/test_hash.c
>> >> @@ -195,7 +195,13 @@ test_crc32_hash_alg_equiv(void)
>> >>  	}
>> >>
>> >>  	/* Resetting to best available algorithm */
>> >> +#if defined RTE_ARCH_X86
>> >>  	rte_hash_crc_set_alg(CRC32_SSE42_x64);
>> >> +#elif defined RTE_ARCH_ARM64
>> >> +	rte_hash_crc_set_alg(CRC32_ARM64);
>> >> +#else
>> >> +	rte_hash_crc_set_alg(CRC32_SW);
>> >> +#endif
>> >>
>> >>  	if (i == CRC32_ITERATIONS)
>> >>  		return 0;
>> >> diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile
>> >> index ec9f86499..f640afc42 100644
>> >> --- a/lib/librte_hash/Makefile
>> >> +++ b/lib/librte_hash/Makefile
>> >> @@ -19,11 +19,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_HASH) +=
>> >rte_fbk_hash.c
>> >>  # install this header file
>> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include := rte_hash.h
>> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include +=
>rte_hash_crc.h
>> >> -ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
>> >> -ifneq ($(findstring RTE_MACHINE_CPUFLAG_CRC32,$(CFLAGS)),)
>> >> -SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include +=
>rte_crc_arm64.h
>> >> -endif
>> >> -endif
>> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_jhash.h
>> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_thash.h
>> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include +=
>rte_fbk_hash.h
>> >> diff --git a/lib/librte_hash/crc_arm64.h
>b/lib/librte_hash/crc_arm64.h
>> >> new file mode 100644
>> >> index 000000000..8e75f8297
>> >
>> >Wouldn't that break 'make  install T=...'?
>>
>> My bad I verified with meson and it was building fine.
>>
>> >As now rte_hash_crc.h includes not public headers (crc_x86.h, etc.).
>> >Same question about external apps, where they would get from
>these
>> >headers?
>>
>> I think in the next version we can directly have the arch specific
>functions
>> Implemented in rte_hash_crc.h. Since its pretty stable code and
>overhead of extra
>> ~120 lines.
>
>Ok... but why not then just leave arch specific headers, as they are right
>now?
>What is wrong with current approach?

The problem is if any application directly includes only rte_crc_arm64.h 
(completely legal) it will break the build.

Example:

diff --git a/lib/librte_efd/rte_efd.c b/lib/librte_efd/rte_efd.c
index 6a799556d..318670940 100644
--- a/lib/librte_efd/rte_efd.c
+++ b/lib/librte_efd/rte_efd.c
@@ -19,7 +19,7 @@
 #include <rte_memcpy.h>
 #include <rte_ring.h>
 #include <rte_jhash.h>
-#include <rte_hash_crc.h>
+#include <rte_crc_arm64.h>
 #include <rte_tailq.h>

 #include "rte_efd.h"
(END)

Causes:

../lib/librte_hash/rte_crc_arm64.h: In function 'rte_hash_crc_set_alg':
../lib/librte_hash/rte_crc_arm64.h:77:7: error: 'CRC32_ARM64' undeclared (first use in this function)
   77 |  case CRC32_ARM64:
      |       ^~~~~~~~~~~
../lib/librte_hash/rte_crc_arm64.h:77:7: note: each undeclared identifier is reported only once for each function it appears in
../lib/librte_hash/rte_crc_arm64.h:79:10: error: 'CRC32_SW' undeclared (first use in this function)
   79 |    alg = CRC32_SW;
      |          ^~~~~~~~
../lib/librte_hash/rte_crc_arm64.h:82:3: error: 'crc32_alg' undeclared (first use in this function)
   82 |   crc32_alg = alg;
      |   ^~~~~~~~~
../lib/librte_hash/rte_crc_arm64.h: In function 'rte_hash_crc_init_alg':
../lib/librte_hash/rte_crc_arm64.h:92:23: error: 'CRC32_ARM64' undeclared (first use in this function)
   92 |  rte_hash_crc_set_alg(CRC32_ARM64);

Thanks,
Pavan.




^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [RFC] hash: unify crc32 API header for x86 and ARM
  2020-05-11 10:23       ` Pavan Nikhilesh Bhagavatula
@ 2020-05-11 10:27         ` Ananyev, Konstantin
  2020-05-11 10:57           ` Pavan Nikhilesh Bhagavatula
  0 siblings, 1 reply; 16+ messages in thread
From: Ananyev, Konstantin @ 2020-05-11 10:27 UTC (permalink / raw)
  To: Pavan Nikhilesh Bhagavatula, Jerin Jacob Kollanukkaran, thomas,
	Wang, Yipeng1, Gobriel, Sameh, Richardson, Bruce, Ruifeng Wang
  Cc: dev


> 
> >> >> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> >> >>
> >> >> Merge crc32 hash calculation public API headers for x86 and ARM,
> >> >> split implementations of x86 and ARM into their respective private
> >> >> headers.
> >> >> This reduces the ifdef code clutter while keeping current ABI
> >intact.
> >> >>
> >> >> Although we install `rte_crc_arm64.h` it is not used in any of the lib
> >or
> >> >> drivers layers. All the libs and drivers use `rte_hash_crc.h` which
> >falls
> >> >> back to SW crc32 calculation for ARM platform.
> >> >>
> >> >> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> >> >> ---
> >> >>
> >> >>  Currently, if application incorrectly sets CRC32_ARM64 as crc32
> >> >algorithm
> >> >>  through `rte_hash_crc_set_alg()` on x86 or vice-versa we fallback
> >to
> >> >algorithm
> >> >>  set previously via `rte_hash_crc_set_alg()` instead of setting the
> >best
> >> >>  available.
> >> >>  This behaviour should probably change to setting the best
> >available
> >> >algorithm
> >> >>  and is up for discussion.
> >> >>
> >> >>  app/test/test_hash.c            |   6 +
> >> >>  lib/librte_hash/Makefile        |   5 -
> >> >>  lib/librte_hash/crc_arm64.h     |  67 +++++++++++
> >> >>  lib/librte_hash/crc_x86.h       |  68 +++++++++++
> >> >>  lib/librte_hash/meson.build     |   3 +-
> >> >>  lib/librte_hash/rte_crc_arm64.h | 183 ------------------------------
> >> >>  lib/librte_hash/rte_hash_crc.h  | 193 +++++++++++++--------------
> >----
> >> >-
> >> >>  7 files changed, 219 insertions(+), 306 deletions(-)
> >> >>  create mode 100644 lib/librte_hash/crc_arm64.h
> >> >>  create mode 100644 lib/librte_hash/crc_x86.h
> >> >>  delete mode 100644 lib/librte_hash/rte_crc_arm64.h
> >> >>
> >> >> diff --git a/app/test/test_hash.c b/app/test/test_hash.c
> >> >> index afa3a1a3c..7bd457dac 100644
> >> >> --- a/app/test/test_hash.c
> >> >> +++ b/app/test/test_hash.c
> >> >> @@ -195,7 +195,13 @@ test_crc32_hash_alg_equiv(void)
> >> >>  	}
> >> >>
> >> >>  	/* Resetting to best available algorithm */
> >> >> +#if defined RTE_ARCH_X86
> >> >>  	rte_hash_crc_set_alg(CRC32_SSE42_x64);
> >> >> +#elif defined RTE_ARCH_ARM64
> >> >> +	rte_hash_crc_set_alg(CRC32_ARM64);
> >> >> +#else
> >> >> +	rte_hash_crc_set_alg(CRC32_SW);
> >> >> +#endif
> >> >>
> >> >>  	if (i == CRC32_ITERATIONS)
> >> >>  		return 0;
> >> >> diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile
> >> >> index ec9f86499..f640afc42 100644
> >> >> --- a/lib/librte_hash/Makefile
> >> >> +++ b/lib/librte_hash/Makefile
> >> >> @@ -19,11 +19,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_HASH) +=
> >> >rte_fbk_hash.c
> >> >>  # install this header file
> >> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include := rte_hash.h
> >> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include +=
> >rte_hash_crc.h
> >> >> -ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
> >> >> -ifneq ($(findstring RTE_MACHINE_CPUFLAG_CRC32,$(CFLAGS)),)
> >> >> -SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include +=
> >rte_crc_arm64.h
> >> >> -endif
> >> >> -endif
> >> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_jhash.h
> >> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_thash.h
> >> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include +=
> >rte_fbk_hash.h
> >> >> diff --git a/lib/librte_hash/crc_arm64.h
> >b/lib/librte_hash/crc_arm64.h
> >> >> new file mode 100644
> >> >> index 000000000..8e75f8297
> >> >
> >> >Wouldn't that break 'make  install T=...'?
> >>
> >> My bad I verified with meson and it was building fine.
> >>
> >> >As now rte_hash_crc.h includes not public headers (crc_x86.h, etc.).
> >> >Same question about external apps, where they would get from
> >these
> >> >headers?
> >>
> >> I think in the next version we can directly have the arch specific
> >functions
> >> Implemented in rte_hash_crc.h. Since its pretty stable code and
> >overhead of extra
> >> ~120 lines.
> >
> >Ok... but why not then just leave arch specific headers, as they are right
> >now?
> >What is wrong with current approach?
> 
> The problem is if any application directly includes only rte_crc_arm64.h
> (completely legal) it will break the build.

But we can probably mark rte_crc_arm64.h as internal, and warn users not to
include it directly (same for rte_crc_x86.h and any other arch specific headers). 

> 
> Example:
> 
> diff --git a/lib/librte_efd/rte_efd.c b/lib/librte_efd/rte_efd.c
> index 6a799556d..318670940 100644
> --- a/lib/librte_efd/rte_efd.c
> +++ b/lib/librte_efd/rte_efd.c
> @@ -19,7 +19,7 @@
>  #include <rte_memcpy.h>
>  #include <rte_ring.h>
>  #include <rte_jhash.h>
> -#include <rte_hash_crc.h>
> +#include <rte_crc_arm64.h>
>  #include <rte_tailq.h>
> 
>  #include "rte_efd.h"
> (END)
> 
> Causes:
> 
> ../lib/librte_hash/rte_crc_arm64.h: In function 'rte_hash_crc_set_alg':
> ../lib/librte_hash/rte_crc_arm64.h:77:7: error: 'CRC32_ARM64' undeclared (first use in this function)
>    77 |  case CRC32_ARM64:
>       |       ^~~~~~~~~~~
> ../lib/librte_hash/rte_crc_arm64.h:77:7: note: each undeclared identifier is reported only once for each function it appears in
> ../lib/librte_hash/rte_crc_arm64.h:79:10: error: 'CRC32_SW' undeclared (first use in this function)
>    79 |    alg = CRC32_SW;
>       |          ^~~~~~~~
> ../lib/librte_hash/rte_crc_arm64.h:82:3: error: 'crc32_alg' undeclared (first use in this function)
>    82 |   crc32_alg = alg;
>       |   ^~~~~~~~~
> ../lib/librte_hash/rte_crc_arm64.h: In function 'rte_hash_crc_init_alg':
> ../lib/librte_hash/rte_crc_arm64.h:92:23: error: 'CRC32_ARM64' undeclared (first use in this function)
>    92 |  rte_hash_crc_set_alg(CRC32_ARM64);
> 
> Thanks,
> Pavan.
> 
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [RFC] hash: unify crc32 API header for x86 and ARM
  2020-05-11 10:27         ` Ananyev, Konstantin
@ 2020-05-11 10:57           ` Pavan Nikhilesh Bhagavatula
  2020-05-11 12:10             ` Ananyev, Konstantin
  0 siblings, 1 reply; 16+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2020-05-11 10:57 UTC (permalink / raw)
  To: Ananyev, Konstantin, Jerin Jacob Kollanukkaran, thomas, Wang,
	Yipeng1, Gobriel, Sameh, Richardson, Bruce, Ruifeng Wang
  Cc: dev

>> >> >> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>> >> >>
>> >> >> Merge crc32 hash calculation public API headers for x86 and
>ARM,
>> >> >> split implementations of x86 and ARM into their respective
>private
>> >> >> headers.
>> >> >> This reduces the ifdef code clutter while keeping current ABI
>> >intact.
>> >> >>
>> >> >> Although we install `rte_crc_arm64.h` it is not used in any of the
>lib
>> >or
>> >> >> drivers layers. All the libs and drivers use `rte_hash_crc.h` which
>> >falls
>> >> >> back to SW crc32 calculation for ARM platform.
>> >> >>
>> >> >> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
>> >> >> ---
>> >> >>
>> >> >>  Currently, if application incorrectly sets CRC32_ARM64 as crc32
>> >> >algorithm
>> >> >>  through `rte_hash_crc_set_alg()` on x86 or vice-versa we
>fallback
>> >to
>> >> >algorithm
>> >> >>  set previously via `rte_hash_crc_set_alg()` instead of setting
>the
>> >best
>> >> >>  available.
>> >> >>  This behaviour should probably change to setting the best
>> >available
>> >> >algorithm
>> >> >>  and is up for discussion.
>> >> >>
>> >> >>  app/test/test_hash.c            |   6 +
>> >> >>  lib/librte_hash/Makefile        |   5 -
>> >> >>  lib/librte_hash/crc_arm64.h     |  67 +++++++++++
>> >> >>  lib/librte_hash/crc_x86.h       |  68 +++++++++++
>> >> >>  lib/librte_hash/meson.build     |   3 +-
>> >> >>  lib/librte_hash/rte_crc_arm64.h | 183 ------------------------------
>> >> >>  lib/librte_hash/rte_hash_crc.h  | 193 +++++++++++++-----------
>---
>> >----
>> >> >-
>> >> >>  7 files changed, 219 insertions(+), 306 deletions(-)
>> >> >>  create mode 100644 lib/librte_hash/crc_arm64.h
>> >> >>  create mode 100644 lib/librte_hash/crc_x86.h
>> >> >>  delete mode 100644 lib/librte_hash/rte_crc_arm64.h
>> >> >>
>> >> >> diff --git a/app/test/test_hash.c b/app/test/test_hash.c
>> >> >> index afa3a1a3c..7bd457dac 100644
>> >> >> --- a/app/test/test_hash.c
>> >> >> +++ b/app/test/test_hash.c
>> >> >> @@ -195,7 +195,13 @@ test_crc32_hash_alg_equiv(void)
>> >> >>  	}
>> >> >>
>> >> >>  	/* Resetting to best available algorithm */
>> >> >> +#if defined RTE_ARCH_X86
>> >> >>  	rte_hash_crc_set_alg(CRC32_SSE42_x64);
>> >> >> +#elif defined RTE_ARCH_ARM64
>> >> >> +	rte_hash_crc_set_alg(CRC32_ARM64);
>> >> >> +#else
>> >> >> +	rte_hash_crc_set_alg(CRC32_SW);
>> >> >> +#endif
>> >> >>
>> >> >>  	if (i == CRC32_ITERATIONS)
>> >> >>  		return 0;
>> >> >> diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile
>> >> >> index ec9f86499..f640afc42 100644
>> >> >> --- a/lib/librte_hash/Makefile
>> >> >> +++ b/lib/librte_hash/Makefile
>> >> >> @@ -19,11 +19,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_HASH) +=
>> >> >rte_fbk_hash.c
>> >> >>  # install this header file
>> >> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include := rte_hash.h
>> >> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include +=
>> >rte_hash_crc.h
>> >> >> -ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
>> >> >> -ifneq ($(findstring
>RTE_MACHINE_CPUFLAG_CRC32,$(CFLAGS)),)
>> >> >> -SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include +=
>> >rte_crc_arm64.h
>> >> >> -endif
>> >> >> -endif
>> >> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include +=
>rte_jhash.h
>> >> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include +=
>rte_thash.h
>> >> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include +=
>> >rte_fbk_hash.h
>> >> >> diff --git a/lib/librte_hash/crc_arm64.h
>> >b/lib/librte_hash/crc_arm64.h
>> >> >> new file mode 100644
>> >> >> index 000000000..8e75f8297
>> >> >
>> >> >Wouldn't that break 'make  install T=...'?
>> >>
>> >> My bad I verified with meson and it was building fine.
>> >>
>> >> >As now rte_hash_crc.h includes not public headers (crc_x86.h,
>etc.).
>> >> >Same question about external apps, where they would get from
>> >these
>> >> >headers?
>> >>
>> >> I think in the next version we can directly have the arch specific
>> >functions
>> >> Implemented in rte_hash_crc.h. Since its pretty stable code and
>> >overhead of extra
>> >> ~120 lines.
>> >
>> >Ok... but why not then just leave arch specific headers, as they are
>right
>> >now?
>> >What is wrong with current approach?
>>
>> The problem is if any application directly includes only
>rte_crc_arm64.h
>> (completely legal) it will break the build.
>
>But we can probably mark rte_crc_arm64.h as internal, and warn users
>not to
>include it directly (same for rte_crc_x86.h and any other arch specific
>headers).

Yes but I think merging them would be a cleaner, number of constructors would be 
one and maybe we could select the best available algorithm on a given platform when 
application requests unsupported one.

As Yipeng mentioned do you thing having a indirect call instead of runtime branch be 
depreciative in terms of performance?

>
>>
>> Example:
>>
>> diff --git a/lib/librte_efd/rte_efd.c b/lib/librte_efd/rte_efd.c
>> index 6a799556d..318670940 100644
>> --- a/lib/librte_efd/rte_efd.c
>> +++ b/lib/librte_efd/rte_efd.c
>> @@ -19,7 +19,7 @@
>>  #include <rte_memcpy.h>
>>  #include <rte_ring.h>
>>  #include <rte_jhash.h>
>> -#include <rte_hash_crc.h>
>> +#include <rte_crc_arm64.h>
>>  #include <rte_tailq.h>
>>
>>  #include "rte_efd.h"
>> (END)
>>
>> Causes:
>>
>> ../lib/librte_hash/rte_crc_arm64.h: In function
>'rte_hash_crc_set_alg':
>> ../lib/librte_hash/rte_crc_arm64.h:77:7: error: 'CRC32_ARM64'
>undeclared (first use in this function)
>>    77 |  case CRC32_ARM64:
>>       |       ^~~~~~~~~~~
>> ../lib/librte_hash/rte_crc_arm64.h:77:7: note: each undeclared
>identifier is reported only once for each function it appears in
>> ../lib/librte_hash/rte_crc_arm64.h:79:10: error: 'CRC32_SW'
>undeclared (first use in this function)
>>    79 |    alg = CRC32_SW;
>>       |          ^~~~~~~~
>> ../lib/librte_hash/rte_crc_arm64.h:82:3: error: 'crc32_alg' undeclared
>(first use in this function)
>>    82 |   crc32_alg = alg;
>>       |   ^~~~~~~~~
>> ../lib/librte_hash/rte_crc_arm64.h: In function
>'rte_hash_crc_init_alg':
>> ../lib/librte_hash/rte_crc_arm64.h:92:23: error: 'CRC32_ARM64'
>undeclared (first use in this function)
>>    92 |  rte_hash_crc_set_alg(CRC32_ARM64);
>>
>> Thanks,
>> Pavan.
>>
>>


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [RFC] hash: unify crc32 API header for x86 and ARM
  2020-05-11 10:57           ` Pavan Nikhilesh Bhagavatula
@ 2020-05-11 12:10             ` Ananyev, Konstantin
  2020-05-11 12:32               ` Pavan Nikhilesh Bhagavatula
  0 siblings, 1 reply; 16+ messages in thread
From: Ananyev, Konstantin @ 2020-05-11 12:10 UTC (permalink / raw)
  To: Pavan Nikhilesh Bhagavatula, Jerin Jacob Kollanukkaran, thomas,
	Wang, Yipeng1, Gobriel, Sameh, Richardson, Bruce, Ruifeng Wang
  Cc: dev

> 
> >> >> >> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> >> >> >>
> >> >> >> Merge crc32 hash calculation public API headers for x86 and
> >ARM,
> >> >> >> split implementations of x86 and ARM into their respective
> >private
> >> >> >> headers.
> >> >> >> This reduces the ifdef code clutter while keeping current ABI
> >> >intact.
> >> >> >>
> >> >> >> Although we install `rte_crc_arm64.h` it is not used in any of the
> >lib
> >> >or
> >> >> >> drivers layers. All the libs and drivers use `rte_hash_crc.h` which
> >> >falls
> >> >> >> back to SW crc32 calculation for ARM platform.
> >> >> >>
> >> >> >> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> >> >> >> ---
> >> >> >>
> >> >> >>  Currently, if application incorrectly sets CRC32_ARM64 as crc32
> >> >> >algorithm
> >> >> >>  through `rte_hash_crc_set_alg()` on x86 or vice-versa we
> >fallback
> >> >to
> >> >> >algorithm
> >> >> >>  set previously via `rte_hash_crc_set_alg()` instead of setting
> >the
> >> >best
> >> >> >>  available.
> >> >> >>  This behaviour should probably change to setting the best
> >> >available
> >> >> >algorithm
> >> >> >>  and is up for discussion.
> >> >> >>
> >> >> >>  app/test/test_hash.c            |   6 +
> >> >> >>  lib/librte_hash/Makefile        |   5 -
> >> >> >>  lib/librte_hash/crc_arm64.h     |  67 +++++++++++
> >> >> >>  lib/librte_hash/crc_x86.h       |  68 +++++++++++
> >> >> >>  lib/librte_hash/meson.build     |   3 +-
> >> >> >>  lib/librte_hash/rte_crc_arm64.h | 183 ------------------------------
> >> >> >>  lib/librte_hash/rte_hash_crc.h  | 193 +++++++++++++-----------
> >---
> >> >----
> >> >> >-
> >> >> >>  7 files changed, 219 insertions(+), 306 deletions(-)
> >> >> >>  create mode 100644 lib/librte_hash/crc_arm64.h
> >> >> >>  create mode 100644 lib/librte_hash/crc_x86.h
> >> >> >>  delete mode 100644 lib/librte_hash/rte_crc_arm64.h
> >> >> >>
> >> >> >> diff --git a/app/test/test_hash.c b/app/test/test_hash.c
> >> >> >> index afa3a1a3c..7bd457dac 100644
> >> >> >> --- a/app/test/test_hash.c
> >> >> >> +++ b/app/test/test_hash.c
> >> >> >> @@ -195,7 +195,13 @@ test_crc32_hash_alg_equiv(void)
> >> >> >>  	}
> >> >> >>
> >> >> >>  	/* Resetting to best available algorithm */
> >> >> >> +#if defined RTE_ARCH_X86
> >> >> >>  	rte_hash_crc_set_alg(CRC32_SSE42_x64);
> >> >> >> +#elif defined RTE_ARCH_ARM64
> >> >> >> +	rte_hash_crc_set_alg(CRC32_ARM64);
> >> >> >> +#else
> >> >> >> +	rte_hash_crc_set_alg(CRC32_SW);
> >> >> >> +#endif
> >> >> >>
> >> >> >>  	if (i == CRC32_ITERATIONS)
> >> >> >>  		return 0;
> >> >> >> diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile
> >> >> >> index ec9f86499..f640afc42 100644
> >> >> >> --- a/lib/librte_hash/Makefile
> >> >> >> +++ b/lib/librte_hash/Makefile
> >> >> >> @@ -19,11 +19,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_HASH) +=
> >> >> >rte_fbk_hash.c
> >> >> >>  # install this header file
> >> >> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include := rte_hash.h
> >> >> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include +=
> >> >rte_hash_crc.h
> >> >> >> -ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
> >> >> >> -ifneq ($(findstring
> >RTE_MACHINE_CPUFLAG_CRC32,$(CFLAGS)),)
> >> >> >> -SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include +=
> >> >rte_crc_arm64.h
> >> >> >> -endif
> >> >> >> -endif
> >> >> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include +=
> >rte_jhash.h
> >> >> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include +=
> >rte_thash.h
> >> >> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include +=
> >> >rte_fbk_hash.h
> >> >> >> diff --git a/lib/librte_hash/crc_arm64.h
> >> >b/lib/librte_hash/crc_arm64.h
> >> >> >> new file mode 100644
> >> >> >> index 000000000..8e75f8297
> >> >> >
> >> >> >Wouldn't that break 'make  install T=...'?
> >> >>
> >> >> My bad I verified with meson and it was building fine.
> >> >>
> >> >> >As now rte_hash_crc.h includes not public headers (crc_x86.h,
> >etc.).
> >> >> >Same question about external apps, where they would get from
> >> >these
> >> >> >headers?
> >> >>
> >> >> I think in the next version we can directly have the arch specific
> >> >functions
> >> >> Implemented in rte_hash_crc.h. Since its pretty stable code and
> >> >overhead of extra
> >> >> ~120 lines.
> >> >
> >> >Ok... but why not then just leave arch specific headers, as they are
> >right
> >> >now?
> >> >What is wrong with current approach?
> >>
> >> The problem is if any application directly includes only
> >rte_crc_arm64.h
> >> (completely legal) it will break the build.
> >
> >But we can probably mark rte_crc_arm64.h as internal, and warn users
> >not to
> >include it directly (same for rte_crc_x86.h and any other arch specific
> >headers).
> 
> Yes but I think merging them would be a cleaner, number of constructors would be
> one and maybe we could select the best available algorithm on a given platform when
> application requests unsupported one.

Ok, but we can still have one constructor, and two (or more) different arch specific headers,
that would be included into main header conditionally by  #ifdef RTE_ARCH_....

> 
> As Yipeng mentioned do you thing having a indirect call instead of runtime branch be
> depreciative in terms of performance?

I think run-time branch by some global var would be much faster than indirect function call
(at least on IA).

> 
> >
> >>
> >> Example:
> >>
> >> diff --git a/lib/librte_efd/rte_efd.c b/lib/librte_efd/rte_efd.c
> >> index 6a799556d..318670940 100644
> >> --- a/lib/librte_efd/rte_efd.c
> >> +++ b/lib/librte_efd/rte_efd.c
> >> @@ -19,7 +19,7 @@
> >>  #include <rte_memcpy.h>
> >>  #include <rte_ring.h>
> >>  #include <rte_jhash.h>
> >> -#include <rte_hash_crc.h>
> >> +#include <rte_crc_arm64.h>
> >>  #include <rte_tailq.h>
> >>
> >>  #include "rte_efd.h"
> >> (END)
> >>
> >> Causes:
> >>
> >> ../lib/librte_hash/rte_crc_arm64.h: In function
> >'rte_hash_crc_set_alg':
> >> ../lib/librte_hash/rte_crc_arm64.h:77:7: error: 'CRC32_ARM64'
> >undeclared (first use in this function)
> >>    77 |  case CRC32_ARM64:
> >>       |       ^~~~~~~~~~~
> >> ../lib/librte_hash/rte_crc_arm64.h:77:7: note: each undeclared
> >identifier is reported only once for each function it appears in
> >> ../lib/librte_hash/rte_crc_arm64.h:79:10: error: 'CRC32_SW'
> >undeclared (first use in this function)
> >>    79 |    alg = CRC32_SW;
> >>       |          ^~~~~~~~
> >> ../lib/librte_hash/rte_crc_arm64.h:82:3: error: 'crc32_alg' undeclared
> >(first use in this function)
> >>    82 |   crc32_alg = alg;
> >>       |   ^~~~~~~~~
> >> ../lib/librte_hash/rte_crc_arm64.h: In function
> >'rte_hash_crc_init_alg':
> >> ../lib/librte_hash/rte_crc_arm64.h:92:23: error: 'CRC32_ARM64'
> >undeclared (first use in this function)
> >>    92 |  rte_hash_crc_set_alg(CRC32_ARM64);
> >>
> >> Thanks,
> >> Pavan.
> >>
> >>


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [RFC] hash: unify crc32 API header for x86 and ARM
  2020-05-11 12:10             ` Ananyev, Konstantin
@ 2020-05-11 12:32               ` Pavan Nikhilesh Bhagavatula
  0 siblings, 0 replies; 16+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2020-05-11 12:32 UTC (permalink / raw)
  To: Ananyev, Konstantin, Jerin Jacob Kollanukkaran, thomas, Wang,
	Yipeng1, Gobriel, Sameh, Richardson, Bruce, Ruifeng Wang
  Cc: dev

>> >> >> >> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>> >> >> >>
>> >> >> >> Merge crc32 hash calculation public API headers for x86 and
>> >ARM,
>> >> >> >> split implementations of x86 and ARM into their respective
>> >private
>> >> >> >> headers.
>> >> >> >> This reduces the ifdef code clutter while keeping current ABI
>> >> >intact.
>> >> >> >>
>> >> >> >> Although we install `rte_crc_arm64.h` it is not used in any of
>the
>> >lib
>> >> >or
>> >> >> >> drivers layers. All the libs and drivers use `rte_hash_crc.h`
>which
>> >> >falls
>> >> >> >> back to SW crc32 calculation for ARM platform.
>> >> >> >>
>> >> >> >> Signed-off-by: Pavan Nikhilesh
><pbhagavatula@marvell.com>
>> >> >> >> ---
>> >> >> >>
>> >> >> >>  Currently, if application incorrectly sets CRC32_ARM64 as
>crc32
>> >> >> >algorithm
>> >> >> >>  through `rte_hash_crc_set_alg()` on x86 or vice-versa we
>> >fallback
>> >> >to
>> >> >> >algorithm
>> >> >> >>  set previously via `rte_hash_crc_set_alg()` instead of setting
>> >the
>> >> >best
>> >> >> >>  available.
>> >> >> >>  This behaviour should probably change to setting the best
>> >> >available
>> >> >> >algorithm
>> >> >> >>  and is up for discussion.
>> >> >> >>
>> >> >> >>  app/test/test_hash.c            |   6 +
>> >> >> >>  lib/librte_hash/Makefile        |   5 -
>> >> >> >>  lib/librte_hash/crc_arm64.h     |  67 +++++++++++
>> >> >> >>  lib/librte_hash/crc_x86.h       |  68 +++++++++++
>> >> >> >>  lib/librte_hash/meson.build     |   3 +-
>> >> >> >>  lib/librte_hash/rte_crc_arm64.h | 183 --------------------------
>----
>> >> >> >>  lib/librte_hash/rte_hash_crc.h  | 193 +++++++++++++-------
>----
>> >---
>> >> >----
>> >> >> >-
>> >> >> >>  7 files changed, 219 insertions(+), 306 deletions(-)
>> >> >> >>  create mode 100644 lib/librte_hash/crc_arm64.h
>> >> >> >>  create mode 100644 lib/librte_hash/crc_x86.h
>> >> >> >>  delete mode 100644 lib/librte_hash/rte_crc_arm64.h
>> >> >> >>
>> >> >> >> diff --git a/app/test/test_hash.c b/app/test/test_hash.c
>> >> >> >> index afa3a1a3c..7bd457dac 100644
>> >> >> >> --- a/app/test/test_hash.c
>> >> >> >> +++ b/app/test/test_hash.c
>> >> >> >> @@ -195,7 +195,13 @@ test_crc32_hash_alg_equiv(void)
>> >> >> >>  	}
>> >> >> >>
>> >> >> >>  	/* Resetting to best available algorithm */
>> >> >> >> +#if defined RTE_ARCH_X86
>> >> >> >>  	rte_hash_crc_set_alg(CRC32_SSE42_x64);
>> >> >> >> +#elif defined RTE_ARCH_ARM64
>> >> >> >> +	rte_hash_crc_set_alg(CRC32_ARM64);
>> >> >> >> +#else
>> >> >> >> +	rte_hash_crc_set_alg(CRC32_SW);
>> >> >> >> +#endif
>> >> >> >>
>> >> >> >>  	if (i == CRC32_ITERATIONS)
>> >> >> >>  		return 0;
>> >> >> >> diff --git a/lib/librte_hash/Makefile
>b/lib/librte_hash/Makefile
>> >> >> >> index ec9f86499..f640afc42 100644
>> >> >> >> --- a/lib/librte_hash/Makefile
>> >> >> >> +++ b/lib/librte_hash/Makefile
>> >> >> >> @@ -19,11 +19,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_HASH)
>+=
>> >> >> >rte_fbk_hash.c
>> >> >> >>  # install this header file
>> >> >> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include :=
>rte_hash.h
>> >> >> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include +=
>> >> >rte_hash_crc.h
>> >> >> >> -ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
>> >> >> >> -ifneq ($(findstring
>> >RTE_MACHINE_CPUFLAG_CRC32,$(CFLAGS)),)
>> >> >> >> -SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include +=
>> >> >rte_crc_arm64.h
>> >> >> >> -endif
>> >> >> >> -endif
>> >> >> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include +=
>> >rte_jhash.h
>> >> >> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include +=
>> >rte_thash.h
>> >> >> >>  SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include +=
>> >> >rte_fbk_hash.h
>> >> >> >> diff --git a/lib/librte_hash/crc_arm64.h
>> >> >b/lib/librte_hash/crc_arm64.h
>> >> >> >> new file mode 100644
>> >> >> >> index 000000000..8e75f8297
>> >> >> >
>> >> >> >Wouldn't that break 'make  install T=...'?
>> >> >>
>> >> >> My bad I verified with meson and it was building fine.
>> >> >>
>> >> >> >As now rte_hash_crc.h includes not public headers (crc_x86.h,
>> >etc.).
>> >> >> >Same question about external apps, where they would get
>from
>> >> >these
>> >> >> >headers?
>> >> >>
>> >> >> I think in the next version we can directly have the arch specific
>> >> >functions
>> >> >> Implemented in rte_hash_crc.h. Since its pretty stable code and
>> >> >overhead of extra
>> >> >> ~120 lines.
>> >> >
>> >> >Ok... but why not then just leave arch specific headers, as they
>are
>> >right
>> >> >now?
>> >> >What is wrong with current approach?
>> >>
>> >> The problem is if any application directly includes only
>> >rte_crc_arm64.h
>> >> (completely legal) it will break the build.
>> >
>> >But we can probably mark rte_crc_arm64.h as internal, and warn
>users
>> >not to
>> >include it directly (same for rte_crc_x86.h and any other arch specific
>> >headers).
>>
>> Yes but I think merging them would be a cleaner, number of
>constructors would be
>> one and maybe we could select the best available algorithm on a
>given platform when
>> application requests unsupported one.
>
>Ok, but we can still have one constructor, and two (or more) different
>arch specific headers,
>that would be included into main header conditionally by  #ifdef
>RTE_ARCH_....
>
>>
>> As Yipeng mentioned do you thing having a indirect call instead of
>runtime branch be
>> depreciative in terms of performance?
>
>I think run-time branch by some global var would be much faster than
>indirect function call
>(at least on IA).
>

Ok, makes sense as in a tight loop the run-time branch would be hoisted out.
Let me draft a RFC v2.

>>
>> >
>> >>
>> >> Example:
>> >>
>> >> diff --git a/lib/librte_efd/rte_efd.c b/lib/librte_efd/rte_efd.c
>> >> index 6a799556d..318670940 100644
>> >> --- a/lib/librte_efd/rte_efd.c
>> >> +++ b/lib/librte_efd/rte_efd.c
>> >> @@ -19,7 +19,7 @@
>> >>  #include <rte_memcpy.h>
>> >>  #include <rte_ring.h>
>> >>  #include <rte_jhash.h>
>> >> -#include <rte_hash_crc.h>
>> >> +#include <rte_crc_arm64.h>
>> >>  #include <rte_tailq.h>
>> >>
>> >>  #include "rte_efd.h"
>> >> (END)
>> >>
>> >> Causes:
>> >>
>> >> ../lib/librte_hash/rte_crc_arm64.h: In function
>> >'rte_hash_crc_set_alg':
>> >> ../lib/librte_hash/rte_crc_arm64.h:77:7: error: 'CRC32_ARM64'
>> >undeclared (first use in this function)
>> >>    77 |  case CRC32_ARM64:
>> >>       |       ^~~~~~~~~~~
>> >> ../lib/librte_hash/rte_crc_arm64.h:77:7: note: each undeclared
>> >identifier is reported only once for each function it appears in
>> >> ../lib/librte_hash/rte_crc_arm64.h:79:10: error: 'CRC32_SW'
>> >undeclared (first use in this function)
>> >>    79 |    alg = CRC32_SW;
>> >>       |          ^~~~~~~~
>> >> ../lib/librte_hash/rte_crc_arm64.h:82:3: error: 'crc32_alg'
>undeclared
>> >(first use in this function)
>> >>    82 |   crc32_alg = alg;
>> >>       |   ^~~~~~~~~
>> >> ../lib/librte_hash/rte_crc_arm64.h: In function
>> >'rte_hash_crc_init_alg':
>> >> ../lib/librte_hash/rte_crc_arm64.h:92:23: error: 'CRC32_ARM64'
>> >undeclared (first use in this function)
>> >>    92 |  rte_hash_crc_set_alg(CRC32_ARM64);
>> >>
>> >> Thanks,
>> >> Pavan.
>> >>
>> >>


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [dpdk-dev]  [RFC v2] hash: unify crc32 API header for x86 and ARM
  2020-04-29 18:05 [dpdk-dev] [RFC] hash: unify crc32 API header for x86 and ARM pbhagavatula
                   ` (2 preceding siblings ...)
  2020-05-08 12:55 ` Ananyev, Konstantin
@ 2020-05-12 20:40 ` pbhagavatula
  2020-05-13  3:04   ` Ruifeng Wang
  3 siblings, 1 reply; 16+ messages in thread
From: pbhagavatula @ 2020-05-12 20:40 UTC (permalink / raw)
  To: jerinj, konstantin.ananyev, harry.van.haaren, Yipeng Wang,
	Sameh Gobriel, Bruce Richardson, Ruifeng Wang
  Cc: dev, Pavan Nikhilesh

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Merge crc32 hash calculation public API headers for x86 and ARM.
Select the best available CRC32 algorithm when unsupported algorithm
on a given CPU architecture is requested by an application.

Previously, if an application directly includes `rte_crc_arm64.h` without
including `rte_hash_crc.h` it will fail to compile.
Although, `rte_crc_arm64.h` is no longer needed make it a dummy file for
ABI purposes.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 v2 Changes:
 - Don't remove `rte_crc_arm64.h` for ABI purposes.
 - Revert function pointer approach for performance reasons.
 - Select the best available algorithm based on the arch when user passes an
 unsupported crc32 algorithm.

 app/test/test_hash.c            |   6 ++
 lib/librte_hash/meson.build     |   6 +-
 lib/librte_hash/rte_crc_arm64.h | 175 +-------------------------------
 lib/librte_hash/rte_hash_crc.h  | 153 ++++++++++++++++++++--------
 4 files changed, 122 insertions(+), 218 deletions(-)

diff --git a/app/test/test_hash.c b/app/test/test_hash.c
index afa3a1a3c..7bd457dac 100644
--- a/app/test/test_hash.c
+++ b/app/test/test_hash.c
@@ -195,7 +195,13 @@ test_crc32_hash_alg_equiv(void)
 	}

 	/* Resetting to best available algorithm */
+#if defined RTE_ARCH_X86
 	rte_hash_crc_set_alg(CRC32_SSE42_x64);
+#elif defined RTE_ARCH_ARM64
+	rte_hash_crc_set_alg(CRC32_ARM64);
+#else
+	rte_hash_crc_set_alg(CRC32_SW);
+#endif

 	if (i == CRC32_ITERATIONS)
 		return 0;
diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build
index 6ab46ae9d..8a3cf2f64 100644
--- a/lib/librte_hash/meson.build
+++ b/lib/librte_hash/meson.build
@@ -1,12 +1,14 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation

-headers = files('rte_crc_arm64.h',
-	'rte_fbk_hash.h',
+headers = files('rte_fbk_hash.h',
 	'rte_hash_crc.h',
 	'rte_hash.h',
 	'rte_jhash.h',
 	'rte_thash.h')
+if dpdk_conf.has('RTE_ARCH_ARM64')
+	headers += files('rte_crc_arm64.h')
+endif

 sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c')
 deps += ['ring']
diff --git a/lib/librte_hash/rte_crc_arm64.h b/lib/librte_hash/rte_crc_arm64.h
index b4628cfc0..adfcafc7d 100644
--- a/lib/librte_hash/rte_crc_arm64.h
+++ b/lib/librte_hash/rte_crc_arm64.h
@@ -5,179 +5,6 @@
 #ifndef _RTE_CRC_ARM64_H_
 #define _RTE_CRC_ARM64_H_

-/**
- * @file
- *
- * RTE CRC arm64 Hash
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <stdint.h>
-#include <rte_cpuflags.h>
-#include <rte_branch_prediction.h>
-#include <rte_common.h>
-
-static inline uint32_t
-crc32c_arm64_u8(uint8_t data, uint32_t init_val)
-{
-	__asm__ volatile(
-			"crc32cb %w[crc], %w[crc], %w[value]"
-			: [crc] "+r" (init_val)
-			: [value] "r" (data));
-	return init_val;
-}
-
-static inline uint32_t
-crc32c_arm64_u16(uint16_t data, uint32_t init_val)
-{
-	__asm__ volatile(
-			"crc32ch %w[crc], %w[crc], %w[value]"
-			: [crc] "+r" (init_val)
-			: [value] "r" (data));
-	return init_val;
-}
-
-static inline uint32_t
-crc32c_arm64_u32(uint32_t data, uint32_t init_val)
-{
-	__asm__ volatile(
-			"crc32cw %w[crc], %w[crc], %w[value]"
-			: [crc] "+r" (init_val)
-			: [value] "r" (data));
-	return init_val;
-}
-
-static inline uint32_t
-crc32c_arm64_u64(uint64_t data, uint32_t init_val)
-{
-	__asm__ volatile(
-			"crc32cx %w[crc], %w[crc], %x[value]"
-			: [crc] "+r" (init_val)
-			: [value] "r" (data));
-	return init_val;
-}
-
-/**
- * Allow or disallow use of arm64 SIMD instrinsics for CRC32 hash
- * calculation.
- *
- * @param alg
- *   An OR of following flags:
- *   - (CRC32_SW) Don't use arm64 crc intrinsics
- *   - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available
- *
- */
-static inline void
-rte_hash_crc_set_alg(uint8_t alg)
-{
-	switch (alg) {
-	case CRC32_ARM64:
-		if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
-			alg = CRC32_SW;
-		/* fall-through */
-	case CRC32_SW:
-		crc32_alg = alg;
-		/* fall-through */
-	default:
-		break;
-	}
-}
-
-/* Setting the best available algorithm */
-RTE_INIT(rte_hash_crc_init_alg)
-{
-	rte_hash_crc_set_alg(CRC32_ARM64);
-}
-
-/**
- * Use single crc32 instruction to perform a hash on a 1 byte value.
- * Fall back to software crc32 implementation in case arm64 crc intrinsics is
- * not supported
- *
- * @param data
- *   Data to perform hash on.
- * @param init_val
- *   Value to initialise hash generator.
- * @return
- *   32bit calculated hash value.
- */
-static inline uint32_t
-rte_hash_crc_1byte(uint8_t data, uint32_t init_val)
-{
-	if (likely(crc32_alg & CRC32_ARM64))
-		return crc32c_arm64_u8(data, init_val);
-
-	return crc32c_1byte(data, init_val);
-}
-
-/**
- * Use single crc32 instruction to perform a hash on a 2 bytes value.
- * Fall back to software crc32 implementation in case arm64 crc intrinsics is
- * not supported
- *
- * @param data
- *   Data to perform hash on.
- * @param init_val
- *   Value to initialise hash generator.
- * @return
- *   32bit calculated hash value.
- */
-static inline uint32_t
-rte_hash_crc_2byte(uint16_t data, uint32_t init_val)
-{
-	if (likely(crc32_alg & CRC32_ARM64))
-		return crc32c_arm64_u16(data, init_val);
-
-	return crc32c_2bytes(data, init_val);
-}
-
-/**
- * Use single crc32 instruction to perform a hash on a 4 byte value.
- * Fall back to software crc32 implementation in case arm64 crc intrinsics is
- * not supported
- *
- * @param data
- *   Data to perform hash on.
- * @param init_val
- *   Value to initialise hash generator.
- * @return
- *   32bit calculated hash value.
- */
-static inline uint32_t
-rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
-{
-	if (likely(crc32_alg & CRC32_ARM64))
-		return crc32c_arm64_u32(data, init_val);
-
-	return crc32c_1word(data, init_val);
-}
-
-/**
- * Use single crc32 instruction to perform a hash on a 8 byte value.
- * Fall back to software crc32 implementation in case arm64 crc intrinsics is
- * not supported
- *
- * @param data
- *   Data to perform hash on.
- * @param init_val
- *   Value to initialise hash generator.
- * @return
- *   32bit calculated hash value.
- */
-static inline uint32_t
-rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
-{
-	if (likely(crc32_alg == CRC32_ARM64))
-		return crc32c_arm64_u64(data, init_val);
-
-	return crc32c_2words(data, init_val);
-}
-
-#ifdef __cplusplus
-}
-#endif
+#include "rte_hash_crc.h"

 #endif /* _RTE_CRC_ARM64_H_ */
diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h
index cf28031b3..eaba70c12 100644
--- a/lib/librte_hash/rte_hash_crc.h
+++ b/lib/librte_hash/rte_hash_crc.h
@@ -16,10 +16,12 @@ extern "C" {
 #endif

 #include <stdint.h>
-#include <rte_config.h>
-#include <rte_cpuflags.h>
+
 #include <rte_branch_prediction.h>
 #include <rte_common.h>
+#include <rte_config.h>
+#include <rte_cpuflags.h>
+#include <rte_log.h>

 /* Lookup tables for software implementation of CRC32C */
 static const uint32_t crc32c_tables[8][256] = {{
@@ -322,7 +324,7 @@ crc32c_2bytes(uint16_t data, uint32_t init_val)
 }

 static inline uint32_t
-crc32c_1word(uint32_t data, uint32_t init_val)
+crc32c_4bytes(uint32_t data, uint32_t init_val)
 {
 	uint32_t crc, term1, term2;
 	crc = init_val;
@@ -336,7 +338,7 @@ crc32c_1word(uint32_t data, uint32_t init_val)
 }

 static inline uint32_t
-crc32c_2words(uint64_t data, uint32_t init_val)
+crc32c_8bytes(uint64_t data, uint32_t init_val)
 {
 	uint32_t crc, term1, term2;
 	union {
@@ -358,6 +360,48 @@ crc32c_2words(uint64_t data, uint32_t init_val)
 	return crc;
 }

+#if defined(RTE_ARCH_ARM64) && defined(RTE_MACHINE_CPUFLAG_CRC32)
+static inline uint32_t
+crc32c_arm64_u8(uint8_t data, uint32_t init_val)
+{
+	__asm__ volatile(
+			"crc32cb %w[crc], %w[crc], %w[value]"
+			: [crc] "+r" (init_val)
+			: [value] "r" (data));
+	return init_val;
+}
+
+static inline uint32_t
+crc32c_arm64_u16(uint16_t data, uint32_t init_val)
+{
+	__asm__ volatile(
+			"crc32ch %w[crc], %w[crc], %w[value]"
+			: [crc] "+r" (init_val)
+			: [value] "r" (data));
+	return init_val;
+}
+
+static inline uint32_t
+crc32c_arm64_u32(uint32_t data, uint32_t init_val)
+{
+	__asm__ volatile(
+			"crc32cw %w[crc], %w[crc], %w[value]"
+			: [crc] "+r" (init_val)
+			: [value] "r" (data));
+	return init_val;
+}
+
+static inline uint32_t
+crc32c_arm64_u64(uint64_t data, uint32_t init_val)
+{
+	__asm__ volatile(
+			"crc32cx %w[crc], %w[crc], %x[value]"
+			: [crc] "+r" (init_val)
+			: [value] "r" (data));
+	return init_val;
+}
+#endif
+
 #if defined(RTE_ARCH_X86)
 static inline uint32_t
 crc32c_sse42_u8(uint8_t data, uint32_t init_val)
@@ -424,42 +468,69 @@ crc32c_sse42_u64(uint64_t data, uint64_t init_val)

 static uint8_t crc32_alg = CRC32_SW;

-#if defined(RTE_ARCH_ARM64) && defined(RTE_MACHINE_CPUFLAG_CRC32)
-#include "rte_crc_arm64.h"
-#else
-
 /**
- * Allow or disallow use of SSE4.2 instrinsics for CRC32 hash
+ * Allow or disallow use of SSE4.2/ARMv8 instrinsics for CRC32 hash
  * calculation.
  *
  * @param alg
  *   An OR of following flags:
- *   - (CRC32_SW) Don't use SSE4.2 intrinsics
+ *   - (CRC32_SW) Don't use SSE4.2 intrinsics (default non-[x86/ARMv8])
  *   - (CRC32_SSE42) Use SSE4.2 intrinsics if available
- *   - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default)
- *
+ *   - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default x86)
+ *   - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available
  */
 static inline void
 rte_hash_crc_set_alg(uint8_t alg)
 {
-#if defined(RTE_ARCH_X86)
-	if (alg == CRC32_SSE42_x64 &&
-			!rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T))
-		alg = CRC32_SSE42;
+	switch (alg) {
+	case CRC32_SSE42_x64:
+	case CRC32_SSE42:
+#if defined RTE_ARCH_X86
+		if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T))
+			crc32_alg = CRC32_SSE42;
+		else
+			crc32_alg = alg;
+#endif
+#if defined RTE_ARCH_ARM64
+		RTE_LOG(WARNING, HASH,
+			"Incorrect CRC32 algorithm requested setting best"
+			"available algorithm on the architecture\n");
+		rte_hash_crc_set_alg(CRC32_ARM64);
+#endif
+		break;
+	case CRC32_ARM64:
+#if defined RTE_ARCH_ARM64
+		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
+			crc32_alg = CRC32_ARM64;
 #endif
-	crc32_alg = alg;
+#if defined RTE_ARCH_X86
+		RTE_LOG(WARNING, HASH,
+			"Incorrect CRC32 algorithm requested setting best"
+			"available algorithm on the architecture\n");
+		rte_hash_crc_set_alg(CRC32_SSE42_x64);
+#endif
+		break;
+	case CRC32_SW:
+	default:
+		crc32_alg = CRC32_SW;
+	break;
+	}
 }

 /* Setting the best available algorithm */
 RTE_INIT(rte_hash_crc_init_alg)
 {
+#if defined RTE_ARCH_X86
 	rte_hash_crc_set_alg(CRC32_SSE42_x64);
+#elif defined RTE_ARCH_ARM64
+	rte_hash_crc_set_alg(CRC32_ARM64);
+#else
+	rte_hash_crc_set_alg(CRC32_SW);
+#endif
 }

 /**
- * Use single crc32 instruction to perform a hash on a byte value.
- * Fall back to software crc32 implementation in case SSE4.2 is
- * not supported
+ * Calculate crc32 hash value of 1bytes.
  *
  * @param data
  *   Data to perform hash on.
@@ -474,15 +545,15 @@ rte_hash_crc_1byte(uint8_t data, uint32_t init_val)
 #if defined RTE_ARCH_X86
 	if (likely(crc32_alg & CRC32_SSE42))
 		return crc32c_sse42_u8(data, init_val);
+#elif defined RTE_ARCH_ARM64
+	if (likely(crc32_alg & CRC32_ARM64))
+		return crc32c_arm64_u8(data, init_val);
 #endif
-
 	return crc32c_1byte(data, init_val);
 }

 /**
- * Use single crc32 instruction to perform a hash on a 2 bytes value.
- * Fall back to software crc32 implementation in case SSE4.2 is
- * not supported
+ * Calculate crc32 hash value of 2bytes.
  *
  * @param data
  *   Data to perform hash on.
@@ -497,15 +568,15 @@ rte_hash_crc_2byte(uint16_t data, uint32_t init_val)
 #if defined RTE_ARCH_X86
 	if (likely(crc32_alg & CRC32_SSE42))
 		return crc32c_sse42_u16(data, init_val);
+#elif defined RTE_ARCH_ARM64
+	if (likely(crc32_alg & CRC32_ARM64))
+		return crc32c_arm64_u16(data, init_val);
 #endif
-
 	return crc32c_2bytes(data, init_val);
 }

 /**
- * Use single crc32 instruction to perform a hash on a 4 byte value.
- * Fall back to software crc32 implementation in case SSE4.2 is
- * not supported
+ * Calculate crc32 hash value of 4bytes.
  *
  * @param data
  *   Data to perform hash on.
@@ -520,15 +591,15 @@ rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
 #if defined RTE_ARCH_X86
 	if (likely(crc32_alg & CRC32_SSE42))
 		return crc32c_sse42_u32(data, init_val);
+#elif defined RTE_ARCH_ARM64
+	if (likely(crc32_alg & CRC32_ARM64))
+		return crc32c_arm64_u32(data, init_val);
 #endif
-
-	return crc32c_1word(data, init_val);
+	return crc32c_4bytes(data, init_val);
 }

 /**
- * Use single crc32 instruction to perform a hash on a 8 byte value.
- * Fall back to software crc32 implementation in case SSE4.2 is
- * not supported
+ * Calculate crc32 hash value of 8bytes.
  *
  * @param data
  *   Data to perform hash on.
@@ -540,21 +611,19 @@ rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
 static inline uint32_t
 rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
 {
-#ifdef RTE_ARCH_X86_64
-	if (likely(crc32_alg == CRC32_SSE42_x64))
+#if defined RTE_ARCH_X86_64
+	if (likely(crc32_alg & CRC32_SSE42_x64))
 		return crc32c_sse42_u64(data, init_val);
-#endif
-
-#if defined RTE_ARCH_X86
+#elif defined RTE_ARCH_X86
 	if (likely(crc32_alg & CRC32_SSE42))
 		return crc32c_sse42_u64_mimic(data, init_val);
+#elif defined RTE_ARCH_ARM64
+	if (likely(crc32_alg & CRC32_ARM64))
+		return crc32c_arm64_u64(data, init_val);
 #endif
-
-	return crc32c_2words(data, init_val);
+	return crc32c_8bytes(data, init_val);
 }

-#endif
-
 /**
  * Calculate CRC32 hash on user-supplied byte array.
  *
--
2.17.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [RFC v2] hash: unify crc32 API header for x86 and ARM
  2020-05-12 20:40 ` [dpdk-dev] [RFC v2] " pbhagavatula
@ 2020-05-13  3:04   ` Ruifeng Wang
  2020-05-13 13:22     ` Ananyev, Konstantin
  0 siblings, 1 reply; 16+ messages in thread
From: Ruifeng Wang @ 2020-05-13  3:04 UTC (permalink / raw)
  To: pbhagavatula, jerinj, konstantin.ananyev, harry.van.haaren,
	Yipeng Wang, Sameh Gobriel, Bruce Richardson
  Cc: dev, nd


> -----Original Message-----
> From: pbhagavatula@marvell.com <pbhagavatula@marvell.com>
> Sent: Wednesday, May 13, 2020 4:40 AM
> To: jerinj@marvell.com; konstantin.ananyev@intel.com;
> harry.van.haaren@intel.com; Yipeng Wang <yipeng1.wang@intel.com>;
> Sameh Gobriel <sameh.gobriel@intel.com>; Bruce Richardson
> <bruce.richardson@intel.com>; Ruifeng Wang <Ruifeng.Wang@arm.com>
> Cc: dev@dpdk.org; Pavan Nikhilesh <pbhagavatula@marvell.com>
> Subject: [dpdk-dev] [RFC v2] hash: unify crc32 API header for x86 and ARM
> 
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> 
> Merge crc32 hash calculation public API headers for x86 and ARM.
> Select the best available CRC32 algorithm when unsupported algorithm on a
> given CPU architecture is requested by an application.
> 
> Previously, if an application directly includes `rte_crc_arm64.h` without
> including `rte_hash_crc.h` it will fail to compile.
> Although, `rte_crc_arm64.h` is no longer needed make it a dummy file for
> ABI purposes.
> 
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> ---
>  v2 Changes:
>  - Don't remove `rte_crc_arm64.h` for ABI purposes.
>  - Revert function pointer approach for performance reasons.
>  - Select the best available algorithm based on the arch when user passes an
> unsupported crc32 algorithm.
> 
Maybe split the patch? Changes to select the best available algorithm can be a separate one.

More ifdefs are added. Is it possible to have arch specific rte_hash_crc_xx implementations
like what was done in rte_crc_arm64.h, and include specific headers according to arch in rte_hash.crc.h?
For ABI purpose, rte_crc_arm64.h can be kept and it only includes the new arm64 specific header.

>  app/test/test_hash.c            |   6 ++
>  lib/librte_hash/meson.build     |   6 +-
>  lib/librte_hash/rte_crc_arm64.h | 175 +-------------------------------
> lib/librte_hash/rte_hash_crc.h  | 153 ++++++++++++++++++++--------
>  4 files changed, 122 insertions(+), 218 deletions(-)
> 
> diff --git a/app/test/test_hash.c b/app/test/test_hash.c index
> afa3a1a3c..7bd457dac 100644
> --- a/app/test/test_hash.c
> +++ b/app/test/test_hash.c
> @@ -195,7 +195,13 @@ test_crc32_hash_alg_equiv(void)
>  	}
> 
>  	/* Resetting to best available algorithm */
> +#if defined RTE_ARCH_X86
>  	rte_hash_crc_set_alg(CRC32_SSE42_x64);
> +#elif defined RTE_ARCH_ARM64
> +	rte_hash_crc_set_alg(CRC32_ARM64);
> +#else
> +	rte_hash_crc_set_alg(CRC32_SW);
> +#endif
> 
>  	if (i == CRC32_ITERATIONS)
>  		return 0;
> diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build index
> 6ab46ae9d..8a3cf2f64 100644
> --- a/lib/librte_hash/meson.build
> +++ b/lib/librte_hash/meson.build
> @@ -1,12 +1,14 @@
>  # SPDX-License-Identifier: BSD-3-Clause  # Copyright(c) 2017 Intel
> Corporation
> 
> -headers = files('rte_crc_arm64.h',
> -	'rte_fbk_hash.h',
> +headers = files('rte_fbk_hash.h',
>  	'rte_hash_crc.h',
>  	'rte_hash.h',
>  	'rte_jhash.h',
>  	'rte_thash.h')
> +if dpdk_conf.has('RTE_ARCH_ARM64')
> +	headers += files('rte_crc_arm64.h')
> +endif
> 
>  sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c')  deps += ['ring'] diff --
> git a/lib/librte_hash/rte_crc_arm64.h b/lib/librte_hash/rte_crc_arm64.h
> index b4628cfc0..adfcafc7d 100644
> --- a/lib/librte_hash/rte_crc_arm64.h
> +++ b/lib/librte_hash/rte_crc_arm64.h
> @@ -5,179 +5,6 @@
>  #ifndef _RTE_CRC_ARM64_H_
>  #define _RTE_CRC_ARM64_H_
> 
> -/**
> - * @file
> - *
> - * RTE CRC arm64 Hash
> - */
> -
> -#ifdef __cplusplus
> -extern "C" {
> -#endif
> -
> -#include <stdint.h>
> -#include <rte_cpuflags.h>
> -#include <rte_branch_prediction.h>
> -#include <rte_common.h>
> -
> -static inline uint32_t
> -crc32c_arm64_u8(uint8_t data, uint32_t init_val) -{
> -	__asm__ volatile(
> -			"crc32cb %w[crc], %w[crc], %w[value]"
> -			: [crc] "+r" (init_val)
> -			: [value] "r" (data));
> -	return init_val;
> -}
> -
> -static inline uint32_t
> -crc32c_arm64_u16(uint16_t data, uint32_t init_val) -{
> -	__asm__ volatile(
> -			"crc32ch %w[crc], %w[crc], %w[value]"
> -			: [crc] "+r" (init_val)
> -			: [value] "r" (data));
> -	return init_val;
> -}
> -
> -static inline uint32_t
> -crc32c_arm64_u32(uint32_t data, uint32_t init_val) -{
> -	__asm__ volatile(
> -			"crc32cw %w[crc], %w[crc], %w[value]"
> -			: [crc] "+r" (init_val)
> -			: [value] "r" (data));
> -	return init_val;
> -}
> -
> -static inline uint32_t
> -crc32c_arm64_u64(uint64_t data, uint32_t init_val) -{
> -	__asm__ volatile(
> -			"crc32cx %w[crc], %w[crc], %x[value]"
> -			: [crc] "+r" (init_val)
> -			: [value] "r" (data));
> -	return init_val;
> -}
> -
> -/**
> - * Allow or disallow use of arm64 SIMD instrinsics for CRC32 hash
> - * calculation.
> - *
> - * @param alg
> - *   An OR of following flags:
> - *   - (CRC32_SW) Don't use arm64 crc intrinsics
> - *   - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available
> - *
> - */
> -static inline void
> -rte_hash_crc_set_alg(uint8_t alg)
> -{
> -	switch (alg) {
> -	case CRC32_ARM64:
> -		if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
> -			alg = CRC32_SW;
> -		/* fall-through */
> -	case CRC32_SW:
> -		crc32_alg = alg;
> -		/* fall-through */
> -	default:
> -		break;
> -	}
> -}
> -
> -/* Setting the best available algorithm */
> -RTE_INIT(rte_hash_crc_init_alg)
> -{
> -	rte_hash_crc_set_alg(CRC32_ARM64);
> -}
> -
> -/**
> - * Use single crc32 instruction to perform a hash on a 1 byte value.
> - * Fall back to software crc32 implementation in case arm64 crc intrinsics is
> - * not supported
> - *
> - * @param data
> - *   Data to perform hash on.
> - * @param init_val
> - *   Value to initialise hash generator.
> - * @return
> - *   32bit calculated hash value.
> - */
> -static inline uint32_t
> -rte_hash_crc_1byte(uint8_t data, uint32_t init_val) -{
> -	if (likely(crc32_alg & CRC32_ARM64))
> -		return crc32c_arm64_u8(data, init_val);
> -
> -	return crc32c_1byte(data, init_val);
> -}
> -
> -/**
> - * Use single crc32 instruction to perform a hash on a 2 bytes value.
> - * Fall back to software crc32 implementation in case arm64 crc intrinsics is
> - * not supported
> - *
> - * @param data
> - *   Data to perform hash on.
> - * @param init_val
> - *   Value to initialise hash generator.
> - * @return
> - *   32bit calculated hash value.
> - */
> -static inline uint32_t
> -rte_hash_crc_2byte(uint16_t data, uint32_t init_val) -{
> -	if (likely(crc32_alg & CRC32_ARM64))
> -		return crc32c_arm64_u16(data, init_val);
> -
> -	return crc32c_2bytes(data, init_val);
> -}
> -
> -/**
> - * Use single crc32 instruction to perform a hash on a 4 byte value.
> - * Fall back to software crc32 implementation in case arm64 crc intrinsics is
> - * not supported
> - *
> - * @param data
> - *   Data to perform hash on.
> - * @param init_val
> - *   Value to initialise hash generator.
> - * @return
> - *   32bit calculated hash value.
> - */
> -static inline uint32_t
> -rte_hash_crc_4byte(uint32_t data, uint32_t init_val) -{
> -	if (likely(crc32_alg & CRC32_ARM64))
> -		return crc32c_arm64_u32(data, init_val);
> -
> -	return crc32c_1word(data, init_val);
> -}
> -
> -/**
> - * Use single crc32 instruction to perform a hash on a 8 byte value.
> - * Fall back to software crc32 implementation in case arm64 crc intrinsics is
> - * not supported
> - *
> - * @param data
> - *   Data to perform hash on.
> - * @param init_val
> - *   Value to initialise hash generator.
> - * @return
> - *   32bit calculated hash value.
> - */
> -static inline uint32_t
> -rte_hash_crc_8byte(uint64_t data, uint32_t init_val) -{
> -	if (likely(crc32_alg == CRC32_ARM64))
> -		return crc32c_arm64_u64(data, init_val);
> -
> -	return crc32c_2words(data, init_val);
> -}
> -
> -#ifdef __cplusplus
> -}
> -#endif
> +#include "rte_hash_crc.h"
> 
>  #endif /* _RTE_CRC_ARM64_H_ */
> diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h
> index cf28031b3..eaba70c12 100644
> --- a/lib/librte_hash/rte_hash_crc.h
> +++ b/lib/librte_hash/rte_hash_crc.h
> @@ -16,10 +16,12 @@ extern "C" {
>  #endif
> 
>  #include <stdint.h>
> -#include <rte_config.h>
> -#include <rte_cpuflags.h>
> +
>  #include <rte_branch_prediction.h>
>  #include <rte_common.h>
> +#include <rte_config.h>
> +#include <rte_cpuflags.h>
> +#include <rte_log.h>
> 
>  /* Lookup tables for software implementation of CRC32C */  static const
> uint32_t crc32c_tables[8][256] = {{ @@ -322,7 +324,7 @@
> crc32c_2bytes(uint16_t data, uint32_t init_val)  }
> 
>  static inline uint32_t
> -crc32c_1word(uint32_t data, uint32_t init_val)
> +crc32c_4bytes(uint32_t data, uint32_t init_val)
>  {
>  	uint32_t crc, term1, term2;
>  	crc = init_val;
> @@ -336,7 +338,7 @@ crc32c_1word(uint32_t data, uint32_t init_val)  }
> 
>  static inline uint32_t
> -crc32c_2words(uint64_t data, uint32_t init_val)
> +crc32c_8bytes(uint64_t data, uint32_t init_val)
>  {
>  	uint32_t crc, term1, term2;
>  	union {
> @@ -358,6 +360,48 @@ crc32c_2words(uint64_t data, uint32_t init_val)
>  	return crc;
>  }
> 
> +#if defined(RTE_ARCH_ARM64) &&
> defined(RTE_MACHINE_CPUFLAG_CRC32)
> +static inline uint32_t
> +crc32c_arm64_u8(uint8_t data, uint32_t init_val) {
> +	__asm__ volatile(
> +			"crc32cb %w[crc], %w[crc], %w[value]"
> +			: [crc] "+r" (init_val)
> +			: [value] "r" (data));
> +	return init_val;
> +}
> +
> +static inline uint32_t
> +crc32c_arm64_u16(uint16_t data, uint32_t init_val) {
> +	__asm__ volatile(
> +			"crc32ch %w[crc], %w[crc], %w[value]"
> +			: [crc] "+r" (init_val)
> +			: [value] "r" (data));
> +	return init_val;
> +}
> +
> +static inline uint32_t
> +crc32c_arm64_u32(uint32_t data, uint32_t init_val) {
> +	__asm__ volatile(
> +			"crc32cw %w[crc], %w[crc], %w[value]"
> +			: [crc] "+r" (init_val)
> +			: [value] "r" (data));
> +	return init_val;
> +}
> +
> +static inline uint32_t
> +crc32c_arm64_u64(uint64_t data, uint32_t init_val) {
> +	__asm__ volatile(
> +			"crc32cx %w[crc], %w[crc], %x[value]"
> +			: [crc] "+r" (init_val)
> +			: [value] "r" (data));
> +	return init_val;
> +}
> +#endif
> +
>  #if defined(RTE_ARCH_X86)
>  static inline uint32_t
>  crc32c_sse42_u8(uint8_t data, uint32_t init_val) @@ -424,42 +468,69 @@
> crc32c_sse42_u64(uint64_t data, uint64_t init_val)
> 
>  static uint8_t crc32_alg = CRC32_SW;
> 
> -#if defined(RTE_ARCH_ARM64) &&
> defined(RTE_MACHINE_CPUFLAG_CRC32)
> -#include "rte_crc_arm64.h"
> -#else
> -
>  /**
> - * Allow or disallow use of SSE4.2 instrinsics for CRC32 hash
> + * Allow or disallow use of SSE4.2/ARMv8 instrinsics for CRC32 hash
>   * calculation.
>   *
>   * @param alg
>   *   An OR of following flags:
> - *   - (CRC32_SW) Don't use SSE4.2 intrinsics
> + *   - (CRC32_SW) Don't use SSE4.2 intrinsics (default non-[x86/ARMv8])
>   *   - (CRC32_SSE42) Use SSE4.2 intrinsics if available
> - *   - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default)
> - *
> + *   - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default x86)
> + *   - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available
>   */
>  static inline void
>  rte_hash_crc_set_alg(uint8_t alg)
>  {
> -#if defined(RTE_ARCH_X86)
> -	if (alg == CRC32_SSE42_x64 &&
> -			!rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T))
> -		alg = CRC32_SSE42;
> +	switch (alg) {
> +	case CRC32_SSE42_x64:
> +	case CRC32_SSE42:
> +#if defined RTE_ARCH_X86
> +		if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T))
> +			crc32_alg = CRC32_SSE42;
> +		else
> +			crc32_alg = alg;
> +#endif
> +#if defined RTE_ARCH_ARM64
> +		RTE_LOG(WARNING, HASH,
> +			"Incorrect CRC32 algorithm requested setting best"
> +			"available algorithm on the architecture\n");
> +		rte_hash_crc_set_alg(CRC32_ARM64);
> +#endif
> +		break;
> +	case CRC32_ARM64:
> +#if defined RTE_ARCH_ARM64
> +		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
> +			crc32_alg = CRC32_ARM64;
>  #endif
> -	crc32_alg = alg;
> +#if defined RTE_ARCH_X86
> +		RTE_LOG(WARNING, HASH,
> +			"Incorrect CRC32 algorithm requested setting best"
> +			"available algorithm on the architecture\n");
> +		rte_hash_crc_set_alg(CRC32_SSE42_x64);
> +#endif
> +		break;
> +	case CRC32_SW:
> +	default:
> +		crc32_alg = CRC32_SW;
> +	break;
> +	}
>  }
> 
>  /* Setting the best available algorithm */
>  RTE_INIT(rte_hash_crc_init_alg)
>  {
> +#if defined RTE_ARCH_X86
>  	rte_hash_crc_set_alg(CRC32_SSE42_x64);
> +#elif defined RTE_ARCH_ARM64
> +	rte_hash_crc_set_alg(CRC32_ARM64);
> +#else
> +	rte_hash_crc_set_alg(CRC32_SW);
> +#endif
>  }
> 
>  /**
> - * Use single crc32 instruction to perform a hash on a byte value.
> - * Fall back to software crc32 implementation in case SSE4.2 is
> - * not supported
> + * Calculate crc32 hash value of 1bytes.
>   *
>   * @param data
>   *   Data to perform hash on.
> @@ -474,15 +545,15 @@ rte_hash_crc_1byte(uint8_t data, uint32_t init_val)
> #if defined RTE_ARCH_X86
>  	if (likely(crc32_alg & CRC32_SSE42))
>  		return crc32c_sse42_u8(data, init_val);
> +#elif defined RTE_ARCH_ARM64
> +	if (likely(crc32_alg & CRC32_ARM64))
> +		return crc32c_arm64_u8(data, init_val);
>  #endif
> -
>  	return crc32c_1byte(data, init_val);
>  }
> 
>  /**
> - * Use single crc32 instruction to perform a hash on a 2 bytes value.
> - * Fall back to software crc32 implementation in case SSE4.2 is
> - * not supported
> + * Calculate crc32 hash value of 2bytes.
>   *
>   * @param data
>   *   Data to perform hash on.
> @@ -497,15 +568,15 @@ rte_hash_crc_2byte(uint16_t data, uint32_t init_val)
> #if defined RTE_ARCH_X86
>  	if (likely(crc32_alg & CRC32_SSE42))
>  		return crc32c_sse42_u16(data, init_val);
> +#elif defined RTE_ARCH_ARM64
> +	if (likely(crc32_alg & CRC32_ARM64))
> +		return crc32c_arm64_u16(data, init_val);
>  #endif
> -
>  	return crc32c_2bytes(data, init_val);
>  }
> 
>  /**
> - * Use single crc32 instruction to perform a hash on a 4 byte value.
> - * Fall back to software crc32 implementation in case SSE4.2 is
> - * not supported
> + * Calculate crc32 hash value of 4bytes.
>   *
>   * @param data
>   *   Data to perform hash on.
> @@ -520,15 +591,15 @@ rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
> #if defined RTE_ARCH_X86
>  	if (likely(crc32_alg & CRC32_SSE42))
>  		return crc32c_sse42_u32(data, init_val);
> +#elif defined RTE_ARCH_ARM64
> +	if (likely(crc32_alg & CRC32_ARM64))
> +		return crc32c_arm64_u32(data, init_val);
>  #endif
> -
> -	return crc32c_1word(data, init_val);
> +	return crc32c_4bytes(data, init_val);
>  }
> 
>  /**
> - * Use single crc32 instruction to perform a hash on a 8 byte value.
> - * Fall back to software crc32 implementation in case SSE4.2 is
> - * not supported
> + * Calculate crc32 hash value of 8bytes.
>   *
>   * @param data
>   *   Data to perform hash on.
> @@ -540,21 +611,19 @@ rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
> static inline uint32_t  rte_hash_crc_8byte(uint64_t data, uint32_t init_val)  { -
> #ifdef RTE_ARCH_X86_64
> -	if (likely(crc32_alg == CRC32_SSE42_x64))
> +#if defined RTE_ARCH_X86_64
> +	if (likely(crc32_alg & CRC32_SSE42_x64))
>  		return crc32c_sse42_u64(data, init_val); -#endif
> -
> -#if defined RTE_ARCH_X86
> +#elif defined RTE_ARCH_X86
>  	if (likely(crc32_alg & CRC32_SSE42))
>  		return crc32c_sse42_u64_mimic(data, init_val);
> +#elif defined RTE_ARCH_ARM64
> +	if (likely(crc32_alg & CRC32_ARM64))
> +		return crc32c_arm64_u64(data, init_val);
>  #endif
> -
> -	return crc32c_2words(data, init_val);
> +	return crc32c_8bytes(data, init_val);
>  }
> 
> -#endif
> -
>  /**
>   * Calculate CRC32 hash on user-supplied byte array.
>   *
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [RFC v2] hash: unify crc32 API header for x86 and ARM
  2020-05-13  3:04   ` Ruifeng Wang
@ 2020-05-13 13:22     ` Ananyev, Konstantin
  0 siblings, 0 replies; 16+ messages in thread
From: Ananyev, Konstantin @ 2020-05-13 13:22 UTC (permalink / raw)
  To: Ruifeng Wang, pbhagavatula, jerinj, Van Haaren, Harry, Wang,
	Yipeng1, Gobriel, Sameh, Richardson, Bruce
  Cc: dev, nd

> 
> > -----Original Message-----
> > From: pbhagavatula@marvell.com <pbhagavatula@marvell.com>
> > Sent: Wednesday, May 13, 2020 4:40 AM
> > To: jerinj@marvell.com; konstantin.ananyev@intel.com;
> > harry.van.haaren@intel.com; Yipeng Wang <yipeng1.wang@intel.com>;
> > Sameh Gobriel <sameh.gobriel@intel.com>; Bruce Richardson
> > <bruce.richardson@intel.com>; Ruifeng Wang <Ruifeng.Wang@arm.com>
> > Cc: dev@dpdk.org; Pavan Nikhilesh <pbhagavatula@marvell.com>
> > Subject: [dpdk-dev] [RFC v2] hash: unify crc32 API header for x86 and ARM
> >
> > From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> >
> > Merge crc32 hash calculation public API headers for x86 and ARM.
> > Select the best available CRC32 algorithm when unsupported algorithm on a
> > given CPU architecture is requested by an application.
> >
> > Previously, if an application directly includes `rte_crc_arm64.h` without
> > including `rte_hash_crc.h` it will fail to compile.
> > Although, `rte_crc_arm64.h` is no longer needed make it a dummy file for
> > ABI purposes.
> >
> > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> > ---
> >  v2 Changes:
> >  - Don't remove `rte_crc_arm64.h` for ABI purposes.
> >  - Revert function pointer approach for performance reasons.
> >  - Select the best available algorithm based on the arch when user passes an
> > unsupported crc32 algorithm.
> >
> Maybe split the patch? Changes to select the best available algorithm can be a separate one.
> 
> More ifdefs are added. Is it possible to have arch specific rte_hash_crc_xx implementations
> like what was done in rte_crc_arm64.h, and include specific headers according to arch in rte_hash.crc.h?

Same thought.

> For ABI purpose, rte_crc_arm64.h can be kept and it only includes the new arm64 specific header.
> 
> >  app/test/test_hash.c            |   6 ++
> >  lib/librte_hash/meson.build     |   6 +-
> >  lib/librte_hash/rte_crc_arm64.h | 175 +-------------------------------
> > lib/librte_hash/rte_hash_crc.h  | 153 ++++++++++++++++++++--------
> >  4 files changed, 122 insertions(+), 218 deletions(-)
> >
> > diff --git a/app/test/test_hash.c b/app/test/test_hash.c index
> > afa3a1a3c..7bd457dac 100644
> > --- a/app/test/test_hash.c
> > +++ b/app/test/test_hash.c
> > @@ -195,7 +195,13 @@ test_crc32_hash_alg_equiv(void)
> >  	}
> >
> >  	/* Resetting to best available algorithm */
> > +#if defined RTE_ARCH_X86
> >  	rte_hash_crc_set_alg(CRC32_SSE42_x64);
> > +#elif defined RTE_ARCH_ARM64
> > +	rte_hash_crc_set_alg(CRC32_ARM64);
> > +#else
> > +	rte_hash_crc_set_alg(CRC32_SW);
> > +#endif
> >
> >  	if (i == CRC32_ITERATIONS)
> >  		return 0;
> > diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build index
> > 6ab46ae9d..8a3cf2f64 100644
> > --- a/lib/librte_hash/meson.build
> > +++ b/lib/librte_hash/meson.build
> > @@ -1,12 +1,14 @@
> >  # SPDX-License-Identifier: BSD-3-Clause  # Copyright(c) 2017 Intel
> > Corporation
> >
> > -headers = files('rte_crc_arm64.h',
> > -	'rte_fbk_hash.h',
> > +headers = files('rte_fbk_hash.h',
> >  	'rte_hash_crc.h',
> >  	'rte_hash.h',
> >  	'rte_jhash.h',
> >  	'rte_thash.h')
> > +if dpdk_conf.has('RTE_ARCH_ARM64')
> > +	headers += files('rte_crc_arm64.h')
> > +endif
> >
> >  sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c')  deps += ['ring'] diff --
> > git a/lib/librte_hash/rte_crc_arm64.h b/lib/librte_hash/rte_crc_arm64.h
> > index b4628cfc0..adfcafc7d 100644
> > --- a/lib/librte_hash/rte_crc_arm64.h
> > +++ b/lib/librte_hash/rte_crc_arm64.h
> > @@ -5,179 +5,6 @@
> >  #ifndef _RTE_CRC_ARM64_H_
> >  #define _RTE_CRC_ARM64_H_
> >
> > -/**
> > - * @file
> > - *
> > - * RTE CRC arm64 Hash
> > - */
> > -
> > -#ifdef __cplusplus
> > -extern "C" {
> > -#endif
> > -
> > -#include <stdint.h>
> > -#include <rte_cpuflags.h>
> > -#include <rte_branch_prediction.h>
> > -#include <rte_common.h>
> > -
> > -static inline uint32_t
> > -crc32c_arm64_u8(uint8_t data, uint32_t init_val) -{
> > -	__asm__ volatile(
> > -			"crc32cb %w[crc], %w[crc], %w[value]"
> > -			: [crc] "+r" (init_val)
> > -			: [value] "r" (data));
> > -	return init_val;
> > -}
> > -
> > -static inline uint32_t
> > -crc32c_arm64_u16(uint16_t data, uint32_t init_val) -{
> > -	__asm__ volatile(
> > -			"crc32ch %w[crc], %w[crc], %w[value]"
> > -			: [crc] "+r" (init_val)
> > -			: [value] "r" (data));
> > -	return init_val;
> > -}
> > -
> > -static inline uint32_t
> > -crc32c_arm64_u32(uint32_t data, uint32_t init_val) -{
> > -	__asm__ volatile(
> > -			"crc32cw %w[crc], %w[crc], %w[value]"
> > -			: [crc] "+r" (init_val)
> > -			: [value] "r" (data));
> > -	return init_val;
> > -}
> > -
> > -static inline uint32_t
> > -crc32c_arm64_u64(uint64_t data, uint32_t init_val) -{
> > -	__asm__ volatile(
> > -			"crc32cx %w[crc], %w[crc], %x[value]"
> > -			: [crc] "+r" (init_val)
> > -			: [value] "r" (data));
> > -	return init_val;
> > -}
> > -
> > -/**
> > - * Allow or disallow use of arm64 SIMD instrinsics for CRC32 hash
> > - * calculation.
> > - *
> > - * @param alg
> > - *   An OR of following flags:
> > - *   - (CRC32_SW) Don't use arm64 crc intrinsics
> > - *   - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available
> > - *
> > - */
> > -static inline void
> > -rte_hash_crc_set_alg(uint8_t alg)
> > -{
> > -	switch (alg) {
> > -	case CRC32_ARM64:
> > -		if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
> > -			alg = CRC32_SW;
> > -		/* fall-through */
> > -	case CRC32_SW:
> > -		crc32_alg = alg;
> > -		/* fall-through */
> > -	default:
> > -		break;
> > -	}
> > -}
> > -
> > -/* Setting the best available algorithm */
> > -RTE_INIT(rte_hash_crc_init_alg)
> > -{
> > -	rte_hash_crc_set_alg(CRC32_ARM64);
> > -}
> > -
> > -/**
> > - * Use single crc32 instruction to perform a hash on a 1 byte value.
> > - * Fall back to software crc32 implementation in case arm64 crc intrinsics is
> > - * not supported
> > - *
> > - * @param data
> > - *   Data to perform hash on.
> > - * @param init_val
> > - *   Value to initialise hash generator.
> > - * @return
> > - *   32bit calculated hash value.
> > - */
> > -static inline uint32_t
> > -rte_hash_crc_1byte(uint8_t data, uint32_t init_val) -{
> > -	if (likely(crc32_alg & CRC32_ARM64))
> > -		return crc32c_arm64_u8(data, init_val);
> > -
> > -	return crc32c_1byte(data, init_val);
> > -}
> > -
> > -/**
> > - * Use single crc32 instruction to perform a hash on a 2 bytes value.
> > - * Fall back to software crc32 implementation in case arm64 crc intrinsics is
> > - * not supported
> > - *
> > - * @param data
> > - *   Data to perform hash on.
> > - * @param init_val
> > - *   Value to initialise hash generator.
> > - * @return
> > - *   32bit calculated hash value.
> > - */
> > -static inline uint32_t
> > -rte_hash_crc_2byte(uint16_t data, uint32_t init_val) -{
> > -	if (likely(crc32_alg & CRC32_ARM64))
> > -		return crc32c_arm64_u16(data, init_val);
> > -
> > -	return crc32c_2bytes(data, init_val);
> > -}
> > -
> > -/**
> > - * Use single crc32 instruction to perform a hash on a 4 byte value.
> > - * Fall back to software crc32 implementation in case arm64 crc intrinsics is
> > - * not supported
> > - *
> > - * @param data
> > - *   Data to perform hash on.
> > - * @param init_val
> > - *   Value to initialise hash generator.
> > - * @return
> > - *   32bit calculated hash value.
> > - */
> > -static inline uint32_t
> > -rte_hash_crc_4byte(uint32_t data, uint32_t init_val) -{
> > -	if (likely(crc32_alg & CRC32_ARM64))
> > -		return crc32c_arm64_u32(data, init_val);
> > -
> > -	return crc32c_1word(data, init_val);
> > -}
> > -
> > -/**
> > - * Use single crc32 instruction to perform a hash on a 8 byte value.
> > - * Fall back to software crc32 implementation in case arm64 crc intrinsics is
> > - * not supported
> > - *
> > - * @param data
> > - *   Data to perform hash on.
> > - * @param init_val
> > - *   Value to initialise hash generator.
> > - * @return
> > - *   32bit calculated hash value.
> > - */
> > -static inline uint32_t
> > -rte_hash_crc_8byte(uint64_t data, uint32_t init_val) -{
> > -	if (likely(crc32_alg == CRC32_ARM64))
> > -		return crc32c_arm64_u64(data, init_val);
> > -
> > -	return crc32c_2words(data, init_val);
> > -}
> > -
> > -#ifdef __cplusplus
> > -}
> > -#endif
> > +#include "rte_hash_crc.h"
> >
> >  #endif /* _RTE_CRC_ARM64_H_ */
> > diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h
> > index cf28031b3..eaba70c12 100644
> > --- a/lib/librte_hash/rte_hash_crc.h
> > +++ b/lib/librte_hash/rte_hash_crc.h
> > @@ -16,10 +16,12 @@ extern "C" {
> >  #endif
> >
> >  #include <stdint.h>
> > -#include <rte_config.h>
> > -#include <rte_cpuflags.h>
> > +
> >  #include <rte_branch_prediction.h>
> >  #include <rte_common.h>
> > +#include <rte_config.h>
> > +#include <rte_cpuflags.h>
> > +#include <rte_log.h>
> >
> >  /* Lookup tables for software implementation of CRC32C */  static const
> > uint32_t crc32c_tables[8][256] = {{ @@ -322,7 +324,7 @@
> > crc32c_2bytes(uint16_t data, uint32_t init_val)  }
> >
> >  static inline uint32_t
> > -crc32c_1word(uint32_t data, uint32_t init_val)
> > +crc32c_4bytes(uint32_t data, uint32_t init_val)
> >  {
> >  	uint32_t crc, term1, term2;
> >  	crc = init_val;
> > @@ -336,7 +338,7 @@ crc32c_1word(uint32_t data, uint32_t init_val)  }
> >
> >  static inline uint32_t
> > -crc32c_2words(uint64_t data, uint32_t init_val)
> > +crc32c_8bytes(uint64_t data, uint32_t init_val)
> >  {
> >  	uint32_t crc, term1, term2;
> >  	union {
> > @@ -358,6 +360,48 @@ crc32c_2words(uint64_t data, uint32_t init_val)
> >  	return crc;
> >  }
> >
> > +#if defined(RTE_ARCH_ARM64) &&
> > defined(RTE_MACHINE_CPUFLAG_CRC32)
> > +static inline uint32_t
> > +crc32c_arm64_u8(uint8_t data, uint32_t init_val) {
> > +	__asm__ volatile(
> > +			"crc32cb %w[crc], %w[crc], %w[value]"
> > +			: [crc] "+r" (init_val)
> > +			: [value] "r" (data));
> > +	return init_val;
> > +}
> > +
> > +static inline uint32_t
> > +crc32c_arm64_u16(uint16_t data, uint32_t init_val) {
> > +	__asm__ volatile(
> > +			"crc32ch %w[crc], %w[crc], %w[value]"
> > +			: [crc] "+r" (init_val)
> > +			: [value] "r" (data));
> > +	return init_val;
> > +}
> > +
> > +static inline uint32_t
> > +crc32c_arm64_u32(uint32_t data, uint32_t init_val) {
> > +	__asm__ volatile(
> > +			"crc32cw %w[crc], %w[crc], %w[value]"
> > +			: [crc] "+r" (init_val)
> > +			: [value] "r" (data));
> > +	return init_val;
> > +}
> > +
> > +static inline uint32_t
> > +crc32c_arm64_u64(uint64_t data, uint32_t init_val) {
> > +	__asm__ volatile(
> > +			"crc32cx %w[crc], %w[crc], %x[value]"
> > +			: [crc] "+r" (init_val)
> > +			: [value] "r" (data));
> > +	return init_val;
> > +}
> > +#endif
> > +
> >  #if defined(RTE_ARCH_X86)
> >  static inline uint32_t
> >  crc32c_sse42_u8(uint8_t data, uint32_t init_val) @@ -424,42 +468,69 @@
> > crc32c_sse42_u64(uint64_t data, uint64_t init_val)
> >
> >  static uint8_t crc32_alg = CRC32_SW;
> >
> > -#if defined(RTE_ARCH_ARM64) &&
> > defined(RTE_MACHINE_CPUFLAG_CRC32)
> > -#include "rte_crc_arm64.h"
> > -#else
> > -
> >  /**
> > - * Allow or disallow use of SSE4.2 instrinsics for CRC32 hash
> > + * Allow or disallow use of SSE4.2/ARMv8 instrinsics for CRC32 hash
> >   * calculation.
> >   *
> >   * @param alg
> >   *   An OR of following flags:
> > - *   - (CRC32_SW) Don't use SSE4.2 intrinsics
> > + *   - (CRC32_SW) Don't use SSE4.2 intrinsics (default non-[x86/ARMv8])
> >   *   - (CRC32_SSE42) Use SSE4.2 intrinsics if available
> > - *   - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default)
> > - *
> > + *   - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default x86)
> > + *   - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available
> >   */
> >  static inline void
> >  rte_hash_crc_set_alg(uint8_t alg)
> >  {
> > -#if defined(RTE_ARCH_X86)
> > -	if (alg == CRC32_SSE42_x64 &&
> > -			!rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T))
> > -		alg = CRC32_SSE42;
> > +	switch (alg) {
> > +	case CRC32_SSE42_x64:
> > +	case CRC32_SSE42:
> > +#if defined RTE_ARCH_X86
> > +		if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T))
> > +			crc32_alg = CRC32_SSE42;
> > +		else
> > +			crc32_alg = alg;
> > +#endif
> > +#if defined RTE_ARCH_ARM64
> > +		RTE_LOG(WARNING, HASH,
> > +			"Incorrect CRC32 algorithm requested setting best"
> > +			"available algorithm on the architecture\n");
> > +		rte_hash_crc_set_alg(CRC32_ARM64);
> > +#endif
> > +		break;
> > +	case CRC32_ARM64:
> > +#if defined RTE_ARCH_ARM64
> > +		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
> > +			crc32_alg = CRC32_ARM64;
> >  #endif
> > -	crc32_alg = alg;
> > +#if defined RTE_ARCH_X86
> > +		RTE_LOG(WARNING, HASH,
> > +			"Incorrect CRC32 algorithm requested setting best"
> > +			"available algorithm on the architecture\n");
> > +		rte_hash_crc_set_alg(CRC32_SSE42_x64);
> > +#endif
> > +		break;
> > +	case CRC32_SW:
> > +	default:
> > +		crc32_alg = CRC32_SW;
> > +	break;
> > +	}
> >  }
> >
> >  /* Setting the best available algorithm */
> >  RTE_INIT(rte_hash_crc_init_alg)
> >  {
> > +#if defined RTE_ARCH_X86
> >  	rte_hash_crc_set_alg(CRC32_SSE42_x64);
> > +#elif defined RTE_ARCH_ARM64
> > +	rte_hash_crc_set_alg(CRC32_ARM64);
> > +#else
> > +	rte_hash_crc_set_alg(CRC32_SW);
> > +#endif
> >  }
> >
> >  /**
> > - * Use single crc32 instruction to perform a hash on a byte value.
> > - * Fall back to software crc32 implementation in case SSE4.2 is
> > - * not supported
> > + * Calculate crc32 hash value of 1bytes.
> >   *
> >   * @param data
> >   *   Data to perform hash on.
> > @@ -474,15 +545,15 @@ rte_hash_crc_1byte(uint8_t data, uint32_t init_val)
> > #if defined RTE_ARCH_X86
> >  	if (likely(crc32_alg & CRC32_SSE42))
> >  		return crc32c_sse42_u8(data, init_val);
> > +#elif defined RTE_ARCH_ARM64
> > +	if (likely(crc32_alg & CRC32_ARM64))
> > +		return crc32c_arm64_u8(data, init_val);
> >  #endif
> > -
> >  	return crc32c_1byte(data, init_val);
> >  }
> >
> >  /**
> > - * Use single crc32 instruction to perform a hash on a 2 bytes value.
> > - * Fall back to software crc32 implementation in case SSE4.2 is
> > - * not supported
> > + * Calculate crc32 hash value of 2bytes.
> >   *
> >   * @param data
> >   *   Data to perform hash on.
> > @@ -497,15 +568,15 @@ rte_hash_crc_2byte(uint16_t data, uint32_t init_val)
> > #if defined RTE_ARCH_X86
> >  	if (likely(crc32_alg & CRC32_SSE42))
> >  		return crc32c_sse42_u16(data, init_val);
> > +#elif defined RTE_ARCH_ARM64
> > +	if (likely(crc32_alg & CRC32_ARM64))
> > +		return crc32c_arm64_u16(data, init_val);
> >  #endif
> > -
> >  	return crc32c_2bytes(data, init_val);
> >  }
> >
> >  /**
> > - * Use single crc32 instruction to perform a hash on a 4 byte value.
> > - * Fall back to software crc32 implementation in case SSE4.2 is
> > - * not supported
> > + * Calculate crc32 hash value of 4bytes.
> >   *
> >   * @param data
> >   *   Data to perform hash on.
> > @@ -520,15 +591,15 @@ rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
> > #if defined RTE_ARCH_X86
> >  	if (likely(crc32_alg & CRC32_SSE42))
> >  		return crc32c_sse42_u32(data, init_val);
> > +#elif defined RTE_ARCH_ARM64
> > +	if (likely(crc32_alg & CRC32_ARM64))
> > +		return crc32c_arm64_u32(data, init_val);
> >  #endif
> > -
> > -	return crc32c_1word(data, init_val);
> > +	return crc32c_4bytes(data, init_val);
> >  }
> >
> >  /**
> > - * Use single crc32 instruction to perform a hash on a 8 byte value.
> > - * Fall back to software crc32 implementation in case SSE4.2 is
> > - * not supported
> > + * Calculate crc32 hash value of 8bytes.
> >   *
> >   * @param data
> >   *   Data to perform hash on.
> > @@ -540,21 +611,19 @@ rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
> > static inline uint32_t  rte_hash_crc_8byte(uint64_t data, uint32_t init_val)  { -
> > #ifdef RTE_ARCH_X86_64
> > -	if (likely(crc32_alg == CRC32_SSE42_x64))
> > +#if defined RTE_ARCH_X86_64
> > +	if (likely(crc32_alg & CRC32_SSE42_x64))
> >  		return crc32c_sse42_u64(data, init_val); -#endif
> > -
> > -#if defined RTE_ARCH_X86
> > +#elif defined RTE_ARCH_X86
> >  	if (likely(crc32_alg & CRC32_SSE42))
> >  		return crc32c_sse42_u64_mimic(data, init_val);
> > +#elif defined RTE_ARCH_ARM64
> > +	if (likely(crc32_alg & CRC32_ARM64))
> > +		return crc32c_arm64_u64(data, init_val);
> >  #endif
> > -
> > -	return crc32c_2words(data, init_val);
> > +	return crc32c_8bytes(data, init_val);
> >  }
> >
> > -#endif
> > -
> >  /**
> >   * Calculate CRC32 hash on user-supplied byte array.
> >   *
> > --
> > 2.17.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2020-05-13 13:22 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-29 18:05 [dpdk-dev] [RFC] hash: unify crc32 API header for x86 and ARM pbhagavatula
2020-04-30  9:14 ` Van Haaren, Harry
2020-04-30  9:27   ` Pavan Nikhilesh Bhagavatula
2020-05-06 22:02 ` Wang, Yipeng1
2020-05-10 22:49   ` Pavan Nikhilesh Bhagavatula
2020-05-08 12:55 ` Ananyev, Konstantin
2020-05-10 22:53   ` Pavan Nikhilesh Bhagavatula
2020-05-11  9:46     ` Ananyev, Konstantin
2020-05-11 10:23       ` Pavan Nikhilesh Bhagavatula
2020-05-11 10:27         ` Ananyev, Konstantin
2020-05-11 10:57           ` Pavan Nikhilesh Bhagavatula
2020-05-11 12:10             ` Ananyev, Konstantin
2020-05-11 12:32               ` Pavan Nikhilesh Bhagavatula
2020-05-12 20:40 ` [dpdk-dev] [RFC v2] " pbhagavatula
2020-05-13  3:04   ` Ruifeng Wang
2020-05-13 13:22     ` Ananyev, Konstantin

DPDK patches and discussions

This inbox may be cloned and mirrored by anyone:

	git clone --mirror http://inbox.dpdk.org/dev/0 dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dev dev/ http://inbox.dpdk.org/dev \
		dev@dpdk.org
	public-inbox-index dev

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git