* [PATCH v2 1/9] config/riscv: detect presence of Zbc extension
2024-07-12 15:46 ` [PATCH v2 0/9] riscv: implement accelerated crc using zbc Daniel Gregory
@ 2024-07-12 15:46 ` Daniel Gregory
2024-07-12 15:46 ` [PATCH v2 2/9] hash: implement crc using riscv carryless multiply Daniel Gregory
` (9 subsequent siblings)
10 siblings, 0 replies; 36+ messages in thread
From: Daniel Gregory @ 2024-07-12 15:46 UTC (permalink / raw)
To: Stanislaw Kardach, Bruce Richardson
Cc: dev, Punit Agrawal, Liang Ma, Pengcheng Wang, Chunsong Feng,
Daniel Gregory, Stephen Hemminger
The RISC-V Zbc extension adds carry-less multiply instructions we can
use to implement more efficient CRC hashing algorithms.
The RISC-V C API defines architecture extension test macros
https://github.com/riscv-non-isa/riscv-c-api-doc/blob/main/riscv-c-api.md#architecture-extension-test-macros
These let us detect whether the Zbc extension is supported on the
compiler and -march we're building with. The C API also defines Zbc
intrinsics we can use rather than inline assembly on newer versions of
GCC (14.1.0+) and Clang (18.1.0+).
The Linux kernel exposes a RISC-V hardware probing syscall for getting
information about the system at run-time including which extensions are
available. We detect whether this interface is present by looking for
the <asm/hwprobe.h> header, as it's only present in newer kernels
(v6.4+). Furthermore, support for detecting certain extensions,
including Zbc, wasn't present until versions after this, so we need to
check the constants this header exports.
The kernel exposes bitmasks for each extension supported by the probing
interface, rather than the bit index that is set if that extension is
present, so modify the existing cpu flag HWCAP table entries to line up
with this. The values returned by the interface are 64-bits long, so
grow the hwcap registers array to be able to hold them.
If the Zbc extension and intrinsics are both present and we can detect
the Zbc extension at runtime, we define a flag, RTE_RISCV_FEATURE_ZBC.
Signed-off-by: Daniel Gregory <daniel.gregory@bytedance.com>
---
config/riscv/meson.build | 41 ++++++++++
lib/eal/riscv/include/rte_cpuflags.h | 2 +
lib/eal/riscv/rte_cpuflags.c | 112 +++++++++++++++++++--------
3 files changed, 123 insertions(+), 32 deletions(-)
diff --git a/config/riscv/meson.build b/config/riscv/meson.build
index 07d7d9da23..5d8411b254 100644
--- a/config/riscv/meson.build
+++ b/config/riscv/meson.build
@@ -119,6 +119,47 @@ foreach flag: arch_config['machine_args']
endif
endforeach
+# check if we can do buildtime detection of extensions supported by the target
+riscv_extension_macros = false
+if (cc.get_define('__riscv_arch_test', args: machine_args) == '1')
+ message('Detected architecture extension test macros')
+ riscv_extension_macros = true
+else
+ warning('RISC-V architecture extension test macros not available. Build-time detection of extensions not possible')
+endif
+
+# check if we can use hwprobe interface for runtime extension detection
+riscv_hwprobe = false
+if (cc.check_header('asm/hwprobe.h', args: machine_args))
+ message('Detected hwprobe interface, enabling runtime detection of supported extensions')
+ machine_args += ['-DRTE_RISCV_FEATURE_HWPROBE']
+ riscv_hwprobe = true
+else
+ warning('Hwprobe interface not available (present in Linux v6.4+), instruction extensions won\'t be enabled')
+endif
+
+# detect extensions
+# RISC-V Carry-less multiplication extension (Zbc) for hardware implementations
+# of CRC-32C (lib/hash/rte_crc_riscv64.h) and CRC-32/16 (lib/net/net_crc_zbc.c).
+# Requires intrinsics available in GCC 14.1.0+ and Clang 18.1.0+
+if (riscv_extension_macros and riscv_hwprobe and
+ (cc.get_define('__riscv_zbc', args: machine_args) != ''))
+ if ((cc.get_id() == 'gcc' and cc.version().version_compare('>=14.1.0'))
+ or (cc.get_id() == 'clang' and cc.version().version_compare('>=18.1.0')))
+ # determine whether we can detect Zbc extension (this wasn't possible until
+ # Linux kernel v6.8)
+ if (cc.compiles('''#include <asm/hwprobe.h>
+ int a = RISCV_HWPROBE_EXT_ZBC;''', args: machine_args))
+ message('Compiling with the Zbc extension')
+ machine_args += ['-DRTE_RISCV_FEATURE_ZBC']
+ else
+ warning('Detected Zbc extension but cannot use because runtime detection doesn\'t support it (support present in Linux kernel v6.8+)')
+ endif
+ else
+ warning('Detected Zbc extension but cannot use because intrinsics are not available (present in GCC 14.1.0+ and Clang 18.1.0+)')
+ endif
+endif
+
# apply flags
foreach flag: dpdk_flags
if flag.length() > 0
diff --git a/lib/eal/riscv/include/rte_cpuflags.h b/lib/eal/riscv/include/rte_cpuflags.h
index d742efc40f..4e26b584b3 100644
--- a/lib/eal/riscv/include/rte_cpuflags.h
+++ b/lib/eal/riscv/include/rte_cpuflags.h
@@ -42,6 +42,8 @@ enum rte_cpu_flag_t {
RTE_CPUFLAG_RISCV_ISA_X, /* Non-standard extension present */
RTE_CPUFLAG_RISCV_ISA_Y, /* Reserved */
RTE_CPUFLAG_RISCV_ISA_Z, /* Reserved */
+
+ RTE_CPUFLAG_RISCV_EXT_ZBC, /* Carry-less multiplication */
};
#include "generic/rte_cpuflags.h"
diff --git a/lib/eal/riscv/rte_cpuflags.c b/lib/eal/riscv/rte_cpuflags.c
index eb4105c18b..dedf0395ab 100644
--- a/lib/eal/riscv/rte_cpuflags.c
+++ b/lib/eal/riscv/rte_cpuflags.c
@@ -11,6 +11,15 @@
#include <assert.h>
#include <unistd.h>
#include <string.h>
+#include <sys/syscall.h>
+
+/*
+ * when hardware probing is not possible, we assume all extensions are missing
+ * at runtime
+ */
+#ifdef RTE_RISCV_FEATURE_HWPROBE
+#include <asm/hwprobe.h>
+#endif
#ifndef AT_HWCAP
#define AT_HWCAP 16
@@ -29,54 +38,90 @@ enum cpu_register_t {
REG_HWCAP,
REG_HWCAP2,
REG_PLATFORM,
- REG_MAX
+ REG_HWPROBE_IMA_EXT_0,
+ REG_MAX,
};
-typedef uint32_t hwcap_registers_t[REG_MAX];
+typedef uint64_t hwcap_registers_t[REG_MAX];
/**
* Struct to hold a processor feature entry
*/
struct feature_entry {
uint32_t reg;
- uint32_t bit;
+ uint64_t mask;
#define CPU_FLAG_NAME_MAX_LEN 64
char name[CPU_FLAG_NAME_MAX_LEN];
};
-#define FEAT_DEF(name, reg, bit) \
- [RTE_CPUFLAG_##name] = {reg, bit, #name},
+#define FEAT_DEF(name, reg, mask) \
+ [RTE_CPUFLAG_##name] = {reg, mask, #name},
typedef Elf64_auxv_t _Elfx_auxv_t;
const struct feature_entry rte_cpu_feature_table[] = {
- FEAT_DEF(RISCV_ISA_A, REG_HWCAP, 0)
- FEAT_DEF(RISCV_ISA_B, REG_HWCAP, 1)
- FEAT_DEF(RISCV_ISA_C, REG_HWCAP, 2)
- FEAT_DEF(RISCV_ISA_D, REG_HWCAP, 3)
- FEAT_DEF(RISCV_ISA_E, REG_HWCAP, 4)
- FEAT_DEF(RISCV_ISA_F, REG_HWCAP, 5)
- FEAT_DEF(RISCV_ISA_G, REG_HWCAP, 6)
- FEAT_DEF(RISCV_ISA_H, REG_HWCAP, 7)
- FEAT_DEF(RISCV_ISA_I, REG_HWCAP, 8)
- FEAT_DEF(RISCV_ISA_J, REG_HWCAP, 9)
- FEAT_DEF(RISCV_ISA_K, REG_HWCAP, 10)
- FEAT_DEF(RISCV_ISA_L, REG_HWCAP, 11)
- FEAT_DEF(RISCV_ISA_M, REG_HWCAP, 12)
- FEAT_DEF(RISCV_ISA_N, REG_HWCAP, 13)
- FEAT_DEF(RISCV_ISA_O, REG_HWCAP, 14)
- FEAT_DEF(RISCV_ISA_P, REG_HWCAP, 15)
- FEAT_DEF(RISCV_ISA_Q, REG_HWCAP, 16)
- FEAT_DEF(RISCV_ISA_R, REG_HWCAP, 17)
- FEAT_DEF(RISCV_ISA_S, REG_HWCAP, 18)
- FEAT_DEF(RISCV_ISA_T, REG_HWCAP, 19)
- FEAT_DEF(RISCV_ISA_U, REG_HWCAP, 20)
- FEAT_DEF(RISCV_ISA_V, REG_HWCAP, 21)
- FEAT_DEF(RISCV_ISA_W, REG_HWCAP, 22)
- FEAT_DEF(RISCV_ISA_X, REG_HWCAP, 23)
- FEAT_DEF(RISCV_ISA_Y, REG_HWCAP, 24)
- FEAT_DEF(RISCV_ISA_Z, REG_HWCAP, 25)
+ FEAT_DEF(RISCV_ISA_A, REG_HWCAP, 1 << 0)
+ FEAT_DEF(RISCV_ISA_B, REG_HWCAP, 1 << 1)
+ FEAT_DEF(RISCV_ISA_C, REG_HWCAP, 1 << 2)
+ FEAT_DEF(RISCV_ISA_D, REG_HWCAP, 1 << 3)
+ FEAT_DEF(RISCV_ISA_E, REG_HWCAP, 1 << 4)
+ FEAT_DEF(RISCV_ISA_F, REG_HWCAP, 1 << 5)
+ FEAT_DEF(RISCV_ISA_G, REG_HWCAP, 1 << 6)
+ FEAT_DEF(RISCV_ISA_H, REG_HWCAP, 1 << 7)
+ FEAT_DEF(RISCV_ISA_I, REG_HWCAP, 1 << 8)
+ FEAT_DEF(RISCV_ISA_J, REG_HWCAP, 1 << 9)
+ FEAT_DEF(RISCV_ISA_K, REG_HWCAP, 1 << 10)
+ FEAT_DEF(RISCV_ISA_L, REG_HWCAP, 1 << 11)
+ FEAT_DEF(RISCV_ISA_M, REG_HWCAP, 1 << 12)
+ FEAT_DEF(RISCV_ISA_N, REG_HWCAP, 1 << 13)
+ FEAT_DEF(RISCV_ISA_O, REG_HWCAP, 1 << 14)
+ FEAT_DEF(RISCV_ISA_P, REG_HWCAP, 1 << 15)
+ FEAT_DEF(RISCV_ISA_Q, REG_HWCAP, 1 << 16)
+ FEAT_DEF(RISCV_ISA_R, REG_HWCAP, 1 << 17)
+ FEAT_DEF(RISCV_ISA_S, REG_HWCAP, 1 << 18)
+ FEAT_DEF(RISCV_ISA_T, REG_HWCAP, 1 << 19)
+ FEAT_DEF(RISCV_ISA_U, REG_HWCAP, 1 << 20)
+ FEAT_DEF(RISCV_ISA_V, REG_HWCAP, 1 << 21)
+ FEAT_DEF(RISCV_ISA_W, REG_HWCAP, 1 << 22)
+ FEAT_DEF(RISCV_ISA_X, REG_HWCAP, 1 << 23)
+ FEAT_DEF(RISCV_ISA_Y, REG_HWCAP, 1 << 24)
+ FEAT_DEF(RISCV_ISA_Z, REG_HWCAP, 1 << 25)
+
+#ifdef RTE_RISCV_FEATURE_ZBC
+ FEAT_DEF(RISCV_EXT_ZBC, REG_HWPROBE_IMA_EXT_0, RISCV_HWPROBE_EXT_ZBC)
+#else
+ FEAT_DEF(RISCV_EXT_ZBC, REG_HWPROBE_IMA_EXT_0, 0)
+#endif
};
+
+#ifdef RTE_RISCV_FEATURE_HWPROBE
+/*
+ * Use kernel interface for probing hardware capabilities to get extensions
+ * present on this machine
+ */
+static uint64_t
+rte_cpu_hwprobe_ima_ext(void)
+{
+ long ret;
+ struct riscv_hwprobe extensions_pair;
+
+ struct riscv_hwprobe *pairs = &extensions_pair;
+ size_t pair_count = 1;
+ /* empty set of cpus returns extensions present on all cpus */
+ cpu_set_t *cpus = NULL;
+ size_t cpusetsize = 0;
+ unsigned int flags = 0;
+
+ extensions_pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0;
+ ret = syscall(__NR_riscv_hwprobe, pairs, pair_count, cpusetsize, cpus,
+ flags);
+
+ if (ret != 0)
+ return 0;
+ return extensions_pair.value;
+}
+#endif /* RTE_RISCV_FEATURE_HWPROBE */
+
/*
* Read AUXV software register and get cpu features for ARM
*/
@@ -85,6 +130,9 @@ rte_cpu_get_features(hwcap_registers_t out)
{
out[REG_HWCAP] = rte_cpu_getauxval(AT_HWCAP);
out[REG_HWCAP2] = rte_cpu_getauxval(AT_HWCAP2);
+#ifdef RTE_RISCV_FEATURE_HWPROBE
+ out[REG_HWPROBE_IMA_EXT_0] = rte_cpu_hwprobe_ima_ext();
+#endif
}
/*
@@ -104,7 +152,7 @@ rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
return -EFAULT;
rte_cpu_get_features(regs);
- return (regs[feat->reg] >> feat->bit) & 1;
+ return (regs[feat->reg] & feat->mask) != 0;
}
const char *
--
2.39.2
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH v2 2/9] hash: implement crc using riscv carryless multiply
2024-07-12 15:46 ` [PATCH v2 0/9] riscv: implement accelerated crc using zbc Daniel Gregory
2024-07-12 15:46 ` [PATCH v2 1/9] config/riscv: detect presence of Zbc extension Daniel Gregory
@ 2024-07-12 15:46 ` Daniel Gregory
2024-07-12 15:46 ` [PATCH v2 3/9] net: " Daniel Gregory
` (8 subsequent siblings)
10 siblings, 0 replies; 36+ messages in thread
From: Daniel Gregory @ 2024-07-12 15:46 UTC (permalink / raw)
To: Stanislaw Kardach, Thomas Monjalon, Yipeng Wang, Sameh Gobriel,
Bruce Richardson, Vladimir Medvedkin
Cc: dev, Punit Agrawal, Liang Ma, Pengcheng Wang, Chunsong Feng,
Daniel Gregory
Using carryless multiply instructions from RISC-V's Zbc extension,
implement a Barrett reduction that calculates CRC-32C checksums.
Based on the approach described by Intel's whitepaper on "Fast CRC
Computation for Generic Polynomials Using PCLMULQDQ Instruction", which
is also described here
(https://web.archive.org/web/20240111232520/https://mary.rs/lab/crc32/)
Add a case to the autotest_hash unit test.
Signed-off-by: Daniel Gregory <daniel.gregory@bytedance.com>
---
MAINTAINERS | 1 +
app/test/test_hash.c | 7 +++
lib/hash/meson.build | 1 +
lib/hash/rte_crc_riscv64.h | 89 ++++++++++++++++++++++++++++++++++++++
lib/hash/rte_hash_crc.c | 13 +++++-
lib/hash/rte_hash_crc.h | 6 ++-
6 files changed, 115 insertions(+), 2 deletions(-)
create mode 100644 lib/hash/rte_crc_riscv64.h
diff --git a/MAINTAINERS b/MAINTAINERS
index 533f707d5f..81f13ebcf2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -318,6 +318,7 @@ M: Stanislaw Kardach <stanislaw.kardach@gmail.com>
F: config/riscv/
F: doc/guides/linux_gsg/cross_build_dpdk_for_riscv.rst
F: lib/eal/riscv/
+F: lib/hash/rte_crc_riscv64.h
Intel x86
M: Bruce Richardson <bruce.richardson@intel.com>
diff --git a/app/test/test_hash.c b/app/test/test_hash.c
index 24d3b547ad..c8c4197ad8 100644
--- a/app/test/test_hash.c
+++ b/app/test/test_hash.c
@@ -205,6 +205,13 @@ test_crc32_hash_alg_equiv(void)
printf("Failed checking CRC32_SW against CRC32_ARM64\n");
break;
}
+
+ /* Check against 8-byte-operand RISCV64 CRC32 if available */
+ rte_hash_crc_set_alg(CRC32_RISCV64);
+ if (hash_val != rte_hash_crc(data64, data_len, init_val)) {
+ printf("Failed checking CRC32_SW against CRC32_RISCV64\n");
+ break;
+ }
}
/* Resetting to best available algorithm */
diff --git a/lib/hash/meson.build b/lib/hash/meson.build
index 277eb9fa93..8355869a80 100644
--- a/lib/hash/meson.build
+++ b/lib/hash/meson.build
@@ -12,6 +12,7 @@ headers = files(
indirect_headers += files(
'rte_crc_arm64.h',
'rte_crc_generic.h',
+ 'rte_crc_riscv64.h',
'rte_crc_sw.h',
'rte_crc_x86.h',
'rte_thash_x86_gfni.h',
diff --git a/lib/hash/rte_crc_riscv64.h b/lib/hash/rte_crc_riscv64.h
new file mode 100644
index 0000000000..94f6857c69
--- /dev/null
+++ b/lib/hash/rte_crc_riscv64.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) ByteDance 2024
+ */
+
+#include <assert.h>
+#include <stdint.h>
+
+#include <riscv_bitmanip.h>
+
+#ifndef _RTE_CRC_RISCV64_H_
+#define _RTE_CRC_RISCV64_H_
+
+/*
+ * CRC-32C takes a reflected input (bit 7 is the lsb) and produces a reflected
+ * output. As reflecting the value we're checksumming is expensive, we instead
+ * reflect the polynomial P (0x11EDC6F41) and mu and our CRC32 algorithm.
+ *
+ * The mu constant is used for a Barrett reduction. It's 2^96 / P (0x11F91CAF6)
+ * reflected. Picking 2^96 rather than 2^64 means we can calculate a 64-bit crc
+ * using only two multiplications (https://mary.rs/lab/crc32/)
+ */
+static const uint64_t p = 0x105EC76F1;
+static const uint64_t mu = 0x4869EC38DEA713F1UL;
+
+/* Calculate the CRC32C checksum using a Barrett reduction */
+static inline uint32_t
+crc32c_riscv64(uint64_t data, uint32_t init_val, uint32_t bits)
+{
+ assert((bits == 64) || (bits == 32) || (bits == 16) || (bits == 8));
+
+ /* Combine data with the initial value */
+ uint64_t crc = (uint64_t)(data ^ init_val) << (64 - bits);
+
+ /*
+ * Multiply by mu, which is 2^96 / P. Division by 2^96 occurs by taking
+ * the lower 64 bits of the result (remember we're inverted)
+ */
+ crc = __riscv_clmul_64(crc, mu);
+ /* Multiply by P */
+ crc = __riscv_clmulh_64(crc, p);
+
+ /* Subtract from original (only needed for smaller sizes) */
+ if (bits == 16 || bits == 8)
+ crc ^= init_val >> bits;
+
+ return crc;
+}
+
+/*
+ * Use carryless multiply to perform hash on a value, falling back on the
+ * software in case the Zbc extension is not supported
+ */
+static inline uint32_t
+rte_hash_crc_1byte(uint8_t data, uint32_t init_val)
+{
+ if (likely(rte_hash_crc32_alg & CRC32_RISCV64))
+ return crc32c_riscv64(data, init_val, 8);
+
+ return crc32c_1byte(data, init_val);
+}
+
+static inline uint32_t
+rte_hash_crc_2byte(uint16_t data, uint32_t init_val)
+{
+ if (likely(rte_hash_crc32_alg & CRC32_RISCV64))
+ return crc32c_riscv64(data, init_val, 16);
+
+ return crc32c_2bytes(data, init_val);
+}
+
+static inline uint32_t
+rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
+{
+ if (likely(rte_hash_crc32_alg & CRC32_RISCV64))
+ return crc32c_riscv64(data, init_val, 32);
+
+ return crc32c_1word(data, init_val);
+}
+
+static inline uint32_t
+rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
+{
+ if (likely(rte_hash_crc32_alg & CRC32_RISCV64))
+ return crc32c_riscv64(data, init_val, 64);
+
+ return crc32c_2words(data, init_val);
+}
+
+#endif /* _RTE_CRC_RISCV64_H_ */
diff --git a/lib/hash/rte_hash_crc.c b/lib/hash/rte_hash_crc.c
index c037cdb0f0..3eb696a576 100644
--- a/lib/hash/rte_hash_crc.c
+++ b/lib/hash/rte_hash_crc.c
@@ -15,7 +15,7 @@ RTE_LOG_REGISTER_SUFFIX(hash_crc_logtype, crc, INFO);
uint8_t rte_hash_crc32_alg = CRC32_SW;
/**
- * Allow or disallow use of SSE4.2/ARMv8 intrinsics for CRC32 hash
+ * Allow or disallow use of SSE4.2/ARMv8/RISC-V intrinsics for CRC32 hash
* calculation.
*
* @param alg
@@ -24,6 +24,7 @@ uint8_t rte_hash_crc32_alg = CRC32_SW;
* - (CRC32_SSE42) Use SSE4.2 intrinsics if available
* - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default x86)
* - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available (default ARMv8)
+ * - (CRC32_RISCV64) Use RISCV64 Zbc extension if available
*
*/
void
@@ -52,6 +53,14 @@ rte_hash_crc_set_alg(uint8_t alg)
rte_hash_crc32_alg = CRC32_ARM64;
#endif
+#if defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_ZBC)
+ if (!(alg & CRC32_RISCV64))
+ HASH_CRC_LOG(WARNING,
+ "Unsupported CRC32 algorithm requested using CRC32_RISCV64");
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_RISCV_EXT_ZBC))
+ rte_hash_crc32_alg = CRC32_RISCV64;
+#endif
+
if (rte_hash_crc32_alg == CRC32_SW)
HASH_CRC_LOG(WARNING,
"Unsupported CRC32 algorithm requested using CRC32_SW");
@@ -64,6 +73,8 @@ RTE_INIT(rte_hash_crc_init_alg)
rte_hash_crc_set_alg(CRC32_SSE42_x64);
#elif defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_CRC32)
rte_hash_crc_set_alg(CRC32_ARM64);
+#elif defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_ZBC)
+ rte_hash_crc_set_alg(CRC32_RISCV64);
#else
rte_hash_crc_set_alg(CRC32_SW);
#endif
diff --git a/lib/hash/rte_hash_crc.h b/lib/hash/rte_hash_crc.h
index 8ad2422ec3..034ce1f8b4 100644
--- a/lib/hash/rte_hash_crc.h
+++ b/lib/hash/rte_hash_crc.h
@@ -28,6 +28,7 @@ extern "C" {
#define CRC32_x64 (1U << 2)
#define CRC32_SSE42_x64 (CRC32_x64|CRC32_SSE42)
#define CRC32_ARM64 (1U << 3)
+#define CRC32_RISCV64 (1U << 4)
extern uint8_t rte_hash_crc32_alg;
@@ -35,12 +36,14 @@ extern uint8_t rte_hash_crc32_alg;
#include "rte_crc_arm64.h"
#elif defined(RTE_ARCH_X86)
#include "rte_crc_x86.h"
+#elif defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_ZBC)
+#include "rte_crc_riscv64.h"
#else
#include "rte_crc_generic.h"
#endif
/**
- * Allow or disallow use of SSE4.2/ARMv8 intrinsics for CRC32 hash
+ * Allow or disallow use of SSE4.2/ARMv8/RISC-V intrinsics for CRC32 hash
* calculation.
*
* @param alg
@@ -49,6 +52,7 @@ extern uint8_t rte_hash_crc32_alg;
* - (CRC32_SSE42) Use SSE4.2 intrinsics if available
* - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default x86)
* - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available (default ARMv8)
+ * - (CRC32_RISCV64) Use RISC-V Carry-less multiply if available (default rv64gc_zbc)
*/
void
rte_hash_crc_set_alg(uint8_t alg);
--
2.39.2
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH v2 3/9] net: implement crc using riscv carryless multiply
2024-07-12 15:46 ` [PATCH v2 0/9] riscv: implement accelerated crc using zbc Daniel Gregory
2024-07-12 15:46 ` [PATCH v2 1/9] config/riscv: detect presence of Zbc extension Daniel Gregory
2024-07-12 15:46 ` [PATCH v2 2/9] hash: implement crc using riscv carryless multiply Daniel Gregory
@ 2024-07-12 15:46 ` Daniel Gregory
2024-07-12 15:46 ` [PATCH v2 4/9] config/riscv: add qemu crossbuild target Daniel Gregory
` (7 subsequent siblings)
10 siblings, 0 replies; 36+ messages in thread
From: Daniel Gregory @ 2024-07-12 15:46 UTC (permalink / raw)
To: Stanislaw Kardach, Thomas Monjalon, Jasvinder Singh
Cc: dev, Punit Agrawal, Liang Ma, Pengcheng Wang, Chunsong Feng,
Daniel Gregory
Using carryless multiply instructions (clmul) from RISC-V's Zbc
extension, implement CRC-32 and CRC-16 calculations on buffers.
Based on the approach described in Intel's whitepaper on "Fast CRC
Computation for Generic Polynomials Using PCLMULQDQ Instructions", we
perform repeated folds-by-1 whilst the buffer is still big enough, then
perform Barrett's reductions on the rest.
Add a case to the crc_autotest suite that tests this implementation.
Signed-off-by: Daniel Gregory <daniel.gregory@bytedance.com>
---
MAINTAINERS | 1 +
app/test/test_crc.c | 9 ++
lib/net/meson.build | 4 +
lib/net/net_crc.h | 11 +++
lib/net/net_crc_zbc.c | 191 ++++++++++++++++++++++++++++++++++++++++++
lib/net/rte_net_crc.c | 40 +++++++++
lib/net/rte_net_crc.h | 2 +
7 files changed, 258 insertions(+)
create mode 100644 lib/net/net_crc_zbc.c
diff --git a/MAINTAINERS b/MAINTAINERS
index 81f13ebcf2..58fbc51e64 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -319,6 +319,7 @@ F: config/riscv/
F: doc/guides/linux_gsg/cross_build_dpdk_for_riscv.rst
F: lib/eal/riscv/
F: lib/hash/rte_crc_riscv64.h
+F: lib/net/net_crc_zbc.c
Intel x86
M: Bruce Richardson <bruce.richardson@intel.com>
diff --git a/app/test/test_crc.c b/app/test/test_crc.c
index b85fca35fe..fa91557cf5 100644
--- a/app/test/test_crc.c
+++ b/app/test/test_crc.c
@@ -168,6 +168,15 @@ test_crc(void)
return ret;
}
+ /* set CRC riscv mode */
+ rte_net_crc_set_alg(RTE_NET_CRC_ZBC);
+
+ ret = test_crc_calc();
+ if (ret < 0) {
+ printf("test crc (riscv64 zbc clmul): failed (%d)\n", ret);
+ return ret;
+ }
+
return 0;
}
diff --git a/lib/net/meson.build b/lib/net/meson.build
index 0b69138949..404d8dd3ae 100644
--- a/lib/net/meson.build
+++ b/lib/net/meson.build
@@ -125,4 +125,8 @@ elif (dpdk_conf.has('RTE_ARCH_ARM64') and
cc.get_define('__ARM_FEATURE_CRYPTO', args: machine_args) != '')
sources += files('net_crc_neon.c')
cflags += ['-DCC_ARM64_NEON_PMULL_SUPPORT']
+elif (dpdk_conf.has('RTE_ARCH_RISCV') and
+ cc.get_define('RTE_RISCV_FEATURE_ZBC', args: machine_args) != '')
+ sources += files('net_crc_zbc.c')
+ cflags += ['-DCC_RISCV64_ZBC_CLMUL_SUPPORT']
endif
diff --git a/lib/net/net_crc.h b/lib/net/net_crc.h
index 7a74d5406c..06ae113b47 100644
--- a/lib/net/net_crc.h
+++ b/lib/net/net_crc.h
@@ -42,4 +42,15 @@ rte_crc16_ccitt_neon_handler(const uint8_t *data, uint32_t data_len);
uint32_t
rte_crc32_eth_neon_handler(const uint8_t *data, uint32_t data_len);
+/* RISCV64 Zbc */
+void
+rte_net_crc_zbc_init(void);
+
+uint32_t
+rte_crc16_ccitt_zbc_handler(const uint8_t *data, uint32_t data_len);
+
+uint32_t
+rte_crc32_eth_zbc_handler(const uint8_t *data, uint32_t data_len);
+
+
#endif /* _NET_CRC_H_ */
diff --git a/lib/net/net_crc_zbc.c b/lib/net/net_crc_zbc.c
new file mode 100644
index 0000000000..be416ba52f
--- /dev/null
+++ b/lib/net/net_crc_zbc.c
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) ByteDance 2024
+ */
+
+#include <riscv_bitmanip.h>
+#include <stdint.h>
+
+#include <rte_common.h>
+#include <rte_net_crc.h>
+
+#include "net_crc.h"
+
+/* CLMUL CRC computation context structure */
+struct crc_clmul_ctx {
+ uint64_t Pr;
+ uint64_t mu;
+ uint64_t k3;
+ uint64_t k4;
+ uint64_t k5;
+};
+
+struct crc_clmul_ctx crc32_eth_clmul;
+struct crc_clmul_ctx crc16_ccitt_clmul;
+
+/* Perform Barrett's reduction on 8, 16, 32 or 64-bit value */
+static inline uint32_t
+crc32_barrett_zbc(
+ const uint64_t data,
+ uint32_t crc,
+ uint32_t bits,
+ const struct crc_clmul_ctx *params)
+{
+ assert((bits == 64) || (bits == 32) || (bits == 16) || (bits == 8));
+
+ /* Combine data with the initial value */
+ uint64_t temp = (uint64_t)(data ^ crc) << (64 - bits);
+
+ /*
+ * Multiply by mu, which is 2^96 / P. Division by 2^96 occurs by taking
+ * the lower 64 bits of the result (remember we're inverted)
+ */
+ temp = __riscv_clmul_64(temp, params->mu);
+ /* Multiply by P */
+ temp = __riscv_clmulh_64(temp, params->Pr);
+
+ /* Subtract from original (only needed for smaller sizes) */
+ if (bits == 16 || bits == 8)
+ temp ^= crc >> bits;
+
+ return temp;
+}
+
+/* Repeat Barrett's reduction for short buffer sizes */
+static inline uint32_t
+crc32_repeated_barrett_zbc(
+ const uint8_t *data,
+ uint32_t data_len,
+ uint32_t crc,
+ const struct crc_clmul_ctx *params)
+{
+ while (data_len >= 8) {
+ crc = crc32_barrett_zbc(*(const uint64_t *)data, crc, 64, params);
+ data += 8;
+ data_len -= 8;
+ }
+ if (data_len >= 4) {
+ crc = crc32_barrett_zbc(*(const uint32_t *)data, crc, 32, params);
+ data += 4;
+ data_len -= 4;
+ }
+ if (data_len >= 2) {
+ crc = crc32_barrett_zbc(*(const uint16_t *)data, crc, 16, params);
+ data += 2;
+ data_len -= 2;
+ }
+ if (data_len >= 1)
+ crc = crc32_barrett_zbc(*(const uint8_t *)data, crc, 8, params);
+
+ return crc;
+}
+
+/* Perform a reduction by 1 on a buffer (minimum length 2) */
+static inline void
+crc32_reduce_zbc(const uint64_t *data, uint64_t *high, uint64_t *low,
+ const struct crc_clmul_ctx *params)
+{
+ uint64_t highh = __riscv_clmulh_64(params->k3, *high);
+ uint64_t highl = __riscv_clmul_64(params->k3, *high);
+ uint64_t lowh = __riscv_clmulh_64(params->k4, *low);
+ uint64_t lowl = __riscv_clmul_64(params->k4, *low);
+
+ *high = highl ^ lowl;
+ *low = highh ^ lowh;
+
+ *high ^= *(data++);
+ *low ^= *(data++);
+}
+
+static inline uint32_t
+crc32_eth_calc_zbc(
+ const uint8_t *data,
+ uint32_t data_len,
+ uint32_t crc,
+ const struct crc_clmul_ctx *params)
+{
+ uint64_t high, low;
+ /* Minimum length we can do reduction-by-1 over */
+ const uint32_t min_len = 16;
+ /* Barrett reduce until buffer aligned to 8-byte word */
+ uint32_t misalign = (size_t)data & 7;
+ if (misalign != 0 && misalign <= data_len) {
+ crc = crc32_repeated_barrett_zbc(data, misalign, crc, params);
+ data += misalign;
+ data_len -= misalign;
+ }
+
+ if (data_len < min_len)
+ return crc32_repeated_barrett_zbc(data, data_len, crc, params);
+
+ /* Fold buffer into two 8-byte words */
+ high = *((const uint64_t *)data) ^ crc;
+ low = *((const uint64_t *)(data + 8));
+ data += 16;
+ data_len -= 16;
+
+ for (; data_len >= 16; data_len -= 16, data += 16)
+ crc32_reduce_zbc((const uint64_t *)data, &high, &low, params);
+
+ /* Fold last 128 bits into 96 */
+ low = __riscv_clmul_64(params->k4, high) ^ low;
+ high = __riscv_clmulh_64(params->k4, high);
+ /* Upper 32 bits of high are now zero */
+ high = (low >> 32) | (high << 32);
+
+ /* Fold last 96 bits into 64 */
+ low = __riscv_clmul_64(low & 0xffffffff, params->k5);
+ low ^= high;
+
+ /*
+ * Barrett reduction of remaining 64 bits, using high to store initial
+ * value of low
+ */
+ high = low;
+ low = __riscv_clmul_64(low, params->mu);
+ low &= 0xffffffff;
+ low = __riscv_clmul_64(low, params->Pr);
+ crc = (high ^ low) >> 32;
+
+ /* Combine crc with any excess */
+ crc = crc32_repeated_barrett_zbc(data, data_len, crc, params);
+
+ return crc;
+}
+
+void
+rte_net_crc_zbc_init(void)
+{
+ /* Initialise CRC32 data */
+ crc32_eth_clmul.Pr = 0x1db710641LL; /* polynomial P reversed */
+ crc32_eth_clmul.mu = 0xb4e5b025f7011641LL; /* (2 ^ 64 / P) reversed */
+ crc32_eth_clmul.k3 = 0x1751997d0LL; /* (x^(128+32) mod P << 32) reversed << 1 */
+ crc32_eth_clmul.k4 = 0x0ccaa009eLL; /* (x^(128-32) mod P << 32) reversed << 1 */
+ crc32_eth_clmul.k5 = 0x163cd6124LL; /* (x^64 mod P << 32) reversed << 1 */
+
+ /* Initialise CRC16 data */
+ /* Same calculations as above, with polynomial << 16 */
+ crc16_ccitt_clmul.Pr = 0x10811LL;
+ crc16_ccitt_clmul.mu = 0x859b040b1c581911LL;
+ crc16_ccitt_clmul.k3 = 0x8e10LL;
+ crc16_ccitt_clmul.k4 = 0x189aeLL;
+ crc16_ccitt_clmul.k5 = 0x114aaLL;
+}
+
+uint32_t
+rte_crc16_ccitt_zbc_handler(const uint8_t *data, uint32_t data_len)
+{
+ /* Negate the crc, which is present in the lower 16-bits */
+ return (uint16_t)~crc32_eth_calc_zbc(data,
+ data_len,
+ 0xffff,
+ &crc16_ccitt_clmul);
+}
+
+uint32_t
+rte_crc32_eth_zbc_handler(const uint8_t *data, uint32_t data_len)
+{
+ return ~crc32_eth_calc_zbc(data,
+ data_len,
+ 0xffffffffUL,
+ &crc32_eth_clmul);
+}
diff --git a/lib/net/rte_net_crc.c b/lib/net/rte_net_crc.c
index 346c285c15..9f04a0cb57 100644
--- a/lib/net/rte_net_crc.c
+++ b/lib/net/rte_net_crc.c
@@ -67,6 +67,12 @@ static const rte_net_crc_handler handlers_neon[] = {
[RTE_NET_CRC32_ETH] = rte_crc32_eth_neon_handler,
};
#endif
+#ifdef CC_RISCV64_ZBC_CLMUL_SUPPORT
+static const rte_net_crc_handler handlers_zbc[] = {
+ [RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_zbc_handler,
+ [RTE_NET_CRC32_ETH] = rte_crc32_eth_zbc_handler,
+};
+#endif
static uint16_t max_simd_bitwidth;
@@ -244,6 +250,31 @@ neon_pmull_init(void)
#endif
}
+/* ZBC/CLMUL handling */
+
+#define ZBC_CLMUL_CPU_SUPPORTED \
+ rte_cpu_get_flag_enabled(RTE_CPUFLAG_RISCV_EXT_ZBC)
+
+static const rte_net_crc_handler *
+zbc_clmul_get_handlers(void)
+{
+#ifdef CC_RISCV64_ZBC_CLMUL_SUPPORT
+ if (ZBC_CLMUL_CPU_SUPPORTED)
+ return handlers_zbc;
+#endif
+ NET_LOG(INFO, "Requirements not met, can't use Zbc");
+ return NULL;
+}
+
+static void
+zbc_clmul_init(void)
+{
+#ifdef CC_RISCV64_ZBC_CLMUL_SUPPORT
+ if (ZBC_CLMUL_CPU_SUPPORTED)
+ rte_net_crc_zbc_init();
+#endif
+}
+
/* Default handling */
static uint32_t
@@ -260,6 +291,9 @@ rte_crc16_ccitt_default_handler(const uint8_t *data, uint32_t data_len)
if (handlers != NULL)
return handlers[RTE_NET_CRC16_CCITT](data, data_len);
handlers = neon_pmull_get_handlers();
+ if (handlers != NULL)
+ return handlers[RTE_NET_CRC16_CCITT](data, data_len);
+ handlers = zbc_clmul_get_handlers();
if (handlers != NULL)
return handlers[RTE_NET_CRC16_CCITT](data, data_len);
handlers = handlers_scalar;
@@ -282,6 +316,8 @@ rte_crc32_eth_default_handler(const uint8_t *data, uint32_t data_len)
handlers = neon_pmull_get_handlers();
if (handlers != NULL)
return handlers[RTE_NET_CRC32_ETH](data, data_len);
+ handlers = zbc_clmul_get_handlers();
+ if (handlers != NULL)
+ return handlers[RTE_NET_CRC32_ETH](data, data_len);
handlers = handlers_scalar;
return handlers[RTE_NET_CRC32_ETH](data, data_len);
}
@@ -306,6 +342,9 @@ rte_net_crc_set_alg(enum rte_net_crc_alg alg)
break; /* for x86, always break here */
case RTE_NET_CRC_NEON:
handlers = neon_pmull_get_handlers();
+ break;
+ case RTE_NET_CRC_ZBC:
+ handlers = zbc_clmul_get_handlers();
/* fall-through */
case RTE_NET_CRC_SCALAR:
/* fall-through */
@@ -338,4 +377,5 @@ RTE_INIT(rte_net_crc_init)
sse42_pclmulqdq_init();
avx512_vpclmulqdq_init();
neon_pmull_init();
+ zbc_clmul_init();
}
diff --git a/lib/net/rte_net_crc.h b/lib/net/rte_net_crc.h
index 72d3e10ff6..12fa6a8a02 100644
--- a/lib/net/rte_net_crc.h
+++ b/lib/net/rte_net_crc.h
@@ -24,6 +24,7 @@ enum rte_net_crc_alg {
RTE_NET_CRC_SSE42,
RTE_NET_CRC_NEON,
RTE_NET_CRC_AVX512,
+ RTE_NET_CRC_ZBC,
};
/**
@@ -37,6 +38,7 @@ enum rte_net_crc_alg {
* - RTE_NET_CRC_SSE42 (Use 64-bit SSE4.2 intrinsic)
* - RTE_NET_CRC_NEON (Use ARM Neon intrinsic)
* - RTE_NET_CRC_AVX512 (Use 512-bit AVX intrinsic)
+ * - RTE_NET_CRC_ZBC (Use RISC-V Zbc extension)
*/
void
rte_net_crc_set_alg(enum rte_net_crc_alg alg);
--
2.39.2
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH v2 4/9] config/riscv: add qemu crossbuild target
2024-07-12 15:46 ` [PATCH v2 0/9] riscv: implement accelerated crc using zbc Daniel Gregory
` (2 preceding siblings ...)
2024-07-12 15:46 ` [PATCH v2 3/9] net: " Daniel Gregory
@ 2024-07-12 15:46 ` Daniel Gregory
2024-07-12 15:46 ` [PATCH v2 5/9] examples/l3fwd: use accelerated crc on riscv Daniel Gregory
` (6 subsequent siblings)
10 siblings, 0 replies; 36+ messages in thread
From: Daniel Gregory @ 2024-07-12 15:46 UTC (permalink / raw)
To: Stanislaw Kardach, Bruce Richardson
Cc: dev, Punit Agrawal, Liang Ma, Pengcheng Wang, Chunsong Feng,
Daniel Gregory
A new cross-compilation target that has extensions that DPDK uses and
QEMU supports. Initially, this is just the Zbc extension for hardware
crc support.
Signed-off-by: Daniel Gregory <daniel.gregory@bytedance.com>
---
config/riscv/meson.build | 3 ++-
config/riscv/riscv64_qemu_linux_gcc | 17 +++++++++++++++++
.../linux_gsg/cross_build_dpdk_for_riscv.rst | 5 +++++
3 files changed, 24 insertions(+), 1 deletion(-)
create mode 100644 config/riscv/riscv64_qemu_linux_gcc
diff --git a/config/riscv/meson.build b/config/riscv/meson.build
index 5d8411b254..337b26bbac 100644
--- a/config/riscv/meson.build
+++ b/config/riscv/meson.build
@@ -43,7 +43,8 @@ vendor_generic = {
['RTE_MAX_NUMA_NODES', 2]
],
'arch_config': {
- 'generic': {'machine_args': ['-march=rv64gc']}
+ 'generic': {'machine_args': ['-march=rv64gc']},
+ 'qemu': {'machine_args': ['-march=rv64gc_zbc']},
}
}
diff --git a/config/riscv/riscv64_qemu_linux_gcc b/config/riscv/riscv64_qemu_linux_gcc
new file mode 100644
index 0000000000..007cc98885
--- /dev/null
+++ b/config/riscv/riscv64_qemu_linux_gcc
@@ -0,0 +1,17 @@
+[binaries]
+c = ['ccache', 'riscv64-linux-gnu-gcc']
+cpp = ['ccache', 'riscv64-linux-gnu-g++']
+ar = 'riscv64-linux-gnu-ar'
+strip = 'riscv64-linux-gnu-strip'
+pcap-config = ''
+
+[host_machine]
+system = 'linux'
+cpu_family = 'riscv64'
+cpu = 'rv64gc_zbc'
+endian = 'little'
+
+[properties]
+vendor_id = 'generic'
+arch_id = 'qemu'
+pkg_config_libdir = '/usr/lib/riscv64-linux-gnu/pkgconfig'
diff --git a/doc/guides/linux_gsg/cross_build_dpdk_for_riscv.rst b/doc/guides/linux_gsg/cross_build_dpdk_for_riscv.rst
index 7d7f7ac72b..c3b67671a0 100644
--- a/doc/guides/linux_gsg/cross_build_dpdk_for_riscv.rst
+++ b/doc/guides/linux_gsg/cross_build_dpdk_for_riscv.rst
@@ -110,6 +110,11 @@ Currently the following targets are supported:
* SiFive U740 SoC: ``config/riscv/riscv64_sifive_u740_linux_gcc``
+* QEMU: ``config/riscv/riscv64_qemu_linux_gcc``
+
+ * A target with all the extensions that QEMU supports that DPDK has a use for
+ (currently ``rv64gc_zbc``). Requires QEMU version 7.0.0 or newer.
+
To add a new target support, ``config/riscv/meson.build`` has to be modified by
adding a new vendor/architecture id and a corresponding cross-file has to be
added to ``config/riscv`` directory.
--
2.39.2
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH v2 5/9] examples/l3fwd: use accelerated crc on riscv
2024-07-12 15:46 ` [PATCH v2 0/9] riscv: implement accelerated crc using zbc Daniel Gregory
` (3 preceding siblings ...)
2024-07-12 15:46 ` [PATCH v2 4/9] config/riscv: add qemu crossbuild target Daniel Gregory
@ 2024-07-12 15:46 ` Daniel Gregory
2024-07-12 15:46 ` [PATCH v2 6/9] ipfrag: " Daniel Gregory
` (5 subsequent siblings)
10 siblings, 0 replies; 36+ messages in thread
From: Daniel Gregory @ 2024-07-12 15:46 UTC (permalink / raw)
To: Stanislaw Kardach
Cc: dev, Punit Agrawal, Liang Ma, Pengcheng Wang, Chunsong Feng,
Daniel Gregory
When the RISC-V Zbc (carryless multiplication) extension is present, an
implementation of CRC hashing using hardware instructions is available.
Use it rather than jhash.
Signed-off-by: Daniel Gregory <daniel.gregory@bytedance.com>
---
examples/l3fwd/l3fwd_em.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index d98e66ea2c..78cec7f5cc 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -29,7 +29,7 @@
#include "l3fwd_event.h"
#include "em_route_parse.c"
-#if defined(RTE_ARCH_X86) || defined(__ARM_FEATURE_CRC32)
+#if defined(RTE_ARCH_X86) || defined(__ARM_FEATURE_CRC32) || defined(RTE_RISCV_FEATURE_ZBC)
#define EM_HASH_CRC 1
#endif
--
2.39.2
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH v2 6/9] ipfrag: use accelerated crc on riscv
2024-07-12 15:46 ` [PATCH v2 0/9] riscv: implement accelerated crc using zbc Daniel Gregory
` (4 preceding siblings ...)
2024-07-12 15:46 ` [PATCH v2 5/9] examples/l3fwd: use accelerated crc on riscv Daniel Gregory
@ 2024-07-12 15:46 ` Daniel Gregory
2024-07-12 15:46 ` [PATCH v2 7/9] examples/l3fwd-power: " Daniel Gregory
` (4 subsequent siblings)
10 siblings, 0 replies; 36+ messages in thread
From: Daniel Gregory @ 2024-07-12 15:46 UTC (permalink / raw)
To: Stanislaw Kardach, Konstantin Ananyev
Cc: dev, Punit Agrawal, Liang Ma, Pengcheng Wang, Chunsong Feng,
Daniel Gregory
When the RISC-V Zbc (carryless multiplication) extension is present, an
implementation of CRC hashing using hardware instructions is available.
Use it rather than jhash.
Signed-off-by: Daniel Gregory <daniel.gregory@bytedance.com>
---
lib/ip_frag/ip_frag_internal.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/lib/ip_frag/ip_frag_internal.c b/lib/ip_frag/ip_frag_internal.c
index 7cbef647df..19a28c447b 100644
--- a/lib/ip_frag/ip_frag_internal.c
+++ b/lib/ip_frag/ip_frag_internal.c
@@ -45,14 +45,14 @@ ipv4_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2)
p = (const uint32_t *)&key->src_dst;
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) || defined(RTE_RISCV_FEATURE_ZBC)
v = rte_hash_crc_4byte(p[0], PRIME_VALUE);
v = rte_hash_crc_4byte(p[1], v);
v = rte_hash_crc_4byte(key->id, v);
#else
v = rte_jhash_3words(p[0], p[1], key->id, PRIME_VALUE);
-#endif /* RTE_ARCH_X86 */
+#endif /* RTE_ARCH_X86 || RTE_ARCH_ARM64 || RTE_RISCV_FEATURE_ZBC */
*v1 = v;
*v2 = (v << 7) + (v >> 14);
@@ -66,7 +66,7 @@ ipv6_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2)
p = (const uint32_t *) &key->src_dst;
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) || defined(RTE_RISCV_FEATURE_ZBC)
v = rte_hash_crc_4byte(p[0], PRIME_VALUE);
v = rte_hash_crc_4byte(p[1], v);
v = rte_hash_crc_4byte(p[2], v);
--
2.39.2
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH v2 7/9] examples/l3fwd-power: use accelerated crc on riscv
2024-07-12 15:46 ` [PATCH v2 0/9] riscv: implement accelerated crc using zbc Daniel Gregory
` (5 preceding siblings ...)
2024-07-12 15:46 ` [PATCH v2 6/9] ipfrag: " Daniel Gregory
@ 2024-07-12 15:46 ` Daniel Gregory
2024-07-12 15:46 ` [PATCH v2 8/9] hash/cuckoo: " Daniel Gregory
` (3 subsequent siblings)
10 siblings, 0 replies; 36+ messages in thread
From: Daniel Gregory @ 2024-07-12 15:46 UTC (permalink / raw)
To: Stanislaw Kardach, Anatoly Burakov, David Hunt, Sivaprasad Tummala
Cc: dev, Punit Agrawal, Liang Ma, Pengcheng Wang, Chunsong Feng,
Daniel Gregory
When the RISC-V Zbc (carryless multiplication) extension is present, an
implementation of CRC hashing using hardware instructions is available.
Use it rather than jhash.
Signed-off-by: Daniel Gregory <daniel.gregory@bytedance.com>
---
examples/l3fwd-power/main.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c
index fba11da7ca..c67a3c4011 100644
--- a/examples/l3fwd-power/main.c
+++ b/examples/l3fwd-power/main.c
@@ -270,7 +270,7 @@ static struct rte_mempool * pktmbuf_pool[NB_SOCKETS];
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
-#ifdef RTE_ARCH_X86
+#if defined(RTE_ARCH_X86) || defined(RTE_RISCV_FEATURE_ZBC)
#include <rte_hash_crc.h>
#define DEFAULT_HASH_FUNC rte_hash_crc
#else
--
2.39.2
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH v2 8/9] hash/cuckoo: use accelerated crc on riscv
2024-07-12 15:46 ` [PATCH v2 0/9] riscv: implement accelerated crc using zbc Daniel Gregory
` (6 preceding siblings ...)
2024-07-12 15:46 ` [PATCH v2 7/9] examples/l3fwd-power: " Daniel Gregory
@ 2024-07-12 15:46 ` Daniel Gregory
2024-07-12 15:46 ` [PATCH v2 9/9] member: " Daniel Gregory
` (2 subsequent siblings)
10 siblings, 0 replies; 36+ messages in thread
From: Daniel Gregory @ 2024-07-12 15:46 UTC (permalink / raw)
To: Stanislaw Kardach, Yipeng Wang, Sameh Gobriel, Bruce Richardson,
Vladimir Medvedkin
Cc: dev, Punit Agrawal, Liang Ma, Pengcheng Wang, Chunsong Feng,
Daniel Gregory
When the RISC-V Zbc (carryless multiplication) extension is present, an
implementation of CRC hashing using hardware instructions is available.
Use it rather than jhash.
Signed-off-by: Daniel Gregory <daniel.gregory@bytedance.com>
---
lib/hash/rte_cuckoo_hash.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c
index d87aa52b5b..8bdb1ff69d 100644
--- a/lib/hash/rte_cuckoo_hash.c
+++ b/lib/hash/rte_cuckoo_hash.c
@@ -409,6 +409,9 @@ rte_hash_create(const struct rte_hash_parameters *params)
#elif defined(RTE_ARCH_ARM64)
if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
default_hash_func = (rte_hash_function)rte_hash_crc;
+#elif defined(RTE_ARCH_RISCV)
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_RISCV_EXT_ZBC))
+ default_hash_func = (rte_hash_function)rte_hash_crc;
#endif
/* Setup hash context */
strlcpy(h->name, params->name, sizeof(h->name));
--
2.39.2
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH v2 9/9] member: use accelerated crc on riscv
2024-07-12 15:46 ` [PATCH v2 0/9] riscv: implement accelerated crc using zbc Daniel Gregory
` (7 preceding siblings ...)
2024-07-12 15:46 ` [PATCH v2 8/9] hash/cuckoo: " Daniel Gregory
@ 2024-07-12 15:46 ` Daniel Gregory
2024-07-12 17:19 ` [PATCH v2 0/9] riscv: implement accelerated crc using zbc David Marchand
2024-08-27 15:32 ` [PATCH v3 " Daniel Gregory
10 siblings, 0 replies; 36+ messages in thread
From: Daniel Gregory @ 2024-07-12 15:46 UTC (permalink / raw)
To: Stanislaw Kardach, Yipeng Wang, Sameh Gobriel
Cc: dev, Punit Agrawal, Liang Ma, Pengcheng Wang, Chunsong Feng,
Daniel Gregory
When the RISC-V Zbc (carryless multiplication) extension is present, an
implementation of CRC hashing using hardware instructions is available.
Use it rather than jhash.
Signed-off-by: Daniel Gregory <daniel.gregory@bytedance.com>
---
lib/member/rte_member.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/member/rte_member.h b/lib/member/rte_member.h
index aec192eba5..152659628a 100644
--- a/lib/member/rte_member.h
+++ b/lib/member/rte_member.h
@@ -92,7 +92,7 @@ typedef uint16_t member_set_t;
#define RTE_MEMBER_SKETCH_COUNT_BYTE 0x02
/** @internal Hash function used by membership library. */
-#if defined(RTE_ARCH_X86) || defined(__ARM_FEATURE_CRC32)
+#if defined(RTE_ARCH_X86) || defined(__ARM_FEATURE_CRC32) || defined(RTE_RISCV_FEATURE_ZBC)
#include <rte_hash_crc.h>
#define MEMBER_HASH_FUNC rte_hash_crc
#else
--
2.39.2
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH v2 0/9] riscv: implement accelerated crc using zbc
2024-07-12 15:46 ` [PATCH v2 0/9] riscv: implement accelerated crc using zbc Daniel Gregory
` (8 preceding siblings ...)
2024-07-12 15:46 ` [PATCH v2 9/9] member: " Daniel Gregory
@ 2024-07-12 17:19 ` David Marchand
2024-08-27 15:32 ` [PATCH v3 " Daniel Gregory
10 siblings, 0 replies; 36+ messages in thread
From: David Marchand @ 2024-07-12 17:19 UTC (permalink / raw)
To: Daniel Gregory
Cc: Stanislaw Kardach, dev, Punit Agrawal, Liang Ma, Pengcheng Wang,
Chunsong Feng, Stephen Hemminger, Sachin Saxena
On Fri, Jul 12, 2024 at 5:47 PM Daniel Gregory
<daniel.gregory@bytedance.com> wrote:
>
> The RISC-V Zbc extension adds instructions for carry-less multiplication
> we can use to implement CRC in hardware. This patch set contains two new
> implementations:
>
> - one in lib/hash/rte_crc_riscv64.h that uses a Barrett reduction to
> implement the four rte_hash_crc_* functions
> - one in lib/net/net_crc_zbc.c that uses repeated single-folds to reduce
> the buffer until it is small enough for a Barrett reduction to
> implement rte_crc16_ccitt_zbc_handler and rte_crc32_eth_zbc_handler
>
> My approach is largely based on Intel's "Fast CRC Computation Using
> PCLMULQDQ Instruction" white paper
> https://www.researchgate.net/publication/263424619_Fast_CRC_computation
> and a post about "Optimizing CRC32 for small payload sizes on x86"
> https://mary.rs/lab/crc32/
>
> Whether these new implementations are enabled is controlled by new
> build-time and run-time detection of the RISC-V extensions present in
> the compiler and on the target system.
>
> I have carried out some performance comparisons between the generic
> table implementations and the new hardware implementations. Listed below
> is the number of cycles it takes to compute the CRC hash for buffers of
> various sizes (as reported by rte_get_timer_cycles()). These results
> were collected on a Kendryte K230 and averaged over 20 samples:
>
> |Buffer | CRC32-ETH (lib/net) | CRC32C (lib/hash) |
> |Size (MB) | Table | Hardware | Table | Hardware |
> |----------|----------|----------|----------|----------|
> | 1 | 155168 | 11610 | 73026 | 18385 |
> | 2 | 311203 | 22998 | 145586 | 35886 |
> | 3 | 466744 | 34370 | 218536 | 53939 |
> | 4 | 621843 | 45536 | 291574 | 71944 |
> | 5 | 777908 | 56989 | 364152 | 89706 |
> | 6 | 932736 | 68023 | 437016 | 107726 |
> | 7 | 1088756 | 79236 | 510197 | 125426 |
> | 8 | 1243794 | 90467 | 583231 | 143614 |
>
> These results suggest a speed-up of lib/net by thirteen times, and of
> lib/hash by four times.
>
> I have also run the hash_functions_autotest benchmark in dpdk_test,
> which measures the performance of the lib/hash implementation on small
> buffers, getting the following times:
>
> | Key Length | Time (ticks/op) |
> | (bytes) | Table | Hardware |
> |------------|----------|----------|
> | 1 | 0.47 | 0.85 |
> | 2 | 0.57 | 0.87 |
> | 4 | 0.99 | 0.88 |
> | 8 | 1.35 | 0.88 |
> | 9 | 1.20 | 1.09 |
> | 13 | 1.76 | 1.35 |
> | 16 | 1.87 | 1.02 |
> | 32 | 2.96 | 0.98 |
> | 37 | 3.35 | 1.45 |
> | 40 | 3.49 | 1.12 |
> | 48 | 4.02 | 1.25 |
> | 64 | 5.08 | 1.54 |
Thanks for the submission.
This series comes late for v24.07 and there was no review, it is
deferred to v24.11.
Cc: Sachin for info.
--
David Marchand
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH v3 0/9] riscv: implement accelerated crc using zbc
2024-07-12 15:46 ` [PATCH v2 0/9] riscv: implement accelerated crc using zbc Daniel Gregory
` (9 preceding siblings ...)
2024-07-12 17:19 ` [PATCH v2 0/9] riscv: implement accelerated crc using zbc David Marchand
@ 2024-08-27 15:32 ` Daniel Gregory
2024-08-27 15:32 ` [PATCH v3 1/9] config/riscv: detect presence of Zbc extension Daniel Gregory
` (5 more replies)
10 siblings, 6 replies; 36+ messages in thread
From: Daniel Gregory @ 2024-08-27 15:32 UTC (permalink / raw)
To: Stanislaw Kardach
Cc: dev, Punit Agrawal, Liang Ma, Pengcheng Wang, Chunsong Feng,
Daniel Gregory
The RISC-V Zbc extension adds instructions for carry-less multiplication
we can use to implement CRC in hardware. This patch set contains two new
implementations:
- one in lib/hash/rte_crc_riscv64.h that uses a Barrett reduction to
implement the four rte_hash_crc_* functions
- one in lib/net/net_crc_zbc.c that uses repeated single-folds to reduce
the buffer until it is small enough for a Barrett reduction to
implement rte_crc16_ccitt_zbc_handler and rte_crc32_eth_zbc_handler
My approach is largely based on Intel's "Fast CRC Computation Using
PCLMULQDQ Instruction" white paper
https://www.researchgate.net/publication/263424619_Fast_CRC_computation
and a post about "Optimizing CRC32 for small payload sizes on x86"
https://mary.rs/lab/crc32/
Whether these new implementations are enabled is controlled by new
build-time and run-time detection of the RISC-V extensions present in
the compiler and on the target system.
I have carried out some performance comparisons between the generic
table implementations and the new hardware implementations. Listed below
is the number of cycles it takes to compute the CRC hash for buffers of
various sizes (as reported by rte_get_timer_cycles()). These results
were collected on a Kendryte K230 and averaged over 20 samples:
|Buffer | CRC32-ETH (lib/net) | CRC32C (lib/hash) |
|Size (MB) | Table | Hardware | Table | Hardware |
|----------|----------|----------|----------|----------|
| 1 | 155168 | 11610 | 73026 | 18385 |
| 2 | 311203 | 22998 | 145586 | 35886 |
| 3 | 466744 | 34370 | 218536 | 53939 |
| 4 | 621843 | 45536 | 291574 | 71944 |
| 5 | 777908 | 56989 | 364152 | 89706 |
| 6 | 932736 | 68023 | 437016 | 107726 |
| 7 | 1088756 | 79236 | 510197 | 125426 |
| 8 | 1243794 | 90467 | 583231 | 143614 |
These results suggest a speed-up of lib/net by thirteen times, and of
lib/hash by four times.
I have also run the hash_functions_autotest benchmark in dpdk_test,
which measures the performance of the lib/hash implementation on small
buffers, getting the following times:
| Key Length | Time (ticks/op) |
| (bytes) | Table | Hardware |
|------------|----------|----------|
| 1 | 0.47 | 0.85 |
| 2 | 0.57 | 0.87 |
| 4 | 0.99 | 0.88 |
| 8 | 1.35 | 0.88 |
| 9 | 1.20 | 1.09 |
| 13 | 1.76 | 1.35 |
| 16 | 1.87 | 1.02 |
| 32 | 2.96 | 0.98 |
| 37 | 3.35 | 1.45 |
| 40 | 3.49 | 1.12 |
| 48 | 4.02 | 1.25 |
| 64 | 5.08 | 1.54 |
v3:
- rebase on 24.07
- replace crc with CRC in commits (check-git-log.sh)
v2:
- replace compile flag with build-time (riscv extension macros) and
run-time detection (linux hwprobe syscall) (Stephen Hemminger)
- add qemu target that supports zbc (Stanislaw Kardach)
- fix spelling error in commit message
- fix a bug in the net/ implementation that would cause segfaults on
small unaligned buffers
- refactor net/ implementation to move variable declarations to top of
functions
- enable the optimisation in a couple of other places where optimised crc
is preferred to jhash
- l3fwd-power
- cuckoo-hash
Daniel Gregory (9):
config/riscv: detect presence of Zbc extension
hash: implement CRC using riscv carryless multiply
net: implement CRC using riscv carryless multiply
config/riscv: add qemu crossbuild target
examples/l3fwd: use accelerated CRC on riscv
ipfrag: use accelerated CRC on riscv
examples/l3fwd-power: use accelerated CRC on riscv
hash/cuckoo: use accelerated CRC on riscv
member: use accelerated CRC on riscv
MAINTAINERS | 2 +
app/test/test_crc.c | 9 +
app/test/test_hash.c | 7 +
config/riscv/meson.build | 44 +++-
config/riscv/riscv64_qemu_linux_gcc | 17 ++
.../linux_gsg/cross_build_dpdk_for_riscv.rst | 5 +
examples/l3fwd-power/main.c | 2 +-
examples/l3fwd/l3fwd_em.c | 2 +-
lib/eal/riscv/include/rte_cpuflags.h | 2 +
lib/eal/riscv/rte_cpuflags.c | 112 +++++++---
lib/hash/meson.build | 1 +
lib/hash/rte_crc_riscv64.h | 89 ++++++++
lib/hash/rte_cuckoo_hash.c | 3 +
lib/hash/rte_hash_crc.c | 13 +-
lib/hash/rte_hash_crc.h | 6 +-
lib/ip_frag/ip_frag_internal.c | 6 +-
lib/member/rte_member.h | 2 +-
lib/net/meson.build | 4 +
lib/net/net_crc.h | 11 +
lib/net/net_crc_zbc.c | 191 ++++++++++++++++++
lib/net/rte_net_crc.c | 40 ++++
lib/net/rte_net_crc.h | 2 +
22 files changed, 529 insertions(+), 41 deletions(-)
create mode 100644 config/riscv/riscv64_qemu_linux_gcc
create mode 100644 lib/hash/rte_crc_riscv64.h
create mode 100644 lib/net/net_crc_zbc.c
--
2.39.2
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH v3 1/9] config/riscv: detect presence of Zbc extension
2024-08-27 15:32 ` [PATCH v3 " Daniel Gregory
@ 2024-08-27 15:32 ` Daniel Gregory
2024-08-27 15:32 ` [PATCH v3 2/9] hash: implement CRC using riscv carryless multiply Daniel Gregory
` (4 subsequent siblings)
5 siblings, 0 replies; 36+ messages in thread
From: Daniel Gregory @ 2024-08-27 15:32 UTC (permalink / raw)
To: Stanislaw Kardach, Bruce Richardson
Cc: dev, Punit Agrawal, Liang Ma, Pengcheng Wang, Chunsong Feng,
Daniel Gregory, Stephen Hemminger
The RISC-V Zbc extension adds carry-less multiply instructions we can
use to implement more efficient CRC hashing algorithms.
The RISC-V C API defines architecture extension test macros
https://github.com/riscv-non-isa/riscv-c-api-doc/blob/main/riscv-c-api.md#architecture-extension-test-macros
These let us detect whether the Zbc extension is supported on the
compiler and -march we're building with. The C API also defines Zbc
intrinsics we can use rather than inline assembly on newer versions of
GCC (14.1.0+) and Clang (18.1.0+).
The Linux kernel exposes a RISC-V hardware probing syscall for getting
information about the system at run-time including which extensions are
available. We detect whether this interface is present by looking for
the <asm/hwprobe.h> header, as it's only present in newer kernels
(v6.4+). Furthermore, support for detecting certain extensions,
including Zbc, wasn't present until versions after this, so we need to
check the constants this header exports.
The kernel exposes bitmasks for each extension supported by the probing
interface, rather than the bit index that is set if that extension is
present, so modify the existing cpu flag HWCAP table entries to line up
with this. The values returned by the interface are 64-bits long, so
grow the hwcap registers array to be able to hold them.
If the Zbc extension and intrinsics are both present and we can detect
the Zbc extension at runtime, we define a flag, RTE_RISCV_FEATURE_ZBC.
Signed-off-by: Daniel Gregory <daniel.gregory@bytedance.com>
---
config/riscv/meson.build | 41 ++++++++++
lib/eal/riscv/include/rte_cpuflags.h | 2 +
lib/eal/riscv/rte_cpuflags.c | 112 +++++++++++++++++++--------
3 files changed, 123 insertions(+), 32 deletions(-)
diff --git a/config/riscv/meson.build b/config/riscv/meson.build
index 07d7d9da23..5d8411b254 100644
--- a/config/riscv/meson.build
+++ b/config/riscv/meson.build
@@ -119,6 +119,47 @@ foreach flag: arch_config['machine_args']
endif
endforeach
+# check if we can do buildtime detection of extensions supported by the target
+riscv_extension_macros = false
+if (cc.get_define('__riscv_arch_test', args: machine_args) == '1')
+ message('Detected architecture extension test macros')
+ riscv_extension_macros = true
+else
+ warning('RISC-V architecture extension test macros not available. Build-time detection of extensions not possible')
+endif
+
+# check if we can use hwprobe interface for runtime extension detection
+riscv_hwprobe = false
+if (cc.check_header('asm/hwprobe.h', args: machine_args))
+ message('Detected hwprobe interface, enabling runtime detection of supported extensions')
+ machine_args += ['-DRTE_RISCV_FEATURE_HWPROBE']
+ riscv_hwprobe = true
+else
+ warning('Hwprobe interface not available (present in Linux v6.4+), instruction extensions won\'t be enabled')
+endif
+
+# detect extensions
+# RISC-V Carry-less multiplication extension (Zbc) for hardware implementations
+# of CRC-32C (lib/hash/rte_crc_riscv64.h) and CRC-32/16 (lib/net/net_crc_zbc.c).
+# Requires intrinsics available in GCC 14.1.0+ and Clang 18.1.0+
+if (riscv_extension_macros and riscv_hwprobe and
+ (cc.get_define('__riscv_zbc', args: machine_args) != ''))
+ if ((cc.get_id() == 'gcc' and cc.version().version_compare('>=14.1.0'))
+ or (cc.get_id() == 'clang' and cc.version().version_compare('>=18.1.0')))
+ # determine whether we can detect Zbc extension (this wasn't possible until
+ # Linux kernel v6.8)
+ if (cc.compiles('''#include <asm/hwprobe.h>
+ int a = RISCV_HWPROBE_EXT_ZBC;''', args: machine_args))
+ message('Compiling with the Zbc extension')
+ machine_args += ['-DRTE_RISCV_FEATURE_ZBC']
+ else
+ warning('Detected Zbc extension but cannot use because runtime detection doesn\'t support it (support present in Linux kernel v6.8+)')
+ endif
+ else
+ warning('Detected Zbc extension but cannot use because intrinsics are not available (present in GCC 14.1.0+ and Clang 18.1.0+)')
+ endif
+endif
+
# apply flags
foreach flag: dpdk_flags
if flag.length() > 0
diff --git a/lib/eal/riscv/include/rte_cpuflags.h b/lib/eal/riscv/include/rte_cpuflags.h
index d742efc40f..4e26b584b3 100644
--- a/lib/eal/riscv/include/rte_cpuflags.h
+++ b/lib/eal/riscv/include/rte_cpuflags.h
@@ -42,6 +42,8 @@ enum rte_cpu_flag_t {
RTE_CPUFLAG_RISCV_ISA_X, /* Non-standard extension present */
RTE_CPUFLAG_RISCV_ISA_Y, /* Reserved */
RTE_CPUFLAG_RISCV_ISA_Z, /* Reserved */
+
+ RTE_CPUFLAG_RISCV_EXT_ZBC, /* Carry-less multiplication */
};
#include "generic/rte_cpuflags.h"
diff --git a/lib/eal/riscv/rte_cpuflags.c b/lib/eal/riscv/rte_cpuflags.c
index eb4105c18b..dedf0395ab 100644
--- a/lib/eal/riscv/rte_cpuflags.c
+++ b/lib/eal/riscv/rte_cpuflags.c
@@ -11,6 +11,15 @@
#include <assert.h>
#include <unistd.h>
#include <string.h>
+#include <sys/syscall.h>
+
+/*
+ * when hardware probing is not possible, we assume all extensions are missing
+ * at runtime
+ */
+#ifdef RTE_RISCV_FEATURE_HWPROBE
+#include <asm/hwprobe.h>
+#endif
#ifndef AT_HWCAP
#define AT_HWCAP 16
@@ -29,54 +38,90 @@ enum cpu_register_t {
REG_HWCAP,
REG_HWCAP2,
REG_PLATFORM,
- REG_MAX
+ REG_HWPROBE_IMA_EXT_0,
+ REG_MAX,
};
-typedef uint32_t hwcap_registers_t[REG_MAX];
+typedef uint64_t hwcap_registers_t[REG_MAX];
/**
* Struct to hold a processor feature entry
*/
struct feature_entry {
uint32_t reg;
- uint32_t bit;
+ uint64_t mask;
#define CPU_FLAG_NAME_MAX_LEN 64
char name[CPU_FLAG_NAME_MAX_LEN];
};
-#define FEAT_DEF(name, reg, bit) \
- [RTE_CPUFLAG_##name] = {reg, bit, #name},
+#define FEAT_DEF(name, reg, mask) \
+ [RTE_CPUFLAG_##name] = {reg, mask, #name},
typedef Elf64_auxv_t _Elfx_auxv_t;
const struct feature_entry rte_cpu_feature_table[] = {
- FEAT_DEF(RISCV_ISA_A, REG_HWCAP, 0)
- FEAT_DEF(RISCV_ISA_B, REG_HWCAP, 1)
- FEAT_DEF(RISCV_ISA_C, REG_HWCAP, 2)
- FEAT_DEF(RISCV_ISA_D, REG_HWCAP, 3)
- FEAT_DEF(RISCV_ISA_E, REG_HWCAP, 4)
- FEAT_DEF(RISCV_ISA_F, REG_HWCAP, 5)
- FEAT_DEF(RISCV_ISA_G, REG_HWCAP, 6)
- FEAT_DEF(RISCV_ISA_H, REG_HWCAP, 7)
- FEAT_DEF(RISCV_ISA_I, REG_HWCAP, 8)
- FEAT_DEF(RISCV_ISA_J, REG_HWCAP, 9)
- FEAT_DEF(RISCV_ISA_K, REG_HWCAP, 10)
- FEAT_DEF(RISCV_ISA_L, REG_HWCAP, 11)
- FEAT_DEF(RISCV_ISA_M, REG_HWCAP, 12)
- FEAT_DEF(RISCV_ISA_N, REG_HWCAP, 13)
- FEAT_DEF(RISCV_ISA_O, REG_HWCAP, 14)
- FEAT_DEF(RISCV_ISA_P, REG_HWCAP, 15)
- FEAT_DEF(RISCV_ISA_Q, REG_HWCAP, 16)
- FEAT_DEF(RISCV_ISA_R, REG_HWCAP, 17)
- FEAT_DEF(RISCV_ISA_S, REG_HWCAP, 18)
- FEAT_DEF(RISCV_ISA_T, REG_HWCAP, 19)
- FEAT_DEF(RISCV_ISA_U, REG_HWCAP, 20)
- FEAT_DEF(RISCV_ISA_V, REG_HWCAP, 21)
- FEAT_DEF(RISCV_ISA_W, REG_HWCAP, 22)
- FEAT_DEF(RISCV_ISA_X, REG_HWCAP, 23)
- FEAT_DEF(RISCV_ISA_Y, REG_HWCAP, 24)
- FEAT_DEF(RISCV_ISA_Z, REG_HWCAP, 25)
+ FEAT_DEF(RISCV_ISA_A, REG_HWCAP, 1 << 0)
+ FEAT_DEF(RISCV_ISA_B, REG_HWCAP, 1 << 1)
+ FEAT_DEF(RISCV_ISA_C, REG_HWCAP, 1 << 2)
+ FEAT_DEF(RISCV_ISA_D, REG_HWCAP, 1 << 3)
+ FEAT_DEF(RISCV_ISA_E, REG_HWCAP, 1 << 4)
+ FEAT_DEF(RISCV_ISA_F, REG_HWCAP, 1 << 5)
+ FEAT_DEF(RISCV_ISA_G, REG_HWCAP, 1 << 6)
+ FEAT_DEF(RISCV_ISA_H, REG_HWCAP, 1 << 7)
+ FEAT_DEF(RISCV_ISA_I, REG_HWCAP, 1 << 8)
+ FEAT_DEF(RISCV_ISA_J, REG_HWCAP, 1 << 9)
+ FEAT_DEF(RISCV_ISA_K, REG_HWCAP, 1 << 10)
+ FEAT_DEF(RISCV_ISA_L, REG_HWCAP, 1 << 11)
+ FEAT_DEF(RISCV_ISA_M, REG_HWCAP, 1 << 12)
+ FEAT_DEF(RISCV_ISA_N, REG_HWCAP, 1 << 13)
+ FEAT_DEF(RISCV_ISA_O, REG_HWCAP, 1 << 14)
+ FEAT_DEF(RISCV_ISA_P, REG_HWCAP, 1 << 15)
+ FEAT_DEF(RISCV_ISA_Q, REG_HWCAP, 1 << 16)
+ FEAT_DEF(RISCV_ISA_R, REG_HWCAP, 1 << 17)
+ FEAT_DEF(RISCV_ISA_S, REG_HWCAP, 1 << 18)
+ FEAT_DEF(RISCV_ISA_T, REG_HWCAP, 1 << 19)
+ FEAT_DEF(RISCV_ISA_U, REG_HWCAP, 1 << 20)
+ FEAT_DEF(RISCV_ISA_V, REG_HWCAP, 1 << 21)
+ FEAT_DEF(RISCV_ISA_W, REG_HWCAP, 1 << 22)
+ FEAT_DEF(RISCV_ISA_X, REG_HWCAP, 1 << 23)
+ FEAT_DEF(RISCV_ISA_Y, REG_HWCAP, 1 << 24)
+ FEAT_DEF(RISCV_ISA_Z, REG_HWCAP, 1 << 25)
+
+#ifdef RTE_RISCV_FEATURE_ZBC
+ FEAT_DEF(RISCV_EXT_ZBC, REG_HWPROBE_IMA_EXT_0, RISCV_HWPROBE_EXT_ZBC)
+#else
+ FEAT_DEF(RISCV_EXT_ZBC, REG_HWPROBE_IMA_EXT_0, 0)
+#endif
};
+
+#ifdef RTE_RISCV_FEATURE_HWPROBE
+/*
+ * Use kernel interface for probing hardware capabilities to get extensions
+ * present on this machine
+ */
+static uint64_t
+rte_cpu_hwprobe_ima_ext(void)
+{
+ long ret;
+ struct riscv_hwprobe extensions_pair;
+
+ struct riscv_hwprobe *pairs = &extensions_pair;
+ size_t pair_count = 1;
+ /* empty set of cpus returns extensions present on all cpus */
+ cpu_set_t *cpus = NULL;
+ size_t cpusetsize = 0;
+ unsigned int flags = 0;
+
+ extensions_pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0;
+ ret = syscall(__NR_riscv_hwprobe, pairs, pair_count, cpusetsize, cpus,
+ flags);
+
+ if (ret != 0)
+ return 0;
+ return extensions_pair.value;
+}
+#endif /* RTE_RISCV_FEATURE_HWPROBE */
+
/*
* Read AUXV software register and get cpu features for ARM
*/
@@ -85,6 +130,9 @@ rte_cpu_get_features(hwcap_registers_t out)
{
out[REG_HWCAP] = rte_cpu_getauxval(AT_HWCAP);
out[REG_HWCAP2] = rte_cpu_getauxval(AT_HWCAP2);
+#ifdef RTE_RISCV_FEATURE_HWPROBE
+ out[REG_HWPROBE_IMA_EXT_0] = rte_cpu_hwprobe_ima_ext();
+#endif
}
/*
@@ -104,7 +152,7 @@ rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
return -EFAULT;
rte_cpu_get_features(regs);
- return (regs[feat->reg] >> feat->bit) & 1;
+ return (regs[feat->reg] & feat->mask) != 0;
}
const char *
--
2.39.2
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH v3 2/9] hash: implement CRC using riscv carryless multiply
2024-08-27 15:32 ` [PATCH v3 " Daniel Gregory
2024-08-27 15:32 ` [PATCH v3 1/9] config/riscv: detect presence of Zbc extension Daniel Gregory
@ 2024-08-27 15:32 ` Daniel Gregory
2024-08-27 15:32 ` [PATCH v3 3/9] net: " Daniel Gregory
` (3 subsequent siblings)
5 siblings, 0 replies; 36+ messages in thread
From: Daniel Gregory @ 2024-08-27 15:32 UTC (permalink / raw)
To: Stanislaw Kardach, Thomas Monjalon, Yipeng Wang, Sameh Gobriel,
Bruce Richardson, Vladimir Medvedkin
Cc: dev, Punit Agrawal, Liang Ma, Pengcheng Wang, Chunsong Feng,
Daniel Gregory
Using carryless multiply instructions from RISC-V's Zbc extension,
implement a Barrett reduction that calculates CRC-32C checksums.
Based on the approach described by Intel's whitepaper on "Fast CRC
Computation for Generic Polynomials Using PCLMULQDQ Instruction", which
is also described here
(https://web.archive.org/web/20240111232520/https://mary.rs/lab/crc32/)
Add a case to the autotest_hash unit test.
Signed-off-by: Daniel Gregory <daniel.gregory@bytedance.com>
---
MAINTAINERS | 1 +
app/test/test_hash.c | 7 +++
lib/hash/meson.build | 1 +
lib/hash/rte_crc_riscv64.h | 89 ++++++++++++++++++++++++++++++++++++++
lib/hash/rte_hash_crc.c | 13 +++++-
lib/hash/rte_hash_crc.h | 6 ++-
6 files changed, 115 insertions(+), 2 deletions(-)
create mode 100644 lib/hash/rte_crc_riscv64.h
diff --git a/MAINTAINERS b/MAINTAINERS
index c5a703b5c0..fa081552c7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -322,6 +322,7 @@ M: Stanislaw Kardach <stanislaw.kardach@gmail.com>
F: config/riscv/
F: doc/guides/linux_gsg/cross_build_dpdk_for_riscv.rst
F: lib/eal/riscv/
+F: lib/hash/rte_crc_riscv64.h
Intel x86
M: Bruce Richardson <bruce.richardson@intel.com>
diff --git a/app/test/test_hash.c b/app/test/test_hash.c
index 65b9cad93c..dd491ea4d9 100644
--- a/app/test/test_hash.c
+++ b/app/test/test_hash.c
@@ -231,6 +231,13 @@ test_crc32_hash_alg_equiv(void)
printf("Failed checking CRC32_SW against CRC32_ARM64\n");
break;
}
+
+ /* Check against 8-byte-operand RISCV64 CRC32 if available */
+ rte_hash_crc_set_alg(CRC32_RISCV64);
+ if (hash_val != rte_hash_crc(data64, data_len, init_val)) {
+ printf("Failed checking CRC32_SW against CRC32_RISC64\n");
+ break;
+ }
}
/* Resetting to best available algorithm */
diff --git a/lib/hash/meson.build b/lib/hash/meson.build
index 277eb9fa93..8355869a80 100644
--- a/lib/hash/meson.build
+++ b/lib/hash/meson.build
@@ -12,6 +12,7 @@ headers = files(
indirect_headers += files(
'rte_crc_arm64.h',
'rte_crc_generic.h',
+ 'rte_crc_riscv64.h',
'rte_crc_sw.h',
'rte_crc_x86.h',
'rte_thash_x86_gfni.h',
diff --git a/lib/hash/rte_crc_riscv64.h b/lib/hash/rte_crc_riscv64.h
new file mode 100644
index 0000000000..94f6857c69
--- /dev/null
+++ b/lib/hash/rte_crc_riscv64.h
@@ -0,0 +1,89 @@
+/* SPDX-License_Identifier: BSD-3-Clause
+ * Copyright(c) ByteDance 2024
+ */
+
+#include <assert.h>
+#include <stdint.h>
+
+#include <riscv_bitmanip.h>
+
+#ifndef _RTE_CRC_RISCV64_H_
+#define _RTE_CRC_RISCV64_H_
+
+/*
+ * CRC-32C takes a reflected input (bit 7 is the lsb) and produces a reflected
+ * output. As reflecting the value we're checksumming is expensive, we instead
+ * reflect the polynomial P (0x11EDC6F41) and mu and our CRC32 algorithm.
+ *
+ * The mu constant is used for a Barrett reduction. It's 2^96 / P (0x11F91CAF6)
+ * reflected. Picking 2^96 rather than 2^64 means we can calculate a 64-bit crc
+ * using only two multiplications (https://mary.rs/lab/crc32/)
+ */
+static const uint64_t p = 0x105EC76F1;
+static const uint64_t mu = 0x4869EC38DEA713F1UL;
+
+/* Calculate the CRC32C checksum using a Barrett reduction */
+static inline uint32_t
+crc32c_riscv64(uint64_t data, uint32_t init_val, uint32_t bits)
+{
+ assert((bits == 64) || (bits == 32) || (bits == 16) || (bits == 8));
+
+ /* Combine data with the initial value */
+ uint64_t crc = (uint64_t)(data ^ init_val) << (64 - bits);
+
+ /*
+ * Multiply by mu, which is 2^96 / P. Division by 2^96 occurs by taking
+ * the lower 64 bits of the result (remember we're inverted)
+ */
+ crc = __riscv_clmul_64(crc, mu);
+ /* Multiply by P */
+ crc = __riscv_clmulh_64(crc, p);
+
+ /* Subtract from original (only needed for smaller sizes) */
+ if (bits == 16 || bits == 8)
+ crc ^= init_val >> bits;
+
+ return crc;
+}
+
+/*
+ * Use carryless multiply to perform hash on a value, falling back on the
+ * software in case the Zbc extension is not supported
+ */
+static inline uint32_t
+rte_hash_crc_1byte(uint8_t data, uint32_t init_val)
+{
+ if (likely(rte_hash_crc32_alg & CRC32_RISCV64))
+ return crc32c_riscv64(data, init_val, 8);
+
+ return crc32c_1byte(data, init_val);
+}
+
+static inline uint32_t
+rte_hash_crc_2byte(uint16_t data, uint32_t init_val)
+{
+ if (likely(rte_hash_crc32_alg & CRC32_RISCV64))
+ return crc32c_riscv64(data, init_val, 16);
+
+ return crc32c_2bytes(data, init_val);
+}
+
+static inline uint32_t
+rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
+{
+ if (likely(rte_hash_crc32_alg & CRC32_RISCV64))
+ return crc32c_riscv64(data, init_val, 32);
+
+ return crc32c_1word(data, init_val);
+}
+
+static inline uint32_t
+rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
+{
+ if (likely(rte_hash_crc32_alg & CRC32_RISCV64))
+ return crc32c_riscv64(data, init_val, 64);
+
+ return crc32c_2words(data, init_val);
+}
+
+#endif /* _RTE_CRC_RISCV64_H_ */
diff --git a/lib/hash/rte_hash_crc.c b/lib/hash/rte_hash_crc.c
index c037cdb0f0..3eb696a576 100644
--- a/lib/hash/rte_hash_crc.c
+++ b/lib/hash/rte_hash_crc.c
@@ -15,7 +15,7 @@ RTE_LOG_REGISTER_SUFFIX(hash_crc_logtype, crc, INFO);
uint8_t rte_hash_crc32_alg = CRC32_SW;
/**
- * Allow or disallow use of SSE4.2/ARMv8 intrinsics for CRC32 hash
+ * Allow or disallow use of SSE4.2/ARMv8/RISC-V intrinsics for CRC32 hash
* calculation.
*
* @param alg
@@ -24,6 +24,7 @@ uint8_t rte_hash_crc32_alg = CRC32_SW;
* - (CRC32_SSE42) Use SSE4.2 intrinsics if available
* - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default x86)
* - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available (default ARMv8)
+ * - (CRC32_RISCV64) Use RISCV64 Zbc extension if available
*
*/
void
@@ -52,6 +53,14 @@ rte_hash_crc_set_alg(uint8_t alg)
rte_hash_crc32_alg = CRC32_ARM64;
#endif
+#if defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_ZBC)
+ if (!(alg & CRC32_RISCV64))
+ HASH_CRC_LOG(WARNING,
+ "Unsupported CRC32 algorithm requested using CRC32_RISCV64");
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_RISCV_EXT_ZBC))
+ rte_hash_crc32_alg = CRC32_RISCV64;
+#endif
+
if (rte_hash_crc32_alg == CRC32_SW)
HASH_CRC_LOG(WARNING,
"Unsupported CRC32 algorithm requested using CRC32_SW");
@@ -64,6 +73,8 @@ RTE_INIT(rte_hash_crc_init_alg)
rte_hash_crc_set_alg(CRC32_SSE42_x64);
#elif defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_CRC32)
rte_hash_crc_set_alg(CRC32_ARM64);
+#elif defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_ZBC)
+ rte_hash_crc_set_alg(CRC32_RISCV64);
#else
rte_hash_crc_set_alg(CRC32_SW);
#endif
diff --git a/lib/hash/rte_hash_crc.h b/lib/hash/rte_hash_crc.h
index 8ad2422ec3..034ce1f8b4 100644
--- a/lib/hash/rte_hash_crc.h
+++ b/lib/hash/rte_hash_crc.h
@@ -28,6 +28,7 @@ extern "C" {
#define CRC32_x64 (1U << 2)
#define CRC32_SSE42_x64 (CRC32_x64|CRC32_SSE42)
#define CRC32_ARM64 (1U << 3)
+#define CRC32_RISCV64 (1U << 4)
extern uint8_t rte_hash_crc32_alg;
@@ -35,12 +36,14 @@ extern uint8_t rte_hash_crc32_alg;
#include "rte_crc_arm64.h"
#elif defined(RTE_ARCH_X86)
#include "rte_crc_x86.h"
+#elif defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_ZBC)
+#include "rte_crc_riscv64.h"
#else
#include "rte_crc_generic.h"
#endif
/**
- * Allow or disallow use of SSE4.2/ARMv8 intrinsics for CRC32 hash
+ * Allow or disallow use of SSE4.2/ARMv8/RISC-V intrinsics for CRC32 hash
* calculation.
*
* @param alg
@@ -49,6 +52,7 @@ extern uint8_t rte_hash_crc32_alg;
* - (CRC32_SSE42) Use SSE4.2 intrinsics if available
* - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default x86)
* - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available (default ARMv8)
+ * - (CRC32_RISCV64) Use RISC-V Carry-less multiply if available (default rv64gc_zbc)
*/
void
rte_hash_crc_set_alg(uint8_t alg);
--
2.39.2
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH v3 3/9] net: implement CRC using riscv carryless multiply
2024-08-27 15:32 ` [PATCH v3 " Daniel Gregory
2024-08-27 15:32 ` [PATCH v3 1/9] config/riscv: detect presence of Zbc extension Daniel Gregory
2024-08-27 15:32 ` [PATCH v3 2/9] hash: implement CRC using riscv carryless multiply Daniel Gregory
@ 2024-08-27 15:32 ` Daniel Gregory
2024-08-27 15:32 ` [PATCH v3 4/9] config/riscv: add qemu crossbuild target Daniel Gregory
` (2 subsequent siblings)
5 siblings, 0 replies; 36+ messages in thread
From: Daniel Gregory @ 2024-08-27 15:32 UTC (permalink / raw)
To: Stanislaw Kardach, Thomas Monjalon, Jasvinder Singh
Cc: dev, Punit Agrawal, Liang Ma, Pengcheng Wang, Chunsong Feng,
Daniel Gregory
Using carryless multiply instructions (clmul) from RISC-V's Zbc
extension, implement CRC-32 and CRC-16 calculations on buffers.
Based on the approach described in Intel's whitepaper on "Fast CRC
Computation for Generic Polynomials Using PCLMULQDQ Instructions", we
perform repeated folds-by-1 whilst the buffer is still big enough, then
perform Barrett's reductions on the rest.
Add a case to the crc_autotest suite that tests this implementation.
Signed-off-by: Daniel Gregory <daniel.gregory@bytedance.com>
---
MAINTAINERS | 1 +
app/test/test_crc.c | 9 ++
lib/net/meson.build | 4 +
lib/net/net_crc.h | 11 +++
lib/net/net_crc_zbc.c | 191 ++++++++++++++++++++++++++++++++++++++++++
lib/net/rte_net_crc.c | 40 +++++++++
lib/net/rte_net_crc.h | 2 +
7 files changed, 258 insertions(+)
create mode 100644 lib/net/net_crc_zbc.c
diff --git a/MAINTAINERS b/MAINTAINERS
index fa081552c7..eeaa2c645e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -323,6 +323,7 @@ F: config/riscv/
F: doc/guides/linux_gsg/cross_build_dpdk_for_riscv.rst
F: lib/eal/riscv/
F: lib/hash/rte_crc_riscv64.h
+F: lib/net/net_crc_zbc.c
Intel x86
M: Bruce Richardson <bruce.richardson@intel.com>
diff --git a/app/test/test_crc.c b/app/test/test_crc.c
index b85fca35fe..fa91557cf5 100644
--- a/app/test/test_crc.c
+++ b/app/test/test_crc.c
@@ -168,6 +168,15 @@ test_crc(void)
return ret;
}
+ /* set CRC riscv mode */
+ rte_net_crc_set_alg(RTE_NET_CRC_ZBC);
+
+ ret = test_crc_calc();
+ if (ret < 0) {
+ printf("test crc (riscv64 zbc clmul): failed (%d)\n", ret);
+ return ret;
+ }
+
return 0;
}
diff --git a/lib/net/meson.build b/lib/net/meson.build
index 0b69138949..404d8dd3ae 100644
--- a/lib/net/meson.build
+++ b/lib/net/meson.build
@@ -125,4 +125,8 @@ elif (dpdk_conf.has('RTE_ARCH_ARM64') and
cc.get_define('__ARM_FEATURE_CRYPTO', args: machine_args) != '')
sources += files('net_crc_neon.c')
cflags += ['-DCC_ARM64_NEON_PMULL_SUPPORT']
+elif (dpdk_conf.has('RTE_ARCH_RISCV') and
+ cc.get_define('RTE_RISCV_FEATURE_ZBC', args: machine_args) != '')
+ sources += files('net_crc_zbc.c')
+ cflags += ['-DCC_RISCV64_ZBC_CLMUL_SUPPORT']
endif
diff --git a/lib/net/net_crc.h b/lib/net/net_crc.h
index 7a74d5406c..06ae113b47 100644
--- a/lib/net/net_crc.h
+++ b/lib/net/net_crc.h
@@ -42,4 +42,15 @@ rte_crc16_ccitt_neon_handler(const uint8_t *data, uint32_t data_len);
uint32_t
rte_crc32_eth_neon_handler(const uint8_t *data, uint32_t data_len);
+/* RISCV64 Zbc */
+void
+rte_net_crc_zbc_init(void);
+
+uint32_t
+rte_crc16_ccitt_zbc_handler(const uint8_t *data, uint32_t data_len);
+
+uint32_t
+rte_crc32_eth_zbc_handler(const uint8_t *data, uint32_t data_len);
+
+
#endif /* _NET_CRC_H_ */
diff --git a/lib/net/net_crc_zbc.c b/lib/net/net_crc_zbc.c
new file mode 100644
index 0000000000..be416ba52f
--- /dev/null
+++ b/lib/net/net_crc_zbc.c
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) ByteDance 2024
+ */
+
+#include <riscv_bitmanip.h>
+#include <stdint.h>
+
+#include <rte_common.h>
+#include <rte_net_crc.h>
+
+#include "net_crc.h"
+
+/*
+ * CLMUL CRC computation context structure
+ *
+ * Holds the per-polynomial fold and Barrett constants; the exact
+ * derivation of each value is documented in rte_net_crc_zbc_init().
+ */
+struct crc_clmul_ctx {
+ uint64_t Pr; /* polynomial P, bit-reversed */
+ uint64_t mu; /* Barrett constant, (2^64 / P) reversed */
+ uint64_t k3; /* fold-by-1 constant for the high word */
+ uint64_t k4; /* fold-by-1 constant for the low word */
+ uint64_t k5; /* 96-bit to 64-bit fold constant */
+};
+
+/* Filled in at startup by rte_net_crc_zbc_init() */
+struct crc_clmul_ctx crc32_eth_clmul;
+struct crc_clmul_ctx crc16_ccitt_clmul;
+
+/* Perform Barrett's reduction on 8, 16, 32 or 64-bit value */
+static inline uint32_t
+crc32_barrett_zbc(
+ const uint64_t data,
+ uint32_t crc,
+ uint32_t bits,
+ const struct crc_clmul_ctx *params)
+{
+ assert((bits == 64) || (bits == 32) || (bits == 16) || (bits == 8));
+
+ /* Combine data with the initial value */
+ uint64_t temp = (uint64_t)(data ^ crc) << (64 - bits);
+
+ /*
+ * Multiply by mu, which is 2^96 / P. Division by 2^96 occurs by taking
+ * the lower 64 bits of the result (remember we're inverted)
+ */
+ temp = __riscv_clmul_64(temp, params->mu);
+ /* Multiply by P */
+ temp = __riscv_clmulh_64(temp, params->Pr);
+
+ /* Subtract from original (only needed for smaller sizes) */
+ if (bits == 16 || bits == 8)
+ temp ^= crc >> bits;
+
+ return temp;
+}
+
+/* Repeat Barrett's reduction for short buffer sizes */
+static inline uint32_t
+crc32_repeated_barrett_zbc(
+ const uint8_t *data,
+ uint32_t data_len,
+ uint32_t crc,
+ const struct crc_clmul_ctx *params)
+{
+ while (data_len >= 8) {
+ crc = crc32_barrett_zbc(*(const uint64_t *)data, crc, 64, params);
+ data += 8;
+ data_len -= 8;
+ }
+ if (data_len >= 4) {
+ crc = crc32_barrett_zbc(*(const uint32_t *)data, crc, 32, params);
+ data += 4;
+ data_len -= 4;
+ }
+ if (data_len >= 2) {
+ crc = crc32_barrett_zbc(*(const uint16_t *)data, crc, 16, params);
+ data += 2;
+ data_len -= 2;
+ }
+ if (data_len >= 1)
+ crc = crc32_barrett_zbc(*(const uint8_t *)data, crc, 8, params);
+
+ return crc;
+}
+
+/* Perform a reduction by 1 on a buffer (minimum length 2) */
+static inline void
+crc32_reduce_zbc(const uint64_t *data, uint64_t *high, uint64_t *low,
+ const struct crc_clmul_ctx *params)
+{
+ uint64_t highh = __riscv_clmulh_64(params->k3, *high);
+ uint64_t highl = __riscv_clmul_64(params->k3, *high);
+ uint64_t lowh = __riscv_clmulh_64(params->k4, *low);
+ uint64_t lowl = __riscv_clmul_64(params->k4, *low);
+
+ *high = highl ^ lowl;
+ *low = highh ^ lowh;
+
+ *high ^= *(data++);
+ *low ^= *(data++);
+}
+
+/*
+ * Compute a bit-reflected CRC over a buffer: align to an 8-byte boundary,
+ * fold-by-1 with carry-less multiplies while at least 16 bytes remain,
+ * reduce the 128-bit remainder with Barrett's method, then fold in any
+ * tail bytes. 'crc' is the pre-inverted initial state; the caller applies
+ * the final negation.
+ */
+static inline uint32_t
+crc32_eth_calc_zbc(
+ const uint8_t *data,
+ uint32_t data_len,
+ uint32_t crc,
+ const struct crc_clmul_ctx *params)
+{
+ uint64_t high, low;
+ /* Minimum length we can do reduction-by-1 over */
+ const uint32_t min_len = 16;
+ /*
+ * Barrett reduce the bytes before the next 8-byte boundary so the
+ * fold loop below reads aligned 64-bit words. Note this must be the
+ * distance *to* the boundary; the previous (addr & 7) consumed the
+ * offset *into* the word and left the pointer still misaligned.
+ */
+ uint32_t misalign = (uint32_t)(-(uintptr_t)data & 7);
+ if (misalign != 0 && misalign <= data_len) {
+ crc = crc32_repeated_barrett_zbc(data, misalign, crc, params);
+ data += misalign;
+ data_len -= misalign;
+ }
+
+ if (data_len < min_len)
+ return crc32_repeated_barrett_zbc(data, data_len, crc, params);
+
+ /* Fold buffer into two 8-byte words */
+ high = *((const uint64_t *)data) ^ crc;
+ low = *((const uint64_t *)(data + 8));
+ data += 16;
+ data_len -= 16;
+
+ for (; data_len >= 16; data_len -= 16, data += 16)
+ crc32_reduce_zbc((const uint64_t *)data, &high, &low, params);
+
+ /* Fold last 128 bits into 96 */
+ low = __riscv_clmul_64(params->k4, high) ^ low;
+ high = __riscv_clmulh_64(params->k4, high);
+ /* Upper 32 bits of high are now zero */
+ high = (low >> 32) | (high << 32);
+
+ /* Fold last 96 bits into 64 */
+ low = __riscv_clmul_64(low & 0xffffffff, params->k5);
+ low ^= high;
+
+ /*
+ * Barrett reduction of remaining 64 bits, using high to store initial
+ * value of low
+ */
+ high = low;
+ low = __riscv_clmul_64(low, params->mu);
+ low &= 0xffffffff;
+ low = __riscv_clmul_64(low, params->Pr);
+ crc = (high ^ low) >> 32;
+
+ /* Combine crc with any excess */
+ crc = crc32_repeated_barrett_zbc(data, data_len, crc, params);
+
+ return crc;
+}
+
+/*
+ * Populate the fold and Barrett constants for CRC-32 (Ethernet) and
+ * CRC-16 (CCITT). Called once at startup when the Zbc extension is
+ * available.
+ */
+void
+rte_net_crc_zbc_init(void)
+{
+ /* Initialise CRC32 data */
+ crc32_eth_clmul.Pr = 0x1db710641LL; /* polynomial P reversed */
+ crc32_eth_clmul.mu = 0xb4e5b025f7011641LL; /* (2 ^ 64 / P) reversed */
+ crc32_eth_clmul.k3 = 0x1751997d0LL; /* (x^(128+32) mod P << 32) reversed << 1 */
+ crc32_eth_clmul.k4 = 0x0ccaa009eLL; /* (x^(128-32) mod P << 32) reversed << 1 */
+ crc32_eth_clmul.k5 = 0x163cd6124LL; /* (x^64 mod P << 32) reversed << 1 */
+
+ /* Initialise CRC16 data */
+ /* Same calculations as above, with polynomial << 16 */
+ crc16_ccitt_clmul.Pr = 0x10811LL;
+ crc16_ccitt_clmul.mu = 0x859b040b1c581911LL;
+ crc16_ccitt_clmul.k3 = 0x8e10LL;
+ crc16_ccitt_clmul.k4 = 0x189aeLL;
+ crc16_ccitt_clmul.k5 = 0x114aaLL;
+}
+
+/* CRC-16/CCITT over 'data', seeded with all-ones per the CRC definition */
+uint32_t
+rte_crc16_ccitt_zbc_handler(const uint8_t *data, uint32_t data_len)
+{
+ /* Negate the crc, which is present in the lower 16-bits */
+ return (uint16_t)~crc32_eth_calc_zbc(data,
+ data_len,
+ 0xffff,
+ &crc16_ccitt_clmul);
+}
+
+/* CRC-32/Ethernet over 'data': all-ones seed, final complement */
+uint32_t
+rte_crc32_eth_zbc_handler(const uint8_t *data, uint32_t data_len)
+{
+ return ~crc32_eth_calc_zbc(data,
+ data_len,
+ 0xffffffffUL,
+ &crc32_eth_clmul);
+}
diff --git a/lib/net/rte_net_crc.c b/lib/net/rte_net_crc.c
index 346c285c15..9f04a0cb57 100644
--- a/lib/net/rte_net_crc.c
+++ b/lib/net/rte_net_crc.c
@@ -67,6 +67,12 @@ static const rte_net_crc_handler handlers_neon[] = {
[RTE_NET_CRC32_ETH] = rte_crc32_eth_neon_handler,
};
#endif
+#ifdef CC_RISCV64_ZBC_CLMUL_SUPPORT
+/* Dispatch table for the Zbc carry-less-multiply CRC implementations */
+static const rte_net_crc_handler handlers_zbc[] = {
+ [RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_zbc_handler,
+ [RTE_NET_CRC32_ETH] = rte_crc32_eth_zbc_handler,
+};
+#endif
static uint16_t max_simd_bitwidth;
@@ -244,6 +250,31 @@ neon_pmull_init(void)
#endif
}
+/* ZBC/CLMUL handling */
+
+/* True when the running CPU reports the Zbc extension */
+#define ZBC_CLMUL_CPU_SUPPORTED \
+ rte_cpu_get_flag_enabled(RTE_CPUFLAG_RISCV_EXT_ZBC)
+
+/*
+ * Return the Zbc handler table when both compile-time and run-time
+ * support are present, or NULL so the caller can fall back.
+ */
+static const rte_net_crc_handler *
+zbc_clmul_get_handlers(void)
+{
+#ifdef CC_RISCV64_ZBC_CLMUL_SUPPORT
+ if (ZBC_CLMUL_CPU_SUPPORTED)
+ return handlers_zbc;
+#endif
+ NET_LOG(INFO, "Requirements not met, can't use Zbc");
+ return NULL;
+}
+
+/* One-time constant setup; no-op when Zbc is unavailable */
+static void
+zbc_clmul_init(void)
+{
+#ifdef CC_RISCV64_ZBC_CLMUL_SUPPORT
+ if (ZBC_CLMUL_CPU_SUPPORTED)
+ rte_net_crc_zbc_init();
+#endif
+}
+
/* Default handling */
static uint32_t
@@ -260,6 +291,9 @@ rte_crc16_ccitt_default_handler(const uint8_t *data, uint32_t data_len)
if (handlers != NULL)
return handlers[RTE_NET_CRC16_CCITT](data, data_len);
handlers = neon_pmull_get_handlers();
+ if (handlers != NULL)
+ return handlers[RTE_NET_CRC16_CCITT](data, data_len);
+ handlers = zbc_clmul_get_handlers();
if (handlers != NULL)
return handlers[RTE_NET_CRC16_CCITT](data, data_len);
handlers = handlers_scalar;
@@ -282,6 +316,10 @@ rte_crc32_eth_default_handler(const uint8_t *data, uint32_t data_len)
handlers = neon_pmull_get_handlers();
if (handlers != NULL)
return handlers[RTE_NET_CRC32_ETH](data, data_len);
+ handlers = zbc_clmul_get_handlers();
+ /* Only dispatch when Zbc is available; NULL means fall back to scalar */
+ if (handlers != NULL)
+ return handlers[RTE_NET_CRC32_ETH](data, data_len);
handlers = handlers_scalar;
return handlers[RTE_NET_CRC32_ETH](data, data_len);
}
@@ -306,6 +342,9 @@ rte_net_crc_set_alg(enum rte_net_crc_alg alg)
break; /* for x86, always break here */
case RTE_NET_CRC_NEON:
handlers = neon_pmull_get_handlers();
+ break;
+ case RTE_NET_CRC_ZBC:
+ handlers = zbc_clmul_get_handlers();
/* fall-through */
case RTE_NET_CRC_SCALAR:
/* fall-through */
@@ -338,4 +377,5 @@ RTE_INIT(rte_net_crc_init)
sse42_pclmulqdq_init();
avx512_vpclmulqdq_init();
neon_pmull_init();
+ zbc_clmul_init();
}
diff --git a/lib/net/rte_net_crc.h b/lib/net/rte_net_crc.h
index 72d3e10ff6..12fa6a8a02 100644
--- a/lib/net/rte_net_crc.h
+++ b/lib/net/rte_net_crc.h
@@ -24,6 +24,7 @@ enum rte_net_crc_alg {
RTE_NET_CRC_SSE42,
RTE_NET_CRC_NEON,
RTE_NET_CRC_AVX512,
+ RTE_NET_CRC_ZBC,
};
/**
@@ -37,6 +38,7 @@ enum rte_net_crc_alg {
* - RTE_NET_CRC_SSE42 (Use 64-bit SSE4.2 intrinsic)
* - RTE_NET_CRC_NEON (Use ARM Neon intrinsic)
* - RTE_NET_CRC_AVX512 (Use 512-bit AVX intrinsic)
+ * - RTE_NET_CRC_ZBC (Use RISC-V Zbc extension)
*/
void
rte_net_crc_set_alg(enum rte_net_crc_alg alg);
--
2.39.2
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH v3 4/9] config/riscv: add qemu crossbuild target
2024-08-27 15:32 ` [PATCH v3 " Daniel Gregory
` (2 preceding siblings ...)
2024-08-27 15:32 ` [PATCH v3 3/9] net: " Daniel Gregory
@ 2024-08-27 15:32 ` Daniel Gregory
2024-08-27 15:36 ` [PATCH v3 5/9] examples/l3fwd: use accelerated CRC on riscv Daniel Gregory
2024-09-17 14:26 ` [PATCH v3 0/9] riscv: implement accelerated crc using zbc Daniel Gregory
5 siblings, 0 replies; 36+ messages in thread
From: Daniel Gregory @ 2024-08-27 15:32 UTC (permalink / raw)
To: Stanislaw Kardach, Bruce Richardson
Cc: dev, Punit Agrawal, Liang Ma, Pengcheng Wang, Chunsong Feng,
Daniel Gregory
A new cross-compilation target that has extensions that DPDK uses and
QEMU supports. Initially, this is just the Zbc extension for hardware
CRC support.
Signed-off-by: Daniel Gregory <daniel.gregory@bytedance.com>
---
config/riscv/meson.build | 3 ++-
config/riscv/riscv64_qemu_linux_gcc | 17 +++++++++++++++++
.../linux_gsg/cross_build_dpdk_for_riscv.rst | 5 +++++
3 files changed, 24 insertions(+), 1 deletion(-)
create mode 100644 config/riscv/riscv64_qemu_linux_gcc
diff --git a/config/riscv/meson.build b/config/riscv/meson.build
index 5d8411b254..337b26bbac 100644
--- a/config/riscv/meson.build
+++ b/config/riscv/meson.build
@@ -43,7 +43,8 @@ vendor_generic = {
['RTE_MAX_NUMA_NODES', 2]
],
'arch_config': {
- 'generic': {'machine_args': ['-march=rv64gc']}
+ 'generic': {'machine_args': ['-march=rv64gc']},
+ 'qemu': {'machine_args': ['-march=rv64gc_zbc']},
}
}
diff --git a/config/riscv/riscv64_qemu_linux_gcc b/config/riscv/riscv64_qemu_linux_gcc
new file mode 100644
index 0000000000..007cc98885
--- /dev/null
+++ b/config/riscv/riscv64_qemu_linux_gcc
@@ -0,0 +1,17 @@
+[binaries]
+c = ['ccache', 'riscv64-linux-gnu-gcc']
+cpp = ['ccache', 'riscv64-linux-gnu-g++']
+ar = 'riscv64-linux-gnu-ar'
+strip = 'riscv64-linux-gnu-strip'
+pcap-config = ''
+
+[host_machine]
+system = 'linux'
+cpu_family = 'riscv64'
+cpu = 'rv64gc_zbc'
+endian = 'little'
+
+[properties]
+vendor_id = 'generic'
+arch_id = 'qemu'
+pkg_config_libdir = '/usr/lib/riscv64-linux-gnu/pkgconfig'
diff --git a/doc/guides/linux_gsg/cross_build_dpdk_for_riscv.rst b/doc/guides/linux_gsg/cross_build_dpdk_for_riscv.rst
index 7d7f7ac72b..c3b67671a0 100644
--- a/doc/guides/linux_gsg/cross_build_dpdk_for_riscv.rst
+++ b/doc/guides/linux_gsg/cross_build_dpdk_for_riscv.rst
@@ -110,6 +110,11 @@ Currently the following targets are supported:
* SiFive U740 SoC: ``config/riscv/riscv64_sifive_u740_linux_gcc``
+* QEMU: ``config/riscv/riscv64_qemu_linux_gcc``
+
+ * A target with all the QEMU-supported extensions that DPDK has a use for
+ (currently ``rv64gc_zbc``). Requires QEMU version 7.0.0 or newer.
+
To add a new target support, ``config/riscv/meson.build`` has to be modified by
adding a new vendor/architecture id and a corresponding cross-file has to be
added to ``config/riscv`` directory.
--
2.39.2
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH v3 5/9] examples/l3fwd: use accelerated CRC on riscv
2024-08-27 15:32 ` [PATCH v3 " Daniel Gregory
` (3 preceding siblings ...)
2024-08-27 15:32 ` [PATCH v3 4/9] config/riscv: add qemu crossbuild target Daniel Gregory
@ 2024-08-27 15:36 ` Daniel Gregory
2024-08-27 15:36 ` [PATCH v3 6/9] ipfrag: " Daniel Gregory
` (3 more replies)
2024-09-17 14:26 ` [PATCH v3 0/9] riscv: implement accelerated crc using zbc Daniel Gregory
5 siblings, 4 replies; 36+ messages in thread
From: Daniel Gregory @ 2024-08-27 15:36 UTC (permalink / raw)
To: Stanislaw Kardach
Cc: dev, Punit Agrawal, Liang Ma, Pengcheng Wang, Chunsong Feng,
Daniel Gregory
When the RISC-V Zbc (carryless multiplication) extension is present, an
implementation of CRC hashing using hardware instructions is available.
Use it rather than jhash.
Signed-off-by: Daniel Gregory <daniel.gregory@bytedance.com>
---
examples/l3fwd/l3fwd_em.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index 31a7e05e39..36520401e5 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -29,7 +29,7 @@
#include "l3fwd_event.h"
#include "em_route_parse.c"
-#if defined(RTE_ARCH_X86) || defined(__ARM_FEATURE_CRC32)
+#if defined(RTE_ARCH_X86) || defined(__ARM_FEATURE_CRC32) || defined(RTE_RISCV_FEATURE_ZBC)
#define EM_HASH_CRC 1
#endif
--
2.39.2
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH v3 6/9] ipfrag: use accelerated CRC on riscv
2024-08-27 15:36 ` [PATCH v3 5/9] examples/l3fwd: use accelerated CRC on riscv Daniel Gregory
@ 2024-08-27 15:36 ` Daniel Gregory
2024-08-27 15:36 ` [PATCH v3 7/9] examples/l3fwd-power: " Daniel Gregory
` (2 subsequent siblings)
3 siblings, 0 replies; 36+ messages in thread
From: Daniel Gregory @ 2024-08-27 15:36 UTC (permalink / raw)
To: Stanislaw Kardach, Konstantin Ananyev
Cc: dev, Punit Agrawal, Liang Ma, Pengcheng Wang, Chunsong Feng,
Daniel Gregory
When the RISC-V Zbc (carryless multiplication) extension is present, an
implementation of CRC hashing using hardware instructions is available.
Use it rather than jhash.
Signed-off-by: Daniel Gregory <daniel.gregory@bytedance.com>
---
lib/ip_frag/ip_frag_internal.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/lib/ip_frag/ip_frag_internal.c b/lib/ip_frag/ip_frag_internal.c
index 7cbef647df..19a28c447b 100644
--- a/lib/ip_frag/ip_frag_internal.c
+++ b/lib/ip_frag/ip_frag_internal.c
@@ -45,14 +45,14 @@ ipv4_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2)
p = (const uint32_t *)&key->src_dst;
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) || defined(RTE_RISCV_FEATURE_ZBC)
v = rte_hash_crc_4byte(p[0], PRIME_VALUE);
v = rte_hash_crc_4byte(p[1], v);
v = rte_hash_crc_4byte(key->id, v);
#else
v = rte_jhash_3words(p[0], p[1], key->id, PRIME_VALUE);
-#endif /* RTE_ARCH_X86 */
+#endif /* RTE_ARCH_X86 || RTE_ARCH_ARM64 || RTE_RISCV_FEATURE_ZBC */
*v1 = v;
*v2 = (v << 7) + (v >> 14);
@@ -66,7 +66,7 @@ ipv6_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2)
p = (const uint32_t *) &key->src_dst;
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) || defined(RTE_RISCV_FEATURE_ZBC)
v = rte_hash_crc_4byte(p[0], PRIME_VALUE);
v = rte_hash_crc_4byte(p[1], v);
v = rte_hash_crc_4byte(p[2], v);
--
2.39.2
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH v3 7/9] examples/l3fwd-power: use accelerated CRC on riscv
2024-08-27 15:36 ` [PATCH v3 5/9] examples/l3fwd: use accelerated CRC on riscv Daniel Gregory
2024-08-27 15:36 ` [PATCH v3 6/9] ipfrag: " Daniel Gregory
@ 2024-08-27 15:36 ` Daniel Gregory
2024-08-27 15:36 ` [PATCH v3 8/9] hash/cuckoo: " Daniel Gregory
2024-08-27 15:36 ` [PATCH v3 9/9] member: " Daniel Gregory
3 siblings, 0 replies; 36+ messages in thread
From: Daniel Gregory @ 2024-08-27 15:36 UTC (permalink / raw)
To: Stanislaw Kardach, Anatoly Burakov, David Hunt, Sivaprasad Tummala
Cc: dev, Punit Agrawal, Liang Ma, Pengcheng Wang, Chunsong Feng,
Daniel Gregory
When the RISC-V Zbc (carryless multiplication) extension is present, an
implementation of CRC hashing using hardware instructions is available.
Use it rather than jhash.
Signed-off-by: Daniel Gregory <daniel.gregory@bytedance.com>
---
examples/l3fwd-power/main.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c
index 2bb6b092c3..c631c14193 100644
--- a/examples/l3fwd-power/main.c
+++ b/examples/l3fwd-power/main.c
@@ -270,7 +270,7 @@ static struct rte_mempool * pktmbuf_pool[NB_SOCKETS];
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
-#ifdef RTE_ARCH_X86
+#if defined(RTE_ARCH_X86) || defined(RTE_RISCV_FEATURE_ZBC)
#include <rte_hash_crc.h>
#define DEFAULT_HASH_FUNC rte_hash_crc
#else
--
2.39.2
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH v3 8/9] hash/cuckoo: use accelerated CRC on riscv
2024-08-27 15:36 ` [PATCH v3 5/9] examples/l3fwd: use accelerated CRC on riscv Daniel Gregory
2024-08-27 15:36 ` [PATCH v3 6/9] ipfrag: " Daniel Gregory
2024-08-27 15:36 ` [PATCH v3 7/9] examples/l3fwd-power: " Daniel Gregory
@ 2024-08-27 15:36 ` Daniel Gregory
2024-08-27 15:36 ` [PATCH v3 9/9] member: " Daniel Gregory
3 siblings, 0 replies; 36+ messages in thread
From: Daniel Gregory @ 2024-08-27 15:36 UTC (permalink / raw)
To: Stanislaw Kardach, Yipeng Wang, Sameh Gobriel, Bruce Richardson,
Vladimir Medvedkin
Cc: dev, Punit Agrawal, Liang Ma, Pengcheng Wang, Chunsong Feng,
Daniel Gregory
When the RISC-V Zbc (carryless multiplication) extension is present, an
implementation of CRC hashing using hardware instructions is available.
Use it rather than jhash.
Signed-off-by: Daniel Gregory <daniel.gregory@bytedance.com>
---
lib/hash/rte_cuckoo_hash.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c
index 577b5839d3..872f88fdce 100644
--- a/lib/hash/rte_cuckoo_hash.c
+++ b/lib/hash/rte_cuckoo_hash.c
@@ -427,6 +427,9 @@ rte_hash_create(const struct rte_hash_parameters *params)
#elif defined(RTE_ARCH_ARM64)
if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
default_hash_func = (rte_hash_function)rte_hash_crc;
+#elif defined(RTE_ARCH_RISCV)
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_RISCV_EXT_ZBC))
+ default_hash_func = (rte_hash_function)rte_hash_crc;
#endif
/* Setup hash context */
strlcpy(h->name, params->name, sizeof(h->name));
--
2.39.2
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH v3 9/9] member: use accelerated CRC on riscv
2024-08-27 15:36 ` [PATCH v3 5/9] examples/l3fwd: use accelerated CRC on riscv Daniel Gregory
` (2 preceding siblings ...)
2024-08-27 15:36 ` [PATCH v3 8/9] hash/cuckoo: " Daniel Gregory
@ 2024-08-27 15:36 ` Daniel Gregory
3 siblings, 0 replies; 36+ messages in thread
From: Daniel Gregory @ 2024-08-27 15:36 UTC (permalink / raw)
To: Stanislaw Kardach, Yipeng Wang, Sameh Gobriel
Cc: dev, Punit Agrawal, Liang Ma, Pengcheng Wang, Chunsong Feng,
Daniel Gregory
When the RISC-V Zbc (carryless multiplication) extension is present, an
implementation of CRC hashing using hardware instructions is available.
Use it rather than jhash.
Signed-off-by: Daniel Gregory <daniel.gregory@bytedance.com>
---
lib/member/rte_member.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/member/rte_member.h b/lib/member/rte_member.h
index aec192eba5..152659628a 100644
--- a/lib/member/rte_member.h
+++ b/lib/member/rte_member.h
@@ -92,7 +92,7 @@ typedef uint16_t member_set_t;
#define RTE_MEMBER_SKETCH_COUNT_BYTE 0x02
/** @internal Hash function used by membership library. */
-#if defined(RTE_ARCH_X86) || defined(__ARM_FEATURE_CRC32)
+#if defined(RTE_ARCH_X86) || defined(__ARM_FEATURE_CRC32) || defined(RTE_RISCV_FEATURE_ZBC)
#include <rte_hash_crc.h>
#define MEMBER_HASH_FUNC rte_hash_crc
#else
--
2.39.2
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH v3 0/9] riscv: implement accelerated crc using zbc
2024-08-27 15:32 ` [PATCH v3 " Daniel Gregory
` (4 preceding siblings ...)
2024-08-27 15:36 ` [PATCH v3 5/9] examples/l3fwd: use accelerated CRC on riscv Daniel Gregory
@ 2024-09-17 14:26 ` Daniel Gregory
5 siblings, 0 replies; 36+ messages in thread
From: Daniel Gregory @ 2024-09-17 14:26 UTC (permalink / raw)
To: Stanislaw Kardach
Cc: dev, Punit Agrawal, Liang Ma, Pengcheng Wang, Chunsong Feng
Would it be possible to get a review on this patchset? I would be happy
to hear any feedback on the approach to RISC-V extension detection or
how I have implemented the hardware-optimised CRCs.
Kind regards,
Daniel
^ permalink raw reply [flat|nested] 36+ messages in thread