From: Daniel Gregory <daniel.gregory@bytedance.com>
To: Thomas Monjalon <thomas@monjalon.net>,
Yipeng Wang <yipeng1.wang@intel.com>,
Sameh Gobriel <sameh.gobriel@intel.com>,
Bruce Richardson <bruce.richardson@intel.com>,
Vladimir Medvedkin <vladimir.medvedkin@intel.com>,
Stanislaw Kardach <stanislaw.kardach@gmail.com>
Cc: dev@dpdk.org, Liang Ma <liangma@liangbit.com>,
Punit Agrawal <punit.agrawal@bytedance.com>,
Pengcheng Wang <wangpengcheng.pp@bytedance.com>,
Chunsong Feng <fengchunsong@bytedance.com>,
Daniel Gregory <daniel.gregory@bytedance.com>
Subject: [PATCH 2/5] hash: implement crc using riscv carryless multiply
Date: Tue, 18 Jun 2024 18:41:30 +0100 [thread overview]
Message-ID: <20240618174133.33457-3-daniel.gregory@bytedance.com> (raw)
In-Reply-To: <20240618174133.33457-1-daniel.gregory@bytedance.com>
Using carryless multiply instructions from RISC-V's Zbc extension,
implement a Barrett reduction that calculates CRC-32C checksums.
Based on the approach described in Intel's whitepaper "Fast CRC
Computation for Generic Polynomials Using PCLMULQDQ Instruction", which
is also explained at
https://web.archive.org/web/20240111232520/https://mary.rs/lab/crc32/
Signed-off-by: Daniel Gregory <daniel.gregory@bytedance.com>
---
MAINTAINERS | 1 +
app/test/test_hash.c | 7 +++
lib/hash/meson.build | 1 +
lib/hash/rte_crc_riscv64.h | 89 ++++++++++++++++++++++++++++++++++++++
lib/hash/rte_hash_crc.c | 12 ++++-
lib/hash/rte_hash_crc.h | 6 ++-
6 files changed, 114 insertions(+), 2 deletions(-)
create mode 100644 lib/hash/rte_crc_riscv64.h
diff --git a/MAINTAINERS b/MAINTAINERS
index 472713124c..48800f39c4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -318,6 +318,7 @@ M: Stanislaw Kardach <stanislaw.kardach@gmail.com>
F: config/riscv/
F: doc/guides/linux_gsg/cross_build_dpdk_for_riscv.rst
F: lib/eal/riscv/
+F: lib/hash/rte_crc_riscv64.h
Intel x86
M: Bruce Richardson <bruce.richardson@intel.com>
diff --git a/app/test/test_hash.c b/app/test/test_hash.c
index 24d3b547ad..c8c4197ad8 100644
--- a/app/test/test_hash.c
+++ b/app/test/test_hash.c
@@ -205,6 +205,13 @@ test_crc32_hash_alg_equiv(void)
printf("Failed checking CRC32_SW against CRC32_ARM64\n");
break;
}
+
+ /* Check against 8-byte-operand RISCV64 CRC32 if available */
+ rte_hash_crc_set_alg(CRC32_RISCV64);
+ if (hash_val != rte_hash_crc(data64, data_len, init_val)) {
+ printf("Failed checking CRC32_SW against CRC32_RISCV64\n");
+ break;
+ }
}
/* Resetting to best available algorithm */
diff --git a/lib/hash/meson.build b/lib/hash/meson.build
index 277eb9fa93..8355869a80 100644
--- a/lib/hash/meson.build
+++ b/lib/hash/meson.build
@@ -12,6 +12,7 @@ headers = files(
indirect_headers += files(
'rte_crc_arm64.h',
'rte_crc_generic.h',
+ 'rte_crc_riscv64.h',
'rte_crc_sw.h',
'rte_crc_x86.h',
'rte_thash_x86_gfni.h',
diff --git a/lib/hash/rte_crc_riscv64.h b/lib/hash/rte_crc_riscv64.h
new file mode 100644
index 0000000000..94f6857c69
--- /dev/null
+++ b/lib/hash/rte_crc_riscv64.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) ByteDance 2024
+ */
+
+#include <assert.h>
+#include <stdint.h>
+
+#include <riscv_bitmanip.h>
+
+#ifndef _RTE_CRC_RISCV64_H_
+#define _RTE_CRC_RISCV64_H_
+
+/*
+ * CRC-32C takes a reflected input (bit 7 is the lsb) and produces a reflected
+ * output. As reflecting the value we're checksumming is expensive, we instead
+ * reflect the polynomial P (0x11EDC6F41), the constant mu and the algorithm.
+ *
+ * The mu constant is used for a Barrett reduction. It's 2^96 / P (0x11F91CAF6)
+ * reflected. Picking 2^96 rather than 2^64 means we can checksum 64 bits of
+ * data using only two multiplications (https://mary.rs/lab/crc32/)
+ */
+static const uint64_t p = 0x105EC76F1;
+static const uint64_t mu = 0x4869EC38DEA713F1UL;
+
+/* Fold `bits` bits of data into init_val (CRC-32C) via a Barrett reduction */
+static inline uint32_t
+crc32c_riscv64(uint64_t data, uint32_t init_val, uint32_t bits)
+{
+	assert((bits == 64) || (bits == 32) || (bits == 16) || (bits == 8));
+
+	/* XOR in the running CRC, then left-align the operand in 64 bits */
+	const uint64_t operand = (data ^ init_val) << (64 - bits);
+
+	/*
+	 * Multiply by mu and keep the low 64 bits: that is the division by
+	 * 2^96 (low rather than high, because every value here is reflected).
+	 * Then multiply by P and keep the high 64 bits.
+	 */
+	const uint64_t quotient = __riscv_clmul_64(operand, mu);
+	uint64_t rem = __riscv_clmulh_64(quotient, p);
+
+	/* XOR back the init_val bits the shift pushed out (8/16-bit only) */
+	if (bits == 8 || bits == 16)
+		rem ^= init_val >> bits;
+
+	return (uint32_t)rem;
+}
+
+/*
+ * Hash helpers for 1, 2, 4 and 8 byte operands. Each one takes the carryless
+ * multiply path when CRC32_RISCV64 is set in rte_hash_crc32_alg and otherwise
+ * falls back on the software implementation.
+ */
+static inline uint32_t
+rte_hash_crc_1byte(uint8_t data, uint32_t init_val)
+{
+	return likely(rte_hash_crc32_alg & CRC32_RISCV64) ?
+		crc32c_riscv64(data, init_val, 8) :
+		crc32c_1byte(data, init_val);
+}
+
+/* Hash 2 bytes: Zbc path when CRC32_RISCV64 is set, else software */
+static inline uint32_t
+rte_hash_crc_2byte(uint16_t data, uint32_t init_val)
+{
+	return likely(rte_hash_crc32_alg & CRC32_RISCV64) ?
+		crc32c_riscv64(data, init_val, 16) :
+		crc32c_2bytes(data, init_val);
+}
+
+/* Hash 4 bytes: Zbc path when CRC32_RISCV64 is set, else software */
+static inline uint32_t
+rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
+{
+	return likely(rte_hash_crc32_alg & CRC32_RISCV64) ?
+		crc32c_riscv64(data, init_val, 32) :
+		crc32c_1word(data, init_val);
+}
+
+/* Hash 8 bytes: Zbc path when CRC32_RISCV64 is set, else software */
+static inline uint32_t
+rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
+{
+	return likely(rte_hash_crc32_alg & CRC32_RISCV64) ?
+		crc32c_riscv64(data, init_val, 64) :
+		crc32c_2words(data, init_val);
+}
+
+#endif /* _RTE_CRC_RISCV64_H_ */
diff --git a/lib/hash/rte_hash_crc.c b/lib/hash/rte_hash_crc.c
index c037cdb0f0..ece1a84b29 100644
--- a/lib/hash/rte_hash_crc.c
+++ b/lib/hash/rte_hash_crc.c
@@ -15,7 +15,7 @@ RTE_LOG_REGISTER_SUFFIX(hash_crc_logtype, crc, INFO);
uint8_t rte_hash_crc32_alg = CRC32_SW;
/**
- * Allow or disallow use of SSE4.2/ARMv8 intrinsics for CRC32 hash
+ * Allow or disallow use of SSE4.2/ARMv8/RISC-V intrinsics for CRC32 hash
* calculation.
*
* @param alg
@@ -24,6 +24,7 @@ uint8_t rte_hash_crc32_alg = CRC32_SW;
* - (CRC32_SSE42) Use SSE4.2 intrinsics if available
* - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default x86)
* - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available (default ARMv8)
+ * - (CRC32_RISCV64) Use RISC-V carry-less multiply (Zbc) if available
*
*/
void
@@ -52,6 +53,13 @@ rte_hash_crc_set_alg(uint8_t alg)
rte_hash_crc32_alg = CRC32_ARM64;
#endif
+#if defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_ZBC)
+ if (!(alg & CRC32_RISCV64))
+ HASH_CRC_LOG(WARNING,
+ "Unsupported CRC32 algorithm requested using CRC32_RISCV64");
+ rte_hash_crc32_alg = CRC32_RISCV64;
+#endif
+
if (rte_hash_crc32_alg == CRC32_SW)
HASH_CRC_LOG(WARNING,
"Unsupported CRC32 algorithm requested using CRC32_SW");
@@ -64,6 +72,8 @@ RTE_INIT(rte_hash_crc_init_alg)
rte_hash_crc_set_alg(CRC32_SSE42_x64);
#elif defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_CRC32)
rte_hash_crc_set_alg(CRC32_ARM64);
+#elif defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_ZBC)
+ rte_hash_crc_set_alg(CRC32_RISCV64);
#else
rte_hash_crc_set_alg(CRC32_SW);
#endif
diff --git a/lib/hash/rte_hash_crc.h b/lib/hash/rte_hash_crc.h
index 8ad2422ec3..2be433fa21 100644
--- a/lib/hash/rte_hash_crc.h
+++ b/lib/hash/rte_hash_crc.h
@@ -28,6 +28,7 @@ extern "C" {
#define CRC32_x64 (1U << 2)
#define CRC32_SSE42_x64 (CRC32_x64|CRC32_SSE42)
#define CRC32_ARM64 (1U << 3)
+#define CRC32_RISCV64 (1U << 4)
extern uint8_t rte_hash_crc32_alg;
@@ -35,12 +36,14 @@ extern uint8_t rte_hash_crc32_alg;
#include "rte_crc_arm64.h"
#elif defined(RTE_ARCH_X86)
#include "rte_crc_x86.h"
+#elif defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_ZBC)
+#include "rte_crc_riscv64.h"
#else
#include "rte_crc_generic.h"
#endif
/**
- * Allow or disallow use of SSE4.2/ARMv8 intrinsics for CRC32 hash
+ * Allow or disallow use of SSE4.2/ARMv8/RISC-V intrinsics for CRC32 hash
* calculation.
*
* @param alg
@@ -49,6 +52,7 @@ extern uint8_t rte_hash_crc32_alg;
* - (CRC32_SSE42) Use SSE4.2 intrinsics if available
* - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default x86)
* - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available (default ARMv8)
+ * - (CRC32_RISCV64) Use RISC-V carry-less multiply (Zbc) if available (default rv64gc_zbc)
*/
void
rte_hash_crc_set_alg(uint8_t alg);
--
2.39.2
next prev parent reply other threads:[~2024-06-18 17:42 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-18 17:41 [PATCH 0/5] riscv: implement accelerated crc using zbc Daniel Gregory
2024-06-18 17:41 ` [PATCH 1/5] config/riscv: add flag for using Zbc extension Daniel Gregory
2024-06-18 20:03 ` Stephen Hemminger
2024-06-19 7:08 ` Morten Brørup
2024-06-19 14:49 ` Stephen Hemminger
2024-06-19 16:41 ` Daniel Gregory
2024-06-18 17:41 ` Daniel Gregory [this message]
2024-06-18 17:41 ` [PATCH 3/5] net: implement crc using riscv carryless multiply Daniel Gregory
2024-06-18 17:41 ` [PATCH 4/5] examples/l3fwd: use accelerated crc on riscv Daniel Gregory
2024-06-18 17:41 ` [PATCH 5/5] ipfrag: " Daniel Gregory
2024-07-12 15:46 ` [PATCH v2 0/9] riscv: implement accelerated crc using zbc Daniel Gregory
2024-07-12 15:46 ` [PATCH v2 1/9] config/riscv: detect presence of Zbc extension Daniel Gregory
2024-07-12 15:46 ` [PATCH v2 2/9] hash: implement crc using riscv carryless multiply Daniel Gregory
2024-07-12 15:46 ` [PATCH v2 3/9] net: " Daniel Gregory
2024-07-12 15:46 ` [PATCH v2 4/9] config/riscv: add qemu crossbuild target Daniel Gregory
2024-07-12 15:46 ` [PATCH v2 5/9] examples/l3fwd: use accelerated crc on riscv Daniel Gregory
2024-07-12 15:46 ` [PATCH v2 6/9] ipfrag: " Daniel Gregory
2024-07-12 15:46 ` [PATCH v2 7/9] examples/l3fwd-power: " Daniel Gregory
2024-07-12 15:46 ` [PATCH v2 8/9] hash/cuckoo: " Daniel Gregory
2024-07-12 15:46 ` [PATCH v2 9/9] member: " Daniel Gregory
2024-07-12 17:19 ` [PATCH v2 0/9] riscv: implement accelerated crc using zbc David Marchand
2024-08-27 15:32 ` [PATCH v3 " Daniel Gregory
2024-08-27 15:32 ` [PATCH v3 1/9] config/riscv: detect presence of Zbc extension Daniel Gregory
2024-08-27 15:32 ` [PATCH v3 2/9] hash: implement CRC using riscv carryless multiply Daniel Gregory
2024-08-27 15:32 ` [PATCH v3 3/9] net: " Daniel Gregory
2024-08-27 15:32 ` [PATCH v3 4/9] config/riscv: add qemu crossbuild target Daniel Gregory
2024-08-27 15:36 ` [PATCH v3 5/9] examples/l3fwd: use accelerated CRC on riscv Daniel Gregory
2024-08-27 15:36 ` [PATCH v3 6/9] ipfrag: " Daniel Gregory
2024-08-27 15:36 ` [PATCH v3 7/9] examples/l3fwd-power: " Daniel Gregory
2024-08-27 15:36 ` [PATCH v3 8/9] hash/cuckoo: " Daniel Gregory
2024-08-27 15:36 ` [PATCH v3 9/9] member: " Daniel Gregory
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240618174133.33457-3-daniel.gregory@bytedance.com \
--to=daniel.gregory@bytedance.com \
--cc=bruce.richardson@intel.com \
--cc=dev@dpdk.org \
--cc=fengchunsong@bytedance.com \
--cc=liangma@liangbit.com \
--cc=punit.agrawal@bytedance.com \
--cc=sameh.gobriel@intel.com \
--cc=stanislaw.kardach@gmail.com \
--cc=thomas@monjalon.net \
--cc=vladimir.medvedkin@intel.com \
--cc=wangpengcheng.pp@bytedance.com \
--cc=yipeng1.wang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).