* [PATCH v1 0/2] Optimization Summary for RISC-V rte_memcpy
@ 2025-10-16 9:09 Qiguo Chen
2025-10-16 9:09 ` [PATCH v1 1/2] riscv support rte_memcpy in vector Qiguo Chen
2025-10-16 9:09 ` [PATCH v1 2/2] benchmark report for rte_memcpy Qiguo Chen
0 siblings, 2 replies; 3+ messages in thread
From: Qiguo Chen @ 2025-10-16 9:09 UTC (permalink / raw)
To: stanislaw.kardach, sunyuechi, stephen; +Cc: dev, bruce.richardson, Qiguo Chen
I've implemented optimizations to rte_memcpy for RISC-V architectures,
achieving an average 10%~15% reduction in execution time for data sizes
between 129 and 1024 bytes (the 1025~1600 byte range gains little).
These enhancements draw inspiration from x86 implementations,
specifically focusing on:
1) Alignment handling for unaligned scenarios
2) Vector configuration tuning
3) Strategic prefetching
- Patch 1/2: Base implementation
- Patch 2/2: Benchmark report
Tested on SG2044 (VLEN=128).
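For a quick overview, the runtime dispatch added in patch 1 boils down to
the sketch below (condensed from the rte_memcpy.h changes; the macros and
helpers are the ones defined in the patch, which remains the authoritative
version):

    /* default to glibc memcpy; switched at init time if the V
     * extension is usable */
    static uint8_t memcpy_alg = MEMCPY_GLIBC;

    RTE_INIT(rte_vect_memcpy_init)
    {
        /* use the vector path only with the V extension and
         * VLEN >= 128 bits (VLENB >= 16 bytes) */
        if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_RISCV_ISA_V) &&
                riscv_vlenb() >= RISCV_VLENB)
            memcpy_alg = MEMCPY_RISCV;
    }

    static __rte_always_inline void *
    rte_memcpy(void *dst, const void *src, size_t n)
    {
        /* the vector path only covers mid-sized copies */
        if (likely(memcpy_alg == MEMCPY_RISCV && n >= 128 && n < 2048))
            return _rte_memcpy(dst, src, n);
        return memcpy(dst, src, n);
    }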
Qiguo Chen (2):
riscv support rte_memcpy in vector
benchmark report for rte_memcpy
.mailmap | 1 +
benchmark_report.txt | 149 ++++++++++++++
config/riscv/meson.build | 14 ++
lib/eal/riscv/include/rte_memcpy.h | 310 ++++++++++++++++++++++++++++-
4 files changed, 472 insertions(+), 2 deletions(-)
create mode 100644 benchmark_report.txt
--
2.21.0.windows.1
* [PATCH v1 1/2] riscv support rte_memcpy in vector
2025-10-16 9:09 [PATCH v1 0/2] Optimization Summary for RISC-V rte_memcpy Qiguo Chen
@ 2025-10-16 9:09 ` Qiguo Chen
2025-10-16 9:09 ` [PATCH v1 2/2] benchmark report for rte_memcpy Qiguo Chen
1 sibling, 0 replies; 3+ messages in thread
From: Qiguo Chen @ 2025-10-16 9:09 UTC (permalink / raw)
To: stanislaw.kardach, sunyuechi, stephen; +Cc: dev, bruce.richardson, Qiguo Chen
This patch uses RISC-V vector instructions (via inline assembly)
to accelerate memory copy operations in the 129~1600 byte range.
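For readers more used to the RVV C intrinsics, the core copy loop written
in inline assembly below (_rte_mov) is roughly equivalent to the following
sketch (illustration only, not part of the patch; it assumes
<riscv_vector.h> and a compiler shipping the RVV intrinsics, e.g.
GCC 14+ or Clang 18+):

    #include <stddef.h>
    #include <stdint.h>
    #include <riscv_vector.h>

    /* Copy n bytes with e8/m8 whole-register-group loads and stores,
     * letting vsetvl choose the chunk size for each iteration. */
    static inline void
    rvv_copy(uint8_t *dst, const uint8_t *src, size_t n)
    {
        while (n > 0) {
            size_t vl = __riscv_vsetvl_e8m8(n);
            vuint8m8_t v = __riscv_vle8_v_u8m8(src, vl);
            __riscv_vse8_v_u8m8(dst, v, vl);
            src += vl;
            dst += vl;
            n -= vl;
        }
    }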
Signed-off-by: Qiguo Chen <chen.qiguo@zte.com.cn>
---
.mailmap | 1 +
config/riscv/meson.build | 14 ++
lib/eal/riscv/include/rte_memcpy.h | 310 ++++++++++++++++++++++++++++-
3 files changed, 323 insertions(+), 2 deletions(-)
diff --git a/.mailmap b/.mailmap
index 08e5ec8560..178c5f44f4 100644
--- a/.mailmap
+++ b/.mailmap
@@ -1285,6 +1285,7 @@ Qian Hao <qi_an_hao@126.com>
Qian Xu <qian.q.xu@intel.com>
Qiao Liu <qiao.liu@intel.com>
Qi Fu <qi.fu@intel.com>
+Qiguo Chen <chen.qiguo@zte.com.cn>
Qimai Xiao <qimaix.xiao@intel.com>
Qiming Chen <chenqiming_huawei@163.com>
Qiming Yang <qiming.yang@intel.com>
diff --git a/config/riscv/meson.build b/config/riscv/meson.build
index f3daea0c0e..abba474b5e 100644
--- a/config/riscv/meson.build
+++ b/config/riscv/meson.build
@@ -146,6 +146,20 @@ if (riscv_extension_macros and
endif
endif
+# detect extensions
+# Requires intrinsics available in GCC 14.1.0+ and Clang 18.1.0+
+if (riscv_extension_macros and
+ (cc.get_define('__riscv_zicbop', args: machine_args) != ''))
+ if ((cc.get_id() == 'gcc' and cc.version().version_compare('>=14.1.0'))
+ or (cc.get_id() == 'clang' and cc.version().version_compare('>=18.1.0')))
+ message('Compiling with the zicbop extension')
+ machine_args += ['-DRTE_RISCV_FEATURE_PREFETCH']
+ else
+ warning('Detected zicbop extension but cannot use because intrinsics are not available (present in GCC 14.1.0+ and Clang 18.1.0+)')
+ endif
+endif
+
+
# apply flags
foreach flag: dpdk_flags
if flag.length() > 0
diff --git a/lib/eal/riscv/include/rte_memcpy.h b/lib/eal/riscv/include/rte_memcpy.h
index d8a942c5d2..6f8cb0d4a4 100644
--- a/lib/eal/riscv/include/rte_memcpy.h
+++ b/lib/eal/riscv/include/rte_memcpy.h
@@ -11,6 +11,7 @@
#include <string.h>
#include "rte_common.h"
+#include <rte_branch_prediction.h>
#include "generic/rte_memcpy.h"
@@ -18,6 +19,290 @@
extern "C" {
#endif
+
+#if defined(RTE_RISCV_FEATURE_V) && !(defined(RTE_RISCV_FEATURE_PREFETCH))
+#undef RTE_RISCV_FEATURE_V
+#endif
+
+
+#if defined(RTE_RISCV_FEATURE_V)
+
+#include "rte_cpuflags.h"
+
+#define RISCV_VLENB 16
+#define MEMCPY_GLIBC (1U << 0)
+#define MEMCPY_RISCV (1U << 1)
+#define ALIGNMENT_MASK_128 0x7F
+#define ALIGNMENT_MASK_64 0x3F
+#define ALIGNMENT_MASK_16 0xF
+
+static uint8_t memcpy_alg = MEMCPY_GLIBC;
+
+
+static __rte_always_inline void
+memcpy_prefetch64_1(const uint8_t *src, uint8_t *dst)
+{
+ __asm__ (
+ "prefetch.r 64(%0)\n"
+ "prefetch.w 64(%1)"
+ :: "r"(src), "r"(dst)
+ );
+}
+
+static __rte_always_inline void
+memcpy_prefetch128_1(const uint8_t *src, uint8_t *dst)
+{
+ __asm__ (
+ "prefetch.r 128(%0)\n"
+ "prefetch.w 128(%1)"
+ :: "r"(src), "r"(dst)
+ );
+}
+
+static __rte_always_inline void
+memcpy_prefetch128_2(const uint8_t *src, uint8_t *dst)
+{
+ __asm__ (
+ "prefetch.r 128(%0);"
+ "prefetch.w 128(%1);"
+ "prefetch.r 192(%0);"
+ "prefetch.w 192(%1)"
+ :: "r"(src), "r"(dst)
+ );
+}
+
+
+static __rte_always_inline void
+_rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+ uint32_t n = 32;
+ asm volatile (
+ "vsetvli t1, %2, e8, m2, ta, ma\n"
+ "vle8.v v2, (%1)\n"
+ "vse8.v v2, (%0)"
+ :: "r"(dst), "r"(src), "r"(n)
+ : "v2", "v3", "t1", "memory"
+ );
+}
+
+static __rte_always_inline void
+_rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+ uint32_t n = 64;
+ asm volatile (
+ "vsetvli t3, %2, e8, m4, ta, ma\n"
+ "vle8.v v8, (%1)\n"
+ "vse8.v v8, (%0)"
+ :: "r"(dst), "r"(src), "r"(n)
+ : "v8", "v9", "v10", "v11", "t3", "memory"
+ );
+}
+
+static __rte_always_inline void
+_rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+ uint32_t n = 128;
+ asm volatile (
+ "vsetvli t4, %2, e8, m8, ta, ma\n"
+ "vle8.v v16, (%1)\n"
+ "vse8.v v16, (%0)"
+ :: "r"(dst), "r"(src), "r"(n)
+ : "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "t4", "memory"
+ );
+}
+
+static __rte_always_inline void
+_rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+ memcpy_prefetch128_2(src, dst);
+ _rte_mov128(dst, src);
+ _rte_mov128(dst + 128, src + 128);
+}
+
+static __rte_always_inline void
+_rte_mov128blocks(uint8_t *dst, const uint8_t *src, size_t n)
+{
+ asm volatile (
+ "prefetch.r 64(%1)\n"
+ "prefetch.w 64(%0)\n"
+ "prefetch.r 128(%1)\n"
+ "prefetch.w 128(%0)\n"
+ "prefetch.r 192(%1)\n"
+ "prefetch.w 192(%0)\n"
+ "prefetch.r 256(%1)\n"
+ "prefetch.w 256(%0)\n"
+ "prefetch.r 320(%1)\n"
+ "prefetch.w 320(%0)\n"
+ "prefetch.r 384(%1)\n"
+ "prefetch.w 384(%0)\n"
+ "prefetch.r 448(%1)\n"
+ "prefetch.w 448(%0)\n"
+ "prefetch.r 512(%1)\n"
+ "li t6, 512\n"
+ "3:\n"
+ "li t5, 128;"
+ "vsetvli zero, t5, e8, m8, ta, ma\n"
+ "1:;"
+ "bgt %2, t6, 4f\n"
+ "j 2f\n"
+ "4:\n"
+ "prefetch.r 576(%1)\n"
+ "prefetch.r 640(%1)\n"
+ "2:\n"
+ "vle8.v v16, (%1)\n"
+ "add %1, %1, t5\n"
+ "vse8.v v16, (%0)\n"
+ "add %0, %0, t5\n"
+ "sub %2, %2, t5\n"
+ "bnez %2, 1b"
+ : "+r"(dst), "+r"(src), "+r"(n)
+ :
+ : "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "t5", "t6", "memory"
+ );
+}
+
+static __rte_always_inline void
+_rte_mov(uint8_t *dst, const uint8_t *src, uint32_t n)
+{
+ asm volatile (
+ "1:\n"
+ "vsetvli t4, %2, e8, m8, ta, ma\n"
+ "vle8.v v16, (%1)\n"
+ "add %1, %1, t4\n"
+ "vse8.v v16, (%0)\n"
+ "add %0, %0, t4\n"
+ "sub %2, %2, t4\n"
+ "bnez %2, 1b"
+ : "+r"(dst), "+r"(src), "+r"(n)
+ :
+ : "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "t4", "memory"
+ );
+}
+
+static __rte_always_inline void
+_rte_mov_aligned(uint8_t *dst, const uint8_t *src, uint32_t n)
+{
+ asm volatile (
+ "prefetch.r 128(%1)\n"
+ "prefetch.r 192(%1)\n"
+ "prefetch.r 256(%1)\n"
+ "prefetch.r 320(%1)\n"
+ "prefetch.r 384(%1)\n"
+ "prefetch.r 448(%1)\n"
+ "prefetch.r 512(%1)\n"
+ "prefetch.r 576(%1)\n"
+ "li t6, 640\n"
+ "1:\n"
+ "vsetvli t4, %2, e8, m8, ta, ma\n"
+ "vle8.v v16, (%1)\n"
+ "add %1, %1, t4\n"
+ "vse8.v v16, (%0)\n"
+ "add %0, %0, t4\n"
+ "sub %2, %2, t4\n"
+ "blt %2, t6, 3f\n"
+ "prefetch.r 512(%1)\n"
+ "prefetch.r 576(%1)\n"
+ "3:\n"
+ "bnez %2, 1b"
+ : "+r"(dst), "+r"(src), "+r"(n)
+ :
+ : "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "t4", "t6", "memory"
+ );
+}
+
+static __rte_always_inline void *
+_rte_memcpy_generic(uint8_t *dst, const uint8_t *src, size_t n)
+{
+ void *ret = dst;
+ size_t dstofss;
+ uint32_t bn;
+
+ if (n <= 384) {
+ if (n >= 256) {
+ memcpy_prefetch128_2(src, dst);
+ n -= 256;
+ _rte_mov128(dst, src);
+ _rte_mov128((uint8_t *)dst + 128, (const uint8_t *)src + 128);
+ src = (const uint8_t *)src + 256;
+ dst = (uint8_t *)dst + 256;
+ }
+ if (n >= 128) {
+ memcpy_prefetch128_1(src, dst);
+ n -= 128;
+ _rte_mov128(dst, src);
+ src = (const uint8_t *)src + 128;
+ dst = (uint8_t *)dst + 128;
+ }
+
+ if (n >= 64) {
+ memcpy_prefetch64_1(src, dst);
+ n -= 64;
+ _rte_mov64(dst, src);
+ src = (const uint8_t *)src + 64;
+ dst = (uint8_t *)dst + 64;
+ }
+
+ if (n > 32) {
+ _rte_mov32(dst, src);
+ _rte_mov32((uint8_t *)dst - 32 + n,
+ (const uint8_t *)src - 32 + n);
+ return ret;
+ }
+
+ if (n > 0) {
+ _rte_mov32((uint8_t *)dst - 32 + n,
+ (const uint8_t *)src - 32 + n);
+ }
+ return ret;
+ }
+
+ /**
+ * Make store aligned when copy size exceeds 256 bytes.
+ */
+ dstofss = (uintptr_t)dst & ALIGNMENT_MASK_64;
+ if (dstofss > 0) {
+ dstofss = 64 - dstofss;
+ n -= dstofss;
+ _rte_mov64(dst, src);
+ src = (const uint8_t *)src + dstofss;
+ dst = (uint8_t *)dst + dstofss;
+ }
+
+ /**
+ * Copy 128-byte blocks
+ */
+ if ((uintptr_t)src & ALIGNMENT_MASK_64) {
+ bn = n - (n & ALIGNMENT_MASK_128);
+ _rte_mov128blocks(dst, src, bn);
+ n = n & ALIGNMENT_MASK_128;
+ src = (const uint8_t *)src + bn;
+ dst = (uint8_t *)dst + bn;
+ _rte_mov(dst, src, n);
+ } else
+ _rte_mov_aligned(dst, src, n);
+
+ return ret;
+}
+
+static __rte_always_inline void *
+_rte_memcpy(void *dst, const void *src, size_t n)
+{
+ return _rte_memcpy_generic((uint8_t *)dst, (const uint8_t *)src, n);
+}
+#endif
+
+/*----------------------api---------------------------------------------------*/
+static __rte_always_inline void *
+rte_memcpy(void *dst, const void *src, size_t n)
+{
+#if defined(RTE_RISCV_FEATURE_V)
+ if (likely((memcpy_alg == MEMCPY_RISCV) && (n >= 128) && (n < 2048)))
+ return _rte_memcpy(dst, src, n);
+ /*else*/
+#endif
+ return memcpy(dst, src, n);
+}
+
static inline void
rte_mov16(uint8_t *dst, const uint8_t *src)
{
@@ -51,10 +336,31 @@ rte_mov128(uint8_t *dst, const uint8_t *src)
static inline void
rte_mov256(uint8_t *dst, const uint8_t *src)
{
- memcpy(dst, src, 256);
+#if defined(RTE_RISCV_FEATURE_V)
+ if (likely(memcpy_alg == MEMCPY_RISCV))
+ _rte_mov256(dst, src);
+ else
+#endif
+ memcpy(dst, src, 256);
+}
+/*----------------------------------------------------------------------------*/
+#if defined(RTE_RISCV_FEATURE_V)
+static inline long
+riscv_vlenb(void)
+{
+ long vlenb;
+ asm ("csrr %0, 0xc22" : "=r"(vlenb));
+ return vlenb;
}
-#define rte_memcpy(d, s, n) memcpy((d), (s), (n))
+RTE_INIT(rte_vect_memcpy_init)
+{
+ long vlenb = riscv_vlenb();
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_RISCV_ISA_V) && (vlenb >= RISCV_VLENB))
+ memcpy_alg = MEMCPY_RISCV;
+}
+#endif
+
#ifdef __cplusplus
}
--
2.21.0.windows.1
* [PATCH v1 2/2] benchmark report for rte_memcpy
2025-10-16 9:09 [PATCH v1 0/2] Optimization Summary for RISC-V rte_memcpy Qiguo Chen
2025-10-16 9:09 ` [PATCH v1 1/2] riscv support rte_memcpy in vector Qiguo Chen
@ 2025-10-16 9:09 ` Qiguo Chen
1 sibling, 0 replies; 3+ messages in thread
From: Qiguo Chen @ 2025-10-16 9:09 UTC (permalink / raw)
To: stanislaw.kardach, sunyuechi, stephen; +Cc: dev, bruce.richardson, Qiguo Chen
Benchmark results show a 10~15% reduction in execution time for
data sizes between 129 and 1024 bytes.
Signed-off-by: Qiguo Chen <chen.qiguo@zte.com.cn>
---
benchmark_report.txt | 149 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 149 insertions(+)
create mode 100644 benchmark_report.txt
diff --git a/benchmark_report.txt b/benchmark_report.txt
new file mode 100644
index 0000000000..499d3fc5f0
--- /dev/null
+++ b/benchmark_report.txt
@@ -0,0 +1,149 @@
+================================= 16B aligned =================================
+ 1 0 - 0( 24.27%) 1 - 1( 13.14%) 2 - 2( -3.48%) 3 - 3( 2.70%)
+ 2 0 - 0( 21.92%) 1 - 1( 4.75%) 2 - 2( -3.58%) 3 - 3( 3.05%)
+ 3 0 - 0( 23.54%) 1 - 1( 9.74%) 2 - 2( -4.24%) 3 - 3( 2.26%)
+ 4 0 - 0( 22.54%) 1 - 1( 7.10%) 2 - 2( -3.96%) 3 - 3( 0.99%)
+ 5 0 - 0( 22.73%) 1 - 1( 9.02%) 2 - 2( -3.18%) 3 - 3( 1.60%)
+ 6 0 - 0( 56.22%) 1 - 1( 8.21%) 2 - 2( -3.65%) 3 - 3( 1.10%)
+ 7 0 - 0( 23.07%) 1 - 1( 6.82%) 2 - 2( -3.53%) 3 - 3( 3.46%)
+ 8 0 - 0( 23.49%) 1 - 1( 7.70%) 2 - 2( -0.26%) 3 - 3( 2.22%)
+ 9 0 - 0( 56.70%) 1 - 1( 7.04%) 2 - 2( -3.75%) 3 - 3( 2.52%)
+ 12 0 - 0( 23.87%) 1 - 1( 5.80%) 2 - 2( -3.76%) 3 - 3( 1.49%)
+ 15 0 - 0( 22.95%) 1 - 1( 5.01%) 2 - 2( -3.52%) 3 - 3( 2.82%)
+ 16 0 - 0( 57.49%) 1 - 1( 7.30%) 2 - 2( 0.19%) 3 - 3( 3.19%)
+ 17 0 - 0( 53.78%) 3 - 2( 51.65%) 4 - 3( 37.35%) 4 - 3( 23.94%)
+ 31 0 - 0( 27.02%) 3 - 2( 51.99%) 4 - 3( 37.34%) 4 - 3( 24.09%)
+ 32 0 - 0( 56.82%) 3 - 2( 50.42%) 4 - 3( 39.73%) 4 - 3( 25.04%)
+ 33 0 - 0( 30.60%) 3 - 3( 30.94%) 6 - 4( 46.89%) 6 - 5( 26.21%)
+ 63 0 - 0( 16.84%) 4 - 3( 21.57%) 6 - 5( 31.74%) 7 - 6( 18.01%)
+ 64 0 - 0( 21.98%) 4 - 3( 21.35%) 6 - 5( 36.13%) 7 - 6( 20.05%)
+ 65 0 - 0( 20.60%) 5 - 4( 31.05%) 6 - 5( 24.16%) 8 - 7( 5.69%)
+ 127 0 - 0( 18.22%) 6 - 6( 9.34%) 8 - 7( 9.72%) 11 - 11( 2.73%)
+ 128 0 - 0( 39.80%) 6 - 6( -0.93%) 8 - 7( 9.65%) 11 - 11( 4.63%)
+ 129 0 - 1(-50.92%) 6 - 7( -4.00%) 9 - 12(-28.67%) 11 - 16(-34.28%)
+ 191 1 - 1(-45.09%) 9 - 9( 5.04%) 12 - 13(-11.82%) 13 - 16(-15.66%)
+ 192 1 - 1(-43.44%) 7 - 9(-18.67%) 12 - 13( -5.92%) 13 - 15(-18.50%)
+ 193 1 - 1(-24.84%) 9 - 9( -5.60%) 12 - 13( -7.44%) 14 - 17(-14.15%)
+ 255 1 - 1(-23.65%) 11 - 11( -4.57%) 13 - 13( -3.46%) 16 - 18( -8.81%)
+ 256 1 - 1( 16.87%) 9 - 11(-13.78%) 14 - 13( 8.58%) 16 - 16( 5.20%)
+ 257 1 - 1(-15.41%) 12 - 13( -6.90%) 15 - 16( -6.71%) 18 - 19( -6.35%)
+ 319 1 - 1(-12.93%) 15 - 19(-18.96%) 17 - 17( -0.55%) 21 - 21( -1.25%)
+ 320 1 - 1(-16.38%) 10 - 17(-39.05%) 18 - 17( 4.65%) 20 - 20( -2.94%)
+ 321 1 - 1( -6.24%) 12 - 19(-36.30%) 18 - 17( 6.65%) 20 - 22( -8.86%)
+ 383 1 - 1( -4.06%) 16 - 20(-17.87%) 18 - 17( 9.18%) 23 - 23( 1.42%)
+ 384 1 - 1( 12.87%) 11 - 18(-36.31%) 18 - 18( 1.92%) 20 - 22( -8.22%)
+ 385 2 - 2( 26.46%) 11 - 20(-46.76%) 15 - 20(-22.07%) 19 - 24(-18.04%)
+ 447 2 - 1( 55.03%) 14 - 21(-34.10%) 15 - 20(-22.75%) 21 - 27(-23.99%)
+ 448 2 - 1( 18.00%) 12 - 20(-38.82%) 16 - 20(-20.82%) 21 - 25(-18.74%)
+ 449 4 - 2(141.90%) 13 - 22(-42.36%) 16 - 20(-22.84%) 21 - 26(-18.35%)
+ 511 3 - 2( 57.68%) 14 - 23(-37.60%) 16 - 20(-18.33%) 21 - 28(-22.10%)
+ 512 2 - 1( 27.98%) 12 - 21(-40.06%) 17 - 20(-15.21%) 21 - 26(-19.65%)
+ 513 2 - 2( 22.93%) 13 - 23(-43.25%) 18 - 22(-19.53%) 23 - 31(-26.70%)
+ 767 7 - 6( 29.60%) 21 - 29(-28.37%) 29 - 23( 29.04%) 32 - 35( -9.38%)
+ 768 6 - 3( 96.51%) 19 - 27(-29.32%) 23 - 21( 6.62%) 31 - 33( -6.22%)
+ 769 7 - 4( 94.30%) 21 - 28(-27.50%) 25 - 24( 3.23%) 32 - 37(-12.00%)
+ 1023 8 - 5( 72.12%) 25 - 34(-27.27%) 34 - 26( 33.59%) 37 - 42(-11.18%)
+ 1024 8 - 6( 41.80%) 23 - 32(-26.49%) 26 - 25( 4.23%) 37 - 40( -7.72%)
+ 1025 8 - 7( 9.36%) 25 - 34(-25.78%) 29 - 27( 7.68%) 38 - 42( -8.87%)
+ 1518 7 - 4( 71.47%) 34 - 45(-24.17%) 45 - 30( 47.69%) 51 - 53( -4.93%)
+ 1522 10 - 8( 19.45%) 35 - 45(-23.62%) 46 - 31( 47.81%) 51 - 52( -0.49%)
+ 1536 10 - 6( 62.55%) 32 - 42(-23.80%) 37 - 29( 29.19%) 50 - 51( -2.70%)
+ 1600 11 - 9( 20.69%) 34 - 43(-21.19%) 47 - 32( 45.63%) 49 - 53( -7.68%)
+ 2048 13 - 10( 26.67%) 53 - 53( -0.25%) 37 - 35( 7.16%) 61 - 62( -0.90%)
+ 2560 16 - 13( 25.07%) 62 - 59( 5.23%) 44 - 45( -0.71%) 71 - 70( 1.05%)
+ 3072 20 - 20( 1.91%) 72 - 71( 1.91%) 49 - 50( -3.36%) 82 - 82( -0.59%)
+ 3584 26 - 26( -0.81%) 81 - 81( -0.17%) 58 - 57( 1.17%) 92 - 91( 1.28%)
+ 4096 25 - 27( -9.39%) 90 - 90( 0.54%) 64 - 63( 0.67%) 102 -102( 0.70%)
+ 4608 31 - 27( 18.45%) 99 - 99( -0.00%) 70 - 70( 0.47%) 111 -111( 0.09%)
+ 5120 41 - 35( 16.65%) 108 -108( -0.28%) 78 - 77( 0.52%) 120 -120( 0.37%)
+ 5632 46 - 47( -2.05%) 117 -117( 0.12%) 85 - 85( 0.38%) 130 -130( -0.19%)
+ 6144 52 - 44( 18.06%) 126 -126( 0.01%) 94 - 93( 0.80%) 139 -138( 0.27%)
+ 6656 27 - 41(-33.88%) 135 -134( 0.33%) 102 -102( 0.52%) 149 -148( 1.11%)
+ 7168 56 - 27(104.91%) 143 -142( 0.33%) 110 -110( 0.15%) 157 -157( 0.07%)
+ 7680 66 - 70( -5.18%) 152 -152( 0.03%) 118 -117( 0.27%) 166 -166( 0.17%)
+ 8192 69 - 44( 57.50%) 161 -160( 0.45%) 125 -124( 0.35%) 176 -175( 0.41%)
+------- ----------------- ----------------- ----------------- -----------------
+C 6 0 - 0( -1.10%) 1 - 1( 9.45%) 2 - 2( -0.19%) 3 - 3( 2.77%)
+C 64 0 - 0( 0.60%) 3 - 3( 1.28%) 4 - 4( -0.18%) 6 - 6( 0.50%)
+C 128 0 - 0( 35.46%) 6 - 6( -3.33%) 8 - 7( 7.02%) 11 - 11( 1.72%)
+C 192 0 - 1(-48.74%) 7 - 8(-20.51%) 12 - 13(-12.42%) 12 - 15(-22.26%)
+C 256 1 - 1( 11.88%) 9 - 11(-15.05%) 13 - 13( 0.17%) 15 - 16( -1.65%)
+C 512 2 - 1( 27.80%) 13 - 22(-40.28%) 16 - 19(-12.48%) 22 - 25(-13.57%)
+C 768 2 - 2( 11.66%) 18 - 26(-30.06%) 23 - 21( 5.93%) 31 - 33( -7.73%)
+C 1024 6 - 4( 32.78%) 23 - 31(-25.36%) 26 - 24( 5.56%) 37 - 39( -6.05%)
+C 1536 9 - 7( 33.48%) 32 - 43(-23.71%) 37 - 29( 26.46%) 50 - 50( -0.05%)
+================================== Unaligned ==================================
+ 1 0 - 0( 32.71%) 1 - 1( 7.91%) 2 - 2( 0.99%) 3 - 3( 3.36%)
+ 2 0 - 0( 33.59%) 1 - 1( 6.69%) 2 - 2( 1.04%) 3 - 3( 1.19%)
+ 3 0 - 0( 33.20%) 1 - 1( 8.36%) 2 - 2( 0.87%) 3 - 3( 3.03%)
+ 4 0 - 0( 33.41%) 1 - 1( 6.50%) 2 - 2( 1.03%) 3 - 3( 2.77%)
+ 5 0 - 0( 32.00%) 1 - 1( 6.83%) 2 - 2( 1.16%) 3 - 3( 2.28%)
+ 6 0 - 0( 33.29%) 1 - 1( 7.94%) 2 - 2( 0.93%) 3 - 3( 0.17%)
+ 7 0 - 0( 32.69%) 1 - 1( 6.01%) 2 - 2( 0.93%) 3 - 2( 4.20%)
+ 8 0 - 0( 33.99%) 1 - 1( 5.62%) 2 - 2( 0.92%) 3 - 3( 1.09%)
+ 9 0 - 0( 32.63%) 1 - 1( 6.33%) 2 - 2( 1.13%) 3 - 3( 2.01%)
+ 12 0 - 0( 33.10%) 1 - 1( 7.30%) 4 - 3( 47.16%) 5 - 3( 41.00%)
+ 15 0 - 0( 32.30%) 1 - 1( 6.96%) 4 - 3( 47.34%) 5 - 3( 43.19%)
+ 16 0 - 0( 18.41%) 3 - 2( 68.45%) 4 - 3( 62.20%) 5 - 3( 35.47%)
+ 17 0 - 0( 7.81%) 4 - 3( 37.51%) 5 - 3( 59.08%) 6 - 4( 40.54%)
+ 31 0 - 0( 33.54%) 4 - 3( 31.79%) 6 - 4( 47.27%) 6 - 4( 39.17%)
+ 32 0 - 0( 32.98%) 4 - 3( 29.22%) 6 - 4( 46.89%) 6 - 5( 35.76%)
+ 33 0 - 0( 27.50%) 4 - 4( 6.37%) 6 - 5( 34.85%) 7 - 6( 19.56%)
+ 63 0 - 0( 44.23%) 5 - 5( 19.68%) 7 - 7( 3.62%) 9 - 9( 7.96%)
+ 64 0 - 0( 29.92%) 5 - 5( 14.45%) 7 - 7( 3.11%) 9 - 9( 7.57%)
+ 65 0 - 0( 3.00%) 6 - 5( 6.09%) 8 - 7( 2.61%) 10 - 10( 4.75%)
+ 127 1 - 0( 16.12%) 9 - 8( 10.20%) 12 - 12( -0.66%) 14 - 14( 2.06%)
+ 128 1 - 1( 11.58%) 8 - 8( 2.75%) 12 - 12( -2.33%) 13 - 14( -7.63%)
+ 129 1 - 1(-48.77%) 10 - 12(-13.37%) 12 - 16(-22.85%) 14 - 22(-35.87%)
+ 191 1 - 1(-36.20%) 11 - 12( -4.61%) 13 - 18(-27.05%) 17 - 27(-39.94%)
+ 192 1 - 1(-31.62%) 11 - 12( -9.55%) 14 - 18(-18.64%) 18 - 28(-34.80%)
+ 193 1 - 2(-36.96%) 13 - 13( 0.19%) 15 - 17(-12.88%) 20 - 28(-29.62%)
+ 255 1 - 2(-35.46%) 16 - 18(-12.89%) 17 - 17( 0.23%) 22 - 28(-21.79%)
+ 256 1 - 1( 7.89%) 17 - 19(-10.33%) 17 - 18( -3.25%) 24 - 28(-16.62%)
+ 257 1 - 2(-28.10%) 16 - 19(-11.20%) 19 - 20( -6.80%) 23 - 32(-27.58%)
+ 319 1 - 2(-21.72%) 18 - 19( -6.08%) 21 - 21( 3.22%) 25 - 33(-22.94%)
+ 320 1 - 2(-23.13%) 16 - 21(-19.75%) 21 - 21( 2.12%) 26 - 33(-22.39%)
+ 321 1 - 2(-22.90%) 16 - 21(-22.21%) 21 - 20( 2.73%) 26 - 33(-22.90%)
+ 383 2 - 2(-22.35%) 19 - 20( -7.58%) 21 - 20( 0.49%) 29 - 33(-12.06%)
+ 384 2 - 2( 3.32%) 16 - 21(-22.26%) 20 - 20( 2.75%) 28 - 33(-13.58%)
+ 385 2 - 2(-36.41%) 14 - 21(-32.50%) 18 - 23(-22.20%) 27 - 35(-23.63%)
+ 447 2 - 2( 4.13%) 14 - 20(-28.61%) 16 - 23(-29.60%) 26 - 35(-23.79%)
+ 448 2 - 2(-21.37%) 14 - 22(-35.54%) 18 - 23(-21.17%) 27 - 36(-23.90%)
+ 449 2 - 2(-26.56%) 14 - 22(-36.19%) 18 - 22(-18.43%) 27 - 35(-22.43%)
+ 511 2 - 3(-31.11%) 14 - 22(-35.23%) 19 - 22(-16.50%) 29 - 35(-16.05%)
+ 512 2 - 2( -5.05%) 15 - 24(-37.63%) 19 - 22(-12.75%) 29 - 35(-15.81%)
+ 513 2 - 3(-27.14%) 15 - 24(-38.02%) 19 - 24(-20.39%) 30 - 36(-18.37%)
+ 767 3 - 4(-24.58%) 21 - 28(-26.97%) 23 - 25( -8.20%) 34 - 40(-13.70%)
+ 768 3 - 3( -0.56%) 21 - 29(-27.01%) 23 - 25( -5.67%) 34 - 39(-13.71%)
+ 769 3 - 3(-20.43%) 21 - 29(-26.40%) 23 - 27(-13.86%) 34 - 41(-15.93%)
+ 1023 5 - 5( -7.38%) 23 - 32(-27.22%) 27 - 28( -3.98%) 39 - 44(-11.72%)
+ 1024 5 - 6(-17.62%) 25 - 33(-25.40%) 27 - 28( -2.44%) 39 - 43(-11.14%)
+ 1025 5 - 4( 3.62%) 25 - 33(-25.57%) 27 - 29( -8.17%) 39 - 46(-16.26%)
+ 1518 10 - 10( -4.77%) 33 - 42(-20.47%) 36 - 34( 6.36%) 53 - 54( -2.01%)
+ 1522 10 - 11( -5.28%) 34 - 42(-18.86%) 36 - 33( 8.35%) 53 - 53( -1.57%)
+ 1536 7 - 8(-12.20%) 34 - 42(-19.11%) 39 - 33( 17.70%) 53 - 54( -0.54%)
+ 1600 11 - 9( 20.88%) 35 - 43(-18.54%) 31 - 35(-10.26%) 50 - 55( -9.91%)
+ 2048 15 - 8( 99.56%) 51 - 51( 0.24%) 40 - 39( 1.22%) 64 - 62( 3.14%)
+ 2560 17 - 16( 1.33%) 59 - 60( -0.76%) 47 - 47( 0.75%) 73 - 73( -0.56%)
+ 3072 22 - 20( 8.49%) 68 - 68( 0.32%) 53 - 54( -2.01%) 82 - 83( -0.37%)
+ 3584 30 - 32( -4.26%) 76 - 76( 0.19%) 61 - 60( 1.03%) 91 - 92( -0.92%)
+ 4096 34 - 28( 22.80%) 85 - 86( -0.61%) 67 - 67( 0.03%) 100 -100( -0.08%)
+ 4608 34 - 36( -4.01%) 93 - 93( 0.17%) 74 - 75( -0.47%) 109 -109( 0.44%)
+ 5120 35 - 29( 20.42%) 102 -102( -0.11%) 82 - 82( -0.08%) 119 -119( 0.53%)
+ 5632 44 - 41( 8.71%) 110 -110( 0.14%) 89 - 90( -0.16%) 128 -127( 0.16%)
+ 6144 40 - 48(-17.75%) 119 -119( 0.12%) 98 - 99( -0.31%) 138 -137( 0.56%)
+ 6656 53 - 54( -0.83%) 127 -127( 0.14%) 107 -107( -0.07%) 146 -145( 0.50%)
+ 7168 56 - 59( -5.16%) 136 -136( 0.18%) 115 -115( -0.13%) 155 -155( -0.34%)
+ 7680 71 - 68( 4.02%) 144 -144( 0.01%) 123 -123( -0.06%) 164 -163( 0.47%)
+ 8192 76 - 65( 17.61%) 152 -153( -0.36%) 130 -130( -0.04%) 174 -174( 0.13%)
+------- ----------------- ----------------- ----------------- -----------------
+C 6 0 - 0( 1.10%) 1 - 1( 8.55%) 2 - 2( 0.06%) 3 - 3( 4.86%)
+C 64 0 - 0( -3.20%) 5 - 5( 0.54%) 7 - 7( 0.27%) 9 - 9( -0.50%)
+C 128 1 - 0( 25.53%) 9 - 8( 3.56%) 12 - 12( -3.53%) 13 - 14( -8.98%)
+C 192 1 - 1(-37.27%) 11 - 12(-10.10%) 13 - 17(-23.33%) 17 - 28(-38.96%)
+C 256 1 - 1( 3.35%) 17 - 19( -8.99%) 16 - 18( -7.62%) 23 - 29(-20.07%)
+C 512 2 - 2( -6.31%) 14 - 24(-38.90%) 19 - 22(-13.61%) 29 - 35(-16.90%)
+C 768 3 - 3( -0.45%) 21 - 29(-25.43%) 23 - 25( -6.64%) 34 - 40(-13.59%)
+C 1024 6 - 6( -5.63%) 25 - 33(-24.23%) 27 - 28( -3.26%) 39 - 43(-10.94%)
+C 1536 8 - 8( 3.04%) 34 - 43(-19.62%) 38 - 33( 15.48%) 53 - 53( -0.43%)
+======= ================= ================= ================= =================
--
2.21.0.windows.1