DPDK patches and discussions
 help / color / mirror / Atom feed
From: Sun Yuechi <sunyuechi@iscas.ac.cn>
To: dev@dpdk.org
Cc: "Sun Yuechi" <sunyuechi@iscas.ac.cn>,
	"Stanisław Kardach" <stanislaw.kardach@gmail.com>,
	"Bruce Richardson" <bruce.richardson@intel.com>
Subject: [PATCH] eal/riscv: optimize memcpy for small copies under 64 bytes
Date: Thu,  9 Oct 2025 14:30:30 +0800	[thread overview]
Message-ID: <20251009063030.2776794-1-sunyuechi@iscas.ac.cn> (raw)

Improve rte_memcpy implementation on RISC-V platform for sizes under
64 bytes, based on the ARM implementation.

Enhanced handling for cases smaller than 64 bytes shows very significant
performance benefits, while the impact is minimal after 64 bytes.

This optimization is disabled by default as a conservative measure,
since future glibc versions may include similar improvements that
could conflict with this implementation.

Use RTE_ARCH_RISCV_MEMCPY to enable this optimization.

Signed-off-by: Sun Yuechi <sunyuechi@iscas.ac.cn>
---
 config/riscv/meson.build           |   5 ++
 lib/eal/riscv/include/rte_memcpy.h | 122 +++++++++++++++++++++++++++++
 2 files changed, 127 insertions(+)

diff --git a/config/riscv/meson.build b/config/riscv/meson.build
index f93ea3e145..73fd0ab4da 100644
--- a/config/riscv/meson.build
+++ b/config/riscv/meson.build
@@ -20,6 +20,11 @@ dpdk_conf.set('RTE_FORCE_INTRINSICS', 1)
 
 # common flags to all riscv builds, with lowest priority
 flags_common = [
+    # Accelerate rte_memcpy for copies smaller than 64 bytes. Be sure to run
+    # the unit test (memcpy_perf_autotest) to verify performance improvements.
+    # Refer to notes in source file (lib/eal/riscv/include/rte_memcpy.h) for
+    # more details.
+    ['RTE_ARCH_RISCV_MEMCPY', false],
     ['RTE_ARCH_RISCV', true],
     ['RTE_CACHE_LINE_SIZE', 64],
     # Manually set wall time clock frequency for the target. If 0, then it is
diff --git a/lib/eal/riscv/include/rte_memcpy.h b/lib/eal/riscv/include/rte_memcpy.h
index d8a942c5d2..ae6e79e2fc 100644
--- a/lib/eal/riscv/include/rte_memcpy.h
+++ b/lib/eal/riscv/include/rte_memcpy.h
@@ -2,6 +2,7 @@
  * Copyright(c) 2022 StarFive
  * Copyright(c) 2022 SiFive
  * Copyright(c) 2022 Semihalf
+ * Copyright(c) 2025 ISCAS
  */
 
 #ifndef RTE_MEMCPY_RISCV_H
@@ -14,6 +15,125 @@
 
 #include "generic/rte_memcpy.h"
 
+#ifdef RTE_ARCH_RISCV_MEMCPY
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * This implementation is improved from eal/arm/include/rte_memcpy_64.h,
+ * targeting only cases of < 64 bytes.
+ * Currently shows significant performance improvement over various glibc versions,
+ * but is disabled by default due to uncertainty about potential performance
+ * degradation in future versions.
+ * You can use memcpy_perf_autotest to test the performance.
+ */
+
+/* Copy exactly 16 bytes from src to dst; the regions must not overlap. */
+static __rte_always_inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+	/* Fixed-size memcpy() stays defined for unaligned, aliased pointers. */
+	memcpy(dst, src, 16);
+}
+
+/*
+ * Copy exactly 32 bytes from src to dst; the regions must not overlap.
+ * Fixed-size memcpy() keeps unaligned or aliased pointers well-defined.
+ */
+static __rte_always_inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 32);
+}
+
+/*
+ * Copy exactly 48 bytes from src to dst; the regions must not overlap.
+ * A fixed-size memcpy() is used instead of __uint128_t casts so that
+ * unaligned or aliased pointers stay well-defined.
+ */
+static __rte_always_inline void
+rte_mov48(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 48);
+}
+
+static __rte_always_inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 64); /* fixed 64-byte copy; regions must not overlap */
+}
+
+static __rte_always_inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 128); /* fixed 128-byte copy; regions must not overlap */
+}
+
+static __rte_always_inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 256); /* fixed 256-byte copy; regions must not overlap */
+}
+
+/*
+ * Copy n (< 16) bytes as a leading and a trailing chunk that may overlap;
+ * n == 0 copies nothing. Fixed-size memcpy() keeps unaligned pointers defined.
+ */
+static __rte_always_inline void
+rte_memcpy_lt16(uint8_t *dst, const uint8_t *src, size_t n)
+{
+	if (n & 0x08) { /* 8 ~ 15 bytes */
+		memcpy(dst, src, 8);
+		memcpy(dst + n - 8, src + n - 8, 8);
+	} else if (n & 0x04) { /* 4 ~ 7 bytes */
+		memcpy(dst, src, 4);
+		memcpy(dst + n - 4, src + n - 4, 4);
+	} else if (n & 0x02) { /* 2 ~ 3 bytes */
+		memcpy(dst, src, 2);
+		memcpy(dst + n - 2, src + n - 2, 2);
+	} else if (n & 0x01) { /* 1 byte */
+		*dst = *src;
+	}
+}
+
+/* Copy n bytes, 16 <= n < 64: leading 16-byte chunks, then the 16 bytes
+ * that end at dst + n (this tail may overlap the leading chunks). */
+static __rte_always_inline void
+rte_memcpy_ge16_lt64(uint8_t *dst, const uint8_t *src, size_t n)
+{
+	const size_t tail = n - 16; /* in-bounds offset of the final chunk */
+
+	if (n > 48)
+		rte_mov48(dst, src);
+	else if (n > 32)
+		rte_mov32(dst, src);
+	else
+		rte_mov16(dst, src);
+	if (tail != 0)
+		rte_mov16(dst + tail, src + tail);
+}
+
+/* Copy n bytes and return dst: memcpy() for n >= 64, size helpers below. */
+static __rte_always_inline void *
+rte_memcpy(void *dst, const void *src, size_t n)
+{
+	if (n >= 64)
+		return memcpy(dst, src, n);
+	if (n >= 16)
+		rte_memcpy_ge16_lt64((uint8_t *)dst, (const uint8_t *)src, n);
+	else
+		rte_memcpy_lt16((uint8_t *)dst, (const uint8_t *)src, n);
+	return dst;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#else /* RTE_ARCH_RISCV_MEMCPY */
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -60,4 +180,6 @@ rte_mov256(uint8_t *dst, const uint8_t *src)
 }
 #endif
 
+#endif /* RTE_ARCH_RISCV_MEMCPY */
+
 #endif /* RTE_MEMCPY_RISCV_H */
-- 
2.51.0


             reply	other threads:[~2025-10-09  6:32 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-09  6:30 Sun Yuechi [this message]
2025-10-09  8:17 ` Stephen Hemminger
2025-10-09  8:43   ` sunyuechi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251009063030.2776794-1-sunyuechi@iscas.ac.cn \
    --to=sunyuechi@iscas.ac.cn \
    --cc=bruce.richardson@intel.com \
    --cc=dev@dpdk.org \
    --cc=stanislaw.kardach@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).