DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH] eal: force gcc to inline rte_movX function
@ 2018-04-12  5:16 Junjie Chen
  2018-04-17 13:22 ` Thomas Monjalon
  0 siblings, 1 reply; 5+ messages in thread
From: Junjie Chen @ 2018-04-12  5:16 UTC (permalink / raw)
  To: bruce.richardson, konstantin.ananyev; +Cc: dev, Chen, Junjie, Chen

From: "Chen, Junjie" <junjie.j.chen@intel.com>

Sometimes gcc does not inline the function despite keyword *inline*,
we obeserve rte_movX is not inline when doing performance profiling,
so use *always_inline* keyword to force gcc to inline the function.

Signed-off-by: Chen, Junjie <junjie.j.chen@intel.com>
---
 .../common/include/arch/x86/rte_memcpy.h           | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
index cc140ecca..5ead68ab2 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
@@ -52,7 +52,7 @@ rte_memcpy(void *dst, const void *src, size_t n);
  * Copy 16 bytes from one location to another,
  * locations should not overlap.
  */
-static inline void
+static __rte_always_inline void
 rte_mov16(uint8_t *dst, const uint8_t *src)
 {
 	__m128i xmm0;
@@ -65,7 +65,7 @@ rte_mov16(uint8_t *dst, const uint8_t *src)
  * Copy 32 bytes from one location to another,
  * locations should not overlap.
  */
-static inline void
+static __rte_always_inline void
 rte_mov32(uint8_t *dst, const uint8_t *src)
 {
 	__m256i ymm0;
@@ -78,7 +78,7 @@ rte_mov32(uint8_t *dst, const uint8_t *src)
  * Copy 64 bytes from one location to another,
  * locations should not overlap.
  */
-static inline void
+static __rte_always_inline void
 rte_mov64(uint8_t *dst, const uint8_t *src)
 {
 	__m512i zmm0;
@@ -91,7 +91,7 @@ rte_mov64(uint8_t *dst, const uint8_t *src)
  * Copy 128 bytes from one location to another,
  * locations should not overlap.
  */
-static inline void
+static __rte_always_inline void
 rte_mov128(uint8_t *dst, const uint8_t *src)
 {
 	rte_mov64(dst + 0 * 64, src + 0 * 64);
@@ -102,7 +102,7 @@ rte_mov128(uint8_t *dst, const uint8_t *src)
  * Copy 256 bytes from one location to another,
  * locations should not overlap.
  */
-static inline void
+static __rte_always_inline void
 rte_mov256(uint8_t *dst, const uint8_t *src)
 {
 	rte_mov64(dst + 0 * 64, src + 0 * 64);
@@ -293,7 +293,7 @@ rte_memcpy_generic(void *dst, const void *src, size_t n)
  * Copy 16 bytes from one location to another,
  * locations should not overlap.
  */
-static inline void
+static __rte_always_inline void
 rte_mov16(uint8_t *dst, const uint8_t *src)
 {
 	__m128i xmm0;
@@ -306,7 +306,7 @@ rte_mov16(uint8_t *dst, const uint8_t *src)
  * Copy 32 bytes from one location to another,
  * locations should not overlap.
  */
-static inline void
+static __rte_always_inline void
 rte_mov32(uint8_t *dst, const uint8_t *src)
 {
 	__m256i ymm0;
@@ -319,7 +319,7 @@ rte_mov32(uint8_t *dst, const uint8_t *src)
  * Copy 64 bytes from one location to another,
  * locations should not overlap.
  */
-static inline void
+static __rte_always_inline void
 rte_mov64(uint8_t *dst, const uint8_t *src)
 {
 	rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32);
@@ -486,7 +486,7 @@ rte_memcpy_generic(void *dst, const void *src, size_t n)
  * Copy 16 bytes from one location to another,
  * locations should not overlap.
  */
-static inline void
+static __rte_always_inline void
 rte_mov16(uint8_t *dst, const uint8_t *src)
 {
 	__m128i xmm0;
@@ -499,7 +499,7 @@ rte_mov16(uint8_t *dst, const uint8_t *src)
  * Copy 32 bytes from one location to another,
  * locations should not overlap.
  */
-static inline void
+static __rte_always_inline void
 rte_mov32(uint8_t *dst, const uint8_t *src)
 {
 	rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16);
@@ -510,7 +510,7 @@ rte_mov32(uint8_t *dst, const uint8_t *src)
  * Copy 64 bytes from one location to another,
  * locations should not overlap.
  */
-static inline void
+static __rte_always_inline void
 rte_mov64(uint8_t *dst, const uint8_t *src)
 {
 	rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16);
-- 
2.16.0

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2018-04-18  7:25 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-12  5:16 [dpdk-dev] [PATCH] eal: force gcc to inline rte_movX function Junjie Chen
2018-04-17 13:22 ` Thomas Monjalon
2018-04-17 14:57   ` Bruce Richardson
2018-04-18  2:43     ` Chen, Junjie J
2018-04-18  7:25       ` Thomas Monjalon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).