DPDK patches and discussions
 help / color / mirror / Atom feed
From: Chao Zhu <bjzhuc@cn.ibm.com>
To: dev@dpdk.org
Subject: [dpdk-dev] [PATCH 6/7] Split memcpy operation to architecture specific
Date: Fri, 26 Sep 2014 05:33:37 -0400	[thread overview]
Message-ID: <1411724018-7738-7-git-send-email-bjzhuc@cn.ibm.com> (raw)
In-Reply-To: <1411724018-7738-1-git-send-email-bjzhuc@cn.ibm.com>

This patch splits the vector instruction based memory copy from DPDK and
push them to architecture specific arch directories, so that other
processor architecture to support DPDK can be easily adopted.

Signed-off-by: Chao Zhu <bjzhuc@cn.ibm.com>
---
 lib/librte_eal/common/Makefile                     |    2 +-
 .../common/include/i686/arch/rte_memcpy_arch.h     |  199 ++++++++++++++++++++
 lib/librte_eal/common/include/rte_memcpy.h         |   95 +---------
 .../common/include/x86_64/arch/rte_memcpy_arch.h   |  199 ++++++++++++++++++++
 4 files changed, 406 insertions(+), 89 deletions(-)
 create mode 100644 lib/librte_eal/common/include/i686/arch/rte_memcpy_arch.h
 create mode 100644 lib/librte_eal/common/include/x86_64/arch/rte_memcpy_arch.h

diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index 249ea2f..4add1c1 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -46,7 +46,7 @@ ifeq ($(CONFIG_RTE_INSECURE_FUNCTION_WARNING),y)
 INC += rte_warnings.h
 endif
 
-ARCH_INC := rte_atomic.h rte_atomic_arch.h rte_byteorder_arch.h rte_cycles_arch.h rte_prefetch_arch.h rte_spinlock_arch.h
+ARCH_INC := rte_atomic.h rte_atomic_arch.h rte_byteorder_arch.h rte_cycles_arch.h rte_prefetch_arch.h rte_spinlock_arch.h rte_memcpy_arch.h 
 
 SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include := $(addprefix include/,$(INC))
 SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include/arch := \
diff --git a/lib/librte_eal/common/include/i686/arch/rte_memcpy_arch.h b/lib/librte_eal/common/include/i686/arch/rte_memcpy_arch.h
new file mode 100644
index 0000000..44f7760
--- /dev/null
+++ b/lib/librte_eal/common/include/i686/arch/rte_memcpy_arch.h
@@ -0,0 +1,199 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMCPY_ARCH_H_
+#define _RTE_MEMCPY_ARCH_H_
+
+#include <stdint.h>
+#include <string.h>
+#include <emmintrin.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:593) /* Stop unused variable warning (reg_a etc). */
+#endif
+
+/**
+ * Copy 16 bytes from one location to another using optimised SSE
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ *   Pointer to the destination of the data.
+ * @param src
+ *   Pointer to the source data.
+ */
+static inline void
+rte_arch_mov16(uint8_t *dst, const uint8_t *src)
+{
+	__m128i reg_a;
+	asm volatile (
+		"movdqu (%[src]), %[reg_a]\n\t"
+		"movdqu %[reg_a], (%[dst])\n\t"
+		: [reg_a] "=x" (reg_a)
+		: [src] "r" (src),
+		  [dst] "r"(dst)
+		: "memory"
+	);
+}
+
+/**
+ * Copy 32 bytes from one location to another using optimised SSE
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ *   Pointer to the destination of the data.
+ * @param src
+ *   Pointer to the source data.
+ */
+static inline void
+rte_arch_mov32(uint8_t *dst, const uint8_t *src)
+{
+	__m128i reg_a, reg_b;
+	asm volatile (
+		"movdqu (%[src]), %[reg_a]\n\t"
+		"movdqu 16(%[src]), %[reg_b]\n\t"
+		"movdqu %[reg_a], (%[dst])\n\t"
+		"movdqu %[reg_b], 16(%[dst])\n\t"
+		: [reg_a] "=x" (reg_a),
+		  [reg_b] "=x" (reg_b)
+		: [src] "r" (src),
+		  [dst] "r"(dst)
+		: "memory"
+	);
+}
+
+/**
+ * Copy 48 bytes from one location to another using optimised SSE
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ *   Pointer to the destination of the data.
+ * @param src
+ *   Pointer to the source data.
+ */
+static inline void
+rte_arch_mov48(uint8_t *dst, const uint8_t *src)
+{
+	__m128i reg_a, reg_b, reg_c;
+	asm volatile (
+		"movdqu (%[src]), %[reg_a]\n\t"
+		"movdqu 16(%[src]), %[reg_b]\n\t"
+		"movdqu 32(%[src]), %[reg_c]\n\t"
+		"movdqu %[reg_a], (%[dst])\n\t"
+		"movdqu %[reg_b], 16(%[dst])\n\t"
+		"movdqu %[reg_c], 32(%[dst])\n\t"
+		: [reg_a] "=x" (reg_a),
+		  [reg_b] "=x" (reg_b),
+		  [reg_c] "=x" (reg_c)
+		: [src] "r" (src),
+		  [dst] "r"(dst)
+		: "memory"
+	);
+}
+
+/**
+ * Copy 64 bytes from one location to another using optimised SSE
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ *   Pointer to the destination of the data.
+ * @param src
+ *   Pointer to the source data.
+ */
+static inline void
+rte_arch_mov64(uint8_t *dst, const uint8_t *src)
+{
+	__m128i reg_a, reg_b, reg_c, reg_d;
+	asm volatile (
+		"movdqu (%[src]), %[reg_a]\n\t"
+		"movdqu 16(%[src]), %[reg_b]\n\t"
+		"movdqu 32(%[src]), %[reg_c]\n\t"
+		"movdqu 48(%[src]), %[reg_d]\n\t"
+		"movdqu %[reg_a], (%[dst])\n\t"
+		"movdqu %[reg_b], 16(%[dst])\n\t"
+		"movdqu %[reg_c], 32(%[dst])\n\t"
+		"movdqu %[reg_d], 48(%[dst])\n\t"
+		: [reg_a] "=x" (reg_a),
+		  [reg_b] "=x" (reg_b),
+		  [reg_c] "=x" (reg_c),
+		  [reg_d] "=x" (reg_d)
+		: [src] "r" (src),
+		  [dst] "r"(dst)
+		: "memory"
+	);
+}
+
+/**
+ * Copy 128 bytes from one location to another using optimised SSE
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ *   Pointer to the destination of the data.
+ * @param src
+ *   Pointer to the source data.
+ */
+static inline void
+rte_arch_mov128(uint8_t *dst, const uint8_t *src)
+{
+	__m128i reg_a, reg_b, reg_c, reg_d, reg_e, reg_f, reg_g, reg_h;
+	asm volatile (
+		"movdqu (%[src]), %[reg_a]\n\t"
+		"movdqu 16(%[src]), %[reg_b]\n\t"
+		"movdqu 32(%[src]), %[reg_c]\n\t"
+		"movdqu 48(%[src]), %[reg_d]\n\t"
+		"movdqu 64(%[src]), %[reg_e]\n\t"
+		"movdqu 80(%[src]), %[reg_f]\n\t"
+		"movdqu 96(%[src]), %[reg_g]\n\t"
+		"movdqu 112(%[src]), %[reg_h]\n\t"
+		"movdqu %[reg_a], (%[dst])\n\t"
+		"movdqu %[reg_b], 16(%[dst])\n\t"
+		"movdqu %[reg_c], 32(%[dst])\n\t"
+		"movdqu %[reg_d], 48(%[dst])\n\t"
+		"movdqu %[reg_e], 64(%[dst])\n\t"
+		"movdqu %[reg_f], 80(%[dst])\n\t"
+		"movdqu %[reg_g], 96(%[dst])\n\t"
+		"movdqu %[reg_h], 112(%[dst])\n\t"
+		: [reg_a] "=x" (reg_a),
+		  [reg_b] "=x" (reg_b),
+		  [reg_c] "=x" (reg_c),
+		  [reg_d] "=x" (reg_d),
+		  [reg_e] "=x" (reg_e),
+		  [reg_f] "=x" (reg_f),
+		  [reg_g] "=x" (reg_g),
+		  [reg_h] "=x" (reg_h)
+		: [src] "r" (src),
+		  [dst] "r"(dst)
+		: "memory"
+	);
+}
+
+#endif /* _RTE_MEMCPY_ARCH_H_ */
\ No newline at end of file
diff --git a/lib/librte_eal/common/include/rte_memcpy.h b/lib/librte_eal/common/include/rte_memcpy.h
index 131b196..11a099e 100644
--- a/lib/librte_eal/common/include/rte_memcpy.h
+++ b/lib/librte_eal/common/include/rte_memcpy.h
@@ -37,12 +37,10 @@
 /**
  * @file
  *
- * Functions for SSE implementation of memcpy().
+ * Functions for vector instruction implementation of memcpy().
  */
 
-#include <stdint.h>
-#include <string.h>
-#include <emmintrin.h>
+#include "arch/rte_memcpy_arch.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -64,15 +62,7 @@ extern "C" {
 static inline void
 rte_mov16(uint8_t *dst, const uint8_t *src)
 {
-	__m128i reg_a;
-	asm volatile (
-		"movdqu (%[src]), %[reg_a]\n\t"
-		"movdqu %[reg_a], (%[dst])\n\t"
-		: [reg_a] "=x" (reg_a)
-		: [src] "r" (src),
-		  [dst] "r"(dst)
-		: "memory"
-	);
+	rte_arch_mov16(dst, src);
 }
 
 /**
@@ -87,18 +77,7 @@ rte_mov16(uint8_t *dst, const uint8_t *src)
 static inline void
 rte_mov32(uint8_t *dst, const uint8_t *src)
 {
-	__m128i reg_a, reg_b;
-	asm volatile (
-		"movdqu (%[src]), %[reg_a]\n\t"
-		"movdqu 16(%[src]), %[reg_b]\n\t"
-		"movdqu %[reg_a], (%[dst])\n\t"
-		"movdqu %[reg_b], 16(%[dst])\n\t"
-		: [reg_a] "=x" (reg_a),
-		  [reg_b] "=x" (reg_b)
-		: [src] "r" (src),
-		  [dst] "r"(dst)
-		: "memory"
-	);
+	rte_arch_mov32(dst, src);
 }
 
 /**
@@ -113,21 +92,7 @@ rte_mov32(uint8_t *dst, const uint8_t *src)
 static inline void
 rte_mov48(uint8_t *dst, const uint8_t *src)
 {
-	__m128i reg_a, reg_b, reg_c;
-	asm volatile (
-		"movdqu (%[src]), %[reg_a]\n\t"
-		"movdqu 16(%[src]), %[reg_b]\n\t"
-		"movdqu 32(%[src]), %[reg_c]\n\t"
-		"movdqu %[reg_a], (%[dst])\n\t"
-		"movdqu %[reg_b], 16(%[dst])\n\t"
-		"movdqu %[reg_c], 32(%[dst])\n\t"
-		: [reg_a] "=x" (reg_a),
-		  [reg_b] "=x" (reg_b),
-		  [reg_c] "=x" (reg_c)
-		: [src] "r" (src),
-		  [dst] "r"(dst)
-		: "memory"
-	);
+	rte_arch_mov48(dst, src);
 }
 
 /**
@@ -142,24 +107,7 @@ rte_mov48(uint8_t *dst, const uint8_t *src)
 static inline void
 rte_mov64(uint8_t *dst, const uint8_t *src)
 {
-	__m128i reg_a, reg_b, reg_c, reg_d;
-	asm volatile (
-		"movdqu (%[src]), %[reg_a]\n\t"
-		"movdqu 16(%[src]), %[reg_b]\n\t"
-		"movdqu 32(%[src]), %[reg_c]\n\t"
-		"movdqu 48(%[src]), %[reg_d]\n\t"
-		"movdqu %[reg_a], (%[dst])\n\t"
-		"movdqu %[reg_b], 16(%[dst])\n\t"
-		"movdqu %[reg_c], 32(%[dst])\n\t"
-		"movdqu %[reg_d], 48(%[dst])\n\t"
-		: [reg_a] "=x" (reg_a),
-		  [reg_b] "=x" (reg_b),
-		  [reg_c] "=x" (reg_c),
-		  [reg_d] "=x" (reg_d)
-		: [src] "r" (src),
-		  [dst] "r"(dst)
-		: "memory"
-	);
+	rte_arch_mov64(dst, src);
 }
 
 /**
@@ -174,36 +122,7 @@ rte_mov64(uint8_t *dst, const uint8_t *src)
 static inline void
 rte_mov128(uint8_t *dst, const uint8_t *src)
 {
-	__m128i reg_a, reg_b, reg_c, reg_d, reg_e, reg_f, reg_g, reg_h;
-	asm volatile (
-		"movdqu (%[src]), %[reg_a]\n\t"
-		"movdqu 16(%[src]), %[reg_b]\n\t"
-		"movdqu 32(%[src]), %[reg_c]\n\t"
-		"movdqu 48(%[src]), %[reg_d]\n\t"
-		"movdqu 64(%[src]), %[reg_e]\n\t"
-		"movdqu 80(%[src]), %[reg_f]\n\t"
-		"movdqu 96(%[src]), %[reg_g]\n\t"
-		"movdqu 112(%[src]), %[reg_h]\n\t"
-		"movdqu %[reg_a], (%[dst])\n\t"
-		"movdqu %[reg_b], 16(%[dst])\n\t"
-		"movdqu %[reg_c], 32(%[dst])\n\t"
-		"movdqu %[reg_d], 48(%[dst])\n\t"
-		"movdqu %[reg_e], 64(%[dst])\n\t"
-		"movdqu %[reg_f], 80(%[dst])\n\t"
-		"movdqu %[reg_g], 96(%[dst])\n\t"
-		"movdqu %[reg_h], 112(%[dst])\n\t"
-		: [reg_a] "=x" (reg_a),
-		  [reg_b] "=x" (reg_b),
-		  [reg_c] "=x" (reg_c),
-		  [reg_d] "=x" (reg_d),
-		  [reg_e] "=x" (reg_e),
-		  [reg_f] "=x" (reg_f),
-		  [reg_g] "=x" (reg_g),
-		  [reg_h] "=x" (reg_h)
-		: [src] "r" (src),
-		  [dst] "r"(dst)
-		: "memory"
-	);
+	rte_arch_mov128(dst, src);
 }
 
 #ifdef __INTEL_COMPILER
diff --git a/lib/librte_eal/common/include/x86_64/arch/rte_memcpy_arch.h b/lib/librte_eal/common/include/x86_64/arch/rte_memcpy_arch.h
new file mode 100644
index 0000000..44f7760
--- /dev/null
+++ b/lib/librte_eal/common/include/x86_64/arch/rte_memcpy_arch.h
@@ -0,0 +1,199 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMCPY_ARCH_H_
+#define _RTE_MEMCPY_ARCH_H_
+
+#include <stdint.h>
+#include <string.h>
+#include <emmintrin.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:593) /* Stop unused variable warning (reg_a etc). */
+#endif
+
+/**
+ * Copy 16 bytes from one location to another using optimised SSE
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ *   Pointer to the destination of the data.
+ * @param src
+ *   Pointer to the source data.
+ */
+static inline void
+rte_arch_mov16(uint8_t *dst, const uint8_t *src)
+{
+	__m128i reg_a;
+	asm volatile (
+		"movdqu (%[src]), %[reg_a]\n\t"
+		"movdqu %[reg_a], (%[dst])\n\t"
+		: [reg_a] "=x" (reg_a)
+		: [src] "r" (src),
+		  [dst] "r"(dst)
+		: "memory"
+	);
+}
+
+/**
+ * Copy 32 bytes from one location to another using optimised SSE
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ *   Pointer to the destination of the data.
+ * @param src
+ *   Pointer to the source data.
+ */
+static inline void
+rte_arch_mov32(uint8_t *dst, const uint8_t *src)
+{
+	__m128i reg_a, reg_b;
+	asm volatile (
+		"movdqu (%[src]), %[reg_a]\n\t"
+		"movdqu 16(%[src]), %[reg_b]\n\t"
+		"movdqu %[reg_a], (%[dst])\n\t"
+		"movdqu %[reg_b], 16(%[dst])\n\t"
+		: [reg_a] "=x" (reg_a),
+		  [reg_b] "=x" (reg_b)
+		: [src] "r" (src),
+		  [dst] "r"(dst)
+		: "memory"
+	);
+}
+
+/**
+ * Copy 48 bytes from one location to another using optimised SSE
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ *   Pointer to the destination of the data.
+ * @param src
+ *   Pointer to the source data.
+ */
+static inline void
+rte_arch_mov48(uint8_t *dst, const uint8_t *src)
+{
+	__m128i reg_a, reg_b, reg_c;
+	asm volatile (
+		"movdqu (%[src]), %[reg_a]\n\t"
+		"movdqu 16(%[src]), %[reg_b]\n\t"
+		"movdqu 32(%[src]), %[reg_c]\n\t"
+		"movdqu %[reg_a], (%[dst])\n\t"
+		"movdqu %[reg_b], 16(%[dst])\n\t"
+		"movdqu %[reg_c], 32(%[dst])\n\t"
+		: [reg_a] "=x" (reg_a),
+		  [reg_b] "=x" (reg_b),
+		  [reg_c] "=x" (reg_c)
+		: [src] "r" (src),
+		  [dst] "r"(dst)
+		: "memory"
+	);
+}
+
+/**
+ * Copy 64 bytes from one location to another using optimised SSE
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ *   Pointer to the destination of the data.
+ * @param src
+ *   Pointer to the source data.
+ */
+static inline void
+rte_arch_mov64(uint8_t *dst, const uint8_t *src)
+{
+	__m128i reg_a, reg_b, reg_c, reg_d;
+	asm volatile (
+		"movdqu (%[src]), %[reg_a]\n\t"
+		"movdqu 16(%[src]), %[reg_b]\n\t"
+		"movdqu 32(%[src]), %[reg_c]\n\t"
+		"movdqu 48(%[src]), %[reg_d]\n\t"
+		"movdqu %[reg_a], (%[dst])\n\t"
+		"movdqu %[reg_b], 16(%[dst])\n\t"
+		"movdqu %[reg_c], 32(%[dst])\n\t"
+		"movdqu %[reg_d], 48(%[dst])\n\t"
+		: [reg_a] "=x" (reg_a),
+		  [reg_b] "=x" (reg_b),
+		  [reg_c] "=x" (reg_c),
+		  [reg_d] "=x" (reg_d)
+		: [src] "r" (src),
+		  [dst] "r"(dst)
+		: "memory"
+	);
+}
+
+/**
+ * Copy 128 bytes from one location to another using optimised SSE
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ *   Pointer to the destination of the data.
+ * @param src
+ *   Pointer to the source data.
+ */
+static inline void
+rte_arch_mov128(uint8_t *dst, const uint8_t *src)
+{
+	__m128i reg_a, reg_b, reg_c, reg_d, reg_e, reg_f, reg_g, reg_h;
+	asm volatile (
+		"movdqu (%[src]), %[reg_a]\n\t"
+		"movdqu 16(%[src]), %[reg_b]\n\t"
+		"movdqu 32(%[src]), %[reg_c]\n\t"
+		"movdqu 48(%[src]), %[reg_d]\n\t"
+		"movdqu 64(%[src]), %[reg_e]\n\t"
+		"movdqu 80(%[src]), %[reg_f]\n\t"
+		"movdqu 96(%[src]), %[reg_g]\n\t"
+		"movdqu 112(%[src]), %[reg_h]\n\t"
+		"movdqu %[reg_a], (%[dst])\n\t"
+		"movdqu %[reg_b], 16(%[dst])\n\t"
+		"movdqu %[reg_c], 32(%[dst])\n\t"
+		"movdqu %[reg_d], 48(%[dst])\n\t"
+		"movdqu %[reg_e], 64(%[dst])\n\t"
+		"movdqu %[reg_f], 80(%[dst])\n\t"
+		"movdqu %[reg_g], 96(%[dst])\n\t"
+		"movdqu %[reg_h], 112(%[dst])\n\t"
+		: [reg_a] "=x" (reg_a),
+		  [reg_b] "=x" (reg_b),
+		  [reg_c] "=x" (reg_c),
+		  [reg_d] "=x" (reg_d),
+		  [reg_e] "=x" (reg_e),
+		  [reg_f] "=x" (reg_f),
+		  [reg_g] "=x" (reg_g),
+		  [reg_h] "=x" (reg_h)
+		: [src] "r" (src),
+		  [dst] "r"(dst)
+		: "memory"
+	);
+}
+
+#endif /* _RTE_MEMCPY_ARCH_H_ */
\ No newline at end of file
-- 
1.7.1

  parent reply	other threads:[~2014-09-26  9:27 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-09-26  9:33 [dpdk-dev] [PATCH 0/7] Patches to split architecture specific operations from DPDK Chao Zhu
2014-09-26  9:33 ` [dpdk-dev] [PATCH 1/7] Split atomic operations to architecture specific Chao Zhu
2014-09-29 11:05   ` Bruce Richardson
2014-09-29 15:24     ` Neil Horman
2014-09-30  2:18       ` Chao CH Zhu
2014-09-26  9:33 ` [dpdk-dev] [PATCH 2/7] Split byte order " Chao Zhu
2014-09-26  9:33 ` [dpdk-dev] [PATCH 3/7] Split CPU cycle operation " Chao Zhu
2014-09-26  9:33 ` [dpdk-dev] [PATCH 4/7] Split prefetch operations " Chao Zhu
2014-09-26  9:33 ` [dpdk-dev] [PATCH 5/7] Split spinlock " Chao Zhu
2014-09-26  9:33 ` Chao Zhu [this message]
2014-09-26  9:33 ` [dpdk-dev] [PATCH 7/7] Split CPU flags " Chao Zhu
2014-10-03 13:21 ` [dpdk-dev] [PATCH 0/7] Patches to split architecture specific operations from DPDK David Marchand
2014-10-03 13:29   ` Bruce Richardson
2014-10-13  2:36   ` Chao CH Zhu
2014-10-06 21:46 ` Cyril Chemparathy
2014-10-12  9:14   ` Chao CH Zhu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1411724018-7738-7-git-send-email-bjzhuc@cn.ibm.com \
    --to=bjzhuc@cn.ibm.com \
    --cc=dev@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).