* [dpdk-dev] [RFC PATCH 0/2] Fix examples/distributor build issue for non x86
@ 2015-12-06 15:24 Jerin Jacob
2015-12-06 15:24 ` [dpdk-dev] [PATCH 1/2] eal: introduce rte_prefetch_non_temporal Jerin Jacob
` (3 more replies)
0 siblings, 4 replies; 12+ messages in thread
From: Jerin Jacob @ 2015-12-06 15:24 UTC (permalink / raw)
To: dev
Introduce rte_prefetch_non_temporal() to replace the IA-specific _mm_prefetch(addr, 0)
gcc intrinsic, so that examples/distributor builds on non-x86 platforms.
I am not sure the rte_prefetch_non_temporal() mapping is correct for
all platforms. Architecture maintainers, please check the mapping of
rte_prefetch_non_temporal() for your specific architectures.
Jerin Jacob (2):
eal: introduce rte_prefetch_non_temporal
examples/distributor: remove IA specific __mm_prefetch
examples/distributor/main.c | 9 +++++----
lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h | 5 +++++
lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h | 5 +++++
lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h | 5 +++++
lib/librte_eal/common/include/arch/tile/rte_prefetch.h | 5 +++++
lib/librte_eal/common/include/arch/x86/rte_prefetch.h | 5 +++++
lib/librte_eal/common/include/generic/rte_prefetch.h | 12 ++++++++++++
7 files changed, 42 insertions(+), 4 deletions(-)
--
2.1.0
^ permalink raw reply [flat|nested] 12+ messages in thread
* [dpdk-dev] [PATCH 1/2] eal: introduce rte_prefetch_non_temporal
2015-12-06 15:24 [dpdk-dev] [RFC PATCH 0/2] Fix examples/distributor build issue for non x86 Jerin Jacob
@ 2015-12-06 15:24 ` Jerin Jacob
2016-02-11 11:00 ` Thomas Monjalon
2016-02-11 11:43 ` Jan Viktorin
2015-12-06 15:24 ` [dpdk-dev] [PATCH 2/2] examples/distributor: remove IA specific __mm_prefetch Jerin Jacob
` (2 subsequent siblings)
3 siblings, 2 replies; 12+ messages in thread
From: Jerin Jacob @ 2015-12-06 15:24 UTC (permalink / raw)
To: dev
Non-temporal/transient/streaming version of rte_prefetch0().
The non-temporal prefetch is intended as a hint that the processor
will use the prefetched data only once or for a short period,
unlike rte_prefetch0(), which implies that the
prefetched data will be used repeatedly.
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h | 5 +++++
lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h | 5 +++++
lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h | 5 +++++
lib/librte_eal/common/include/arch/tile/rte_prefetch.h | 5 +++++
lib/librte_eal/common/include/arch/x86/rte_prefetch.h | 5 +++++
lib/librte_eal/common/include/generic/rte_prefetch.h | 12 ++++++++++++
6 files changed, 37 insertions(+)
diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
index b716384..3157224 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
@@ -54,6 +54,11 @@ static inline void rte_prefetch2(const volatile void *p)
asm volatile ("pld [%0]" : : "r" (p));
}
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+ rte_prefetch0(p);
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h
index f9cc62e..3ed46a4 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h
@@ -54,6 +54,11 @@ static inline void rte_prefetch2(const volatile void *p)
asm volatile ("PRFM PLDL3KEEP, [%0]" : : "r" (p));
}
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+ asm volatile ("PRFM PLDL1STRM, [%0]" : : "r" (p));
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
index fea3be1..cab6fe0 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
@@ -54,6 +54,11 @@ static inline void rte_prefetch2(const volatile void *p)
asm volatile ("dcbt 0,%[p],1" : : [p] "r" (p));
}
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+ rte_prefetch0(p);
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eal/common/include/arch/tile/rte_prefetch.h b/lib/librte_eal/common/include/arch/tile/rte_prefetch.h
index c94075c..19d3c6e 100644
--- a/lib/librte_eal/common/include/arch/tile/rte_prefetch.h
+++ b/lib/librte_eal/common/include/arch/tile/rte_prefetch.h
@@ -54,6 +54,11 @@ static inline void rte_prefetch2(const volatile void *p)
__builtin_prefetch((const void *)(uintptr_t)p, 0, 1);
}
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+ rte_prefetch0(p);
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eal/common/include/arch/x86/rte_prefetch.h b/lib/librte_eal/common/include/arch/x86/rte_prefetch.h
index 8e6e02c..5dac47e 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_prefetch.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_prefetch.h
@@ -55,6 +55,11 @@ static inline void rte_prefetch2(const volatile void *p)
asm volatile ("prefetcht2 %[p]" : : [p] "m" (*(const volatile char *)p));
}
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+ asm volatile ("prefetchnta %[p]" : : [p] "m" (*(const volatile char *)p));
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eal/common/include/generic/rte_prefetch.h b/lib/librte_eal/common/include/generic/rte_prefetch.h
index 725715f..95c3fbc 100644
--- a/lib/librte_eal/common/include/generic/rte_prefetch.h
+++ b/lib/librte_eal/common/include/generic/rte_prefetch.h
@@ -68,4 +68,16 @@ static inline void rte_prefetch1(const volatile void *p);
*/
static inline void rte_prefetch2(const volatile void *p);
+/**
+ * Prefetch a cache line into all cache levels(non-temporal/transient version)
+ *
+ * The non-temporal prefetch is intended as a prefetch hint that processor will
+ * use the prefetched data only once or short period, unlike the
+ * rte_prefetch0() function which imply that prefetched data to use repeatedly.
+ *
+ * @param p
+ * Address to prefetch
+ */
+static inline void rte_prefetch_non_temporal(const volatile void *p);
+
#endif /* _RTE_PREFETCH_H_ */
--
2.1.0
^ permalink raw reply [flat|nested] 12+ messages in thread
* [dpdk-dev] [PATCH 2/2] examples/distributor: remove IA specific __mm_prefetch
2015-12-06 15:24 [dpdk-dev] [RFC PATCH 0/2] Fix examples/distributor build issue for non x86 Jerin Jacob
2015-12-06 15:24 ` [dpdk-dev] [PATCH 1/2] eal: introduce rte_prefetch_non_temporal Jerin Jacob
@ 2015-12-06 15:24 ` Jerin Jacob
2016-01-29 3:21 ` [dpdk-dev] [RFC PATCH 0/2] Fix examples/distributor build issue for non x86 Jerin Jacob
2016-02-12 11:13 ` [dpdk-dev] [PATCH v2 " Jerin Jacob
3 siblings, 0 replies; 12+ messages in thread
From: Jerin Jacob @ 2015-12-06 15:24 UTC (permalink / raw)
To: dev
Use the rte_prefetch_non_temporal() abstraction instead of _mm_prefetch(x, 0)
in order to build the distributor application for non-x86 platforms.
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
examples/distributor/main.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/examples/distributor/main.c b/examples/distributor/main.c
index 4e74f8f..c0201a9 100644
--- a/examples/distributor/main.c
+++ b/examples/distributor/main.c
@@ -42,6 +42,7 @@
#include <rte_cycles.h>
#include <rte_malloc.h>
#include <rte_debug.h>
+#include <rte_prefetch.h>
#include <rte_distributor.h>
#define RX_RING_SIZE 256
@@ -335,13 +336,13 @@ lcore_tx(struct rte_ring *in_r)
/* for traffic we receive, queue it up for transmit */
uint16_t i;
- _mm_prefetch((void *)bufs[0], 0);
- _mm_prefetch((void *)bufs[1], 0);
- _mm_prefetch((void *)bufs[2], 0);
+ rte_prefetch_non_temporal((void *)bufs[0]);
+ rte_prefetch_non_temporal((void *)bufs[1]);
+ rte_prefetch_non_temporal((void *)bufs[2]);
for (i = 0; i < nb_rx; i++) {
struct output_buffer *outbuf;
uint8_t outp;
- _mm_prefetch((void *)bufs[i + 3], 0);
+ rte_prefetch_non_temporal((void *)bufs[i + 3]);
/*
* workers should update in_port to hold the
* output port value
--
2.1.0
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [dpdk-dev] [RFC PATCH 0/2] Fix examples/distributor build issue for non x86
2015-12-06 15:24 [dpdk-dev] [RFC PATCH 0/2] Fix examples/distributor build issue for non x86 Jerin Jacob
2015-12-06 15:24 ` [dpdk-dev] [PATCH 1/2] eal: introduce rte_prefetch_non_temporal Jerin Jacob
2015-12-06 15:24 ` [dpdk-dev] [PATCH 2/2] examples/distributor: remove IA specific __mm_prefetch Jerin Jacob
@ 2016-01-29 3:21 ` Jerin Jacob
2016-01-29 15:03 ` Bruce Richardson
2016-02-12 11:13 ` [dpdk-dev] [PATCH v2 " Jerin Jacob
3 siblings, 1 reply; 12+ messages in thread
From: Jerin Jacob @ 2016-01-29 3:21 UTC (permalink / raw)
To: dev; +Cc: viktorin
On Sun, Dec 06, 2015 at 08:54:28PM +0530, Jerin Jacob wrote:
> Introduced rte_prefetch_non_temporal() to remove IA specific _mm_prefect(addr, 0)
> gcc intrinsic and build examples/distributor for non 86 platform
ping for review.
>
> Not sure the rte_prefetch_non_temporal mapping correct for
> all the platforms. Architecture maintainers please check the mapping for
> rte_prefetch_non_temporal() for specific architecures
>
> Jerin Jacob (2):
> eal: introduce rte_prefetch_non_temporal
> examples/distributor: remove IA specific __mm_prefetch
>
> examples/distributor/main.c | 9 +++++----
> lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h | 5 +++++
> lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h | 5 +++++
> lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h | 5 +++++
> lib/librte_eal/common/include/arch/tile/rte_prefetch.h | 5 +++++
> lib/librte_eal/common/include/arch/x86/rte_prefetch.h | 5 +++++
> lib/librte_eal/common/include/generic/rte_prefetch.h | 12 ++++++++++++
> 7 files changed, 42 insertions(+), 4 deletions(-)
>
> --
> 2.1.0
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [dpdk-dev] [RFC PATCH 0/2] Fix examples/distributor build issue for non x86
2016-01-29 3:21 ` [dpdk-dev] [RFC PATCH 0/2] Fix examples/distributor build issue for non x86 Jerin Jacob
@ 2016-01-29 15:03 ` Bruce Richardson
2016-01-29 16:53 ` Jerin Jacob
0 siblings, 1 reply; 12+ messages in thread
From: Bruce Richardson @ 2016-01-29 15:03 UTC (permalink / raw)
To: Jerin Jacob; +Cc: dev, viktorin
On Fri, Jan 29, 2016 at 08:51:41AM +0530, Jerin Jacob wrote:
> On Sun, Dec 06, 2015 at 08:54:28PM +0530, Jerin Jacob wrote:
> > Introduced rte_prefetch_non_temporal() to remove IA specific _mm_prefect(addr, 0)
> > gcc intrinsic and build examples/distributor for non 86 platform
>
> ping for review.
>
Is there much performance difference between making this a prefetch NT vs making
it an rte_prefetch0 on the platforms you have tested?
/Bruce
> >
> > Not sure the rte_prefetch_non_temporal mapping correct for
> > all the platforms. Architecture maintainers please check the mapping for
> > rte_prefetch_non_temporal() for specific architecures
> >
> > Jerin Jacob (2):
> > eal: introduce rte_prefetch_non_temporal
> > examples/distributor: remove IA specific __mm_prefetch
> >
> > examples/distributor/main.c | 9 +++++----
> > lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h | 5 +++++
> > lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h | 5 +++++
> > lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h | 5 +++++
> > lib/librte_eal/common/include/arch/tile/rte_prefetch.h | 5 +++++
> > lib/librte_eal/common/include/arch/x86/rte_prefetch.h | 5 +++++
> > lib/librte_eal/common/include/generic/rte_prefetch.h | 12 ++++++++++++
> > 7 files changed, 42 insertions(+), 4 deletions(-)
> >
> > --
> > 2.1.0
> >
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [dpdk-dev] [RFC PATCH 0/2] Fix examples/distributor build issue for non x86
2016-01-29 15:03 ` Bruce Richardson
@ 2016-01-29 16:53 ` Jerin Jacob
0 siblings, 0 replies; 12+ messages in thread
From: Jerin Jacob @ 2016-01-29 16:53 UTC (permalink / raw)
To: Bruce Richardson; +Cc: dev, viktorin
On Fri, Jan 29, 2016 at 08:03:37AM -0700, Bruce Richardson wrote:
> On Fri, Jan 29, 2016 at 08:51:41AM +0530, Jerin Jacob wrote:
> > On Sun, Dec 06, 2015 at 08:54:28PM +0530, Jerin Jacob wrote:
> > > Introduced rte_prefetch_non_temporal() to remove IA specific _mm_prefect(addr, 0)
> > > gcc intrinsic and build examples/distributor for non 86 platform
> >
> > ping for review.
> >
>
> Is there much performance difference between making this a prefetch NT vs making
> it an rte_prefetch0 on the platforms you have tested?
Not much difference. But I think it's worth keeping the abstraction, as
both the IA and arm64 architectures support it.
Jerin
>
> /Bruce
>
> > >
> > > Not sure the rte_prefetch_non_temporal mapping correct for
> > > all the platforms. Architecture maintainers please check the mapping for
> > > rte_prefetch_non_temporal() for specific architecures
> > >
> > > Jerin Jacob (2):
> > > eal: introduce rte_prefetch_non_temporal
> > > examples/distributor: remove IA specific __mm_prefetch
> > >
> > > examples/distributor/main.c | 9 +++++----
> > > lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h | 5 +++++
> > > lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h | 5 +++++
> > > lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h | 5 +++++
> > > lib/librte_eal/common/include/arch/tile/rte_prefetch.h | 5 +++++
> > > lib/librte_eal/common/include/arch/x86/rte_prefetch.h | 5 +++++
> > > lib/librte_eal/common/include/generic/rte_prefetch.h | 12 ++++++++++++
> > > 7 files changed, 42 insertions(+), 4 deletions(-)
> > >
> > > --
> > > 2.1.0
> > >
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [dpdk-dev] [PATCH 1/2] eal: introduce rte_prefetch_non_temporal
2015-12-06 15:24 ` [dpdk-dev] [PATCH 1/2] eal: introduce rte_prefetch_non_temporal Jerin Jacob
@ 2016-02-11 11:00 ` Thomas Monjalon
2016-02-11 11:43 ` Jan Viktorin
1 sibling, 0 replies; 12+ messages in thread
From: Thomas Monjalon @ 2016-02-11 11:00 UTC (permalink / raw)
To: Jerin Jacob; +Cc: dev, viktorin
Please arch maintainers, your ack would be appreciated.
2015-12-06 20:54, Jerin Jacob:
> +static inline void rte_prefetch_non_temporal(const volatile void *p)
> +{
> + rte_prefetch0(p);
> +}
A comment about using the same instruction for temporal and non-temporal?
> +/**
> + * Prefetch a cache line into all cache levels(non-temporal/transient version)
space missing before paren
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [dpdk-dev] [PATCH 1/2] eal: introduce rte_prefetch_non_temporal
2015-12-06 15:24 ` [dpdk-dev] [PATCH 1/2] eal: introduce rte_prefetch_non_temporal Jerin Jacob
2016-02-11 11:00 ` Thomas Monjalon
@ 2016-02-11 11:43 ` Jan Viktorin
1 sibling, 0 replies; 12+ messages in thread
From: Jan Viktorin @ 2016-02-11 11:43 UTC (permalink / raw)
To: Jerin Jacob; +Cc: dev
I am OK with this patch.
On Sun, 6 Dec 2015 20:54:29 +0530
Jerin Jacob <jerin.jacob@caviumnetworks.com> wrote:
> non-temporal/transient/stream version of rte_prefetch0()
>
> The non-temporal prefetch is intended as a prefetch hint that processor
> will use the prefetched data only once or short period,
> unlike the rte_prefetch0() function which imply that
> prefetched data to use repeatedly.
>
> Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
Acked-by: Jan Viktorin <viktorin@rehivetech.com>
^ permalink raw reply [flat|nested] 12+ messages in thread
* [dpdk-dev] [PATCH v2 0/2] Fix examples/distributor build issue for non x86
2015-12-06 15:24 [dpdk-dev] [RFC PATCH 0/2] Fix examples/distributor build issue for non x86 Jerin Jacob
` (2 preceding siblings ...)
2016-01-29 3:21 ` [dpdk-dev] [RFC PATCH 0/2] Fix examples/distributor build issue for non x86 Jerin Jacob
@ 2016-02-12 11:13 ` Jerin Jacob
2016-02-12 11:13 ` [dpdk-dev] [PATCH v2 1/2] eal: introduce rte_prefetch_non_temporal Jerin Jacob
` (2 more replies)
3 siblings, 3 replies; 12+ messages in thread
From: Jerin Jacob @ 2016-02-12 11:13 UTC (permalink / raw)
To: dev; +Cc: viktorin
Introduce rte_prefetch_non_temporal() to replace the IA-specific _mm_prefetch(addr, 0)
gcc intrinsic, so that examples/distributor builds on non-x86 platforms.
I am not sure the rte_prefetch_non_temporal() mapping is correct for
all platforms. Architecture maintainers, please check the mapping of
rte_prefetch_non_temporal() for your specific architectures.
v1..v2
Addressed Thomas's review comments [1]
[1] http://dpdk.org/dev/patchwork/patch/9369/
Jerin Jacob (2):
eal: introduce rte_prefetch_non_temporal
examples/distributor: remove IA specific __mm_prefetch
examples/distributor/main.c | 9 +++++----
lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h | 6 ++++++
lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h | 5 +++++
lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h | 6 ++++++
lib/librte_eal/common/include/arch/tile/rte_prefetch.h | 6 ++++++
lib/librte_eal/common/include/arch/x86/rte_prefetch.h | 5 +++++
lib/librte_eal/common/include/generic/rte_prefetch.h | 12 ++++++++++++
7 files changed, 45 insertions(+), 4 deletions(-)
--
2.1.0
^ permalink raw reply [flat|nested] 12+ messages in thread
* [dpdk-dev] [PATCH v2 1/2] eal: introduce rte_prefetch_non_temporal
2016-02-12 11:13 ` [dpdk-dev] [PATCH v2 " Jerin Jacob
@ 2016-02-12 11:13 ` Jerin Jacob
2016-02-12 11:13 ` [dpdk-dev] [PATCH v2 2/2] examples/distributor: remove IA specific __mm_prefetch Jerin Jacob
2016-02-16 6:28 ` [dpdk-dev] [PATCH v2 0/2] Fix examples/distributor build issue for non x86 Thomas Monjalon
2 siblings, 0 replies; 12+ messages in thread
From: Jerin Jacob @ 2016-02-12 11:13 UTC (permalink / raw)
To: dev; +Cc: viktorin
Non-temporal/transient/streaming version of rte_prefetch0().
The non-temporal prefetch is intended as a hint that the processor
will use the prefetched data only once or for a short period,
unlike rte_prefetch0(), which implies that the
prefetched data will be used repeatedly.
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
Acked-by: Jan Viktorin <viktorin@rehivetech.com>
---
lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h | 6 ++++++
lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h | 5 +++++
lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h | 6 ++++++
lib/librte_eal/common/include/arch/tile/rte_prefetch.h | 6 ++++++
lib/librte_eal/common/include/arch/x86/rte_prefetch.h | 5 +++++
lib/librte_eal/common/include/generic/rte_prefetch.h | 12 ++++++++++++
6 files changed, 40 insertions(+)
diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
index b716384..5aeed22 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
@@ -54,6 +54,12 @@ static inline void rte_prefetch2(const volatile void *p)
asm volatile ("pld [%0]" : : "r" (p));
}
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+ /* non-temporal version not available, fallback to rte_prefetch0 */
+ rte_prefetch0(p);
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h
index f9cc62e..3ed46a4 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h
@@ -54,6 +54,11 @@ static inline void rte_prefetch2(const volatile void *p)
asm volatile ("PRFM PLDL3KEEP, [%0]" : : "r" (p));
}
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+ asm volatile ("PRFM PLDL1STRM, [%0]" : : "r" (p));
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
index fea3be1..bcc7185 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
@@ -54,6 +54,12 @@ static inline void rte_prefetch2(const volatile void *p)
asm volatile ("dcbt 0,%[p],1" : : [p] "r" (p));
}
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+ /* non-temporal version not available, fallback to rte_prefetch0 */
+ rte_prefetch0(p);
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eal/common/include/arch/tile/rte_prefetch.h b/lib/librte_eal/common/include/arch/tile/rte_prefetch.h
index c94075c..7a1bb93 100644
--- a/lib/librte_eal/common/include/arch/tile/rte_prefetch.h
+++ b/lib/librte_eal/common/include/arch/tile/rte_prefetch.h
@@ -54,6 +54,12 @@ static inline void rte_prefetch2(const volatile void *p)
__builtin_prefetch((const void *)(uintptr_t)p, 0, 1);
}
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+ /* non-temporal version not available, fallback to rte_prefetch0 */
+ rte_prefetch0(p);
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eal/common/include/arch/x86/rte_prefetch.h b/lib/librte_eal/common/include/arch/x86/rte_prefetch.h
index 8e6e02c..5dac47e 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_prefetch.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_prefetch.h
@@ -55,6 +55,11 @@ static inline void rte_prefetch2(const volatile void *p)
asm volatile ("prefetcht2 %[p]" : : [p] "m" (*(const volatile char *)p));
}
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+ asm volatile ("prefetchnta %[p]" : : [p] "m" (*(const volatile char *)p));
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eal/common/include/generic/rte_prefetch.h b/lib/librte_eal/common/include/generic/rte_prefetch.h
index 725715f..07e409e 100644
--- a/lib/librte_eal/common/include/generic/rte_prefetch.h
+++ b/lib/librte_eal/common/include/generic/rte_prefetch.h
@@ -68,4 +68,16 @@ static inline void rte_prefetch1(const volatile void *p);
*/
static inline void rte_prefetch2(const volatile void *p);
+/**
+ * Prefetch a cache line into all cache levels (non-temporal/transient version)
+ *
+ * The non-temporal prefetch is intended as a prefetch hint that processor will
+ * use the prefetched data only once or short period, unlike the
+ * rte_prefetch0() function which imply that prefetched data to use repeatedly.
+ *
+ * @param p
+ * Address to prefetch
+ */
+static inline void rte_prefetch_non_temporal(const volatile void *p);
+
#endif /* _RTE_PREFETCH_H_ */
--
2.1.0
^ permalink raw reply [flat|nested] 12+ messages in thread
* [dpdk-dev] [PATCH v2 2/2] examples/distributor: remove IA specific __mm_prefetch
2016-02-12 11:13 ` [dpdk-dev] [PATCH v2 " Jerin Jacob
2016-02-12 11:13 ` [dpdk-dev] [PATCH v2 1/2] eal: introduce rte_prefetch_non_temporal Jerin Jacob
@ 2016-02-12 11:13 ` Jerin Jacob
2016-02-16 6:28 ` [dpdk-dev] [PATCH v2 0/2] Fix examples/distributor build issue for non x86 Thomas Monjalon
2 siblings, 0 replies; 12+ messages in thread
From: Jerin Jacob @ 2016-02-12 11:13 UTC (permalink / raw)
To: dev; +Cc: viktorin
Use the rte_prefetch_non_temporal() abstraction instead of _mm_prefetch(x, 0)
in order to build the distributor application for non-x86 platforms.
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
examples/distributor/main.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/examples/distributor/main.c b/examples/distributor/main.c
index 4e74f8f..c0201a9 100644
--- a/examples/distributor/main.c
+++ b/examples/distributor/main.c
@@ -42,6 +42,7 @@
#include <rte_cycles.h>
#include <rte_malloc.h>
#include <rte_debug.h>
+#include <rte_prefetch.h>
#include <rte_distributor.h>
#define RX_RING_SIZE 256
@@ -335,13 +336,13 @@ lcore_tx(struct rte_ring *in_r)
/* for traffic we receive, queue it up for transmit */
uint16_t i;
- _mm_prefetch((void *)bufs[0], 0);
- _mm_prefetch((void *)bufs[1], 0);
- _mm_prefetch((void *)bufs[2], 0);
+ rte_prefetch_non_temporal((void *)bufs[0]);
+ rte_prefetch_non_temporal((void *)bufs[1]);
+ rte_prefetch_non_temporal((void *)bufs[2]);
for (i = 0; i < nb_rx; i++) {
struct output_buffer *outbuf;
uint8_t outp;
- _mm_prefetch((void *)bufs[i + 3], 0);
+ rte_prefetch_non_temporal((void *)bufs[i + 3]);
/*
* workers should update in_port to hold the
* output port value
--
2.1.0
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [dpdk-dev] [PATCH v2 0/2] Fix examples/distributor build issue for non x86
2016-02-12 11:13 ` [dpdk-dev] [PATCH v2 " Jerin Jacob
2016-02-12 11:13 ` [dpdk-dev] [PATCH v2 1/2] eal: introduce rte_prefetch_non_temporal Jerin Jacob
2016-02-12 11:13 ` [dpdk-dev] [PATCH v2 2/2] examples/distributor: remove IA specific __mm_prefetch Jerin Jacob
@ 2016-02-16 6:28 ` Thomas Monjalon
2 siblings, 0 replies; 12+ messages in thread
From: Thomas Monjalon @ 2016-02-16 6:28 UTC (permalink / raw)
To: Jerin Jacob; +Cc: dev, viktorin
2016-02-12 16:43, Jerin Jacob:
> Introduced rte_prefetch_non_temporal() to remove IA specific _mm_prefect(addr, 0)
> gcc intrinsic and build examples/distributor for non 86 platform
>
> Not sure the rte_prefetch_non_temporal mapping correct for
> all the platforms. Architecture maintainers please check the mapping for
> rte_prefetch_non_temporal() for specific architecures
>
> v1..v2
>
> Addessed Thomas's review comments[1]
> [1] http://dpdk.org/dev/patchwork/patch/9369/
>
> Jerin Jacob (2):
> eal: introduce rte_prefetch_non_temporal
> examples/distributor: remove IA specific __mm_prefetch
Applied, thanks
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2016-02-16 6:30 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-12-06 15:24 [dpdk-dev] [RFC PATCH 0/2] Fix examples/distributor build issue for non x86 Jerin Jacob
2015-12-06 15:24 ` [dpdk-dev] [PATCH 1/2] eal: introduce rte_prefetch_non_temporal Jerin Jacob
2016-02-11 11:00 ` Thomas Monjalon
2016-02-11 11:43 ` Jan Viktorin
2015-12-06 15:24 ` [dpdk-dev] [PATCH 2/2] examples/distributor: remove IA specific __mm_prefetch Jerin Jacob
2016-01-29 3:21 ` [dpdk-dev] [RFC PATCH 0/2] Fix examples/distributor build issue for non x86 Jerin Jacob
2016-01-29 15:03 ` Bruce Richardson
2016-01-29 16:53 ` Jerin Jacob
2016-02-12 11:13 ` [dpdk-dev] [PATCH v2 " Jerin Jacob
2016-02-12 11:13 ` [dpdk-dev] [PATCH v2 1/2] eal: introduce rte_prefetch_non_temporal Jerin Jacob
2016-02-12 11:13 ` [dpdk-dev] [PATCH v2 2/2] examples/distributor: remove IA specific __mm_prefetch Jerin Jacob
2016-02-16 6:28 ` [dpdk-dev] [PATCH v2 0/2] Fix examples/distributor build issue for non x86 Thomas Monjalon
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).