* [dpdk-dev] [PATCH] use generated flags for SSE and AVX checks
@ 2016-12-05 15:34 Thomas Monjalon
2016-12-05 15:59 ` Thomas Monjalon
0 siblings, 1 reply; 3+ messages in thread
From: Thomas Monjalon @ 2016-12-05 15:34 UTC (permalink / raw)
To: dev
Clean up the code to always use the flags RTE_MACHINE_CPUFLAG_*
generated by the DPDK makefile rte.cpuflags.mk.
Signed-off-by: Thomas Monjalon <thomas.monjalon@6wind.com>
---
examples/l3fwd/l3fwd_em.c | 8 ++++----
examples/l3fwd/l3fwd_lpm.c | 6 +++---
examples/performance-thread/l3fwd-thread/main.c | 2 +-
lib/librte_eal/common/include/arch/x86/rte_vect.h | 14 +++++++-------
lib/librte_eal/common/include/rte_common.h | 2 +-
lib/librte_hash/rte_thash.h | 8 +++-----
lib/librte_sched/rte_sched.c | 2 +-
lib/librte_table/rte_lru.h | 2 +-
8 files changed, 21 insertions(+), 23 deletions(-)
diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index 9cc4460..6714430 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -246,7 +246,7 @@ static rte_xmm_t mask0;
static rte_xmm_t mask1;
static rte_xmm_t mask2;
-#if defined(__SSE2__)
+#if defined(RTE_MACHINE_CPUFLAG_SSE2)
static inline xmm_t
em_mask_key(void *key, xmm_t mask)
{
@@ -328,7 +328,7 @@ em_get_ipv6_dst_port(void *ipv6_hdr, uint8_t portid, void *lookup_struct)
return (uint8_t)((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]);
}
-#if defined(__SSE4_1__)
+#if defined(RTE_MACHINE_CPUFLAG_SSE4_1)
#if defined(NO_HASH_MULTI_LOOKUP)
#include "l3fwd_em_sse.h"
#else
@@ -709,13 +709,13 @@ em_main_loop(__attribute__((unused)) void *dummy)
if (nb_rx == 0)
continue;
-#if defined(__SSE4_1__)
+#if defined(RTE_MACHINE_CPUFLAG_SSE4_1)
l3fwd_em_send_packets(nb_rx, pkts_burst,
portid, qconf);
#else
l3fwd_em_no_opt_send_packets(nb_rx, pkts_burst,
portid, qconf);
-#endif /* __SSE_4_1__ */
+#endif /* SSE_4_1 */
}
}
diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
index f621269..005534d 100644
--- a/examples/l3fwd/l3fwd_lpm.c
+++ b/examples/l3fwd/l3fwd_lpm.c
@@ -104,7 +104,7 @@ static struct ipv6_l3fwd_lpm_route ipv6_l3fwd_lpm_route_array[] = {
struct rte_lpm *ipv4_l3fwd_lpm_lookup_struct[NB_SOCKETS];
struct rte_lpm6 *ipv6_l3fwd_lpm_lookup_struct[NB_SOCKETS];
-#if defined(__SSE4_1__)
+#if defined(RTE_MACHINE_CPUFLAG_SSE4_1)
#include "l3fwd_lpm_sse.h"
#else
#include "l3fwd_lpm.h"
@@ -178,13 +178,13 @@ lpm_main_loop(__attribute__((unused)) void *dummy)
if (nb_rx == 0)
continue;
-#if defined(__SSE4_1__)
+#if defined(RTE_MACHINE_CPUFLAG_SSE4_1)
l3fwd_lpm_send_packets(nb_rx, pkts_burst,
portid, qconf);
#else
l3fwd_lpm_no_opt_send_packets(nb_rx, pkts_burst,
portid, qconf);
-#endif /* __SSE_4_1__ */
+#endif /* SSE_4_1 */
}
}
diff --git a/examples/performance-thread/l3fwd-thread/main.c b/examples/performance-thread/l3fwd-thread/main.c
index fdc90b2..0917aa1 100644
--- a/examples/performance-thread/l3fwd-thread/main.c
+++ b/examples/performance-thread/l3fwd-thread/main.c
@@ -95,7 +95,7 @@
* When set to one, optimized forwarding path is enabled.
* Note that LPM optimisation path uses SSE4.1 instructions.
*/
-#if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && !defined(__SSE4_1__))
+#if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && !defined(RTE_MACHINE_CPUFLAG_SSE4_1))
#define ENABLE_MULTI_BUFFER_OPTIMIZE 0
#else
#define ENABLE_MULTI_BUFFER_OPTIMIZE 1
diff --git a/lib/librte_eal/common/include/arch/x86/rte_vect.h b/lib/librte_eal/common/include/arch/x86/rte_vect.h
index 77f2e25..56b53b7 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_vect.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_vect.h
@@ -44,23 +44,23 @@
#if (defined(__ICC) || (__GNUC__ == 4 && __GNUC_MINOR__ < 4))
-#ifdef __SSE__
+#ifdef RTE_MACHINE_CPUFLAG_SSE
#include <xmmintrin.h>
#endif
-#ifdef __SSE2__
+#ifdef RTE_MACHINE_CPUFLAG_SSE2
#include <emmintrin.h>
#endif
-#ifdef __SSE3__
+#ifdef RTE_MACHINE_CPUFLAG_SSE3
#include <tmmintrin.h>
#endif
-#if defined(__SSE4_2__) || defined(__SSE4_1__)
+#if defined(RTE_MACHINE_CPUFLAG_SSE4_2) || defined(RTE_MACHINE_CPUFLAG_SSE4_1)
#include <smmintrin.h>
#endif
-#if defined(__AVX__)
+#if defined(RTE_MACHINE_CPUFLAG_AVX)
#include <immintrin.h>
#endif
@@ -88,7 +88,7 @@ typedef union rte_xmm {
double pd[XMM_SIZE / sizeof(double)];
} rte_xmm_t;
-#ifdef __AVX__
+#ifdef RTE_MACHINE_CPUFLAG_AVX
typedef __m256i ymm_t;
@@ -105,7 +105,7 @@ typedef union rte_ymm {
double pd[YMM_SIZE / sizeof(double)];
} rte_ymm_t;
-#endif /* __AVX__ */
+#endif /* AVX */
#ifdef RTE_ARCH_I686
#define _mm_cvtsi128_si64(a) \
diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h
index db5ac91..bc0f4cd 100644
--- a/lib/librte_eal/common/include/rte_common.h
+++ b/lib/librte_eal/common/include/rte_common.h
@@ -294,7 +294,7 @@ rte_align64pow2(uint64_t v)
/*********** Other general functions / macros ********/
-#ifdef __SSE2__
+#ifdef RTE_MACHINE_CPUFLAG_SSE2
#include <emmintrin.h>
/**
* PAUSE instruction for tight loops (avoid busy waiting)
diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h
index a4886a8..9a352bd 100644
--- a/lib/librte_hash/rte_thash.h
+++ b/lib/librte_hash/rte_thash.h
@@ -56,11 +56,9 @@ extern "C" {
#include <rte_ip.h>
#include <rte_common.h>
-#ifdef __SSE3__
+#ifdef RTE_MACHINE_CPUFLAG_SSE3
#include <rte_vect.h>
-#endif
-#ifdef __SSE3__
/* Byte swap mask used for converting IPv6 address
* 4-byte chunks to CPU byte order
*/
@@ -134,7 +132,7 @@ struct rte_ipv6_tuple {
union rte_thash_tuple {
struct rte_ipv4_tuple v4;
struct rte_ipv6_tuple v6;
-#ifdef __SSE3__
+#ifdef RTE_MACHINE_CPUFLAG_SSE3
} __attribute__((aligned(XMM_SIZE)));
#else
};
@@ -169,7 +167,7 @@ rte_convert_rss_key(const uint32_t *orig, uint32_t *targ, int len)
static inline void
rte_thash_load_v6_addrs(const struct ipv6_hdr *orig, union rte_thash_tuple *targ)
{
-#ifdef __SSE3__
+#ifdef RTE_MACHINE_CPUFLAG_SSE3
__m128i ipv6 = _mm_loadu_si128((const __m128i *)orig->src_addr);
*(__m128i *)targ->v6.src_addr =
_mm_shuffle_epi8(ipv6, rte_thash_ipv6_bswap_mask);
diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
index e6dace2..c593363 100644
--- a/lib/librte_sched/rte_sched.c
+++ b/lib/librte_sched/rte_sched.c
@@ -56,7 +56,7 @@
#ifdef RTE_SCHED_VECTOR
#include <rte_vect.h>
-#if defined(__SSE4__)
+#ifdef RTE_MACHINE_CPUFLAG_SSE4
#define SCHED_VECTOR_SSE4
#endif
diff --git a/lib/librte_table/rte_lru.h b/lib/librte_table/rte_lru.h
index e87e062..3d677c8 100644
--- a/lib/librte_table/rte_lru.h
+++ b/lib/librte_table/rte_lru.h
@@ -47,7 +47,7 @@ extern "C" {
#endif
#ifndef RTE_TABLE_HASH_LRU_STRATEGY
-#ifdef __SSE4_2__
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
#define RTE_TABLE_HASH_LRU_STRATEGY 2
#else /* if no SSE, use simple scalar version */
#define RTE_TABLE_HASH_LRU_STRATEGY 1
--
2.7.0
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [dpdk-dev] [PATCH] use generated flags for SSE and AVX checks
2016-12-05 15:34 [dpdk-dev] [PATCH] use generated flags for SSE and AVX checks Thomas Monjalon
@ 2016-12-05 15:59 ` Thomas Monjalon
2019-01-15 18:23 ` Ferruh Yigit
0 siblings, 1 reply; 3+ messages in thread
From: Thomas Monjalon @ 2016-12-05 15:59 UTC (permalink / raw)
To: dev
2016-12-05 16:34, Thomas Monjalon:
> Clean up the code to always use the flags RTE_MACHINE_CPUFLAG_*
> generated by the DPDK makefile rte.cpuflags.mk.
This patch does not work because RTE_MACHINE_CPUFLAG_* are generated
for the whole library when including rte.vars.mk.
So the flags are not accurate when overriding the flags per file like
it is done in rte_acl.
So the questions are:
- should we use RTE_MACHINE_CPUFLAG_?
- should we override the flags per file?
- will we be able to use the function attribute __target__?
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [dpdk-dev] [PATCH] use generated flags for SSE and AVX checks
2016-12-05 15:59 ` Thomas Monjalon
@ 2019-01-15 18:23 ` Ferruh Yigit
0 siblings, 0 replies; 3+ messages in thread
From: Ferruh Yigit @ 2019-01-15 18:23 UTC (permalink / raw)
To: Thomas Monjalon; +Cc: dpdk-dev
On 12/5/2016 3:59 PM, thomas.monjalon at 6wind.com (Thomas Monjalon) wrote:
> 2016-12-05 16:34, Thomas Monjalon:
>> Clean up the code to always use the flags RTE_MACHINE_CPUFLAG_*
>> generated by the DPDK makefile rte.cpuflags.mk.
>
> This patch does not work because RTE_MACHINE_CPUFLAG_* are generated
> for the whole library when including rte.vars.mk.
> So the flags are not accurate when overriding the flags per file like
> it is done in rte_acl.
>
> So the questions are:
> - should we use RTE_MACHINE_CPUFLAG_?
> - should we override the flags per file?
> - will we be able to use the function attribute __target__?
>
Hi Thomas,
This patch has been waiting for a comment for two years. Is it still valid? If not,
can we mark it as rejected?
For the record, it is: https://patches.dpdk.org/patch/17684/
Thanks,
ferruh
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2019-01-15 18:23 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-12-05 15:34 [dpdk-dev] [PATCH] use generated flags for SSE and AVX checks Thomas Monjalon
2016-12-05 15:59 ` Thomas Monjalon
2019-01-15 18:23 ` Ferruh Yigit
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).