From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 6D62343343; Thu, 16 Nov 2023 11:13:01 +0100 (CET) Received: from mails.dpdk.org (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id EECE0402B2; Thu, 16 Nov 2023 11:13:00 +0100 (CET) Received: from mail.lysator.liu.se (mail.lysator.liu.se [130.236.254.3]) by mails.dpdk.org (Postfix) with ESMTP id EF0854027D for ; Thu, 16 Nov 2023 11:12:59 +0100 (CET) Received: from mail.lysator.liu.se (localhost [127.0.0.1]) by mail.lysator.liu.se (Postfix) with ESMTP id 4E67DB12E for ; Thu, 16 Nov 2023 11:12:59 +0100 (CET) Received: by mail.lysator.liu.se (Postfix, from userid 1004) id 2DBE4B617; Thu, 16 Nov 2023 11:12:59 +0100 (CET) X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on hermod.lysator.liu.se X-Spam-Level: X-Spam-Status: No, score=-1.5 required=5.0 tests=ALL_TRUSTED,AWL, T_SCC_BODY_TEXT_LINE autolearn=disabled version=3.4.6 X-Spam-Score: -1.5 Received: from [192.168.1.59] (h-62-63-215-114.A163.priv.bahnhof.se [62.63.215.114]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange ECDHE (P-256) server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by mail.lysator.liu.se (Postfix) with ESMTPSA id DAEE4B262; Thu, 16 Nov 2023 11:12:55 +0100 (CET) Message-ID: Date: Thu, 16 Nov 2023 11:12:55 +0100 MIME-Version: 1.0 User-Agent: Mozilla Thunderbird Subject: Re: [PATCH] eal: use C11 alignas instead of GCC attribute aligned To: Tyler Retzlaff , dev@dpdk.org Cc: =?UTF-8?Q?Mattias_R=C3=B6nnblom?= , Anatoly Burakov , Bruce Richardson , David Christensen , Harry van Haaren , Konstantin Ananyev , Min Zhou , Ruifeng Wang , Stanislaw Kardach References: <1700069997-4399-1-git-send-email-roretzla@linux.microsoft.com> <1700069997-4399-2-git-send-email-roretzla@linux.microsoft.com> Content-Language: en-US From: =?UTF-8?Q?Mattias_R=C3=B6nnblom?= In-Reply-To: <1700069997-4399-2-git-send-email-roretzla@linux.microsoft.com> Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 7bit X-Virus-Scanned: ClamAV using ClamSMTP X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org On 2023-11-15 18:39, Tyler Retzlaff wrote: > Now that we have enabled C11 replace the use of __rte_cache_aligned > and __rte_aligned(n) with alignas(RTE_CACHE_LINE_SIZE) and > __rte_aligned(n) respectively. > > Signed-off-by: Tyler Retzlaff > --- > lib/eal/arm/include/rte_vect.h | 4 +++- > lib/eal/common/malloc_elem.h | 4 +++- > lib/eal/common/malloc_heap.h | 4 +++- > lib/eal/common/rte_keepalive.c | 4 +++- > lib/eal/common/rte_random.c | 5 ++++- > lib/eal/common/rte_service.c | 7 +++++-- > lib/eal/include/generic/rte_atomic.h | 4 +++- > lib/eal/loongarch/include/rte_vect.h | 7 +++++-- > lib/eal/ppc/include/rte_vect.h | 5 ++++- > lib/eal/riscv/include/rte_vect.h | 4 +++- > lib/eal/x86/include/rte_vect.h | 4 +++- > lib/eal/x86/rte_power_intrinsics.c | 8 ++++++-- > 12 files changed, 45 insertions(+), 15 deletions(-) > > diff --git a/lib/eal/arm/include/rte_vect.h b/lib/eal/arm/include/rte_vect.h > index 8cfe4bd..c7a3b2e 100644 > --- a/lib/eal/arm/include/rte_vect.h > +++ b/lib/eal/arm/include/rte_vect.h > @@ -5,6 +5,7 @@ > #ifndef _RTE_VECT_ARM_H_ > #define _RTE_VECT_ARM_H_ > > +#include > #include > #include "generic/rte_vect.h" > #include "rte_debug.h" > @@ -25,13 +26,14 @@ > #define XMM_MASK (XMM_SIZE - 1) > > typedef union rte_xmm { > + alignas(16) > xmm_t x > uint8_t u8[XMM_SIZE / sizeof(uint8_t)]; > uint16_t u16[XMM_SIZE / sizeof(uint16_t)]; > uint32_t u32[XMM_SIZE / sizeof(uint32_t)]; > uint64_t u64[XMM_SIZE / sizeof(uint64_t)]; > double pd[XMM_SIZE / sizeof(double)]; > -} __rte_aligned(16) rte_xmm_t; > +} rte_xmm_t; > > #if defined(RTE_ARCH_ARM) && defined(RTE_ARCH_32) > /* NEON intrinsic vqtbl1q_u8() is not supported in ARMv7-A(AArch32) */ > diff --git a/lib/eal/common/malloc_elem.h b/lib/eal/common/malloc_elem.h > index 952ce73..c2c336e 100644 > --- a/lib/eal/common/malloc_elem.h > +++ b/lib/eal/common/malloc_elem.h > @@ -5,6 +5,7 @@ > #ifndef MALLOC_ELEM_H_ > #define MALLOC_ELEM_H_ > > +#include > #include > > #include > @@ -21,6 +22,7 @@ enum elem_state { > }; > > struct malloc_elem { > + alignas(RTE_CACHE_LINE_SIZE) > struct malloc_heap *heap; > struct malloc_elem *volatile prev; > /**< points to prev elem in memseg */ > @@ -48,7 +50,7 @@ struct malloc_elem { > size_t user_size; > uint64_t asan_cookie[2]; /* must be next to header_cookie */ > #endif > -} __rte_cache_aligned; > +}; > > static const unsigned int MALLOC_ELEM_HEADER_LEN = sizeof(struct malloc_elem); > > diff --git a/lib/eal/common/malloc_heap.h b/lib/eal/common/malloc_heap.h > index 8f3ab57..a724bfb 100644 > --- a/lib/eal/common/malloc_heap.h > +++ b/lib/eal/common/malloc_heap.h > @@ -5,6 +5,7 @@ > #ifndef MALLOC_HEAP_H_ > #define MALLOC_HEAP_H_ > > +#include > #include > #include > > @@ -22,6 +23,7 @@ > * Structure to hold malloc heap > */ > struct malloc_heap { > + alignas(RTE_CACHE_LINE_SIZE) > rte_spinlock_t lock; > LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS]; > struct malloc_elem *volatile first; > @@ -31,7 +33,7 @@ struct malloc_heap { > unsigned int socket_id; > size_t total_size; > char name[RTE_HEAP_NAME_MAX_LEN]; > -} __rte_cache_aligned; > +}; > > void * > malloc_heap_alloc(const char *type, size_t size, int socket, unsigned int flags, > diff --git a/lib/eal/common/rte_keepalive.c b/lib/eal/common/rte_keepalive.c > index e0494b2..67a898d 100644 > --- a/lib/eal/common/rte_keepalive.c > +++ b/lib/eal/common/rte_keepalive.c > @@ -3,6 +3,7 @@ > */ > > #include > +#include > > #include > #include > @@ -17,7 +18,8 @@ struct rte_keepalive { > /* > * Each element must be cache aligned to prevent false sharing. > */ > - enum rte_keepalive_state core_state __rte_cache_aligned; > + alignas(RTE_CACHE_LINE_SIZE) > + enum rte_keepalive_state core_state; > } live_data[RTE_KEEPALIVE_MAXCORES]; > > /** Last-seen-alive timestamps */ > diff --git a/lib/eal/common/rte_random.c b/lib/eal/common/rte_random.c > index 7709b8f..c04917e 100644 > --- a/lib/eal/common/rte_random.c > +++ b/lib/eal/common/rte_random.c > @@ -2,6 +2,8 @@ > * Copyright(c) 2019 Ericsson AB > */ > > +#include > + > #ifdef __RDSEED__ > #include > #endif > @@ -14,13 +16,14 @@ > #include > > struct rte_rand_state { > + alignas(RTE_CACHE_LINE_SIZE) > uint64_t z1; Formatting convention question: the alignas(n) and the field shouldn't be on the same line? It could be useful to have a macro, so it would be: RTE_CACHE_ALIGNAS uint64_t z1; ...which is horter than: alignas(RTE_CACHE_LINE_SIZE) uint64_t z1; and by tomorrow, it will feel as natural and obvious as the open-coded version. I don't know. Just some thoughts. > uint64_t z2; > uint64_t z3; > uint64_t z4; > uint64_t z5; > RTE_CACHE_GUARD; > -} __rte_cache_aligned; > +}; > > /* One instance each for every lcore id-equipped thread, and one > * additional instance to be shared by all others threads (i.e., all > diff --git a/lib/eal/common/rte_service.c b/lib/eal/common/rte_service.c > index e183d2e..861ae31 100644 > --- a/lib/eal/common/rte_service.c > +++ b/lib/eal/common/rte_service.c > @@ -2,6 +2,7 @@ > * Copyright(c) 2017 Intel Corporation > */ > > +#include > #include > #include > #include > @@ -33,6 +34,7 @@ > > /* internal representation of a service */ > struct rte_service_spec_impl { > + alignas(RTE_CACHE_LINE_SIZE) > /* public part of the struct */ > struct rte_service_spec spec; > > @@ -53,7 +55,7 @@ struct rte_service_spec_impl { > * on currently. > */ > RTE_ATOMIC(uint32_t) num_mapped_cores; > -} __rte_cache_aligned; > +}; > > struct service_stats { > RTE_ATOMIC(uint64_t) calls; > @@ -62,6 +64,7 @@ struct service_stats { > > /* the internal values of a service core */ > struct core_state { > + alignas(RTE_CACHE_LINE_SIZE) > /* map of services IDs are run on this core */ > uint64_t service_mask; > RTE_ATOMIC(uint8_t) runstate; /* running or stopped */ > @@ -71,7 +74,7 @@ struct core_state { > RTE_ATOMIC(uint64_t) loops; > RTE_ATOMIC(uint64_t) cycles; > struct service_stats service_stats[RTE_SERVICE_NUM_MAX]; > -} __rte_cache_aligned; > +}; > > static uint32_t rte_service_count; > static struct rte_service_spec_impl *rte_services; > diff --git a/lib/eal/include/generic/rte_atomic.h b/lib/eal/include/generic/rte_atomic.h > index 0e639da..bc9213c 100644 > --- a/lib/eal/include/generic/rte_atomic.h > +++ b/lib/eal/include/generic/rte_atomic.h > @@ -12,6 +12,7 @@ > * This file defines a generic API for atomic operations. > */ > > +#include > #include > > #include > @@ -1096,6 +1097,7 @@ static inline void rte_atomic64_clear(rte_atomic64_t *v) > */ > typedef struct { > union { > + alignas(16) > uint64_t val[2]; > #ifdef RTE_ARCH_64 > #ifndef RTE_TOOLCHAIN_MSVC > @@ -1103,7 +1105,7 @@ static inline void rte_atomic64_clear(rte_atomic64_t *v) > #endif > #endif > }; > -} __rte_aligned(16) rte_int128_t; > +} rte_int128_t; > > #ifdef __DOXYGEN__ > > diff --git a/lib/eal/loongarch/include/rte_vect.h b/lib/eal/loongarch/include/rte_vect.h > index 1546515..856d87b 100644 > --- a/lib/eal/loongarch/include/rte_vect.h > +++ b/lib/eal/loongarch/include/rte_vect.h > @@ -5,6 +5,7 @@ > #ifndef RTE_VECT_LOONGARCH_H > #define RTE_VECT_LOONGARCH_H > > +#include > #include > #include "generic/rte_vect.h" > #include "rte_common.h" > @@ -16,6 +17,7 @@ > #define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_DISABLED > > typedef union xmm { > + alignas(16) > int8_t i8[16]; > int16_t i16[8]; > int32_t i32[4]; > @@ -25,19 +27,20 @@ > uint32_t u32[4]; > uint64_t u64[2]; > double pd[2]; > -} __rte_aligned(16) xmm_t; > +} xmm_t; > > #define XMM_SIZE (sizeof(xmm_t)) > #define XMM_MASK (XMM_SIZE - 1) > > typedef union rte_xmm { > + alignas(16) > xmm_t x; > uint8_t u8[XMM_SIZE / sizeof(uint8_t)]; > uint16_t u16[XMM_SIZE / sizeof(uint16_t)]; > uint32_t u32[XMM_SIZE / sizeof(uint32_t)]; > uint64_t u64[XMM_SIZE / sizeof(uint64_t)]; > double pd[XMM_SIZE / sizeof(double)]; > -} __rte_aligned(16) rte_xmm_t; > +} rte_xmm_t; > > static inline xmm_t > vect_load_128(void *p) > diff --git a/lib/eal/ppc/include/rte_vect.h b/lib/eal/ppc/include/rte_vect.h > index a5f009b..e6702a4 100644 > --- a/lib/eal/ppc/include/rte_vect.h > +++ b/lib/eal/ppc/include/rte_vect.h > @@ -6,6 +6,8 @@ > #ifndef _RTE_VECT_PPC_64_H_ > #define _RTE_VECT_PPC_64_H_ > > +#include > + > #include "rte_altivec.h" > > #include "generic/rte_vect.h" > @@ -23,13 +25,14 @@ > #define XMM_MASK (XMM_SIZE - 1) > > typedef union rte_xmm { > + alignas(16) > xmm_t x; > uint8_t u8[XMM_SIZE / sizeof(uint8_t)]; > uint16_t u16[XMM_SIZE / sizeof(uint16_t)]; > uint32_t u32[XMM_SIZE / sizeof(uint32_t)]; > uint64_t u64[XMM_SIZE / sizeof(uint64_t)]; > double pd[XMM_SIZE / sizeof(double)]; > -} __rte_aligned(16) rte_xmm_t; > +} rte_xmm_t; > > #ifdef __cplusplus > } > diff --git a/lib/eal/riscv/include/rte_vect.h b/lib/eal/riscv/include/rte_vect.h > index 2f97f43..32d4386 100644 > --- a/lib/eal/riscv/include/rte_vect.h > +++ b/lib/eal/riscv/include/rte_vect.h > @@ -7,6 +7,7 @@ > #ifndef RTE_VECT_RISCV_H > #define RTE_VECT_RISCV_H > > +#include > #include > #include "generic/rte_vect.h" > #include "rte_common.h" > @@ -23,13 +24,14 @@ > #define XMM_MASK (XMM_SIZE - 1) > > typedef union rte_xmm { > + alignas(16) /* !! NOTE !! changed to 16 it looks like this was a bug? */ > xmm_t x; > uint8_t u8[XMM_SIZE / sizeof(uint8_t)]; > uint16_t u16[XMM_SIZE / sizeof(uint16_t)]; > uint32_t u32[XMM_SIZE / sizeof(uint32_t)]; > uint64_t u64[XMM_SIZE / sizeof(uint64_t)]; > double pd[XMM_SIZE / sizeof(double)]; > -} __rte_aligned(8) rte_xmm_t; > +} rte_xmm_t; > > static inline xmm_t > vect_load_128(void *p) > diff --git a/lib/eal/x86/include/rte_vect.h b/lib/eal/x86/include/rte_vect.h > index 560f9e4..2e5669d 100644 > --- a/lib/eal/x86/include/rte_vect.h > +++ b/lib/eal/x86/include/rte_vect.h > @@ -11,6 +11,7 @@ > * RTE SSE/AVX related header. > */ > > +#include > #include > #include > #include > @@ -92,6 +93,7 @@ > #define RTE_X86_ZMM_MASK (RTE_X86_ZMM_SIZE - 1) > > typedef union __rte_x86_zmm { > + alignas(RTE_X86_ZMM_SIZE) > __m512i z; > ymm_t y[RTE_X86_ZMM_SIZE / sizeof(ymm_t)]; > xmm_t x[RTE_X86_ZMM_SIZE / sizeof(xmm_t)]; > @@ -100,7 +102,7 @@ > uint32_t u32[RTE_X86_ZMM_SIZE / sizeof(uint32_t)]; > uint64_t u64[RTE_X86_ZMM_SIZE / sizeof(uint64_t)]; > double pd[RTE_X86_ZMM_SIZE / sizeof(double)]; > -} __rte_aligned(RTE_X86_ZMM_SIZE) __rte_x86_zmm_t; > +} __rte_x86_zmm_t; > > #endif /* __AVX512F__ */ > > diff --git a/lib/eal/x86/rte_power_intrinsics.c b/lib/eal/x86/rte_power_intrinsics.c > index 532a2e6..5636543 100644 > --- a/lib/eal/x86/rte_power_intrinsics.c > +++ b/lib/eal/x86/rte_power_intrinsics.c > @@ -2,6 +2,8 @@ > * Copyright(c) 2020 Intel Corporation > */ > > +#include > + > #include > #include > #include > @@ -13,9 +15,10 @@ > * Per-lcore structure holding current status of C0.2 sleeps. > */ > static struct power_wait_status { > + alignas(RTE_CACHE_LINE_SIZE) > rte_spinlock_t lock; > volatile void *monitor_addr; /**< NULL if not currently sleeping */ > -} __rte_cache_aligned wait_status[RTE_MAX_LCORE]; > +} wait_status[RTE_MAX_LCORE]; > > /* > * This function uses UMONITOR/UMWAIT instructions and will enter C0.2 state. > @@ -86,9 +89,10 @@ static void amd_mwaitx(const uint64_t timeout) > } > > static struct { > + alignas(RTE_CACHE_LINE_SIZE) > void (*mmonitor)(volatile void *addr); > void (*mwait)(const uint64_t timeout); > -} __rte_cache_aligned power_monitor_ops; > +} power_monitor_ops; > > static inline void > __umwait_wakeup(volatile void *addr)