From: Eli Britstein <elibr@nvidia.com>
To: <dev@dpdk.org>
Cc: Matan Azrad <matan@nvidia.com>, Asaf Penso <asafp@nvidia.com>,
"Slava Ovsiienko" <viacheslavo@nvidia.com>,
Thomas Monjalon <thomas@monjalon.net>,
<bruce.richardson@intel.com>, <konstantin.ananyev@intel.com>,
<olivier.matz@6wind.com>, Eli Britstein <elibr@nvidia.com>,
<stable@dpdk.org>
Subject: [dpdk-dev] [PATCH V2 3/3] eal/x86: avoid cast-align warning in x86 memcpy functions
Date: Thu, 21 Oct 2021 11:51:32 +0300
Message-ID: <20211021085132.12672-3-elibr@nvidia.com>
In-Reply-To: <20211021085132.12672-1-elibr@nvidia.com>

Functions and macros in x86 rte_memcpy.h may cause cast-align warnings
when using the strict cast-align flag with a gcc that supports it, e.g.
gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0:
CFLAGS="-Wcast-align=strict" make V=1 -C examples/l2fwd clean static
For example:
In file included from main.c:24:
/dpdk/build/include/rte_memcpy.h: In function 'rte_mov16':
/dpdk/build/include/rte_memcpy.h:306:25: warning: cast increases
required alignment of target type [-Wcast-align]
306 | xmm0 = _mm_loadu_si128((const __m128i *)src);
| ^
As the code assumes correct alignment, add an intermediate (void *) or
(const void *) cast first, to avoid the warnings.
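
For illustration only (not part of the patch), here is a minimal
standalone sketch of the pattern, assuming gcc >= 8 (which added
-Wcast-align=strict) and SSE2; the file name and function names are
hypothetical. A direct cast from uint8_t * to __m128i * trips the
warning, while routing it through (const void *) does not, because
void * carries no alignment requirement:

/* example.c -- compile with: gcc -O2 -Wcast-align=strict -c example.c */
#include <emmintrin.h>	/* SSE2 intrinsics: __m128i, _mm_loadu_si128() */
#include <stdint.h>

__m128i
load16_warns(const uint8_t *src)
{
	/* warning: cast increases required alignment of target type */
	return _mm_loadu_si128((const __m128i *)src);
}

__m128i
load16_silent(const uint8_t *src)
{
	/*
	 * (const void *) has no alignment requirement, so the second
	 * cast does not increase alignment and gcc stays silent. The
	 * unaligned load itself is safe for any address either way.
	 */
	return _mm_loadu_si128((const __m128i *)(const void *)src);
}

Note the warning fires even on x86, where unaligned access is legal:
the =strict level warns regardless of the target machine's tolerance.
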
Fixes: 9484092baad3 ("eal/x86: optimize memcpy for AVX512 platforms")
Cc: stable@dpdk.org
Signed-off-by: Eli Britstein <elibr@nvidia.com>
---
lib/eal/x86/include/rte_memcpy.h | 80 ++++++++++++++++++--------------
1 file changed, 44 insertions(+), 36 deletions(-)
diff --git a/lib/eal/x86/include/rte_memcpy.h b/lib/eal/x86/include/rte_memcpy.h
index 79f381dd9b..1b6c6e585f 100644
--- a/lib/eal/x86/include/rte_memcpy.h
+++ b/lib/eal/x86/include/rte_memcpy.h
@@ -303,8 +303,8 @@ rte_mov16(uint8_t *dst, const uint8_t *src)
{
__m128i xmm0;
- xmm0 = _mm_loadu_si128((const __m128i *)src);
- _mm_storeu_si128((__m128i *)dst, xmm0);
+ xmm0 = _mm_loadu_si128((const __m128i *)(const void *)src);
+ _mm_storeu_si128((__m128i *)(void *)dst, xmm0);
}
/**
@@ -316,8 +316,8 @@ rte_mov32(uint8_t *dst, const uint8_t *src)
{
__m256i ymm0;
- ymm0 = _mm256_loadu_si256((const __m256i *)src);
- _mm256_storeu_si256((__m256i *)dst, ymm0);
+ ymm0 = _mm256_loadu_si256((const __m256i *)(const void *)src);
+ _mm256_storeu_si256((__m256i *)(void *)dst, ymm0);
}
/**
@@ -354,16 +354,24 @@ rte_mov128blocks(uint8_t *dst, const uint8_t *src, size_t n)
__m256i ymm0, ymm1, ymm2, ymm3;
while (n >= 128) {
- ymm0 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 0 * 32));
+ ymm0 = _mm256_loadu_si256((const __m256i *)(const void *)
+ ((const uint8_t *)src + 0 * 32));
n -= 128;
- ymm1 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 1 * 32));
- ymm2 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 2 * 32));
- ymm3 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 3 * 32));
+ ymm1 = _mm256_loadu_si256((const __m256i *)(const void *)
+ ((const uint8_t *)src + 1 * 32));
+ ymm2 = _mm256_loadu_si256((const __m256i *)(const void *)
+ ((const uint8_t *)src + 2 * 32));
+ ymm3 = _mm256_loadu_si256((const __m256i *)(const void *)
+ ((const uint8_t *)src + 3 * 32));
src = (const uint8_t *)src + 128;
- _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 0 * 32), ymm0);
- _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 1 * 32), ymm1);
- _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 2 * 32), ymm2);
- _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 3 * 32), ymm3);
+ _mm256_storeu_si256((__m256i *)(void *)
+ ((uint8_t *)dst + 0 * 32), ymm0);
+ _mm256_storeu_si256((__m256i *)(void *)
+ ((uint8_t *)dst + 1 * 32), ymm1);
+ _mm256_storeu_si256((__m256i *)(void *)
+ ((uint8_t *)dst + 2 * 32), ymm2);
+ _mm256_storeu_si256((__m256i *)(void *)
+ ((uint8_t *)dst + 3 * 32), ymm3);
dst = (uint8_t *)dst + 128;
}
}
@@ -496,8 +504,8 @@ rte_mov16(uint8_t *dst, const uint8_t *src)
{
__m128i xmm0;
- xmm0 = _mm_loadu_si128((const __m128i *)(const __m128i *)src);
- _mm_storeu_si128((__m128i *)dst, xmm0);
+ xmm0 = _mm_loadu_si128((const __m128i *)(const void *)src);
+ _mm_storeu_si128((__m128i *)(void *)dst, xmm0);
}
/**
@@ -581,25 +589,25 @@ rte_mov256(uint8_t *dst, const uint8_t *src)
__extension__ ({ \
size_t tmp; \
while (len >= 128 + 16 - offset) { \
- xmm0 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 0 * 16)); \
+ xmm0 = _mm_loadu_si128((const __m128i *)(const void *)((const uint8_t *)src - offset + 0 * 16)); \
len -= 128; \
- xmm1 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 1 * 16)); \
- xmm2 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 2 * 16)); \
- xmm3 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 3 * 16)); \
- xmm4 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 4 * 16)); \
- xmm5 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 5 * 16)); \
- xmm6 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 6 * 16)); \
- xmm7 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 7 * 16)); \
- xmm8 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 8 * 16)); \
+ xmm1 = _mm_loadu_si128((const __m128i *)(const void *)((const uint8_t *)src - offset + 1 * 16)); \
+ xmm2 = _mm_loadu_si128((const __m128i *)(const void *)((const uint8_t *)src - offset + 2 * 16)); \
+ xmm3 = _mm_loadu_si128((const __m128i *)(const void *)((const uint8_t *)src - offset + 3 * 16)); \
+ xmm4 = _mm_loadu_si128((const __m128i *)(const void *)((const uint8_t *)src - offset + 4 * 16)); \
+ xmm5 = _mm_loadu_si128((const __m128i *)(const void *)((const uint8_t *)src - offset + 5 * 16)); \
+ xmm6 = _mm_loadu_si128((const __m128i *)(const void *)((const uint8_t *)src - offset + 6 * 16)); \
+ xmm7 = _mm_loadu_si128((const __m128i *)(const void *)((const uint8_t *)src - offset + 7 * 16)); \
+ xmm8 = _mm_loadu_si128((const __m128i *)(const void *)((const uint8_t *)src - offset + 8 * 16)); \
src = (const uint8_t *)src + 128; \
- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \
- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \
- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 2 * 16), _mm_alignr_epi8(xmm3, xmm2, offset)); \
- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 3 * 16), _mm_alignr_epi8(xmm4, xmm3, offset)); \
- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 4 * 16), _mm_alignr_epi8(xmm5, xmm4, offset)); \
- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 5 * 16), _mm_alignr_epi8(xmm6, xmm5, offset)); \
- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 6 * 16), _mm_alignr_epi8(xmm7, xmm6, offset)); \
- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 7 * 16), _mm_alignr_epi8(xmm8, xmm7, offset)); \
+ _mm_storeu_si128((__m128i *)(void *)((uint8_t *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \
+ _mm_storeu_si128((__m128i *)(void *)((uint8_t *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \
+ _mm_storeu_si128((__m128i *)(void *)((uint8_t *)dst + 2 * 16), _mm_alignr_epi8(xmm3, xmm2, offset)); \
+ _mm_storeu_si128((__m128i *)(void *)((uint8_t *)dst + 3 * 16), _mm_alignr_epi8(xmm4, xmm3, offset)); \
+ _mm_storeu_si128((__m128i *)(void *)((uint8_t *)dst + 4 * 16), _mm_alignr_epi8(xmm5, xmm4, offset)); \
+ _mm_storeu_si128((__m128i *)(void *)((uint8_t *)dst + 5 * 16), _mm_alignr_epi8(xmm6, xmm5, offset)); \
+ _mm_storeu_si128((__m128i *)(void *)((uint8_t *)dst + 6 * 16), _mm_alignr_epi8(xmm7, xmm6, offset)); \
+ _mm_storeu_si128((__m128i *)(void *)((uint8_t *)dst + 7 * 16), _mm_alignr_epi8(xmm8, xmm7, offset)); \
dst = (uint8_t *)dst + 128; \
} \
tmp = len; \
@@ -609,13 +617,13 @@ __extension__ ({
dst = (uint8_t *)dst + tmp; \
if (len >= 32 + 16 - offset) { \
while (len >= 32 + 16 - offset) { \
- xmm0 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 0 * 16)); \
+ xmm0 = _mm_loadu_si128((const __m128i *)(const void *)((const uint8_t *)src - offset + 0 * 16)); \
len -= 32; \
- xmm1 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 1 * 16)); \
- xmm2 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 2 * 16)); \
+ xmm1 = _mm_loadu_si128((const __m128i *)(const void *)((const uint8_t *)src - offset + 1 * 16)); \
+ xmm2 = _mm_loadu_si128((const __m128i *)(const void *)((const uint8_t *)src - offset + 2 * 16)); \
src = (const uint8_t *)src + 32; \
- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \
- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \
+ _mm_storeu_si128((__m128i *)(void *)((uint8_t *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \
+ _mm_storeu_si128((__m128i *)(void *)((uint8_t *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \
dst = (uint8_t *)dst + 32; \
} \
tmp = len; \
--
2.28.0.2311.g225365fb51