* [PATCH] ring: compilation fix with GCC-12
@ 2022-08-05 9:03 Amit Prakash Shukla
2022-08-05 15:37 ` Stephen Hemminger
2022-08-06 18:35 ` Honnappa Nagarahalli
0 siblings, 2 replies; 10+ messages in thread
From: Amit Prakash Shukla @ 2022-08-05 9:03 UTC (permalink / raw)
To: Honnappa Nagarahalli, Konstantin Ananyev
Cc: dev, jerinj, Amit Prakash Shukla, stable
GCC 12 raises the following warning:
In function '__rte_ring_dequeue_elems_128',
inlined from '__rte_ring_dequeue_elems' at
../lib/ring/rte_ring_elem_pvt.h:262:3,
inlined from '__rte_ring_do_hts_dequeue_elem' at
../lib/ring/rte_ring_hts_elem_pvt.h:237:3,
inlined from 'rte_ring_mc_hts_dequeue_bulk_elem' at
../lib/ring/rte_ring_hts.h:83:9,
inlined from 'rte_ring_dequeue_bulk_elem' at
../lib/ring/rte_ring_elem.h:391:10,
inlined from 'rte_ring_dequeue_elem' at
../lib/ring/rte_ring_elem.h:476:9,
inlined from 'rte_ring_dequeue' at
../lib/ring/rte_ring.h:463:9,
inlined from 'rxa_intr_ring_dequeue' at
../lib/eventdev/rte_event_eth_rx_adapter.c:1196:10:
../lib/ring/rte_ring_elem_pvt.h:234:25: error: 'memcpy' writing
32 bytes into a region of size 8 overflows the destination
[-Werror=stringop-overflow=]
234 | memcpy((void *)(obj + i), (void *)(ring + idx), 32);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Replacing memcpy with rte_memcpy fixes the GCC-12 compilation issue.
Also, it would be better to switch to rte_memcpy, as the function is
called in the fast path.
Bugzilla ID: 1062
Fixes: 1fc73390bcf5 ("ring: refactor exported headers")
Cc: stable@dpdk.org
Signed-off-by: Amit Prakash Shukla <amitprakashs@marvell.com>
---
lib/ring/rte_ring_elem_pvt.h | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)
diff --git a/lib/ring/rte_ring_elem_pvt.h b/lib/ring/rte_ring_elem_pvt.h
index 83788c56e6..3d85b13333 100644
--- a/lib/ring/rte_ring_elem_pvt.h
+++ b/lib/ring/rte_ring_elem_pvt.h
@@ -10,6 +10,8 @@
#ifndef _RTE_RING_ELEM_PVT_H_
#define _RTE_RING_ELEM_PVT_H_
+#include <rte_memcpy.h>
+
static __rte_always_inline void
__rte_ring_enqueue_elems_32(struct rte_ring *r, const uint32_t size,
uint32_t idx, const void *obj_table, uint32_t n)
@@ -97,20 +99,20 @@ __rte_ring_enqueue_elems_128(struct rte_ring *r, uint32_t prod_head,
const rte_int128_t *obj = (const rte_int128_t *)obj_table;
if (likely(idx + n <= size)) {
for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
- memcpy((void *)(ring + idx),
+ rte_memcpy((void *)(ring + idx),
(const void *)(obj + i), 32);
switch (n & 0x1) {
case 1:
- memcpy((void *)(ring + idx),
+ rte_memcpy((void *)(ring + idx),
(const void *)(obj + i), 16);
}
} else {
for (i = 0; idx < size; i++, idx++)
- memcpy((void *)(ring + idx),
+ rte_memcpy((void *)(ring + idx),
(const void *)(obj + i), 16);
/* Start at the beginning */
for (idx = 0; i < n; i++, idx++)
- memcpy((void *)(ring + idx),
+ rte_memcpy((void *)(ring + idx),
(const void *)(obj + i), 16);
}
}
@@ -231,17 +233,17 @@ __rte_ring_dequeue_elems_128(struct rte_ring *r, uint32_t prod_head,
rte_int128_t *obj = (rte_int128_t *)obj_table;
if (likely(idx + n <= size)) {
for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
- memcpy((void *)(obj + i), (void *)(ring + idx), 32);
+ rte_memcpy((void *)(obj + i), (void *)(ring + idx), 32);
switch (n & 0x1) {
case 1:
- memcpy((void *)(obj + i), (void *)(ring + idx), 16);
+ rte_memcpy((void *)(obj + i), (void *)(ring + idx), 16);
}
} else {
for (i = 0; idx < size; i++, idx++)
- memcpy((void *)(obj + i), (void *)(ring + idx), 16);
+ rte_memcpy((void *)(obj + i), (void *)(ring + idx), 16);
/* Start at the beginning */
for (idx = 0; i < n; i++, idx++)
- memcpy((void *)(obj + i), (void *)(ring + idx), 16);
+ rte_memcpy((void *)(obj + i), (void *)(ring + idx), 16);
}
}
--
2.25.1
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] ring: compilation fix with GCC-12
2022-08-05 9:03 [PATCH] ring: compilation fix with GCC-12 Amit Prakash Shukla
@ 2022-08-05 15:37 ` Stephen Hemminger
2022-08-06 18:35 ` Honnappa Nagarahalli
1 sibling, 0 replies; 10+ messages in thread
From: Stephen Hemminger @ 2022-08-05 15:37 UTC (permalink / raw)
To: Amit Prakash Shukla
Cc: Honnappa Nagarahalli, Konstantin Ananyev, dev, jerinj, stable
On Fri, 5 Aug 2022 14:33:48 +0530
Amit Prakash Shukla <amitprakashs@marvell.com> wrote:
> GCC 12 raises the following warning:
>
> In function '__rte_ring_dequeue_elems_128',
> inlined from '__rte_ring_dequeue_elems' at
> ../lib/ring/rte_ring_elem_pvt.h:262:3,
> inlined from '__rte_ring_do_hts_dequeue_elem' at
> ../lib/ring/rte_ring_hts_elem_pvt.h:237:3,
> inlined from 'rte_ring_mc_hts_dequeue_bulk_elem' at
> ../lib/ring/rte_ring_hts.h:83:9,
> inlined from 'rte_ring_dequeue_bulk_elem' at
> ../lib/ring/rte_ring_elem.h:391:10,
> inlined from 'rte_ring_dequeue_elem' at
> ../lib/ring/rte_ring_elem.h:476:9,
> inlined from 'rte_ring_dequeue' at
> ../lib/ring/rte_ring.h:463:9,
> inlined from 'rxa_intr_ring_dequeue' at
> ../lib/eventdev/rte_event_eth_rx_adapter.c:1196:10:
> ../lib/ring/rte_ring_elem_pvt.h:234:25: error: 'memcpy' writing
> 32 bytes into a region of size 8 overflows the destination
> [-Werror=stringop-overflow=]
> 234 | memcpy((void *)(obj + i), (void *)(ring + idx), 32);
> | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>
> Replacing memcpy with rte_memcpy fixes the GCC-12 compilation issue.
> Also it would be better to change to rte_memcpy as the function is
> called in fastpath.
>
> Bugzilla ID: 1062
> Fixes: 1fc73390bcf5 ("ring: refactor exported headers")
> Cc: stable@dpdk.org
>
> Signed-off-by: Amit Prakash Shukla <amitprakashs@marvell.com>
This seems to be just using a different routine and in effect
wallpapering over a possibly valid warning.
^ permalink raw reply [flat|nested] 10+ messages in thread
* RE: [PATCH] ring: compilation fix with GCC-12
2022-08-05 9:03 [PATCH] ring: compilation fix with GCC-12 Amit Prakash Shukla
2022-08-05 15:37 ` Stephen Hemminger
@ 2022-08-06 18:35 ` Honnappa Nagarahalli
2022-08-07 12:26 ` Konstantin Ananyev
1 sibling, 1 reply; 10+ messages in thread
From: Honnappa Nagarahalli @ 2022-08-06 18:35 UTC (permalink / raw)
To: Amit Prakash Shukla, Konstantin Ananyev; +Cc: dev, jerinj, stable, nd, nd
<snip>
>
> GCC 12 raises the following warning:
>
> In function '__rte_ring_dequeue_elems_128',
> inlined from '__rte_ring_dequeue_elems' at
> ../lib/ring/rte_ring_elem_pvt.h:262:3,
> inlined from '__rte_ring_do_hts_dequeue_elem' at
> ../lib/ring/rte_ring_hts_elem_pvt.h:237:3,
> inlined from 'rte_ring_mc_hts_dequeue_bulk_elem' at
> ../lib/ring/rte_ring_hts.h:83:9,
> inlined from 'rte_ring_dequeue_bulk_elem' at
> ../lib/ring/rte_ring_elem.h:391:10,
> inlined from 'rte_ring_dequeue_elem' at
> ../lib/ring/rte_ring_elem.h:476:9,
> inlined from 'rte_ring_dequeue' at
> ../lib/ring/rte_ring.h:463:9,
> inlined from 'rxa_intr_ring_dequeue' at
> ../lib/eventdev/rte_event_eth_rx_adapter.c:1196:10:
> ../lib/ring/rte_ring_elem_pvt.h:234:25: error: 'memcpy' writing
> 32 bytes into a region of size 8 overflows the destination
> [-Werror=stringop-overflow=]
> 234 | memcpy((void *)(obj + i), (void *)(ring + idx), 32);
> | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>
> Replacing memcpy with rte_memcpy fixes the GCC-12 compilation issue.
Any reason why this replacement fixes the problem?
Do you have any performance numbers with this change?
> Also it would be better to change to rte_memcpy as the function is called in
> fastpath.
On Arm platforms, memcpy in the later versions has the best performance.
>
> Bugzilla ID: 1062
> Fixes: 1fc73390bcf5 ("ring: refactor exported headers")
> Cc: stable@dpdk.org
>
> Signed-off-by: Amit Prakash Shukla <amitprakashs@marvell.com>
> ---
> lib/ring/rte_ring_elem_pvt.h | 18 ++++++++++--------
> 1 file changed, 10 insertions(+), 8 deletions(-)
>
> diff --git a/lib/ring/rte_ring_elem_pvt.h b/lib/ring/rte_ring_elem_pvt.h index
> 83788c56e6..3d85b13333 100644
> --- a/lib/ring/rte_ring_elem_pvt.h
> +++ b/lib/ring/rte_ring_elem_pvt.h
> @@ -10,6 +10,8 @@
> #ifndef _RTE_RING_ELEM_PVT_H_
> #define _RTE_RING_ELEM_PVT_H_
>
> +#include <rte_memcpy.h>
> +
> static __rte_always_inline void
> __rte_ring_enqueue_elems_32(struct rte_ring *r, const uint32_t size,
> uint32_t idx, const void *obj_table, uint32_t n) @@ -97,20
> +99,20 @@ __rte_ring_enqueue_elems_128(struct rte_ring *r, uint32_t
> prod_head,
> const rte_int128_t *obj = (const rte_int128_t *)obj_table;
> if (likely(idx + n <= size)) {
> for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
> - memcpy((void *)(ring + idx),
> + rte_memcpy((void *)(ring + idx),
> (const void *)(obj + i), 32);
> switch (n & 0x1) {
> case 1:
> - memcpy((void *)(ring + idx),
> + rte_memcpy((void *)(ring + idx),
> (const void *)(obj + i), 16);
> }
> } else {
> for (i = 0; idx < size; i++, idx++)
> - memcpy((void *)(ring + idx),
> + rte_memcpy((void *)(ring + idx),
> (const void *)(obj + i), 16);
> /* Start at the beginning */
> for (idx = 0; i < n; i++, idx++)
> - memcpy((void *)(ring + idx),
> + rte_memcpy((void *)(ring + idx),
> (const void *)(obj + i), 16);
> }
> }
> @@ -231,17 +233,17 @@ __rte_ring_dequeue_elems_128(struct rte_ring *r,
> uint32_t prod_head,
> rte_int128_t *obj = (rte_int128_t *)obj_table;
> if (likely(idx + n <= size)) {
> for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
> - memcpy((void *)(obj + i), (void *)(ring + idx), 32);
> + rte_memcpy((void *)(obj + i), (void *)(ring + idx), 32);
> switch (n & 0x1) {
> case 1:
> - memcpy((void *)(obj + i), (void *)(ring + idx), 16);
> + rte_memcpy((void *)(obj + i), (void *)(ring + idx), 16);
> }
> } else {
> for (i = 0; idx < size; i++, idx++)
> - memcpy((void *)(obj + i), (void *)(ring + idx), 16);
> + rte_memcpy((void *)(obj + i), (void *)(ring + idx), 16);
> /* Start at the beginning */
> for (idx = 0; i < n; i++, idx++)
> - memcpy((void *)(obj + i), (void *)(ring + idx), 16);
> + rte_memcpy((void *)(obj + i), (void *)(ring + idx), 16);
> }
> }
>
> --
> 2.25.1
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] ring: compilation fix with GCC-12
2022-08-06 18:35 ` Honnappa Nagarahalli
@ 2022-08-07 12:26 ` Konstantin Ananyev
2022-08-23 9:38 ` [EXT] " Amit Prakash Shukla
0 siblings, 1 reply; 10+ messages in thread
From: Konstantin Ananyev @ 2022-08-07 12:26 UTC (permalink / raw)
To: Honnappa Nagarahalli, Amit Prakash Shukla; +Cc: dev, jerinj, stable, nd
06/08/2022 19:35, Honnappa Nagarahalli пишет:
> <snip>
>
>>
>> GCC 12 raises the following warning:
>>
>> In function '__rte_ring_dequeue_elems_128',
>> inlined from '__rte_ring_dequeue_elems' at
>> ../lib/ring/rte_ring_elem_pvt.h:262:3,
>> inlined from '__rte_ring_do_hts_dequeue_elem' at
>> ../lib/ring/rte_ring_hts_elem_pvt.h:237:3,
>> inlined from 'rte_ring_mc_hts_dequeue_bulk_elem' at
>> ../lib/ring/rte_ring_hts.h:83:9,
>> inlined from 'rte_ring_dequeue_bulk_elem' at
>> ../lib/ring/rte_ring_elem.h:391:10,
>> inlined from 'rte_ring_dequeue_elem' at
>> ../lib/ring/rte_ring_elem.h:476:9,
>> inlined from 'rte_ring_dequeue' at
>> ../lib/ring/rte_ring.h:463:9,
>> inlined from 'rxa_intr_ring_dequeue' at
>> ../lib/eventdev/rte_event_eth_rx_adapter.c:1196:10:
>> ../lib/ring/rte_ring_elem_pvt.h:234:25: error: 'memcpy' writing
>> 32 bytes into a region of size 8 overflows the destination
>> [-Werror=stringop-overflow=]
>> 234 | memcpy((void *)(obj + i), (void *)(ring + idx), 32);
>> | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>>
>> Replacing memcpy with rte_memcpy fixes the GCC-12 compilation issue.
> Any reason why this replacement fixes the problem?
> Do you have any performance numbers with this change?
>
>> Also it would be better to change to rte_memcpy as the function is called in
>> fastpath.
> On Arm platforms, memcpy in the later versions has the best performance.
I agree with Honnappa, it is better to keep memcpy() here.
Actually, what is strange is why it ends up in
__rte_ring_dequeue_elems_128() at all.
Inside rxa_intr_ring_dequeue() we are clearly calling rte_ring_dequeue(),
which should boil down to __rte_ring_dequeue_elems_64().
It should not go to __rte_ring_dequeue_elems_128() at all.
Another question: does this warning happen only on Arm platforms?
>
>>
>> Bugzilla ID: 1062
>> Fixes: 1fc73390bcf5 ("ring: refactor exported headers")
>> Cc: stable@dpdk.org
>>
>> Signed-off-by: Amit Prakash Shukla <amitprakashs@marvell.com>
>> ---
>> lib/ring/rte_ring_elem_pvt.h | 18 ++++++++++--------
>> 1 file changed, 10 insertions(+), 8 deletions(-)
>>
>> diff --git a/lib/ring/rte_ring_elem_pvt.h b/lib/ring/rte_ring_elem_pvt.h index
>> 83788c56e6..3d85b13333 100644
>> --- a/lib/ring/rte_ring_elem_pvt.h
>> +++ b/lib/ring/rte_ring_elem_pvt.h
>> @@ -10,6 +10,8 @@
>> #ifndef _RTE_RING_ELEM_PVT_H_
>> #define _RTE_RING_ELEM_PVT_H_
>>
>> +#include <rte_memcpy.h>
>> +
>> static __rte_always_inline void
>> __rte_ring_enqueue_elems_32(struct rte_ring *r, const uint32_t size,
>> uint32_t idx, const void *obj_table, uint32_t n) @@ -97,20
>> +99,20 @@ __rte_ring_enqueue_elems_128(struct rte_ring *r, uint32_t
>> prod_head,
>> const rte_int128_t *obj = (const rte_int128_t *)obj_table;
>> if (likely(idx + n <= size)) {
>> for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
>> - memcpy((void *)(ring + idx),
>> + rte_memcpy((void *)(ring + idx),
>> (const void *)(obj + i), 32);
>> switch (n & 0x1) {
>> case 1:
>> - memcpy((void *)(ring + idx),
>> + rte_memcpy((void *)(ring + idx),
>> (const void *)(obj + i), 16);
>> }
>> } else {
>> for (i = 0; idx < size; i++, idx++)
>> - memcpy((void *)(ring + idx),
>> + rte_memcpy((void *)(ring + idx),
>> (const void *)(obj + i), 16);
>> /* Start at the beginning */
>> for (idx = 0; i < n; i++, idx++)
>> - memcpy((void *)(ring + idx),
>> + rte_memcpy((void *)(ring + idx),
>> (const void *)(obj + i), 16);
>> }
>> }
>> @@ -231,17 +233,17 @@ __rte_ring_dequeue_elems_128(struct rte_ring *r,
>> uint32_t prod_head,
>> rte_int128_t *obj = (rte_int128_t *)obj_table;
>> if (likely(idx + n <= size)) {
>> for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
>> - memcpy((void *)(obj + i), (void *)(ring + idx), 32);
>> + rte_memcpy((void *)(obj + i), (void *)(ring + idx), 32);
>> switch (n & 0x1) {
>> case 1:
>> - memcpy((void *)(obj + i), (void *)(ring + idx), 16);
>> + rte_memcpy((void *)(obj + i), (void *)(ring + idx), 16);
>> }
>> } else {
>> for (i = 0; idx < size; i++, idx++)
>> - memcpy((void *)(obj + i), (void *)(ring + idx), 16);
>> + rte_memcpy((void *)(obj + i), (void *)(ring + idx), 16);
>> /* Start at the beginning */
>> for (idx = 0; i < n; i++, idx++)
>> - memcpy((void *)(obj + i), (void *)(ring + idx), 16);
>> + rte_memcpy((void *)(obj + i), (void *)(ring + idx), 16);
>> }
>> }
>>
>> --
>> 2.25.1
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* RE: [EXT] Re: [PATCH] ring: compilation fix with GCC-12
2022-08-07 12:26 ` Konstantin Ananyev
@ 2022-08-23 9:38 ` Amit Prakash Shukla
2022-08-23 9:41 ` Amit Prakash Shukla
2023-01-12 21:41 ` Thomas Monjalon
0 siblings, 2 replies; 10+ messages in thread
From: Amit Prakash Shukla @ 2022-08-23 9:38 UTC (permalink / raw)
To: Konstantin Ananyev, Honnappa Nagarahalli
Cc: dev, Jerin Jacob Kollanukkaran, stable, nd
Thanks for the feedback. My apologies for delayed reply.
> -----Original Message-----
> From: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
> Sent: Sunday, August 7, 2022 5:56 PM
> To: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Amit Prakash
> Shukla <amitprakashs@marvell.com>
> Cc: dev@dpdk.org; Jerin Jacob Kollanukkaran <jerinj@marvell.com>;
> stable@dpdk.org; nd <nd@arm.com>
> Subject: [EXT] Re: [PATCH] ring: compilation fix with GCC-12
>
> External Email
>
> ----------------------------------------------------------------------
> 06/08/2022 19:35, Honnappa Nagarahalli пишет:
> > <snip>
> >
> >>
> >> GCC 12 raises the following warning:
> >>
> >> In function '__rte_ring_dequeue_elems_128',
> >> inlined from '__rte_ring_dequeue_elems' at
> >> ../lib/ring/rte_ring_elem_pvt.h:262:3,
> >> inlined from '__rte_ring_do_hts_dequeue_elem' at
> >> ../lib/ring/rte_ring_hts_elem_pvt.h:237:3,
> >> inlined from 'rte_ring_mc_hts_dequeue_bulk_elem' at
> >> ../lib/ring/rte_ring_hts.h:83:9,
> >> inlined from 'rte_ring_dequeue_bulk_elem' at
> >> ../lib/ring/rte_ring_elem.h:391:10,
> >> inlined from 'rte_ring_dequeue_elem' at
> >> ../lib/ring/rte_ring_elem.h:476:9,
> >> inlined from 'rte_ring_dequeue' at
> >> ../lib/ring/rte_ring.h:463:9,
> >> inlined from 'rxa_intr_ring_dequeue' at
> >> ../lib/eventdev/rte_event_eth_rx_adapter.c:1196:10:
> >> ../lib/ring/rte_ring_elem_pvt.h:234:25: error: 'memcpy' writing
> >> 32 bytes into a region of size 8 overflows the destination
> >> [-Werror=stringop-overflow=]
> >> 234 | memcpy((void *)(obj + i), (void *)(ring + idx), 32);
> >> | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> >>
> >> Replacing memcpy with rte_memcpy fixes the GCC-12 compilation issue.
> > Any reason why this replacement fixes the problem?
> > Do you have any performance numbers with this change?
> >
> >> Also it would be better to change to rte_memcpy as the function is
> >> called in fastpath.
> > On Arm platforms, memcpy in the later versions has the best performance.
>
> I agree with Honnappa, it is better to keep memcpy() here.
> Actually what is strange - why it ends up in
> __rte_ring_dequeue_elems_128() at all?
> Inside rxa_intr_ring_dequeue() we clearly doing: rte_ring_dequeue(), which
> should boil down to ___rte_ring_dequeue_elems_64().
> it should go to __rte_ring_dequeue_elems_128() at all.
I agree. After a close look and a few experiments, ideally it should not be going to __rte_ring_dequeue_elems_128().
The sizeof (in the call to rte_ring_dequeue_elem) is evaluated at compile time, and in this case it evaluates to 8 bytes, so
__rte_ring_dequeue_elems_128() should not be in the path. So this looks more like a GCC 12 bug, perhaps?
>
> Another q - is this warning happens only on arm platforms?
The warning is observed on x86 with the build type set to debug:
"meson --werror --buildtype=debug build"
>
> >
> >>
> >> Bugzilla ID: 1062
> >> Fixes: 1fc73390bcf5 ("ring: refactor exported headers")
> >> Cc: stable@dpdk.org
> >>
> >> Signed-off-by: Amit Prakash Shukla <amitprakashs@marvell.com>
> >> ---
> >> lib/ring/rte_ring_elem_pvt.h | 18 ++++++++++--------
> >> 1 file changed, 10 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/lib/ring/rte_ring_elem_pvt.h
> >> b/lib/ring/rte_ring_elem_pvt.h index
> >> 83788c56e6..3d85b13333 100644
> >> --- a/lib/ring/rte_ring_elem_pvt.h
> >> +++ b/lib/ring/rte_ring_elem_pvt.h
> >> @@ -10,6 +10,8 @@
> >> #ifndef _RTE_RING_ELEM_PVT_H_
> >> #define _RTE_RING_ELEM_PVT_H_
> >>
> >> +#include <rte_memcpy.h>
> >> +
> >> static __rte_always_inline void
> >> __rte_ring_enqueue_elems_32(struct rte_ring *r, const uint32_t size,
> >> uint32_t idx, const void *obj_table, uint32_t n) @@ -97,20
> >> +99,20 @@ __rte_ring_enqueue_elems_128(struct rte_ring *r, uint32_t
> >> prod_head,
> >> const rte_int128_t *obj = (const rte_int128_t *)obj_table;
> >> if (likely(idx + n <= size)) {
> >> for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
> >> - memcpy((void *)(ring + idx),
> >> + rte_memcpy((void *)(ring + idx),
> >> (const void *)(obj + i), 32);
> >> switch (n & 0x1) {
> >> case 1:
> >> - memcpy((void *)(ring + idx),
> >> + rte_memcpy((void *)(ring + idx),
> >> (const void *)(obj + i), 16);
> >> }
> >> } else {
> >> for (i = 0; idx < size; i++, idx++)
> >> - memcpy((void *)(ring + idx),
> >> + rte_memcpy((void *)(ring + idx),
> >> (const void *)(obj + i), 16);
> >> /* Start at the beginning */
> >> for (idx = 0; i < n; i++, idx++)
> >> - memcpy((void *)(ring + idx),
> >> + rte_memcpy((void *)(ring + idx),
> >> (const void *)(obj + i), 16);
> >> }
> >> }
> >> @@ -231,17 +233,17 @@ __rte_ring_dequeue_elems_128(struct rte_ring
> >> *r, uint32_t prod_head,
> >> rte_int128_t *obj = (rte_int128_t *)obj_table;
> >> if (likely(idx + n <= size)) {
> >> for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
> >> - memcpy((void *)(obj + i), (void *)(ring + idx), 32);
> >> + rte_memcpy((void *)(obj + i), (void *)(ring + idx),
> 32);
> >> switch (n & 0x1) {
> >> case 1:
> >> - memcpy((void *)(obj + i), (void *)(ring + idx), 16);
> >> + rte_memcpy((void *)(obj + i), (void *)(ring + idx),
> 16);
> >> }
> >> } else {
> >> for (i = 0; idx < size; i++, idx++)
> >> - memcpy((void *)(obj + i), (void *)(ring + idx), 16);
> >> + rte_memcpy((void *)(obj + i), (void *)(ring + idx),
> 16);
> >> /* Start at the beginning */
> >> for (idx = 0; i < n; i++, idx++)
> >> - memcpy((void *)(obj + i), (void *)(ring + idx), 16);
> >> + rte_memcpy((void *)(obj + i), (void *)(ring + idx),
> 16);
> >> }
> >> }
> >>
> >> --
> >> 2.25.1
> >
^ permalink raw reply [flat|nested] 10+ messages in thread
* RE: [EXT] Re: [PATCH] ring: compilation fix with GCC-12
2022-08-23 9:38 ` [EXT] " Amit Prakash Shukla
@ 2022-08-23 9:41 ` Amit Prakash Shukla
2023-01-12 21:41 ` Thomas Monjalon
1 sibling, 0 replies; 10+ messages in thread
From: Amit Prakash Shukla @ 2022-08-23 9:41 UTC (permalink / raw)
To: Amit Prakash Shukla, Konstantin Ananyev, Honnappa Nagarahalli
Cc: dev, Jerin Jacob Kollanukkaran, stable, nd
> -----Original Message-----
> From: Amit Prakash Shukla <amitprakashs@marvell.com>
> Sent: Tuesday, August 23, 2022 3:08 PM
> To: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>; Honnappa
> Nagarahalli <Honnappa.Nagarahalli@arm.com>
> Cc: dev@dpdk.org; Jerin Jacob Kollanukkaran <jerinj@marvell.com>;
> stable@dpdk.org; nd <nd@arm.com>
> Subject: RE: [EXT] Re: [PATCH] ring: compilation fix with GCC-12
>
> Thanks for the feedback. My apologies for delayed reply.
>
> > -----Original Message-----
> > From: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
> > Sent: Sunday, August 7, 2022 5:56 PM
> > To: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Amit
> Prakash
> > Shukla <amitprakashs@marvell.com>
> > Cc: dev@dpdk.org; Jerin Jacob Kollanukkaran <jerinj@marvell.com>;
> > stable@dpdk.org; nd <nd@arm.com>
> > Subject: [EXT] Re: [PATCH] ring: compilation fix with GCC-12
> >
> > External Email
> >
> > ----------------------------------------------------------------------
> > 06/08/2022 19:35, Honnappa Nagarahalli пишет:
> > > <snip>
> > >
> > >>
> > >> GCC 12 raises the following warning:
> > >>
> > >> In function '__rte_ring_dequeue_elems_128',
> > >> inlined from '__rte_ring_dequeue_elems' at
> > >> ../lib/ring/rte_ring_elem_pvt.h:262:3,
> > >> inlined from '__rte_ring_do_hts_dequeue_elem' at
> > >> ../lib/ring/rte_ring_hts_elem_pvt.h:237:3,
> > >> inlined from 'rte_ring_mc_hts_dequeue_bulk_elem' at
> > >> ../lib/ring/rte_ring_hts.h:83:9,
> > >> inlined from 'rte_ring_dequeue_bulk_elem' at
> > >> ../lib/ring/rte_ring_elem.h:391:10,
> > >> inlined from 'rte_ring_dequeue_elem' at
> > >> ../lib/ring/rte_ring_elem.h:476:9,
> > >> inlined from 'rte_ring_dequeue' at
> > >> ../lib/ring/rte_ring.h:463:9,
> > >> inlined from 'rxa_intr_ring_dequeue' at
> > >> ../lib/eventdev/rte_event_eth_rx_adapter.c:1196:10:
> > >> ../lib/ring/rte_ring_elem_pvt.h:234:25: error: 'memcpy' writing
> > >> 32 bytes into a region of size 8 overflows the destination
> > >> [-Werror=stringop-overflow=]
> > >> 234 | memcpy((void *)(obj + i), (void *)(ring + idx), 32);
> > >> |
> ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> > >>
> > >> Replacing memcpy with rte_memcpy fixes the GCC-12 compilation
> issue.
> > > Any reason why this replacement fixes the problem?
> > > Do you have any performance numbers with this change?
> > >
> > >> Also it would be better to change to rte_memcpy as the function is
> > >> called in fastpath.
> > > On Arm platforms, memcpy in the later versions has the best
> performance.
> >
> > I agree with Honnappa, it is better to keep memcpy() here.
> > Actually what is strange - why it ends up in
> > __rte_ring_dequeue_elems_128() at all?
> > Inside rxa_intr_ring_dequeue() we clearly doing: rte_ring_dequeue(),
> > which should boil down to ___rte_ring_dequeue_elems_64().
> > it should go to __rte_ring_dequeue_elems_128() at all.
>
> I agree. After having close look and doing few experiments, ideally it should
> not be going to __rte_ring_dequeue_elems_128().
> Sizeof(in call of rte_ring_dequeue_elem) gets evaluated at compile time
> which in this case it is evaluated to 8 bytes so
> __rte_ring_dequeue_elems_128() shall not be in the path. Looks like more
> of a gcc-12 bug.?
>
> >
> > Another q - is this warning happens only on arm platforms?
> Warning is observed on x86 with build type as debug.
> "meson --werror --buildtype=debug build"
>
> >
> > >
> > >>
> > >> Bugzilla ID: 1062
> > >> Fixes: 1fc73390bcf5 ("ring: refactor exported headers")
> > >> Cc: stable@dpdk.org
> > >>
> > >> Signed-off-by: Amit Prakash Shukla <amitprakashs@marvell.com>
> > >> ---
> > >> lib/ring/rte_ring_elem_pvt.h | 18 ++++++++++--------
> > >> 1 file changed, 10 insertions(+), 8 deletions(-)
> > >>
> > >> diff --git a/lib/ring/rte_ring_elem_pvt.h
> > >> b/lib/ring/rte_ring_elem_pvt.h index
> > >> 83788c56e6..3d85b13333 100644
> > >> --- a/lib/ring/rte_ring_elem_pvt.h
> > >> +++ b/lib/ring/rte_ring_elem_pvt.h
> > >> @@ -10,6 +10,8 @@
> > >> #ifndef _RTE_RING_ELEM_PVT_H_
> > >> #define _RTE_RING_ELEM_PVT_H_
> > >>
> > >> +#include <rte_memcpy.h>
> > >> +
> > >> static __rte_always_inline void
> > >> __rte_ring_enqueue_elems_32(struct rte_ring *r, const uint32_t size,
> > >> uint32_t idx, const void *obj_table, uint32_t n) @@ -97,20
> > >> +99,20 @@ __rte_ring_enqueue_elems_128(struct rte_ring *r,
> uint32_t
> > >> prod_head,
> > >> const rte_int128_t *obj = (const rte_int128_t *)obj_table;
> > >> if (likely(idx + n <= size)) {
> > >> for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
> > >> - memcpy((void *)(ring + idx),
> > >> + rte_memcpy((void *)(ring + idx),
> > >> (const void *)(obj + i), 32);
> > >> switch (n & 0x1) {
> > >> case 1:
> > >> - memcpy((void *)(ring + idx),
> > >> + rte_memcpy((void *)(ring + idx),
> > >> (const void *)(obj + i), 16);
> > >> }
> > >> } else {
> > >> for (i = 0; idx < size; i++, idx++)
> > >> - memcpy((void *)(ring + idx),
> > >> + rte_memcpy((void *)(ring + idx),
> > >> (const void *)(obj + i), 16);
> > >> /* Start at the beginning */
> > >> for (idx = 0; i < n; i++, idx++)
> > >> - memcpy((void *)(ring + idx),
> > >> + rte_memcpy((void *)(ring + idx),
> > >> (const void *)(obj + i), 16);
> > >> }
> > >> }
> > >> @@ -231,17 +233,17 @@ __rte_ring_dequeue_elems_128(struct
> rte_ring
> > >> *r, uint32_t prod_head,
> > >> rte_int128_t *obj = (rte_int128_t *)obj_table;
> > >> if (likely(idx + n <= size)) {
> > >> for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
> > >> - memcpy((void *)(obj + i), (void *)(ring + idx), 32);
> > >> + rte_memcpy((void *)(obj + i), (void *)(ring + idx),
> > 32);
> > >> switch (n & 0x1) {
> > >> case 1:
> > >> - memcpy((void *)(obj + i), (void *)(ring + idx), 16);
> > >> + rte_memcpy((void *)(obj + i), (void *)(ring + idx),
> > 16);
> > >> }
> > >> } else {
> > >> for (i = 0; idx < size; i++, idx++)
> > >> - memcpy((void *)(obj + i), (void *)(ring + idx), 16);
> > >> + rte_memcpy((void *)(obj + i), (void *)(ring + idx),
> > 16);
> > >> /* Start at the beginning */
> > >> for (idx = 0; i < n; i++, idx++)
> > >> - memcpy((void *)(obj + i), (void *)(ring + idx), 16);
> > >> + rte_memcpy((void *)(obj + i), (void *)(ring + idx),
> > 16);
> > >> }
> > >> }
> > >>
> > >> --
> > >> 2.25.1
> > >
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [EXT] Re: [PATCH] ring: compilation fix with GCC-12
2022-08-23 9:38 ` [EXT] " Amit Prakash Shukla
2022-08-23 9:41 ` Amit Prakash Shukla
@ 2023-01-12 21:41 ` Thomas Monjalon
2023-01-13 12:39 ` Amit Prakash Shukla
1 sibling, 1 reply; 10+ messages in thread
From: Thomas Monjalon @ 2023-01-12 21:41 UTC (permalink / raw)
To: Konstantin Ananyev, Honnappa Nagarahalli, Amit Prakash Shukla
Cc: dev, Jerin Jacob Kollanukkaran, david.marchand, bruce.richardson,
ferruh.yigit
23/08/2022 11:38, Amit Prakash Shukla:
> From: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
> > 06/08/2022 19:35, Honnappa Nagarahalli пишет:
> > >> Replacing memcpy with rte_memcpy fixes the GCC-12 compilation issue.
> > >
> > > Any reason why this replacement fixes the problem?
> > > Do you have any performance numbers with this change?
> > >
> > >> Also it would be better to change to rte_memcpy as the function is
> > >> called in fastpath.
> > >
> > > On Arm platforms, memcpy in the later versions has the best performance.
> >
> > I agree with Honnappa, it is better to keep memcpy() here.
> > Actually what is strange - why it ends up in
> > __rte_ring_dequeue_elems_128() at all?
> > Inside rxa_intr_ring_dequeue() we clearly doing: rte_ring_dequeue(), which
> > should boil down to ___rte_ring_dequeue_elems_64().
> > it should go to __rte_ring_dequeue_elems_128() at all.
>
> I agree. After having close look and doing few experiments,
> ideally it should not be going to __rte_ring_dequeue_elems_128().
> Sizeof(in call of rte_ring_enqueue_elem) gets evaluated at compile time
> which in this case it is evaluated to 8 bytes so
> __rte_ring_dequeue_elems_128() shall not be in the path. Looks like more of a gcc-12 bug.?
>
> > Another q - is this warning happens only on arm platforms?
>
> Warning is observed on x86 with build type as debug.
> "meson --werror --buildtype=debug build"
I confirm the compilation issue on x86 with GCC 12 in a debug build.
We need to find a workaround.
Has it been reported to GCC already?
^ permalink raw reply [flat|nested] 10+ messages in thread
* RE: [EXT] Re: [PATCH] ring: compilation fix with GCC-12
2023-01-12 21:41 ` Thomas Monjalon
@ 2023-01-13 12:39 ` Amit Prakash Shukla
2023-01-13 13:11 ` Thomas Monjalon
0 siblings, 1 reply; 10+ messages in thread
From: Amit Prakash Shukla @ 2023-01-13 12:39 UTC (permalink / raw)
To: Thomas Monjalon, Konstantin Ananyev, Honnappa Nagarahalli
Cc: dev, Jerin Jacob Kollanukkaran, david.marchand, bruce.richardson,
ferruh.yigit
Hi Thomas,
> -----Original Message-----
> From: Thomas Monjalon <thomas@monjalon.net>
> Sent: Friday, January 13, 2023 3:12 AM
> To: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>; Honnappa
> Nagarahalli <Honnappa.Nagarahalli@arm.com>; Amit Prakash Shukla
> <amitprakashs@marvell.com>
> Cc: dev@dpdk.org; Jerin Jacob Kollanukkaran <jerinj@marvell.com>;
> david.marchand@redhat.com; bruce.richardson@intel.com;
> ferruh.yigit@amd.com
> Subject: Re: [EXT] Re: [PATCH] ring: compilation fix with GCC-12
>
> 23/08/2022 11:38, Amit Prakash Shukla:
> > From: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
> > > 06/08/2022 19:35, Honnappa Nagarahalli пишет:
> > > >> Replacing memcpy with rte_memcpy fixes the GCC-12 compilation
> issue.
> > > >
> > > > Any reason why this replacement fixes the problem?
> > > > Do you have any performance numbers with this change?
> > > >
> > > >> Also it would be better to change to rte_memcpy as the function
> > > >> is called in fastpath.
> > > >
> > > > On Arm platforms, memcpy in the later versions has the best
> performance.
> > >
> > > I agree with Honnappa, it is better to keep memcpy() here.
> > > Actually what is strange - why it ends up in
> > > __rte_ring_dequeue_elems_128() at all?
> > > Inside rxa_intr_ring_dequeue() we clearly doing: rte_ring_dequeue(),
> > > which should boil down to ___rte_ring_dequeue_elems_64().
> > > it should go to __rte_ring_dequeue_elems_128() at all.
> >
> > I agree. After having close look and doing few experiments, ideally it
> > should not be going to __rte_ring_dequeue_elems_128().
> > Sizeof(in call of rte_ring_enqueue_elem) gets evaluated at compile
> > time which in this case it is evaluated to 8 bytes so
> > __rte_ring_dequeue_elems_128() shall not be in the path. Looks like more
> of a gcc-12 bug.?
> >
> > > Another q - is this warning happens only on arm platforms?
> >
> > Warning is observed on x86 with build type as debug.
> > "meson --werror --buildtype=debug build"
>
> I confirm the compilation issue on x86 with GCC 12 in a debug build.
>
> We need to find a workaround.
> Is it reported to GCC already?
>
I found an old GCC bug report describing a similar issue. It appears to have been reopened recently, in December 2022; I am not sure whether it was reopened specifically for GCC 12.
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89689
Kevin has pushed a workaround for DPDK 21.11.3.
https://git.dpdk.org/dpdk-stable/commit/?h=21.11&id=e1d728588dc73af9ed60cc0074d51a7f24b2ba60
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [EXT] Re: [PATCH] ring: compilation fix with GCC-12
2023-01-13 12:39 ` Amit Prakash Shukla
@ 2023-01-13 13:11 ` Thomas Monjalon
2023-02-13 1:48 ` Konstantin Ananyev
0 siblings, 1 reply; 10+ messages in thread
From: Thomas Monjalon @ 2023-01-13 13:11 UTC (permalink / raw)
To: Konstantin Ananyev, Honnappa Nagarahalli, Amit Prakash Shukla
Cc: dev, Jerin Jacob Kollanukkaran, david.marchand, bruce.richardson,
ferruh.yigit
13/01/2023 13:39, Amit Prakash Shukla:
> From: Thomas Monjalon <thomas@monjalon.net>
> > 23/08/2022 11:38, Amit Prakash Shukla:
> > > From: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
> > > > 06/08/2022 19:35, Honnappa Nagarahalli пишет:
> > > > >> Replacing memcpy with rte_memcpy fixes the GCC-12 compilation
> > issue.
> > > > >
> > > > > Any reason why this replacement fixes the problem?
> > > > > Do you have any performance numbers with this change?
> > > > >
> > > > >> Also it would be better to change to rte_memcpy as the function
> > > > >> is called in fastpath.
> > > > >
> > > > > On Arm platforms, memcpy in the later versions has the best
> > performance.
> > > >
> > > > I agree with Honnappa, it is better to keep memcpy() here.
> > > > Actually what is strange - why it ends up in
> > > > __rte_ring_dequeue_elems_128() at all?
> > > > Inside rxa_intr_ring_dequeue() we clearly doing: rte_ring_dequeue(),
> > > > which should boil down to ___rte_ring_dequeue_elems_64().
> > > > it should go to __rte_ring_dequeue_elems_128() at all.
> > >
> > > I agree. After having close look and doing few experiments, ideally it
> > > should not be going to __rte_ring_dequeue_elems_128().
> > > Sizeof(in call of rte_ring_enqueue_elem) gets evaluated at compile
> > > time which in this case it is evaluated to 8 bytes so
> > > __rte_ring_dequeue_elems_128() shall not be in the path. Looks like more
> > of a gcc-12 bug.?
> > >
> > > > Another q - is this warning happens only on arm platforms?
> > >
> > > Warning is observed on x86 with build type as debug.
> > > "meson --werror --buildtype=debug build"
> >
> > I confirm the compilation issue on x86 with GCC 12 in a debug build.
> >
> > We need to find a workaround.
> > Is it reported to GCC already?
> >
> I found an old gcc bug reporting similar issue. This bug seems to be re-opened recently in Dec-2022. Not sure if it is reopened specifically for gcc-12.
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89689
Would you please open a bug specific to GCC 12?
> Kevin has push a work around for DPDK-21.11.3.
> https://git.dpdk.org/dpdk-stable/commit/?h=21.11&id=e1d728588dc73af9ed60cc0074d51a7f24b2ba60
In the meantime we could use Kevin's workaround:
#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION >= 120000)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstringop-overflow"
#pragma GCC diagnostic ignored "-Wstringop-overread"
#endif
Opinions?
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [EXT] Re: [PATCH] ring: compilation fix with GCC-12
2023-01-13 13:11 ` Thomas Monjalon
@ 2023-02-13 1:48 ` Konstantin Ananyev
0 siblings, 0 replies; 10+ messages in thread
From: Konstantin Ananyev @ 2023-02-13 1:48 UTC (permalink / raw)
To: Thomas Monjalon, Honnappa Nagarahalli, Amit Prakash Shukla
Cc: dev, Jerin Jacob Kollanukkaran, david.marchand, bruce.richardson,
ferruh.yigit
13/01/2023 13:11, Thomas Monjalon пишет:
> 13/01/2023 13:39, Amit Prakash Shukla:
>> From: Thomas Monjalon <thomas@monjalon.net>
>>> 23/08/2022 11:38, Amit Prakash Shukla:
>>>> From: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
>>>>> 06/08/2022 19:35, Honnappa Nagarahalli пишет:
>>>>>>> Replacing memcpy with rte_memcpy fixes the GCC-12 compilation
>>> issue.
>>>>>>
>>>>>> Any reason why this replacement fixes the problem?
>>>>>> Do you have any performance numbers with this change?
>>>>>>
>>>>>>> Also it would be better to change to rte_memcpy as the function
>>>>>>> is called in fastpath.
>>>>>>
>>>>>> On Arm platforms, memcpy in the later versions has the best
>>> performance.
>>>>>
>>>>> I agree with Honnappa, it is better to keep memcpy() here.
>>>>> Actually what is strange - why it ends up in
>>>>> __rte_ring_dequeue_elems_128() at all?
>>>>> Inside rxa_intr_ring_dequeue() we clearly doing: rte_ring_dequeue(),
>>>>> which should boil down to ___rte_ring_dequeue_elems_64().
>>>>> it should go to __rte_ring_dequeue_elems_128() at all.
>>>>
>>>> I agree. After having close look and doing few experiments, ideally it
>>>> should not be going to __rte_ring_dequeue_elems_128().
>>>> Sizeof(in call of rte_ring_enqueue_elem) gets evaluated at compile
>>>> time which in this case it is evaluated to 8 bytes so
>>>> __rte_ring_dequeue_elems_128() shall not be in the path. Looks like more
>>> of a gcc-12 bug.?
>>>>
>>>>> Another q - is this warning happens only on arm platforms?
>>>>
>>>> Warning is observed on x86 with build type as debug.
>>>> "meson --werror --buildtype=debug build"
>>>
>>> I confirm the compilation issue on x86 with GCC 12 in a debug build.
>>>
>>> We need to find a workaround.
>>> Is it reported to GCC already?
>>>
>> I found an old gcc bug reporting similar issue. This bug seems to be re-opened recently in Dec-2022. Not sure if it is reopened specifically for gcc-12.
>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89689
>
> Please would you like to open a bug specific to GCC 12?
>
>> Kevin has push a work around for DPDK-21.11.3.
>> https://git.dpdk.org/dpdk-stable/commit/?h=21.11&id=e1d728588dc73af9ed60cc0074d51a7f24b2ba60
>
> In the meantime we could use Kevin's workaround:
>
> #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION >= 120000)
> #pragma GCC diagnostic push
> #pragma GCC diagnostic ignored "-Wstringop-overflow"
> #pragma GCC diagnostic ignored "-Wstringop-overread"
> #endif
>
> Opinions?
>
>
Yep, disabling the warnings should work.
Another way to consider - change the enqueue/dequeue_elems_128()
functions to not use memcpy() at all.
Instead, they can copy 2*num 64-bit entities directly,
the same way the _64_ versions do.
Something like the patch below.
That's pretty similar to what Amit initially proposed,
but without rte_memcpy() involvement.
Performance-wise I don't expect noticeable difference with
what we have right now.
But sure, we'll need to do extra checks here.
diff --git a/lib/ring/rte_ring_elem_pvt.h b/lib/ring/rte_ring_elem_pvt.h
index 83788c56e6..de79040618 100644
--- a/lib/ring/rte_ring_elem_pvt.h
+++ b/lib/ring/rte_ring_elem_pvt.h
@@ -93,25 +93,32 @@ __rte_ring_enqueue_elems_128(struct rte_ring *r,
uint32_t prod_head,
unsigned int i;
const uint32_t size = r->size;
uint32_t idx = prod_head & r->mask;
- rte_int128_t *ring = (rte_int128_t *)&r[1];
- const rte_int128_t *obj = (const rte_int128_t *)obj_table;
+ uint64_t *ring = (uint64_t *)&r[1];
+ const unaligned_uint64_t *obj = (const unaligned_uint64_t *)obj_table;
if (likely(idx + n <= size)) {
- for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
- memcpy((void *)(ring + idx),
- (const void *)(obj + i), 32);
+ idx *= 2;
+ for (i = 0; i < 2 * (n & ~0x1); i += 4, idx += 4) {
+ ring[idx] = obj[i];
+ ring[idx + 1] = obj[i + 1];
+ ring[idx + 2] = obj[i + 2];
+ ring[idx + 3] = obj[i + 3];
+ }
switch (n & 0x1) {
case 1:
- memcpy((void *)(ring + idx),
- (const void *)(obj + i), 16);
+ ring[idx] = obj[i];
+ ring[idx + 1] = obj[i + 1];
}
} else {
- for (i = 0; idx < size; i++, idx++)
- memcpy((void *)(ring + idx),
- (const void *)(obj + i), 16);
+ idx *= 2;
+ for (i = 0; idx < 2 * size; i += 2, idx += 2) {
+ ring[idx] = obj[i];
+ ring[idx + 1] = obj[i + 1];
+ }
/* Start at the beginning */
- for (idx = 0; i < n; i++, idx++)
- memcpy((void *)(ring + idx),
- (const void *)(obj + i), 16);
+ for (idx = 0; i < 2 * n; i += 2, idx += 2) {
+ ring[idx] = obj[i];
+ ring[idx + 1] = obj[i + 1];
+ }
}
}
@@ -227,21 +234,32 @@ __rte_ring_dequeue_elems_128(struct rte_ring *r,
uint32_t prod_head,
unsigned int i;
const uint32_t size = r->size;
uint32_t idx = prod_head & r->mask;
- rte_int128_t *ring = (rte_int128_t *)&r[1];
- rte_int128_t *obj = (rte_int128_t *)obj_table;
+ uint64_t *ring = (uint64_t *)&r[1];
+ unaligned_uint64_t *obj = (unaligned_uint64_t *)obj_table;
if (likely(idx + n <= size)) {
- for (i = 0; i < (n & ~0x1); i += 2, idx += 2)
- memcpy((void *)(obj + i), (void *)(ring + idx), 32);
+ idx *= 2;
+ for (i = 0; i < 2 * (n & ~0x1); i += 4, idx += 4) {
+ obj[i] = ring[idx];
+ obj[i + 1] = ring[idx + 1];
+ obj[i + 2] = ring[idx + 2];
+ obj[i + 3] = ring[idx + 3];
+ }
switch (n & 0x1) {
case 1:
- memcpy((void *)(obj + i), (void *)(ring + idx), 16);
+ obj[i] = ring[idx];
+ obj[i + 1] = ring[idx + 1];
}
} else {
- for (i = 0; idx < size; i++, idx++)
- memcpy((void *)(obj + i), (void *)(ring + idx), 16);
+ idx *= 2;
+ for (i = 0; idx < 2 * size; i += 2, idx += 2) {
+ obj[i] = ring[idx];
+ obj[i + 1] = ring[idx + 1];
+ }
/* Start at the beginning */
- for (idx = 0; i < n; i++, idx++)
- memcpy((void *)(obj + i), (void *)(ring + idx), 16);
+ for (idx = 0; i < 2 * n; i += 2, idx += 2) {
+ obj[i] = ring[idx];
+ obj[i + 1] = ring[idx + 1];
+ }
}
}
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2023-02-13 1:48 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-05 9:03 [PATCH] ring: compilation fix with GCC-12 Amit Prakash Shukla
2022-08-05 15:37 ` Stephen Hemminger
2022-08-06 18:35 ` Honnappa Nagarahalli
2022-08-07 12:26 ` Konstantin Ananyev
2022-08-23 9:38 ` [EXT] " Amit Prakash Shukla
2022-08-23 9:41 ` Amit Prakash Shukla
2023-01-12 21:41 ` Thomas Monjalon
2023-01-13 12:39 ` Amit Prakash Shukla
2023-01-13 13:11 ` Thomas Monjalon
2023-02-13 1:48 ` Konstantin Ananyev
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).