DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [RFC PATCH] lpm: add sve support for lookup on Arm platform
@ 2020-12-18 10:12 Ruifeng Wang
  2021-01-05 15:44 ` Medvedkin, Vladimir
                   ` (2 more replies)
  0 siblings, 3 replies; 43+ messages in thread
From: Ruifeng Wang @ 2020-12-18 10:12 UTC (permalink / raw)
  To: Jan Viktorin, Ruifeng Wang, Jerin Jacob, Bruce Richardson,
	Vladimir Medvedkin
  Cc: dev, hemant.agrawal, honnappa.nagarahalli, nd

Added a new path to do lpm4 lookup by using the Scalable Vector Extension (SVE).
The SVE path will be selected if the compiler defines __ARM_FEATURE_SVE.

Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/librte_eal/arm/include/rte_vect.h |  3 +
 lib/librte_lpm/meson.build            |  2 +-
 lib/librte_lpm/rte_lpm.h              |  4 ++
 lib/librte_lpm/rte_lpm_sve.h          | 83 +++++++++++++++++++++++++++
 4 files changed, 91 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_lpm/rte_lpm_sve.h

diff --git a/lib/librte_eal/arm/include/rte_vect.h b/lib/librte_eal/arm/include/rte_vect.h
index a739e6e66..093e9122a 100644
--- a/lib/librte_eal/arm/include/rte_vect.h
+++ b/lib/librte_eal/arm/include/rte_vect.h
@@ -9,6 +9,9 @@
 #include "generic/rte_vect.h"
 #include "rte_debug.h"
 #include "arm_neon.h"
+#ifdef __ARM_FEATURE_SVE
+#include <arm_sve.h>
+#endif
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/lib/librte_lpm/meson.build b/lib/librte_lpm/meson.build
index 6cfc083c5..f93c86640 100644
--- a/lib/librte_lpm/meson.build
+++ b/lib/librte_lpm/meson.build
@@ -5,6 +5,6 @@ sources = files('rte_lpm.c', 'rte_lpm6.c')
 headers = files('rte_lpm.h', 'rte_lpm6.h')
 # since header files have different names, we can install all vector headers
 # without worrying about which architecture we actually need
-headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h')
+headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h', 'rte_lpm_sve.h')
 deps += ['hash']
 deps += ['rcu']
diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
index 1afe55cdc..28b57683b 100644
--- a/lib/librte_lpm/rte_lpm.h
+++ b/lib/librte_lpm/rte_lpm.h
@@ -402,7 +402,11 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
 	uint32_t defv);
 
 #if defined(RTE_ARCH_ARM)
+#ifdef __ARM_FEATURE_SVE
+#include "rte_lpm_sve.h"
+#else
 #include "rte_lpm_neon.h"
+#endif
 #elif defined(RTE_ARCH_PPC_64)
 #include "rte_lpm_altivec.h"
 #else
diff --git a/lib/librte_lpm/rte_lpm_sve.h b/lib/librte_lpm/rte_lpm_sve.h
new file mode 100644
index 000000000..86576ec52
--- /dev/null
+++ b/lib/librte_lpm/rte_lpm_sve.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Arm Limited
+ */
+
+#ifndef _RTE_LPM_SVE_H_
+#define _RTE_LPM_SVE_H_
+
+#include <rte_vect.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+__rte_internal
+static void
+__rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips,
+		uint32_t *__rte_restrict next_hops, const uint32_t n)
+{
+	uint32_t i = 0;
+	svuint32_t v_ip, v_idx, v_tbl24, v_tbl8, v_hop;
+	svuint32_t v_mask_xv, v_mask_v, v_mask_hop;
+	svbool_t pg = svwhilelt_b32(i, n);
+	svbool_t pv;
+
+	do {
+		v_ip = svld1(pg, &ips[i]);
+		/* Get indices for tbl24[] */
+		v_idx = svlsr_x(pg, v_ip, 8);
+		/* Extract values from tbl24[] */
+		v_tbl24 = svld1_gather_index(pg, (const uint32_t *)lpm->tbl24,
+						v_idx);
+
+		/* Create mask with valid set */
+		v_mask_v = svdup_u32_z(pg, RTE_LPM_LOOKUP_SUCCESS);
+		/* Create mask with valid and valid_group set */
+		v_mask_xv = svdup_u32_z(pg, RTE_LPM_VALID_EXT_ENTRY_BITMASK);
+		/* Create predicate for tbl24 entries: (valid && !valid_group) */
+		pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), v_mask_v);
+		/* Create mask for next_hop in table entry */
+		v_mask_hop = svdup_u32_z(pg, 0x00ffffff);
+		/* Extract next_hop and write back */
+		v_hop = svand_x(pv, v_tbl24, v_mask_hop);
+		svst1(pv, &next_hops[i], v_hop);
+
+		/* Update predicate for tbl24 entries: (valid && valid_group) */
+		pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), v_mask_xv);
+		/* Compute tbl8 index */
+		v_idx = svand_x(pv, v_tbl24, svdup_u32_z(pv, 0xff));
+		v_idx = svmul_x(pv, v_idx, RTE_LPM_TBL8_GROUP_NUM_ENTRIES);
+		v_idx = svadd_x(pv, svand_x(pv, v_ip, svdup_u32_z(pv, 0xff)),
+				v_idx);
+		/* Extract values from tbl8[] */
+		v_tbl8 = svld1_gather_index(pv, (const uint32_t *)lpm->tbl8,
+						v_idx);
+		/* Update predicate for tbl8 entries: (valid) */
+		pv = svcmpeq(pv, svand_z(pv, v_tbl8, v_mask_v), v_mask_v);
+		/* Extract next_hop and write back */
+		v_hop = svand_x(pv, v_tbl8, v_mask_hop);
+		svst1(pv, &next_hops[i], v_hop);
+
+		i += svlen(v_ip);
+		pg = svwhilelt_b32(i, n);
+	} while (svptest_any(svptrue_b32(), pg));
+}
+
+static inline void
+rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
+		uint32_t defv)
+{
+	uint32_t i, ips[4];
+
+	vst1q_s32((int32_t *)ips, ip);
+	for (i = 0; i < 4; i++)
+		hop[i] = defv;
+
+	__rte_lpm_lookup_vec(lpm, ips, hop, 4);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_LPM_SVE_H_ */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [RFC PATCH] lpm: add sve support for lookup on Arm platform
  2020-12-18 10:12 [dpdk-dev] [RFC PATCH] lpm: add sve support for lookup on Arm platform Ruifeng Wang
@ 2021-01-05 15:44 ` Medvedkin, Vladimir
  2021-01-06 10:11   ` Ruifeng Wang
  2021-01-08  8:25 ` [dpdk-dev] [PATCH v2 0/5] lpm lookup with sve support Ruifeng Wang
  2021-01-12  2:57 ` [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support Ruifeng Wang
  2 siblings, 1 reply; 43+ messages in thread
From: Medvedkin, Vladimir @ 2021-01-05 15:44 UTC (permalink / raw)
  To: Ruifeng Wang, Jan Viktorin, Jerin Jacob, Bruce Richardson
  Cc: dev, hemant.agrawal, honnappa.nagarahalli, nd

Hi Ruifeng,

Thanks for the patch, see comments below

On 18/12/2020 10:12, Ruifeng Wang wrote:
> Added new path to do lpm4 lookup by using scalable vector extension.
> The SVE path will be selected if compiler has flag SVE set.
> 
> Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>   lib/librte_eal/arm/include/rte_vect.h |  3 +
>   lib/librte_lpm/meson.build            |  2 +-
>   lib/librte_lpm/rte_lpm.h              |  4 ++
>   lib/librte_lpm/rte_lpm_sve.h          | 83 +++++++++++++++++++++++++++
>   4 files changed, 91 insertions(+), 1 deletion(-)
>   create mode 100644 lib/librte_lpm/rte_lpm_sve.h
> 
> diff --git a/lib/librte_eal/arm/include/rte_vect.h b/lib/librte_eal/arm/include/rte_vect.h
> index a739e6e66..093e9122a 100644
> --- a/lib/librte_eal/arm/include/rte_vect.h
> +++ b/lib/librte_eal/arm/include/rte_vect.h
> @@ -9,6 +9,9 @@
>   #include "generic/rte_vect.h"
>   #include "rte_debug.h"
>   #include "arm_neon.h"
> +#ifdef __ARM_FEATURE_SVE
> +#include <arm_sve.h>
> +#endif
>   
>   #ifdef __cplusplus
>   extern "C" {
> diff --git a/lib/librte_lpm/meson.build b/lib/librte_lpm/meson.build
> index 6cfc083c5..f93c86640 100644
> --- a/lib/librte_lpm/meson.build
> +++ b/lib/librte_lpm/meson.build
> @@ -5,6 +5,6 @@ sources = files('rte_lpm.c', 'rte_lpm6.c')
>   headers = files('rte_lpm.h', 'rte_lpm6.h')
>   # since header files have different names, we can install all vector headers
>   # without worrying about which architecture we actually need
> -headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h')
> +headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h', 'rte_lpm_sve.h')
>   deps += ['hash']
>   deps += ['rcu']
> diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
> index 1afe55cdc..28b57683b 100644
> --- a/lib/librte_lpm/rte_lpm.h
> +++ b/lib/librte_lpm/rte_lpm.h
> @@ -402,7 +402,11 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
>   	uint32_t defv);
>   
>   #if defined(RTE_ARCH_ARM)
> +#ifdef __ARM_FEATURE_SVE
> +#include "rte_lpm_sve.h"
> +#else
>   #include "rte_lpm_neon.h"
> +#endif
>   #elif defined(RTE_ARCH_PPC_64)
>   #include "rte_lpm_altivec.h"
>   #else
> diff --git a/lib/librte_lpm/rte_lpm_sve.h b/lib/librte_lpm/rte_lpm_sve.h
> new file mode 100644
> index 000000000..86576ec52
> --- /dev/null
> +++ b/lib/librte_lpm/rte_lpm_sve.h
> @@ -0,0 +1,83 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Arm Limited
> + */
> +
> +#ifndef _RTE_LPM_SVE_H_
> +#define _RTE_LPM_SVE_H_
> +
> +#include <rte_vect.h>
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +__rte_internal
> +static void
> +__rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips,
> +		uint32_t *__rte_restrict next_hops, const uint32_t n)
> +{
> +	uint32_t i = 0;
> +	svuint32_t v_ip, v_idx, v_tbl24, v_tbl8, v_hop;
> +	svuint32_t v_mask_xv, v_mask_v, v_mask_hop;
> +	svbool_t pg = svwhilelt_b32(i, n);
> +	svbool_t pv;
> +
> +	do {
> +		v_ip = svld1(pg, &ips[i]);
> +		/* Get indices for tbl24[] */
> +		v_idx = svlsr_x(pg, v_ip, 8);
> +		/* Extract values from tbl24[] */
> +		v_tbl24 = svld1_gather_index(pg, (const uint32_t *)lpm->tbl24,
> +						v_idx);
> +
> +		/* Create mask with valid set */
> +		v_mask_v = svdup_u32_z(pg, RTE_LPM_LOOKUP_SUCCESS);
> +		/* Create mask with valid and valid_group set */
> +		v_mask_xv = svdup_u32_z(pg, RTE_LPM_VALID_EXT_ENTRY_BITMASK);
> +		/* Create predicate for tbl24 entries: (valid && !valid_group) */
> +		pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), v_mask_v);
> +		/* Create mask for next_hop in table entry */
> +		v_mask_hop = svdup_u32_z(pg, 0x00ffffff);
> +		/* Extract next_hop and write back */
> +		v_hop = svand_x(pv, v_tbl24, v_mask_hop);
> +		svst1(pv, &next_hops[i], v_hop);
> +
> +		/* Update predicate for tbl24 entries: (valid && valid_group) */
> +		pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), v_mask_xv);
> +		/* Compute tbl8 index */
> +		v_idx = svand_x(pv, v_tbl24, svdup_u32_z(pv, 0xff));

Looks like here it should be
v_idx = svand_x(pv, v_tbl24, svdup_u32_z(pv, 0xffffff));
because we are using 24 bits to keep tbl8 group.


> +		v_idx = svmul_x(pv, v_idx, RTE_LPM_TBL8_GROUP_NUM_ENTRIES);
> +		v_idx = svadd_x(pv, svand_x(pv, v_ip, svdup_u32_z(pv, 0xff)),
> +				v_idx);
> +		/* Extract values from tbl8[] */
> +		v_tbl8 = svld1_gather_index(pv, (const uint32_t *)lpm->tbl8,
> +						v_idx);
> +		/* Update predicate for tbl8 entries: (valid) */
> +		pv = svcmpeq(pv, svand_z(pv, v_tbl8, v_mask_v), v_mask_v);
> +		/* Extract next_hop and write back */
> +		v_hop = svand_x(pv, v_tbl8, v_mask_hop);
> +		svst1(pv, &next_hops[i], v_hop);

I'm not an expert, but probably it would be better to merge two stores 
(svst1) into a single one?

> +
> +		i += svlen(v_ip);
> +		pg = svwhilelt_b32(i, n);

Isn't it better to move the predicate calculation to the beginning of 
the loop and just do {} while (i < n)?

> +	} while (svptest_any(svptrue_b32(), pg));
> +}
> +
> +static inline void
> +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> +		uint32_t defv)
> +{
> +	uint32_t i, ips[4];
> +
> +	vst1q_s32((int32_t *)ips, ip);
> +	for (i = 0; i < 4; i++)
> +		hop[i] = defv;
> +
> +	__rte_lpm_lookup_vec(lpm, ips, hop, 4);
> +}
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _RTE_LPM_SVE_H_ */
> 

-- 
Regards,
Vladimir

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [RFC PATCH] lpm: add sve support for lookup on Arm platform
  2021-01-05 15:44 ` Medvedkin, Vladimir
@ 2021-01-06 10:11   ` Ruifeng Wang
  0 siblings, 0 replies; 43+ messages in thread
From: Ruifeng Wang @ 2021-01-06 10:11 UTC (permalink / raw)
  To: Medvedkin, Vladimir, Jan Viktorin, jerinj, Bruce Richardson
  Cc: dev, hemant.agrawal, Honnappa Nagarahalli, nd, nd

> -----Original Message-----
> From: Medvedkin, Vladimir <vladimir.medvedkin@intel.com>
> Sent: Tuesday, January 5, 2021 11:44 PM
> To: Ruifeng Wang <Ruifeng.Wang@arm.com>; Jan Viktorin
> <viktorin@rehivetech.com>; jerinj@marvell.com; Bruce Richardson
> <bruce.richardson@intel.com>
> Cc: dev@dpdk.org; hemant.agrawal@nxp.com; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>
> Subject: Re: [RFC PATCH] lpm: add sve support for lookup on Arm platform
> 
> Hi Ruifeng,
> 
> Thanks for the patch, see comments below

Hi Vladimir,
Thank you for your review.

> 
> On 18/12/2020 10:12, Ruifeng Wang wrote:
> > Added new path to do lpm4 lookup by using scalable vector extension.
> > The SVE path will be selected if compiler has flag SVE set.
> >
> > Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > ---
> >   lib/librte_eal/arm/include/rte_vect.h |  3 +
> >   lib/librte_lpm/meson.build            |  2 +-
> >   lib/librte_lpm/rte_lpm.h              |  4 ++
> >   lib/librte_lpm/rte_lpm_sve.h          | 83 +++++++++++++++++++++++++++
> >   4 files changed, 91 insertions(+), 1 deletion(-)
> >   create mode 100644 lib/librte_lpm/rte_lpm_sve.h
> >
> > diff --git a/lib/librte_eal/arm/include/rte_vect.h
> > b/lib/librte_eal/arm/include/rte_vect.h
> > index a739e6e66..093e9122a 100644
> > --- a/lib/librte_eal/arm/include/rte_vect.h
> > +++ b/lib/librte_eal/arm/include/rte_vect.h
> > @@ -9,6 +9,9 @@
> >   #include "generic/rte_vect.h"
> >   #include "rte_debug.h"
> >   #include "arm_neon.h"
> > +#ifdef __ARM_FEATURE_SVE
> > +#include <arm_sve.h>
> > +#endif
> >
> >   #ifdef __cplusplus
> >   extern "C" {
> > diff --git a/lib/librte_lpm/meson.build b/lib/librte_lpm/meson.build
> > index 6cfc083c5..f93c86640 100644
> > --- a/lib/librte_lpm/meson.build
> > +++ b/lib/librte_lpm/meson.build
> > @@ -5,6 +5,6 @@ sources = files('rte_lpm.c', 'rte_lpm6.c')
> >   headers = files('rte_lpm.h', 'rte_lpm6.h')
> >   # since header files have different names, we can install all vector headers
> >   # without worrying about which architecture we actually need
> > -headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h',
> > 'rte_lpm_sse.h')
> > +headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h',
> > +'rte_lpm_sse.h', 'rte_lpm_sve.h')
> >   deps += ['hash']
> >   deps += ['rcu']
> > diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h index
> > 1afe55cdc..28b57683b 100644
> > --- a/lib/librte_lpm/rte_lpm.h
> > +++ b/lib/librte_lpm/rte_lpm.h
> > @@ -402,7 +402,11 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm,
> xmm_t ip, uint32_t hop[4],
> >   	uint32_t defv);
> >
> >   #if defined(RTE_ARCH_ARM)
> > +#ifdef __ARM_FEATURE_SVE
> > +#include "rte_lpm_sve.h"
> > +#else
> >   #include "rte_lpm_neon.h"
> > +#endif
> >   #elif defined(RTE_ARCH_PPC_64)
> >   #include "rte_lpm_altivec.h"
> >   #else
> > diff --git a/lib/librte_lpm/rte_lpm_sve.h
> > b/lib/librte_lpm/rte_lpm_sve.h new file mode 100644 index
> > 000000000..86576ec52
> > --- /dev/null
> > +++ b/lib/librte_lpm/rte_lpm_sve.h
> > @@ -0,0 +1,83 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2020 Arm Limited
> > + */
> > +
> > +#ifndef _RTE_LPM_SVE_H_
> > +#define _RTE_LPM_SVE_H_
> > +
> > +#include <rte_vect.h>
> > +
> > +#ifdef __cplusplus
> > +extern "C" {
> > +#endif
> > +
> > +__rte_internal
> > +static void
> > +__rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips,
> > +		uint32_t *__rte_restrict next_hops, const uint32_t n) {
> > +	uint32_t i = 0;
> > +	svuint32_t v_ip, v_idx, v_tbl24, v_tbl8, v_hop;
> > +	svuint32_t v_mask_xv, v_mask_v, v_mask_hop;
> > +	svbool_t pg = svwhilelt_b32(i, n);
> > +	svbool_t pv;
> > +
> > +	do {
> > +		v_ip = svld1(pg, &ips[i]);
> > +		/* Get indices for tbl24[] */
> > +		v_idx = svlsr_x(pg, v_ip, 8);
> > +		/* Extract values from tbl24[] */
> > +		v_tbl24 = svld1_gather_index(pg, (const uint32_t *)lpm-
> >tbl24,
> > +						v_idx);
> > +
> > +		/* Create mask with valid set */
> > +		v_mask_v = svdup_u32_z(pg, RTE_LPM_LOOKUP_SUCCESS);
> > +		/* Create mask with valid and valid_group set */
> > +		v_mask_xv = svdup_u32_z(pg,
> RTE_LPM_VALID_EXT_ENTRY_BITMASK);
> > +		/* Create predicate for tbl24 entries: (valid && !valid_group)
> */
> > +		pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv),
> v_mask_v);
> > +		/* Create mask for next_hop in table entry */
> > +		v_mask_hop = svdup_u32_z(pg, 0x00ffffff);
> > +		/* Extract next_hop and write back */
> > +		v_hop = svand_x(pv, v_tbl24, v_mask_hop);
> > +		svst1(pv, &next_hops[i], v_hop);
> > +
> > +		/* Update predicate for tbl24 entries: (valid && valid_group)
> */
> > +		pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv),
> v_mask_xv);
> > +		/* Compute tbl8 index */
> > +		v_idx = svand_x(pv, v_tbl24, svdup_u32_z(pv, 0xff));
> 
> Looks like here it should be
> v_idx = svand_x(pv, v_tbl24, svdup_u32_z(pv, 0xffffff)); because we are
> using 24 bits to keep tbl8 group.

Yes, the mask should be 0xffffff.

I also noticed that this issue is common to all vector lookup implementations (NEON/SSE/ALTIVEC).
I'll correct this and fix the other vector implementations in the next version.

> 
> 
> > +		v_idx = svmul_x(pv, v_idx,
> RTE_LPM_TBL8_GROUP_NUM_ENTRIES);
> > +		v_idx = svadd_x(pv, svand_x(pv, v_ip, svdup_u32_z(pv,
> 0xff)),
> > +				v_idx);
> > +		/* Extract values from tbl8[] */
> > +		v_tbl8 = svld1_gather_index(pv, (const uint32_t *)lpm->tbl8,
> > +						v_idx);
> > +		/* Update predicate for tbl8 entries: (valid) */
> > +		pv = svcmpeq(pv, svand_z(pv, v_tbl8, v_mask_v), v_mask_v);
> > +		/* Extract next_hop and write back */
> > +		v_hop = svand_x(pv, v_tbl8, v_mask_hop);
> > +		svst1(pv, &next_hops[i], v_hop);
> 
> I'm not an expert, but probably it would be better to merge two stores
> (svst1) into a single one?

I think we can keep the current implementation.
In most cases, tbl24 entries will not be expanded, so the SVE predicate for tbl8 processing will be zero.
The operations on tbl8 will then be null operations.
I think it is better not to mix the two stores (from tbl24 and from tbl8).
> 
> > +
> > +		i += svlen(v_ip);
> > +		pg = svwhilelt_b32(i, n);
> 
> Isn't it better to move the predicate calculation to the beginning of the loop
> and just do {} while (i < n)?

Yes, that also works.
I think checking the SVE predicate is the recommended way to write a vector-length-agnostic loop.
It is more generic and flexible.
> 
> > +	} while (svptest_any(svptrue_b32(), pg)); }
> > +
> > +static inline void
> > +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> > +		uint32_t defv)
> > +{
> > +	uint32_t i, ips[4];
> > +
> > +	vst1q_s32((int32_t *)ips, ip);
> > +	for (i = 0; i < 4; i++)
> > +		hop[i] = defv;
> > +
> > +	__rte_lpm_lookup_vec(lpm, ips, hop, 4); }
> > +
> > +#ifdef __cplusplus
> > +}
> > +#endif
> > +
> > +#endif /* _RTE_LPM_SVE_H_ */
> >
> 
> --
> Regards,
> Vladimir

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [dpdk-dev] [PATCH v2 0/5] lpm lookup with sve support
  2020-12-18 10:12 [dpdk-dev] [RFC PATCH] lpm: add sve support for lookup on Arm platform Ruifeng Wang
  2021-01-05 15:44 ` Medvedkin, Vladimir
@ 2021-01-08  8:25 ` Ruifeng Wang
  2021-01-08  8:25   ` [dpdk-dev] [PATCH v2 1/5] lpm: add sve support for lookup on Arm platform Ruifeng Wang
                     ` (4 more replies)
  2021-01-12  2:57 ` [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support Ruifeng Wang
  2 siblings, 5 replies; 43+ messages in thread
From: Ruifeng Wang @ 2021-01-08  8:25 UTC (permalink / raw)
  Cc: dev, vladimir.medvedkin, jerinj, hemant.agrawal,
	honnappa.nagarahalli, nd, Ruifeng Wang

Added an lpm4 lookupx4 implementation using the Arm SVE extension.
SVE is the Scalable Vector Extension, which is exposed to the
user through a vector-length-agnostic interface.
Refer to [1] for more information about SVE.

Configuration was added for Neoverse N2 CPU which has SVE support.

Some build bugs were fixed so that compiling with SVE enabled succeeds.

[1] https://developer.arm.com/tools-and-software/server-and-hpc/compile/arm-instruction-emulator/resources/tutorials/sve

---
v2:
Fixed tbl8 group index calculation. (Vladimir)
Added N2 config.
Fixed compiling when sve was enabled.

Ruifeng Wang (5):
  lpm: add sve support for lookup on Arm platform
  net/hns3: fix build with sve enabled
  net/octeontx: fix build with sve enabled
  common/octeontx2: fix build with sve enabled
  config: add Arm Neoverse N2

 config/arm/arm64_n2_linux_gcc            | 17 +++++
 config/arm/meson.build                   | 11 +++-
 drivers/common/octeontx2/otx2_io_arm64.h | 37 ++---------
 drivers/net/hns3/meson.build             |  2 +-
 drivers/net/octeontx/base/octeontx_io.h  | 16 ++---
 lib/librte_eal/arm/include/rte_vect.h    |  3 +
 lib/librte_lpm/meson.build               |  2 +-
 lib/librte_lpm/rte_lpm.h                 |  4 ++
 lib/librte_lpm/rte_lpm_sve.h             | 83 ++++++++++++++++++++++++
 9 files changed, 127 insertions(+), 48 deletions(-)
 create mode 100644 config/arm/arm64_n2_linux_gcc
 create mode 100644 lib/librte_lpm/rte_lpm_sve.h

-- 
2.25.1


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [dpdk-dev] [PATCH v2 1/5] lpm: add sve support for lookup on Arm platform
  2021-01-08  8:25 ` [dpdk-dev] [PATCH v2 0/5] lpm lookup with sve support Ruifeng Wang
@ 2021-01-08  8:25   ` Ruifeng Wang
  2021-01-13 18:54     ` Medvedkin, Vladimir
  2021-01-08  8:25   ` [dpdk-dev] [PATCH v2 2/5] net/hns3: fix build with sve enabled Ruifeng Wang
                     ` (3 subsequent siblings)
  4 siblings, 1 reply; 43+ messages in thread
From: Ruifeng Wang @ 2021-01-08  8:25 UTC (permalink / raw)
  To: Jan Viktorin, Ruifeng Wang, Jerin Jacob, Bruce Richardson,
	Vladimir Medvedkin
  Cc: dev, hemant.agrawal, honnappa.nagarahalli, nd

Added a new path to do lpm4 lookup by using the Scalable Vector Extension (SVE).
The SVE path will be selected if the compiler defines __ARM_FEATURE_SVE.

Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 lib/librte_eal/arm/include/rte_vect.h |  3 +
 lib/librte_lpm/meson.build            |  2 +-
 lib/librte_lpm/rte_lpm.h              |  4 ++
 lib/librte_lpm/rte_lpm_sve.h          | 83 +++++++++++++++++++++++++++
 4 files changed, 91 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_lpm/rte_lpm_sve.h

diff --git a/lib/librte_eal/arm/include/rte_vect.h b/lib/librte_eal/arm/include/rte_vect.h
index a739e6e66..093e9122a 100644
--- a/lib/librte_eal/arm/include/rte_vect.h
+++ b/lib/librte_eal/arm/include/rte_vect.h
@@ -9,6 +9,9 @@
 #include "generic/rte_vect.h"
 #include "rte_debug.h"
 #include "arm_neon.h"
+#ifdef __ARM_FEATURE_SVE
+#include <arm_sve.h>
+#endif
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/lib/librte_lpm/meson.build b/lib/librte_lpm/meson.build
index 6cfc083c5..f93c86640 100644
--- a/lib/librte_lpm/meson.build
+++ b/lib/librte_lpm/meson.build
@@ -5,6 +5,6 @@ sources = files('rte_lpm.c', 'rte_lpm6.c')
 headers = files('rte_lpm.h', 'rte_lpm6.h')
 # since header files have different names, we can install all vector headers
 # without worrying about which architecture we actually need
-headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h')
+headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h', 'rte_lpm_sve.h')
 deps += ['hash']
 deps += ['rcu']
diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
index 1afe55cdc..28b57683b 100644
--- a/lib/librte_lpm/rte_lpm.h
+++ b/lib/librte_lpm/rte_lpm.h
@@ -402,7 +402,11 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
 	uint32_t defv);
 
 #if defined(RTE_ARCH_ARM)
+#ifdef __ARM_FEATURE_SVE
+#include "rte_lpm_sve.h"
+#else
 #include "rte_lpm_neon.h"
+#endif
 #elif defined(RTE_ARCH_PPC_64)
 #include "rte_lpm_altivec.h"
 #else
diff --git a/lib/librte_lpm/rte_lpm_sve.h b/lib/librte_lpm/rte_lpm_sve.h
new file mode 100644
index 000000000..2e319373e
--- /dev/null
+++ b/lib/librte_lpm/rte_lpm_sve.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Arm Limited
+ */
+
+#ifndef _RTE_LPM_SVE_H_
+#define _RTE_LPM_SVE_H_
+
+#include <rte_vect.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+__rte_internal
+static void
+__rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips,
+		uint32_t *__rte_restrict next_hops, const uint32_t n)
+{
+	uint32_t i = 0;
+	svuint32_t v_ip, v_idx, v_tbl24, v_tbl8, v_hop;
+	svuint32_t v_mask_xv, v_mask_v, v_mask_hop;
+	svbool_t pg = svwhilelt_b32(i, n);
+	svbool_t pv;
+
+	do {
+		v_ip = svld1(pg, &ips[i]);
+		/* Get indices for tbl24[] */
+		v_idx = svlsr_x(pg, v_ip, 8);
+		/* Extract values from tbl24[] */
+		v_tbl24 = svld1_gather_index(pg, (const uint32_t *)lpm->tbl24,
+						v_idx);
+
+		/* Create mask with valid set */
+		v_mask_v = svdup_u32_z(pg, RTE_LPM_LOOKUP_SUCCESS);
+		/* Create mask with valid and valid_group set */
+		v_mask_xv = svdup_u32_z(pg, RTE_LPM_VALID_EXT_ENTRY_BITMASK);
+		/* Create predicate for tbl24 entries: (valid && !valid_group) */
+		pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), v_mask_v);
+		/* Create mask for next_hop in table entry */
+		v_mask_hop = svdup_u32_z(pg, 0x00ffffff);
+		/* Extract next_hop and write back */
+		v_hop = svand_x(pv, v_tbl24, v_mask_hop);
+		svst1(pv, &next_hops[i], v_hop);
+
+		/* Update predicate for tbl24 entries: (valid && valid_group) */
+		pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), v_mask_xv);
+		/* Compute tbl8 index */
+		v_idx = svand_x(pv, v_tbl24, svdup_u32_z(pv, 0xffffff));
+		v_idx = svmul_x(pv, v_idx, RTE_LPM_TBL8_GROUP_NUM_ENTRIES);
+		v_idx = svadd_x(pv, svand_x(pv, v_ip, svdup_u32_z(pv, 0xff)),
+				v_idx);
+		/* Extract values from tbl8[] */
+		v_tbl8 = svld1_gather_index(pv, (const uint32_t *)lpm->tbl8,
+						v_idx);
+		/* Update predicate for tbl8 entries: (valid) */
+		pv = svcmpeq(pv, svand_z(pv, v_tbl8, v_mask_v), v_mask_v);
+		/* Extract next_hop and write back */
+		v_hop = svand_x(pv, v_tbl8, v_mask_hop);
+		svst1(pv, &next_hops[i], v_hop);
+
+		i += svlen(v_ip);
+		pg = svwhilelt_b32(i, n);
+	} while (svptest_any(svptrue_b32(), pg));
+}
+
+static inline void
+rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
+		uint32_t defv)
+{
+	uint32_t i, ips[4];
+
+	vst1q_s32((int32_t *)ips, ip);
+	for (i = 0; i < 4; i++)
+		hop[i] = defv;
+
+	__rte_lpm_lookup_vec(lpm, ips, hop, 4);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_LPM_SVE_H_ */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [dpdk-dev] [PATCH v2 2/5] net/hns3: fix build with sve enabled
  2021-01-08  8:25 ` [dpdk-dev] [PATCH v2 0/5] lpm lookup with sve support Ruifeng Wang
  2021-01-08  8:25   ` [dpdk-dev] [PATCH v2 1/5] lpm: add sve support for lookup on Arm platform Ruifeng Wang
@ 2021-01-08  8:25   ` Ruifeng Wang
  2021-01-09  0:06     ` Honnappa Nagarahalli
  2021-01-09  2:15     ` oulijun
  2021-01-08  8:25   ` [dpdk-dev] [PATCH v2 3/5] net/octeontx: " Ruifeng Wang
                     ` (2 subsequent siblings)
  4 siblings, 2 replies; 43+ messages in thread
From: Ruifeng Wang @ 2021-01-08  8:25 UTC (permalink / raw)
  To: Wei Hu (Xavier), Min Hu (Connor),
	Yisen Zhuang, Lijun Ou, Huisong Li, Chengchang Tang,
	Chengwen Feng
  Cc: dev, vladimir.medvedkin, jerinj, hemant.agrawal,
	honnappa.nagarahalli, nd, Ruifeng Wang, stable

Building with SVE extension enabled stopped with error:

 error: ACLE function ‘svwhilelt_b64_s32’ requires ISA extension ‘sve’
   18 | #define PG64_256BIT  svwhilelt_b64(0, 4)

This is caused by an unintentional reset of cflags.
Fixed the issue by appending the required flag to cflags instead of
overriding it.

Fixes: 952ebacce4f2 ("net/hns3: support SVE Rx")
Cc: xavier.huwei@huawei.com
Cc: stable@dpdk.org

Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 drivers/net/hns3/meson.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/hns3/meson.build b/drivers/net/hns3/meson.build
index 45cee34d9..798086357 100644
--- a/drivers/net/hns3/meson.build
+++ b/drivers/net/hns3/meson.build
@@ -32,7 +32,7 @@ deps += ['hash']
 if arch_subdir == 'arm' and dpdk_conf.get('RTE_ARCH_64')
 	sources += files('hns3_rxtx_vec.c')
 	if cc.get_define('__ARM_FEATURE_SVE', args: machine_args) != ''
-		cflags = ['-DCC_SVE_SUPPORT']
+		cflags += ['-DCC_SVE_SUPPORT']
 		sources += files('hns3_rxtx_vec_sve.c')
 	endif
 endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [dpdk-dev] [PATCH v2 3/5] net/octeontx: fix build with sve enabled
  2021-01-08  8:25 ` [dpdk-dev] [PATCH v2 0/5] lpm lookup with sve support Ruifeng Wang
  2021-01-08  8:25   ` [dpdk-dev] [PATCH v2 1/5] lpm: add sve support for lookup on Arm platform Ruifeng Wang
  2021-01-08  8:25   ` [dpdk-dev] [PATCH v2 2/5] net/hns3: fix build with sve enabled Ruifeng Wang
@ 2021-01-08  8:25   ` Ruifeng Wang
  2021-01-08  8:25   ` [dpdk-dev] [PATCH v2 4/5] common/octeontx2: " Ruifeng Wang
  2021-01-08  8:25   ` [dpdk-dev] [PATCH v2 5/5] config: add Arm Neoverse N2 Ruifeng Wang
  4 siblings, 0 replies; 43+ messages in thread
From: Ruifeng Wang @ 2021-01-08  8:25 UTC (permalink / raw)
  To: Harman Kalra, Jerin Jacob, Santosh Shukla
  Cc: dev, vladimir.medvedkin, jerinj, hemant.agrawal,
	honnappa.nagarahalli, nd, Ruifeng Wang, stable

Building with gcc 10.2 with SVE extension enabled got error:

{standard input}: Assembler messages:
{standard input}:91: Error: selected processor does not support `addvl x4,x8,#-1'
{standard input}:95: Error: selected processor does not support `ptrue p1.d,all'
{standard input}:135: Error: selected processor does not support `whilelo p2.d,xzr,x5'
{standard input}:137: Error: selected processor does not support `decb x1'

This is because inline assembly code explicitly resets cpu model to
not have SVE support. Thus SVE instructions generated by compiler
auto vectorization got rejected by assembler.

Fixed the issue by replacing inline assembly with equivalent atomic
built-ins. Compiler will generate LSE instructions for cpu that has
the extension.

Fixes: f0c7bb1bf778 ("net/octeontx/base: add octeontx IO operations")
Cc: jerinj@marvell.com
Cc: stable@dpdk.org

Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 drivers/net/octeontx/base/octeontx_io.h | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/drivers/net/octeontx/base/octeontx_io.h b/drivers/net/octeontx/base/octeontx_io.h
index 04b9ce191..0bf9b100d 100644
--- a/drivers/net/octeontx/base/octeontx_io.h
+++ b/drivers/net/octeontx/base/octeontx_io.h
@@ -58,14 +58,8 @@ do {							\
 static inline uint64_t
 octeontx_reg_ldadd_u64(void *addr, int64_t off)
 {
-	uint64_t old_val;
-
-	__asm__ volatile(
-		" .cpu		generic+lse\n"
-		" ldadd	%1, %0, [%2]\n"
-		: "=r" (old_val) : "r" (off), "r" (addr) : "memory");
-
-	return old_val;
+	return (uint64_t)__atomic_fetch_add((int64_t *)addr, off,
+						__ATOMIC_RELAXED);
 }
 
 /**
@@ -97,10 +91,8 @@ octeontx_reg_lmtst(void *lmtline_va, void *ioreg_va, const uint64_t cmdbuf[],
 		}
 
 		/* LDEOR initiates atomic transfer to I/O device */
-		__asm__ volatile(
-			" .cpu		generic+lse\n"
-			" ldeor	xzr, %0, [%1]\n"
-			: "=r" (result) : "r" (ioreg_va) : "memory");
+		result = __atomic_fetch_xor((uint64_t *)ioreg_va, 0,
+						__ATOMIC_RELAXED);
 	} while (!result);
 }
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [dpdk-dev] [PATCH v2 4/5] common/octeontx2: fix build with sve enabled
  2021-01-08  8:25 ` [dpdk-dev] [PATCH v2 0/5] lpm lookup with sve support Ruifeng Wang
                     ` (2 preceding siblings ...)
  2021-01-08  8:25   ` [dpdk-dev] [PATCH v2 3/5] net/octeontx: " Ruifeng Wang
@ 2021-01-08  8:25   ` Ruifeng Wang
  2021-01-08 10:29     ` [dpdk-dev] [EXT] " Pavan Nikhilesh Bhagavatula
  2021-01-08  8:25   ` [dpdk-dev] [PATCH v2 5/5] config: add Arm Neoverse N2 Ruifeng Wang
  4 siblings, 1 reply; 43+ messages in thread
From: Ruifeng Wang @ 2021-01-08  8:25 UTC (permalink / raw)
  To: Jerin Jacob, Nithin Dabilpuram, Pavan Nikhilesh
  Cc: dev, vladimir.medvedkin, hemant.agrawal, honnappa.nagarahalli,
	nd, Ruifeng Wang, stable

Building with gcc 10.2 with SVE extension enabled got error:

{standard input}: Assembler messages:
{standard input}:4002: Error: selected processor does not support `mov z3.b,#0'
{standard input}:4003: Error: selected processor does not support `whilelo p1.b,xzr,x7'
{standard input}:4005: Error: selected processor does not support `ld1b z0.b,p1/z,[x8]'
{standard input}:4006: Error: selected processor does not support `whilelo p4.s,wzr,w7'

This is because the inline assembly code explicitly resets the cpu model to
one without SVE support. Thus the SVE instructions generated by compiler
auto-vectorization are rejected by the assembler.

Fixed the issue by replacing inline assembly with equivalent atomic
built-ins. Compiler will generate LSE instructions for cpu that has
the extension.

Fixes: 8a4f835971f5 ("common/octeontx2: add IO handling APIs")
Cc: jerinj@marvell.com
Cc: stable@dpdk.org

Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 drivers/common/octeontx2/otx2_io_arm64.h | 37 +++---------------------
 1 file changed, 4 insertions(+), 33 deletions(-)

diff --git a/drivers/common/octeontx2/otx2_io_arm64.h b/drivers/common/octeontx2/otx2_io_arm64.h
index b5c85d9a6..8843a79b5 100644
--- a/drivers/common/octeontx2/otx2_io_arm64.h
+++ b/drivers/common/octeontx2/otx2_io_arm64.h
@@ -24,55 +24,26 @@
 static __rte_always_inline uint64_t
 otx2_atomic64_add_nosync(int64_t incr, int64_t *ptr)
 {
-	uint64_t result;
-
 	/* Atomic add with no ordering */
-	asm volatile (
-		".cpu  generic+lse\n"
-		"ldadd %x[i], %x[r], [%[b]]"
-		: [r] "=r" (result), "+m" (*ptr)
-		: [i] "r" (incr), [b] "r" (ptr)
-		: "memory");
-	return result;
+	return (uint64_t)__atomic_fetch_add(ptr, incr, __ATOMIC_RELAXED);
 }
 
 static __rte_always_inline uint64_t
 otx2_atomic64_add_sync(int64_t incr, int64_t *ptr)
 {
-	uint64_t result;
-
-	/* Atomic add with ordering */
-	asm volatile (
-		".cpu  generic+lse\n"
-		"ldadda %x[i], %x[r], [%[b]]"
-		: [r] "=r" (result), "+m" (*ptr)
-		: [i] "r" (incr), [b] "r" (ptr)
-		: "memory");
-	return result;
+	return (uint64_t)__atomic_fetch_add(ptr, incr, __ATOMIC_ACQUIRE);
 }
 
 static __rte_always_inline uint64_t
 otx2_lmt_submit(rte_iova_t io_address)
 {
-	uint64_t result;
-
-	asm volatile (
-		".cpu  generic+lse\n"
-		"ldeor xzr,%x[rf],[%[rs]]" :
-		 [rf] "=r"(result): [rs] "r"(io_address));
-	return result;
+	return __atomic_fetch_xor((uint64_t *)io_address, 0, __ATOMIC_RELAXED);
 }
 
 static __rte_always_inline uint64_t
 otx2_lmt_submit_release(rte_iova_t io_address)
 {
-	uint64_t result;
-
-	asm volatile (
-		".cpu  generic+lse\n"
-		"ldeorl xzr,%x[rf],[%[rs]]" :
-		 [rf] "=r"(result) : [rs] "r"(io_address));
-	return result;
+	return __atomic_fetch_xor((uint64_t *)io_address, 0, __ATOMIC_RELEASE);
 }
 
 static __rte_always_inline void
-- 
2.25.1


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [dpdk-dev] [PATCH v2 5/5] config: add Arm Neoverse N2
  2021-01-08  8:25 ` [dpdk-dev] [PATCH v2 0/5] lpm lookup with sve support Ruifeng Wang
                     ` (3 preceding siblings ...)
  2021-01-08  8:25   ` [dpdk-dev] [PATCH v2 4/5] common/octeontx2: " Ruifeng Wang
@ 2021-01-08  8:25   ` Ruifeng Wang
  2021-01-08 23:58     ` Honnappa Nagarahalli
  4 siblings, 1 reply; 43+ messages in thread
From: Ruifeng Wang @ 2021-01-08  8:25 UTC (permalink / raw)
  To: Jerin Jacob, Ruifeng Wang, Jan Viktorin, Bruce Richardson
  Cc: dev, vladimir.medvedkin, hemant.agrawal, honnappa.nagarahalli, nd

Add Arm Neoverse N2 cpu support.

Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 config/arm/arm64_n2_linux_gcc | 17 +++++++++++++++++
 config/arm/meson.build        | 11 ++++++++++-
 2 files changed, 27 insertions(+), 1 deletion(-)
 create mode 100644 config/arm/arm64_n2_linux_gcc

diff --git a/config/arm/arm64_n2_linux_gcc b/config/arm/arm64_n2_linux_gcc
new file mode 100644
index 000000000..78f6f3e2b
--- /dev/null
+++ b/config/arm/arm64_n2_linux_gcc
@@ -0,0 +1,17 @@
+[binaries]
+c = 'aarch64-linux-gnu-gcc'
+cpp = 'aarch64-linux-gnu-cpp'
+ar = 'aarch64-linux-gnu-gcc-ar'
+strip = 'aarch64-linux-gnu-strip'
+pkgconfig = 'aarch64-linux-gnu-pkg-config'
+pcap-config = ''
+
+[host_machine]
+system = 'linux'
+cpu_family = 'aarch64'
+cpu = 'armv8-a'
+endian = 'little'
+
+[properties]
+implementor_id = '0x41'
+implementor_pn = '0xd49'
diff --git a/config/arm/meson.build b/config/arm/meson.build
index 42b4e43c7..58e0ae643 100644
--- a/config/arm/meson.build
+++ b/config/arm/meson.build
@@ -89,6 +89,14 @@ flags_n1generic_extra = [
 	['RTE_MAX_NUMA_NODES', 1],
 	['RTE_EAL_NUMA_AWARE_HUGEPAGES', false],
 	['RTE_LIBRTE_VHOST_NUMA', false]]
+flags_n2generic_extra = [
+	['RTE_MACHINE', '"neoverse-n2"'],
+	['RTE_MAX_LCORE', 64],
+	['RTE_CACHE_LINE_SIZE', 64],
+	['RTE_ARM_FEATURE_ATOMICS', true],
+	['RTE_USE_C11_MEM_MODEL', true],
+	['RTE_EAL_NUMA_AWARE_HUGEPAGES', false],
+	['RTE_LIBRTE_VHOST_NUMA', false]]
 
 machine_args_generic = [
 	['default', ['-march=armv8-a+crc', '-moutline-atomics']],
@@ -100,7 +108,8 @@ machine_args_generic = [
 	['0xd09', ['-mcpu=cortex-a73']],
 	['0xd0a', ['-mcpu=cortex-a75']],
 	['0xd0b', ['-mcpu=cortex-a76']],
-	['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'], flags_n1generic_extra]]
+	['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'], flags_n1generic_extra],
+	['0xd49', ['-march=armv8.5-a+crypto+sve'], flags_n2generic_extra]]
 
 machine_args_cavium = [
 	['default', ['-march=armv8-a+crc+crypto','-mcpu=thunderx']],
-- 
2.25.1


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [EXT] [PATCH v2 4/5] common/octeontx2: fix build with sve enabled
  2021-01-08  8:25   ` [dpdk-dev] [PATCH v2 4/5] common/octeontx2: " Ruifeng Wang
@ 2021-01-08 10:29     ` Pavan Nikhilesh Bhagavatula
  2021-01-11  9:51       ` Ruifeng Wang
  0 siblings, 1 reply; 43+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2021-01-08 10:29 UTC (permalink / raw)
  To: Ruifeng Wang, Jerin Jacob Kollanukkaran, Nithin Kumar Dabilpuram
  Cc: dev, vladimir.medvedkin, hemant.agrawal, honnappa.nagarahalli,
	nd, stable

Hi Ruifeng,

>Building with gcc 10.2 with SVE extension enabled got error:
>
>{standard input}: Assembler messages:
>{standard input}:4002: Error: selected processor does not support `mov
>z3.b,#0'
>{standard input}:4003: Error: selected processor does not support
>`whilelo p1.b,xzr,x7'
>{standard input}:4005: Error: selected processor does not support `ld1b
>z0.b,p1/z,[x8]'
>{standard input}:4006: Error: selected processor does not support
>`whilelo p4.s,wzr,w7'
>
>This is because inline assembly code explicitly resets cpu model to
>not have SVE support. Thus SVE instructions generated by compiler
>auto vectorization got rejected by assembler.
>
>Fixed the issue by replacing inline assembly with equivalent atomic
>built-ins. Compiler will generate LSE instructions for cpu that has
>the extension.
>
>Fixes: 8a4f835971f5 ("common/octeontx2: add IO handling APIs")
>Cc: jerinj@marvell.com
>Cc: stable@dpdk.org
>
>Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
>---
> drivers/common/octeontx2/otx2_io_arm64.h | 37 +++--------------------
>-
> 1 file changed, 4 insertions(+), 33 deletions(-)
>
>diff --git a/drivers/common/octeontx2/otx2_io_arm64.h
>b/drivers/common/octeontx2/otx2_io_arm64.h
>index b5c85d9a6..8843a79b5 100644
>--- a/drivers/common/octeontx2/otx2_io_arm64.h
>+++ b/drivers/common/octeontx2/otx2_io_arm64.h
>@@ -24,55 +24,26 @@
> static __rte_always_inline uint64_t
> otx2_atomic64_add_nosync(int64_t incr, int64_t *ptr)
> {
>-	uint64_t result;
>-
> 	/* Atomic add with no ordering */
>-	asm volatile (
>-		".cpu  generic+lse\n"
>-		"ldadd %x[i], %x[r], [%[b]]"
>-		: [r] "=r" (result), "+m" (*ptr)
>-		: [i] "r" (incr), [b] "r" (ptr)
>-		: "memory");
>-	return result;
>+	return (uint64_t)__atomic_fetch_add(ptr, incr,
>__ATOMIC_RELAXED);
> }
>

Here LDADD acts as a way to interface to co-processors, i.e. the
LDADD instruction opcode plus a specific I/O address is recognized by the
HW interceptor and dispatched to the specific coprocessor.

Leaving it to the compiler to use the correct instruction is a bad idea.
This breaks the arm64_armv8_linux_gcc build as it doesn't have the
+lse enabled.
__atomic_fetch_add will generate a different instruction with SVE 
enabled.

Instead, can we add +sve to the first line to prevent the outer loop from optimizing out
the trap?

I tested with GCC 10.2 and the n2 config; the change below works fine.
-" .cpu          generic+lse\n"
+" .cpu		generic+lse+sve\n"

Regards,
Pavan.

> static __rte_always_inline uint64_t
> otx2_atomic64_add_sync(int64_t incr, int64_t *ptr)
> {
>-	uint64_t result;
>-
>-	/* Atomic add with ordering */
>-	asm volatile (
>-		".cpu  generic+lse\n"
>-		"ldadda %x[i], %x[r], [%[b]]"
>-		: [r] "=r" (result), "+m" (*ptr)
>-		: [i] "r" (incr), [b] "r" (ptr)
>-		: "memory");
>-	return result;
>+	return (uint64_t)__atomic_fetch_add(ptr, incr,
>__ATOMIC_ACQUIRE);
> }
>
> static __rte_always_inline uint64_t
> otx2_lmt_submit(rte_iova_t io_address)
> {
>-	uint64_t result;
>-
>-	asm volatile (
>-		".cpu  generic+lse\n"
>-		"ldeor xzr,%x[rf],[%[rs]]" :
>-		 [rf] "=r"(result): [rs] "r"(io_address));
>-	return result;
>+	return __atomic_fetch_xor((uint64_t *)io_address, 0,
>__ATOMIC_RELAXED);
> }
>
> static __rte_always_inline uint64_t
> otx2_lmt_submit_release(rte_iova_t io_address)
> {
>-	uint64_t result;
>-
>-	asm volatile (
>-		".cpu  generic+lse\n"
>-		"ldeorl xzr,%x[rf],[%[rs]]" :
>-		 [rf] "=r"(result) : [rs] "r"(io_address));
>-	return result;
>+	return __atomic_fetch_xor((uint64_t *)io_address, 0,
>__ATOMIC_RELEASE);
> }
>
> static __rte_always_inline void
>--
>2.25.1


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [PATCH v2 5/5] config: add Arm Neoverse N2
  2021-01-08  8:25   ` [dpdk-dev] [PATCH v2 5/5] config: add Arm Neoverse N2 Ruifeng Wang
@ 2021-01-08 23:58     ` Honnappa Nagarahalli
  2021-01-11  3:01       ` Ruifeng Wang
  0 siblings, 1 reply; 43+ messages in thread
From: Honnappa Nagarahalli @ 2021-01-08 23:58 UTC (permalink / raw)
  To: Ruifeng Wang, jerinj, Ruifeng Wang, Jan Viktorin, Bruce Richardson
  Cc: dev, vladimir.medvedkin, hemant.agrawal, nd, Honnappa Nagarahalli, nd

+ Juraj

Please note that this clashes with Juraj's patch for meson rework.

<snip>

> 
> Add Arm Neoverse N2 cpu support.
> 
> Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>  config/arm/arm64_n2_linux_gcc | 17 +++++++++++++++++
>  config/arm/meson.build        | 11 ++++++++++-
>  2 files changed, 27 insertions(+), 1 deletion(-)  create mode 100644
> config/arm/arm64_n2_linux_gcc
> 
> diff --git a/config/arm/arm64_n2_linux_gcc
> b/config/arm/arm64_n2_linux_gcc new file mode 100644 index
> 000000000..78f6f3e2b
> --- /dev/null
> +++ b/config/arm/arm64_n2_linux_gcc
> @@ -0,0 +1,17 @@
> +[binaries]
> +c = 'aarch64-linux-gnu-gcc'
> +cpp = 'aarch64-linux-gnu-cpp'
> +ar = 'aarch64-linux-gnu-gcc-ar'
> +strip = 'aarch64-linux-gnu-strip'
> +pkgconfig = 'aarch64-linux-gnu-pkg-config'
> +pcap-config = ''
> +
> +[host_machine]
> +system = 'linux'
> +cpu_family = 'aarch64'
> +cpu = 'armv8-a'
> +endian = 'little'
> +
> +[properties]
> +implementor_id = '0x41'
> +implementor_pn = '0xd49'
> diff --git a/config/arm/meson.build b/config/arm/meson.build index
> 42b4e43c7..58e0ae643 100644
> --- a/config/arm/meson.build
> +++ b/config/arm/meson.build
> @@ -89,6 +89,14 @@ flags_n1generic_extra = [
>  	['RTE_MAX_NUMA_NODES', 1],
>  	['RTE_EAL_NUMA_AWARE_HUGEPAGES', false],
>  	['RTE_LIBRTE_VHOST_NUMA', false]]
> +flags_n2generic_extra = [
> +	['RTE_MACHINE', '"neoverse-n2"'],
> +	['RTE_MAX_LCORE', 64],
> +	['RTE_CACHE_LINE_SIZE', 64],
> +	['RTE_ARM_FEATURE_ATOMICS', true],
> +	['RTE_USE_C11_MEM_MODEL', true],
> +	['RTE_EAL_NUMA_AWARE_HUGEPAGES', false],
> +	['RTE_LIBRTE_VHOST_NUMA', false]]
Do we need a flag RTE_ARM_FEATURE_SVE?

> 
>  machine_args_generic = [
>  	['default', ['-march=armv8-a+crc', '-moutline-atomics']], @@ -100,7
> +108,8 @@ machine_args_generic = [
>  	['0xd09', ['-mcpu=cortex-a73']],
>  	['0xd0a', ['-mcpu=cortex-a75']],
>  	['0xd0b', ['-mcpu=cortex-a76']],
> -	['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'],
> flags_n1generic_extra]]
> +	['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'],
> flags_n1generic_extra],
> +	['0xd49', ['-march=armv8.5-a+crypto+sve'], flags_n2generic_extra]]
Should this be 'sve2'? There should be a flag to indicate SVE2.

> 
>  machine_args_cavium = [
>  	['default', ['-march=armv8-a+crc+crypto','-mcpu=thunderx']],
> --
> 2.25.1


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/5] net/hns3: fix build with sve enabled
  2021-01-08  8:25   ` [dpdk-dev] [PATCH v2 2/5] net/hns3: fix build with sve enabled Ruifeng Wang
@ 2021-01-09  0:06     ` Honnappa Nagarahalli
  2021-01-09  2:11       ` oulijun
  2021-01-09  2:15     ` oulijun
  1 sibling, 1 reply; 43+ messages in thread
From: Honnappa Nagarahalli @ 2021-01-09  0:06 UTC (permalink / raw)
  To: Ruifeng Wang, Wei Hu (Xavier), Min Hu (Connor),
	Yisen Zhuang, Lijun Ou, Huisong Li, Chengchang Tang,
	Chengwen Feng
  Cc: dev, vladimir.medvedkin, jerinj, hemant.agrawal, nd,
	Ruifeng Wang, stable, Honnappa Nagarahalli, nd

<snip>

> 
> Building with SVE extension enabled stopped with error:
> 
>  error: ACLE function ‘svwhilelt_b64_s32’ requires ISA extension ‘sve’
>    18 | #define PG64_256BIT  svwhilelt_b64(0, 4)
> 
> This is caused by unintentional cflags reset.
> Fixed the issue by appending required flag to cflags instead of overriding it.
> 
> Fixes: 952ebacce4f2 ("net/hns3: support SVE Rx")
> Cc: xavier.huwei@huawei.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>  drivers/net/hns3/meson.build | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/net/hns3/meson.build b/drivers/net/hns3/meson.build
> index 45cee34d9..798086357 100644
> --- a/drivers/net/hns3/meson.build
> +++ b/drivers/net/hns3/meson.build
> @@ -32,7 +32,7 @@ deps += ['hash']
>  if arch_subdir == 'arm' and dpdk_conf.get('RTE_ARCH_64')
>  	sources += files('hns3_rxtx_vec.c')
>  	if cc.get_define('__ARM_FEATURE_SVE', args: machine_args) != ''
> -		cflags = ['-DCC_SVE_SUPPORT']
> +		cflags += ['-DCC_SVE_SUPPORT']
This comment is unrelated to this patch. We need to be consistent with the macro definitions. Is '__ARM_FEATURE_SVE' not enough? If we need to define an additional flag, I would name it something like 'RTE_ARM_FEATURE_SVE'.

>  		sources += files('hns3_rxtx_vec_sve.c')
>  	endif
>  endif
> --
> 2.25.1


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/5] net/hns3: fix build with sve enabled
  2021-01-09  0:06     ` Honnappa Nagarahalli
@ 2021-01-09  2:11       ` oulijun
  2021-01-11  2:39         ` Ruifeng Wang
  0 siblings, 1 reply; 43+ messages in thread
From: oulijun @ 2021-01-09  2:11 UTC (permalink / raw)
  To: Honnappa Nagarahalli, Ruifeng Wang, Wei Hu (Xavier),
	Min Hu (Connor),
	Yisen Zhuang, Huisong Li, Chengchang Tang, Chengwen Feng
  Cc: dev, vladimir.medvedkin, jerinj, hemant.agrawal, nd, stable


在 2021/1/9 8:06, Honnappa Nagarahalli 写道:
> <snip>
> 
>>
>> Building with SVE extension enabled stopped with error:
>>
>>   error: ACLE function ‘svwhilelt_b64_s32’ requires ISA extension ‘sve’
>>     18 | #define PG64_256BIT  svwhilelt_b64(0, 4)
>>
>> This is caused by unintentional cflags reset.
>> Fixed the issue by appending required flag to cflags instead of overriding it.
>>
>> Fixes: 952ebacce4f2 ("net/hns3: support SVE Rx")
>> Cc: xavier.huwei@huawei.com
>> Cc: stable@dpdk.org
>>
>> Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
>> ---
>>   drivers/net/hns3/meson.build | 2 +-
>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/drivers/net/hns3/meson.build b/drivers/net/hns3/meson.build
>> index 45cee34d9..798086357 100644
>> --- a/drivers/net/hns3/meson.build
>> +++ b/drivers/net/hns3/meson.build
>> @@ -32,7 +32,7 @@ deps += ['hash']
>>   if arch_subdir == 'arm' and dpdk_conf.get('RTE_ARCH_64')
>>   	sources += files('hns3_rxtx_vec.c')
>>   	if cc.get_define('__ARM_FEATURE_SVE', args: machine_args) != ''
>> -		cflags = ['-DCC_SVE_SUPPORT']
>> +		cflags += ['-DCC_SVE_SUPPORT']
> This comment is unrelated to this patch. We need to be consistent with the macro definitions. Is '__ARM_FEATURE_SVE' not enough? If we need to define an additional flag, I would name it something like 'RTE_ARM_FEATURE_SVE'.
> 
I think __ARM_FEATURE_SVE is OK. If the GCC version in use supports the SVE
flag, SVE support is identified by __ARM_FEATURE_SVE, which is defined in the
ARM SVE document.
>>   		sources += files('hns3_rxtx_vec_sve.c')
>>   	endif
>>   endif
>> --
>> 2.25.1
> 

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/5] net/hns3: fix build with sve enabled
  2021-01-08  8:25   ` [dpdk-dev] [PATCH v2 2/5] net/hns3: fix build with sve enabled Ruifeng Wang
  2021-01-09  0:06     ` Honnappa Nagarahalli
@ 2021-01-09  2:15     ` oulijun
  2021-01-11  2:27       ` Ruifeng Wang
  1 sibling, 1 reply; 43+ messages in thread
From: oulijun @ 2021-01-09  2:15 UTC (permalink / raw)
  To: Ruifeng Wang, Wei Hu (Xavier), Min Hu (Connor),
	Yisen Zhuang, Huisong Li, Chengchang Tang, Chengwen Feng
  Cc: dev, vladimir.medvedkin, jerinj, hemant.agrawal,
	honnappa.nagarahalli, nd, stable



在 2021/1/8 16:25, Ruifeng Wang 写道:
> Building with SVE extension enabled stopped with error:
> 
>   error: ACLE function ‘svwhilelt_b64_s32’ requires ISA extension ‘sve’
>     18 | #define PG64_256BIT  svwhilelt_b64(0, 4)
> 
> This is caused by unintentional cflags reset.
> Fixed the issue by appending required flag to cflags instead of
> overriding it.
> 
> Fixes: 952ebacce4f2 ("net/hns3: support SVE Rx")
> Cc: xavier.huwei@huawei.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>   drivers/net/hns3/meson.build | 2 +-
>   1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/net/hns3/meson.build b/drivers/net/hns3/meson.build
> index 45cee34d9..798086357 100644
> --- a/drivers/net/hns3/meson.build
> +++ b/drivers/net/hns3/meson.build
> @@ -32,7 +32,7 @@ deps += ['hash']
>   if arch_subdir == 'arm' and dpdk_conf.get('RTE_ARCH_64')
>   	sources += files('hns3_rxtx_vec.c')
>   	if cc.get_define('__ARM_FEATURE_SVE', args: machine_args) != ''
> -		cflags = ['-DCC_SVE_SUPPORT']
> +		cflags += ['-DCC_SVE_SUPPORT']
Hi
   I noticed this patch, but when I checked, the hns3 driver did not use
this function. How did you compile it?

Thanks
Lijun Ou
>   		sources += files('hns3_rxtx_vec_sve.c')
>   	endif
>   endif
> 

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/5] net/hns3: fix build with sve enabled
  2021-01-09  2:15     ` oulijun
@ 2021-01-11  2:27       ` Ruifeng Wang
  0 siblings, 0 replies; 43+ messages in thread
From: Ruifeng Wang @ 2021-01-11  2:27 UTC (permalink / raw)
  To: oulijun, Wei Hu (Xavier), Min Hu (Connor),
	Yisen Zhuang, Huisong Li, Chengchang Tang, Chengwen Feng
  Cc: dev, vladimir.medvedkin, jerinj, hemant.agrawal,
	Honnappa Nagarahalli, nd, stable, nd


> -----Original Message-----
> From: oulijun <oulijun@huawei.com>
> Sent: Saturday, January 9, 2021 10:16 AM
> To: Ruifeng Wang <Ruifeng.Wang@arm.com>; Wei Hu (Xavier)
> <xavier.huwei@huawei.com>; Min Hu (Connor) <humin29@huawei.com>;
> Yisen Zhuang <yisen.zhuang@huawei.com>; Huisong Li
> <lihuisong@huawei.com>; Chengchang Tang
> <tangchengchang@huawei.com>; Chengwen Feng
> <fengchengwen@huawei.com>
> Cc: dev@dpdk.org; vladimir.medvedkin@intel.com; jerinj@marvell.com;
> hemant.agrawal@nxp.com; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>; stable@dpdk.org
> Subject: Re: [PATCH v2 2/5] net/hns3: fix build with sve enabled
> 
> 
> 
> 在 2021/1/8 16:25, Ruifeng Wang 写道:
> > Building with SVE extension enabled stopped with error:
> >
> >   error: ACLE function ‘svwhilelt_b64_s32’ requires ISA extension ‘sve’
> >     18 | #define PG64_256BIT  svwhilelt_b64(0, 4)
> >
> > This is caused by unintentional cflags reset.
> > Fixed the issue by appending required flag to cflags instead of
> > overriding it.
> >
> > Fixes: 952ebacce4f2 ("net/hns3: support SVE Rx")
> > Cc: xavier.huwei@huawei.com
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > ---
> >   drivers/net/hns3/meson.build | 2 +-
> >   1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/drivers/net/hns3/meson.build
> > b/drivers/net/hns3/meson.build index 45cee34d9..798086357 100644
> > --- a/drivers/net/hns3/meson.build
> > +++ b/drivers/net/hns3/meson.build
> > @@ -32,7 +32,7 @@ deps += ['hash']
> >   if arch_subdir == 'arm' and dpdk_conf.get('RTE_ARCH_64')
> >   	sources += files('hns3_rxtx_vec.c')
> >   	if cc.get_define('__ARM_FEATURE_SVE', args: machine_args) != ''
> > -		cflags = ['-DCC_SVE_SUPPORT']
> > +		cflags += ['-DCC_SVE_SUPPORT']
> Hi
>    I noticed this patch, but I checked that the hns3 driver did not use this
> function.How did you compile it?

Hi,
The hns3 driver has sve rx/tx implementation in hns3_rxtx_vec_sve.c. This path
will be enabled when compiling with sve feature enabled.

I compiled it using gcc-10.2 with the flag '-march=armv8.3-a+sve'.
You can try compiling for n2 with the cross file added in this series (5/5).

Thanks,
Ruifeng
> 
> Thanks
> Lijun Ou
> >   		sources += files('hns3_rxtx_vec_sve.c')
> >   	endif
> >   endif
> >

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/5] net/hns3: fix build with sve enabled
  2021-01-09  2:11       ` oulijun
@ 2021-01-11  2:39         ` Ruifeng Wang
  2021-01-11 13:38           ` Honnappa Nagarahalli
  0 siblings, 1 reply; 43+ messages in thread
From: Ruifeng Wang @ 2021-01-11  2:39 UTC (permalink / raw)
  To: oulijun, Honnappa Nagarahalli, Min Hu (Connor),
	Yisen Zhuang, Huisong Li, Chengchang Tang, Chengwen Feng
  Cc: dev, vladimir.medvedkin, jerinj, hemant.agrawal, nd, stable, nd


> -----Original Message-----
> From: oulijun <oulijun@huawei.com>
> Sent: Saturday, January 9, 2021 10:12 AM
> To: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; Wei Hu (Xavier) <xavier.huwei@huawei.com>;
> Min Hu (Connor) <humin29@huawei.com>; Yisen Zhuang
> <yisen.zhuang@huawei.com>; Huisong Li <lihuisong@huawei.com>;
> Chengchang Tang <tangchengchang@huawei.com>; Chengwen Feng
> <fengchengwen@huawei.com>
> Cc: dev@dpdk.org; vladimir.medvedkin@intel.com; jerinj@marvell.com;
> hemant.agrawal@nxp.com; nd <nd@arm.com>; stable@dpdk.org
> Subject: Re: [PATCH v2 2/5] net/hns3: fix build with sve enabled
> 
> 
> 在 2021/1/9 8:06, Honnappa Nagarahalli 写道:
> > <snip>
> >
> >>
> >> Building with SVE extension enabled stopped with error:
> >>
> >>   error: ACLE function ‘svwhilelt_b64_s32’ requires ISA extension ‘sve’
> >>     18 | #define PG64_256BIT  svwhilelt_b64(0, 4)
> >>
> >> This is caused by unintentional cflags reset.
> >> Fixed the issue by appending required flag to cflags instead of overriding it.
> >>
> >> Fixes: 952ebacce4f2 ("net/hns3: support SVE Rx")
> >> Cc: xavier.huwei@huawei.com
> >> Cc: stable@dpdk.org
> >>
> >> Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
> >> ---
> >>   drivers/net/hns3/meson.build | 2 +-
> >>   1 file changed, 1 insertion(+), 1 deletion(-)
> >>
> >> diff --git a/drivers/net/hns3/meson.build
> >> b/drivers/net/hns3/meson.build index 45cee34d9..798086357 100644
> >> --- a/drivers/net/hns3/meson.build
> >> +++ b/drivers/net/hns3/meson.build
> >> @@ -32,7 +32,7 @@ deps += ['hash']
> >>   if arch_subdir == 'arm' and dpdk_conf.get('RTE_ARCH_64')
> >>   	sources += files('hns3_rxtx_vec.c')
> >>   	if cc.get_define('__ARM_FEATURE_SVE', args: machine_args) != ''
> >> -		cflags = ['-DCC_SVE_SUPPORT']
> >> +		cflags += ['-DCC_SVE_SUPPORT']
> > This comment is unrelated to this patch. We need to be consistent with the
> macro definitions. Is '__ARM_FEATURE_SVE' not enough? If we need to
> define an additional flag, I would name it something like
> 'RTE_ARM_FEATURE_SVE'.
> >
> I think the __ARM_FEATURE_SVE is ok. if use the gcc version included SVE
> flag, it will be identified as __ARM_FEATURE_SVE. it is defined in the ARM
> SVE document.

Yes, we can rely on the flags defined by the compiler; no extra flag is needed.
In the next version I can remove this section from the meson file and replace CC_SVE_SUPPORT in the code.
> >>   		sources += files('hns3_rxtx_vec_sve.c')
> >>   	endif
> >>   endif
> >> --
> >> 2.25.1
> >

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [PATCH v2 5/5] config: add Arm Neoverse N2
  2021-01-08 23:58     ` Honnappa Nagarahalli
@ 2021-01-11  3:01       ` Ruifeng Wang
  2021-01-11  3:09         ` Jerin Jacob
  0 siblings, 1 reply; 43+ messages in thread
From: Ruifeng Wang @ 2021-01-11  3:01 UTC (permalink / raw)
  To: Honnappa Nagarahalli, jerinj, Jan Viktorin, Bruce Richardson
  Cc: dev, vladimir.medvedkin, hemant.agrawal, nd, nd, nd


> -----Original Message-----
> From: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>
> Sent: Saturday, January 9, 2021 7:58 AM
> To: Ruifeng Wang <Ruifeng.Wang@arm.com>; jerinj@marvell.com; Ruifeng
> Wang <Ruifeng.Wang@arm.com>; Jan Viktorin <viktorin@rehivetech.com>;
> Bruce Richardson <bruce.richardson@intel.com>
> Cc: dev@dpdk.org; vladimir.medvedkin@intel.com;
> hemant.agrawal@nxp.com; nd <nd@arm.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>
> Subject: RE: [PATCH v2 5/5] config: add Arm Neoverse N2
> 
> + Juraj
> 
> Please note that this clashes with Juraj's patch for meson rework.

Yes. I didn't base it on the build options rework series.
I will rebase once that series is merged.
> 
> <snip>
> 
> >
> > Add Arm Neoverse N2 cpu support.
> >
> > Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > ---
> >  config/arm/arm64_n2_linux_gcc | 17 +++++++++++++++++
> >  config/arm/meson.build        | 11 ++++++++++-
> >  2 files changed, 27 insertions(+), 1 deletion(-)  create mode 100644
> > config/arm/arm64_n2_linux_gcc
> >
> > diff --git a/config/arm/arm64_n2_linux_gcc
> > b/config/arm/arm64_n2_linux_gcc new file mode 100644 index
> > 000000000..78f6f3e2b
> > --- /dev/null
> > +++ b/config/arm/arm64_n2_linux_gcc
> > @@ -0,0 +1,17 @@
> > +[binaries]
> > +c = 'aarch64-linux-gnu-gcc'
> > +cpp = 'aarch64-linux-gnu-cpp'
> > +ar = 'aarch64-linux-gnu-gcc-ar'
> > +strip = 'aarch64-linux-gnu-strip'
> > +pkgconfig = 'aarch64-linux-gnu-pkg-config'
> > +pcap-config = ''
> > +
> > +[host_machine]
> > +system = 'linux'
> > +cpu_family = 'aarch64'
> > +cpu = 'armv8-a'
> > +endian = 'little'
> > +
> > +[properties]
> > +implementor_id = '0x41'
> > +implementor_pn = '0xd49'
> > diff --git a/config/arm/meson.build b/config/arm/meson.build index
> > 42b4e43c7..58e0ae643 100644
> > --- a/config/arm/meson.build
> > +++ b/config/arm/meson.build
> > @@ -89,6 +89,14 @@ flags_n1generic_extra = [
> >  	['RTE_MAX_NUMA_NODES', 1],
> >  	['RTE_EAL_NUMA_AWARE_HUGEPAGES', false],
> >  	['RTE_LIBRTE_VHOST_NUMA', false]]
> > +flags_n2generic_extra = [
> > +	['RTE_MACHINE', '"neoverse-n2"'],
> > +	['RTE_MAX_LCORE', 64],
> > +	['RTE_CACHE_LINE_SIZE', 64],
> > +	['RTE_ARM_FEATURE_ATOMICS', true],
> > +	['RTE_USE_C11_MEM_MODEL', true],
> > +	['RTE_EAL_NUMA_AWARE_HUGEPAGES', false],
> > +	['RTE_LIBRTE_VHOST_NUMA', false]]
> Do we need a flag RTE_ARM_FEATURE_SVE?

I don't think an extra flag is needed. We can rely on __ARM_FEATURE_SVE from the compiler.
One scenario I can think of where RTE_ARM_FEATURE_SVE might be needed is when we are
writing inline assembly with SVE instructions and using a compiler that has no SVE support.
I'm not sure we will ever have SVE inline assembly, since C intrinsics are available.
> 
> >
> >  machine_args_generic = [
> >  	['default', ['-march=armv8-a+crc', '-moutline-atomics']], @@ -100,7
> > +108,8 @@ machine_args_generic = [
> >  	['0xd09', ['-mcpu=cortex-a73']],
> >  	['0xd0a', ['-mcpu=cortex-a75']],
> >  	['0xd0b', ['-mcpu=cortex-a76']],
> > -	['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'],
> > flags_n1generic_extra]]
> > +	['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'],
> > flags_n1generic_extra],
> > +	['0xd49', ['-march=armv8.5-a+crypto+sve'], flags_n2generic_extra]]
> Should this be 'sve2'? There should be a flag to indicate SVE2.

Yes. N2 supports sve2 and sve2 is superset of sve.
I will do the change in next version.
> 
> >
> >  machine_args_cavium = [
> >  	['default', ['-march=armv8-a+crc+crypto','-mcpu=thunderx']],
> > --
> > 2.25.1


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [PATCH v2 5/5] config: add Arm Neoverse N2
  2021-01-11  3:01       ` Ruifeng Wang
@ 2021-01-11  3:09         ` Jerin Jacob
  2021-01-11  8:32           ` Ruifeng Wang
  0 siblings, 1 reply; 43+ messages in thread
From: Jerin Jacob @ 2021-01-11  3:09 UTC (permalink / raw)
  To: Ruifeng Wang
  Cc: Honnappa Nagarahalli, jerinj, Jan Viktorin, Bruce Richardson,
	dev, vladimir.medvedkin, hemant.agrawal, nd

On Mon, Jan 11, 2021 at 8:31 AM Ruifeng Wang <Ruifeng.Wang@arm.com> wrote:
>
>
> > -----Original Message-----
> > From: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>
> > Sent: Saturday, January 9, 2021 7:58 AM
> > To: Ruifeng Wang <Ruifeng.Wang@arm.com>; jerinj@marvell.com; Ruifeng
> > Wang <Ruifeng.Wang@arm.com>; Jan Viktorin <viktorin@rehivetech.com>;
> > Bruce Richardson <bruce.richardson@intel.com>
> > Cc: dev@dpdk.org; vladimir.medvedkin@intel.com;
> > hemant.agrawal@nxp.com; nd <nd@arm.com>; Honnappa Nagarahalli
> > <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>
> > Subject: RE: [PATCH v2 5/5] config: add Arm Neoverse N2
> >
> > + Juraj
> >
> > Please note that this clashes with Juraj's patch for meson rework.
>
> Yes. I didn't base it on the build options rework series.
> I will rebase when that series got merged.
> >
> > <snip>
> >
> > >
> > > Add Arm Neoverse N2 cpu support.
> > >
> > > Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > ---
> > >  config/arm/arm64_n2_linux_gcc | 17 +++++++++++++++++
> > >  config/arm/meson.build        | 11 ++++++++++-
> > >  2 files changed, 27 insertions(+), 1 deletion(-)  create mode 100644
> > > config/arm/arm64_n2_linux_gcc
> > >
> > > diff --git a/config/arm/arm64_n2_linux_gcc
> > > b/config/arm/arm64_n2_linux_gcc new file mode 100644 index
> > > 000000000..78f6f3e2b
> > > --- /dev/null
> > > +++ b/config/arm/arm64_n2_linux_gcc
> > > @@ -0,0 +1,17 @@
> > > +[binaries]
> > > +c = 'aarch64-linux-gnu-gcc'
> > > +cpp = 'aarch64-linux-gnu-cpp'
> > > +ar = 'aarch64-linux-gnu-gcc-ar'
> > > +strip = 'aarch64-linux-gnu-strip'
> > > +pkgconfig = 'aarch64-linux-gnu-pkg-config'
> > > +pcap-config = ''
> > > +
> > > +[host_machine]
> > > +system = 'linux'
> > > +cpu_family = 'aarch64'
> > > +cpu = 'armv8-a'
> > > +endian = 'little'
> > > +
> > > +[properties]
> > > +implementor_id = '0x41'
> > > +implementor_pn = '0xd49'
> > > diff --git a/config/arm/meson.build b/config/arm/meson.build index
> > > 42b4e43c7..58e0ae643 100644
> > > --- a/config/arm/meson.build
> > > +++ b/config/arm/meson.build
> > > @@ -89,6 +89,14 @@ flags_n1generic_extra = [
> > >     ['RTE_MAX_NUMA_NODES', 1],
> > >     ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false],
> > >     ['RTE_LIBRTE_VHOST_NUMA', false]]
> > > +flags_n2generic_extra = [
> > > +   ['RTE_MACHINE', '"neoverse-n2"'],
> > > +   ['RTE_MAX_LCORE', 64],
> > > +   ['RTE_CACHE_LINE_SIZE', 64],
> > > +   ['RTE_ARM_FEATURE_ATOMICS', true],
> > > +   ['RTE_USE_C11_MEM_MODEL', true],
> > > +   ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false],
> > > +   ['RTE_LIBRTE_VHOST_NUMA', false]]
> > Do we need a flag RTE_ARM_FEATURE_SVE?
>
> I don't think extra flag is needed. We can rely on __ARM_FEATURE_SVE from compiler.
> One scenario I can think of where RTE_ARM_FEATURE_SVE can be needed is, when we are
> writing inline assembly with sve instructions and using compiler that has no sve support.
> I'm not sure we will have sve inline assembly as C intrinsics are available.

It may be useful to introduce RTE_ARM_FEATURE_SVE to abstract any
compiler differences in the future (GCC vs. clang or another toolchain, etc.).


> >
> > >
> > >  machine_args_generic = [
> > >     ['default', ['-march=armv8-a+crc', '-moutline-atomics']], @@ -100,7
> > > +108,8 @@ machine_args_generic = [
> > >     ['0xd09', ['-mcpu=cortex-a73']],
> > >     ['0xd0a', ['-mcpu=cortex-a75']],
> > >     ['0xd0b', ['-mcpu=cortex-a76']],
> > > -   ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'],
> > > flags_n1generic_extra]]
> > > +   ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'],
> > > flags_n1generic_extra],
> > > +   ['0xd49', ['-march=armv8.5-a+crypto+sve'], flags_n2generic_extra]]
> > Should this be 'sve2'? There should be a flag to indicate SVE2.
>
> Yes. N2 supports sve2 and sve2 is superset of sve.
> I will do the change in next version.
> >
> > >
> > >  machine_args_cavium = [
> > >     ['default', ['-march=armv8-a+crc+crypto','-mcpu=thunderx']],
> > > --
> > > 2.25.1
>

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [PATCH v2 5/5] config: add Arm Neoverse N2
  2021-01-11  3:09         ` Jerin Jacob
@ 2021-01-11  8:32           ` Ruifeng Wang
  2021-01-11 13:58             ` Honnappa Nagarahalli
  0 siblings, 1 reply; 43+ messages in thread
From: Ruifeng Wang @ 2021-01-11  8:32 UTC (permalink / raw)
  To: Jerin Jacob
  Cc: Honnappa Nagarahalli, jerinj, Jan Viktorin, Bruce Richardson,
	dev, vladimir.medvedkin, hemant.agrawal, nd, nd


> -----Original Message-----
> From: Jerin Jacob <jerinjacobk@gmail.com>
> Sent: Monday, January 11, 2021 11:09 AM
> To: Ruifeng Wang <Ruifeng.Wang@arm.com>
> Cc: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>;
> jerinj@marvell.com; Jan Viktorin <viktorin@rehivetech.com>; Bruce
> Richardson <bruce.richardson@intel.com>; dev@dpdk.org;
> vladimir.medvedkin@intel.com; hemant.agrawal@nxp.com; nd
> <nd@arm.com>
> Subject: Re: [dpdk-dev] [PATCH v2 5/5] config: add Arm Neoverse N2
> 
> On Mon, Jan 11, 2021 at 8:31 AM Ruifeng Wang <Ruifeng.Wang@arm.com>
> wrote:
> >
> >
> > > -----Original Message-----
> > > From: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>
> > > Sent: Saturday, January 9, 2021 7:58 AM
> > > To: Ruifeng Wang <Ruifeng.Wang@arm.com>; jerinj@marvell.com;
> Ruifeng
> > > Wang <Ruifeng.Wang@arm.com>; Jan Viktorin
> <viktorin@rehivetech.com>;
> > > Bruce Richardson <bruce.richardson@intel.com>
> > > Cc: dev@dpdk.org; vladimir.medvedkin@intel.com;
> > > hemant.agrawal@nxp.com; nd <nd@arm.com>; Honnappa Nagarahalli
> > > <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>
> > > Subject: RE: [PATCH v2 5/5] config: add Arm Neoverse N2
> > >
> > > + Juraj
> > >
> > > Please note that this clashes with Juraj's patch for meson rework.
> >
> > Yes. I didn't base it on the build options rework series.
> > I will rebase when that series got merged.
> > >
> > > <snip>
> > >
> > > >
> > > > Add Arm Neoverse N2 cpu support.
> > > >
> > > > Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > > ---
> > > >  config/arm/arm64_n2_linux_gcc | 17 +++++++++++++++++
> > > >  config/arm/meson.build        | 11 ++++++++++-
> > > >  2 files changed, 27 insertions(+), 1 deletion(-)  create mode
> > > > 100644 config/arm/arm64_n2_linux_gcc
> > > >
> > > > diff --git a/config/arm/arm64_n2_linux_gcc
> > > > b/config/arm/arm64_n2_linux_gcc new file mode 100644 index
> > > > 000000000..78f6f3e2b
> > > > --- /dev/null
> > > > +++ b/config/arm/arm64_n2_linux_gcc
> > > > @@ -0,0 +1,17 @@
> > > > +[binaries]
> > > > +c = 'aarch64-linux-gnu-gcc'
> > > > +cpp = 'aarch64-linux-gnu-cpp'
> > > > +ar = 'aarch64-linux-gnu-gcc-ar'
> > > > +strip = 'aarch64-linux-gnu-strip'
> > > > +pkgconfig = 'aarch64-linux-gnu-pkg-config'
> > > > +pcap-config = ''
> > > > +
> > > > +[host_machine]
> > > > +system = 'linux'
> > > > +cpu_family = 'aarch64'
> > > > +cpu = 'armv8-a'
> > > > +endian = 'little'
> > > > +
> > > > +[properties]
> > > > +implementor_id = '0x41'
> > > > +implementor_pn = '0xd49'
> > > > diff --git a/config/arm/meson.build b/config/arm/meson.build index
> > > > 42b4e43c7..58e0ae643 100644
> > > > --- a/config/arm/meson.build
> > > > +++ b/config/arm/meson.build
> > > > @@ -89,6 +89,14 @@ flags_n1generic_extra = [
> > > >     ['RTE_MAX_NUMA_NODES', 1],
> > > >     ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false],
> > > >     ['RTE_LIBRTE_VHOST_NUMA', false]]
> > > > +flags_n2generic_extra = [
> > > > +   ['RTE_MACHINE', '"neoverse-n2"'],
> > > > +   ['RTE_MAX_LCORE', 64],
> > > > +   ['RTE_CACHE_LINE_SIZE', 64],
> > > > +   ['RTE_ARM_FEATURE_ATOMICS', true],
> > > > +   ['RTE_USE_C11_MEM_MODEL', true],
> > > > +   ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false],
> > > > +   ['RTE_LIBRTE_VHOST_NUMA', false]]
> > > Do we need a flag RTE_ARM_FEATURE_SVE?
> >
> > I don't think extra flag is needed. We can rely on __ARM_FEATURE_SVE
> from compiler.
> > One scenario I can think of where RTE_ARM_FEATURE_SVE can be needed
> > is, when we are writing inline assembly with sve instructions and using
> compiler that has no sve support.
> > I'm not sure we will have sve inline assembly as C intrinsics are available.
> 
> It may be useful to introduce RTE_ARM_FEATURE_SVE to abstract any
> compiler difference in future(GCC vs clang or another tool chain etc).

According to the Arm C Language Extensions (ACLE) for SVE, preprocessor
macros like __ARM_FEATURE_SVE are defined to indicate available features.
Both GCC and clang define these macros. We can add RTE_ARM_FEATURE_SVE
for other toolchains that don't stick to ACLE. I'll add it in the next version.

> 
> 
> > >
> > > >
> > > >  machine_args_generic = [
> > > >     ['default', ['-march=armv8-a+crc', '-moutline-atomics']], @@
> > > > -100,7
> > > > +108,8 @@ machine_args_generic = [
> > > >     ['0xd09', ['-mcpu=cortex-a73']],
> > > >     ['0xd0a', ['-mcpu=cortex-a75']],
> > > >     ['0xd0b', ['-mcpu=cortex-a76']],
> > > > -   ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'],
> > > > flags_n1generic_extra]]
> > > > +   ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'],
> > > > flags_n1generic_extra],
> > > > +   ['0xd49', ['-march=armv8.5-a+crypto+sve'],
> > > > + flags_n2generic_extra]]
> > > Should this be 'sve2'? There should be a flag to indicate SVE2.
> >
> > Yes. N2 supports sve2 and sve2 is superset of sve.
> > I will do the change in next version.
> > >
> > > >
> > > >  machine_args_cavium = [
> > > >     ['default', ['-march=armv8-a+crc+crypto','-mcpu=thunderx']],
> > > > --
> > > > 2.25.1
> >

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [EXT] [PATCH v2 4/5] common/octeontx2: fix build with sve enabled
  2021-01-08 10:29     ` [dpdk-dev] [EXT] " Pavan Nikhilesh Bhagavatula
@ 2021-01-11  9:51       ` Ruifeng Wang
  0 siblings, 0 replies; 43+ messages in thread
From: Ruifeng Wang @ 2021-01-11  9:51 UTC (permalink / raw)
  To: Pavan Nikhilesh Bhagavatula, jerinj, Nithin Kumar Dabilpuram
  Cc: dev, vladimir.medvedkin, hemant.agrawal, Honnappa Nagarahalli,
	nd, stable, nd


> -----Original Message-----
> From: Pavan Nikhilesh Bhagavatula <pbhagavatula@marvell.com>
> Sent: Friday, January 8, 2021 6:29 PM
> To: Ruifeng Wang <Ruifeng.Wang@arm.com>; jerinj@marvell.com; Nithin
> Kumar Dabilpuram <ndabilpuram@marvell.com>
> Cc: dev@dpdk.org; vladimir.medvedkin@intel.com;
> hemant.agrawal@nxp.com; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>; stable@dpdk.org
> Subject: RE: [EXT] [PATCH v2 4/5] common/octeontx2: fix build with sve
> enabled
> 
> Hi Ruifeng,
> 
> >Building with gcc 10.2 with SVE extension enabled got error:
> >
> >{standard input}: Assembler messages:
> >{standard input}:4002: Error: selected processor does not support `mov
> >z3.b,#0'
> >{standard input}:4003: Error: selected processor does not support
> >`whilelo p1.b,xzr,x7'
> >{standard input}:4005: Error: selected processor does not support `ld1b
> >z0.b,p1/z,[x8]'
> >{standard input}:4006: Error: selected processor does not support
> >`whilelo p4.s,wzr,w7'
> >
> >This is because inline assembly code explicitly resets cpu model to not
> >have SVE support. Thus SVE instructions generated by compiler auto
> >vectorization got rejected by assembler.
> >
> >Fixed the issue by replacing inline assembly with equivalent atomic
> >built-ins. Compiler will generate LSE instructions for cpu that has the
> >extension.
> >
> >Fixes: 8a4f835971f5 ("common/octeontx2: add IO handling APIs")
> >Cc: jerinj@marvell.com
> >Cc: stable@dpdk.org
> >
> >Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
> >---
> > drivers/common/octeontx2/otx2_io_arm64.h | 37 +++--------------------
> >-
> > 1 file changed, 4 insertions(+), 33 deletions(-)
> >
> >diff --git a/drivers/common/octeontx2/otx2_io_arm64.h
> >b/drivers/common/octeontx2/otx2_io_arm64.h
> >index b5c85d9a6..8843a79b5 100644
> >--- a/drivers/common/octeontx2/otx2_io_arm64.h
> >+++ b/drivers/common/octeontx2/otx2_io_arm64.h
> >@@ -24,55 +24,26 @@
> > static __rte_always_inline uint64_t
> > otx2_atomic64_add_nosync(int64_t incr, int64_t *ptr)  {
> >-	uint64_t result;
> >-
> > 	/* Atomic add with no ordering */
> >-	asm volatile (
> >-		".cpu  generic+lse\n"
> >-		"ldadd %x[i], %x[r], [%[b]]"
> >-		: [r] "=r" (result), "+m" (*ptr)
> >-		: [i] "r" (incr), [b] "r" (ptr)
> >-		: "memory");
> >-	return result;
> >+	return (uint64_t)__atomic_fetch_add(ptr, incr,
> >__ATOMIC_RELAXED);
> > }
> >
> 
> Here LDADD acts as a way to interface to co-processors i.e.
> LDADD instruction opcode + specific io address are recognized by HW
> interceptor and dispatched to the specific coprocessor.

OK. Now I understand the background.
> 
> Leaving it to the compiler to use the correct instruction is a bad idea.
> This breaks the arm64_armv8_linux_gcc build as it doesn't have the
> +lse enabled.
> __atomic_fetch_add will generate a different instruction with SVE enabled.
> 
> Instead can we add +sve to the first line to prevent outer loop from
> optimizing out the trap?

Since the inline assembly needs to be preserved, we have to adjust the extensions enabled by its .cpu directive.
I will change this in the next version.

Thanks,
Ruifeng
> 
> I tested with 10.2 and n2 config below change works fine.
> -" .cpu          generic+lse\n"
> +" .cpu		generic+lse+sve\n"
> 
> Regards,
> Pavan.
> 
> > static __rte_always_inline uint64_t
> > otx2_atomic64_add_sync(int64_t incr, int64_t *ptr)  {
> >-	uint64_t result;
> >-
> >-	/* Atomic add with ordering */
> >-	asm volatile (
> >-		".cpu  generic+lse\n"
> >-		"ldadda %x[i], %x[r], [%[b]]"
> >-		: [r] "=r" (result), "+m" (*ptr)
> >-		: [i] "r" (incr), [b] "r" (ptr)
> >-		: "memory");
> >-	return result;
> >+	return (uint64_t)__atomic_fetch_add(ptr, incr,
> >__ATOMIC_ACQUIRE);
> > }
> >
> > static __rte_always_inline uint64_t
> > otx2_lmt_submit(rte_iova_t io_address)  {
> >-	uint64_t result;
> >-
> >-	asm volatile (
> >-		".cpu  generic+lse\n"
> >-		"ldeor xzr,%x[rf],[%[rs]]" :
> >-		 [rf] "=r"(result): [rs] "r"(io_address));
> >-	return result;
> >+	return __atomic_fetch_xor((uint64_t *)io_address, 0,
> >__ATOMIC_RELAXED);
> > }
> >
> > static __rte_always_inline uint64_t
> > otx2_lmt_submit_release(rte_iova_t io_address)  {
> >-	uint64_t result;
> >-
> >-	asm volatile (
> >-		".cpu  generic+lse\n"
> >-		"ldeorl xzr,%x[rf],[%[rs]]" :
> >-		 [rf] "=r"(result) : [rs] "r"(io_address));
> >-	return result;
> >+	return __atomic_fetch_xor((uint64_t *)io_address, 0,
> >__ATOMIC_RELEASE);
> > }
> >
> > static __rte_always_inline void
> >--
> >2.25.1


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/5] net/hns3: fix build with sve enabled
  2021-01-11  2:39         ` Ruifeng Wang
@ 2021-01-11 13:38           ` Honnappa Nagarahalli
  0 siblings, 0 replies; 43+ messages in thread
From: Honnappa Nagarahalli @ 2021-01-11 13:38 UTC (permalink / raw)
  To: Ruifeng Wang, oulijun, Min Hu (Connor),
	Yisen Zhuang, Huisong Li, Chengchang Tang, Chengwen Feng
  Cc: dev, vladimir.medvedkin, jerinj, hemant.agrawal, nd, stable,
	Honnappa Nagarahalli, nd

<snip>

> > >
> > >>
> > >> Building with SVE extension enabled stopped with error:
> > >>
> > >>   error: ACLE function ‘svwhilelt_b64_s32’ requires ISA extension ‘sve’
> > >>     18 | #define PG64_256BIT  svwhilelt_b64(0, 4)
> > >>
> > >> This is caused by unintentional cflags reset.
> > >> Fixed the issue by appending required flag to cflags instead of overriding it.
> > >>
> > >> Fixes: 952ebacce4f2 ("net/hns3: support SVE Rx")
> > >> Cc: xavier.huwei@huawei.com
> > >> Cc: stable@dpdk.org
> > >>
> > >> Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > >> ---
> > >>   drivers/net/hns3/meson.build | 2 +-
> > >>   1 file changed, 1 insertion(+), 1 deletion(-)
> > >>
> > >> diff --git a/drivers/net/hns3/meson.build
> > >> b/drivers/net/hns3/meson.build index 45cee34d9..798086357 100644
> > >> --- a/drivers/net/hns3/meson.build
> > >> +++ b/drivers/net/hns3/meson.build
> > >> @@ -32,7 +32,7 @@ deps += ['hash']
> > >>   if arch_subdir == 'arm' and dpdk_conf.get('RTE_ARCH_64')
> > >>   sources += files('hns3_rxtx_vec.c')
> > >>   if cc.get_define('__ARM_FEATURE_SVE', args: machine_args) != ''
> > >> -cflags = ['-DCC_SVE_SUPPORT']
> > >> +cflags += ['-DCC_SVE_SUPPORT']
> > > This comment is unrelated to this patch. We need to be consistent
> > > with the
> > macro definitions. Is '__ARM_FEATURE_SVE' not enough? If we need to
> > define an additional flag, I would name it something like
> > 'RTE_ARM_FEATURE_SVE'.
> > >
> > I think the __ARM_FEATURE_SVE is ok. if use the gcc version included
> > SVE flag, it will be identified as __ARM_FEATURE_SVE. it is defined in
> > the ARM SVE document.
> 
> Yes, we can rely on flags defined by compiler and no extra flag is needed.
> I can update in next version to remove this section from meson file and replace
> CC_SVE_SUPPORT in code.
Sounds good to me.

> > >>   sources += files('hns3_rxtx_vec_sve.c')
> > >>   endif
> > >>   endif
> > >> --
> > >> 2.25.1
> > >


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [PATCH v2 5/5] config: add Arm Neoverse N2
  2021-01-11  8:32           ` Ruifeng Wang
@ 2021-01-11 13:58             ` Honnappa Nagarahalli
  0 siblings, 0 replies; 43+ messages in thread
From: Honnappa Nagarahalli @ 2021-01-11 13:58 UTC (permalink / raw)
  To: Ruifeng Wang, Jerin Jacob
  Cc: jerinj, Jan Viktorin, Bruce Richardson, dev, vladimir.medvedkin,
	hemant.agrawal, nd, Honnappa Nagarahalli, nd

<snip>

> > > >
> > > > + Juraj
> > > >
> > > > Please note that this clashes with Juraj's patch for meson rework.
> > >
> > > Yes. I didn't base it on the build options rework series.
> > > I will rebase when that series got merged.
> > > >
> > > > <snip>
> > > >
> > > > >
> > > > > Add Arm Neoverse N2 cpu support.
> > > > >
> > > > > Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > > > ---
> > > > >  config/arm/arm64_n2_linux_gcc | 17 +++++++++++++++++
> > > > >  config/arm/meson.build        | 11 ++++++++++-
> > > > >  2 files changed, 27 insertions(+), 1 deletion(-)  create mode
> > > > > 100644 config/arm/arm64_n2_linux_gcc
> > > > >
> > > > > diff --git a/config/arm/arm64_n2_linux_gcc
> > > > > b/config/arm/arm64_n2_linux_gcc new file mode 100644 index
> > > > > 000000000..78f6f3e2b
> > > > > --- /dev/null
> > > > > +++ b/config/arm/arm64_n2_linux_gcc
> > > > > @@ -0,0 +1,17 @@
> > > > > +[binaries]
> > > > > +c = 'aarch64-linux-gnu-gcc'
> > > > > +cpp = 'aarch64-linux-gnu-cpp'
> > > > > +ar = 'aarch64-linux-gnu-gcc-ar'
> > > > > +strip = 'aarch64-linux-gnu-strip'
> > > > > +pkgconfig = 'aarch64-linux-gnu-pkg-config'
> > > > > +pcap-config = ''
> > > > > +
> > > > > +[host_machine]
> > > > > +system = 'linux'
> > > > > +cpu_family = 'aarch64'
> > > > > +cpu = 'armv8-a'
> > > > > +endian = 'little'
> > > > > +
> > > > > +[properties]
> > > > > +implementor_id = '0x41'
> > > > > +implementor_pn = '0xd49'
> > > > > diff --git a/config/arm/meson.build b/config/arm/meson.build
> > > > > index
> > > > > 42b4e43c7..58e0ae643 100644
> > > > > --- a/config/arm/meson.build
> > > > > +++ b/config/arm/meson.build
> > > > > @@ -89,6 +89,14 @@ flags_n1generic_extra = [
> > > > >     ['RTE_MAX_NUMA_NODES', 1],
> > > > >     ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false],
> > > > >     ['RTE_LIBRTE_VHOST_NUMA', false]]
> > > > > +flags_n2generic_extra = [
> > > > > +   ['RTE_MACHINE', '"neoverse-n2"'],
> > > > > +   ['RTE_MAX_LCORE', 64],
> > > > > +   ['RTE_CACHE_LINE_SIZE', 64],
> > > > > +   ['RTE_ARM_FEATURE_ATOMICS', true],
> > > > > +   ['RTE_USE_C11_MEM_MODEL', true],
> > > > > +   ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false],
> > > > > +   ['RTE_LIBRTE_VHOST_NUMA', false]]
> > > > Do we need a flag RTE_ARM_FEATURE_SVE?
> > >
> > > I don't think extra flag is needed. We can rely on __ARM_FEATURE_SVE
> > from compiler.
> > > One scenario I can think of where RTE_ARM_FEATURE_SVE can be needed
> > > is, when we are writing inline assembly with sve instructions and
> > > using
> > compiler that has no sve support.
> > > I'm not sure we will have sve inline assembly as C intrinsics are available.
> >
> > It may be useful to introduce RTE_ARM_FEATURE_SVE to abstract any
> > compiler difference in future(GCC vs clang or another tool chain etc).
> 
> According to Arm C Language Extension (ACLE) for SVE, preprocessor macros
> like __ARM_FEATURE_SVE are defined to indicate available features.
> GCC and clang have the macros defined. We can have RTE_ARM_FEATURE_SVE
> for some other tool chain that don't stick to ACLE. I'll add in next version.
The flag __ARM_FEATURE_SVE is a requirement from ACLE. If it is not defined, it is a bug in the compiler. Since GCC/Clang define this flag, I think we are fine without defining our own. It avoids checking for this additional flag in the code. We can always add it when we come across a toolchain that does not define this flag (or fix the toolchain).

BTW, this problem exists for __ARM_FEATURE_ATOMICS: it is not defined by Clang, hence we have RTE_ARM_FEATURE_ATOMICS. But it is getting fixed in Clang.

> 
> >
> >
> > > >
> > > > >
> > > > >  machine_args_generic = [
> > > > >     ['default', ['-march=armv8-a+crc', '-moutline-atomics']], @@
> > > > > -100,7
> > > > > +108,8 @@ machine_args_generic = [
> > > > >     ['0xd09', ['-mcpu=cortex-a73']],
> > > > >     ['0xd0a', ['-mcpu=cortex-a75']],
> > > > >     ['0xd0b', ['-mcpu=cortex-a76']],
> > > > > -   ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'],
> > > > > flags_n1generic_extra]]
> > > > > +   ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'],
> > > > > flags_n1generic_extra],
> > > > > +   ['0xd49', ['-march=armv8.5-a+crypto+sve'],
> > > > > + flags_n2generic_extra]]
> > > > Should this be 'sve2'? There should be a flag to indicate SVE2.
> > >
> > > Yes. N2 supports sve2 and sve2 is superset of sve.
> > > I will do the change in next version.
> > > >
> > > > >
> > > > >  machine_args_cavium = [
> > > > >     ['default', ['-march=armv8-a+crc+crypto','-mcpu=thunderx']],
> > > > > --
> > > > > 2.25.1
> > >


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support
  2020-12-18 10:12 [dpdk-dev] [RFC PATCH] lpm: add sve support for lookup on Arm platform Ruifeng Wang
  2021-01-05 15:44 ` Medvedkin, Vladimir
  2021-01-08  8:25 ` [dpdk-dev] [PATCH v2 0/5] lpm lookup with sve support Ruifeng Wang
@ 2021-01-12  2:57 ` Ruifeng Wang
  2021-01-12  2:57   ` [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on Arm platform Ruifeng Wang
                     ` (5 more replies)
  2 siblings, 6 replies; 43+ messages in thread
From: Ruifeng Wang @ 2021-01-12  2:57 UTC (permalink / raw)
  Cc: dev, vladimir.medvedkin, pbhagavatula, jerinj, hemant.agrawal,
	honnappa.nagarahalli, nd, Ruifeng Wang

Added an lpm4 lookupx4 implementation using the Arm Scalable Vector
Extension (SVE). SVE is exposed to the user through a vector-length-agnostic
interface.
Refer to [1] for more information about SVE.

Configuration was added for the Neoverse N2 CPU, which has SVE support.

Some build failures were fixed so that compilation with SVE enabled succeeds.

[1] https://developer.arm.com/tools-and-software/server-and-hpc/compile/arm-instruction-emulator/resources/tutorials/sve

---
v2:
Fixed tbl8 group index calculation. (Vladimir)
Added N2 config.
Fixed compiling when sve was enabled.

Ruifeng Wang (5):
  lpm: add sve support for lookup on Arm platform
  net/hns3: fix build with sve enabled
  net/octeontx: fix build with sve enabled
  common/octeontx2: fix build with sve enabled
  config: add Arm Neoverse N2

 config/arm/arm64_n2_linux_gcc            | 17 +++++
 config/arm/meson.build                   | 11 +++-
 drivers/common/octeontx2/otx2_io_arm64.h | 15 +++--
 drivers/net/hns3/hns3_rxtx.c             |  4 +-
 drivers/net/hns3/meson.build             |  1 -
 drivers/net/octeontx/base/octeontx_io.h  | 10 ++-
 lib/librte_eal/arm/include/rte_vect.h    |  3 +
 lib/librte_lpm/meson.build               |  2 +-
 lib/librte_lpm/rte_lpm.h                 |  4 ++
 lib/librte_lpm/rte_lpm_sve.h             | 83 ++++++++++++++++++++++++
 10 files changed, 139 insertions(+), 11 deletions(-)
 create mode 100644 config/arm/arm64_n2_linux_gcc
 create mode 100644 lib/librte_lpm/rte_lpm_sve.h

-- 
2.25.1


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on Arm platform
  2021-01-12  2:57 ` [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support Ruifeng Wang
@ 2021-01-12  2:57   ` Ruifeng Wang
  2021-01-13 15:58     ` David Marchand
  2021-01-27 13:04     ` David Marchand
  2021-01-12  2:57   ` [dpdk-dev] [PATCH v3 2/5] net/hns3: fix build with sve enabled Ruifeng Wang
                     ` (4 subsequent siblings)
  5 siblings, 2 replies; 43+ messages in thread
From: Ruifeng Wang @ 2021-01-12  2:57 UTC (permalink / raw)
  To: Jerin Jacob, Ruifeng Wang, Jan Viktorin, Bruce Richardson,
	Vladimir Medvedkin
  Cc: dev, pbhagavatula, hemant.agrawal, honnappa.nagarahalli, nd

Added a new path to do lpm4 lookup using the Scalable Vector Extension (SVE).
The SVE path is selected when the compiler defines __ARM_FEATURE_SVE.

Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
v2:
Fixed tbl8 group index calculation. (Vladimir)

 lib/librte_eal/arm/include/rte_vect.h |  3 +
 lib/librte_lpm/meson.build            |  2 +-
 lib/librte_lpm/rte_lpm.h              |  4 ++
 lib/librte_lpm/rte_lpm_sve.h          | 83 +++++++++++++++++++++++++++
 4 files changed, 91 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_lpm/rte_lpm_sve.h

diff --git a/lib/librte_eal/arm/include/rte_vect.h b/lib/librte_eal/arm/include/rte_vect.h
index a739e6e66..093e9122a 100644
--- a/lib/librte_eal/arm/include/rte_vect.h
+++ b/lib/librte_eal/arm/include/rte_vect.h
@@ -9,6 +9,9 @@
 #include "generic/rte_vect.h"
 #include "rte_debug.h"
 #include "arm_neon.h"
+#ifdef __ARM_FEATURE_SVE
+#include <arm_sve.h>
+#endif
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/lib/librte_lpm/meson.build b/lib/librte_lpm/meson.build
index 6cfc083c5..f93c86640 100644
--- a/lib/librte_lpm/meson.build
+++ b/lib/librte_lpm/meson.build
@@ -5,6 +5,6 @@ sources = files('rte_lpm.c', 'rte_lpm6.c')
 headers = files('rte_lpm.h', 'rte_lpm6.h')
 # since header files have different names, we can install all vector headers
 # without worrying about which architecture we actually need
-headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h')
+headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h', 'rte_lpm_sve.h')
 deps += ['hash']
 deps += ['rcu']
diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
index 1afe55cdc..28b57683b 100644
--- a/lib/librte_lpm/rte_lpm.h
+++ b/lib/librte_lpm/rte_lpm.h
@@ -402,7 +402,11 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
 	uint32_t defv);
 
 #if defined(RTE_ARCH_ARM)
+#ifdef __ARM_FEATURE_SVE
+#include "rte_lpm_sve.h"
+#else
 #include "rte_lpm_neon.h"
+#endif
 #elif defined(RTE_ARCH_PPC_64)
 #include "rte_lpm_altivec.h"
 #else
diff --git a/lib/librte_lpm/rte_lpm_sve.h b/lib/librte_lpm/rte_lpm_sve.h
new file mode 100644
index 000000000..2e319373e
--- /dev/null
+++ b/lib/librte_lpm/rte_lpm_sve.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Arm Limited
+ */
+
+#ifndef _RTE_LPM_SVE_H_
+#define _RTE_LPM_SVE_H_
+
+#include <rte_vect.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+__rte_internal
+static void
+__rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips,
+		uint32_t *__rte_restrict next_hops, const uint32_t n)
+{
+	uint32_t i = 0;
+	svuint32_t v_ip, v_idx, v_tbl24, v_tbl8, v_hop;
+	svuint32_t v_mask_xv, v_mask_v, v_mask_hop;
+	svbool_t pg = svwhilelt_b32(i, n);
+	svbool_t pv;
+
+	do {
+		v_ip = svld1(pg, &ips[i]);
+		/* Get indices for tbl24[] */
+		v_idx = svlsr_x(pg, v_ip, 8);
+		/* Extract values from tbl24[] */
+		v_tbl24 = svld1_gather_index(pg, (const uint32_t *)lpm->tbl24,
+						v_idx);
+
+		/* Create mask with valid set */
+		v_mask_v = svdup_u32_z(pg, RTE_LPM_LOOKUP_SUCCESS);
+		/* Create mask with valid and valid_group set */
+		v_mask_xv = svdup_u32_z(pg, RTE_LPM_VALID_EXT_ENTRY_BITMASK);
+		/* Create predicate for tbl24 entries: (valid && !valid_group) */
+		pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), v_mask_v);
+		/* Create mask for next_hop in table entry */
+		v_mask_hop = svdup_u32_z(pg, 0x00ffffff);
+		/* Extract next_hop and write back */
+		v_hop = svand_x(pv, v_tbl24, v_mask_hop);
+		svst1(pv, &next_hops[i], v_hop);
+
+		/* Update predicate for tbl24 entries: (valid && valid_group) */
+		pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), v_mask_xv);
+		/* Compute tbl8 index */
+		v_idx = svand_x(pv, v_tbl24, svdup_u32_z(pv, 0xffffff));
+		v_idx = svmul_x(pv, v_idx, RTE_LPM_TBL8_GROUP_NUM_ENTRIES);
+		v_idx = svadd_x(pv, svand_x(pv, v_ip, svdup_u32_z(pv, 0xff)),
+				v_idx);
+		/* Extract values from tbl8[] */
+		v_tbl8 = svld1_gather_index(pv, (const uint32_t *)lpm->tbl8,
+						v_idx);
+		/* Update predicate for tbl8 entries: (valid) */
+		pv = svcmpeq(pv, svand_z(pv, v_tbl8, v_mask_v), v_mask_v);
+		/* Extract next_hop and write back */
+		v_hop = svand_x(pv, v_tbl8, v_mask_hop);
+		svst1(pv, &next_hops[i], v_hop);
+
+		i += svlen(v_ip);
+		pg = svwhilelt_b32(i, n);
+	} while (svptest_any(svptrue_b32(), pg));
+}
+
+static inline void
+rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
+		uint32_t defv)
+{
+	uint32_t i, ips[4];
+
+	vst1q_s32((int32_t *)ips, ip);
+	for (i = 0; i < 4; i++)
+		hop[i] = defv;
+
+	__rte_lpm_lookup_vec(lpm, ips, hop, 4);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_LPM_SVE_H_ */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [dpdk-dev] [PATCH v3 2/5] net/hns3: fix build with sve enabled
  2021-01-12  2:57 ` [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support Ruifeng Wang
  2021-01-12  2:57   ` [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on Arm platform Ruifeng Wang
@ 2021-01-12  2:57   ` Ruifeng Wang
  2021-01-13  2:16     ` Honnappa Nagarahalli
  2021-01-12  2:57   ` [dpdk-dev] [PATCH v3 3/5] net/octeontx: " Ruifeng Wang
                     ` (3 subsequent siblings)
  5 siblings, 1 reply; 43+ messages in thread
From: Ruifeng Wang @ 2021-01-12  2:57 UTC (permalink / raw)
  To: Wei Hu (Xavier), Min Hu (Connor),
	Yisen Zhuang, Lijun Ou, Chengwen Feng, Chengchang Tang,
	Huisong Li
  Cc: dev, vladimir.medvedkin, pbhagavatula, jerinj, hemant.agrawal,
	honnappa.nagarahalli, nd, Ruifeng Wang, stable

Building with the SVE extension enabled fails with the error:

 error: ACLE function ‘svwhilelt_b64_s32’ requires ISA extension ‘sve’
   18 | #define PG64_256BIT  svwhilelt_b64(0, 4)

This is caused by an unintentional reset of cflags.
Fixed the issue by not touching cflags and relying on the macro defined
by the compiler instead.

Fixes: 952ebacce4f2 ("net/hns3: support SVE Rx")
Cc: stable@dpdk.org

Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
v3:
Removed extra flag, use compiler flag instead.

 drivers/net/hns3/hns3_rxtx.c | 4 ++--
 drivers/net/hns3/meson.build | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/hns3/hns3_rxtx.c b/drivers/net/hns3/hns3_rxtx.c
index 88d3baba4..5ac36b314 100644
--- a/drivers/net/hns3/hns3_rxtx.c
+++ b/drivers/net/hns3/hns3_rxtx.c
@@ -10,7 +10,7 @@
 #include <rte_io.h>
 #include <rte_net.h>
 #include <rte_malloc.h>
-#if defined(RTE_ARCH_ARM64) && defined(CC_SVE_SUPPORT)
+#if defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_SVE)
 #include <rte_cpuflags.h>
 #endif
 
@@ -2467,7 +2467,7 @@ hns3_rx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id,
 static bool
 hns3_check_sve_support(void)
 {
-#if defined(RTE_ARCH_ARM64) && defined(CC_SVE_SUPPORT)
+#if defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_SVE)
 	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SVE))
 		return true;
 #endif
diff --git a/drivers/net/hns3/meson.build b/drivers/net/hns3/meson.build
index 45cee34d9..5674d986b 100644
--- a/drivers/net/hns3/meson.build
+++ b/drivers/net/hns3/meson.build
@@ -32,7 +32,6 @@ deps += ['hash']
 if arch_subdir == 'arm' and dpdk_conf.get('RTE_ARCH_64')
 	sources += files('hns3_rxtx_vec.c')
 	if cc.get_define('__ARM_FEATURE_SVE', args: machine_args) != ''
-		cflags = ['-DCC_SVE_SUPPORT']
 		sources += files('hns3_rxtx_vec_sve.c')
 	endif
 endif
-- 
2.25.1


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [dpdk-dev] [PATCH v3 3/5] net/octeontx: fix build with sve enabled
  2021-01-12  2:57 ` [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support Ruifeng Wang
  2021-01-12  2:57   ` [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on Arm platform Ruifeng Wang
  2021-01-12  2:57   ` [dpdk-dev] [PATCH v3 2/5] net/hns3: fix build with sve enabled Ruifeng Wang
@ 2021-01-12  2:57   ` Ruifeng Wang
  2021-01-12  4:39     ` Jerin Jacob
  2021-01-12  2:57   ` [dpdk-dev] [PATCH v3 4/5] common/octeontx2: " Ruifeng Wang
                     ` (2 subsequent siblings)
  5 siblings, 1 reply; 43+ messages in thread
From: Ruifeng Wang @ 2021-01-12  2:57 UTC (permalink / raw)
  To: Harman Kalra, Santosh Shukla, Jerin Jacob
  Cc: dev, vladimir.medvedkin, pbhagavatula, jerinj, hemant.agrawal,
	honnappa.nagarahalli, nd, Ruifeng Wang, stable

Building with gcc 10.2 and the SVE extension enabled fails with errors:

{standard input}: Assembler messages:
{standard input}:91: Error: selected processor does not support `addvl x4,x8,#-1'
{standard input}:95: Error: selected processor does not support `ptrue p1.d,all'
{standard input}:135: Error: selected processor does not support `whilelo p2.d,xzr,x5'
{standard input}:137: Error: selected processor does not support `decb x1'

This is because the inline assembly code explicitly resets the cpu model
to one without SVE support. Thus the SVE instructions generated by
compiler auto-vectorization are rejected by the assembler.

Added SVE to the cpu model specified by the inline assembly when SVE is
enabled. The inline assembly is not replaced with C atomics because the
driver relies on specific LSE instructions to interface to the
co-processor [1].

Fixes: f0c7bb1bf778 ("net/octeontx/base: add octeontx IO operations")
Cc: jerinj@marvell.com
Cc: stable@dpdk.org

[1] https://mails.dpdk.org/archives/dev/2021-January/196092.html

Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
v3:
Keep inline assembly and add sve extension to fix issue. (Pavan)

 drivers/net/octeontx/base/octeontx_io.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/octeontx/base/octeontx_io.h b/drivers/net/octeontx/base/octeontx_io.h
index 04b9ce191..d0b9cfbc6 100644
--- a/drivers/net/octeontx/base/octeontx_io.h
+++ b/drivers/net/octeontx/base/octeontx_io.h
@@ -52,6 +52,11 @@ do {							\
 #endif
 
 #if defined(RTE_ARCH_ARM64)
+#if defined(__ARM_FEATURE_SVE)
+#define __LSE_PREAMBLE " .cpu	generic+lse+sve\n"
+#else
+#define __LSE_PREAMBLE " .cpu	generic+lse\n"
+#endif
 /**
  * Perform an atomic fetch-and-add operation.
  */
@@ -61,7 +66,7 @@ octeontx_reg_ldadd_u64(void *addr, int64_t off)
 	uint64_t old_val;
 
 	__asm__ volatile(
-		" .cpu		generic+lse\n"
+		__LSE_PREAMBLE
 		" ldadd	%1, %0, [%2]\n"
 		: "=r" (old_val) : "r" (off), "r" (addr) : "memory");
 
@@ -98,12 +103,13 @@ octeontx_reg_lmtst(void *lmtline_va, void *ioreg_va, const uint64_t cmdbuf[],
 
 		/* LDEOR initiates atomic transfer to I/O device */
 		__asm__ volatile(
-			" .cpu		generic+lse\n"
+			__LSE_PREAMBLE
 			" ldeor	xzr, %0, [%1]\n"
 			: "=r" (result) : "r" (ioreg_va) : "memory");
 	} while (!result);
 }
 
+#undef __LSE_PREAMBLE
 #else
 
 static inline uint64_t
-- 
2.25.1


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [dpdk-dev] [PATCH v3 4/5] common/octeontx2: fix build with sve enabled
  2021-01-12  2:57 ` [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support Ruifeng Wang
                     ` (2 preceding siblings ...)
  2021-01-12  2:57   ` [dpdk-dev] [PATCH v3 3/5] net/octeontx: " Ruifeng Wang
@ 2021-01-12  2:57   ` Ruifeng Wang
  2021-01-12  4:38     ` Jerin Jacob
  2021-01-12  2:57   ` [dpdk-dev] [PATCH v3 5/5] config: add Arm Neoverse N2 Ruifeng Wang
  2021-01-14 15:18   ` [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support David Marchand
  5 siblings, 1 reply; 43+ messages in thread
From: Ruifeng Wang @ 2021-01-12  2:57 UTC (permalink / raw)
  To: Jerin Jacob, Nithin Dabilpuram, Pavan Nikhilesh
  Cc: dev, vladimir.medvedkin, hemant.agrawal, honnappa.nagarahalli,
	nd, Ruifeng Wang, stable

Building with gcc 10.2 with the SVE extension enabled fails with these errors:

{standard input}: Assembler messages:
{standard input}:4002: Error: selected processor does not support `mov z3.b,#0'
{standard input}:4003: Error: selected processor does not support `whilelo p1.b,xzr,x7'
{standard input}:4005: Error: selected processor does not support `ld1b z0.b,p1/z,[x8]'
{standard input}:4006: Error: selected processor does not support `whilelo p4.s,wzr,w7'

This is because the inline assembly code explicitly resets the cpu model to
one without SVE support. Thus the SVE instructions generated by compiler
auto-vectorization are rejected by the assembler.

Added SVE to the cpu model specified by the inline assembly to fix this.
The inline assembly is not replaced with C atomics because the driver relies
on specific LSE instructions to interface with the co-processor [1].

Fixes: 8a4f835971f5 ("common/octeontx2: add IO handling APIs")
Cc: jerinj@marvell.com
Cc: stable@dpdk.org

[1] https://mails.dpdk.org/archives/dev/2021-January/196092.html

Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
v3:
Keep inline assembly and add sve extension to fix issue. (Pavan)

 drivers/common/octeontx2/otx2_io_arm64.h | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/common/octeontx2/otx2_io_arm64.h b/drivers/common/octeontx2/otx2_io_arm64.h
index b5c85d9a6..34268e3af 100644
--- a/drivers/common/octeontx2/otx2_io_arm64.h
+++ b/drivers/common/octeontx2/otx2_io_arm64.h
@@ -21,6 +21,12 @@
 #define otx2_prefetch_store_keep(ptr) ({\
 	asm volatile("prfm pstl1keep, [%x0]\n" : : "r" (ptr)); })
 
+#if defined(__ARM_FEATURE_SVE)
+#define __LSE_PREAMBLE " .cpu  generic+lse+sve\n"
+#else
+#define __LSE_PREAMBLE " .cpu  generic+lse\n"
+#endif
+
 static __rte_always_inline uint64_t
 otx2_atomic64_add_nosync(int64_t incr, int64_t *ptr)
 {
@@ -28,7 +34,7 @@ otx2_atomic64_add_nosync(int64_t incr, int64_t *ptr)
 
 	/* Atomic add with no ordering */
 	asm volatile (
-		".cpu  generic+lse\n"
+		__LSE_PREAMBLE
 		"ldadd %x[i], %x[r], [%[b]]"
 		: [r] "=r" (result), "+m" (*ptr)
 		: [i] "r" (incr), [b] "r" (ptr)
@@ -43,7 +49,7 @@ otx2_atomic64_add_sync(int64_t incr, int64_t *ptr)
 
 	/* Atomic add with ordering */
 	asm volatile (
-		".cpu  generic+lse\n"
+		__LSE_PREAMBLE
 		"ldadda %x[i], %x[r], [%[b]]"
 		: [r] "=r" (result), "+m" (*ptr)
 		: [i] "r" (incr), [b] "r" (ptr)
@@ -57,7 +63,7 @@ otx2_lmt_submit(rte_iova_t io_address)
 	uint64_t result;
 
 	asm volatile (
-		".cpu  generic+lse\n"
+		__LSE_PREAMBLE
 		"ldeor xzr,%x[rf],[%[rs]]" :
 		 [rf] "=r"(result): [rs] "r"(io_address));
 	return result;
@@ -69,7 +75,7 @@ otx2_lmt_submit_release(rte_iova_t io_address)
 	uint64_t result;
 
 	asm volatile (
-		".cpu  generic+lse\n"
+		__LSE_PREAMBLE
 		"ldeorl xzr,%x[rf],[%[rs]]" :
 		 [rf] "=r"(result) : [rs] "r"(io_address));
 	return result;
@@ -104,4 +110,5 @@ otx2_lmt_mov_seg(void *out, const void *in, const uint16_t segdw)
 		dst128[i] = src128[i];
 }
 
+#undef __LSE_PREAMBLE
 #endif /* _OTX2_IO_ARM64_H_ */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [dpdk-dev] [PATCH v3 5/5] config: add Arm Neoverse N2
  2021-01-12  2:57 ` [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support Ruifeng Wang
                     ` (3 preceding siblings ...)
  2021-01-12  2:57   ` [dpdk-dev] [PATCH v3 4/5] common/octeontx2: " Ruifeng Wang
@ 2021-01-12  2:57   ` Ruifeng Wang
  2021-01-12  4:44     ` Jerin Jacob
  2021-01-13  2:08     ` Honnappa Nagarahalli
  2021-01-14 15:18   ` [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support David Marchand
  5 siblings, 2 replies; 43+ messages in thread
From: Ruifeng Wang @ 2021-01-12  2:57 UTC (permalink / raw)
  To: Jerin Jacob, Ruifeng Wang, Jan Viktorin, Bruce Richardson
  Cc: dev, vladimir.medvedkin, pbhagavatula, hemant.agrawal,
	honnappa.nagarahalli, nd

Add Arm Neoverse N2 cpu support.

Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
v3:
Changed arch extension from sve to sve2 as N2 supports sve2. (Honnappa)

 config/arm/arm64_n2_linux_gcc | 17 +++++++++++++++++
 config/arm/meson.build        | 11 ++++++++++-
 2 files changed, 27 insertions(+), 1 deletion(-)
 create mode 100644 config/arm/arm64_n2_linux_gcc

diff --git a/config/arm/arm64_n2_linux_gcc b/config/arm/arm64_n2_linux_gcc
new file mode 100644
index 000000000..78f6f3e2b
--- /dev/null
+++ b/config/arm/arm64_n2_linux_gcc
@@ -0,0 +1,17 @@
+[binaries]
+c = 'aarch64-linux-gnu-gcc'
+cpp = 'aarch64-linux-gnu-cpp'
+ar = 'aarch64-linux-gnu-gcc-ar'
+strip = 'aarch64-linux-gnu-strip'
+pkgconfig = 'aarch64-linux-gnu-pkg-config'
+pcap-config = ''
+
+[host_machine]
+system = 'linux'
+cpu_family = 'aarch64'
+cpu = 'armv8-a'
+endian = 'little'
+
+[properties]
+implementor_id = '0x41'
+implementor_pn = '0xd49'
diff --git a/config/arm/meson.build b/config/arm/meson.build
index 42b4e43c7..5fd1c40a0 100644
--- a/config/arm/meson.build
+++ b/config/arm/meson.build
@@ -89,6 +89,14 @@ flags_n1generic_extra = [
 	['RTE_MAX_NUMA_NODES', 1],
 	['RTE_EAL_NUMA_AWARE_HUGEPAGES', false],
 	['RTE_LIBRTE_VHOST_NUMA', false]]
+flags_n2generic_extra = [
+	['RTE_MACHINE', '"neoverse-n2"'],
+	['RTE_MAX_LCORE', 64],
+	['RTE_CACHE_LINE_SIZE', 64],
+	['RTE_ARM_FEATURE_ATOMICS', true],
+	['RTE_USE_C11_MEM_MODEL', true],
+	['RTE_EAL_NUMA_AWARE_HUGEPAGES', false],
+	['RTE_LIBRTE_VHOST_NUMA', false]]
 
 machine_args_generic = [
 	['default', ['-march=armv8-a+crc', '-moutline-atomics']],
@@ -100,7 +108,8 @@ machine_args_generic = [
 	['0xd09', ['-mcpu=cortex-a73']],
 	['0xd0a', ['-mcpu=cortex-a75']],
 	['0xd0b', ['-mcpu=cortex-a76']],
-	['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'], flags_n1generic_extra]]
+	['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'], flags_n1generic_extra],
+	['0xd49', ['-march=armv8.5-a+crypto+sve2'], flags_n2generic_extra]]
 
 machine_args_cavium = [
 	['default', ['-march=armv8-a+crc+crypto','-mcpu=thunderx']],
-- 
2.25.1


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [PATCH v3 4/5] common/octeontx2: fix build with sve enabled
  2021-01-12  2:57   ` [dpdk-dev] [PATCH v3 4/5] common/octeontx2: " Ruifeng Wang
@ 2021-01-12  4:38     ` Jerin Jacob
  0 siblings, 0 replies; 43+ messages in thread
From: Jerin Jacob @ 2021-01-12  4:38 UTC (permalink / raw)
  To: Ruifeng Wang
  Cc: Jerin Jacob, Nithin Dabilpuram, Pavan Nikhilesh, dpdk-dev,
	Vladimir Medvedkin, Hemant Agrawal, Honnappa Nagarahalli, nd,
	dpdk stable

On Tue, Jan 12, 2021 at 8:28 AM Ruifeng Wang <ruifeng.wang@arm.com> wrote:
>
> Building with gcc 10.2 with SVE extension enabled got error:
>
> {standard input}: Assembler messages:
> {standard input}:4002: Error: selected processor does not support `mov z3.b,#0'
> {standard input}:4003: Error: selected processor does not support `whilelo p1.b,xzr,x7'
> {standard input}:4005: Error: selected processor does not support `ld1b z0.b,p1/z,[x8]'
> {standard input}:4006: Error: selected processor does not support `whilelo p4.s,wzr,w7'
>
> This is because inline assembly code explicitly resets cpu model to
> not have SVE support. Thus SVE instructions generated by compiler
> auto vectorization got rejected by assembler.
>
> Added SVE to the cpu model specified by inline assembly for SVE support.
> Not replacing the inline assembly with C atomics because the driver relies
> on specific LSE instruction to interface to co-processor [1].
>
> Fixes: 8a4f835971f5 ("common/octeontx2: add IO handling APIs")
> Cc: jerinj@marvell.com
> Cc: stable@dpdk.org

Reviewed-by: Jerin Jacob <jerinj@marvell.com>



>
> [1] https://mails.dpdk.org/archives/dev/2021-January/196092.html
>
> Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
> v3:
> Keep inline assembly and add sve extension to fix issue. (Pavan)
>
>  drivers/common/octeontx2/otx2_io_arm64.h | 15 +++++++++++----
>  1 file changed, 11 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/common/octeontx2/otx2_io_arm64.h b/drivers/common/octeontx2/otx2_io_arm64.h
> index b5c85d9a6..34268e3af 100644
> --- a/drivers/common/octeontx2/otx2_io_arm64.h
> +++ b/drivers/common/octeontx2/otx2_io_arm64.h
> @@ -21,6 +21,12 @@
>  #define otx2_prefetch_store_keep(ptr) ({\
>         asm volatile("prfm pstl1keep, [%x0]\n" : : "r" (ptr)); })
>
> +#if defined(__ARM_FEATURE_SVE)
> +#define __LSE_PREAMBLE " .cpu  generic+lse+sve\n"
> +#else
> +#define __LSE_PREAMBLE " .cpu  generic+lse\n"
> +#endif
> +
>  static __rte_always_inline uint64_t
>  otx2_atomic64_add_nosync(int64_t incr, int64_t *ptr)
>  {
> @@ -28,7 +34,7 @@ otx2_atomic64_add_nosync(int64_t incr, int64_t *ptr)
>
>         /* Atomic add with no ordering */
>         asm volatile (
> -               ".cpu  generic+lse\n"
> +               __LSE_PREAMBLE
>                 "ldadd %x[i], %x[r], [%[b]]"
>                 : [r] "=r" (result), "+m" (*ptr)
>                 : [i] "r" (incr), [b] "r" (ptr)
> @@ -43,7 +49,7 @@ otx2_atomic64_add_sync(int64_t incr, int64_t *ptr)
>
>         /* Atomic add with ordering */
>         asm volatile (
> -               ".cpu  generic+lse\n"
> +               __LSE_PREAMBLE
>                 "ldadda %x[i], %x[r], [%[b]]"
>                 : [r] "=r" (result), "+m" (*ptr)
>                 : [i] "r" (incr), [b] "r" (ptr)
> @@ -57,7 +63,7 @@ otx2_lmt_submit(rte_iova_t io_address)
>         uint64_t result;
>
>         asm volatile (
> -               ".cpu  generic+lse\n"
> +               __LSE_PREAMBLE
>                 "ldeor xzr,%x[rf],[%[rs]]" :
>                  [rf] "=r"(result): [rs] "r"(io_address));
>         return result;
> @@ -69,7 +75,7 @@ otx2_lmt_submit_release(rte_iova_t io_address)
>         uint64_t result;
>
>         asm volatile (
> -               ".cpu  generic+lse\n"
> +               __LSE_PREAMBLE
>                 "ldeorl xzr,%x[rf],[%[rs]]" :
>                  [rf] "=r"(result) : [rs] "r"(io_address));
>         return result;
> @@ -104,4 +110,5 @@ otx2_lmt_mov_seg(void *out, const void *in, const uint16_t segdw)
>                 dst128[i] = src128[i];
>  }
>
> +#undef __LSE_PREAMBLE
>  #endif /* _OTX2_IO_ARM64_H_ */
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [PATCH v3 3/5] net/octeontx: fix build with sve enabled
  2021-01-12  2:57   ` [dpdk-dev] [PATCH v3 3/5] net/octeontx: " Ruifeng Wang
@ 2021-01-12  4:39     ` Jerin Jacob
  0 siblings, 0 replies; 43+ messages in thread
From: Jerin Jacob @ 2021-01-12  4:39 UTC (permalink / raw)
  To: Ruifeng Wang
  Cc: Harman Kalra, Santosh Shukla, Jerin Jacob, dpdk-dev,
	Vladimir Medvedkin, Pavan Nikhilesh, Jerin Jacob, Hemant Agrawal,
	Honnappa Nagarahalli, nd, dpdk stable

On Tue, Jan 12, 2021 at 8:28 AM Ruifeng Wang <ruifeng.wang@arm.com> wrote:
>
> Building with gcc 10.2 with SVE extension enabled got error:
>
> {standard input}: Assembler messages:
> {standard input}:91: Error: selected processor does not support `addvl x4,x8,#-1'
> {standard input}:95: Error: selected processor does not support `ptrue p1.d,all'
> {standard input}:135: Error: selected processor does not support `whilelo p2.d,xzr,x5'
> {standard input}:137: Error: selected processor does not support `decb x1'
>
> This is because inline assembly code explicitly resets cpu model to
> not have SVE support. Thus SVE instructions generated by compiler
> auto vectorization got rejected by assembler.
>
> Added SVE to the cpu model specified by inline assembly for SVE support.
> Not replacing the inline assembly with C atomics because the driver relies
> on specific LSE instruction to interface to co-processor [1].
>
> Fixes: f0c7bb1bf778 ("net/octeontx/base: add octeontx IO operations")
> Cc: jerinj@marvell.com
> Cc: stable@dpdk.org
>
> [1] https://mails.dpdk.org/archives/dev/2021-January/196092.html
>
> Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>


Reviewed-by: Jerin Jacob <jerinj@marvell.com>


> ---
> v3:
> Keep inline assembly and add sve extension to fix issue. (Pavan)
>
>  drivers/net/octeontx/base/octeontx_io.h | 10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/octeontx/base/octeontx_io.h b/drivers/net/octeontx/base/octeontx_io.h
> index 04b9ce191..d0b9cfbc6 100644
> --- a/drivers/net/octeontx/base/octeontx_io.h
> +++ b/drivers/net/octeontx/base/octeontx_io.h
> @@ -52,6 +52,11 @@ do {                                                 \
>  #endif
>
>  #if defined(RTE_ARCH_ARM64)
> +#if defined(__ARM_FEATURE_SVE)
> +#define __LSE_PREAMBLE " .cpu  generic+lse+sve\n"
> +#else
> +#define __LSE_PREAMBLE " .cpu  generic+lse\n"
> +#endif
>  /**
>   * Perform an atomic fetch-and-add operation.
>   */
> @@ -61,7 +66,7 @@ octeontx_reg_ldadd_u64(void *addr, int64_t off)
>         uint64_t old_val;
>
>         __asm__ volatile(
> -               " .cpu          generic+lse\n"
> +               __LSE_PREAMBLE
>                 " ldadd %1, %0, [%2]\n"
>                 : "=r" (old_val) : "r" (off), "r" (addr) : "memory");
>
> @@ -98,12 +103,13 @@ octeontx_reg_lmtst(void *lmtline_va, void *ioreg_va, const uint64_t cmdbuf[],
>
>                 /* LDEOR initiates atomic transfer to I/O device */
>                 __asm__ volatile(
> -                       " .cpu          generic+lse\n"
> +                       __LSE_PREAMBLE
>                         " ldeor xzr, %0, [%1]\n"
>                         : "=r" (result) : "r" (ioreg_va) : "memory");
>         } while (!result);
>  }
>
> +#undef __LSE_PREAMBLE
>  #else
>
>  static inline uint64_t
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [PATCH v3 5/5] config: add Arm Neoverse N2
  2021-01-12  2:57   ` [dpdk-dev] [PATCH v3 5/5] config: add Arm Neoverse N2 Ruifeng Wang
@ 2021-01-12  4:44     ` Jerin Jacob
  2021-01-13  2:08     ` Honnappa Nagarahalli
  1 sibling, 0 replies; 43+ messages in thread
From: Jerin Jacob @ 2021-01-12  4:44 UTC (permalink / raw)
  To: Ruifeng Wang
  Cc: Jerin Jacob, Jan Viktorin, Bruce Richardson, dpdk-dev,
	Vladimir Medvedkin, Pavan Nikhilesh, Hemant Agrawal,
	Honnappa Nagarahalli, nd

On Tue, Jan 12, 2021 at 8:28 AM Ruifeng Wang <ruifeng.wang@arm.com> wrote:
>
> Add Arm Neoverse N2 cpu support.
>
> Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>


Acked-by: Jerin Jacob <jerinj@marvell.com>


> ---
> v3:
> Changed arch extension from sve to sve2 as N2 supports sve2. (Honnappa)
>
>  config/arm/arm64_n2_linux_gcc | 17 +++++++++++++++++
>  config/arm/meson.build        | 11 ++++++++++-
>  2 files changed, 27 insertions(+), 1 deletion(-)
>  create mode 100644 config/arm/arm64_n2_linux_gcc
>
> diff --git a/config/arm/arm64_n2_linux_gcc b/config/arm/arm64_n2_linux_gcc
> new file mode 100644
> index 000000000..78f6f3e2b
> --- /dev/null
> +++ b/config/arm/arm64_n2_linux_gcc
> @@ -0,0 +1,17 @@
> +[binaries]
> +c = 'aarch64-linux-gnu-gcc'
> +cpp = 'aarch64-linux-gnu-cpp'
> +ar = 'aarch64-linux-gnu-gcc-ar'
> +strip = 'aarch64-linux-gnu-strip'
> +pkgconfig = 'aarch64-linux-gnu-pkg-config'
> +pcap-config = ''
> +
> +[host_machine]
> +system = 'linux'
> +cpu_family = 'aarch64'
> +cpu = 'armv8-a'
> +endian = 'little'
> +
> +[properties]
> +implementor_id = '0x41'
> +implementor_pn = '0xd49'
> diff --git a/config/arm/meson.build b/config/arm/meson.build
> index 42b4e43c7..5fd1c40a0 100644
> --- a/config/arm/meson.build
> +++ b/config/arm/meson.build
> @@ -89,6 +89,14 @@ flags_n1generic_extra = [
>         ['RTE_MAX_NUMA_NODES', 1],
>         ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false],
>         ['RTE_LIBRTE_VHOST_NUMA', false]]
> +flags_n2generic_extra = [
> +       ['RTE_MACHINE', '"neoverse-n2"'],
> +       ['RTE_MAX_LCORE', 64],
> +       ['RTE_CACHE_LINE_SIZE', 64],
> +       ['RTE_ARM_FEATURE_ATOMICS', true],
> +       ['RTE_USE_C11_MEM_MODEL', true],
> +       ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false],
> +       ['RTE_LIBRTE_VHOST_NUMA', false]]
>
>  machine_args_generic = [
>         ['default', ['-march=armv8-a+crc', '-moutline-atomics']],
> @@ -100,7 +108,8 @@ machine_args_generic = [
>         ['0xd09', ['-mcpu=cortex-a73']],
>         ['0xd0a', ['-mcpu=cortex-a75']],
>         ['0xd0b', ['-mcpu=cortex-a76']],
> -       ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'], flags_n1generic_extra]]
> +       ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'], flags_n1generic_extra],
> +       ['0xd49', ['-march=armv8.5-a+crypto+sve2'], flags_n2generic_extra]]
>
>  machine_args_cavium = [
>         ['default', ['-march=armv8-a+crc+crypto','-mcpu=thunderx']],
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [PATCH v3 5/5] config: add Arm Neoverse N2
  2021-01-12  2:57   ` [dpdk-dev] [PATCH v3 5/5] config: add Arm Neoverse N2 Ruifeng Wang
  2021-01-12  4:44     ` Jerin Jacob
@ 2021-01-13  2:08     ` Honnappa Nagarahalli
  1 sibling, 0 replies; 43+ messages in thread
From: Honnappa Nagarahalli @ 2021-01-13  2:08 UTC (permalink / raw)
  To: Ruifeng Wang, jerinj, Ruifeng Wang, Jan Viktorin, Bruce Richardson
  Cc: dev, vladimir.medvedkin, pbhagavatula, hemant.agrawal, nd,
	Honnappa Nagarahalli, nd

<snip>

> 
> Add Arm Neoverse N2 cpu support.
> 
> Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
Looks good

Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> ---
> v3:
> Changed arch extension from sve to sve2 as N2 supports sve2. (Honnappa)
> 
>  config/arm/arm64_n2_linux_gcc | 17 +++++++++++++++++
>  config/arm/meson.build        | 11 ++++++++++-
>  2 files changed, 27 insertions(+), 1 deletion(-)
>  create mode 100644 config/arm/arm64_n2_linux_gcc
> 
> diff --git a/config/arm/arm64_n2_linux_gcc b/config/arm/arm64_n2_linux_gcc
> new file mode 100644
> index 000000000..78f6f3e2b
> --- /dev/null
> +++ b/config/arm/arm64_n2_linux_gcc
> @@ -0,0 +1,17 @@
> +[binaries]
> +c = 'aarch64-linux-gnu-gcc'
> +cpp = 'aarch64-linux-gnu-cpp'
> +ar = 'aarch64-linux-gnu-gcc-ar'
> +strip = 'aarch64-linux-gnu-strip'
> +pkgconfig = 'aarch64-linux-gnu-pkg-config'
> +pcap-config = ''
> +
> +[host_machine]
> +system = 'linux'
> +cpu_family = 'aarch64'
> +cpu = 'armv8-a'
> +endian = 'little'
> +
> +[properties]
> +implementor_id = '0x41'
> +implementor_pn = '0xd49'
> diff --git a/config/arm/meson.build b/config/arm/meson.build
> index 42b4e43c7..5fd1c40a0 100644
> --- a/config/arm/meson.build
> +++ b/config/arm/meson.build
> @@ -89,6 +89,14 @@ flags_n1generic_extra = [
>  	['RTE_MAX_NUMA_NODES', 1],
>  	['RTE_EAL_NUMA_AWARE_HUGEPAGES', false],
>  	['RTE_LIBRTE_VHOST_NUMA', false]]
> +flags_n2generic_extra = [
> +	['RTE_MACHINE', '"neoverse-n2"'],
> +	['RTE_MAX_LCORE', 64],
> +	['RTE_CACHE_LINE_SIZE', 64],
> +	['RTE_ARM_FEATURE_ATOMICS', true],
> +	['RTE_USE_C11_MEM_MODEL', true],
> +	['RTE_EAL_NUMA_AWARE_HUGEPAGES', false],
> +	['RTE_LIBRTE_VHOST_NUMA', false]]
> 
>  machine_args_generic = [
>  	['default', ['-march=armv8-a+crc', '-moutline-atomics']],
> @@ -100,7 +108,8 @@ machine_args_generic = [
>  	['0xd09', ['-mcpu=cortex-a73']],
>  	['0xd0a', ['-mcpu=cortex-a75']],
>  	['0xd0b', ['-mcpu=cortex-a76']],
> -	['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'], flags_n1generic_extra]]
> +	['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'], flags_n1generic_extra],
> +	['0xd49', ['-march=armv8.5-a+crypto+sve2'], flags_n2generic_extra]]
> 
>  machine_args_cavium = [
>  	['default', ['-march=armv8-a+crc+crypto','-mcpu=thunderx']],
> --
> 2.25.1


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [PATCH v3 2/5] net/hns3: fix build with sve enabled
  2021-01-12  2:57   ` [dpdk-dev] [PATCH v3 2/5] net/hns3: fix build with sve enabled Ruifeng Wang
@ 2021-01-13  2:16     ` Honnappa Nagarahalli
  0 siblings, 0 replies; 43+ messages in thread
From: Honnappa Nagarahalli @ 2021-01-13  2:16 UTC (permalink / raw)
  To: Ruifeng Wang, Wei Hu (Xavier), Min Hu (Connor),
	Yisen Zhuang, Lijun Ou, Chengwen Feng, Chengchang Tang,
	Huisong Li
  Cc: dev, vladimir.medvedkin, pbhagavatula, jerinj, hemant.agrawal,
	nd, Ruifeng Wang, stable, Honnappa Nagarahalli, nd

<snip>

> 
> Building with SVE extension enabled stopped with error:
> 
>  error: ACLE function ‘svwhilelt_b64_s32’ requires ISA extension ‘sve’
>    18 | #define PG64_256BIT  svwhilelt_b64(0, 4)
> 
> This is caused by unintentional cflags reset.
> Fixed the issue by not touching cflags, and using flags defined by compiler.
> 
> Fixes: 952ebacce4f2 ("net/hns3: support SVE Rx")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>

Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> ---
> v3:
> Removed extra flag, use compiler flag instead.
> 
>  drivers/net/hns3/hns3_rxtx.c | 4 ++--
>  drivers/net/hns3/meson.build | 1 -
>  2 files changed, 2 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/hns3/hns3_rxtx.c b/drivers/net/hns3/hns3_rxtx.c
> index 88d3baba4..5ac36b314 100644
> --- a/drivers/net/hns3/hns3_rxtx.c
> +++ b/drivers/net/hns3/hns3_rxtx.c
> @@ -10,7 +10,7 @@
>  #include <rte_io.h>
>  #include <rte_net.h>
>  #include <rte_malloc.h>
> -#if defined(RTE_ARCH_ARM64) && defined(CC_SVE_SUPPORT)
> +#if defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_SVE)
>  #include <rte_cpuflags.h>
>  #endif
> 
> @@ -2467,7 +2467,7 @@ hns3_rx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id,
>  static bool
>  hns3_check_sve_support(void)
>  {
> -#if defined(RTE_ARCH_ARM64) && defined(CC_SVE_SUPPORT)
> +#if defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_SVE)
>  	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SVE))
>  		return true;
>  #endif
> diff --git a/drivers/net/hns3/meson.build b/drivers/net/hns3/meson.build
> index 45cee34d9..5674d986b 100644
> --- a/drivers/net/hns3/meson.build
> +++ b/drivers/net/hns3/meson.build
> @@ -32,7 +32,6 @@ deps += ['hash']
>  if arch_subdir == 'arm' and dpdk_conf.get('RTE_ARCH_64')
>  	sources += files('hns3_rxtx_vec.c')
>  	if cc.get_define('__ARM_FEATURE_SVE', args: machine_args) != ''
> -		cflags = ['-DCC_SVE_SUPPORT']
>  		sources += files('hns3_rxtx_vec_sve.c')
>  	endif
>  endif
> --
> 2.25.1


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on Arm platform
  2021-01-12  2:57   ` [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on Arm platform Ruifeng Wang
@ 2021-01-13 15:58     ` David Marchand
  2021-01-27 13:04     ` David Marchand
  1 sibling, 0 replies; 43+ messages in thread
From: David Marchand @ 2021-01-13 15:58 UTC (permalink / raw)
  To: Ruifeng Wang, Vladimir Medvedkin, Bruce Richardson, Jerin Jacob,
	Honnappa Nagarahalli
  Cc: Jan Viktorin, dev, Pavan Nikhilesh, Hemant Agrawal, nd

On Tue, Jan 12, 2021 at 3:57 AM Ruifeng Wang <ruifeng.wang@arm.com> wrote:
>
> Added new path to do lpm4 lookup by using scalable vector extension.
> The SVE path will be selected if compiler has flag SVE set.
>
> Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>

Review please?


-- 
David Marchand


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/5] lpm: add sve support for lookup on Arm platform
  2021-01-08  8:25   ` [dpdk-dev] [PATCH v2 1/5] lpm: add sve support for lookup on Arm platform Ruifeng Wang
@ 2021-01-13 18:54     ` Medvedkin, Vladimir
  0 siblings, 0 replies; 43+ messages in thread
From: Medvedkin, Vladimir @ 2021-01-13 18:54 UTC (permalink / raw)
  To: Ruifeng Wang, Jan Viktorin, Jerin Jacob, Bruce Richardson
  Cc: dev, hemant.agrawal, honnappa.nagarahalli, nd



On 08/01/2021 08:25, Ruifeng Wang wrote:
> Added new path to do lpm4 lookup by using scalable vector extension.
> The SVE path will be selected if compiler has flag SVE set.
> 
> Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>   lib/librte_eal/arm/include/rte_vect.h |  3 +
>   lib/librte_lpm/meson.build            |  2 +-
>   lib/librte_lpm/rte_lpm.h              |  4 ++
>   lib/librte_lpm/rte_lpm_sve.h          | 83 +++++++++++++++++++++++++++
>   4 files changed, 91 insertions(+), 1 deletion(-)
>   create mode 100644 lib/librte_lpm/rte_lpm_sve.h
> 
> diff --git a/lib/librte_eal/arm/include/rte_vect.h b/lib/librte_eal/arm/include/rte_vect.h
> index a739e6e66..093e9122a 100644
> --- a/lib/librte_eal/arm/include/rte_vect.h
> +++ b/lib/librte_eal/arm/include/rte_vect.h
> @@ -9,6 +9,9 @@
>   #include "generic/rte_vect.h"
>   #include "rte_debug.h"
>   #include "arm_neon.h"
> +#ifdef __ARM_FEATURE_SVE
> +#include <arm_sve.h>
> +#endif
>   
>   #ifdef __cplusplus
>   extern "C" {
> diff --git a/lib/librte_lpm/meson.build b/lib/librte_lpm/meson.build
> index 6cfc083c5..f93c86640 100644
> --- a/lib/librte_lpm/meson.build
> +++ b/lib/librte_lpm/meson.build
> @@ -5,6 +5,6 @@ sources = files('rte_lpm.c', 'rte_lpm6.c')
>   headers = files('rte_lpm.h', 'rte_lpm6.h')
>   # since header files have different names, we can install all vector headers
>   # without worrying about which architecture we actually need
> -headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h')
> +headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h', 'rte_lpm_sve.h')
>   deps += ['hash']
>   deps += ['rcu']
> diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
> index 1afe55cdc..28b57683b 100644
> --- a/lib/librte_lpm/rte_lpm.h
> +++ b/lib/librte_lpm/rte_lpm.h
> @@ -402,7 +402,11 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
>   	uint32_t defv);
>   
>   #if defined(RTE_ARCH_ARM)
> +#ifdef __ARM_FEATURE_SVE
> +#include "rte_lpm_sve.h"
> +#else
>   #include "rte_lpm_neon.h"
> +#endif
>   #elif defined(RTE_ARCH_PPC_64)
>   #include "rte_lpm_altivec.h"
>   #else
> diff --git a/lib/librte_lpm/rte_lpm_sve.h b/lib/librte_lpm/rte_lpm_sve.h
> new file mode 100644
> index 000000000..2e319373e
> --- /dev/null
> +++ b/lib/librte_lpm/rte_lpm_sve.h
> @@ -0,0 +1,83 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Arm Limited
> + */
> +
> +#ifndef _RTE_LPM_SVE_H_
> +#define _RTE_LPM_SVE_H_
> +
> +#include <rte_vect.h>
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +__rte_internal
> +static void
> +__rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips,
> +		uint32_t *__rte_restrict next_hops, const uint32_t n)
> +{
> +	uint32_t i = 0;
> +	svuint32_t v_ip, v_idx, v_tbl24, v_tbl8, v_hop;
> +	svuint32_t v_mask_xv, v_mask_v, v_mask_hop;
> +	svbool_t pg = svwhilelt_b32(i, n);
> +	svbool_t pv;
> +
> +	do {
> +		v_ip = svld1(pg, &ips[i]);
> +		/* Get indices for tbl24[] */
> +		v_idx = svlsr_x(pg, v_ip, 8);
> +		/* Extract values from tbl24[] */
> +		v_tbl24 = svld1_gather_index(pg, (const uint32_t *)lpm->tbl24,
> +						v_idx);
> +
> +		/* Create mask with valid set */
> +		v_mask_v = svdup_u32_z(pg, RTE_LPM_LOOKUP_SUCCESS);
> +		/* Create mask with valid and valid_group set */
> +		v_mask_xv = svdup_u32_z(pg, RTE_LPM_VALID_EXT_ENTRY_BITMASK);
> +		/* Create predicate for tbl24 entries: (valid && !valid_group) */
> +		pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), v_mask_v);
> +		/* Create mask for next_hop in table entry */
> +		v_mask_hop = svdup_u32_z(pg, 0x00ffffff);
> +		/* Extract next_hop and write back */
> +		v_hop = svand_x(pv, v_tbl24, v_mask_hop);
> +		svst1(pv, &next_hops[i], v_hop);
> +
> +		/* Update predicate for tbl24 entries: (valid && valid_group) */
> +		pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), v_mask_xv);
> +		/* Compute tbl8 index */
> +		v_idx = svand_x(pv, v_tbl24, svdup_u32_z(pv, 0xffffff));
> +		v_idx = svmul_x(pv, v_idx, RTE_LPM_TBL8_GROUP_NUM_ENTRIES);
> +		v_idx = svadd_x(pv, svand_x(pv, v_ip, svdup_u32_z(pv, 0xff)),
> +				v_idx);
> +		/* Extract values from tbl8[] */
> +		v_tbl8 = svld1_gather_index(pv, (const uint32_t *)lpm->tbl8,
> +						v_idx);
> +		/* Update predicate for tbl8 entries: (valid) */
> +		pv = svcmpeq(pv, svand_z(pv, v_tbl8, v_mask_v), v_mask_v);
> +		/* Extract next_hop and write back */
> +		v_hop = svand_x(pv, v_tbl8, v_mask_hop);
> +		svst1(pv, &next_hops[i], v_hop);
> +
> +		i += svlen(v_ip);
> +		pg = svwhilelt_b32(i, n);
> +	} while (svptest_any(svptrue_b32(), pg));
> +}
> +
> +static inline void
> +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> +		uint32_t defv)
> +{
> +	uint32_t i, ips[4];
> +
> +	vst1q_s32((int32_t *)ips, ip);
> +	for (i = 0; i < 4; i++)
> +		hop[i] = defv;
> +
> +	__rte_lpm_lookup_vec(lpm, ips, hop, 4);
> +}
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _RTE_LPM_SVE_H_ */
> 

Acked-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>

-- 
Regards,
Vladimir

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support
  2021-01-12  2:57 ` [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support Ruifeng Wang
                     ` (4 preceding siblings ...)
  2021-01-12  2:57   ` [dpdk-dev] [PATCH v3 5/5] config: add Arm Neoverse N2 Ruifeng Wang
@ 2021-01-14 15:18   ` David Marchand
  2021-01-14 15:40     ` David Marchand
  5 siblings, 1 reply; 43+ messages in thread
From: David Marchand @ 2021-01-14 15:18 UTC (permalink / raw)
  To: Ruifeng Wang, Honnappa Nagarahalli
  Cc: dev, Vladimir Medvedkin, Pavan Nikhilesh,
	Jerin Jacob Kollanukkaran, Hemant Agrawal, nd

Ruifeng, Honnappa,

On Tue, Jan 12, 2021 at 3:57 AM Ruifeng Wang <ruifeng.wang@arm.com> wrote:
>
> Added lpm4 lookupx4 implementation by using Arm SVE extension.
> The SVE is Scalable Vector Extension which is exposed to the
> user with a vector length agnostic interface.
> Refer to [1] for more information about SVE.
>
> Configuration was added for Neoverse N2 CPU which has SVE support.
>
> Some bugs were fixed so compiling with sve enabled can pass.
>
> [1] https://developer.arm.com/tools-and-software/server-and-hpc/compile/arm-instruction-emulator/resources/tutorials/sve

Can you point at a toolchain that supports SVE without having to
register to some commercial spamming system? :-)
The only aarch64-linux-gnu- toolchain I found on the ARM website is an
8.x gcc that does not seem to support SVE.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support
  2021-01-14 15:18   ` [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support David Marchand
@ 2021-01-14 15:40     ` David Marchand
  2021-01-15  7:02       ` Ruifeng Wang
  0 siblings, 1 reply; 43+ messages in thread
From: David Marchand @ 2021-01-14 15:40 UTC (permalink / raw)
  To: Ruifeng Wang, Honnappa Nagarahalli
  Cc: dev, Vladimir Medvedkin, Pavan Nikhilesh,
	Jerin Jacob Kollanukkaran, Hemant Agrawal, nd

On Thu, Jan 14, 2021 at 4:18 PM David Marchand
<david.marchand@redhat.com> wrote:
>
> Ruifeng, Honnappa,
>
> On Tue, Jan 12, 2021 at 3:57 AM Ruifeng Wang <ruifeng.wang@arm.com> wrote:
> >
> > Added lpm4 lookupx4 implementation by using Arm SVE extension.
> > The SVE is Scalable Vector Extension which is exposed to the
> > user with a vector length agnostic interface.
> > Refer to [1] for more information about SVE.
> >
> > Configuration was added for Neoverse N2 CPU which has SVE support.
> >
> > Some bugs were fixed so compiling with sve enabled can pass.
> >
> > [1] https://developer.arm.com/tools-and-software/server-and-hpc/compile/arm-instruction-emulator/resources/tutorials/sve
>
> Can you point at a toolchain that supports SVE without having to
> register to some commercial spamming system? :-)
> The only aarch64-linux-gnu- toolchain I found on the ARM website is a
> 8.x gcc that does not seem to support SVE.

I tested this using
https://developer.arm.com/-/media/Files/downloads/gnu-a/10.2-2020.11/binrel/gcc-arm-10.2-2020.11-x86_64-aarch64-none-linux-gnu.tar.xz
But I had to modify the cross compile prefix in
config/arm/arm64_n2_linux_gcc (adding a "none-", i.e. aarch64-none-linux-gnu-).
I am still interested in a toolchain that works out of the box.

Series applied, thanks.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support
  2021-01-14 15:40     ` David Marchand
@ 2021-01-15  7:02       ` Ruifeng Wang
  0 siblings, 0 replies; 43+ messages in thread
From: Ruifeng Wang @ 2021-01-15  7:02 UTC (permalink / raw)
  To: David Marchand, Honnappa Nagarahalli
  Cc: dev, Vladimir Medvedkin, Pavan Nikhilesh, jerinj, hemant.agrawal, nd, nd


> -----Original Message-----
> From: David Marchand <david.marchand@redhat.com>
> Sent: Thursday, January 14, 2021 11:40 PM
> To: Ruifeng Wang <Ruifeng.Wang@arm.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>
> Cc: dev <dev@dpdk.org>; Vladimir Medvedkin
> <vladimir.medvedkin@intel.com>; Pavan Nikhilesh
> <pbhagavatula@marvell.com>; jerinj@marvell.com;
> hemant.agrawal@nxp.com; nd <nd@arm.com>
> Subject: Re: [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support
> 
> On Thu, Jan 14, 2021 at 4:18 PM David Marchand
> <david.marchand@redhat.com> wrote:
> >
> > Ruifeng, Honnappa,
> >
> > On Tue, Jan 12, 2021 at 3:57 AM Ruifeng Wang <ruifeng.wang@arm.com>
> wrote:
> > >
> > > Added lpm4 lookupx4 implementation by using Arm SVE extension.
> > > The SVE is Scalable Vector Extension which is exposed to the user
> > > with a vector length agnostic interface.
> > > Refer to [1] for more information about SVE.
> > >
> > > Configuration was added for Neoverse N2 CPU which has SVE support.
> > >
> > > Some bugs were fixed so compiling with sve enabled can pass.
> > >
> > > [1]
> > > https://developer.arm.com/tools-and-software/server-and-
> hpc/compile/
> > > arm-instruction-emulator/resources/tutorials/sve
> >
> > Can you point at a toolchain that supports SVE without having to
> > register to some commercial spamming system? :-) The only
> > aarch64-linux-gnu- toolchain I found on the ARM website is a 8.x gcc
> > that does not seem to support SVE.
> 
> I tested this using
> https://developer.arm.com/-/media/Files/downloads/gnu-a/10.2-
> 2020.11/binrel/gcc-arm-10.2-2020.11-x86_64-aarch64-none-linux-gnu.tar.xz

Yes, gcc-10 has SVE support. Arm cross compilers are available at:
https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-a/downloads

> But I had to modify the cross compile prefix in
> config/arm/arm64_n2_linux_gcc (adding a none_).
> I am still interested in a toolchain that works out of the box.

I think on Ubuntu, it will work out of the box after installing the package gcc-10-aarch64-linux-gnu.

> 
> Series applied, thanks.

Thank you.

> 
> 
> --
> David Marchand


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on Arm platform
  2021-01-12  2:57   ` [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on Arm platform Ruifeng Wang
  2021-01-13 15:58     ` David Marchand
@ 2021-01-27 13:04     ` David Marchand
  2021-01-27 21:03       ` Honnappa Nagarahalli
  2021-01-28  5:47       ` Ruifeng Wang
  1 sibling, 2 replies; 43+ messages in thread
From: David Marchand @ 2021-01-27 13:04 UTC (permalink / raw)
  To: Ruifeng Wang
  Cc: Jerin Jacob, Jan Viktorin, Bruce Richardson, Vladimir Medvedkin,
	dev, Pavan Nikhilesh, Hemant Agrawal, Honnappa Nagarahalli, nd

On Tue, Jan 12, 2021 at 3:57 AM Ruifeng Wang <ruifeng.wang@arm.com> wrote:
> diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
> index 1afe55cdc..28b57683b 100644
> --- a/lib/librte_lpm/rte_lpm.h
> +++ b/lib/librte_lpm/rte_lpm.h
> @@ -402,7 +402,11 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
>         uint32_t defv);
>
>  #if defined(RTE_ARCH_ARM)
> +#ifdef __ARM_FEATURE_SVE
> +#include "rte_lpm_sve.h"
> +#else
>  #include "rte_lpm_neon.h"
> +#endif
>  #elif defined(RTE_ARCH_PPC_64)
>  #include "rte_lpm_altivec.h"
>  #else
> diff --git a/lib/librte_lpm/rte_lpm_sve.h b/lib/librte_lpm/rte_lpm_sve.h
> new file mode 100644
> index 000000000..2e319373e
> --- /dev/null
> +++ b/lib/librte_lpm/rte_lpm_sve.h
> @@ -0,0 +1,83 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Arm Limited
> + */
> +
> +#ifndef _RTE_LPM_SVE_H_
> +#define _RTE_LPM_SVE_H_
> +
> +#include <rte_vect.h>
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +__rte_internal
> +static void

I was looking into use of the __rte_internal tag in the tree.

This helper is called from an inlined API used by applications, so
outside of the DPDK build.
It looks like the compiler is not complaining when compiling examples
(I hacked my env to cross compile with gcc 10 + SVE enabled) but this
seems incorrect to me.

Is there really a need for this helper?
It is only used below afaics.


> +__rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips,
> +               uint32_t *__rte_restrict next_hops, const uint32_t n)
> +{

[snip]


> +}
> +
> +static inline void
> +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> +               uint32_t defv)
> +{
> +       uint32_t i, ips[4];
> +
> +       vst1q_s32((int32_t *)ips, ip);
> +       for (i = 0; i < 4; i++)
> +               hop[i] = defv;
> +
> +       __rte_lpm_lookup_vec(lpm, ips, hop, 4);
> +}


--
David Marchand


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on Arm platform
  2021-01-27 13:04     ` David Marchand
@ 2021-01-27 21:03       ` Honnappa Nagarahalli
  2021-01-28  8:03         ` David Marchand
  2021-01-28  5:47       ` Ruifeng Wang
  1 sibling, 1 reply; 43+ messages in thread
From: Honnappa Nagarahalli @ 2021-01-27 21:03 UTC (permalink / raw)
  To: David Marchand, Ruifeng Wang
  Cc: jerinj, Jan Viktorin, Bruce Richardson, Vladimir Medvedkin, dev,
	Pavan Nikhilesh, hemant.agrawal, nd, Honnappa Nagarahalli, nd

<snip>

> 
> On Tue, Jan 12, 2021 at 3:57 AM Ruifeng Wang <ruifeng.wang@arm.com>
> wrote:
> > diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h index
> > 1afe55cdc..28b57683b 100644
> > --- a/lib/librte_lpm/rte_lpm.h
> > +++ b/lib/librte_lpm/rte_lpm.h
> > @@ -402,7 +402,11 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm,
> xmm_t ip, uint32_t hop[4],
> >         uint32_t defv);
> >
> >  #if defined(RTE_ARCH_ARM)
> > +#ifdef __ARM_FEATURE_SVE
> > +#include "rte_lpm_sve.h"
> > +#else
> >  #include "rte_lpm_neon.h"
> > +#endif
> >  #elif defined(RTE_ARCH_PPC_64)
> >  #include "rte_lpm_altivec.h"
> >  #else
> > diff --git a/lib/librte_lpm/rte_lpm_sve.h
> > b/lib/librte_lpm/rte_lpm_sve.h new file mode 100644 index
> > 000000000..2e319373e
> > --- /dev/null
> > +++ b/lib/librte_lpm/rte_lpm_sve.h
> > @@ -0,0 +1,83 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2020 Arm Limited
> > + */
> > +
> > +#ifndef _RTE_LPM_SVE_H_
> > +#define _RTE_LPM_SVE_H_
> > +
> > +#include <rte_vect.h>
> > +
> > +#ifdef __cplusplus
> > +extern "C" {
> > +#endif
> > +
> > +__rte_internal
> > +static void
> 
> I was looking into use of the __rte_internal tag in the tree.
> 
> This helper is called from a inlined API used by applications, so out of the
> DPDK build.
> It looks like the compiler is not complaining when compiling examples (I
> hacked my env to cross compile with gcc 10 + SVE enabled) but this seems
> incorrect to me.
> 
> Is there really a need for this helper?
> It is only used below afaics.
I do not think it is required.

At the same time, the commit log from when '__rte_internal' was introduced is confusing.
It says "Introduce the __rte_internal tag to mark internal ABI function which is used only by the drivers or other libraries". Why would an internal function have an ABI?

> 
> 
> > +__rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips,
> > +               uint32_t *__rte_restrict next_hops, const uint32_t n)
> > +{
> 
> [snip]
> 
> 
> > +}
> > +
> > +static inline void
> > +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> > +               uint32_t defv)
> > +{
> > +       uint32_t i, ips[4];
> > +
> > +       vst1q_s32((int32_t *)ips, ip);
> > +       for (i = 0; i < 4; i++)
> > +               hop[i] = defv;
> > +
> > +       __rte_lpm_lookup_vec(lpm, ips, hop, 4); }
> 
> 
> --
> David Marchand


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on Arm platform
  2021-01-27 13:04     ` David Marchand
  2021-01-27 21:03       ` Honnappa Nagarahalli
@ 2021-01-28  5:47       ` Ruifeng Wang
  1 sibling, 0 replies; 43+ messages in thread
From: Ruifeng Wang @ 2021-01-28  5:47 UTC (permalink / raw)
  To: David Marchand
  Cc: jerinj, Jan Viktorin, Bruce Richardson, Vladimir Medvedkin, dev,
	Pavan Nikhilesh, hemant.agrawal, Honnappa Nagarahalli, nd, nd

> -----Original Message-----
> From: David Marchand <david.marchand@redhat.com>
> Sent: Wednesday, January 27, 2021 9:05 PM
> To: Ruifeng Wang <Ruifeng.Wang@arm.com>
> Cc: jerinj@marvell.com; Jan Viktorin <viktorin@rehivetech.com>; Bruce
> Richardson <bruce.richardson@intel.com>; Vladimir Medvedkin
> <vladimir.medvedkin@intel.com>; dev <dev@dpdk.org>; Pavan Nikhilesh
> <pbhagavatula@marvell.com>; hemant.agrawal@nxp.com; Honnappa
> Nagarahalli <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>
> Subject: Re: [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on
> Arm platform
> 
> On Tue, Jan 12, 2021 at 3:57 AM Ruifeng Wang <ruifeng.wang@arm.com>
> wrote:
> > diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h index
> > 1afe55cdc..28b57683b 100644
> > --- a/lib/librte_lpm/rte_lpm.h
> > +++ b/lib/librte_lpm/rte_lpm.h
> > @@ -402,7 +402,11 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm,
> xmm_t ip, uint32_t hop[4],
> >         uint32_t defv);
> >
> >  #if defined(RTE_ARCH_ARM)
> > +#ifdef __ARM_FEATURE_SVE
> > +#include "rte_lpm_sve.h"
> > +#else
> >  #include "rte_lpm_neon.h"
> > +#endif
> >  #elif defined(RTE_ARCH_PPC_64)
> >  #include "rte_lpm_altivec.h"
> >  #else
> > diff --git a/lib/librte_lpm/rte_lpm_sve.h
> > b/lib/librte_lpm/rte_lpm_sve.h new file mode 100644 index
> > 000000000..2e319373e
> > --- /dev/null
> > +++ b/lib/librte_lpm/rte_lpm_sve.h
> > @@ -0,0 +1,83 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2020 Arm Limited
> > + */
> > +
> > +#ifndef _RTE_LPM_SVE_H_
> > +#define _RTE_LPM_SVE_H_
> > +
> > +#include <rte_vect.h>
> > +
> > +#ifdef __cplusplus
> > +extern "C" {
> > +#endif
> > +
> > +__rte_internal
> > +static void
> 
> I was looking into use of the __rte_internal tag in the tree.
> 
> This helper is called from a inlined API used by applications, so out of the
> DPDK build.
> It looks like the compiler is not complaining when compiling examples (I
> hacked my env to cross compile with gcc 10 + SVE enabled) but this seems
> incorrect to me.
> 
> Is there really a need for this helper?
> It is only used below afaics.

My intention was to keep the helper generic, so it can be used not only in rte_lpm_lookupx4
as below, but also in other lookup functions like rte_lpm_lookup_bulk where the number of IPs
to be looked up is not a fixed value.

Will removing the __rte_internal tag resolve the issue?

> 
> 
> > +__rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips,
> > +               uint32_t *__rte_restrict next_hops, const uint32_t n)
> > +{
> 
> [snip]
> 
> 
> > +}
> > +
> > +static inline void
> > +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
> > +               uint32_t defv)
> > +{
> > +       uint32_t i, ips[4];
> > +
> > +       vst1q_s32((int32_t *)ips, ip);
> > +       for (i = 0; i < 4; i++)
> > +               hop[i] = defv;
> > +
> > +       __rte_lpm_lookup_vec(lpm, ips, hop, 4); }
> 
> 
> --
> David Marchand


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on Arm platform
  2021-01-27 21:03       ` Honnappa Nagarahalli
@ 2021-01-28  8:03         ` David Marchand
  2021-01-28 12:24           ` Honnappa Nagarahalli
  0 siblings, 1 reply; 43+ messages in thread
From: David Marchand @ 2021-01-28  8:03 UTC (permalink / raw)
  To: Honnappa Nagarahalli
  Cc: Ruifeng Wang, jerinj, Jan Viktorin, Bruce Richardson,
	Vladimir Medvedkin, dev, Pavan Nikhilesh, hemant.agrawal, nd

On Wed, Jan 27, 2021 at 10:03 PM Honnappa Nagarahalli
<Honnappa.Nagarahalli@arm.com> wrote:
>
> <snip>
>
> >
> > On Tue, Jan 12, 2021 at 3:57 AM Ruifeng Wang <ruifeng.wang@arm.com>
> > wrote:
> > > diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h index
> > > 1afe55cdc..28b57683b 100644
> > > --- a/lib/librte_lpm/rte_lpm.h
> > > +++ b/lib/librte_lpm/rte_lpm.h
> > > @@ -402,7 +402,11 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm,
> > xmm_t ip, uint32_t hop[4],
> > >         uint32_t defv);
> > >
> > >  #if defined(RTE_ARCH_ARM)
> > > +#ifdef __ARM_FEATURE_SVE
> > > +#include "rte_lpm_sve.h"
> > > +#else
> > >  #include "rte_lpm_neon.h"
> > > +#endif
> > >  #elif defined(RTE_ARCH_PPC_64)
> > >  #include "rte_lpm_altivec.h"
> > >  #else
> > > diff --git a/lib/librte_lpm/rte_lpm_sve.h
> > > b/lib/librte_lpm/rte_lpm_sve.h new file mode 100644 index
> > > 000000000..2e319373e
> > > --- /dev/null
> > > +++ b/lib/librte_lpm/rte_lpm_sve.h
> > > @@ -0,0 +1,83 @@
> > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > + * Copyright(c) 2020 Arm Limited
> > > + */
> > > +
> > > +#ifndef _RTE_LPM_SVE_H_
> > > +#define _RTE_LPM_SVE_H_
> > > +
> > > +#include <rte_vect.h>
> > > +
> > > +#ifdef __cplusplus
> > > +extern "C" {
> > > +#endif
> > > +
> > > +__rte_internal
> > > +static void
> >
> > I was looking into use of the __rte_internal tag in the tree.
> >
> > This helper is called from a inlined API used by applications, so out of the
> > DPDK build.
> > It looks like the compiler is not complaining when compiling examples (I
> > hacked my env to cross compile with gcc 10 + SVE enabled) but this seems
> > incorrect to me.
> >
> > Is there really a need for this helper?
> > It is only used below afaics.
> I do not think it is required.
>
> At the same time the commit log when '__rte_internal' was introduced is confusing.
> It says "Introduce the __rte_internal tag to mark internal ABI function which is used only by the drivers or other libraries". Why would an internal function have an ABI?

It happens that drivers/libraries in DPDK offer some interface for
other parts of the DPDK to use.
But we might want to keep them hidden from final applications,
because this is purely internal and/or we don't want to guarantee
compatibility in later versions.
For such cases, a function can be marked __rte_internal.


This tag has two impacts:
- a marked symbol is versioned as INTERNAL when exported (so this
does not apply to inlines),
- if an application tries to use a marked API, an error is triggered
at build time to prevent use of such API.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on Arm platform
  2021-01-28  8:03         ` David Marchand
@ 2021-01-28 12:24           ` Honnappa Nagarahalli
  0 siblings, 0 replies; 43+ messages in thread
From: Honnappa Nagarahalli @ 2021-01-28 12:24 UTC (permalink / raw)
  To: David Marchand
  Cc: Ruifeng Wang, jerinj, Jan Viktorin, Bruce Richardson,
	Vladimir Medvedkin, dev, Pavan Nikhilesh, hemant.agrawal, nd,
	Honnappa Nagarahalli, nd

<snip>

> 
> On Wed, Jan 27, 2021 at 10:03 PM Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com> wrote:
> >
> > <snip>
> >
> > >
> > > On Tue, Jan 12, 2021 at 3:57 AM Ruifeng Wang <ruifeng.wang@arm.com>
> > > wrote:
> > > > diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
> > > > index 1afe55cdc..28b57683b 100644
> > > > --- a/lib/librte_lpm/rte_lpm.h
> > > > +++ b/lib/librte_lpm/rte_lpm.h
> > > > @@ -402,7 +402,11 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm,
> > > xmm_t ip, uint32_t hop[4],
> > > >         uint32_t defv);
> > > >
> > > >  #if defined(RTE_ARCH_ARM)
> > > > +#ifdef __ARM_FEATURE_SVE
> > > > +#include "rte_lpm_sve.h"
> > > > +#else
> > > >  #include "rte_lpm_neon.h"
> > > > +#endif
> > > >  #elif defined(RTE_ARCH_PPC_64)
> > > >  #include "rte_lpm_altivec.h"
> > > >  #else
> > > > diff --git a/lib/librte_lpm/rte_lpm_sve.h
> > > > b/lib/librte_lpm/rte_lpm_sve.h new file mode 100644 index
> > > > 000000000..2e319373e
> > > > --- /dev/null
> > > > +++ b/lib/librte_lpm/rte_lpm_sve.h
> > > > @@ -0,0 +1,83 @@
> > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > + * Copyright(c) 2020 Arm Limited
> > > > + */
> > > > +
> > > > +#ifndef _RTE_LPM_SVE_H_
> > > > +#define _RTE_LPM_SVE_H_
> > > > +
> > > > +#include <rte_vect.h>
> > > > +
> > > > +#ifdef __cplusplus
> > > > +extern "C" {
> > > > +#endif
> > > > +
> > > > +__rte_internal
> > > > +static void
> > >
> > > I was looking into use of the __rte_internal tag in the tree.
> > >
> > > This helper is called from a inlined API used by applications, so
> > > out of the DPDK build.
> > > It looks like the compiler is not complaining when compiling
> > > examples (I hacked my env to cross compile with gcc 10 + SVE
> > > enabled) but this seems incorrect to me.
> > >
> > > Is there really a need for this helper?
> > > It is only used below afaics.
> > I do not think it is required.
> >
> > At the same time the commit log when '__rte_internal' was introduced is
> confusing.
> > It says "Introduce the __rte_internal tag to mark internal ABI function which is
> used only by the drivers or other libraries". Why would an internal function have
> an ABI?
> 
> It happens that drivers/libraries in DPDK offer some interface for other parts of
> the DPDK to use.
> But we might want them to keep them hidden to final applications, because this
> is purely internal and/or we don't want to guarantee compatibility in later
> versions.
> For such cases, a function can be marked __rte_internal.
> 
> 
> This tag has two impacts:
> - a marked symbol is versionned as INTERNAL when exported (so this does not
> apply to inlines),
> - if an application tries to use a marked API, an error is triggered at build time to
> prevent use of such API,
Thanks David, it makes sense now. The phrase 'internal ABI' in the commit log caused the confusion.
Is this required because all the header files (header files meant for the application and the DPDK internal header files) are in the same directory?

From the above definition, we do not need the internal tag for this function as it is very much internal to the LPM library.

> 
> 
> --
> David Marchand


^ permalink raw reply	[flat|nested] 43+ messages in thread

end of thread, other threads:[~2021-01-28 12:25 UTC | newest]

Thread overview: 43+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-18 10:12 [dpdk-dev] [RFC PATCH] lpm: add sve support for lookup on Arm platform Ruifeng Wang
2021-01-05 15:44 ` Medvedkin, Vladimir
2021-01-06 10:11   ` Ruifeng Wang
2021-01-08  8:25 ` [dpdk-dev] [PATCH v2 0/5] lpm lookup with sve support Ruifeng Wang
2021-01-08  8:25   ` [dpdk-dev] [PATCH v2 1/5] lpm: add sve support for lookup on Arm platform Ruifeng Wang
2021-01-13 18:54     ` Medvedkin, Vladimir
2021-01-08  8:25   ` [dpdk-dev] [PATCH v2 2/5] net/hns3: fix build with sve enabled Ruifeng Wang
2021-01-09  0:06     ` Honnappa Nagarahalli
2021-01-09  2:11       ` oulijun
2021-01-11  2:39         ` Ruifeng Wang
2021-01-11 13:38           ` Honnappa Nagarahalli
2021-01-09  2:15     ` oulijun
2021-01-11  2:27       ` Ruifeng Wang
2021-01-08  8:25   ` [dpdk-dev] [PATCH v2 3/5] net/octeontx: " Ruifeng Wang
2021-01-08  8:25   ` [dpdk-dev] [PATCH v2 4/5] common/octeontx2: " Ruifeng Wang
2021-01-08 10:29     ` [dpdk-dev] [EXT] " Pavan Nikhilesh Bhagavatula
2021-01-11  9:51       ` Ruifeng Wang
2021-01-08  8:25   ` [dpdk-dev] [PATCH v2 5/5] config: add Arm Neoverse N2 Ruifeng Wang
2021-01-08 23:58     ` Honnappa Nagarahalli
2021-01-11  3:01       ` Ruifeng Wang
2021-01-11  3:09         ` Jerin Jacob
2021-01-11  8:32           ` Ruifeng Wang
2021-01-11 13:58             ` Honnappa Nagarahalli
2021-01-12  2:57 ` [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support Ruifeng Wang
2021-01-12  2:57   ` [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on Arm platform Ruifeng Wang
2021-01-13 15:58     ` David Marchand
2021-01-27 13:04     ` David Marchand
2021-01-27 21:03       ` Honnappa Nagarahalli
2021-01-28  8:03         ` David Marchand
2021-01-28 12:24           ` Honnappa Nagarahalli
2021-01-28  5:47       ` Ruifeng Wang
2021-01-12  2:57   ` [dpdk-dev] [PATCH v3 2/5] net/hns3: fix build with sve enabled Ruifeng Wang
2021-01-13  2:16     ` Honnappa Nagarahalli
2021-01-12  2:57   ` [dpdk-dev] [PATCH v3 3/5] net/octeontx: " Ruifeng Wang
2021-01-12  4:39     ` Jerin Jacob
2021-01-12  2:57   ` [dpdk-dev] [PATCH v3 4/5] common/octeontx2: " Ruifeng Wang
2021-01-12  4:38     ` Jerin Jacob
2021-01-12  2:57   ` [dpdk-dev] [PATCH v3 5/5] config: add Arm Neoverse N2 Ruifeng Wang
2021-01-12  4:44     ` Jerin Jacob
2021-01-13  2:08     ` Honnappa Nagarahalli
2021-01-14 15:18   ` [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support David Marchand
2021-01-14 15:40     ` David Marchand
2021-01-15  7:02       ` Ruifeng Wang

DPDK patches and discussions

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://inbox.dpdk.org/dev/0 dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dev dev/ https://inbox.dpdk.org/dev \
		dev@dpdk.org
	public-inbox-index dev

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git