* [dpdk-dev] [RFC PATCH] lpm: add sve support for lookup on Arm platform @ 2020-12-18 10:12 Ruifeng Wang 2021-01-05 15:44 ` Medvedkin, Vladimir ` (2 more replies) 0 siblings, 3 replies; 43+ messages in thread From: Ruifeng Wang @ 2020-12-18 10:12 UTC (permalink / raw) To: Jan Viktorin, Ruifeng Wang, Jerin Jacob, Bruce Richardson, Vladimir Medvedkin Cc: dev, hemant.agrawal, honnappa.nagarahalli, nd Added new path to do lpm4 lookup by using scalable vector extension. The SVE path will be selected if compiler has flag SVE set. Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> --- lib/librte_eal/arm/include/rte_vect.h | 3 + lib/librte_lpm/meson.build | 2 +- lib/librte_lpm/rte_lpm.h | 4 ++ lib/librte_lpm/rte_lpm_sve.h | 83 +++++++++++++++++++++++++++ 4 files changed, 91 insertions(+), 1 deletion(-) create mode 100644 lib/librte_lpm/rte_lpm_sve.h diff --git a/lib/librte_eal/arm/include/rte_vect.h b/lib/librte_eal/arm/include/rte_vect.h index a739e6e66..093e9122a 100644 --- a/lib/librte_eal/arm/include/rte_vect.h +++ b/lib/librte_eal/arm/include/rte_vect.h @@ -9,6 +9,9 @@ #include "generic/rte_vect.h" #include "rte_debug.h" #include "arm_neon.h" +#ifdef __ARM_FEATURE_SVE +#include <arm_sve.h> +#endif #ifdef __cplusplus extern "C" { diff --git a/lib/librte_lpm/meson.build b/lib/librte_lpm/meson.build index 6cfc083c5..f93c86640 100644 --- a/lib/librte_lpm/meson.build +++ b/lib/librte_lpm/meson.build @@ -5,6 +5,6 @@ sources = files('rte_lpm.c', 'rte_lpm6.c') headers = files('rte_lpm.h', 'rte_lpm6.h') # since header files have different names, we can install all vector headers # without worrying about which architecture we actually need -headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h') +headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h', 'rte_lpm_sve.h') deps += ['hash'] deps += ['rcu'] diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h index 1afe55cdc..28b57683b 100644 --- a/lib/librte_lpm/rte_lpm.h +++ 
b/lib/librte_lpm/rte_lpm.h @@ -402,7 +402,11 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], uint32_t defv); #if defined(RTE_ARCH_ARM) +#ifdef __ARM_FEATURE_SVE +#include "rte_lpm_sve.h" +#else #include "rte_lpm_neon.h" +#endif #elif defined(RTE_ARCH_PPC_64) #include "rte_lpm_altivec.h" #else diff --git a/lib/librte_lpm/rte_lpm_sve.h b/lib/librte_lpm/rte_lpm_sve.h new file mode 100644 index 000000000..86576ec52 --- /dev/null +++ b/lib/librte_lpm/rte_lpm_sve.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2020 Arm Limited + */ + +#ifndef _RTE_LPM_SVE_H_ +#define _RTE_LPM_SVE_H_ + +#include <rte_vect.h> + +#ifdef __cplusplus +extern "C" { +#endif + +__rte_internal +static void +__rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips, + uint32_t *__rte_restrict next_hops, const uint32_t n) +{ + uint32_t i = 0; + svuint32_t v_ip, v_idx, v_tbl24, v_tbl8, v_hop; + svuint32_t v_mask_xv, v_mask_v, v_mask_hop; + svbool_t pg = svwhilelt_b32(i, n); + svbool_t pv; + + do { + v_ip = svld1(pg, &ips[i]); + /* Get indices for tbl24[] */ + v_idx = svlsr_x(pg, v_ip, 8); + /* Extract values from tbl24[] */ + v_tbl24 = svld1_gather_index(pg, (const uint32_t *)lpm->tbl24, + v_idx); + + /* Create mask with valid set */ + v_mask_v = svdup_u32_z(pg, RTE_LPM_LOOKUP_SUCCESS); + /* Create mask with valid and valid_group set */ + v_mask_xv = svdup_u32_z(pg, RTE_LPM_VALID_EXT_ENTRY_BITMASK); + /* Create predicate for tbl24 entries: (valid && !valid_group) */ + pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), v_mask_v); + /* Create mask for next_hop in table entry */ + v_mask_hop = svdup_u32_z(pg, 0x00ffffff); + /* Extract next_hop and write back */ + v_hop = svand_x(pv, v_tbl24, v_mask_hop); + svst1(pv, &next_hops[i], v_hop); + + /* Update predicate for tbl24 entries: (valid && valid_group) */ + pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), v_mask_xv); + /* Compute tbl8 index */ + v_idx = svand_x(pv, v_tbl24, 
svdup_u32_z(pv, 0xff)); + v_idx = svmul_x(pv, v_idx, RTE_LPM_TBL8_GROUP_NUM_ENTRIES); + v_idx = svadd_x(pv, svand_x(pv, v_ip, svdup_u32_z(pv, 0xff)), + v_idx); + /* Extract values from tbl8[] */ + v_tbl8 = svld1_gather_index(pv, (const uint32_t *)lpm->tbl8, + v_idx); + /* Update predicate for tbl8 entries: (valid) */ + pv = svcmpeq(pv, svand_z(pv, v_tbl8, v_mask_v), v_mask_v); + /* Extract next_hop and write back */ + v_hop = svand_x(pv, v_tbl8, v_mask_hop); + svst1(pv, &next_hops[i], v_hop); + + i += svlen(v_ip); + pg = svwhilelt_b32(i, n); + } while (svptest_any(svptrue_b32(), pg)); +} + +static inline void +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], + uint32_t defv) +{ + uint32_t i, ips[4]; + + vst1q_s32((int32_t *)ips, ip); + for (i = 0; i < 4; i++) + hop[i] = defv; + + __rte_lpm_lookup_vec(lpm, ips, hop, 4); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_LPM_SVE_H_ */ -- 2.25.1 ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [RFC PATCH] lpm: add sve support for lookup on Arm platform 2020-12-18 10:12 [dpdk-dev] [RFC PATCH] lpm: add sve support for lookup on Arm platform Ruifeng Wang @ 2021-01-05 15:44 ` Medvedkin, Vladimir 2021-01-06 10:11 ` Ruifeng Wang 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 0/5] lpm lookup with sve support Ruifeng Wang 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support Ruifeng Wang 2 siblings, 1 reply; 43+ messages in thread From: Medvedkin, Vladimir @ 2021-01-05 15:44 UTC (permalink / raw) To: Ruifeng Wang, Jan Viktorin, Jerin Jacob, Bruce Richardson Cc: dev, hemant.agrawal, honnappa.nagarahalli, nd Hi Ruifeng, Thanks for the patch, see comments below On 18/12/2020 10:12, Ruifeng Wang wrote: > Added new path to do lpm4 lookup by using scalable vector extension. > The SVE path will be selected if compiler has flag SVE set. > > Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> > --- > lib/librte_eal/arm/include/rte_vect.h | 3 + > lib/librte_lpm/meson.build | 2 +- > lib/librte_lpm/rte_lpm.h | 4 ++ > lib/librte_lpm/rte_lpm_sve.h | 83 +++++++++++++++++++++++++++ > 4 files changed, 91 insertions(+), 1 deletion(-) > create mode 100644 lib/librte_lpm/rte_lpm_sve.h > > diff --git a/lib/librte_eal/arm/include/rte_vect.h b/lib/librte_eal/arm/include/rte_vect.h > index a739e6e66..093e9122a 100644 > --- a/lib/librte_eal/arm/include/rte_vect.h > +++ b/lib/librte_eal/arm/include/rte_vect.h > @@ -9,6 +9,9 @@ > #include "generic/rte_vect.h" > #include "rte_debug.h" > #include "arm_neon.h" > +#ifdef __ARM_FEATURE_SVE > +#include <arm_sve.h> > +#endif > > #ifdef __cplusplus > extern "C" { > diff --git a/lib/librte_lpm/meson.build b/lib/librte_lpm/meson.build > index 6cfc083c5..f93c86640 100644 > --- a/lib/librte_lpm/meson.build > +++ b/lib/librte_lpm/meson.build > @@ -5,6 +5,6 @@ sources = files('rte_lpm.c', 'rte_lpm6.c') > headers = files('rte_lpm.h', 'rte_lpm6.h') > # since header files have different names, we can install all vector headers > 
# without worrying about which architecture we actually need > -headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h') > +headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h', 'rte_lpm_sve.h') > deps += ['hash'] > deps += ['rcu'] > diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h > index 1afe55cdc..28b57683b 100644 > --- a/lib/librte_lpm/rte_lpm.h > +++ b/lib/librte_lpm/rte_lpm.h > @@ -402,7 +402,11 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], > uint32_t defv); > > #if defined(RTE_ARCH_ARM) > +#ifdef __ARM_FEATURE_SVE > +#include "rte_lpm_sve.h" > +#else > #include "rte_lpm_neon.h" > +#endif > #elif defined(RTE_ARCH_PPC_64) > #include "rte_lpm_altivec.h" > #else > diff --git a/lib/librte_lpm/rte_lpm_sve.h b/lib/librte_lpm/rte_lpm_sve.h > new file mode 100644 > index 000000000..86576ec52 > --- /dev/null > +++ b/lib/librte_lpm/rte_lpm_sve.h > @@ -0,0 +1,83 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2020 Arm Limited > + */ > + > +#ifndef _RTE_LPM_SVE_H_ > +#define _RTE_LPM_SVE_H_ > + > +#include <rte_vect.h> > + > +#ifdef __cplusplus > +extern "C" { > +#endif > + > +__rte_internal > +static void > +__rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips, > + uint32_t *__rte_restrict next_hops, const uint32_t n) > +{ > + uint32_t i = 0; > + svuint32_t v_ip, v_idx, v_tbl24, v_tbl8, v_hop; > + svuint32_t v_mask_xv, v_mask_v, v_mask_hop; > + svbool_t pg = svwhilelt_b32(i, n); > + svbool_t pv; > + > + do { > + v_ip = svld1(pg, &ips[i]); > + /* Get indices for tbl24[] */ > + v_idx = svlsr_x(pg, v_ip, 8); > + /* Extract values from tbl24[] */ > + v_tbl24 = svld1_gather_index(pg, (const uint32_t *)lpm->tbl24, > + v_idx); > + > + /* Create mask with valid set */ > + v_mask_v = svdup_u32_z(pg, RTE_LPM_LOOKUP_SUCCESS); > + /* Create mask with valid and valid_group set */ > + v_mask_xv = svdup_u32_z(pg, RTE_LPM_VALID_EXT_ENTRY_BITMASK); > + /* Create predicate for 
tbl24 entries: (valid && !valid_group) */ > + pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), v_mask_v); > + /* Create mask for next_hop in table entry */ > + v_mask_hop = svdup_u32_z(pg, 0x00ffffff); > + /* Extract next_hop and write back */ > + v_hop = svand_x(pv, v_tbl24, v_mask_hop); > + svst1(pv, &next_hops[i], v_hop); > + > + /* Update predicate for tbl24 entries: (valid && valid_group) */ > + pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), v_mask_xv); > + /* Compute tbl8 index */ > + v_idx = svand_x(pv, v_tbl24, svdup_u32_z(pv, 0xff)); Looks like this should be v_idx = svand_x(pv, v_tbl24, svdup_u32_z(pv, 0xffffff)); because we are using 24 bits to hold the tbl8 group index. > + v_idx = svmul_x(pv, v_idx, RTE_LPM_TBL8_GROUP_NUM_ENTRIES); > + v_idx = svadd_x(pv, svand_x(pv, v_ip, svdup_u32_z(pv, 0xff)), > + v_idx); > + /* Extract values from tbl8[] */ > + v_tbl8 = svld1_gather_index(pv, (const uint32_t *)lpm->tbl8, > + v_idx); > + /* Update predicate for tbl8 entries: (valid) */ > + pv = svcmpeq(pv, svand_z(pv, v_tbl8, v_mask_v), v_mask_v); > + /* Extract next_hop and write back */ > + v_hop = svand_x(pv, v_tbl8, v_mask_hop); > + svst1(pv, &next_hops[i], v_hop); I'm not an expert, but probably it would be better to merge the two stores (svst1) into a single one? > + > + i += svlen(v_ip); > + pg = svwhilelt_b32(i, n); Isn't it better to move the predicate calculation to the beginning of the loop and just do {} while (i < n)? > + } while (svptest_any(svptrue_b32(), pg)); > +} > + > +static inline void > +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], > + uint32_t defv) > +{ > + uint32_t i, ips[4]; > + > + vst1q_s32((int32_t *)ips, ip); > + for (i = 0; i < 4; i++) > + hop[i] = defv; > + > + __rte_lpm_lookup_vec(lpm, ips, hop, 4); > +} > + > +#ifdef __cplusplus > +} > +#endif > + > +#endif /* _RTE_LPM_SVE_H_ */ > -- Regards, Vladimir ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [RFC PATCH] lpm: add sve support for lookup on Arm platform 2021-01-05 15:44 ` Medvedkin, Vladimir @ 2021-01-06 10:11 ` Ruifeng Wang 0 siblings, 0 replies; 43+ messages in thread From: Ruifeng Wang @ 2021-01-06 10:11 UTC (permalink / raw) To: Medvedkin, Vladimir, Jan Viktorin, jerinj, Bruce Richardson Cc: dev, hemant.agrawal, Honnappa Nagarahalli, nd, nd > -----Original Message----- > From: Medvedkin, Vladimir <vladimir.medvedkin@intel.com> > Sent: Tuesday, January 5, 2021 11:44 PM > To: Ruifeng Wang <Ruifeng.Wang@arm.com>; Jan Viktorin > <viktorin@rehivetech.com>; jerinj@marvell.com; Bruce Richardson > <bruce.richardson@intel.com> > Cc: dev@dpdk.org; hemant.agrawal@nxp.com; Honnappa Nagarahalli > <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com> > Subject: Re: [RFC PATCH] lpm: add sve support for lookup on Arm platform > > Hi Ruifeng, > > Thanks for the patch, see comments below Hi Vladimir, Thank you for your review. > > On 18/12/2020 10:12, Ruifeng Wang wrote: > > Added new path to do lpm4 lookup by using scalable vector extension. > > The SVE path will be selected if compiler has flag SVE set. 
> > > > Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> > > --- > > lib/librte_eal/arm/include/rte_vect.h | 3 + > > lib/librte_lpm/meson.build | 2 +- > > lib/librte_lpm/rte_lpm.h | 4 ++ > > lib/librte_lpm/rte_lpm_sve.h | 83 +++++++++++++++++++++++++++ > > 4 files changed, 91 insertions(+), 1 deletion(-) > > create mode 100644 lib/librte_lpm/rte_lpm_sve.h > > > > diff --git a/lib/librte_eal/arm/include/rte_vect.h > > b/lib/librte_eal/arm/include/rte_vect.h > > index a739e6e66..093e9122a 100644 > > --- a/lib/librte_eal/arm/include/rte_vect.h > > +++ b/lib/librte_eal/arm/include/rte_vect.h > > @@ -9,6 +9,9 @@ > > #include "generic/rte_vect.h" > > #include "rte_debug.h" > > #include "arm_neon.h" > > +#ifdef __ARM_FEATURE_SVE > > +#include <arm_sve.h> > > +#endif > > > > #ifdef __cplusplus > > extern "C" { > > diff --git a/lib/librte_lpm/meson.build b/lib/librte_lpm/meson.build > > index 6cfc083c5..f93c86640 100644 > > --- a/lib/librte_lpm/meson.build > > +++ b/lib/librte_lpm/meson.build > > @@ -5,6 +5,6 @@ sources = files('rte_lpm.c', 'rte_lpm6.c') > > headers = files('rte_lpm.h', 'rte_lpm6.h') > > # since header files have different names, we can install all vector headers > > # without worrying about which architecture we actually need > > -headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', > > 'rte_lpm_sse.h') > > +headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', > > +'rte_lpm_sse.h', 'rte_lpm_sve.h') > > deps += ['hash'] > > deps += ['rcu'] > > diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h index > > 1afe55cdc..28b57683b 100644 > > --- a/lib/librte_lpm/rte_lpm.h > > +++ b/lib/librte_lpm/rte_lpm.h > > @@ -402,7 +402,11 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, > xmm_t ip, uint32_t hop[4], > > uint32_t defv); > > > > #if defined(RTE_ARCH_ARM) > > +#ifdef __ARM_FEATURE_SVE > > +#include "rte_lpm_sve.h" > > +#else > > #include "rte_lpm_neon.h" > > +#endif > > #elif defined(RTE_ARCH_PPC_64) > > #include "rte_lpm_altivec.h" > > #else 
> > diff --git a/lib/librte_lpm/rte_lpm_sve.h > > b/lib/librte_lpm/rte_lpm_sve.h new file mode 100644 index > > 000000000..86576ec52 > > --- /dev/null > > +++ b/lib/librte_lpm/rte_lpm_sve.h > > @@ -0,0 +1,83 @@ > > +/* SPDX-License-Identifier: BSD-3-Clause > > + * Copyright(c) 2020 Arm Limited > > + */ > > + > > +#ifndef _RTE_LPM_SVE_H_ > > +#define _RTE_LPM_SVE_H_ > > + > > +#include <rte_vect.h> > > + > > +#ifdef __cplusplus > > +extern "C" { > > +#endif > > + > > +__rte_internal > > +static void > > +__rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips, > > + uint32_t *__rte_restrict next_hops, const uint32_t n) { > > + uint32_t i = 0; > > + svuint32_t v_ip, v_idx, v_tbl24, v_tbl8, v_hop; > > + svuint32_t v_mask_xv, v_mask_v, v_mask_hop; > > + svbool_t pg = svwhilelt_b32(i, n); > > + svbool_t pv; > > + > > + do { > > + v_ip = svld1(pg, &ips[i]); > > + /* Get indices for tbl24[] */ > > + v_idx = svlsr_x(pg, v_ip, 8); > > + /* Extract values from tbl24[] */ > > + v_tbl24 = svld1_gather_index(pg, (const uint32_t *)lpm- > >tbl24, > > + v_idx); > > + > > + /* Create mask with valid set */ > > + v_mask_v = svdup_u32_z(pg, RTE_LPM_LOOKUP_SUCCESS); > > + /* Create mask with valid and valid_group set */ > > + v_mask_xv = svdup_u32_z(pg, > RTE_LPM_VALID_EXT_ENTRY_BITMASK); > > + /* Create predicate for tbl24 entries: (valid && !valid_group) > */ > > + pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), > v_mask_v); > > + /* Create mask for next_hop in table entry */ > > + v_mask_hop = svdup_u32_z(pg, 0x00ffffff); > > + /* Extract next_hop and write back */ > > + v_hop = svand_x(pv, v_tbl24, v_mask_hop); > > + svst1(pv, &next_hops[i], v_hop); > > + > > + /* Update predicate for tbl24 entries: (valid && valid_group) > */ > > + pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), > v_mask_xv); > > + /* Compute tbl8 index */ > > + v_idx = svand_x(pv, v_tbl24, svdup_u32_z(pv, 0xff)); > > Loos like here should be > v_idx = svand_x(pv, v_tbl24, svdup_u32_z(pv, 
0xffffff)); because we are > using 24 bits to keep tbl8 group. Yes, the mask should be 0xffffff. I also noticed that the same issue exists in all the other vector lookup implementations (NEON/SSE/ALTIVEC). I'll correct this and fix the other vector implementations in the next version. > > > > + v_idx = svmul_x(pv, v_idx, > RTE_LPM_TBL8_GROUP_NUM_ENTRIES); > > + v_idx = svadd_x(pv, svand_x(pv, v_ip, svdup_u32_z(pv, > 0xff)), > > + v_idx); > > + /* Extract values from tbl8[] */ > > + v_tbl8 = svld1_gather_index(pv, (const uint32_t *)lpm->tbl8, > > + v_idx); > > + /* Update predicate for tbl8 entries: (valid) */ > > + pv = svcmpeq(pv, svand_z(pv, v_tbl8, v_mask_v), v_mask_v); > > + /* Extract next_hop and write back */ > > + v_hop = svand_x(pv, v_tbl8, v_mask_hop); > > + svst1(pv, &next_hops[i], v_hop); > > I'm not an expert, but probably it would be better to merge two stores > (svst1) into a single one? I think we can keep the current implementation. In most cases, tbl24 will not be expanded. Then the SVE predicate for tbl8 processing will be all-false, so the operations on tbl8 will be no-ops. I think it is better not to mix the two stores (from tbl24 and from tbl8). > > > + > > + i += svlen(v_ip); > > + pg = svwhilelt_b32(i, n); > > Isn't it better to move the predicate calculation to the beginning of the loop > and just do {} while (i < n)? Yes, that also works. I think checking the SVE predicate is the suggested way to write a vector-length-agnostic loop. It is more generic and flexible. 
> > > + } while (svptest_any(svptrue_b32(), pg)); } > > + > > +static inline void > > +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], > > + uint32_t defv) > > +{ > > + uint32_t i, ips[4]; > > + > > + vst1q_s32((int32_t *)ips, ip); > > + for (i = 0; i < 4; i++) > > + hop[i] = defv; > > + > > + __rte_lpm_lookup_vec(lpm, ips, hop, 4); } > > + > > +#ifdef __cplusplus > > +} > > +#endif > > + > > +#endif /* _RTE_LPM_SVE_H_ */ > > > > -- > Regards, > Vladimir ^ permalink raw reply [flat|nested] 43+ messages in thread
* [dpdk-dev] [PATCH v2 0/5] lpm lookup with sve support 2020-12-18 10:12 [dpdk-dev] [RFC PATCH] lpm: add sve support for lookup on Arm platform Ruifeng Wang 2021-01-05 15:44 ` Medvedkin, Vladimir @ 2021-01-08 8:25 ` Ruifeng Wang 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 1/5] lpm: add sve support for lookup on Arm platform Ruifeng Wang ` (4 more replies) 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support Ruifeng Wang 2 siblings, 5 replies; 43+ messages in thread From: Ruifeng Wang @ 2021-01-08 8:25 UTC (permalink / raw) Cc: dev, vladimir.medvedkin, jerinj, hemant.agrawal, honnappa.nagarahalli, nd, Ruifeng Wang Added lpm4 lookupx4 implementation by using Arm SVE extension. The SVE is Scalable Vector Extension which is exposed to the user with a vector length agnostic interface. Refer to [1] for more information about SVE. Configuration was added for Neoverse N2 CPU which has SVE support. Some bugs were fixed so compiling with sve enabled can pass. [1] https://developer.arm.com/tools-and-software/server-and-hpc/compile/arm-instruction-emulator/resources/tutorials/sve --- v2: Fixed tbl8 group index calculation. (Vladimir) Added N2 config. Fixed compiling when sve was enabled. 
Ruifeng Wang (5): lpm: add sve support for lookup on Arm platform net/hns3: fix build with sve enabled net/octeontx: fix build with sve enabled common/octeontx2: fix build with sve enabled config: add Arm Neoverse N2 config/arm/arm64_n2_linux_gcc | 17 +++++ config/arm/meson.build | 11 +++- drivers/common/octeontx2/otx2_io_arm64.h | 37 ++--------- drivers/net/hns3/meson.build | 2 +- drivers/net/octeontx/base/octeontx_io.h | 16 ++--- lib/librte_eal/arm/include/rte_vect.h | 3 + lib/librte_lpm/meson.build | 2 +- lib/librte_lpm/rte_lpm.h | 4 ++ lib/librte_lpm/rte_lpm_sve.h | 83 ++++++++++++++++++++++++ 9 files changed, 127 insertions(+), 48 deletions(-) create mode 100644 config/arm/arm64_n2_linux_gcc create mode 100644 lib/librte_lpm/rte_lpm_sve.h -- 2.25.1 ^ permalink raw reply [flat|nested] 43+ messages in thread
* [dpdk-dev] [PATCH v2 1/5] lpm: add sve support for lookup on Arm platform 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 0/5] lpm lookup with sve support Ruifeng Wang @ 2021-01-08 8:25 ` Ruifeng Wang 2021-01-13 18:54 ` Medvedkin, Vladimir 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 2/5] net/hns3: fix build with sve enabled Ruifeng Wang ` (3 subsequent siblings) 4 siblings, 1 reply; 43+ messages in thread From: Ruifeng Wang @ 2021-01-08 8:25 UTC (permalink / raw) To: Jan Viktorin, Ruifeng Wang, Jerin Jacob, Bruce Richardson, Vladimir Medvedkin Cc: dev, hemant.agrawal, honnappa.nagarahalli, nd Added new path to do lpm4 lookup by using scalable vector extension. The SVE path will be selected if compiler has flag SVE set. Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> --- lib/librte_eal/arm/include/rte_vect.h | 3 + lib/librte_lpm/meson.build | 2 +- lib/librte_lpm/rte_lpm.h | 4 ++ lib/librte_lpm/rte_lpm_sve.h | 83 +++++++++++++++++++++++++++ 4 files changed, 91 insertions(+), 1 deletion(-) create mode 100644 lib/librte_lpm/rte_lpm_sve.h diff --git a/lib/librte_eal/arm/include/rte_vect.h b/lib/librte_eal/arm/include/rte_vect.h index a739e6e66..093e9122a 100644 --- a/lib/librte_eal/arm/include/rte_vect.h +++ b/lib/librte_eal/arm/include/rte_vect.h @@ -9,6 +9,9 @@ #include "generic/rte_vect.h" #include "rte_debug.h" #include "arm_neon.h" +#ifdef __ARM_FEATURE_SVE +#include <arm_sve.h> +#endif #ifdef __cplusplus extern "C" { diff --git a/lib/librte_lpm/meson.build b/lib/librte_lpm/meson.build index 6cfc083c5..f93c86640 100644 --- a/lib/librte_lpm/meson.build +++ b/lib/librte_lpm/meson.build @@ -5,6 +5,6 @@ sources = files('rte_lpm.c', 'rte_lpm6.c') headers = files('rte_lpm.h', 'rte_lpm6.h') # since header files have different names, we can install all vector headers # without worrying about which architecture we actually need -headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h') +headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h', 
'rte_lpm_sve.h') deps += ['hash'] deps += ['rcu'] diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h index 1afe55cdc..28b57683b 100644 --- a/lib/librte_lpm/rte_lpm.h +++ b/lib/librte_lpm/rte_lpm.h @@ -402,7 +402,11 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], uint32_t defv); #if defined(RTE_ARCH_ARM) +#ifdef __ARM_FEATURE_SVE +#include "rte_lpm_sve.h" +#else #include "rte_lpm_neon.h" +#endif #elif defined(RTE_ARCH_PPC_64) #include "rte_lpm_altivec.h" #else diff --git a/lib/librte_lpm/rte_lpm_sve.h b/lib/librte_lpm/rte_lpm_sve.h new file mode 100644 index 000000000..2e319373e --- /dev/null +++ b/lib/librte_lpm/rte_lpm_sve.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2020 Arm Limited + */ + +#ifndef _RTE_LPM_SVE_H_ +#define _RTE_LPM_SVE_H_ + +#include <rte_vect.h> + +#ifdef __cplusplus +extern "C" { +#endif + +__rte_internal +static void +__rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips, + uint32_t *__rte_restrict next_hops, const uint32_t n) +{ + uint32_t i = 0; + svuint32_t v_ip, v_idx, v_tbl24, v_tbl8, v_hop; + svuint32_t v_mask_xv, v_mask_v, v_mask_hop; + svbool_t pg = svwhilelt_b32(i, n); + svbool_t pv; + + do { + v_ip = svld1(pg, &ips[i]); + /* Get indices for tbl24[] */ + v_idx = svlsr_x(pg, v_ip, 8); + /* Extract values from tbl24[] */ + v_tbl24 = svld1_gather_index(pg, (const uint32_t *)lpm->tbl24, + v_idx); + + /* Create mask with valid set */ + v_mask_v = svdup_u32_z(pg, RTE_LPM_LOOKUP_SUCCESS); + /* Create mask with valid and valid_group set */ + v_mask_xv = svdup_u32_z(pg, RTE_LPM_VALID_EXT_ENTRY_BITMASK); + /* Create predicate for tbl24 entries: (valid && !valid_group) */ + pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), v_mask_v); + /* Create mask for next_hop in table entry */ + v_mask_hop = svdup_u32_z(pg, 0x00ffffff); + /* Extract next_hop and write back */ + v_hop = svand_x(pv, v_tbl24, v_mask_hop); + svst1(pv, &next_hops[i], v_hop); + + /* Update 
predicate for tbl24 entries: (valid && valid_group) */ + pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), v_mask_xv); + /* Compute tbl8 index */ + v_idx = svand_x(pv, v_tbl24, svdup_u32_z(pv, 0xffffff)); + v_idx = svmul_x(pv, v_idx, RTE_LPM_TBL8_GROUP_NUM_ENTRIES); + v_idx = svadd_x(pv, svand_x(pv, v_ip, svdup_u32_z(pv, 0xff)), + v_idx); + /* Extract values from tbl8[] */ + v_tbl8 = svld1_gather_index(pv, (const uint32_t *)lpm->tbl8, + v_idx); + /* Update predicate for tbl8 entries: (valid) */ + pv = svcmpeq(pv, svand_z(pv, v_tbl8, v_mask_v), v_mask_v); + /* Extract next_hop and write back */ + v_hop = svand_x(pv, v_tbl8, v_mask_hop); + svst1(pv, &next_hops[i], v_hop); + + i += svlen(v_ip); + pg = svwhilelt_b32(i, n); + } while (svptest_any(svptrue_b32(), pg)); +} + +static inline void +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], + uint32_t defv) +{ + uint32_t i, ips[4]; + + vst1q_s32((int32_t *)ips, ip); + for (i = 0; i < 4; i++) + hop[i] = defv; + + __rte_lpm_lookup_vec(lpm, ips, hop, 4); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_LPM_SVE_H_ */ -- 2.25.1 ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/5] lpm: add sve support for lookup on Arm platform 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 1/5] lpm: add sve support for lookup on Arm platform Ruifeng Wang @ 2021-01-13 18:54 ` Medvedkin, Vladimir 0 siblings, 0 replies; 43+ messages in thread From: Medvedkin, Vladimir @ 2021-01-13 18:54 UTC (permalink / raw) To: Ruifeng Wang, Jan Viktorin, Jerin Jacob, Bruce Richardson Cc: dev, hemant.agrawal, honnappa.nagarahalli, nd On 08/01/2021 08:25, Ruifeng Wang wrote: > Added new path to do lpm4 lookup by using scalable vector extension. > The SVE path will be selected if compiler has flag SVE set. > > Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> > --- > lib/librte_eal/arm/include/rte_vect.h | 3 + > lib/librte_lpm/meson.build | 2 +- > lib/librte_lpm/rte_lpm.h | 4 ++ > lib/librte_lpm/rte_lpm_sve.h | 83 +++++++++++++++++++++++++++ > 4 files changed, 91 insertions(+), 1 deletion(-) > create mode 100644 lib/librte_lpm/rte_lpm_sve.h > > diff --git a/lib/librte_eal/arm/include/rte_vect.h b/lib/librte_eal/arm/include/rte_vect.h > index a739e6e66..093e9122a 100644 > --- a/lib/librte_eal/arm/include/rte_vect.h > +++ b/lib/librte_eal/arm/include/rte_vect.h > @@ -9,6 +9,9 @@ > #include "generic/rte_vect.h" > #include "rte_debug.h" > #include "arm_neon.h" > +#ifdef __ARM_FEATURE_SVE > +#include <arm_sve.h> > +#endif > > #ifdef __cplusplus > extern "C" { > diff --git a/lib/librte_lpm/meson.build b/lib/librte_lpm/meson.build > index 6cfc083c5..f93c86640 100644 > --- a/lib/librte_lpm/meson.build > +++ b/lib/librte_lpm/meson.build > @@ -5,6 +5,6 @@ sources = files('rte_lpm.c', 'rte_lpm6.c') > headers = files('rte_lpm.h', 'rte_lpm6.h') > # since header files have different names, we can install all vector headers > # without worrying about which architecture we actually need > -headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h') > +headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h', 'rte_lpm_sve.h') > deps += 
['hash'] > deps += ['rcu'] > diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h > index 1afe55cdc..28b57683b 100644 > --- a/lib/librte_lpm/rte_lpm.h > +++ b/lib/librte_lpm/rte_lpm.h > @@ -402,7 +402,11 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], > uint32_t defv); > > #if defined(RTE_ARCH_ARM) > +#ifdef __ARM_FEATURE_SVE > +#include "rte_lpm_sve.h" > +#else > #include "rte_lpm_neon.h" > +#endif > #elif defined(RTE_ARCH_PPC_64) > #include "rte_lpm_altivec.h" > #else > diff --git a/lib/librte_lpm/rte_lpm_sve.h b/lib/librte_lpm/rte_lpm_sve.h > new file mode 100644 > index 000000000..2e319373e > --- /dev/null > +++ b/lib/librte_lpm/rte_lpm_sve.h > @@ -0,0 +1,83 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2020 Arm Limited > + */ > + > +#ifndef _RTE_LPM_SVE_H_ > +#define _RTE_LPM_SVE_H_ > + > +#include <rte_vect.h> > + > +#ifdef __cplusplus > +extern "C" { > +#endif > + > +__rte_internal > +static void > +__rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips, > + uint32_t *__rte_restrict next_hops, const uint32_t n) > +{ > + uint32_t i = 0; > + svuint32_t v_ip, v_idx, v_tbl24, v_tbl8, v_hop; > + svuint32_t v_mask_xv, v_mask_v, v_mask_hop; > + svbool_t pg = svwhilelt_b32(i, n); > + svbool_t pv; > + > + do { > + v_ip = svld1(pg, &ips[i]); > + /* Get indices for tbl24[] */ > + v_idx = svlsr_x(pg, v_ip, 8); > + /* Extract values from tbl24[] */ > + v_tbl24 = svld1_gather_index(pg, (const uint32_t *)lpm->tbl24, > + v_idx); > + > + /* Create mask with valid set */ > + v_mask_v = svdup_u32_z(pg, RTE_LPM_LOOKUP_SUCCESS); > + /* Create mask with valid and valid_group set */ > + v_mask_xv = svdup_u32_z(pg, RTE_LPM_VALID_EXT_ENTRY_BITMASK); > + /* Create predicate for tbl24 entries: (valid && !valid_group) */ > + pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), v_mask_v); > + /* Create mask for next_hop in table entry */ > + v_mask_hop = svdup_u32_z(pg, 0x00ffffff); > + /* Extract next_hop and write 
back */ > + v_hop = svand_x(pv, v_tbl24, v_mask_hop); > + svst1(pv, &next_hops[i], v_hop); > + > + /* Update predicate for tbl24 entries: (valid && valid_group) */ > + pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), v_mask_xv); > + /* Compute tbl8 index */ > + v_idx = svand_x(pv, v_tbl24, svdup_u32_z(pv, 0xffffff)); > + v_idx = svmul_x(pv, v_idx, RTE_LPM_TBL8_GROUP_NUM_ENTRIES); > + v_idx = svadd_x(pv, svand_x(pv, v_ip, svdup_u32_z(pv, 0xff)), > + v_idx); > + /* Extract values from tbl8[] */ > + v_tbl8 = svld1_gather_index(pv, (const uint32_t *)lpm->tbl8, > + v_idx); > + /* Update predicate for tbl8 entries: (valid) */ > + pv = svcmpeq(pv, svand_z(pv, v_tbl8, v_mask_v), v_mask_v); > + /* Extract next_hop and write back */ > + v_hop = svand_x(pv, v_tbl8, v_mask_hop); > + svst1(pv, &next_hops[i], v_hop); > + > + i += svlen(v_ip); > + pg = svwhilelt_b32(i, n); > + } while (svptest_any(svptrue_b32(), pg)); > +} > + > +static inline void > +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], > + uint32_t defv) > +{ > + uint32_t i, ips[4]; > + > + vst1q_s32((int32_t *)ips, ip); > + for (i = 0; i < 4; i++) > + hop[i] = defv; > + > + __rte_lpm_lookup_vec(lpm, ips, hop, 4); > +} > + > +#ifdef __cplusplus > +} > +#endif > + > +#endif /* _RTE_LPM_SVE_H_ */ > Acked-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com> -- Regards, Vladimir ^ permalink raw reply [flat|nested] 43+ messages in thread
* [dpdk-dev] [PATCH v2 2/5] net/hns3: fix build with sve enabled 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 0/5] lpm lookup with sve support Ruifeng Wang 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 1/5] lpm: add sve support for lookup on Arm platform Ruifeng Wang @ 2021-01-08 8:25 ` Ruifeng Wang 2021-01-09 0:06 ` Honnappa Nagarahalli 2021-01-09 2:15 ` oulijun 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 3/5] net/octeontx: " Ruifeng Wang ` (2 subsequent siblings) 4 siblings, 2 replies; 43+ messages in thread From: Ruifeng Wang @ 2021-01-08 8:25 UTC (permalink / raw) To: Wei Hu (Xavier), Min Hu (Connor), Yisen Zhuang, Lijun Ou, Huisong Li, Chengchang Tang, Chengwen Feng Cc: dev, vladimir.medvedkin, jerinj, hemant.agrawal, honnappa.nagarahalli, nd, Ruifeng Wang, stable Building with SVE extension enabled stopped with error: error: ACLE function ‘svwhilelt_b64_s32’ requires ISA extension ‘sve’ 18 | #define PG64_256BIT svwhilelt_b64(0, 4) This is caused by unintentional cflags reset. Fixed the issue by appending required flag to cflags instead of overriding it. Fixes: 952ebacce4f2 ("net/hns3: support SVE Rx") Cc: xavier.huwei@huawei.com Cc: stable@dpdk.org Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> --- drivers/net/hns3/meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/hns3/meson.build b/drivers/net/hns3/meson.build index 45cee34d9..798086357 100644 --- a/drivers/net/hns3/meson.build +++ b/drivers/net/hns3/meson.build @@ -32,7 +32,7 @@ deps += ['hash'] if arch_subdir == 'arm' and dpdk_conf.get('RTE_ARCH_64') sources += files('hns3_rxtx_vec.c') if cc.get_define('__ARM_FEATURE_SVE', args: machine_args) != '' - cflags = ['-DCC_SVE_SUPPORT'] + cflags += ['-DCC_SVE_SUPPORT'] sources += files('hns3_rxtx_vec_sve.c') endif endif -- 2.25.1 ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [PATCH v2 2/5] net/hns3: fix build with sve enabled 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 2/5] net/hns3: fix build with sve enabled Ruifeng Wang @ 2021-01-09 0:06 ` Honnappa Nagarahalli 2021-01-09 2:11 ` oulijun 2021-01-09 2:15 ` oulijun 1 sibling, 1 reply; 43+ messages in thread From: Honnappa Nagarahalli @ 2021-01-09 0:06 UTC (permalink / raw) To: Ruifeng Wang, Wei Hu (Xavier), Min Hu (Connor), Yisen Zhuang, Lijun Ou, Huisong Li, Chengchang Tang, Chengwen Feng Cc: dev, vladimir.medvedkin, jerinj, hemant.agrawal, nd, Ruifeng Wang, stable, Honnappa Nagarahalli, nd <snip> > > Building with SVE extension enabled stopped with error: > > error: ACLE function ‘svwhilelt_b64_s32’ requires ISA extension ‘sve’ > 18 | #define PG64_256BIT svwhilelt_b64(0, 4) > > This is caused by unintentional cflags reset. > Fixed the issue by appending required flag to cflags instead of overriding it. > > Fixes: 952ebacce4f2 ("net/hns3: support SVE Rx") > Cc: xavier.huwei@huawei.com > Cc: stable@dpdk.org > > Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> > --- > drivers/net/hns3/meson.build | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/net/hns3/meson.build b/drivers/net/hns3/meson.build > index 45cee34d9..798086357 100644 > --- a/drivers/net/hns3/meson.build > +++ b/drivers/net/hns3/meson.build > @@ -32,7 +32,7 @@ deps += ['hash'] > if arch_subdir == 'arm' and dpdk_conf.get('RTE_ARCH_64') > sources += files('hns3_rxtx_vec.c') > if cc.get_define('__ARM_FEATURE_SVE', args: machine_args) != '' > - cflags = ['-DCC_SVE_SUPPORT'] > + cflags += ['-DCC_SVE_SUPPORT'] This comment is unrelated to this patch. We need to be consistent with the macro definitions. Is '__ARM_FEATURE_SVE' not enough? If we need to define an additional flag, I would name it something like 'RTE_ARM_FEATURE_SVE'. > sources += files('hns3_rxtx_vec_sve.c') > endif > endif > -- > 2.25.1 ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [PATCH v2 2/5] net/hns3: fix build with sve enabled 2021-01-09 0:06 ` Honnappa Nagarahalli @ 2021-01-09 2:11 ` oulijun 2021-01-11 2:39 ` Ruifeng Wang 0 siblings, 1 reply; 43+ messages in thread From: oulijun @ 2021-01-09 2:11 UTC (permalink / raw) To: Honnappa Nagarahalli, Ruifeng Wang, Wei Hu (Xavier), Min Hu (Connor), Yisen Zhuang, Huisong Li, Chengchang Tang, Chengwen Feng Cc: dev, vladimir.medvedkin, jerinj, hemant.agrawal, nd, stable 在 2021/1/9 8:06, Honnappa Nagarahalli 写道: > <snip> > >> >> Building with SVE extension enabled stopped with error: >> >> error: ACLE function ‘svwhilelt_b64_s32’ requires ISA extension ‘sve’ >> 18 | #define PG64_256BIT svwhilelt_b64(0, 4) >> >> This is caused by unintentional cflags reset. >> Fixed the issue by appending required flag to cflags instead of overriding it. >> >> Fixes: 952ebacce4f2 ("net/hns3: support SVE Rx") >> Cc: xavier.huwei@huawei.com >> Cc: stable@dpdk.org >> >> Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> >> --- >> drivers/net/hns3/meson.build | 2 +- >> 1 file changed, 1 insertion(+), 1 deletion(-) >> >> diff --git a/drivers/net/hns3/meson.build b/drivers/net/hns3/meson.build >> index 45cee34d9..798086357 100644 >> --- a/drivers/net/hns3/meson.build >> +++ b/drivers/net/hns3/meson.build >> @@ -32,7 +32,7 @@ deps += ['hash'] >> if arch_subdir == 'arm' and dpdk_conf.get('RTE_ARCH_64') >> sources += files('hns3_rxtx_vec.c') >> if cc.get_define('__ARM_FEATURE_SVE', args: machine_args) != '' >> - cflags = ['-DCC_SVE_SUPPORT'] >> + cflags += ['-DCC_SVE_SUPPORT'] > This comment is unrelated to this patch. We need to be consistent with the macro definitions. Is '__ARM_FEATURE_SVE' not enough? If we need to define an additional flag, I would name it something like 'RTE_ARM_FEATURE_SVE'. > I think __ARM_FEATURE_SVE is OK. If the gcc version in use supports the SVE flag, the compiler defines __ARM_FEATURE_SVE itself. The macro is defined in the ARM SVE document.
>> sources += files('hns3_rxtx_vec_sve.c') >> endif >> endif >> -- >> 2.25.1 > ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [PATCH v2 2/5] net/hns3: fix build with sve enabled 2021-01-09 2:11 ` oulijun @ 2021-01-11 2:39 ` Ruifeng Wang 2021-01-11 13:38 ` Honnappa Nagarahalli 0 siblings, 1 reply; 43+ messages in thread From: Ruifeng Wang @ 2021-01-11 2:39 UTC (permalink / raw) To: oulijun, Honnappa Nagarahalli, Min Hu (Connor), Yisen Zhuang, Huisong Li, Chengchang Tang, Chengwen Feng Cc: dev, vladimir.medvedkin, jerinj, hemant.agrawal, nd, stable, nd > -----Original Message----- > From: oulijun <oulijun@huawei.com> > Sent: Saturday, January 9, 2021 10:12 AM > To: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang > <Ruifeng.Wang@arm.com>; Wei Hu (Xavier) <xavier.huwei@huawei.com>; > Min Hu (Connor) <humin29@huawei.com>; Yisen Zhuang > <yisen.zhuang@huawei.com>; Huisong Li <lihuisong@huawei.com>; > Chengchang Tang <tangchengchang@huawei.com>; Chengwen Feng > <fengchengwen@huawei.com> > Cc: dev@dpdk.org; vladimir.medvedkin@intel.com; jerinj@marvell.com; > hemant.agrawal@nxp.com; nd <nd@arm.com>; stable@dpdk.org > Subject: Re: [PATCH v2 2/5] net/hns3: fix build with sve enabled > > > 在 2021/1/9 8:06, Honnappa Nagarahalli 写道: > > <snip> > > > >> > >> Building with SVE extension enabled stopped with error: > >> > >> error: ACLE function ‘svwhilelt_b64_s32’ requires ISA extension ‘sve’ > >> 18 | #define PG64_256BIT svwhilelt_b64(0, 4) > >> > >> This is caused by unintentional cflags reset. > >> Fixed the issue by appending required flag to cflags instead of overriding it. 
> >> > >> Fixes: 952ebacce4f2 ("net/hns3: support SVE Rx") > >> Cc: xavier.huwei@huawei.com > >> Cc: stable@dpdk.org > >> > >> Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> > >> --- > >> drivers/net/hns3/meson.build | 2 +- > >> 1 file changed, 1 insertion(+), 1 deletion(-) > >> > >> diff --git a/drivers/net/hns3/meson.build > >> b/drivers/net/hns3/meson.build index 45cee34d9..798086357 100644 > >> --- a/drivers/net/hns3/meson.build > >> +++ b/drivers/net/hns3/meson.build > >> @@ -32,7 +32,7 @@ deps += ['hash'] > >> if arch_subdir == 'arm' and dpdk_conf.get('RTE_ARCH_64') > >> sources += files('hns3_rxtx_vec.c') > >> if cc.get_define('__ARM_FEATURE_SVE', args: machine_args) != '' > >> - cflags = ['-DCC_SVE_SUPPORT'] > >> + cflags += ['-DCC_SVE_SUPPORT'] > > This comment is unrelated to this patch. We need to be consistent with the > macro definitions. Is '__ARM_FEATURE_SVE' not enough? If we need to > define an additional flag, I would name it something like > 'RTE_ARM_FEATURE_SVE'. > > > I think the __ARM_FEATURE_SVE is ok. if use the gcc version included SVE > flag, it will be identified as __ARM_FEATURE_SVE. it is defined in the ARM > SVE document. Yes, we can rely on flags defined by compiler and no extra flag is needed. I can update in next version to remove this section from meson file and replace CC_SVE_SUPPORT in code. > >> sources += files('hns3_rxtx_vec_sve.c') > >> endif > >> endif > >> -- > >> 2.25.1 > > ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [PATCH v2 2/5] net/hns3: fix build with sve enabled 2021-01-11 2:39 ` Ruifeng Wang @ 2021-01-11 13:38 ` Honnappa Nagarahalli 0 siblings, 0 replies; 43+ messages in thread From: Honnappa Nagarahalli @ 2021-01-11 13:38 UTC (permalink / raw) To: Ruifeng Wang, oulijun, Min Hu (Connor), Yisen Zhuang, Huisong Li, Chengchang Tang, Chengwen Feng Cc: dev, vladimir.medvedkin, jerinj, hemant.agrawal, nd, stable, Honnappa Nagarahalli, nd <snip> > > > > > >> > > >> Building with SVE extension enabled stopped with error: > > >> > > >> error: ACLE function ‘svwhilelt_b64_s32’ requires ISA extension ‘sve’ > > >> 18 | #define PG64_256BIT svwhilelt_b64(0, 4) > > >> > > >> This is caused by unintentional cflags reset. > > >> Fixed the issue by appending required flag to cflags instead of overriding it. > > >> > > >> Fixes: 952ebacce4f2 ("net/hns3: support SVE Rx") > > >> Cc: xavier.huwei@huawei.com > > >> Cc: stable@dpdk.org > > >> > > >> Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> > > >> --- > > >> drivers/net/hns3/meson.build | 2 +- > > >> 1 file changed, 1 insertion(+), 1 deletion(-) > > >> > > >> diff --git a/drivers/net/hns3/meson.build > > >> b/drivers/net/hns3/meson.build index 45cee34d9..798086357 100644 > > >> --- a/drivers/net/hns3/meson.build > > >> +++ b/drivers/net/hns3/meson.build > > >> @@ -32,7 +32,7 @@ deps += ['hash'] > > >> if arch_subdir == 'arm' and dpdk_conf.get('RTE_ARCH_64') > > >> sources += files('hns3_rxtx_vec.c') > > >> if cc.get_define('__ARM_FEATURE_SVE', args: machine_args) != '' > > >> -cflags = ['-DCC_SVE_SUPPORT'] > > >> +cflags += ['-DCC_SVE_SUPPORT'] > > > This comment is unrelated to this patch. We need to be consistent > > > with the > > macro definitions. Is '__ARM_FEATURE_SVE' not enough? If we need to > > define an additional flag, I would name it something like > > 'RTE_ARM_FEATURE_SVE'. > > > > > I think the __ARM_FEATURE_SVE is ok. 
if use the gcc version included > > SVE flag, it will be identified as __ARM_FEATURE_SVE. it is defined in > > the ARM SVE document. > > Yes, we can rely on flags defined by compiler and no extra flag is needed. > I can update in next version to remove this section from meson file and replace > CC_SVE_SUPPORT in code. Sounds good to me. > > >> sources += files('hns3_rxtx_vec_sve.c') > > >> endif > > >> endif > > >> -- > > >> 2.25.1 > > > ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [PATCH v2 2/5] net/hns3: fix build with sve enabled 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 2/5] net/hns3: fix build with sve enabled Ruifeng Wang 2021-01-09 0:06 ` Honnappa Nagarahalli @ 2021-01-09 2:15 ` oulijun 2021-01-11 2:27 ` Ruifeng Wang 1 sibling, 1 reply; 43+ messages in thread From: oulijun @ 2021-01-09 2:15 UTC (permalink / raw) To: Ruifeng Wang, Wei Hu (Xavier), Min Hu (Connor), Yisen Zhuang, Huisong Li, Chengchang Tang, Chengwen Feng Cc: dev, vladimir.medvedkin, jerinj, hemant.agrawal, honnappa.nagarahalli, nd, stable 在 2021/1/8 16:25, Ruifeng Wang 写道: > Building with SVE extension enabled stopped with error: > > error: ACLE function ‘svwhilelt_b64_s32’ requires ISA extension ‘sve’ > 18 | #define PG64_256BIT svwhilelt_b64(0, 4) > > This is caused by unintentional cflags reset. > Fixed the issue by appending required flag to cflags instead of > overriding it. > > Fixes: 952ebacce4f2 ("net/hns3: support SVE Rx") > Cc: xavier.huwei@huawei.com > Cc: stable@dpdk.org > > Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> > --- > drivers/net/hns3/meson.build | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/net/hns3/meson.build b/drivers/net/hns3/meson.build > index 45cee34d9..798086357 100644 > --- a/drivers/net/hns3/meson.build > +++ b/drivers/net/hns3/meson.build > @@ -32,7 +32,7 @@ deps += ['hash'] > if arch_subdir == 'arm' and dpdk_conf.get('RTE_ARCH_64') > sources += files('hns3_rxtx_vec.c') > if cc.get_define('__ARM_FEATURE_SVE', args: machine_args) != '' > - cflags = ['-DCC_SVE_SUPPORT'] > + cflags += ['-DCC_SVE_SUPPORT'] Hi, I noticed this patch, but I checked that the hns3 driver did not use this function. How did you compile it? Thanks Lijun Ou > sources += files('hns3_rxtx_vec_sve.c') > endif > endif > ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [PATCH v2 2/5] net/hns3: fix build with sve enabled 2021-01-09 2:15 ` oulijun @ 2021-01-11 2:27 ` Ruifeng Wang 0 siblings, 0 replies; 43+ messages in thread From: Ruifeng Wang @ 2021-01-11 2:27 UTC (permalink / raw) To: oulijun, Wei Hu (Xavier), Min Hu (Connor), Yisen Zhuang, Huisong Li, Chengchang Tang, Chengwen Feng Cc: dev, vladimir.medvedkin, jerinj, hemant.agrawal, Honnappa Nagarahalli, nd, stable, nd > -----Original Message----- > From: oulijun <oulijun@huawei.com> > Sent: Saturday, January 9, 2021 10:16 AM > To: Ruifeng Wang <Ruifeng.Wang@arm.com>; Wei Hu (Xavier) > <xavier.huwei@huawei.com>; Min Hu (Connor) <humin29@huawei.com>; > Yisen Zhuang <yisen.zhuang@huawei.com>; Huisong Li > <lihuisong@huawei.com>; Chengchang Tang > <tangchengchang@huawei.com>; Chengwen Feng > <fengchengwen@huawei.com> > Cc: dev@dpdk.org; vladimir.medvedkin@intel.com; jerinj@marvell.com; > hemant.agrawal@nxp.com; Honnappa Nagarahalli > <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>; stable@dpdk.org > Subject: Re: [PATCH v2 2/5] net/hns3: fix build with sve enabled > > > > 在 2021/1/8 16:25, Ruifeng Wang 写道: > > Building with SVE extension enabled stopped with error: > > > > error: ACLE function ‘svwhilelt_b64_s32’ requires ISA extension ‘sve’ > > 18 | #define PG64_256BIT svwhilelt_b64(0, 4) > > > > This is caused by unintentional cflags reset. > > Fixed the issue by appending required flag to cflags instead of > > overriding it. 
> > > > Fixes: 952ebacce4f2 ("net/hns3: support SVE Rx") > > Cc: xavier.huwei@huawei.com > > Cc: stable@dpdk.org > > > > Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> > > --- > > drivers/net/hns3/meson.build | 2 +- > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > diff --git a/drivers/net/hns3/meson.build > > b/drivers/net/hns3/meson.build index 45cee34d9..798086357 100644 > > --- a/drivers/net/hns3/meson.build > > +++ b/drivers/net/hns3/meson.build > > @@ -32,7 +32,7 @@ deps += ['hash'] > > if arch_subdir == 'arm' and dpdk_conf.get('RTE_ARCH_64') > > sources += files('hns3_rxtx_vec.c') > > if cc.get_define('__ARM_FEATURE_SVE', args: machine_args) != '' > > - cflags = ['-DCC_SVE_SUPPORT'] > > + cflags += ['-DCC_SVE_SUPPORT'] > Hi > I noticed this patch, but I checked that the hns3 driver did not use this > function.How did you compile it? Hi, The hns3 driver has sve rx/tx implementation in hns3_rxtx_vec_sve.c. This path will be enabled when compiling with sve feature enabled. I compiled it by using gcc-10.2 with flag '-march=armv8.3-a+sve'. You can try compile for n2 with the cross file added in this series (5/5). Thanks, Ruifeng > > Thanks > Lijun Ou > > sources += files('hns3_rxtx_vec_sve.c') > > endif > > endif > > ^ permalink raw reply [flat|nested] 43+ messages in thread
* [dpdk-dev] [PATCH v2 3/5] net/octeontx: fix build with sve enabled 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 0/5] lpm lookup with sve support Ruifeng Wang 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 1/5] lpm: add sve support for lookup on Arm platform Ruifeng Wang 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 2/5] net/hns3: fix build with sve enabled Ruifeng Wang @ 2021-01-08 8:25 ` Ruifeng Wang 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 4/5] common/octeontx2: " Ruifeng Wang 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 5/5] config: add Arm Neoverse N2 Ruifeng Wang 4 siblings, 0 replies; 43+ messages in thread From: Ruifeng Wang @ 2021-01-08 8:25 UTC (permalink / raw) To: Harman Kalra, Jerin Jacob, Santosh Shukla Cc: dev, vladimir.medvedkin, jerinj, hemant.agrawal, honnappa.nagarahalli, nd, Ruifeng Wang, stable Building with gcc 10.2 with SVE extension enabled got error: {standard input}: Assembler messages: {standard input}:91: Error: selected processor does not support `addvl x4,x8,#-1' {standard input}:95: Error: selected processor does not support `ptrue p1.d,all' {standard input}:135: Error: selected processor does not support `whilelo p2.d,xzr,x5' {standard input}:137: Error: selected processor does not support `decb x1' This is because inline assembly code explicitly resets cpu model to not have SVE support. Thus SVE instructions generated by compiler auto vectorization got rejected by assembler. Fixed the issue by replacing inline assembly with equivalent atomic built-ins. Compiler will generate LSE instructions for cpu that has the extension. 
Fixes: f0c7bb1bf778 ("net/octeontx/base: add octeontx IO operations") Cc: jerinj@marvell.com Cc: stable@dpdk.org Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> --- drivers/net/octeontx/base/octeontx_io.h | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/net/octeontx/base/octeontx_io.h b/drivers/net/octeontx/base/octeontx_io.h index 04b9ce191..0bf9b100d 100644 --- a/drivers/net/octeontx/base/octeontx_io.h +++ b/drivers/net/octeontx/base/octeontx_io.h @@ -58,14 +58,8 @@ do { \ static inline uint64_t octeontx_reg_ldadd_u64(void *addr, int64_t off) { - uint64_t old_val; - - __asm__ volatile( - " .cpu generic+lse\n" - " ldadd %1, %0, [%2]\n" - : "=r" (old_val) : "r" (off), "r" (addr) : "memory"); - - return old_val; + return (uint64_t)__atomic_fetch_add((int64_t *)addr, off, + __ATOMIC_RELAXED); } /** @@ -97,10 +91,8 @@ octeontx_reg_lmtst(void *lmtline_va, void *ioreg_va, const uint64_t cmdbuf[], } /* LDEOR initiates atomic transfer to I/O device */ - __asm__ volatile( - " .cpu generic+lse\n" - " ldeor xzr, %0, [%1]\n" - : "=r" (result) : "r" (ioreg_va) : "memory"); + result = __atomic_fetch_xor((uint64_t *)ioreg_va, 0, + __ATOMIC_RELAXED); } while (!result); } -- 2.25.1 ^ permalink raw reply [flat|nested] 43+ messages in thread
* [dpdk-dev] [PATCH v2 4/5] common/octeontx2: fix build with sve enabled 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 0/5] lpm lookup with sve support Ruifeng Wang ` (2 preceding siblings ...) 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 3/5] net/octeontx: " Ruifeng Wang @ 2021-01-08 8:25 ` Ruifeng Wang 2021-01-08 10:29 ` [dpdk-dev] [EXT] " Pavan Nikhilesh Bhagavatula 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 5/5] config: add Arm Neoverse N2 Ruifeng Wang 4 siblings, 1 reply; 43+ messages in thread From: Ruifeng Wang @ 2021-01-08 8:25 UTC (permalink / raw) To: Jerin Jacob, Nithin Dabilpuram, Pavan Nikhilesh Cc: dev, vladimir.medvedkin, hemant.agrawal, honnappa.nagarahalli, nd, Ruifeng Wang, stable Building with gcc 10.2 with SVE extension enabled got error: {standard input}: Assembler messages: {standard input}:4002: Error: selected processor does not support `mov z3.b,#0' {standard input}:4003: Error: selected processor does not support `whilelo p1.b,xzr,x7' {standard input}:4005: Error: selected processor does not support `ld1b z0.b,p1/z,[x8]' {standard input}:4006: Error: selected processor does not support `whilelo p4.s,wzr,w7' This is because inline assembly code explicitly resets cpu model to not have SVE support. Thus SVE instructions generated by compiler auto vectorization got rejected by assembler. Fixed the issue by replacing inline assembly with equivalent atomic built-ins. Compiler will generate LSE instructions for cpu that has the extension. 
Fixes: 8a4f835971f5 ("common/octeontx2: add IO handling APIs") Cc: jerinj@marvell.com Cc: stable@dpdk.org Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> --- drivers/common/octeontx2/otx2_io_arm64.h | 37 +++--------------------- 1 file changed, 4 insertions(+), 33 deletions(-) diff --git a/drivers/common/octeontx2/otx2_io_arm64.h b/drivers/common/octeontx2/otx2_io_arm64.h index b5c85d9a6..8843a79b5 100644 --- a/drivers/common/octeontx2/otx2_io_arm64.h +++ b/drivers/common/octeontx2/otx2_io_arm64.h @@ -24,55 +24,26 @@ static __rte_always_inline uint64_t otx2_atomic64_add_nosync(int64_t incr, int64_t *ptr) { - uint64_t result; - /* Atomic add with no ordering */ - asm volatile ( - ".cpu generic+lse\n" - "ldadd %x[i], %x[r], [%[b]]" - : [r] "=r" (result), "+m" (*ptr) - : [i] "r" (incr), [b] "r" (ptr) - : "memory"); - return result; + return (uint64_t)__atomic_fetch_add(ptr, incr, __ATOMIC_RELAXED); } static __rte_always_inline uint64_t otx2_atomic64_add_sync(int64_t incr, int64_t *ptr) { - uint64_t result; - - /* Atomic add with ordering */ - asm volatile ( - ".cpu generic+lse\n" - "ldadda %x[i], %x[r], [%[b]]" - : [r] "=r" (result), "+m" (*ptr) - : [i] "r" (incr), [b] "r" (ptr) - : "memory"); - return result; + return (uint64_t)__atomic_fetch_add(ptr, incr, __ATOMIC_ACQUIRE); } static __rte_always_inline uint64_t otx2_lmt_submit(rte_iova_t io_address) { - uint64_t result; - - asm volatile ( - ".cpu generic+lse\n" - "ldeor xzr,%x[rf],[%[rs]]" : - [rf] "=r"(result): [rs] "r"(io_address)); - return result; + return __atomic_fetch_xor((uint64_t *)io_address, 0, __ATOMIC_RELAXED); } static __rte_always_inline uint64_t otx2_lmt_submit_release(rte_iova_t io_address) { - uint64_t result; - - asm volatile ( - ".cpu generic+lse\n" - "ldeorl xzr,%x[rf],[%[rs]]" : - [rf] "=r"(result) : [rs] "r"(io_address)); - return result; + return __atomic_fetch_xor((uint64_t *)io_address, 0, __ATOMIC_RELEASE); } static __rte_always_inline void -- 2.25.1 ^ permalink raw reply [flat|nested] 
43+ messages in thread
* Re: [dpdk-dev] [EXT] [PATCH v2 4/5] common/octeontx2: fix build with sve enabled 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 4/5] common/octeontx2: " Ruifeng Wang @ 2021-01-08 10:29 ` Pavan Nikhilesh Bhagavatula 2021-01-11 9:51 ` Ruifeng Wang 0 siblings, 1 reply; 43+ messages in thread From: Pavan Nikhilesh Bhagavatula @ 2021-01-08 10:29 UTC (permalink / raw) To: Ruifeng Wang, Jerin Jacob Kollanukkaran, Nithin Kumar Dabilpuram Cc: dev, vladimir.medvedkin, hemant.agrawal, honnappa.nagarahalli, nd, stable Hi Ruifeng, >Building with gcc 10.2 with SVE extension enabled got error: > >{standard input}: Assembler messages: >{standard input}:4002: Error: selected processor does not support `mov >z3.b,#0' >{standard input}:4003: Error: selected processor does not support >`whilelo p1.b,xzr,x7' >{standard input}:4005: Error: selected processor does not support `ld1b >z0.b,p1/z,[x8]' >{standard input}:4006: Error: selected processor does not support >`whilelo p4.s,wzr,w7' > >This is because inline assembly code explicitly resets cpu model to >not have SVE support. Thus SVE instructions generated by compiler >auto vectorization got rejected by assembler. > >Fixed the issue by replacing inline assembly with equivalent atomic >built-ins. Compiler will generate LSE instructions for cpu that has >the extension. 
> >Fixes: 8a4f835971f5 ("common/octeontx2: add IO handling APIs") >Cc: jerinj@marvell.com >Cc: stable@dpdk.org > >Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> >--- > drivers/common/octeontx2/otx2_io_arm64.h | 37 +++-------------------- >- > 1 file changed, 4 insertions(+), 33 deletions(-) > >diff --git a/drivers/common/octeontx2/otx2_io_arm64.h >b/drivers/common/octeontx2/otx2_io_arm64.h >index b5c85d9a6..8843a79b5 100644 >--- a/drivers/common/octeontx2/otx2_io_arm64.h >+++ b/drivers/common/octeontx2/otx2_io_arm64.h >@@ -24,55 +24,26 @@ > static __rte_always_inline uint64_t > otx2_atomic64_add_nosync(int64_t incr, int64_t *ptr) > { >- uint64_t result; >- > /* Atomic add with no ordering */ >- asm volatile ( >- ".cpu generic+lse\n" >- "ldadd %x[i], %x[r], [%[b]]" >- : [r] "=r" (result), "+m" (*ptr) >- : [i] "r" (incr), [b] "r" (ptr) >- : "memory"); >- return result; >+ return (uint64_t)__atomic_fetch_add(ptr, incr, >__ATOMIC_RELAXED); > } > Here LDADD acts as a way to interface to co-processors i.e. LDADD instruction opcode + specific io address are recognized by HW interceptor and dispatched to the specific coprocessor. Leaving it to the compiler to use the correct instruction is a bad idea. This breaks the arm64_armv8_linux_gcc build as it doesn't have the +lse enabled. __atomic_fetch_add will generate a different instruction with SVE enabled. Instead can we add +sve to the first line to prevent outer loop from optimizing out the trap? I tested with gcc 10.2 and the n2 config; the change below works fine. -" .cpu generic+lse\n" +" .cpu generic+lse+sve\n" Regards, Pavan. 
> static __rte_always_inline uint64_t > otx2_atomic64_add_sync(int64_t incr, int64_t *ptr) > { >- uint64_t result; >- >- /* Atomic add with ordering */ >- asm volatile ( >- ".cpu generic+lse\n" >- "ldadda %x[i], %x[r], [%[b]]" >- : [r] "=r" (result), "+m" (*ptr) >- : [i] "r" (incr), [b] "r" (ptr) >- : "memory"); >- return result; >+ return (uint64_t)__atomic_fetch_add(ptr, incr, >__ATOMIC_ACQUIRE); > } > > static __rte_always_inline uint64_t > otx2_lmt_submit(rte_iova_t io_address) > { >- uint64_t result; >- >- asm volatile ( >- ".cpu generic+lse\n" >- "ldeor xzr,%x[rf],[%[rs]]" : >- [rf] "=r"(result): [rs] "r"(io_address)); >- return result; >+ return __atomic_fetch_xor((uint64_t *)io_address, 0, >__ATOMIC_RELAXED); > } > > static __rte_always_inline uint64_t > otx2_lmt_submit_release(rte_iova_t io_address) > { >- uint64_t result; >- >- asm volatile ( >- ".cpu generic+lse\n" >- "ldeorl xzr,%x[rf],[%[rs]]" : >- [rf] "=r"(result) : [rs] "r"(io_address)); >- return result; >+ return __atomic_fetch_xor((uint64_t *)io_address, 0, >__ATOMIC_RELEASE); > } > > static __rte_always_inline void >-- >2.25.1 ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [EXT] [PATCH v2 4/5] common/octeontx2: fix build with sve enabled 2021-01-08 10:29 ` [dpdk-dev] [EXT] " Pavan Nikhilesh Bhagavatula @ 2021-01-11 9:51 ` Ruifeng Wang 0 siblings, 0 replies; 43+ messages in thread From: Ruifeng Wang @ 2021-01-11 9:51 UTC (permalink / raw) To: Pavan Nikhilesh Bhagavatula, jerinj, Nithin Kumar Dabilpuram Cc: dev, vladimir.medvedkin, hemant.agrawal, Honnappa Nagarahalli, nd, stable, nd > -----Original Message----- > From: Pavan Nikhilesh Bhagavatula <pbhagavatula@marvell.com> > Sent: Friday, January 8, 2021 6:29 PM > To: Ruifeng Wang <Ruifeng.Wang@arm.com>; jerinj@marvell.com; Nithin > Kumar Dabilpuram <ndabilpuram@marvell.com> > Cc: dev@dpdk.org; vladimir.medvedkin@intel.com; > hemant.agrawal@nxp.com; Honnappa Nagarahalli > <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>; stable@dpdk.org > Subject: RE: [EXT] [PATCH v2 4/5] common/octeontx2: fix build with sve > enabled > > Hi Ruifeng, > > >Building with gcc 10.2 with SVE extension enabled got error: > > > >{standard input}: Assembler messages: > >{standard input}:4002: Error: selected processor does not support `mov > >z3.b,#0' > >{standard input}:4003: Error: selected processor does not support > >`whilelo p1.b,xzr,x7' > >{standard input}:4005: Error: selected processor does not support `ld1b > >z0.b,p1/z,[x8]' > >{standard input}:4006: Error: selected processor does not support > >`whilelo p4.s,wzr,w7' > > > >This is because inline assembly code explicitly resets cpu model to not > >have SVE support. Thus SVE instructions generated by compiler auto > >vectorization got rejected by assembler. > > > >Fixed the issue by replacing inline assembly with equivalent atomic > >built-ins. Compiler will generate LSE instructions for cpu that has the > >extension. 
> > > >Fixes: 8a4f835971f5 ("common/octeontx2: add IO handling APIs") > >Cc: jerinj@marvell.com > >Cc: stable@dpdk.org > > > >Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> > >--- > > drivers/common/octeontx2/otx2_io_arm64.h | 37 +++-------------------- > >- > > 1 file changed, 4 insertions(+), 33 deletions(-) > > > >diff --git a/drivers/common/octeontx2/otx2_io_arm64.h > >b/drivers/common/octeontx2/otx2_io_arm64.h > >index b5c85d9a6..8843a79b5 100644 > >--- a/drivers/common/octeontx2/otx2_io_arm64.h > >+++ b/drivers/common/octeontx2/otx2_io_arm64.h > >@@ -24,55 +24,26 @@ > > static __rte_always_inline uint64_t > > otx2_atomic64_add_nosync(int64_t incr, int64_t *ptr) { > >- uint64_t result; > >- > > /* Atomic add with no ordering */ > >- asm volatile ( > >- ".cpu generic+lse\n" > >- "ldadd %x[i], %x[r], [%[b]]" > >- : [r] "=r" (result), "+m" (*ptr) > >- : [i] "r" (incr), [b] "r" (ptr) > >- : "memory"); > >- return result; > >+ return (uint64_t)__atomic_fetch_add(ptr, incr, > >__ATOMIC_RELAXED); > > } > > > > Here LDADD acts as a way to interface to co-processors i.e. > LDADD instruction opcode + specific io address are recognized by HW > interceptor and dispatched to the specific coprocessor. OK. Now I understand the background. > > Leaving it to the compiler to use the correct instruction is a bad idea. > This breaks the arm64_armv8_linux_gcc build as it doesn't have the > +lse enabled. > __atomic_fetch_add will generate a different instruction with SVE enabled. > > Instead can we add +sve to the first line to prevent outer loop from > optimizing out the trap? Since the inline assembly needs to be preserved, we have to tune the enabled extensions. I will change in next version. Thanks, Ruifeng > > I tested with 10.2 and n2 config below change works fine. > -" .cpu generic+lse\n" > +" .cpu generic+lse+sve\n" > > Regards, > Pavan. 
> > > static __rte_always_inline uint64_t > > otx2_atomic64_add_sync(int64_t incr, int64_t *ptr) { > >- uint64_t result; > >- > >- /* Atomic add with ordering */ > >- asm volatile ( > >- ".cpu generic+lse\n" > >- "ldadda %x[i], %x[r], [%[b]]" > >- : [r] "=r" (result), "+m" (*ptr) > >- : [i] "r" (incr), [b] "r" (ptr) > >- : "memory"); > >- return result; > >+ return (uint64_t)__atomic_fetch_add(ptr, incr, > >__ATOMIC_ACQUIRE); > > } > > > > static __rte_always_inline uint64_t > > otx2_lmt_submit(rte_iova_t io_address) { > >- uint64_t result; > >- > >- asm volatile ( > >- ".cpu generic+lse\n" > >- "ldeor xzr,%x[rf],[%[rs]]" : > >- [rf] "=r"(result): [rs] "r"(io_address)); > >- return result; > >+ return __atomic_fetch_xor((uint64_t *)io_address, 0, > >__ATOMIC_RELAXED); > > } > > > > static __rte_always_inline uint64_t > > otx2_lmt_submit_release(rte_iova_t io_address) { > >- uint64_t result; > >- > >- asm volatile ( > >- ".cpu generic+lse\n" > >- "ldeorl xzr,%x[rf],[%[rs]]" : > >- [rf] "=r"(result) : [rs] "r"(io_address)); > >- return result; > >+ return __atomic_fetch_xor((uint64_t *)io_address, 0, > >__ATOMIC_RELEASE); > > } > > > > static __rte_always_inline void > >-- > >2.25.1 ^ permalink raw reply [flat|nested] 43+ messages in thread
* [dpdk-dev] [PATCH v2 5/5] config: add Arm Neoverse N2 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 0/5] lpm lookup with sve support Ruifeng Wang ` (3 preceding siblings ...) 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 4/5] common/octeontx2: " Ruifeng Wang @ 2021-01-08 8:25 ` Ruifeng Wang 2021-01-08 23:58 ` Honnappa Nagarahalli 4 siblings, 1 reply; 43+ messages in thread From: Ruifeng Wang @ 2021-01-08 8:25 UTC (permalink / raw) To: Jerin Jacob, Ruifeng Wang, Jan Viktorin, Bruce Richardson Cc: dev, vladimir.medvedkin, hemant.agrawal, honnappa.nagarahalli, nd Add Arm Neoverse N2 cpu support. Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> --- config/arm/arm64_n2_linux_gcc | 17 +++++++++++++++++ config/arm/meson.build | 11 ++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 config/arm/arm64_n2_linux_gcc diff --git a/config/arm/arm64_n2_linux_gcc b/config/arm/arm64_n2_linux_gcc new file mode 100644 index 000000000..78f6f3e2b --- /dev/null +++ b/config/arm/arm64_n2_linux_gcc @@ -0,0 +1,17 @@ +[binaries] +c = 'aarch64-linux-gnu-gcc' +cpp = 'aarch64-linux-gnu-cpp' +ar = 'aarch64-linux-gnu-gcc-ar' +strip = 'aarch64-linux-gnu-strip' +pkgconfig = 'aarch64-linux-gnu-pkg-config' +pcap-config = '' + +[host_machine] +system = 'linux' +cpu_family = 'aarch64' +cpu = 'armv8-a' +endian = 'little' + +[properties] +implementor_id = '0x41' +implementor_pn = '0xd49' diff --git a/config/arm/meson.build b/config/arm/meson.build index 42b4e43c7..58e0ae643 100644 --- a/config/arm/meson.build +++ b/config/arm/meson.build @@ -89,6 +89,14 @@ flags_n1generic_extra = [ ['RTE_MAX_NUMA_NODES', 1], ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false], ['RTE_LIBRTE_VHOST_NUMA', false]] +flags_n2generic_extra = [ + ['RTE_MACHINE', '"neoverse-n2"'], + ['RTE_MAX_LCORE', 64], + ['RTE_CACHE_LINE_SIZE', 64], + ['RTE_ARM_FEATURE_ATOMICS', true], + ['RTE_USE_C11_MEM_MODEL', true], + ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false], + ['RTE_LIBRTE_VHOST_NUMA', false]] machine_args_generic = [ ['default', 
['-march=armv8-a+crc', '-moutline-atomics']], @@ -100,7 +108,8 @@ machine_args_generic = [ ['0xd09', ['-mcpu=cortex-a73']], ['0xd0a', ['-mcpu=cortex-a75']], ['0xd0b', ['-mcpu=cortex-a76']], - ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'], flags_n1generic_extra]] + ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'], flags_n1generic_extra], + ['0xd49', ['-march=armv8.5-a+crypto+sve'], flags_n2generic_extra]] machine_args_cavium = [ ['default', ['-march=armv8-a+crc+crypto','-mcpu=thunderx']], -- 2.25.1 ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [PATCH v2 5/5] config: add Arm Neoverse N2 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 5/5] config: add Arm Neoverse N2 Ruifeng Wang @ 2021-01-08 23:58 ` Honnappa Nagarahalli 2021-01-11 3:01 ` Ruifeng Wang 0 siblings, 1 reply; 43+ messages in thread From: Honnappa Nagarahalli @ 2021-01-08 23:58 UTC (permalink / raw) To: Ruifeng Wang, jerinj, Ruifeng Wang, Jan Viktorin, Bruce Richardson Cc: dev, vladimir.medvedkin, hemant.agrawal, nd, Honnappa Nagarahalli, nd + Juraj Please note that this clashes with Juraj's patch for meson rework. <snip> > > Add Arm Neoverse N2 cpu support. > > Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> > --- > config/arm/arm64_n2_linux_gcc | 17 +++++++++++++++++ > config/arm/meson.build | 11 ++++++++++- > 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 > config/arm/arm64_n2_linux_gcc > > diff --git a/config/arm/arm64_n2_linux_gcc > b/config/arm/arm64_n2_linux_gcc new file mode 100644 index > 000000000..78f6f3e2b > --- /dev/null > +++ b/config/arm/arm64_n2_linux_gcc > @@ -0,0 +1,17 @@ > +[binaries] > +c = 'aarch64-linux-gnu-gcc' > +cpp = 'aarch64-linux-gnu-cpp' > +ar = 'aarch64-linux-gnu-gcc-ar' > +strip = 'aarch64-linux-gnu-strip' > +pkgconfig = 'aarch64-linux-gnu-pkg-config' > +pcap-config = '' > + > +[host_machine] > +system = 'linux' > +cpu_family = 'aarch64' > +cpu = 'armv8-a' > +endian = 'little' > + > +[properties] > +implementor_id = '0x41' > +implementor_pn = '0xd49' > diff --git a/config/arm/meson.build b/config/arm/meson.build index > 42b4e43c7..58e0ae643 100644 > --- a/config/arm/meson.build > +++ b/config/arm/meson.build > @@ -89,6 +89,14 @@ flags_n1generic_extra = [ > ['RTE_MAX_NUMA_NODES', 1], > ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false], > ['RTE_LIBRTE_VHOST_NUMA', false]] > +flags_n2generic_extra = [ > + ['RTE_MACHINE', '"neoverse-n2"'], > + ['RTE_MAX_LCORE', 64], > + ['RTE_CACHE_LINE_SIZE', 64], > + ['RTE_ARM_FEATURE_ATOMICS', true], > + ['RTE_USE_C11_MEM_MODEL', true], > + 
['RTE_EAL_NUMA_AWARE_HUGEPAGES', false], > + ['RTE_LIBRTE_VHOST_NUMA', false]] Do we need a flag RTE_ARM_FEATURE_SVE? > > machine_args_generic = [ > ['default', ['-march=armv8-a+crc', '-moutline-atomics']], @@ -100,7 > +108,8 @@ machine_args_generic = [ > ['0xd09', ['-mcpu=cortex-a73']], > ['0xd0a', ['-mcpu=cortex-a75']], > ['0xd0b', ['-mcpu=cortex-a76']], > - ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'], > flags_n1generic_extra]] > + ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'], > flags_n1generic_extra], > + ['0xd49', ['-march=armv8.5-a+crypto+sve'], flags_n2generic_extra]] Should this be 'sve2'? There should be a flag to indicate SVE2. > > machine_args_cavium = [ > ['default', ['-march=armv8-a+crc+crypto','-mcpu=thunderx']], > -- > 2.25.1 ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [PATCH v2 5/5] config: add Arm Neoverse N2 2021-01-08 23:58 ` Honnappa Nagarahalli @ 2021-01-11 3:01 ` Ruifeng Wang 2021-01-11 3:09 ` Jerin Jacob 0 siblings, 1 reply; 43+ messages in thread From: Ruifeng Wang @ 2021-01-11 3:01 UTC (permalink / raw) To: Honnappa Nagarahalli, jerinj, Jan Viktorin, Bruce Richardson Cc: dev, vladimir.medvedkin, hemant.agrawal, nd, nd, nd > -----Original Message----- > From: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> > Sent: Saturday, January 9, 2021 7:58 AM > To: Ruifeng Wang <Ruifeng.Wang@arm.com>; jerinj@marvell.com; Ruifeng > Wang <Ruifeng.Wang@arm.com>; Jan Viktorin <viktorin@rehivetech.com>; > Bruce Richardson <bruce.richardson@intel.com> > Cc: dev@dpdk.org; vladimir.medvedkin@intel.com; > hemant.agrawal@nxp.com; nd <nd@arm.com>; Honnappa Nagarahalli > <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com> > Subject: RE: [PATCH v2 5/5] config: add Arm Neoverse N2 > > + Juraj > > Please note that this clashes with Juraj's patch for meson rework. Yes. I didn't base it on the build options rework series. I will rebase once that series is merged. > > <snip> > > > > > Add Arm Neoverse N2 cpu support.
> > > > Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> > > --- > > config/arm/arm64_n2_linux_gcc | 17 +++++++++++++++++ > > config/arm/meson.build | 11 ++++++++++- > > 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 > > config/arm/arm64_n2_linux_gcc > > > > diff --git a/config/arm/arm64_n2_linux_gcc > > b/config/arm/arm64_n2_linux_gcc new file mode 100644 index > > 000000000..78f6f3e2b > > --- /dev/null > > +++ b/config/arm/arm64_n2_linux_gcc > > @@ -0,0 +1,17 @@ > > +[binaries] > > +c = 'aarch64-linux-gnu-gcc' > > +cpp = 'aarch64-linux-gnu-cpp' > > +ar = 'aarch64-linux-gnu-gcc-ar' > > +strip = 'aarch64-linux-gnu-strip' > > +pkgconfig = 'aarch64-linux-gnu-pkg-config' > > +pcap-config = '' > > + > > +[host_machine] > > +system = 'linux' > > +cpu_family = 'aarch64' > > +cpu = 'armv8-a' > > +endian = 'little' > > + > > +[properties] > > +implementor_id = '0x41' > > +implementor_pn = '0xd49' > > diff --git a/config/arm/meson.build b/config/arm/meson.build index > > 42b4e43c7..58e0ae643 100644 > > --- a/config/arm/meson.build > > +++ b/config/arm/meson.build > > @@ -89,6 +89,14 @@ flags_n1generic_extra = [ > > ['RTE_MAX_NUMA_NODES', 1], > > ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false], > > ['RTE_LIBRTE_VHOST_NUMA', false]] > > +flags_n2generic_extra = [ > > + ['RTE_MACHINE', '"neoverse-n2"'], > > + ['RTE_MAX_LCORE', 64], > > + ['RTE_CACHE_LINE_SIZE', 64], > > + ['RTE_ARM_FEATURE_ATOMICS', true], > > + ['RTE_USE_C11_MEM_MODEL', true], > > + ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false], > > + ['RTE_LIBRTE_VHOST_NUMA', false]] > Do we need a flag RTE_ARM_FEATURE_SVE? I don't think extra flag is needed. We can rely on __ARM_FEATURE_SVE from compiler. One scenario I can think of where RTE_ARM_FEATURE_SVE can be needed is, when we are writing inline assembly with sve instructions and using compiler that has no sve support. I'm not sure we will have sve inline assembly as C intrinsics are available. 
> > > > > machine_args_generic = [ > > ['default', ['-march=armv8-a+crc', '-moutline-atomics']], @@ -100,7 > > +108,8 @@ machine_args_generic = [ > > ['0xd09', ['-mcpu=cortex-a73']], > > ['0xd0a', ['-mcpu=cortex-a75']], > > ['0xd0b', ['-mcpu=cortex-a76']], > > - ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'], > > flags_n1generic_extra]] > > + ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'], > > flags_n1generic_extra], > > + ['0xd49', ['-march=armv8.5-a+crypto+sve'], flags_n2generic_extra]] > Should this be 'sve2'? There should be a flag to indicate SVE2. Yes. N2 supports sve2 and sve2 is superset of sve. I will do the change in next version. > > > > > machine_args_cavium = [ > > ['default', ['-march=armv8-a+crc+crypto','-mcpu=thunderx']], > > -- > > 2.25.1 ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [PATCH v2 5/5] config: add Arm Neoverse N2 2021-01-11 3:01 ` Ruifeng Wang @ 2021-01-11 3:09 ` Jerin Jacob 2021-01-11 8:32 ` Ruifeng Wang 0 siblings, 1 reply; 43+ messages in thread From: Jerin Jacob @ 2021-01-11 3:09 UTC (permalink / raw) To: Ruifeng Wang Cc: Honnappa Nagarahalli, jerinj, Jan Viktorin, Bruce Richardson, dev, vladimir.medvedkin, hemant.agrawal, nd On Mon, Jan 11, 2021 at 8:31 AM Ruifeng Wang <Ruifeng.Wang@arm.com> wrote: > > > > -----Original Message----- > > From: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> > > Sent: Saturday, January 9, 2021 7:58 AM > > To: Ruifeng Wang <Ruifeng.Wang@arm.com>; jerinj@marvell.com; Ruifeng > > Wang <Ruifeng.Wang@arm.com>; Jan Viktorin <viktorin@rehivetech.com>; > > Bruce Richardson <bruce.richardson@intel.com> > > Cc: dev@dpdk.org; vladimir.medvedkin@intel.com; > > hemant.agrawal@nxp.com; nd <nd@arm.com>; Honnappa Nagarahalli > > <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com> > > Subject: RE: [PATCH v2 5/5] config: add Arm Neoverse N2 > > > > + Juraj > > > > Please note that this clashes with Juraj's patch for meson rework. > > Yes. I didn't base it on the build options rework series. > I will rebase when that series got merged. > > > > <snip> > > > > > > > > Add Arm Neoverse N2 cpu support. 
> > > > > > Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> > > > --- > > > config/arm/arm64_n2_linux_gcc | 17 +++++++++++++++++ > > > config/arm/meson.build | 11 ++++++++++- > > > 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 > > > config/arm/arm64_n2_linux_gcc > > > > > > diff --git a/config/arm/arm64_n2_linux_gcc > > > b/config/arm/arm64_n2_linux_gcc new file mode 100644 index > > > 000000000..78f6f3e2b > > > --- /dev/null > > > +++ b/config/arm/arm64_n2_linux_gcc > > > @@ -0,0 +1,17 @@ > > > +[binaries] > > > +c = 'aarch64-linux-gnu-gcc' > > > +cpp = 'aarch64-linux-gnu-cpp' > > > +ar = 'aarch64-linux-gnu-gcc-ar' > > > +strip = 'aarch64-linux-gnu-strip' > > > +pkgconfig = 'aarch64-linux-gnu-pkg-config' > > > +pcap-config = '' > > > + > > > +[host_machine] > > > +system = 'linux' > > > +cpu_family = 'aarch64' > > > +cpu = 'armv8-a' > > > +endian = 'little' > > > + > > > +[properties] > > > +implementor_id = '0x41' > > > +implementor_pn = '0xd49' > > > diff --git a/config/arm/meson.build b/config/arm/meson.build index > > > 42b4e43c7..58e0ae643 100644 > > > --- a/config/arm/meson.build > > > +++ b/config/arm/meson.build > > > @@ -89,6 +89,14 @@ flags_n1generic_extra = [ > > > ['RTE_MAX_NUMA_NODES', 1], > > > ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false], > > > ['RTE_LIBRTE_VHOST_NUMA', false]] > > > +flags_n2generic_extra = [ > > > + ['RTE_MACHINE', '"neoverse-n2"'], > > > + ['RTE_MAX_LCORE', 64], > > > + ['RTE_CACHE_LINE_SIZE', 64], > > > + ['RTE_ARM_FEATURE_ATOMICS', true], > > > + ['RTE_USE_C11_MEM_MODEL', true], > > > + ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false], > > > + ['RTE_LIBRTE_VHOST_NUMA', false]] > > Do we need a flag RTE_ARM_FEATURE_SVE? > > I don't think extra flag is needed. We can rely on __ARM_FEATURE_SVE from compiler. > One scenario I can think of where RTE_ARM_FEATURE_SVE can be needed is, when we are > writing inline assembly with sve instructions and using compiler that has no sve support. 
> I'm not sure we will have sve inline assembly as C intrinsics are available. It may be useful to introduce RTE_ARM_FEATURE_SVE to abstract any compiler difference in future(GCC vs clang or another tool chain etc). > > > > > > > > machine_args_generic = [ > > > ['default', ['-march=armv8-a+crc', '-moutline-atomics']], @@ -100,7 > > > +108,8 @@ machine_args_generic = [ > > > ['0xd09', ['-mcpu=cortex-a73']], > > > ['0xd0a', ['-mcpu=cortex-a75']], > > > ['0xd0b', ['-mcpu=cortex-a76']], > > > - ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'], > > > flags_n1generic_extra]] > > > + ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'], > > > flags_n1generic_extra], > > > + ['0xd49', ['-march=armv8.5-a+crypto+sve'], flags_n2generic_extra]] > > Should this be 'sve2'? There should be a flag to indicate SVE2. > > Yes. N2 supports sve2 and sve2 is superset of sve. > I will do the change in next version. > > > > > > > > machine_args_cavium = [ > > > ['default', ['-march=armv8-a+crc+crypto','-mcpu=thunderx']], > > > -- > > > 2.25.1 > ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [PATCH v2 5/5] config: add Arm Neoverse N2 2021-01-11 3:09 ` Jerin Jacob @ 2021-01-11 8:32 ` Ruifeng Wang 2021-01-11 13:58 ` Honnappa Nagarahalli 0 siblings, 1 reply; 43+ messages in thread From: Ruifeng Wang @ 2021-01-11 8:32 UTC (permalink / raw) To: Jerin Jacob Cc: Honnappa Nagarahalli, jerinj, Jan Viktorin, Bruce Richardson, dev, vladimir.medvedkin, hemant.agrawal, nd, nd > -----Original Message----- > From: Jerin Jacob <jerinjacobk@gmail.com> > Sent: Monday, January 11, 2021 11:09 AM > To: Ruifeng Wang <Ruifeng.Wang@arm.com> > Cc: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; > jerinj@marvell.com; Jan Viktorin <viktorin@rehivetech.com>; Bruce > Richardson <bruce.richardson@intel.com>; dev@dpdk.org; > vladimir.medvedkin@intel.com; hemant.agrawal@nxp.com; nd > <nd@arm.com> > Subject: Re: [dpdk-dev] [PATCH v2 5/5] config: add Arm Neoverse N2 > > On Mon, Jan 11, 2021 at 8:31 AM Ruifeng Wang <Ruifeng.Wang@arm.com> > wrote: > > > > > > > -----Original Message----- > > > From: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> > > > Sent: Saturday, January 9, 2021 7:58 AM > > > To: Ruifeng Wang <Ruifeng.Wang@arm.com>; jerinj@marvell.com; > Ruifeng > > > Wang <Ruifeng.Wang@arm.com>; Jan Viktorin > <viktorin@rehivetech.com>; > > > Bruce Richardson <bruce.richardson@intel.com> > > > Cc: dev@dpdk.org; vladimir.medvedkin@intel.com; > > > hemant.agrawal@nxp.com; nd <nd@arm.com>; Honnappa Nagarahalli > > > <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com> > > > Subject: RE: [PATCH v2 5/5] config: add Arm Neoverse N2 > > > > > > + Juraj > > > > > > Please note that this clashes with Juraj's patch for meson rework. > > > > Yes. I didn't base it on the build options rework series. > > I will rebase when that series got merged. > > > > > > <snip> > > > > > > > > > > > Add Arm Neoverse N2 cpu support. 
> > > > > > > > Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> > > > > --- > > > > config/arm/arm64_n2_linux_gcc | 17 +++++++++++++++++ > > > > config/arm/meson.build | 11 ++++++++++- > > > > 2 files changed, 27 insertions(+), 1 deletion(-) create mode > > > > 100644 config/arm/arm64_n2_linux_gcc > > > > > > > > diff --git a/config/arm/arm64_n2_linux_gcc > > > > b/config/arm/arm64_n2_linux_gcc new file mode 100644 index > > > > 000000000..78f6f3e2b > > > > --- /dev/null > > > > +++ b/config/arm/arm64_n2_linux_gcc > > > > @@ -0,0 +1,17 @@ > > > > +[binaries] > > > > +c = 'aarch64-linux-gnu-gcc' > > > > +cpp = 'aarch64-linux-gnu-cpp' > > > > +ar = 'aarch64-linux-gnu-gcc-ar' > > > > +strip = 'aarch64-linux-gnu-strip' > > > > +pkgconfig = 'aarch64-linux-gnu-pkg-config' > > > > +pcap-config = '' > > > > + > > > > +[host_machine] > > > > +system = 'linux' > > > > +cpu_family = 'aarch64' > > > > +cpu = 'armv8-a' > > > > +endian = 'little' > > > > + > > > > +[properties] > > > > +implementor_id = '0x41' > > > > +implementor_pn = '0xd49' > > > > diff --git a/config/arm/meson.build b/config/arm/meson.build index > > > > 42b4e43c7..58e0ae643 100644 > > > > --- a/config/arm/meson.build > > > > +++ b/config/arm/meson.build > > > > @@ -89,6 +89,14 @@ flags_n1generic_extra = [ > > > > ['RTE_MAX_NUMA_NODES', 1], > > > > ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false], > > > > ['RTE_LIBRTE_VHOST_NUMA', false]] > > > > +flags_n2generic_extra = [ > > > > + ['RTE_MACHINE', '"neoverse-n2"'], > > > > + ['RTE_MAX_LCORE', 64], > > > > + ['RTE_CACHE_LINE_SIZE', 64], > > > > + ['RTE_ARM_FEATURE_ATOMICS', true], > > > > + ['RTE_USE_C11_MEM_MODEL', true], > > > > + ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false], > > > > + ['RTE_LIBRTE_VHOST_NUMA', false]] > > > Do we need a flag RTE_ARM_FEATURE_SVE? > > > > I don't think extra flag is needed. We can rely on __ARM_FEATURE_SVE > from compiler. 
> > One scenario I can think of where RTE_ARM_FEATURE_SVE can be needed > > is, when we are writing inline assembly with sve instructions and using > compiler that has no sve support. > > I'm not sure we will have sve inline assembly as C intrinsics are available. > > It may be useful to introduce RTE_ARM_FEATURE_SVE to abstract any > compiler difference in future(GCC vs clang or another tool chain etc). According to Arm C Language Extension (ACLE) for SVE, preprocessor macros like __ARM_FEATURE_SVE are defined to indicate available features. GCC and clang have the macros defined. We can have RTE_ARM_FEATURE_SVE for some other tool chain that don't stick to ACLE. I'll add in next version. > > > > > > > > > > > > > machine_args_generic = [ > > > > ['default', ['-march=armv8-a+crc', '-moutline-atomics']], @@ > > > > -100,7 > > > > +108,8 @@ machine_args_generic = [ > > > > ['0xd09', ['-mcpu=cortex-a73']], > > > > ['0xd0a', ['-mcpu=cortex-a75']], > > > > ['0xd0b', ['-mcpu=cortex-a76']], > > > > - ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'], > > > > flags_n1generic_extra]] > > > > + ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'], > > > > flags_n1generic_extra], > > > > + ['0xd49', ['-march=armv8.5-a+crypto+sve'], > > > > + flags_n2generic_extra]] > > > Should this be 'sve2'? There should be a flag to indicate SVE2. > > > > Yes. N2 supports sve2 and sve2 is superset of sve. > > I will do the change in next version. > > > > > > > > > > > machine_args_cavium = [ > > > > ['default', ['-march=armv8-a+crc+crypto','-mcpu=thunderx']], > > > > -- > > > > 2.25.1 > > ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [PATCH v2 5/5] config: add Arm Neoverse N2 2021-01-11 8:32 ` Ruifeng Wang @ 2021-01-11 13:58 ` Honnappa Nagarahalli 0 siblings, 0 replies; 43+ messages in thread From: Honnappa Nagarahalli @ 2021-01-11 13:58 UTC (permalink / raw) To: Ruifeng Wang, Jerin Jacob Cc: jerinj, Jan Viktorin, Bruce Richardson, dev, vladimir.medvedkin, hemant.agrawal, nd, Honnappa Nagarahalli, nd <snip> > > > > > > > > + Juraj > > > > > > > > Please note that this clashes with Juraj's patch for meson rework. > > > > > > Yes. I didn't base it on the build options rework series. > > > I will rebase when that series got merged. > > > > > > > > <snip> > > > > > > > > > > > > > > Add Arm Neoverse N2 cpu support. > > > > > > > > > > Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> > > > > > --- > > > > > config/arm/arm64_n2_linux_gcc | 17 +++++++++++++++++ > > > > > config/arm/meson.build | 11 ++++++++++- > > > > > 2 files changed, 27 insertions(+), 1 deletion(-) create mode > > > > > 100644 config/arm/arm64_n2_linux_gcc > > > > > > > > > > diff --git a/config/arm/arm64_n2_linux_gcc > > > > > b/config/arm/arm64_n2_linux_gcc new file mode 100644 index > > > > > 000000000..78f6f3e2b > > > > > --- /dev/null > > > > > +++ b/config/arm/arm64_n2_linux_gcc > > > > > @@ -0,0 +1,17 @@ > > > > > +[binaries] > > > > > +c = 'aarch64-linux-gnu-gcc' > > > > > +cpp = 'aarch64-linux-gnu-cpp' > > > > > +ar = 'aarch64-linux-gnu-gcc-ar' > > > > > +strip = 'aarch64-linux-gnu-strip' > > > > > +pkgconfig = 'aarch64-linux-gnu-pkg-config' > > > > > +pcap-config = '' > > > > > + > > > > > +[host_machine] > > > > > +system = 'linux' > > > > > +cpu_family = 'aarch64' > > > > > +cpu = 'armv8-a' > > > > > +endian = 'little' > > > > > + > > > > > +[properties] > > > > > +implementor_id = '0x41' > > > > > +implementor_pn = '0xd49' > > > > > diff --git a/config/arm/meson.build b/config/arm/meson.build > > > > > index > > > > > 42b4e43c7..58e0ae643 100644 > > > > > --- a/config/arm/meson.build > > > > > +++ 
b/config/arm/meson.build > > > > > @@ -89,6 +89,14 @@ flags_n1generic_extra = [ > > > > > ['RTE_MAX_NUMA_NODES', 1], > > > > > ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false], > > > > > ['RTE_LIBRTE_VHOST_NUMA', false]] > > > > > +flags_n2generic_extra = [ > > > > > + ['RTE_MACHINE', '"neoverse-n2"'], > > > > > + ['RTE_MAX_LCORE', 64], > > > > > + ['RTE_CACHE_LINE_SIZE', 64], > > > > > + ['RTE_ARM_FEATURE_ATOMICS', true], > > > > > + ['RTE_USE_C11_MEM_MODEL', true], > > > > > + ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false], > > > > > + ['RTE_LIBRTE_VHOST_NUMA', false]] > > > > Do we need a flag RTE_ARM_FEATURE_SVE? > > > > > > I don't think extra flag is needed. We can rely on __ARM_FEATURE_SVE > > from compiler. > > > One scenario I can think of where RTE_ARM_FEATURE_SVE can be needed > > > is, when we are writing inline assembly with sve instructions and > > > using > > compiler that has no sve support. > > > I'm not sure we will have sve inline assembly as C intrinsics are available. > > > > It may be useful to introduce RTE_ARM_FEATURE_SVE to abstract any > > compiler difference in future(GCC vs clang or another tool chain etc). > > According to Arm C Language Extension (ACLE) for SVE, preprocessor macros > like __ARM_FEATURE_SVE are defined to indicate available features. > GCC and clang have the macros defined. We can have RTE_ARM_FEATURE_SVE > for some other tool chain that don't stick to ACLE. I'll add in next version. The flag __ARM_FEATURE_SVE is a requirement from ACLE. If it is not defined, it is a bug in the compiler. Since GCC/Clang define this flag, I am thinking we are fine without defining our own. It avoids checking for this additional flag in the code. We can always add it when we come across a toolchain that does not define this flag (or fix the toolchain). BTW, this problem exists for __ARM_FEATURE_ATOMICS, which is not defined by Clang. Hence, we have RTE_ARM_FEATURE_ATOMICS. But it is getting fixed in Clang.
> > > > > > > > > > > > > > > > > > > machine_args_generic = [ > > > > > ['default', ['-march=armv8-a+crc', '-moutline-atomics']], @@ > > > > > -100,7 > > > > > +108,8 @@ machine_args_generic = [ > > > > > ['0xd09', ['-mcpu=cortex-a73']], > > > > > ['0xd0a', ['-mcpu=cortex-a75']], > > > > > ['0xd0b', ['-mcpu=cortex-a76']], > > > > > - ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'], > > > > > flags_n1generic_extra]] > > > > > + ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'], > > > > > flags_n1generic_extra], > > > > > + ['0xd49', ['-march=armv8.5-a+crypto+sve'], > > > > > + flags_n2generic_extra]] > > > > Should this be 'sve2'? There should be a flag to indicate SVE2. > > > > > > Yes. N2 supports sve2 and sve2 is superset of sve. > > > I will do the change in next version. > > > > > > > > > > > > > > machine_args_cavium = [ > > > > > ['default', ['-march=armv8-a+crc+crypto','-mcpu=thunderx']], > > > > > -- > > > > > 2.25.1 > > > ^ permalink raw reply [flat|nested] 43+ messages in thread
* [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support 2020-12-18 10:12 [dpdk-dev] [RFC PATCH] lpm: add sve support for lookup on Arm platform Ruifeng Wang 2021-01-05 15:44 ` Medvedkin, Vladimir 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 0/5] lpm lookup with sve support Ruifeng Wang @ 2021-01-12 2:57 ` Ruifeng Wang 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on Arm platform Ruifeng Wang ` (5 more replies) 2 siblings, 6 replies; 43+ messages in thread From: Ruifeng Wang @ 2021-01-12 2:57 UTC (permalink / raw) Cc: dev, vladimir.medvedkin, pbhagavatula, jerinj, hemant.agrawal, honnappa.nagarahalli, nd, Ruifeng Wang Added an lpm4 lookupx4 implementation using the Arm SVE extension. SVE is the Scalable Vector Extension, which is exposed to the user through a vector-length-agnostic interface. Refer to [1] for more information about SVE. Configuration was added for the Neoverse N2 CPU, which has SVE support. Some bugs were fixed so that compiling with SVE enabled passes. [1] https://developer.arm.com/tools-and-software/server-and-hpc/compile/arm-instruction-emulator/resources/tutorials/sve --- v2: Fixed tbl8 group index calculation. (Vladimir) Added N2 config. Fixed compiling when sve was enabled.
Ruifeng Wang (5): lpm: add sve support for lookup on Arm platform net/hns3: fix build with sve enabled net/octeontx: fix build with sve enabled common/octeontx2: fix build with sve enabled config: add Arm Neoverse N2 config/arm/arm64_n2_linux_gcc | 17 +++++ config/arm/meson.build | 11 +++- drivers/common/octeontx2/otx2_io_arm64.h | 15 +++-- drivers/net/hns3/hns3_rxtx.c | 4 +- drivers/net/hns3/meson.build | 1 - drivers/net/octeontx/base/octeontx_io.h | 10 ++- lib/librte_eal/arm/include/rte_vect.h | 3 + lib/librte_lpm/meson.build | 2 +- lib/librte_lpm/rte_lpm.h | 4 ++ lib/librte_lpm/rte_lpm_sve.h | 83 ++++++++++++++++++++++++ 10 files changed, 139 insertions(+), 11 deletions(-) create mode 100644 config/arm/arm64_n2_linux_gcc create mode 100644 lib/librte_lpm/rte_lpm_sve.h -- 2.25.1 ^ permalink raw reply [flat|nested] 43+ messages in thread
* [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on Arm platform 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support Ruifeng Wang @ 2021-01-12 2:57 ` Ruifeng Wang 2021-01-13 15:58 ` David Marchand 2021-01-27 13:04 ` David Marchand 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 2/5] net/hns3: fix build with sve enabled Ruifeng Wang ` (4 subsequent siblings) 5 siblings, 2 replies; 43+ messages in thread From: Ruifeng Wang @ 2021-01-12 2:57 UTC (permalink / raw) To: Jerin Jacob, Ruifeng Wang, Jan Viktorin, Bruce Richardson, Vladimir Medvedkin Cc: dev, pbhagavatula, hemant.agrawal, honnappa.nagarahalli, nd Added new path to do lpm4 lookup by using scalable vector extension. The SVE path will be selected if compiler has flag SVE set. Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> --- v2: Fixed tbl8 group index calculation. (Vladimir) lib/librte_eal/arm/include/rte_vect.h | 3 + lib/librte_lpm/meson.build | 2 +- lib/librte_lpm/rte_lpm.h | 4 ++ lib/librte_lpm/rte_lpm_sve.h | 83 +++++++++++++++++++++++++++ 4 files changed, 91 insertions(+), 1 deletion(-) create mode 100644 lib/librte_lpm/rte_lpm_sve.h diff --git a/lib/librte_eal/arm/include/rte_vect.h b/lib/librte_eal/arm/include/rte_vect.h index a739e6e66..093e9122a 100644 --- a/lib/librte_eal/arm/include/rte_vect.h +++ b/lib/librte_eal/arm/include/rte_vect.h @@ -9,6 +9,9 @@ #include "generic/rte_vect.h" #include "rte_debug.h" #include "arm_neon.h" +#ifdef __ARM_FEATURE_SVE +#include <arm_sve.h> +#endif #ifdef __cplusplus extern "C" { diff --git a/lib/librte_lpm/meson.build b/lib/librte_lpm/meson.build index 6cfc083c5..f93c86640 100644 --- a/lib/librte_lpm/meson.build +++ b/lib/librte_lpm/meson.build @@ -5,6 +5,6 @@ sources = files('rte_lpm.c', 'rte_lpm6.c') headers = files('rte_lpm.h', 'rte_lpm6.h') # since header files have different names, we can install all vector headers # without worrying about which architecture we actually need -headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 
'rte_lpm_sse.h') +headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h', 'rte_lpm_sve.h') deps += ['hash'] deps += ['rcu'] diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h index 1afe55cdc..28b57683b 100644 --- a/lib/librte_lpm/rte_lpm.h +++ b/lib/librte_lpm/rte_lpm.h @@ -402,7 +402,11 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], uint32_t defv); #if defined(RTE_ARCH_ARM) +#ifdef __ARM_FEATURE_SVE +#include "rte_lpm_sve.h" +#else #include "rte_lpm_neon.h" +#endif #elif defined(RTE_ARCH_PPC_64) #include "rte_lpm_altivec.h" #else diff --git a/lib/librte_lpm/rte_lpm_sve.h b/lib/librte_lpm/rte_lpm_sve.h new file mode 100644 index 000000000..2e319373e --- /dev/null +++ b/lib/librte_lpm/rte_lpm_sve.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2020 Arm Limited + */ + +#ifndef _RTE_LPM_SVE_H_ +#define _RTE_LPM_SVE_H_ + +#include <rte_vect.h> + +#ifdef __cplusplus +extern "C" { +#endif + +__rte_internal +static void +__rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips, + uint32_t *__rte_restrict next_hops, const uint32_t n) +{ + uint32_t i = 0; + svuint32_t v_ip, v_idx, v_tbl24, v_tbl8, v_hop; + svuint32_t v_mask_xv, v_mask_v, v_mask_hop; + svbool_t pg = svwhilelt_b32(i, n); + svbool_t pv; + + do { + v_ip = svld1(pg, &ips[i]); + /* Get indices for tbl24[] */ + v_idx = svlsr_x(pg, v_ip, 8); + /* Extract values from tbl24[] */ + v_tbl24 = svld1_gather_index(pg, (const uint32_t *)lpm->tbl24, + v_idx); + + /* Create mask with valid set */ + v_mask_v = svdup_u32_z(pg, RTE_LPM_LOOKUP_SUCCESS); + /* Create mask with valid and valid_group set */ + v_mask_xv = svdup_u32_z(pg, RTE_LPM_VALID_EXT_ENTRY_BITMASK); + /* Create predicate for tbl24 entries: (valid && !valid_group) */ + pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), v_mask_v); + /* Create mask for next_hop in table entry */ + v_mask_hop = svdup_u32_z(pg, 0x00ffffff); + /* Extract next_hop and write back */ + v_hop 
= svand_x(pv, v_tbl24, v_mask_hop); + svst1(pv, &next_hops[i], v_hop); + + /* Update predicate for tbl24 entries: (valid && valid_group) */ + pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), v_mask_xv); + /* Compute tbl8 index */ + v_idx = svand_x(pv, v_tbl24, svdup_u32_z(pv, 0xffffff)); + v_idx = svmul_x(pv, v_idx, RTE_LPM_TBL8_GROUP_NUM_ENTRIES); + v_idx = svadd_x(pv, svand_x(pv, v_ip, svdup_u32_z(pv, 0xff)), + v_idx); + /* Extract values from tbl8[] */ + v_tbl8 = svld1_gather_index(pv, (const uint32_t *)lpm->tbl8, + v_idx); + /* Update predicate for tbl8 entries: (valid) */ + pv = svcmpeq(pv, svand_z(pv, v_tbl8, v_mask_v), v_mask_v); + /* Extract next_hop and write back */ + v_hop = svand_x(pv, v_tbl8, v_mask_hop); + svst1(pv, &next_hops[i], v_hop); + + i += svlen(v_ip); + pg = svwhilelt_b32(i, n); + } while (svptest_any(svptrue_b32(), pg)); +} + +static inline void +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], + uint32_t defv) +{ + uint32_t i, ips[4]; + + vst1q_s32((int32_t *)ips, ip); + for (i = 0; i < 4; i++) + hop[i] = defv; + + __rte_lpm_lookup_vec(lpm, ips, hop, 4); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_LPM_SVE_H_ */ -- 2.25.1 ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on Arm platform 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on Arm platform Ruifeng Wang @ 2021-01-13 15:58 ` David Marchand 2021-01-27 13:04 ` David Marchand 1 sibling, 0 replies; 43+ messages in thread From: David Marchand @ 2021-01-13 15:58 UTC (permalink / raw) To: Ruifeng Wang, Vladimir Medvedkin, Bruce Richardson, Jerin Jacob, Honnappa Nagarahalli Cc: Jan Viktorin, dev, Pavan Nikhilesh, Hemant Agrawal, nd On Tue, Jan 12, 2021 at 3:57 AM Ruifeng Wang <ruifeng.wang@arm.com> wrote: > > Added new path to do lpm4 lookup by using scalable vector extension. > The SVE path will be selected if compiler has flag SVE set. > > Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> Review please? -- David Marchand ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on Arm platform 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on Arm platform Ruifeng Wang 2021-01-13 15:58 ` David Marchand @ 2021-01-27 13:04 ` David Marchand 2021-01-27 21:03 ` Honnappa Nagarahalli 2021-01-28 5:47 ` Ruifeng Wang 1 sibling, 2 replies; 43+ messages in thread From: David Marchand @ 2021-01-27 13:04 UTC (permalink / raw) To: Ruifeng Wang Cc: Jerin Jacob, Jan Viktorin, Bruce Richardson, Vladimir Medvedkin, dev, Pavan Nikhilesh, Hemant Agrawal, Honnappa Nagarahalli, nd On Tue, Jan 12, 2021 at 3:57 AM Ruifeng Wang <ruifeng.wang@arm.com> wrote: > diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h > index 1afe55cdc..28b57683b 100644 > --- a/lib/librte_lpm/rte_lpm.h > +++ b/lib/librte_lpm/rte_lpm.h > @@ -402,7 +402,11 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], > uint32_t defv); > > #if defined(RTE_ARCH_ARM) > +#ifdef __ARM_FEATURE_SVE > +#include "rte_lpm_sve.h" > +#else > #include "rte_lpm_neon.h" > +#endif > #elif defined(RTE_ARCH_PPC_64) > #include "rte_lpm_altivec.h" > #else > diff --git a/lib/librte_lpm/rte_lpm_sve.h b/lib/librte_lpm/rte_lpm_sve.h > new file mode 100644 > index 000000000..2e319373e > --- /dev/null > +++ b/lib/librte_lpm/rte_lpm_sve.h > @@ -0,0 +1,83 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2020 Arm Limited > + */ > + > +#ifndef _RTE_LPM_SVE_H_ > +#define _RTE_LPM_SVE_H_ > + > +#include <rte_vect.h> > + > +#ifdef __cplusplus > +extern "C" { > +#endif > + > +__rte_internal > +static void I was looking into the use of the __rte_internal tag in the tree. This helper is called from an inlined API used by applications, i.e. from outside the DPDK build. It looks like the compiler is not complaining when compiling examples (I hacked my env to cross compile with gcc 10 + SVE enabled) but this seems incorrect to me. Is there really a need for this helper? It is only used below afaics.
> +__rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips, > + uint32_t *__rte_restrict next_hops, const uint32_t n) > +{ [snip] > +} > + > +static inline void > +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], > + uint32_t defv) > +{ > + uint32_t i, ips[4]; > + > + vst1q_s32((int32_t *)ips, ip); > + for (i = 0; i < 4; i++) > + hop[i] = defv; > + > + __rte_lpm_lookup_vec(lpm, ips, hop, 4); > +} -- David Marchand ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on Arm platform 2021-01-27 13:04 ` David Marchand @ 2021-01-27 21:03 ` Honnappa Nagarahalli 2021-01-28 8:03 ` David Marchand 2021-01-28 5:47 ` Ruifeng Wang 1 sibling, 1 reply; 43+ messages in thread From: Honnappa Nagarahalli @ 2021-01-27 21:03 UTC (permalink / raw) To: David Marchand, Ruifeng Wang Cc: jerinj, Jan Viktorin, Bruce Richardson, Vladimir Medvedkin, dev, Pavan Nikhilesh, hemant.agrawal, nd, Honnappa Nagarahalli, nd <snip> > > On Tue, Jan 12, 2021 at 3:57 AM Ruifeng Wang <ruifeng.wang@arm.com> > wrote: > > diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h index > > 1afe55cdc..28b57683b 100644 > > --- a/lib/librte_lpm/rte_lpm.h > > +++ b/lib/librte_lpm/rte_lpm.h > > @@ -402,7 +402,11 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, > xmm_t ip, uint32_t hop[4], > > uint32_t defv); > > > > #if defined(RTE_ARCH_ARM) > > +#ifdef __ARM_FEATURE_SVE > > +#include "rte_lpm_sve.h" > > +#else > > #include "rte_lpm_neon.h" > > +#endif > > #elif defined(RTE_ARCH_PPC_64) > > #include "rte_lpm_altivec.h" > > #else > > diff --git a/lib/librte_lpm/rte_lpm_sve.h > > b/lib/librte_lpm/rte_lpm_sve.h new file mode 100644 index > > 000000000..2e319373e > > --- /dev/null > > +++ b/lib/librte_lpm/rte_lpm_sve.h > > @@ -0,0 +1,83 @@ > > +/* SPDX-License-Identifier: BSD-3-Clause > > + * Copyright(c) 2020 Arm Limited > > + */ > > + > > +#ifndef _RTE_LPM_SVE_H_ > > +#define _RTE_LPM_SVE_H_ > > + > > +#include <rte_vect.h> > > + > > +#ifdef __cplusplus > > +extern "C" { > > +#endif > > + > > +__rte_internal > > +static void > > I was looking into use of the __rte_internal tag in the tree. > > This helper is called from a inlined API used by applications, so out of the > DPDK build. > It looks like the compiler is not complaining when compiling examples (I > hacked my env to cross compile with gcc 10 + SVE enabled) but this seems > incorrect to me. > > Is there really a need for this helper? 
> It is only used below afaics. I do not think it is required. At the same time the commit log when '__rte_internal' was introduced is confusing. It says "Introduce the __rte_internal tag to mark internal ABI function which is used only by the drivers or other libraries". Why would an internal function have an ABI? > > > > +__rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips, > > + uint32_t *__rte_restrict next_hops, const uint32_t n) > > +{ > > [snip] > > > > +} > > + > > +static inline void > > +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], > > + uint32_t defv) > > +{ > > + uint32_t i, ips[4]; > > + > > + vst1q_s32((int32_t *)ips, ip); > > + for (i = 0; i < 4; i++) > > + hop[i] = defv; > > + > > + __rte_lpm_lookup_vec(lpm, ips, hop, 4); } > > > -- > David Marchand ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on Arm platform 2021-01-27 21:03 ` Honnappa Nagarahalli @ 2021-01-28 8:03 ` David Marchand 2021-01-28 12:24 ` Honnappa Nagarahalli 0 siblings, 1 reply; 43+ messages in thread From: David Marchand @ 2021-01-28 8:03 UTC (permalink / raw) To: Honnappa Nagarahalli Cc: Ruifeng Wang, jerinj, Jan Viktorin, Bruce Richardson, Vladimir Medvedkin, dev, Pavan Nikhilesh, hemant.agrawal, nd On Wed, Jan 27, 2021 at 10:03 PM Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote: > > <snip> > > > > > On Tue, Jan 12, 2021 at 3:57 AM Ruifeng Wang <ruifeng.wang@arm.com> > > wrote: > > > diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h index > > > 1afe55cdc..28b57683b 100644 > > > --- a/lib/librte_lpm/rte_lpm.h > > > +++ b/lib/librte_lpm/rte_lpm.h > > > @@ -402,7 +402,11 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, > > xmm_t ip, uint32_t hop[4], > > > uint32_t defv); > > > > > > #if defined(RTE_ARCH_ARM) > > > +#ifdef __ARM_FEATURE_SVE > > > +#include "rte_lpm_sve.h" > > > +#else > > > #include "rte_lpm_neon.h" > > > +#endif > > > #elif defined(RTE_ARCH_PPC_64) > > > #include "rte_lpm_altivec.h" > > > #else > > > diff --git a/lib/librte_lpm/rte_lpm_sve.h > > > b/lib/librte_lpm/rte_lpm_sve.h new file mode 100644 index > > > 000000000..2e319373e > > > --- /dev/null > > > +++ b/lib/librte_lpm/rte_lpm_sve.h > > > @@ -0,0 +1,83 @@ > > > +/* SPDX-License-Identifier: BSD-3-Clause > > > + * Copyright(c) 2020 Arm Limited > > > + */ > > > + > > > +#ifndef _RTE_LPM_SVE_H_ > > > +#define _RTE_LPM_SVE_H_ > > > + > > > +#include <rte_vect.h> > > > + > > > +#ifdef __cplusplus > > > +extern "C" { > > > +#endif > > > + > > > +__rte_internal > > > +static void > > > > I was looking into use of the __rte_internal tag in the tree. > > > > This helper is called from a inlined API used by applications, so out of the > > DPDK build. 
> > It looks like the compiler is not complaining when compiling examples (I > > hacked my env to cross compile with gcc 10 + SVE enabled) but this seems > > incorrect to me. > > > > Is there really a need for this helper? > > It is only used below afaics. > I do not think it is required. > > At the same time the commit log when '__rte_internal' was introduced is confusing. > It says "Introduce the __rte_internal tag to mark internal ABI function which is used only by the drivers or other libraries". Why would an internal function have an ABI? It happens that drivers/libraries in DPDK offer some interface for other parts of the DPDK to use. But we might want to keep them hidden from final applications, because this is purely internal and/or we don't want to guarantee compatibility in later versions. For such cases, a function can be marked __rte_internal. This tag has two impacts: - a marked symbol is versioned as INTERNAL when exported (so this does not apply to inlines), - if an application tries to use a marked API, an error is triggered at build time to prevent use of such API. -- David Marchand ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on Arm platform 2021-01-28 8:03 ` David Marchand @ 2021-01-28 12:24 ` Honnappa Nagarahalli 0 siblings, 0 replies; 43+ messages in thread From: Honnappa Nagarahalli @ 2021-01-28 12:24 UTC (permalink / raw) To: David Marchand Cc: Ruifeng Wang, jerinj, Jan Viktorin, Bruce Richardson, Vladimir Medvedkin, dev, Pavan Nikhilesh, hemant.agrawal, nd, Honnappa Nagarahalli, nd <snip> > > On Wed, Jan 27, 2021 at 10:03 PM Honnappa Nagarahalli > <Honnappa.Nagarahalli@arm.com> wrote: > > > > <snip> > > > > > > > > On Tue, Jan 12, 2021 at 3:57 AM Ruifeng Wang <ruifeng.wang@arm.com> > > > wrote: > > > > diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h > > > > index 1afe55cdc..28b57683b 100644 > > > > --- a/lib/librte_lpm/rte_lpm.h > > > > +++ b/lib/librte_lpm/rte_lpm.h > > > > @@ -402,7 +402,11 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, > > > xmm_t ip, uint32_t hop[4], > > > > uint32_t defv); > > > > > > > > #if defined(RTE_ARCH_ARM) > > > > +#ifdef __ARM_FEATURE_SVE > > > > +#include "rte_lpm_sve.h" > > > > +#else > > > > #include "rte_lpm_neon.h" > > > > +#endif > > > > #elif defined(RTE_ARCH_PPC_64) > > > > #include "rte_lpm_altivec.h" > > > > #else > > > > diff --git a/lib/librte_lpm/rte_lpm_sve.h > > > > b/lib/librte_lpm/rte_lpm_sve.h new file mode 100644 index > > > > 000000000..2e319373e > > > > --- /dev/null > > > > +++ b/lib/librte_lpm/rte_lpm_sve.h > > > > @@ -0,0 +1,83 @@ > > > > +/* SPDX-License-Identifier: BSD-3-Clause > > > > + * Copyright(c) 2020 Arm Limited > > > > + */ > > > > + > > > > +#ifndef _RTE_LPM_SVE_H_ > > > > +#define _RTE_LPM_SVE_H_ > > > > + > > > > +#include <rte_vect.h> > > > > + > > > > +#ifdef __cplusplus > > > > +extern "C" { > > > > +#endif > > > > + > > > > +__rte_internal > > > > +static void > > > > > > I was looking into use of the __rte_internal tag in the tree. 
> > > > > > This helper is called from a inlined API used by applications, so > > > out of the DPDK build. > > > It looks like the compiler is not complaining when compiling > > > examples (I hacked my env to cross compile with gcc 10 + SVE > > > enabled) but this seems incorrect to me. > > > > > > Is there really a need for this helper? > > > It is only used below afaics. > > I do not think it is required. > > > > At the same time the commit log when '__rte_internal' was introduced is > confusing. > > It says "Introduce the __rte_internal tag to mark internal ABI function which is > used only by the drivers or other libraries". Why would an internal function have > an ABI? > > It happens that drivers/libraries in DPDK offer some interface for other parts of > the DPDK to use. > But we might want them to keep them hidden to final applications, because this > is purely internal and/or we don't want to guarantee compatibility in later > versions. > For such cases, a function can be marked __rte_internal. > > > This tag has two impacts: > - a marked symbol is versionned as INTERNAL when exported (so this does not > apply to inlines), > - if an application tries to use a marked API, an error is triggered at build time to > prevent use of such API, Thanks David, it makes sense now. The word 'internal ABI' in the commit log caused the confusion. Is this required because all the header files (header files meant for the application and the DPDK internal header files) are in the same directory? From the above definition, we do not need the internal tag for this function as it is very much internal to LPM library. > > > -- > David Marchand ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on Arm platform 2021-01-27 13:04 ` David Marchand 2021-01-27 21:03 ` Honnappa Nagarahalli @ 2021-01-28 5:47 ` Ruifeng Wang 1 sibling, 0 replies; 43+ messages in thread From: Ruifeng Wang @ 2021-01-28 5:47 UTC (permalink / raw) To: David Marchand Cc: jerinj, Jan Viktorin, Bruce Richardson, Vladimir Medvedkin, dev, Pavan Nikhilesh, hemant.agrawal, Honnappa Nagarahalli, nd, nd > -----Original Message----- > From: David Marchand <david.marchand@redhat.com> > Sent: Wednesday, January 27, 2021 9:05 PM > To: Ruifeng Wang <Ruifeng.Wang@arm.com> > Cc: jerinj@marvell.com; Jan Viktorin <viktorin@rehivetech.com>; Bruce > Richardson <bruce.richardson@intel.com>; Vladimir Medvedkin > <vladimir.medvedkin@intel.com>; dev <dev@dpdk.org>; Pavan Nikhilesh > <pbhagavatula@marvell.com>; hemant.agrawal@nxp.com; Honnappa > Nagarahalli <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com> > Subject: Re: [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on > Arm platform > > On Tue, Jan 12, 2021 at 3:57 AM Ruifeng Wang <ruifeng.wang@arm.com> > wrote: > > diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h index > > 1afe55cdc..28b57683b 100644 > > --- a/lib/librte_lpm/rte_lpm.h > > +++ b/lib/librte_lpm/rte_lpm.h > > @@ -402,7 +402,11 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, > xmm_t ip, uint32_t hop[4], > > uint32_t defv); > > > > #if defined(RTE_ARCH_ARM) > > +#ifdef __ARM_FEATURE_SVE > > +#include "rte_lpm_sve.h" > > +#else > > #include "rte_lpm_neon.h" > > +#endif > > #elif defined(RTE_ARCH_PPC_64) > > #include "rte_lpm_altivec.h" > > #else > > diff --git a/lib/librte_lpm/rte_lpm_sve.h > > b/lib/librte_lpm/rte_lpm_sve.h new file mode 100644 index > > 000000000..2e319373e > > --- /dev/null > > +++ b/lib/librte_lpm/rte_lpm_sve.h > > @@ -0,0 +1,83 @@ > > +/* SPDX-License-Identifier: BSD-3-Clause > > + * Copyright(c) 2020 Arm Limited > > + */ > > + > > +#ifndef _RTE_LPM_SVE_H_ > > +#define _RTE_LPM_SVE_H_ > > 
+ > > +#include <rte_vect.h> > > + > > +#ifdef __cplusplus > > +extern "C" { > > +#endif > > + > > +__rte_internal > > +static void > > I was looking into use of the __rte_internal tag in the tree. > > This helper is called from a inlined API used by applications, so out of the > DPDK build. > It looks like the compiler is not complaining when compiling examples (I > hacked my env to cross compile with gcc 10 + SVE enabled) but this seems > incorrect to me. > > Is there really a need for this helper? > It is only used below afaics. My intention was to keep the helper generic. So it can be used not only in rte_lpm_lookupx4 as below, but also in other lookup functions like rte_lpm_lookup_bulk where number of IPs to be looked up is not a fixed value. Will removing __rte_internal tag resolve the issue? > > > > +__rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips, > > + uint32_t *__rte_restrict next_hops, const uint32_t n) > > +{ > > [snip] > > > > +} > > + > > +static inline void > > +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], > > + uint32_t defv) > > +{ > > + uint32_t i, ips[4]; > > + > > + vst1q_s32((int32_t *)ips, ip); > > + for (i = 0; i < 4; i++) > > + hop[i] = defv; > > + > > + __rte_lpm_lookup_vec(lpm, ips, hop, 4); } > > > -- > David Marchand ^ permalink raw reply [flat|nested] 43+ messages in thread
* [dpdk-dev] [PATCH v3 2/5] net/hns3: fix build with sve enabled 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support Ruifeng Wang 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on Arm platform Ruifeng Wang @ 2021-01-12 2:57 ` Ruifeng Wang 2021-01-13 2:16 ` Honnappa Nagarahalli 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 3/5] net/octeontx: " Ruifeng Wang ` (3 subsequent siblings) 5 siblings, 1 reply; 43+ messages in thread From: Ruifeng Wang @ 2021-01-12 2:57 UTC (permalink / raw) To: Wei Hu (Xavier), Min Hu (Connor), Yisen Zhuang, Lijun Ou, Chengwen Feng, Chengchang Tang, Huisong Li Cc: dev, vladimir.medvedkin, pbhagavatula, jerinj, hemant.agrawal, honnappa.nagarahalli, nd, Ruifeng Wang, stable Building with SVE extension enabled stopped with error: error: ACLE function ‘svwhilelt_b64_s32’ requires ISA extension ‘sve’ 18 | #define PG64_256BIT svwhilelt_b64(0, 4) This is caused by unintentional cflags reset. Fixed the issue by not touching cflags, and using flags defined by compiler. Fixes: 952ebacce4f2 ("net/hns3: support SVE Rx") Cc: stable@dpdk.org Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> --- v3: Removed extra flag, use compiler flag instead. 
drivers/net/hns3/hns3_rxtx.c | 4 ++-- drivers/net/hns3/meson.build | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/hns3/hns3_rxtx.c b/drivers/net/hns3/hns3_rxtx.c index 88d3baba4..5ac36b314 100644 --- a/drivers/net/hns3/hns3_rxtx.c +++ b/drivers/net/hns3/hns3_rxtx.c @@ -10,7 +10,7 @@ #include <rte_io.h> #include <rte_net.h> #include <rte_malloc.h> -#if defined(RTE_ARCH_ARM64) && defined(CC_SVE_SUPPORT) +#if defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_SVE) #include <rte_cpuflags.h> #endif @@ -2467,7 +2467,7 @@ hns3_rx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id, static bool hns3_check_sve_support(void) { -#if defined(RTE_ARCH_ARM64) && defined(CC_SVE_SUPPORT) +#if defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_SVE) if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SVE)) return true; #endif diff --git a/drivers/net/hns3/meson.build b/drivers/net/hns3/meson.build index 45cee34d9..5674d986b 100644 --- a/drivers/net/hns3/meson.build +++ b/drivers/net/hns3/meson.build @@ -32,7 +32,6 @@ deps += ['hash'] if arch_subdir == 'arm' and dpdk_conf.get('RTE_ARCH_64') sources += files('hns3_rxtx_vec.c') if cc.get_define('__ARM_FEATURE_SVE', args: machine_args) != '' - cflags = ['-DCC_SVE_SUPPORT'] sources += files('hns3_rxtx_vec_sve.c') endif endif -- 2.25.1 ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [PATCH v3 2/5] net/hns3: fix build with sve enabled 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 2/5] net/hns3: fix build with sve enabled Ruifeng Wang @ 2021-01-13 2:16 ` Honnappa Nagarahalli 0 siblings, 0 replies; 43+ messages in thread From: Honnappa Nagarahalli @ 2021-01-13 2:16 UTC (permalink / raw) To: Ruifeng Wang, Wei Hu (Xavier), Min Hu (Connor), Yisen Zhuang, Lijun Ou, Chengwen Feng, Chengchang Tang, Huisong Li Cc: dev, vladimir.medvedkin, pbhagavatula, jerinj, hemant.agrawal, nd, Ruifeng Wang, stable, Honnappa Nagarahalli, nd <snip> > > Building with SVE extension enabled stopped with error: > > error: ACLE function ‘svwhilelt_b64_s32’ requires ISA extension ‘sve’ > 18 | #define PG64_256BIT svwhilelt_b64(0, 4) > > This is caused by unintentional cflags reset. > Fixed the issue by not touching cflags, and using flags defined by compiler. > > Fixes: 952ebacce4f2 ("net/hns3: support SVE Rx") > Cc: stable@dpdk.org > > Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> > --- > v3: > Removed extra flag, use compiler flag instead. 
> > drivers/net/hns3/hns3_rxtx.c | 4 ++-- > drivers/net/hns3/meson.build | 1 - > 2 files changed, 2 insertions(+), 3 deletions(-) > > diff --git a/drivers/net/hns3/hns3_rxtx.c b/drivers/net/hns3/hns3_rxtx.c > index 88d3baba4..5ac36b314 100644 > --- a/drivers/net/hns3/hns3_rxtx.c > +++ b/drivers/net/hns3/hns3_rxtx.c > @@ -10,7 +10,7 @@ > #include <rte_io.h> > #include <rte_net.h> > #include <rte_malloc.h> > -#if defined(RTE_ARCH_ARM64) && defined(CC_SVE_SUPPORT) > +#if defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_SVE) > #include <rte_cpuflags.h> > #endif > > @@ -2467,7 +2467,7 @@ hns3_rx_burst_mode_get(struct rte_eth_dev > *dev, __rte_unused uint16_t queue_id, static bool > hns3_check_sve_support(void) > { > -#if defined(RTE_ARCH_ARM64) && defined(CC_SVE_SUPPORT) > +#if defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_SVE) > if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SVE)) > return true; > #endif > diff --git a/drivers/net/hns3/meson.build b/drivers/net/hns3/meson.build > index 45cee34d9..5674d986b 100644 > --- a/drivers/net/hns3/meson.build > +++ b/drivers/net/hns3/meson.build > @@ -32,7 +32,6 @@ deps += ['hash'] > if arch_subdir == 'arm' and dpdk_conf.get('RTE_ARCH_64') > sources += files('hns3_rxtx_vec.c') > if cc.get_define('__ARM_FEATURE_SVE', args: machine_args) != '' > - cflags = ['-DCC_SVE_SUPPORT'] > sources += files('hns3_rxtx_vec_sve.c') > endif > endif > -- > 2.25.1 ^ permalink raw reply [flat|nested] 43+ messages in thread
* [dpdk-dev] [PATCH v3 3/5] net/octeontx: fix build with sve enabled 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support Ruifeng Wang 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on Arm platform Ruifeng Wang 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 2/5] net/hns3: fix build with sve enabled Ruifeng Wang @ 2021-01-12 2:57 ` Ruifeng Wang 2021-01-12 4:39 ` Jerin Jacob 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 4/5] common/octeontx2: " Ruifeng Wang ` (2 subsequent siblings) 5 siblings, 1 reply; 43+ messages in thread From: Ruifeng Wang @ 2021-01-12 2:57 UTC (permalink / raw) To: Harman Kalra, Santosh Shukla, Jerin Jacob Cc: dev, vladimir.medvedkin, pbhagavatula, jerinj, hemant.agrawal, honnappa.nagarahalli, nd, Ruifeng Wang, stable Building with gcc 10.2 with SVE extension enabled got error: {standard input}: Assembler messages: {standard input}:91: Error: selected processor does not support `addvl x4,x8,#-1' {standard input}:95: Error: selected processor does not support `ptrue p1.d,all' {standard input}:135: Error: selected processor does not support `whilelo p2.d,xzr,x5' {standard input}:137: Error: selected processor does not support `decb x1' This is because inline assembly code explicitly resets cpu model to not have SVE support. Thus SVE instructions generated by compiler auto vectorization got rejected by assembler. Added SVE to the cpu model specified by inline assembly for SVE support. Not replacing the inline assembly with C atomics because the driver relies on specific LSE instruction to interface to co-processor [1]. Fixes: f0c7bb1bf778 ("net/octeontx/base: add octeontx IO operations") Cc: jerinj@marvell.com Cc: stable@dpdk.org [1] https://mails.dpdk.org/archives/dev/2021-January/196092.html Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> --- v3: Keep inline assembly and add sve extension to fix issue. 
(Pavan) drivers/net/octeontx/base/octeontx_io.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/octeontx/base/octeontx_io.h b/drivers/net/octeontx/base/octeontx_io.h index 04b9ce191..d0b9cfbc6 100644 --- a/drivers/net/octeontx/base/octeontx_io.h +++ b/drivers/net/octeontx/base/octeontx_io.h @@ -52,6 +52,11 @@ do { \ #endif #if defined(RTE_ARCH_ARM64) +#if defined(__ARM_FEATURE_SVE) +#define __LSE_PREAMBLE " .cpu generic+lse+sve\n" +#else +#define __LSE_PREAMBLE " .cpu generic+lse\n" +#endif /** * Perform an atomic fetch-and-add operation. */ @@ -61,7 +66,7 @@ octeontx_reg_ldadd_u64(void *addr, int64_t off) uint64_t old_val; __asm__ volatile( - " .cpu generic+lse\n" + __LSE_PREAMBLE " ldadd %1, %0, [%2]\n" : "=r" (old_val) : "r" (off), "r" (addr) : "memory"); @@ -98,12 +103,13 @@ octeontx_reg_lmtst(void *lmtline_va, void *ioreg_va, const uint64_t cmdbuf[], /* LDEOR initiates atomic transfer to I/O device */ __asm__ volatile( - " .cpu generic+lse\n" + __LSE_PREAMBLE " ldeor xzr, %0, [%1]\n" : "=r" (result) : "r" (ioreg_va) : "memory"); } while (!result); } +#undef __LSE_PREAMBLE #else static inline uint64_t -- 2.25.1 ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [PATCH v3 3/5] net/octeontx: fix build with sve enabled 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 3/5] net/octeontx: " Ruifeng Wang @ 2021-01-12 4:39 ` Jerin Jacob 0 siblings, 0 replies; 43+ messages in thread From: Jerin Jacob @ 2021-01-12 4:39 UTC (permalink / raw) To: Ruifeng Wang Cc: Harman Kalra, Santosh Shukla, Jerin Jacob, dpdk-dev, Vladimir Medvedkin, Pavan Nikhilesh, Jerin Jacob, Hemant Agrawal, Honnappa Nagarahalli, nd, dpdk stable On Tue, Jan 12, 2021 at 8:28 AM Ruifeng Wang <ruifeng.wang@arm.com> wrote: > > Building with gcc 10.2 with SVE extension enabled got error: > > {standard input}: Assembler messages: > {standard input}:91: Error: selected processor does not support `addvl x4,x8,#-1' > {standard input}:95: Error: selected processor does not support `ptrue p1.d,all' > {standard input}:135: Error: selected processor does not support `whilelo p2.d,xzr,x5' > {standard input}:137: Error: selected processor does not support `decb x1' > > This is because inline assembly code explicitly resets cpu model to > not have SVE support. Thus SVE instructions generated by compiler > auto vectorization got rejected by assembler. > > Added SVE to the cpu model specified by inline assembly for SVE support. > Not replacing the inline assembly with C atomics because the driver relies > on specific LSE instruction to interface to co-processor [1]. > > Fixes: f0c7bb1bf778 ("net/octeontx/base: add octeontx IO operations") > Cc: jerinj@marvell.com > Cc: stable@dpdk.org > > [1] https://mails.dpdk.org/archives/dev/2021-January/196092.html > > Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> Reviewed-by: Jerin Jacob <jerinj@marvell.com> > --- > v3: > Keep inline assembly and add sve extension to fix issue. 
(Pavan) > > drivers/net/octeontx/base/octeontx_io.h | 10 ++++++++-- > 1 file changed, 8 insertions(+), 2 deletions(-) > > diff --git a/drivers/net/octeontx/base/octeontx_io.h b/drivers/net/octeontx/base/octeontx_io.h > index 04b9ce191..d0b9cfbc6 100644 > --- a/drivers/net/octeontx/base/octeontx_io.h > +++ b/drivers/net/octeontx/base/octeontx_io.h > @@ -52,6 +52,11 @@ do { \ > #endif > > #if defined(RTE_ARCH_ARM64) > +#if defined(__ARM_FEATURE_SVE) > +#define __LSE_PREAMBLE " .cpu generic+lse+sve\n" > +#else > +#define __LSE_PREAMBLE " .cpu generic+lse\n" > +#endif > /** > * Perform an atomic fetch-and-add operation. > */ > @@ -61,7 +66,7 @@ octeontx_reg_ldadd_u64(void *addr, int64_t off) > uint64_t old_val; > > __asm__ volatile( > - " .cpu generic+lse\n" > + __LSE_PREAMBLE > " ldadd %1, %0, [%2]\n" > : "=r" (old_val) : "r" (off), "r" (addr) : "memory"); > > @@ -98,12 +103,13 @@ octeontx_reg_lmtst(void *lmtline_va, void *ioreg_va, const uint64_t cmdbuf[], > > /* LDEOR initiates atomic transfer to I/O device */ > __asm__ volatile( > - " .cpu generic+lse\n" > + __LSE_PREAMBLE > " ldeor xzr, %0, [%1]\n" > : "=r" (result) : "r" (ioreg_va) : "memory"); > } while (!result); > } > > +#undef __LSE_PREAMBLE > #else > > static inline uint64_t > -- > 2.25.1 > ^ permalink raw reply [flat|nested] 43+ messages in thread
* [dpdk-dev] [PATCH v3 4/5] common/octeontx2: fix build with sve enabled 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support Ruifeng Wang ` (2 preceding siblings ...) 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 3/5] net/octeontx: " Ruifeng Wang @ 2021-01-12 2:57 ` Ruifeng Wang 2021-01-12 4:38 ` Jerin Jacob 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 5/5] config: add Arm Neoverse N2 Ruifeng Wang 2021-01-14 15:18 ` [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support David Marchand 5 siblings, 1 reply; 43+ messages in thread From: Ruifeng Wang @ 2021-01-12 2:57 UTC (permalink / raw) To: Jerin Jacob, Nithin Dabilpuram, Pavan Nikhilesh Cc: dev, vladimir.medvedkin, hemant.agrawal, honnappa.nagarahalli, nd, Ruifeng Wang, stable Building with gcc 10.2 with SVE extension enabled got error: {standard input}: Assembler messages: {standard input}:4002: Error: selected processor does not support `mov z3.b,#0' {standard input}:4003: Error: selected processor does not support `whilelo p1.b,xzr,x7' {standard input}:4005: Error: selected processor does not support `ld1b z0.b,p1/z,[x8]' {standard input}:4006: Error: selected processor does not support `whilelo p4.s,wzr,w7' This is because inline assembly code explicitly resets cpu model to not have SVE support. Thus SVE instructions generated by compiler auto vectorization got rejected by assembler. Added SVE to the cpu model specified by inline assembly for SVE support. Not replacing the inline assembly with C atomics because the driver relies on specific LSE instruction to interface to co-processor [1]. Fixes: 8a4f835971f5 ("common/octeontx2: add IO handling APIs") Cc: jerinj@marvell.com Cc: stable@dpdk.org [1] https://mails.dpdk.org/archives/dev/2021-January/196092.html Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> --- v3: Keep inline assembly and add sve extension to fix issue. 
(Pavan) drivers/common/octeontx2/otx2_io_arm64.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/common/octeontx2/otx2_io_arm64.h b/drivers/common/octeontx2/otx2_io_arm64.h index b5c85d9a6..34268e3af 100644 --- a/drivers/common/octeontx2/otx2_io_arm64.h +++ b/drivers/common/octeontx2/otx2_io_arm64.h @@ -21,6 +21,12 @@ #define otx2_prefetch_store_keep(ptr) ({\ asm volatile("prfm pstl1keep, [%x0]\n" : : "r" (ptr)); }) +#if defined(__ARM_FEATURE_SVE) +#define __LSE_PREAMBLE " .cpu generic+lse+sve\n" +#else +#define __LSE_PREAMBLE " .cpu generic+lse\n" +#endif + static __rte_always_inline uint64_t otx2_atomic64_add_nosync(int64_t incr, int64_t *ptr) { @@ -28,7 +34,7 @@ otx2_atomic64_add_nosync(int64_t incr, int64_t *ptr) /* Atomic add with no ordering */ asm volatile ( - ".cpu generic+lse\n" + __LSE_PREAMBLE "ldadd %x[i], %x[r], [%[b]]" : [r] "=r" (result), "+m" (*ptr) : [i] "r" (incr), [b] "r" (ptr) @@ -43,7 +49,7 @@ otx2_atomic64_add_sync(int64_t incr, int64_t *ptr) /* Atomic add with ordering */ asm volatile ( - ".cpu generic+lse\n" + __LSE_PREAMBLE "ldadda %x[i], %x[r], [%[b]]" : [r] "=r" (result), "+m" (*ptr) : [i] "r" (incr), [b] "r" (ptr) @@ -57,7 +63,7 @@ otx2_lmt_submit(rte_iova_t io_address) uint64_t result; asm volatile ( - ".cpu generic+lse\n" + __LSE_PREAMBLE "ldeor xzr,%x[rf],[%[rs]]" : [rf] "=r"(result): [rs] "r"(io_address)); return result; @@ -69,7 +75,7 @@ otx2_lmt_submit_release(rte_iova_t io_address) uint64_t result; asm volatile ( - ".cpu generic+lse\n" + __LSE_PREAMBLE "ldeorl xzr,%x[rf],[%[rs]]" : [rf] "=r"(result) : [rs] "r"(io_address)); return result; @@ -104,4 +110,5 @@ otx2_lmt_mov_seg(void *out, const void *in, const uint16_t segdw) dst128[i] = src128[i]; } +#undef __LSE_PREAMBLE #endif /* _OTX2_IO_ARM64_H_ */ -- 2.25.1 ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [PATCH v3 4/5] common/octeontx2: fix build with sve enabled 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 4/5] common/octeontx2: " Ruifeng Wang @ 2021-01-12 4:38 ` Jerin Jacob 0 siblings, 0 replies; 43+ messages in thread From: Jerin Jacob @ 2021-01-12 4:38 UTC (permalink / raw) To: Ruifeng Wang Cc: Jerin Jacob, Nithin Dabilpuram, Pavan Nikhilesh, dpdk-dev, Vladimir Medvedkin, Hemant Agrawal, Honnappa Nagarahalli, nd, dpdk stable On Tue, Jan 12, 2021 at 8:28 AM Ruifeng Wang <ruifeng.wang@arm.com> wrote: > > Building with gcc 10.2 with SVE extension enabled got error: > > {standard input}: Assembler messages: > {standard input}:4002: Error: selected processor does not support `mov z3.b,#0' > {standard input}:4003: Error: selected processor does not support `whilelo p1.b,xzr,x7' > {standard input}:4005: Error: selected processor does not support `ld1b z0.b,p1/z,[x8]' > {standard input}:4006: Error: selected processor does not support `whilelo p4.s,wzr,w7' > > This is because inline assembly code explicitly resets cpu model to > not have SVE support. Thus SVE instructions generated by compiler > auto vectorization got rejected by assembler. > > Added SVE to the cpu model specified by inline assembly for SVE support. > Not replacing the inline assembly with C atomics because the driver relies > on specific LSE instruction to interface to co-processor [1]. > > Fixes: 8a4f835971f5 ("common/octeontx2: add IO handling APIs") > Cc: jerinj@marvell.com > Cc: stable@dpdk.org Reviewed-by: Jerin Jacob <jerinj@marvell.com> > > [1] https://mails.dpdk.org/archives/dev/2021-January/196092.html > > Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> > --- > v3: > Keep inline assembly and add sve extension to fix issue. 
(Pavan) > > drivers/common/octeontx2/otx2_io_arm64.h | 15 +++++++++++---- > 1 file changed, 11 insertions(+), 4 deletions(-) > > diff --git a/drivers/common/octeontx2/otx2_io_arm64.h b/drivers/common/octeontx2/otx2_io_arm64.h > index b5c85d9a6..34268e3af 100644 > --- a/drivers/common/octeontx2/otx2_io_arm64.h > +++ b/drivers/common/octeontx2/otx2_io_arm64.h > @@ -21,6 +21,12 @@ > #define otx2_prefetch_store_keep(ptr) ({\ > asm volatile("prfm pstl1keep, [%x0]\n" : : "r" (ptr)); }) > > +#if defined(__ARM_FEATURE_SVE) > +#define __LSE_PREAMBLE " .cpu generic+lse+sve\n" > +#else > +#define __LSE_PREAMBLE " .cpu generic+lse\n" > +#endif > + > static __rte_always_inline uint64_t > otx2_atomic64_add_nosync(int64_t incr, int64_t *ptr) > { > @@ -28,7 +34,7 @@ otx2_atomic64_add_nosync(int64_t incr, int64_t *ptr) > > /* Atomic add with no ordering */ > asm volatile ( > - ".cpu generic+lse\n" > + __LSE_PREAMBLE > "ldadd %x[i], %x[r], [%[b]]" > : [r] "=r" (result), "+m" (*ptr) > : [i] "r" (incr), [b] "r" (ptr) > @@ -43,7 +49,7 @@ otx2_atomic64_add_sync(int64_t incr, int64_t *ptr) > > /* Atomic add with ordering */ > asm volatile ( > - ".cpu generic+lse\n" > + __LSE_PREAMBLE > "ldadda %x[i], %x[r], [%[b]]" > : [r] "=r" (result), "+m" (*ptr) > : [i] "r" (incr), [b] "r" (ptr) > @@ -57,7 +63,7 @@ otx2_lmt_submit(rte_iova_t io_address) > uint64_t result; > > asm volatile ( > - ".cpu generic+lse\n" > + __LSE_PREAMBLE > "ldeor xzr,%x[rf],[%[rs]]" : > [rf] "=r"(result): [rs] "r"(io_address)); > return result; > @@ -69,7 +75,7 @@ otx2_lmt_submit_release(rte_iova_t io_address) > uint64_t result; > > asm volatile ( > - ".cpu generic+lse\n" > + __LSE_PREAMBLE > "ldeorl xzr,%x[rf],[%[rs]]" : > [rf] "=r"(result) : [rs] "r"(io_address)); > return result; > @@ -104,4 +110,5 @@ otx2_lmt_mov_seg(void *out, const void *in, const uint16_t segdw) > dst128[i] = src128[i]; > } > > +#undef __LSE_PREAMBLE > #endif /* _OTX2_IO_ARM64_H_ */ > -- > 2.25.1 > ^ permalink raw reply [flat|nested] 43+ messages 
in thread
* [dpdk-dev] [PATCH v3 5/5] config: add Arm Neoverse N2 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support Ruifeng Wang ` (3 preceding siblings ...) 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 4/5] common/octeontx2: " Ruifeng Wang @ 2021-01-12 2:57 ` Ruifeng Wang 2021-01-12 4:44 ` Jerin Jacob 2021-01-13 2:08 ` Honnappa Nagarahalli 2021-01-14 15:18 ` [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support David Marchand 5 siblings, 2 replies; 43+ messages in thread From: Ruifeng Wang @ 2021-01-12 2:57 UTC (permalink / raw) To: Jerin Jacob, Ruifeng Wang, Jan Viktorin, Bruce Richardson Cc: dev, vladimir.medvedkin, pbhagavatula, hemant.agrawal, honnappa.nagarahalli, nd Add Arm Neoverse N2 cpu support. Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> --- v3: Changed arch extension from sve to sve2 as N2 supports sve2. (Honnappa) config/arm/arm64_n2_linux_gcc | 17 +++++++++++++++++ config/arm/meson.build | 11 ++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 config/arm/arm64_n2_linux_gcc diff --git a/config/arm/arm64_n2_linux_gcc b/config/arm/arm64_n2_linux_gcc new file mode 100644 index 000000000..78f6f3e2b --- /dev/null +++ b/config/arm/arm64_n2_linux_gcc @@ -0,0 +1,17 @@ +[binaries] +c = 'aarch64-linux-gnu-gcc' +cpp = 'aarch64-linux-gnu-cpp' +ar = 'aarch64-linux-gnu-gcc-ar' +strip = 'aarch64-linux-gnu-strip' +pkgconfig = 'aarch64-linux-gnu-pkg-config' +pcap-config = '' + +[host_machine] +system = 'linux' +cpu_family = 'aarch64' +cpu = 'armv8-a' +endian = 'little' + +[properties] +implementor_id = '0x41' +implementor_pn = '0xd49' diff --git a/config/arm/meson.build b/config/arm/meson.build index 42b4e43c7..5fd1c40a0 100644 --- a/config/arm/meson.build +++ b/config/arm/meson.build @@ -89,6 +89,14 @@ flags_n1generic_extra = [ ['RTE_MAX_NUMA_NODES', 1], ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false], ['RTE_LIBRTE_VHOST_NUMA', false]] +flags_n2generic_extra = [ + ['RTE_MACHINE', '"neoverse-n2"'], + ['RTE_MAX_LCORE', 64], + 
['RTE_CACHE_LINE_SIZE', 64], + ['RTE_ARM_FEATURE_ATOMICS', true], + ['RTE_USE_C11_MEM_MODEL', true], + ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false], + ['RTE_LIBRTE_VHOST_NUMA', false]] machine_args_generic = [ ['default', ['-march=armv8-a+crc', '-moutline-atomics']], @@ -100,7 +108,8 @@ machine_args_generic = [ ['0xd09', ['-mcpu=cortex-a73']], ['0xd0a', ['-mcpu=cortex-a75']], ['0xd0b', ['-mcpu=cortex-a76']], - ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'], flags_n1generic_extra]] + ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'], flags_n1generic_extra], + ['0xd49', ['-march=armv8.5-a+crypto+sve2'], flags_n2generic_extra]] machine_args_cavium = [ ['default', ['-march=armv8-a+crc+crypto','-mcpu=thunderx']], -- 2.25.1 ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [PATCH v3 5/5] config: add Arm Neoverse N2 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 5/5] config: add Arm Neoverse N2 Ruifeng Wang @ 2021-01-12 4:44 ` Jerin Jacob 2021-01-13 2:08 ` Honnappa Nagarahalli 1 sibling, 0 replies; 43+ messages in thread From: Jerin Jacob @ 2021-01-12 4:44 UTC (permalink / raw) To: Ruifeng Wang Cc: Jerin Jacob, Jan Viktorin, Bruce Richardson, dpdk-dev, Vladimir Medvedkin, Pavan Nikhilesh, Hemant Agrawal, Honnappa Nagarahalli, nd On Tue, Jan 12, 2021 at 8:28 AM Ruifeng Wang <ruifeng.wang@arm.com> wrote: > > Add Arm Neoverse N2 cpu support. > > Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> Acked-by: Jerin Jacob <jerinj@marvell.com> > --- > v3: > Changed arch extension from sve to sve2 as N2 supports sve2. (Honnappa) > > config/arm/arm64_n2_linux_gcc | 17 +++++++++++++++++ > config/arm/meson.build | 11 ++++++++++- > 2 files changed, 27 insertions(+), 1 deletion(-) > create mode 100644 config/arm/arm64_n2_linux_gcc > > diff --git a/config/arm/arm64_n2_linux_gcc b/config/arm/arm64_n2_linux_gcc > new file mode 100644 > index 000000000..78f6f3e2b > --- /dev/null > +++ b/config/arm/arm64_n2_linux_gcc > @@ -0,0 +1,17 @@ > +[binaries] > +c = 'aarch64-linux-gnu-gcc' > +cpp = 'aarch64-linux-gnu-cpp' > +ar = 'aarch64-linux-gnu-gcc-ar' > +strip = 'aarch64-linux-gnu-strip' > +pkgconfig = 'aarch64-linux-gnu-pkg-config' > +pcap-config = '' > + > +[host_machine] > +system = 'linux' > +cpu_family = 'aarch64' > +cpu = 'armv8-a' > +endian = 'little' > + > +[properties] > +implementor_id = '0x41' > +implementor_pn = '0xd49' > diff --git a/config/arm/meson.build b/config/arm/meson.build > index 42b4e43c7..5fd1c40a0 100644 > --- a/config/arm/meson.build > +++ b/config/arm/meson.build > @@ -89,6 +89,14 @@ flags_n1generic_extra = [ > ['RTE_MAX_NUMA_NODES', 1], > ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false], > ['RTE_LIBRTE_VHOST_NUMA', false]] > +flags_n2generic_extra = [ > + ['RTE_MACHINE', '"neoverse-n2"'], > + ['RTE_MAX_LCORE', 64], > + 
['RTE_CACHE_LINE_SIZE', 64], > + ['RTE_ARM_FEATURE_ATOMICS', true], > + ['RTE_USE_C11_MEM_MODEL', true], > + ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false], > + ['RTE_LIBRTE_VHOST_NUMA', false]] > > machine_args_generic = [ > ['default', ['-march=armv8-a+crc', '-moutline-atomics']], > @@ -100,7 +108,8 @@ machine_args_generic = [ > ['0xd09', ['-mcpu=cortex-a73']], > ['0xd0a', ['-mcpu=cortex-a75']], > ['0xd0b', ['-mcpu=cortex-a76']], > - ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'], flags_n1generic_extra]] > + ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'], flags_n1generic_extra], > + ['0xd49', ['-march=armv8.5-a+crypto+sve2'], flags_n2generic_extra]] > > machine_args_cavium = [ > ['default', ['-march=armv8-a+crc+crypto','-mcpu=thunderx']], > -- > 2.25.1 > ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [PATCH v3 5/5] config: add Arm Neoverse N2 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 5/5] config: add Arm Neoverse N2 Ruifeng Wang 2021-01-12 4:44 ` Jerin Jacob @ 2021-01-13 2:08 ` Honnappa Nagarahalli 1 sibling, 0 replies; 43+ messages in thread From: Honnappa Nagarahalli @ 2021-01-13 2:08 UTC (permalink / raw) To: Ruifeng Wang, jerinj, Ruifeng Wang, Jan Viktorin, Bruce Richardson Cc: dev, vladimir.medvedkin, pbhagavatula, hemant.agrawal, nd, Honnappa Nagarahalli, nd <snip> > > Add Arm Neoverse N2 cpu support. > > Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com> Looks good Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> > --- > v3: > Changed arch extension from sve to sve2 as N2 supports sve2. (Honnappa) > > config/arm/arm64_n2_linux_gcc | 17 +++++++++++++++++ > config/arm/meson.build | 11 ++++++++++- > 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 > config/arm/arm64_n2_linux_gcc > > diff --git a/config/arm/arm64_n2_linux_gcc > b/config/arm/arm64_n2_linux_gcc new file mode 100644 index > 000000000..78f6f3e2b > --- /dev/null > +++ b/config/arm/arm64_n2_linux_gcc > @@ -0,0 +1,17 @@ > +[binaries] > +c = 'aarch64-linux-gnu-gcc' > +cpp = 'aarch64-linux-gnu-cpp' > +ar = 'aarch64-linux-gnu-gcc-ar' > +strip = 'aarch64-linux-gnu-strip' > +pkgconfig = 'aarch64-linux-gnu-pkg-config' > +pcap-config = '' > + > +[host_machine] > +system = 'linux' > +cpu_family = 'aarch64' > +cpu = 'armv8-a' > +endian = 'little' > + > +[properties] > +implementor_id = '0x41' > +implementor_pn = '0xd49' > diff --git a/config/arm/meson.build b/config/arm/meson.build index > 42b4e43c7..5fd1c40a0 100644 > --- a/config/arm/meson.build > +++ b/config/arm/meson.build > @@ -89,6 +89,14 @@ flags_n1generic_extra = [ > ['RTE_MAX_NUMA_NODES', 1], > ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false], > ['RTE_LIBRTE_VHOST_NUMA', false]] > +flags_n2generic_extra = [ > + ['RTE_MACHINE', '"neoverse-n2"'], > + ['RTE_MAX_LCORE', 64], > + ['RTE_CACHE_LINE_SIZE', 64], > + 
['RTE_ARM_FEATURE_ATOMICS', true], > + ['RTE_USE_C11_MEM_MODEL', true], > + ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false], > + ['RTE_LIBRTE_VHOST_NUMA', false]] > > machine_args_generic = [ > ['default', ['-march=armv8-a+crc', '-moutline-atomics']], @@ -100,7 > +108,8 @@ machine_args_generic = [ > ['0xd09', ['-mcpu=cortex-a73']], > ['0xd0a', ['-mcpu=cortex-a75']], > ['0xd0b', ['-mcpu=cortex-a76']], > - ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'], > flags_n1generic_extra]] > + ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'], > flags_n1generic_extra], > + ['0xd49', ['-march=armv8.5-a+crypto+sve2'], flags_n2generic_extra]] > > machine_args_cavium = [ > ['default', ['-march=armv8-a+crc+crypto','-mcpu=thunderx']], > -- > 2.25.1 ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support Ruifeng Wang ` (4 preceding siblings ...) 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 5/5] config: add Arm Neoverse N2 Ruifeng Wang @ 2021-01-14 15:18 ` David Marchand 2021-01-14 15:40 ` David Marchand 5 siblings, 1 reply; 43+ messages in thread From: David Marchand @ 2021-01-14 15:18 UTC (permalink / raw) To: Ruifeng Wang, Honnappa Nagarahalli Cc: dev, Vladimir Medvedkin, Pavan Nikhilesh, Jerin Jacob Kollanukkaran, Hemant Agrawal, nd Ruifeng, Honnappa, On Tue, Jan 12, 2021 at 3:57 AM Ruifeng Wang <ruifeng.wang@arm.com> wrote: > > Added lpm4 lookupx4 implementation by using Arm SVE extension. > The SVE is Scalable Vector Extension which is exposed to the > user with a vector length agnostic interface. > Refer to [1] for more information about SVE. > > Configuration was added for Neoverse N2 CPU which has SVE support. > > Some bugs were fixed so compiling with sve enabled can pass. > > [1] https://developer.arm.com/tools-and-software/server-and-hpc/compile/arm-instruction-emulator/resources/tutorials/sve Can you point at a toolchain that supports SVE without having to register to some commercial spamming system? :-) The only aarch64-linux-gnu- toolchain I found on the ARM website is a 8.x gcc that does not seem to support SVE. -- David Marchand ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support 2021-01-14 15:18 ` [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support David Marchand @ 2021-01-14 15:40 ` David Marchand 2021-01-15 7:02 ` Ruifeng Wang 0 siblings, 1 reply; 43+ messages in thread From: David Marchand @ 2021-01-14 15:40 UTC (permalink / raw) To: Ruifeng Wang, Honnappa Nagarahalli Cc: dev, Vladimir Medvedkin, Pavan Nikhilesh, Jerin Jacob Kollanukkaran, Hemant Agrawal, nd On Thu, Jan 14, 2021 at 4:18 PM David Marchand <david.marchand@redhat.com> wrote: > > Ruifeng, Honnappa, > > On Tue, Jan 12, 2021 at 3:57 AM Ruifeng Wang <ruifeng.wang@arm.com> wrote: > > > > Added lpm4 lookupx4 implementation by using Arm SVE extension. > > The SVE is Scalable Vector Extension which is exposed to the > > user with a vector length agnostic interface. > > Refer to [1] for more information about SVE. > > > > Configuration was added for Neoverse N2 CPU which has SVE support. > > > > Some bugs were fixed so compiling with sve enabled can pass. > > > > [1] https://developer.arm.com/tools-and-software/server-and-hpc/compile/arm-instruction-emulator/resources/tutorials/sve > > Can you point at a toolchain that supports SVE without having to > register to some commercial spamming system? :-) > The only aarch64-linux-gnu- toolchain I found on the ARM website is a > 8.x gcc that does not seem to support SVE. I tested this using https://developer.arm.com/-/media/Files/downloads/gnu-a/10.2-2020.11/binrel/gcc-arm-10.2-2020.11-x86_64-aarch64-none-linux-gnu.tar.xz But I had to modify the cross compile prefix in config/arm/arm64_n2_linux_gcc (changing aarch64-linux-gnu- to aarch64-none-linux-gnu-). I am still interested in a toolchain that works out of the box. Series applied, thanks. -- David Marchand ^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support 2021-01-14 15:40 ` David Marchand @ 2021-01-15 7:02 ` Ruifeng Wang 0 siblings, 0 replies; 43+ messages in thread From: Ruifeng Wang @ 2021-01-15 7:02 UTC (permalink / raw) To: David Marchand, Honnappa Nagarahalli Cc: dev, Vladimir Medvedkin, Pavan Nikhilesh, jerinj, hemant.agrawal, nd, nd > -----Original Message----- > From: David Marchand <david.marchand@redhat.com> > Sent: Thursday, January 14, 2021 11:40 PM > To: Ruifeng Wang <Ruifeng.Wang@arm.com>; Honnappa Nagarahalli > <Honnappa.Nagarahalli@arm.com> > Cc: dev <dev@dpdk.org>; Vladimir Medvedkin > <vladimir.medvedkin@intel.com>; Pavan Nikhilesh > <pbhagavatula@marvell.com>; jerinj@marvell.com; > hemant.agrawal@nxp.com; nd <nd@arm.com> > Subject: Re: [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support > > On Thu, Jan 14, 2021 at 4:18 PM David Marchand > <david.marchand@redhat.com> wrote: > > > > Ruifeng, Honnappa, > > > > On Tue, Jan 12, 2021 at 3:57 AM Ruifeng Wang <ruifeng.wang@arm.com> > wrote: > > > > > > Added lpm4 lookupx4 implementation by using Arm SVE extension. > > > The SVE is Scalable Vector Extension which is exposed to the user > > > with a vector length agnostic interface. > > > Refer to [1] for more information about SVE. > > > > > > Configuration was added for Neoverse N2 CPU which has SVE support. > > > > > > Some bugs were fixed so compiling with sve enabled can pass. > > > > > > [1] > > > https://developer.arm.com/tools-and-software/server-and- > hpc/compile/ > > > arm-instruction-emulator/resources/tutorials/sve > > > > Can you point at a toolchain that supports SVE without having to > > register to some commercial spamming system? :-) The only > > aarch64-linux-gnu- toolchain I found on the ARM website is a 8.x gcc > > that does not seem to support SVE. 
> > I tested this using > https://developer.arm.com/-/media/Files/downloads/gnu-a/10.2- > 2020.11/binrel/gcc-arm-10.2-2020.11-x86_64-aarch64-none-linux-gnu.tar.xz Yes, gcc-10 has SVE support. Arm cross compilers available at: https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-a/downloads > But I had to modify the cross compile prefix in > config/arm/arm64_n2_linux_gcc (adding a none_). > I am still interested in a toolchain that works out of the box. I think on Ubuntu, it will work out of the box after installing package gcc-10-aarch64-linux-gnu. > > Series applied, thanks. Thank you. > > > -- > David Marchand ^ permalink raw reply [flat|nested] 43+ messages in thread
end of thread, other threads:[~2021-01-28 12:25 UTC | newest] Thread overview: 43+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2020-12-18 10:12 [dpdk-dev] [RFC PATCH] lpm: add sve support for lookup on Arm platform Ruifeng Wang 2021-01-05 15:44 ` Medvedkin, Vladimir 2021-01-06 10:11 ` Ruifeng Wang 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 0/5] lpm lookup with sve support Ruifeng Wang 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 1/5] lpm: add sve support for lookup on Arm platform Ruifeng Wang 2021-01-13 18:54 ` Medvedkin, Vladimir 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 2/5] net/hns3: fix build with sve enabled Ruifeng Wang 2021-01-09 0:06 ` Honnappa Nagarahalli 2021-01-09 2:11 ` oulijun 2021-01-11 2:39 ` Ruifeng Wang 2021-01-11 13:38 ` Honnappa Nagarahalli 2021-01-09 2:15 ` oulijun 2021-01-11 2:27 ` Ruifeng Wang 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 3/5] net/octeontx: " Ruifeng Wang 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 4/5] common/octeontx2: " Ruifeng Wang 2021-01-08 10:29 ` [dpdk-dev] [EXT] " Pavan Nikhilesh Bhagavatula 2021-01-11 9:51 ` Ruifeng Wang 2021-01-08 8:25 ` [dpdk-dev] [PATCH v2 5/5] config: add Arm Neoverse N2 Ruifeng Wang 2021-01-08 23:58 ` Honnappa Nagarahalli 2021-01-11 3:01 ` Ruifeng Wang 2021-01-11 3:09 ` Jerin Jacob 2021-01-11 8:32 ` Ruifeng Wang 2021-01-11 13:58 ` Honnappa Nagarahalli 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support Ruifeng Wang 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 1/5] lpm: add sve support for lookup on Arm platform Ruifeng Wang 2021-01-13 15:58 ` David Marchand 2021-01-27 13:04 ` David Marchand 2021-01-27 21:03 ` Honnappa Nagarahalli 2021-01-28 8:03 ` David Marchand 2021-01-28 12:24 ` Honnappa Nagarahalli 2021-01-28 5:47 ` Ruifeng Wang 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 2/5] net/hns3: fix build with sve enabled Ruifeng Wang 2021-01-13 2:16 ` Honnappa Nagarahalli 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 3/5] net/octeontx: " Ruifeng Wang 2021-01-12 4:39 
` Jerin Jacob 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 4/5] common/octeontx2: " Ruifeng Wang 2021-01-12 4:38 ` Jerin Jacob 2021-01-12 2:57 ` [dpdk-dev] [PATCH v3 5/5] config: add Arm Neoverse N2 Ruifeng Wang 2021-01-12 4:44 ` Jerin Jacob 2021-01-13 2:08 ` Honnappa Nagarahalli 2021-01-14 15:18 ` [dpdk-dev] [PATCH v3 0/5] lpm lookup with sve support David Marchand 2021-01-14 15:40 ` David Marchand 2021-01-15 7:02 ` Ruifeng Wang
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).