* [PATCH 1/1] lpm: add a scalar version of lookupx4 function @ 2022-05-10 11:58 Stanislaw Kardach 2022-05-19 17:02 ` Medvedkin, Vladimir 2022-05-27 18:18 ` [PATCH v2 1/2] lpm: add const to lpm arg of rte_lpm_lookup Stanislaw Kardach 0 siblings, 2 replies; 19+ messages in thread From: Stanislaw Kardach @ 2022-05-10 11:58 UTC (permalink / raw) To: Bruce Richardson Cc: Michal Mazurek, dev, Frank Zhao, Sam Grove, mw, upstream, Stanislaw Kardach From: Michal Mazurek <maz@semihalf.com> Add an implementation of the rte_lpm_lookupx4() function for platforms without support for vector operations. This will be useful in the upcoming RISC-V port as well as any platform which may want to start with a basic level of LPM support. Signed-off-by: Michal Mazurek <maz@semihalf.com> Signed-off-by: Stanislaw Kardach <kda@semihalf.com> --- doc/guides/rel_notes/release_22_07.rst | 5 + lib/lpm/meson.build | 1 + lib/lpm/rte_lpm.h | 4 +- lib/lpm/rte_lpm_scalar.h | 122 +++++++++++++++++++++++++ 4 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 lib/lpm/rte_lpm_scalar.h diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst index 4ae91dd94d..73e8d632f2 100644 --- a/doc/guides/rel_notes/release_22_07.rst +++ b/doc/guides/rel_notes/release_22_07.rst @@ -70,6 +70,11 @@ New Features * Added AH mode support in lookaside protocol (IPsec) for CN9K & CN10K. * Added AES-GMAC support in lookaside protocol (IPsec) for CN9K & CN10K. +* **Added scalar version of the LPM library.** + + * Added scalar implementation of ``rte_lpm_lookupx4``. This is a fall-back + implementation for platforms that don't support vector operations. + Removed Items ------------- diff --git a/lib/lpm/meson.build b/lib/lpm/meson.build index 78d91d3421..6b47361fce 100644 --- a/lib/lpm/meson.build +++ b/lib/lpm/meson.build @@ -14,6 +14,7 @@ headers = files('rte_lpm.h', 'rte_lpm6.h') indirect_headers += files( 'rte_lpm_altivec.h', 'rte_lpm_neon.h', + 'rte_lpm_scalar.h', 'rte_lpm_sse.h', 'rte_lpm_sve.h', ) diff --git a/lib/lpm/rte_lpm.h b/lib/lpm/rte_lpm.h index eb91960e81..b5db6a353a 100644 --- a/lib/lpm/rte_lpm.h +++ b/lib/lpm/rte_lpm.h @@ -405,8 +405,10 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], #endif #elif defined(RTE_ARCH_PPC_64) #include "rte_lpm_altivec.h" -#else +#elif defined(RTE_ARCH_X86) #include "rte_lpm_sse.h" +#else +#include "rte_lpm_scalar.h" #endif #ifdef __cplusplus diff --git a/lib/lpm/rte_lpm_scalar.h b/lib/lpm/rte_lpm_scalar.h new file mode 100644 index 0000000000..991b94e687 --- /dev/null +++ b/lib/lpm/rte_lpm_scalar.h @@ -0,0 +1,122 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2022 StarFive + * Copyright(c) 2022 SiFive + * Copyright(c) 2022 Semihalf + */ + +#ifndef _RTE_LPM_SCALAR_H_ +#define _RTE_LPM_SCALAR_H_ + +#include <rte_branch_prediction.h> +#include <rte_byteorder.h> +#include <rte_common.h> +#include <rte_vect.h> + +#ifdef __cplusplus +extern "C" { +#endif + +static inline void +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], + uint32_t defv) +{ + rte_xmm_t i24; + rte_xmm_t i8; + uint32_t tbl[4]; + uint64_t pt, pt2; + const uint32_t *ptbl; + + const rte_xmm_t mask8 = { + .u32 = {UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX}}; + + /* + * RTE_LPM_VALID_EXT_ENTRY_BITMASK for 2 LPM entries + * as one 64-bit value (0x0300000003000000). 
+ */ + const uint64_t mask_xv = + ((uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK | + (uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK << 32); + + /* + * RTE_LPM_LOOKUP_SUCCESS for 2 LPM entries + * as one 64-bit value (0x0100000001000000). + */ + const uint64_t mask_v = + ((uint64_t)RTE_LPM_LOOKUP_SUCCESS | + (uint64_t)RTE_LPM_LOOKUP_SUCCESS << 32); + + /* get 4 indexes for tbl24[]. */ + i24.x = ip; + i24.u32[0] >>= CHAR_BIT; + i24.u32[1] >>= CHAR_BIT; + i24.u32[2] >>= CHAR_BIT; + i24.u32[3] >>= CHAR_BIT; + + /* extract values from tbl24[] */ + ptbl = (const uint32_t *)&lpm->tbl24[i24.u32[0]]; + tbl[0] = *ptbl; + ptbl = (const uint32_t *)&lpm->tbl24[i24.u32[1]]; + tbl[1] = *ptbl; + ptbl = (const uint32_t *)&lpm->tbl24[i24.u32[2]]; + tbl[2] = *ptbl; + ptbl = (const uint32_t *)&lpm->tbl24[i24.u32[3]]; + tbl[3] = *ptbl; + + /* get 4 indexes for tbl8[]. */ + i8.x = ip; + i8.u64[0] &= mask8.u64[0]; + i8.u64[1] &= mask8.u64[1]; + + pt = (uint64_t)tbl[0] | + (uint64_t)tbl[1] << 32; + pt2 = (uint64_t)tbl[2] | + (uint64_t)tbl[3] << 32; + + /* search successfully finished for all 4 IP addresses. */ + if (likely((pt & mask_xv) == mask_v) && + likely((pt2 & mask_xv) == mask_v)) { + *(uint64_t *)hop = pt & RTE_LPM_MASKX4_RES; + *(uint64_t *)(hop + 2) = pt2 & RTE_LPM_MASKX4_RES; + return; + } + + if (unlikely((pt & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { + i8.u32[0] = i8.u32[0] + + (tbl[0] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[0]]; + tbl[0] = *ptbl; + } + if (unlikely((pt >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { + i8.u32[1] = i8.u32[1] + + (tbl[1] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[1]]; + tbl[1] = *ptbl; + } + if (unlikely((pt2 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { + i8.u32[2] = i8.u32[2] + + (tbl[2] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[2]]; + tbl[2] = *ptbl; + } + if (unlikely((pt2 >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { + i8.u32[3] = i8.u32[3] + + (tbl[3] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[3]]; + tbl[3] = *ptbl; + } + + hop[0] = (tbl[0] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[0] & 0x00FFFFFF : defv; + hop[1] = (tbl[1] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[1] & 0x00FFFFFF : defv; + hop[2] = (tbl[2] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[2] & 0x00FFFFFF : defv; + hop[3] = (tbl[3] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[3] & 0x00FFFFFF : defv; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_LPM_SCALAR_H_ */ -- 2.30.2 ^ permalink raw reply [flat|nested] 19+ messages in thread
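A minimal usage sketch of the new x4 lookup may help readers less familiar with the LPM API. Everything here other than rte_lpm_lookupx4() itself (rte_lpm_create(), rte_lpm_add(), RTE_IPV4(), rte_xmm_t) is pre-existing DPDK API, and the table parameters and addresses are arbitrary illustration values, not taken from the patch:

#include <rte_ip.h>
#include <rte_lpm.h>

static void
lookupx4_example(void)
{
	struct rte_lpm_config cfg = { .max_rules = 16, .number_tbl8s = 256 };
	struct rte_lpm *lpm = rte_lpm_create("example", SOCKET_ID_ANY, &cfg);
	uint32_t hop[4];
	rte_xmm_t ip;

	if (lpm == NULL)
		return;

	/* 10.0.0.0/8 -> next hop 1 */
	rte_lpm_add(lpm, RTE_IPV4(10, 0, 0, 0), 8, 1);

	ip.u32[0] = RTE_IPV4(10, 1, 2, 3);	/* hit  -> hop[0] == 1 */
	ip.u32[1] = RTE_IPV4(10, 4, 5, 6);	/* hit  -> hop[1] == 1 */
	ip.u32[2] = RTE_IPV4(192, 168, 0, 1);	/* miss -> hop[2] == UINT32_MAX */
	ip.u32[3] = RTE_IPV4(10, 255, 0, 1);	/* hit  -> hop[3] == 1 */

	/* misses fall back to the default value given as the last argument */
	rte_lpm_lookupx4(lpm, ip.x, hop, UINT32_MAX);

	rte_lpm_free(lpm);
}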
* Re: [PATCH 1/1] lpm: add a scalar version of lookupx4 function 2022-05-10 11:58 [PATCH 1/1] lpm: add a scalar version of lookupx4 function Stanislaw Kardach @ 2022-05-19 17:02 ` Medvedkin, Vladimir 2022-05-24 16:28 ` Stanisław Kardach 2022-05-27 18:18 ` [PATCH v2 1/2] lpm: add const to lpm arg of rte_lpm_lookup Stanislaw Kardach 1 sibling, 1 reply; 19+ messages in thread From: Medvedkin, Vladimir @ 2022-05-19 17:02 UTC (permalink / raw) To: Stanislaw Kardach Cc: Michal Mazurek, dev, Frank Zhao, Sam Grove, mw, upstream, Bruce Richardson Hi Stanislaw, Michal, As far as I can see, this implementation almost completely repeats other lookupx4() implementations, except for the use of vector instructions. On my board (x86_64) in lpm_perf_autotest your implementation takes about: LPM LookupX4: 29.5 cycles (fails = 12.5%) replacing this code with a simple loop with rte_lpm_lookup(): uint32_t nh; int i, ret; for (i = 0; i < 4; i++) { ret = rte_lpm_lookup((struct rte_lpm *)lpm, ((rte_xmm_t)ip).u32[i], &nh); hop[i] = (ret == 0) ? nh : defv; } works faster: LPM LookupX4: 22.2 cycles (fails = 12.5%) I'm wondering if this will work faster on your board (I assume it it RISC-V arch)? Thanks! On 10/05/2022 12:58, Stanislaw Kardach wrote: > From: Michal Mazurek <maz@semihalf.com> > > Add an implementation of the rte_lpm_lookupx4() function for platforms > without support for vector operations. > > This will be useful in the upcoming RISC-V port as well as any platform > which may want to start with a basic level of LPM support. > > Signed-off-by: Michal Mazurek <maz@semihalf.com> > Signed-off-by: Stanislaw Kardach <kda@semihalf.com> > --- > doc/guides/rel_notes/release_22_07.rst | 5 + > lib/lpm/meson.build | 1 + > lib/lpm/rte_lpm.h | 4 +- > lib/lpm/rte_lpm_scalar.h | 122 +++++++++++++++++++++++++ > 4 files changed, 131 insertions(+), 1 deletion(-) > create mode 100644 lib/lpm/rte_lpm_scalar.h > > diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst > index 4ae91dd94d..73e8d632f2 100644 > --- a/doc/guides/rel_notes/release_22_07.rst > +++ b/doc/guides/rel_notes/release_22_07.rst > @@ -70,6 +70,11 @@ New Features > * Added AH mode support in lookaside protocol (IPsec) for CN9K & CN10K. > * Added AES-GMAC support in lookaside protocol (IPsec) for CN9K & CN10K. > > +* **Added scalar version of the LPM library.** > + > + * Added scalar implementation of ``rte_lpm_lookupx4``. This is a fall-back > + implementation for platforms that don't support vector operations. 
> + > > Removed Items > ------------- > diff --git a/lib/lpm/meson.build b/lib/lpm/meson.build > index 78d91d3421..6b47361fce 100644 > --- a/lib/lpm/meson.build > +++ b/lib/lpm/meson.build > @@ -14,6 +14,7 @@ headers = files('rte_lpm.h', 'rte_lpm6.h') > indirect_headers += files( > 'rte_lpm_altivec.h', > 'rte_lpm_neon.h', > + 'rte_lpm_scalar.h', > 'rte_lpm_sse.h', > 'rte_lpm_sve.h', > ) > diff --git a/lib/lpm/rte_lpm.h b/lib/lpm/rte_lpm.h > index eb91960e81..b5db6a353a 100644 > --- a/lib/lpm/rte_lpm.h > +++ b/lib/lpm/rte_lpm.h > @@ -405,8 +405,10 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], > #endif > #elif defined(RTE_ARCH_PPC_64) > #include "rte_lpm_altivec.h" > -#else > +#elif defined(RTE_ARCH_X86) > #include "rte_lpm_sse.h" > +#else > +#include "rte_lpm_scalar.h" > #endif > > #ifdef __cplusplus > diff --git a/lib/lpm/rte_lpm_scalar.h b/lib/lpm/rte_lpm_scalar.h > new file mode 100644 > index 0000000000..991b94e687 > --- /dev/null > +++ b/lib/lpm/rte_lpm_scalar.h > @@ -0,0 +1,122 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2022 StarFive > + * Copyright(c) 2022 SiFive > + * Copyright(c) 2022 Semihalf > + */ > + > +#ifndef _RTE_LPM_SCALAR_H_ > +#define _RTE_LPM_SCALAR_H_ > + > +#include <rte_branch_prediction.h> > +#include <rte_byteorder.h> > +#include <rte_common.h> > +#include <rte_vect.h> > + > +#ifdef __cplusplus > +extern "C" { > +#endif > + > +static inline void > +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], > + uint32_t defv) > +{ > + rte_xmm_t i24; > + rte_xmm_t i8; > + uint32_t tbl[4]; > + uint64_t pt, pt2; > + const uint32_t *ptbl; > + > + const rte_xmm_t mask8 = { > + .u32 = {UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX}}; > + > + /* > + * RTE_LPM_VALID_EXT_ENTRY_BITMASK for 2 LPM entries > + * as one 64-bit value (0x0300000003000000). > + */ > + const uint64_t mask_xv = > + ((uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK | > + (uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK << 32); > + > + /* > + * RTE_LPM_LOOKUP_SUCCESS for 2 LPM entries > + * as one 64-bit value (0x0100000001000000). > + */ > + const uint64_t mask_v = > + ((uint64_t)RTE_LPM_LOOKUP_SUCCESS | > + (uint64_t)RTE_LPM_LOOKUP_SUCCESS << 32); > + > + /* get 4 indexes for tbl24[]. */ > + i24.x = ip; > + i24.u32[0] >>= CHAR_BIT; > + i24.u32[1] >>= CHAR_BIT; > + i24.u32[2] >>= CHAR_BIT; > + i24.u32[3] >>= CHAR_BIT; > + > + /* extract values from tbl24[] */ > + ptbl = (const uint32_t *)&lpm->tbl24[i24.u32[0]]; > + tbl[0] = *ptbl; > + ptbl = (const uint32_t *)&lpm->tbl24[i24.u32[1]]; > + tbl[1] = *ptbl; > + ptbl = (const uint32_t *)&lpm->tbl24[i24.u32[2]]; > + tbl[2] = *ptbl; > + ptbl = (const uint32_t *)&lpm->tbl24[i24.u32[3]]; > + tbl[3] = *ptbl; > + > + /* get 4 indexes for tbl8[]. */ > + i8.x = ip; > + i8.u64[0] &= mask8.u64[0]; > + i8.u64[1] &= mask8.u64[1]; > + > + pt = (uint64_t)tbl[0] | > + (uint64_t)tbl[1] << 32; > + pt2 = (uint64_t)tbl[2] | > + (uint64_t)tbl[3] << 32; > + > + /* search successfully finished for all 4 IP addresses. 
*/ > + if (likely((pt & mask_xv) == mask_v) && > + likely((pt2 & mask_xv) == mask_v)) { > + *(uint64_t *)hop = pt & RTE_LPM_MASKX4_RES; > + *(uint64_t *)(hop + 2) = pt2 & RTE_LPM_MASKX4_RES; > + return; > + } > + > + if (unlikely((pt & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == > + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { > + i8.u32[0] = i8.u32[0] + > + (tbl[0] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; > + ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[0]]; > + tbl[0] = *ptbl; > + } > + if (unlikely((pt >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == > + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { > + i8.u32[1] = i8.u32[1] + > + (tbl[1] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; > + ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[1]]; > + tbl[1] = *ptbl; > + } > + if (unlikely((pt2 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == > + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { > + i8.u32[2] = i8.u32[2] + > + (tbl[2] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; > + ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[2]]; > + tbl[2] = *ptbl; > + } > + if (unlikely((pt2 >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == > + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { > + i8.u32[3] = i8.u32[3] + > + (tbl[3] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; > + ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[3]]; > + tbl[3] = *ptbl; > + } > + > + hop[0] = (tbl[0] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[0] & 0x00FFFFFF : defv; > + hop[1] = (tbl[1] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[1] & 0x00FFFFFF : defv; > + hop[2] = (tbl[2] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[2] & 0x00FFFFFF : defv; > + hop[3] = (tbl[3] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[3] & 0x00FFFFFF : defv; > +} > + > +#ifdef __cplusplus > +} > +#endif > + > +#endif /* _RTE_LPM_SCALAR_H_ */ -- Regards, Vladimir ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/1] lpm: add a scalar version of lookupx4 function 2022-05-19 17:02 ` Medvedkin, Vladimir @ 2022-05-24 16:28 ` Stanisław Kardach 2022-05-27 11:16 ` Stanisław Kardach 0 siblings, 1 reply; 19+ messages in thread From: Stanisław Kardach @ 2022-05-24 16:28 UTC (permalink / raw) To: Medvedkin, Vladimir Cc: Michal Mazurek, dev, Frank Zhao, Sam Grove, Marcin Wojtas, upstream, Bruce Richardson On Thu, May 19, 2022 at 7:04 PM Medvedkin, Vladimir <vladimir.medvedkin@intel.com> wrote: > > Hi Stanislaw, Michal, > > As far as I can see, this implementation almost completely repeats other > lookupx4() implementations, except for the use of vector instructions. > > On my board (x86_64) in lpm_perf_autotest your implementation takes about: > LPM LookupX4: 29.5 cycles (fails = 12.5%) > > replacing this code with a simple loop with rte_lpm_lookup(): > > uint32_t nh; > int i, ret; > > for (i = 0; i < 4; i++) { > ret = rte_lpm_lookup((struct rte_lpm *)lpm, ((rte_xmm_t)ip).u32[i], &nh); > hop[i] = (ret == 0) ? nh : defv; > } > > works faster: > LPM LookupX4: 22.2 cycles (fails = 12.5%) > > I'm wondering if this will work faster on your board (I assume it it > RISC-V arch)? Hi Vladimir, On my HiFive Unmatched RISC-V board there is a marginal difference (~ -1.56%): Our version: 210.5 cycles (fails = 12.5%) rte_lpm_lookup version: 213.8 cycles (fails = 12.5%) Given that x86 is faster with rte_lpm_lookup, I'll change to this implementation in the next version. That said I wonder why do we have different const requirements for rte_lpm_lookup() and rte_lpm_lookupx4(): static inline int rte_lpm_lookup(struct rte_lpm *lpm, uint32_t ip, uint32_t *next_hop) static inline void rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], uint32_t defv); I think both should be const. > > Thanks! > > On 10/05/2022 12:58, Stanislaw Kardach wrote: > > From: Michal Mazurek <maz@semihalf.com> > > > > Add an implementation of the rte_lpm_lookupx4() function for platforms > > without support for vector operations. > > > > This will be useful in the upcoming RISC-V port as well as any platform > > which may want to start with a basic level of LPM support. > > > > Signed-off-by: Michal Mazurek <maz@semihalf.com> > > Signed-off-by: Stanislaw Kardach <kda@semihalf.com> > > --- > > doc/guides/rel_notes/release_22_07.rst | 5 + > > lib/lpm/meson.build | 1 + > > lib/lpm/rte_lpm.h | 4 +- > > lib/lpm/rte_lpm_scalar.h | 122 +++++++++++++++++++++++++ > > 4 files changed, 131 insertions(+), 1 deletion(-) > > create mode 100644 lib/lpm/rte_lpm_scalar.h > > > > diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst > > index 4ae91dd94d..73e8d632f2 100644 > > --- a/doc/guides/rel_notes/release_22_07.rst > > +++ b/doc/guides/rel_notes/release_22_07.rst > > @@ -70,6 +70,11 @@ New Features > > * Added AH mode support in lookaside protocol (IPsec) for CN9K & CN10K. > > * Added AES-GMAC support in lookaside protocol (IPsec) for CN9K & CN10K. > > > > +* **Added scalar version of the LPM library.** > > + > > + * Added scalar implementation of ``rte_lpm_lookupx4``. This is a fall-back > > + implementation for platforms that don't support vector operations. 
> > + > > > > Removed Items > > ------------- > > diff --git a/lib/lpm/meson.build b/lib/lpm/meson.build > > index 78d91d3421..6b47361fce 100644 > > --- a/lib/lpm/meson.build > > +++ b/lib/lpm/meson.build > > @@ -14,6 +14,7 @@ headers = files('rte_lpm.h', 'rte_lpm6.h') > > indirect_headers += files( > > 'rte_lpm_altivec.h', > > 'rte_lpm_neon.h', > > + 'rte_lpm_scalar.h', > > 'rte_lpm_sse.h', > > 'rte_lpm_sve.h', > > ) > > diff --git a/lib/lpm/rte_lpm.h b/lib/lpm/rte_lpm.h > > index eb91960e81..b5db6a353a 100644 > > --- a/lib/lpm/rte_lpm.h > > +++ b/lib/lpm/rte_lpm.h > > @@ -405,8 +405,10 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], > > #endif > > #elif defined(RTE_ARCH_PPC_64) > > #include "rte_lpm_altivec.h" > > -#else > > +#elif defined(RTE_ARCH_X86) > > #include "rte_lpm_sse.h" > > +#else > > +#include "rte_lpm_scalar.h" > > #endif > > > > #ifdef __cplusplus > > diff --git a/lib/lpm/rte_lpm_scalar.h b/lib/lpm/rte_lpm_scalar.h > > new file mode 100644 > > index 0000000000..991b94e687 > > --- /dev/null > > +++ b/lib/lpm/rte_lpm_scalar.h > > @@ -0,0 +1,122 @@ > > +/* SPDX-License-Identifier: BSD-3-Clause > > + * Copyright(c) 2022 StarFive > > + * Copyright(c) 2022 SiFive > > + * Copyright(c) 2022 Semihalf > > + */ > > + > > +#ifndef _RTE_LPM_SCALAR_H_ > > +#define _RTE_LPM_SCALAR_H_ > > + > > +#include <rte_branch_prediction.h> > > +#include <rte_byteorder.h> > > +#include <rte_common.h> > > +#include <rte_vect.h> > > + > > +#ifdef __cplusplus > > +extern "C" { > > +#endif > > + > > +static inline void > > +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], > > + uint32_t defv) > > +{ > > + rte_xmm_t i24; > > + rte_xmm_t i8; > > + uint32_t tbl[4]; > > + uint64_t pt, pt2; > > + const uint32_t *ptbl; > > + > > + const rte_xmm_t mask8 = { > > + .u32 = {UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX}}; > > + > > + /* > > + * RTE_LPM_VALID_EXT_ENTRY_BITMASK for 2 LPM entries > > + * as one 64-bit value (0x0300000003000000). > > + */ > > + const uint64_t mask_xv = > > + ((uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK | > > + (uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK << 32); > > + > > + /* > > + * RTE_LPM_LOOKUP_SUCCESS for 2 LPM entries > > + * as one 64-bit value (0x0100000001000000). > > + */ > > + const uint64_t mask_v = > > + ((uint64_t)RTE_LPM_LOOKUP_SUCCESS | > > + (uint64_t)RTE_LPM_LOOKUP_SUCCESS << 32); > > + > > + /* get 4 indexes for tbl24[]. */ > > + i24.x = ip; > > + i24.u32[0] >>= CHAR_BIT; > > + i24.u32[1] >>= CHAR_BIT; > > + i24.u32[2] >>= CHAR_BIT; > > + i24.u32[3] >>= CHAR_BIT; > > + > > + /* extract values from tbl24[] */ > > + ptbl = (const uint32_t *)&lpm->tbl24[i24.u32[0]]; > > + tbl[0] = *ptbl; > > + ptbl = (const uint32_t *)&lpm->tbl24[i24.u32[1]]; > > + tbl[1] = *ptbl; > > + ptbl = (const uint32_t *)&lpm->tbl24[i24.u32[2]]; > > + tbl[2] = *ptbl; > > + ptbl = (const uint32_t *)&lpm->tbl24[i24.u32[3]]; > > + tbl[3] = *ptbl; > > + > > + /* get 4 indexes for tbl8[]. */ > > + i8.x = ip; > > + i8.u64[0] &= mask8.u64[0]; > > + i8.u64[1] &= mask8.u64[1]; > > + > > + pt = (uint64_t)tbl[0] | > > + (uint64_t)tbl[1] << 32; > > + pt2 = (uint64_t)tbl[2] | > > + (uint64_t)tbl[3] << 32; > > + > > + /* search successfully finished for all 4 IP addresses. 
*/ > > + if (likely((pt & mask_xv) == mask_v) && > > + likely((pt2 & mask_xv) == mask_v)) { > > + *(uint64_t *)hop = pt & RTE_LPM_MASKX4_RES; > > + *(uint64_t *)(hop + 2) = pt2 & RTE_LPM_MASKX4_RES; > > + return; > > + } > > + > > + if (unlikely((pt & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == > > + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { > > + i8.u32[0] = i8.u32[0] + > > + (tbl[0] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; > > + ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[0]]; > > + tbl[0] = *ptbl; > > + } > > + if (unlikely((pt >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == > > + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { > > + i8.u32[1] = i8.u32[1] + > > + (tbl[1] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; > > + ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[1]]; > > + tbl[1] = *ptbl; > > + } > > + if (unlikely((pt2 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == > > + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { > > + i8.u32[2] = i8.u32[2] + > > + (tbl[2] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; > > + ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[2]]; > > + tbl[2] = *ptbl; > > + } > > + if (unlikely((pt2 >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == > > + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { > > + i8.u32[3] = i8.u32[3] + > > + (tbl[3] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; > > + ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[3]]; > > + tbl[3] = *ptbl; > > + } > > + > > + hop[0] = (tbl[0] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[0] & 0x00FFFFFF : defv; > > + hop[1] = (tbl[1] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[1] & 0x00FFFFFF : defv; > > + hop[2] = (tbl[2] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[2] & 0x00FFFFFF : defv; > > + hop[3] = (tbl[3] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[3] & 0x00FFFFFF : defv; > > +} > > + > > +#ifdef __cplusplus > > +} > > +#endif > > + > > +#endif /* _RTE_LPM_SCALAR_H_ */ > > -- > Regards, > Vladimir ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/1] lpm: add a scalar version of lookupx4 function 2022-05-24 16:28 ` Stanisław Kardach @ 2022-05-27 11:16 ` Stanisław Kardach 2022-05-27 13:16 ` Medvedkin, Vladimir 0 siblings, 1 reply; 19+ messages in thread From: Stanisław Kardach @ 2022-05-27 11:16 UTC (permalink / raw) To: Medvedkin, Vladimir Cc: Michal Mazurek, dev, Frank Zhao, Sam Grove, Marcin Wojtas, upstream, Bruce Richardson On Tue, May 24, 2022 at 6:28 PM Stanisław Kardach <kda@semihalf.com> wrote: <snip> > That said I wonder why do we have different const requirements for > rte_lpm_lookup() and rte_lpm_lookupx4(): > static inline int rte_lpm_lookup(struct rte_lpm *lpm, uint32_t ip, > uint32_t *next_hop) > static inline void rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t > ip, uint32_t hop[4], uint32_t defv); > I think both should be const. > To re-iterate the question, should I also post a patch for changing rte_lpm_lookup() to add "const" to "struct rte_lpm *lpm" argument? rte_lpm_lookup_bulk_func() and rte_lpm_lookupx4() already take lpm as const. I'm pushing because otherwise I get a const discard warning in the scalar version of rte_lpm_lookupx4() utilizing rte_lpm_lookup(). Best Regards, Stanislaw Kardach ^ permalink raw reply [flat|nested] 19+ messages in thread
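To illustrate why the asymmetry matters to callers, consider a hypothetical read-only fast-path helper (route_batch() below is an invented name, not DPDK API): with the current prototypes it can pass its const table pointer to rte_lpm_lookupx4() or the bulk lookup, but has to cast the const away for plain rte_lpm_lookup():

#include <stdint.h>
#include <rte_lpm.h>

static void
route_batch(const struct rte_lpm *lpm, const uint32_t *ips, uint32_t *hops,
		unsigned int n)
{
	uint32_t nh;
	unsigned int i;

	for (i = 0; i < n; i++) {
		/* the cast is only needed because rte_lpm_lookup() lacks
		 * const; without it gcc warns (-Wdiscarded-qualifiers) */
		if (rte_lpm_lookup((struct rte_lpm *)lpm, ips[i], &nh) == 0)
			hops[i] = nh;
		else
			hops[i] = UINT32_MAX;
	}
}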
* Re: [PATCH 1/1] lpm: add a scalar version of lookupx4 function 2022-05-27 11:16 ` Stanisław Kardach @ 2022-05-27 13:16 ` Medvedkin, Vladimir 0 siblings, 0 replies; 19+ messages in thread From: Medvedkin, Vladimir @ 2022-05-27 13:16 UTC (permalink / raw) To: Stanisław Kardach Cc: Michal Mazurek, dev, Frank Zhao, Sam Grove, Marcin Wojtas, upstream, Bruce Richardson Hi Stanislaw, On 27/05/2022 12:16, Stanisław Kardach wrote: > On Tue, May 24, 2022 at 6:28 PM Stanisław Kardach <kda@semihalf.com> wrote: > <snip> >> That said I wonder why do we have different const requirements for >> rte_lpm_lookup() and rte_lpm_lookupx4(): >> static inline int rte_lpm_lookup(struct rte_lpm *lpm, uint32_t ip, >> uint32_t *next_hop) >> static inline void rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t >> ip, uint32_t hop[4], uint32_t defv); >> I think both should be const. >> > To re-iterate the question, should I also post a patch for changing > rte_lpm_lookup() to add "const" to "struct rte_lpm *lpm" argument? > rte_lpm_lookup_bulk_func() and rte_lpm_lookupx4() already take lpm as > const. > I'm pushing because otherwise I get a const discard warning in the > scalar version of rte_lpm_lookupx4() utilizing rte_lpm_lookup(). Since these are inline functions, there will be no problems with the ABI/API, so please add const to the *lpm argument. Thanks! > > Best Regards, > Stanislaw Kardach -- Regards, Vladimir ^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH v2 1/2] lpm: add const to lpm arg of rte_lpm_lookup 2022-05-10 11:58 [PATCH 1/1] lpm: add a scalar version of lookupx4 function Stanislaw Kardach 2022-05-19 17:02 ` Medvedkin, Vladimir @ 2022-05-27 18:18 ` Stanislaw Kardach 2022-05-27 18:18 ` [PATCH v2 2/2] lpm: add a scalar version of lookupx4 function Stanislaw Kardach 2022-05-30 18:24 ` [PATCH v3 1/2] lpm: add const to lpm arg of rte_lpm_lookup Stanislaw Kardach 1 sibling, 2 replies; 19+ messages in thread From: Stanislaw Kardach @ 2022-05-27 18:18 UTC (permalink / raw) To: Vladimir Medvedkin Cc: Stanislaw Kardach, dev, Frank Zhao, Sam Grove, mw, upstream All other rte_lpm_lookup* functions take lpm argument as a const. As the basic rte_lpm_lookup() performs the same function, it should also do that. As this function is inline, no API/ABI change happens. Signed-off-by: Stanislaw Kardach <kda@semihalf.com> --- lib/lpm/rte_lpm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lpm/rte_lpm.h b/lib/lpm/rte_lpm.h index eb91960e81..1cf863a146 100644 --- a/lib/lpm/rte_lpm.h +++ b/lib/lpm/rte_lpm.h @@ -279,7 +279,7 @@ rte_lpm_delete_all(struct rte_lpm *lpm); * -EINVAL for incorrect arguments, -ENOENT on lookup miss, 0 on lookup hit */ static inline int -rte_lpm_lookup(struct rte_lpm *lpm, uint32_t ip, uint32_t *next_hop) +rte_lpm_lookup(const struct rte_lpm *lpm, uint32_t ip, uint32_t *next_hop) { unsigned tbl24_index = (ip >> 8); uint32_t tbl_entry; -- 2.30.2 ^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH v2 2/2] lpm: add a scalar version of lookupx4 function 2022-05-27 18:18 ` [PATCH v2 1/2] lpm: add const to lpm arg of rte_lpm_lookup Stanislaw Kardach @ 2022-05-27 18:18 ` Stanislaw Kardach 2022-05-27 20:15 ` Stephen Hemminger 2022-05-30 18:24 ` [PATCH v3 1/2] lpm: add const to lpm arg of rte_lpm_lookup Stanislaw Kardach 1 sibling, 1 reply; 19+ messages in thread From: Stanislaw Kardach @ 2022-05-27 18:18 UTC (permalink / raw) To: Vladimir Medvedkin Cc: Michal Mazurek, dev, Frank Zhao, Sam Grove, mw, upstream, Stanislaw Kardach From: Michal Mazurek <maz@semihalf.com> Add an implementation of the rte_lpm_lookupx4() function for platforms without support for vector operations. This will be useful in the upcoming RISC-V port as well as any platform which may want to start with a basic level of LPM support. Signed-off-by: Michal Mazurek <maz@semihalf.com> Signed-off-by: Stanislaw Kardach <kda@semihalf.com> --- doc/guides/rel_notes/release_22_07.rst | 5 ++++ lib/lpm/meson.build | 1 + lib/lpm/rte_lpm.h | 4 ++- lib/lpm/rte_lpm_scalar.h | 36 ++++++++++++++++++++++++++ 4 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 lib/lpm/rte_lpm_scalar.h diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst index e49cacecef..0cf3f71269 100644 --- a/doc/guides/rel_notes/release_22_07.rst +++ b/doc/guides/rel_notes/release_22_07.rst @@ -104,6 +104,11 @@ New Features * ``RTE_EVENT_QUEUE_ATTR_WEIGHT`` * ``RTE_EVENT_QUEUE_ATTR_AFFINITY`` +* **Added scalar version of the LPM library.** + + * Added scalar implementation of ``rte_lpm_lookupx4``. This is a fall-back + implementation for platforms that don't support vector operations. + Removed Items ------------- diff --git a/lib/lpm/meson.build b/lib/lpm/meson.build index 78d91d3421..6b47361fce 100644 --- a/lib/lpm/meson.build +++ b/lib/lpm/meson.build @@ -14,6 +14,7 @@ headers = files('rte_lpm.h', 'rte_lpm6.h') indirect_headers += files( 'rte_lpm_altivec.h', 'rte_lpm_neon.h', + 'rte_lpm_scalar.h', 'rte_lpm_sse.h', 'rte_lpm_sve.h', ) diff --git a/lib/lpm/rte_lpm.h b/lib/lpm/rte_lpm.h index 1cf863a146..4f38864fde 100644 --- a/lib/lpm/rte_lpm.h +++ b/lib/lpm/rte_lpm.h @@ -405,8 +405,10 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], #endif #elif defined(RTE_ARCH_PPC_64) #include "rte_lpm_altivec.h" -#else +#elif defined(RTE_ARCH_X86) #include "rte_lpm_sse.h" +#else +#include "rte_lpm_scalar.h" #endif #ifdef __cplusplus diff --git a/lib/lpm/rte_lpm_scalar.h b/lib/lpm/rte_lpm_scalar.h new file mode 100644 index 0000000000..2fc0e19161 --- /dev/null +++ b/lib/lpm/rte_lpm_scalar.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2022 StarFive + * Copyright(c) 2022 SiFive + * Copyright(c) 2022 Semihalf + */ + +#ifndef _RTE_LPM_SCALAR_H_ +#define _RTE_LPM_SCALAR_H_ + +#include <rte_branch_prediction.h> +#include <rte_byteorder.h> +#include <rte_common.h> +#include <rte_vect.h> + +#ifdef __cplusplus +extern "C" { +#endif + +static inline void +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], + uint32_t defv) +{ + uint32_t nh; + int i, ret; + + for (i = 0; i < 4; i++) { + ret = rte_lpm_lookup(lpm, ((rte_xmm_t)ip).u32[i], &nh); + hop[i] = (ret == 0) ? nh : defv; + } +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_LPM_SCALAR_H_ */ -- 2.30.2 ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v2 2/2] lpm: add a scalar version of lookupx4 function 2022-05-27 18:18 ` [PATCH v2 2/2] lpm: add a scalar version of lookupx4 function Stanislaw Kardach @ 2022-05-27 20:15 ` Stephen Hemminger 2022-05-30 7:52 ` Bruce Richardson 0 siblings, 1 reply; 19+ messages in thread From: Stephen Hemminger @ 2022-05-27 20:15 UTC (permalink / raw) To: Stanislaw Kardach Cc: Vladimir Medvedkin, Michal Mazurek, dev, Frank Zhao, Sam Grove, mw, upstream On Fri, 27 May 2022 20:18:22 +0200 Stanislaw Kardach <kda@semihalf.com> wrote: > +static inline void > +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], > + uint32_t defv) > +{ > + uint32_t nh; > + int i, ret; > + > + for (i = 0; i < 4; i++) { > + ret = rte_lpm_lookup(lpm, ((rte_xmm_t)ip).u32[i], &nh); > + hop[i] = (ret == 0) ? nh : defv; > + } > +} For performance, manually unroll the loop. ^ permalink raw reply [flat|nested] 19+ messages in thread
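For concreteness, the manually unrolled variant being suggested would look roughly as follows (this is essentially the shape the function takes later in the thread, in v3):

static inline void
rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
		uint32_t defv)
{
	uint32_t nh;
	int ret;

	ret = rte_lpm_lookup(lpm, ((rte_xmm_t)ip).u32[0], &nh);
	hop[0] = (ret == 0) ? nh : defv;
	ret = rte_lpm_lookup(lpm, ((rte_xmm_t)ip).u32[1], &nh);
	hop[1] = (ret == 0) ? nh : defv;
	ret = rte_lpm_lookup(lpm, ((rte_xmm_t)ip).u32[2], &nh);
	hop[2] = (ret == 0) ? nh : defv;
	ret = rte_lpm_lookup(lpm, ((rte_xmm_t)ip).u32[3], &nh);
	hop[3] = (ret == 0) ? nh : defv;
}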
* Re: [PATCH v2 2/2] lpm: add a scalar version of lookupx4 function 2022-05-27 20:15 ` Stephen Hemminger @ 2022-05-30 7:52 ` Bruce Richardson 2022-05-30 8:00 ` Morten Brørup 0 siblings, 1 reply; 19+ messages in thread From: Bruce Richardson @ 2022-05-30 7:52 UTC (permalink / raw) To: Stephen Hemminger Cc: Stanislaw Kardach, Vladimir Medvedkin, Michal Mazurek, dev, Frank Zhao, Sam Grove, mw, upstream On Fri, May 27, 2022 at 01:15:20PM -0700, Stephen Hemminger wrote: > On Fri, 27 May 2022 20:18:22 +0200 > Stanislaw Kardach <kda@semihalf.com> wrote: > > > +static inline void > > +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], > > + uint32_t defv) > > +{ > > + uint32_t nh; > > + int i, ret; > > + > > + for (i = 0; i < 4; i++) { > > + ret = rte_lpm_lookup(lpm, ((rte_xmm_t)ip).u32[i], &nh); > > + hop[i] = (ret == 0) ? nh : defv; > > + } > > +} > > For performance, manually unroll the loop. Given a constant 4x iterations, will compilers not unroll this automatically. I think the loop is a little clearer if it can be kept /Bruce ^ permalink raw reply [flat|nested] 19+ messages in thread
* RE: [PATCH v2 2/2] lpm: add a scalar version of lookupx4 function 2022-05-30 7:52 ` Bruce Richardson @ 2022-05-30 8:00 ` Morten Brørup 2022-05-30 10:42 ` Bruce Richardson 0 siblings, 1 reply; 19+ messages in thread From: Morten Brørup @ 2022-05-30 8:00 UTC (permalink / raw) To: Bruce Richardson, Stephen Hemminger Cc: Stanislaw Kardach, Vladimir Medvedkin, Michal Mazurek, dev, Frank Zhao, Sam Grove, mw, upstream > From: Bruce Richardson [mailto:bruce.richardson@intel.com] > Sent: Monday, 30 May 2022 09.52 > > On Fri, May 27, 2022 at 01:15:20PM -0700, Stephen Hemminger wrote: > > On Fri, 27 May 2022 20:18:22 +0200 > > Stanislaw Kardach <kda@semihalf.com> wrote: > > > > > +static inline void > > > +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t > hop[4], > > > + uint32_t defv) > > > +{ > > > + uint32_t nh; > > > + int i, ret; > > > + > > > + for (i = 0; i < 4; i++) { > > > + ret = rte_lpm_lookup(lpm, ((rte_xmm_t)ip).u32[i], &nh); > > > + hop[i] = (ret == 0) ? nh : defv; > > > + } > > > +} > > > > For performance, manually unroll the loop. > > Given a constant 4x iterations, will compilers not unroll this > automatically. I think the loop is a little clearer if it can be kept > > /Bruce If in doubt, add this and look at the assembler output: #define REVIEW_INLINE_FUNCTIONS 1 #if REVIEW_INLINE_FUNCTIONS /* For compiler output review purposes only. */ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wmissing-prototypes" void review_rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], uint32_t defv) { rte_lpm_lookupx4(lpm, ip, hop, defv); } #pragma GCC diagnostic pop #endif /* REVIEW_INLINE_FUNCTIONS */ ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v2 2/2] lpm: add a scalar version of lookupx4 function 2022-05-30 8:00 ` Morten Brørup @ 2022-05-30 10:42 ` Bruce Richardson 2022-05-30 11:20 ` Stanisław Kardach 0 siblings, 1 reply; 19+ messages in thread From: Bruce Richardson @ 2022-05-30 10:42 UTC (permalink / raw) To: Morten Brørup Cc: Stephen Hemminger, Stanislaw Kardach, Vladimir Medvedkin, Michal Mazurek, dev, Frank Zhao, Sam Grove, mw, upstream On Mon, May 30, 2022 at 10:00:34AM +0200, Morten Brørup wrote: > > From: Bruce Richardson [mailto:bruce.richardson@intel.com] > > Sent: Monday, 30 May 2022 09.52 > > > > On Fri, May 27, 2022 at 01:15:20PM -0700, Stephen Hemminger wrote: > > > On Fri, 27 May 2022 20:18:22 +0200 > > > Stanislaw Kardach <kda@semihalf.com> wrote: > > > > > > > +static inline void > > > > +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t > > hop[4], > > > > + uint32_t defv) > > > > +{ > > > > + uint32_t nh; > > > > + int i, ret; > > > > + > > > > + for (i = 0; i < 4; i++) { > > > > + ret = rte_lpm_lookup(lpm, ((rte_xmm_t)ip).u32[i], &nh); > > > > + hop[i] = (ret == 0) ? nh : defv; > > > > + } > > > > +} > > > > > > For performance, manually unroll the loop. > > > > Given a constant 4x iterations, will compilers not unroll this > > automatically. I think the loop is a little clearer if it can be kept > > > > /Bruce > > If in doubt, add this and look at the assembler output: > > #define REVIEW_INLINE_FUNCTIONS 1 > > #if REVIEW_INLINE_FUNCTIONS /* For compiler output review purposes only. */ > #pragma GCC diagnostic push > #pragma GCC diagnostic ignored "-Wmissing-prototypes" > void review_rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], uint32_t defv) > { > rte_lpm_lookupx4(lpm, ip, hop, defv); > } > #pragma GCC diagnostic pop > #endif /* REVIEW_INLINE_FUNCTIONS */ > Used godbolt.org to check and indeed the function is not unrolled. (Gcc 11.2, with flags "-O3 -march=icelake-server"). Manually unrolling changes the assembly generated in interesting ways. For example, it appears to generate more cmov-type instructions for the miss/default-value case rather than using branches as in the looped version. Whether this is better or not may depend upon usecase - if one expects most lpm lookup entries to hit, then having (predictable) branches may well be cheaper. In any case, I'll withdraw any object to unrolling, but I'm still not convinced it's necessary. /Bruce ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v2 2/2] lpm: add a scalar version of lookupx4 function 2022-05-30 10:42 ` Bruce Richardson @ 2022-05-30 11:20 ` Stanisław Kardach 2022-05-30 12:46 ` Bruce Richardson 0 siblings, 1 reply; 19+ messages in thread From: Stanisław Kardach @ 2022-05-30 11:20 UTC (permalink / raw) To: Bruce Richardson Cc: Morten Brørup, Stephen Hemminger, Vladimir Medvedkin, Michal Mazurek, dev, Frank Zhao, Sam Grove, Marcin Wojtas, upstream On Mon, May 30, 2022 at 12:42 PM Bruce Richardson <bruce.richardson@intel.com> wrote: > > On Mon, May 30, 2022 at 10:00:34AM +0200, Morten Brørup wrote: > > > From: Bruce Richardson [mailto:bruce.richardson@intel.com] > > > Sent: Monday, 30 May 2022 09.52 > > > > > > On Fri, May 27, 2022 at 01:15:20PM -0700, Stephen Hemminger wrote: > > > > On Fri, 27 May 2022 20:18:22 +0200 > > > > Stanislaw Kardach <kda@semihalf.com> wrote: > > > > > > > > > +static inline void > > > > > +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t > > > hop[4], > > > > > + uint32_t defv) > > > > > +{ > > > > > + uint32_t nh; > > > > > + int i, ret; > > > > > + > > > > > + for (i = 0; i < 4; i++) { > > > > > + ret = rte_lpm_lookup(lpm, ((rte_xmm_t)ip).u32[i], &nh); > > > > > + hop[i] = (ret == 0) ? nh : defv; > > > > > + } > > > > > +} > > > > > > > > For performance, manually unroll the loop. > > > > > > Given a constant 4x iterations, will compilers not unroll this > > > automatically. I think the loop is a little clearer if it can be kept > > > > > > /Bruce > > > > If in doubt, add this and look at the assembler output: > > > > #define REVIEW_INLINE_FUNCTIONS 1 > > > > #if REVIEW_INLINE_FUNCTIONS /* For compiler output review purposes only. */ > > #pragma GCC diagnostic push > > #pragma GCC diagnostic ignored "-Wmissing-prototypes" > > void review_rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], uint32_t defv) > > { > > rte_lpm_lookupx4(lpm, ip, hop, defv); > > } > > #pragma GCC diagnostic pop > > #endif /* REVIEW_INLINE_FUNCTIONS */ > > > > Used godbolt.org to check and indeed the function is not unrolled. > (Gcc 11.2, with flags "-O3 -march=icelake-server"). > > Manually unrolling changes the assembly generated in interesting ways. For > example, it appears to generate more cmov-type instructions for the > miss/default-value case rather than using branches as in the looped > version. Whether this is better or not may depend upon usecase - if one > expects most lpm lookup entries to hit, then having (predictable) branches > may well be cheaper. > > In any case, I'll withdraw any object to unrolling, but I'm still not > convinced it's necessary. > > /Bruce Interestingly enough until I've defined unlikely() in godbolt, I did not get any automatic unrolling on godbolt (either with x86 or RISC-V GCC). Did you get any compilation warnings? That said it only happens on O3 since it implies -fpeel-loops. O3 is the default for DPDK. ^ permalink raw reply [flat|nested] 19+ messages in thread
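For anyone wanting to repeat the experiment, here is a self-contained snippet that can be pasted into a compiler explorer. The rte_* type and the lookup itself are stubbed out (only the shape of the loop matters), and likely()/unlikely() are defined only because the real rte_lpm_lookup() body uses them if pasted in as well:

#include <stdint.h>

#define likely(x)	__builtin_expect(!!(x), 1)
#define unlikely(x)	__builtin_expect(!!(x), 0)

struct rte_lpm;			/* opaque stub for the real table */

/* stub with the post-v2 signature of rte_lpm_lookup() */
extern int rte_lpm_lookup(const struct rte_lpm *lpm, uint32_t ip,
		uint32_t *next_hop);

void
lookupx4_loop(const struct rte_lpm *lpm, const uint32_t ip[4],
		uint32_t hop[4], uint32_t defv)
{
	uint32_t nh;
	int i, ret;

	/* build with -O3 (which enables -fpeel-loops) and compare against
	 * -O2 to see whether the constant 4-iteration loop gets unrolled */
	for (i = 0; i < 4; i++) {
		ret = rte_lpm_lookup(lpm, ip[i], &nh);
		hop[i] = (ret == 0) ? nh : defv;
	}
}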
* Re: [PATCH v2 2/2] lpm: add a scalar version of lookupx4 function 2022-05-30 11:20 ` Stanisław Kardach @ 2022-05-30 12:46 ` Bruce Richardson 0 siblings, 0 replies; 19+ messages in thread From: Bruce Richardson @ 2022-05-30 12:46 UTC (permalink / raw) To: Stanisław Kardach Cc: Morten Brørup, Stephen Hemminger, Vladimir Medvedkin, Michal Mazurek, dev, Frank Zhao, Sam Grove, Marcin Wojtas, upstream On Mon, May 30, 2022 at 01:20:50PM +0200, Stanisław Kardach wrote: > On Mon, May 30, 2022 at 12:42 PM Bruce Richardson > <bruce.richardson@intel.com> wrote: > > > > On Mon, May 30, 2022 at 10:00:34AM +0200, Morten Brørup wrote: > > > > From: Bruce Richardson [mailto:bruce.richardson@intel.com] > > > > Sent: Monday, 30 May 2022 09.52 > > > > > > > > On Fri, May 27, 2022 at 01:15:20PM -0700, Stephen Hemminger wrote: > > > > > On Fri, 27 May 2022 20:18:22 +0200 > > > > > Stanislaw Kardach <kda@semihalf.com> wrote: > > > > > > > > > > > +static inline void > > > > > > +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t > > > > hop[4], > > > > > > + uint32_t defv) > > > > > > +{ > > > > > > + uint32_t nh; > > > > > > + int i, ret; > > > > > > + > > > > > > + for (i = 0; i < 4; i++) { > > > > > > + ret = rte_lpm_lookup(lpm, ((rte_xmm_t)ip).u32[i], &nh); > > > > > > + hop[i] = (ret == 0) ? nh : defv; > > > > > > + } > > > > > > +} > > > > > > > > > > For performance, manually unroll the loop. > > > > > > > > Given a constant 4x iterations, will compilers not unroll this > > > > automatically. I think the loop is a little clearer if it can be kept > > > > > > > > /Bruce > > > > > > If in doubt, add this and look at the assembler output: > > > > > > #define REVIEW_INLINE_FUNCTIONS 1 > > > > > > #if REVIEW_INLINE_FUNCTIONS /* For compiler output review purposes only. */ > > > #pragma GCC diagnostic push > > > #pragma GCC diagnostic ignored "-Wmissing-prototypes" > > > void review_rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], uint32_t defv) > > > { > > > rte_lpm_lookupx4(lpm, ip, hop, defv); > > > } > > > #pragma GCC diagnostic pop > > > #endif /* REVIEW_INLINE_FUNCTIONS */ > > > > > > > Used godbolt.org to check and indeed the function is not unrolled. > > (Gcc 11.2, with flags "-O3 -march=icelake-server"). > > > > Manually unrolling changes the assembly generated in interesting ways. For > > example, it appears to generate more cmov-type instructions for the > > miss/default-value case rather than using branches as in the looped > > version. Whether this is better or not may depend upon usecase - if one > > expects most lpm lookup entries to hit, then having (predictable) branches > > may well be cheaper. > > > > In any case, I'll withdraw any object to unrolling, but I'm still not > > convinced it's necessary. > > > > /Bruce > Interestingly enough until I've defined unlikely() in godbolt, I did > not get any automatic unrolling on godbolt (either with x86 or RISC-V > GCC). Did you get any compilation warnings? That matches what I saw. I then just used manual unrolling i.e. copy-paste the 2 lines 4 times, to see what the output was like then. > That said it only happens on O3 since it implies -fpeel-loops. O3 is > the default for DPDK. ^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH v3 1/2] lpm: add const to lpm arg of rte_lpm_lookup 2022-05-27 18:18 ` [PATCH v2 1/2] lpm: add const to lpm arg of rte_lpm_lookup Stanislaw Kardach 2022-05-27 18:18 ` [PATCH v2 2/2] lpm: add a scalar version of lookupx4 function Stanislaw Kardach @ 2022-05-30 18:24 ` Stanislaw Kardach 2022-05-30 18:24 ` [PATCH v3 2/2] lpm: add a scalar version of lookupx4 function Stanislaw Kardach ` (2 more replies) 1 sibling, 3 replies; 19+ messages in thread From: Stanislaw Kardach @ 2022-05-30 18:24 UTC (permalink / raw) To: Vladimir Medvedkin Cc: Stanislaw Kardach, dev, Frank Zhao, Sam Grove, mw, upstream All other rte_lpm_lookup* functions take lpm argument as a const. As the basic rte_lpm_lookup() performs the same function, it should also do that. As this function is inline, no API/ABI change happens. Signed-off-by: Stanislaw Kardach <kda@semihalf.com> --- lib/lpm/rte_lpm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lpm/rte_lpm.h b/lib/lpm/rte_lpm.h index eb91960e81..1cf863a146 100644 --- a/lib/lpm/rte_lpm.h +++ b/lib/lpm/rte_lpm.h @@ -279,7 +279,7 @@ rte_lpm_delete_all(struct rte_lpm *lpm); * -EINVAL for incorrect arguments, -ENOENT on lookup miss, 0 on lookup hit */ static inline int -rte_lpm_lookup(struct rte_lpm *lpm, uint32_t ip, uint32_t *next_hop) +rte_lpm_lookup(const struct rte_lpm *lpm, uint32_t ip, uint32_t *next_hop) { unsigned tbl24_index = (ip >> 8); uint32_t tbl_entry; -- 2.30.2 ^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH v3 2/2] lpm: add a scalar version of lookupx4 function 2022-05-30 18:24 ` [PATCH v3 1/2] lpm: add const to lpm arg of rte_lpm_lookup Stanislaw Kardach @ 2022-05-30 18:24 ` Stanislaw Kardach 2022-06-01 9:41 ` Medvedkin, Vladimir 2022-05-30 20:38 ` [PATCH v3 1/2] lpm: add const to lpm arg of rte_lpm_lookup Stephen Hemminger 2022-06-01 9:35 ` Medvedkin, Vladimir 2 siblings, 1 reply; 19+ messages in thread From: Stanislaw Kardach @ 2022-05-30 18:24 UTC (permalink / raw) To: Vladimir Medvedkin Cc: Michal Mazurek, dev, Frank Zhao, Sam Grove, mw, upstream, Stanislaw Kardach From: Michal Mazurek <maz@semihalf.com> Add an implementation of the rte_lpm_lookupx4() function for platforms without support for vector operations. This will be useful in the upcoming RISC-V port as well as any platform which may want to start with a basic level of LPM support. Signed-off-by: Michal Mazurek <maz@semihalf.com> Signed-off-by: Stanislaw Kardach <kda@semihalf.com> --- doc/guides/rel_notes/release_22_07.rst | 5 ++++ lib/lpm/meson.build | 1 + lib/lpm/rte_lpm.h | 4 ++- lib/lpm/rte_lpm_scalar.h | 40 ++++++++++++++++++++++++++ 4 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 lib/lpm/rte_lpm_scalar.h diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst index e49cacecef..0cf3f71269 100644 --- a/doc/guides/rel_notes/release_22_07.rst +++ b/doc/guides/rel_notes/release_22_07.rst @@ -104,6 +104,11 @@ New Features * ``RTE_EVENT_QUEUE_ATTR_WEIGHT`` * ``RTE_EVENT_QUEUE_ATTR_AFFINITY`` +* **Added scalar version of the LPM library.** + + * Added scalar implementation of ``rte_lpm_lookupx4``. This is a fall-back + implementation for platforms that don't support vector operations. + Removed Items ------------- diff --git a/lib/lpm/meson.build b/lib/lpm/meson.build index 78d91d3421..6b47361fce 100644 --- a/lib/lpm/meson.build +++ b/lib/lpm/meson.build @@ -14,6 +14,7 @@ headers = files('rte_lpm.h', 'rte_lpm6.h') indirect_headers += files( 'rte_lpm_altivec.h', 'rte_lpm_neon.h', + 'rte_lpm_scalar.h', 'rte_lpm_sse.h', 'rte_lpm_sve.h', ) diff --git a/lib/lpm/rte_lpm.h b/lib/lpm/rte_lpm.h index 1cf863a146..4f38864fde 100644 --- a/lib/lpm/rte_lpm.h +++ b/lib/lpm/rte_lpm.h @@ -405,8 +405,10 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], #endif #elif defined(RTE_ARCH_PPC_64) #include "rte_lpm_altivec.h" -#else +#elif defined(RTE_ARCH_X86) #include "rte_lpm_sse.h" +#else +#include "rte_lpm_scalar.h" #endif #ifdef __cplusplus diff --git a/lib/lpm/rte_lpm_scalar.h b/lib/lpm/rte_lpm_scalar.h new file mode 100644 index 0000000000..4ae1b6f0b8 --- /dev/null +++ b/lib/lpm/rte_lpm_scalar.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2022 StarFive + * Copyright(c) 2022 SiFive + * Copyright(c) 2022 Semihalf + */ + +#ifndef _RTE_LPM_SCALAR_H_ +#define _RTE_LPM_SCALAR_H_ + +#include <rte_branch_prediction.h> +#include <rte_byteorder.h> +#include <rte_common.h> +#include <rte_vect.h> + +#ifdef __cplusplus +extern "C" { +#endif + +static inline void +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], + uint32_t defv) +{ + uint32_t nh; + int ret; + + ret = rte_lpm_lookup(lpm, ((rte_xmm_t)ip).u32[0], &nh); + hop[0] = (ret == 0) ? nh : defv; + ret = rte_lpm_lookup(lpm, ((rte_xmm_t)ip).u32[1], &nh); + hop[1] = (ret == 0) ? nh : defv; + ret = rte_lpm_lookup(lpm, ((rte_xmm_t)ip).u32[2], &nh); + hop[2] = (ret == 0) ? nh : defv; + ret = rte_lpm_lookup(lpm, ((rte_xmm_t)ip).u32[3], &nh); + hop[3] = (ret == 0) ? 
nh : defv; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_LPM_SCALAR_H_ */ -- 2.30.2 ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v3 2/2] lpm: add a scalar version of lookupx4 function 2022-05-30 18:24 ` [PATCH v3 2/2] lpm: add a scalar version of lookupx4 function Stanislaw Kardach @ 2022-06-01 9:41 ` Medvedkin, Vladimir 2022-06-01 10:32 ` Stanisław Kardach 0 siblings, 1 reply; 19+ messages in thread From: Medvedkin, Vladimir @ 2022-06-01 9:41 UTC (permalink / raw) To: Stanislaw Kardach Cc: Michal Mazurek, dev, Frank Zhao, Sam Grove, mw, upstream Hi Stanislaw, On 30/05/2022 19:24, Stanislaw Kardach wrote: > From: Michal Mazurek <maz@semihalf.com> > > Add an implementation of the rte_lpm_lookupx4() function for platforms > without support for vector operations. > > This will be useful in the upcoming RISC-V port as well as any platform > which may want to start with a basic level of LPM support. > > Signed-off-by: Michal Mazurek <maz@semihalf.com> > Signed-off-by: Stanislaw Kardach <kda@semihalf.com> > --- > doc/guides/rel_notes/release_22_07.rst | 5 ++++ > lib/lpm/meson.build | 1 + > lib/lpm/rte_lpm.h | 4 ++- > lib/lpm/rte_lpm_scalar.h | 40 ++++++++++++++++++++++++++ > 4 files changed, 49 insertions(+), 1 deletion(-) > create mode 100644 lib/lpm/rte_lpm_scalar.h > > diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst > index e49cacecef..0cf3f71269 100644 > --- a/doc/guides/rel_notes/release_22_07.rst > +++ b/doc/guides/rel_notes/release_22_07.rst > @@ -104,6 +104,11 @@ New Features > * ``RTE_EVENT_QUEUE_ATTR_WEIGHT`` > * ``RTE_EVENT_QUEUE_ATTR_AFFINITY`` > > +* **Added scalar version of the LPM library.** > + > + * Added scalar implementation of ``rte_lpm_lookupx4``. This is a fall-back > + implementation for platforms that don't support vector operations. > + > > Removed Items > ------------- > diff --git a/lib/lpm/meson.build b/lib/lpm/meson.build > index 78d91d3421..6b47361fce 100644 > --- a/lib/lpm/meson.build > +++ b/lib/lpm/meson.build > @@ -14,6 +14,7 @@ headers = files('rte_lpm.h', 'rte_lpm6.h') > indirect_headers += files( > 'rte_lpm_altivec.h', > 'rte_lpm_neon.h', > + 'rte_lpm_scalar.h', > 'rte_lpm_sse.h', > 'rte_lpm_sve.h', > ) > diff --git a/lib/lpm/rte_lpm.h b/lib/lpm/rte_lpm.h > index 1cf863a146..4f38864fde 100644 > --- a/lib/lpm/rte_lpm.h > +++ b/lib/lpm/rte_lpm.h > @@ -405,8 +405,10 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], > #endif > #elif defined(RTE_ARCH_PPC_64) > #include "rte_lpm_altivec.h" > -#else > +#elif defined(RTE_ARCH_X86) > #include "rte_lpm_sse.h" > +#else > +#include "rte_lpm_scalar.h" > #endif > > #ifdef __cplusplus > diff --git a/lib/lpm/rte_lpm_scalar.h b/lib/lpm/rte_lpm_scalar.h > new file mode 100644 > index 0000000000..4ae1b6f0b8 > --- /dev/null > +++ b/lib/lpm/rte_lpm_scalar.h > @@ -0,0 +1,40 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2022 StarFive > + * Copyright(c) 2022 SiFive > + * Copyright(c) 2022 Semihalf > + */ > + > +#ifndef _RTE_LPM_SCALAR_H_ > +#define _RTE_LPM_SCALAR_H_ > + > +#include <rte_branch_prediction.h> > +#include <rte_byteorder.h> > +#include <rte_common.h> Just a one nit, I think these 3 headers are not needed and can be removed. Apart from it looks good to me. Thanks! > +#include <rte_vect.h> > + > +#ifdef __cplusplus > +extern "C" { > +#endif > + > +static inline void > +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], > + uint32_t defv) > +{ > + uint32_t nh; > + int ret; > + > + ret = rte_lpm_lookup(lpm, ((rte_xmm_t)ip).u32[0], &nh); > + hop[0] = (ret == 0) ? 
nh : defv; > + ret = rte_lpm_lookup(lpm, ((rte_xmm_t)ip).u32[1], &nh); > + hop[1] = (ret == 0) ? nh : defv; > + ret = rte_lpm_lookup(lpm, ((rte_xmm_t)ip).u32[2], &nh); > + hop[2] = (ret == 0) ? nh : defv; > + ret = rte_lpm_lookup(lpm, ((rte_xmm_t)ip).u32[3], &nh); > + hop[3] = (ret == 0) ? nh : defv; > +} > + > +#ifdef __cplusplus > +} > +#endif > + > +#endif /* _RTE_LPM_SCALAR_H_ */ Acked-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com> -- Regards, Vladimir ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v3 2/2] lpm: add a scalar version of lookupx4 function 2022-06-01 9:41 ` Medvedkin, Vladimir @ 2022-06-01 10:32 ` Stanisław Kardach 0 siblings, 0 replies; 19+ messages in thread From: Stanisław Kardach @ 2022-06-01 10:32 UTC (permalink / raw) To: Medvedkin, Vladimir Cc: Michal Mazurek, dev, Frank Zhao, Sam Grove, Marcin Wojtas, upstream Hi Vladimir, On Wed, Jun 1, 2022 at 11:41 AM Medvedkin, Vladimir <vladimir.medvedkin@intel.com> wrote: > > Hi Stanislaw, > > > On 30/05/2022 19:24, Stanislaw Kardach wrote: > > From: Michal Mazurek <maz@semihalf.com> > > > > Add an implementation of the rte_lpm_lookupx4() function for platforms > > without support for vector operations. > > > > This will be useful in the upcoming RISC-V port as well as any platform > > which may want to start with a basic level of LPM support. > > > > Signed-off-by: Michal Mazurek <maz@semihalf.com> > > Signed-off-by: Stanislaw Kardach <kda@semihalf.com> > > --- > > doc/guides/rel_notes/release_22_07.rst | 5 ++++ > > lib/lpm/meson.build | 1 + > > lib/lpm/rte_lpm.h | 4 ++- > > lib/lpm/rte_lpm_scalar.h | 40 ++++++++++++++++++++++++++ > > 4 files changed, 49 insertions(+), 1 deletion(-) > > create mode 100644 lib/lpm/rte_lpm_scalar.h > > > > diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst > > index e49cacecef..0cf3f71269 100644 > > --- a/doc/guides/rel_notes/release_22_07.rst > > +++ b/doc/guides/rel_notes/release_22_07.rst > > @@ -104,6 +104,11 @@ New Features > > * ``RTE_EVENT_QUEUE_ATTR_WEIGHT`` > > * ``RTE_EVENT_QUEUE_ATTR_AFFINITY`` > > > > +* **Added scalar version of the LPM library.** > > + > > + * Added scalar implementation of ``rte_lpm_lookupx4``. This is a fall-back > > + implementation for platforms that don't support vector operations. > > + > > > > Removed Items > > ------------- > > diff --git a/lib/lpm/meson.build b/lib/lpm/meson.build > > index 78d91d3421..6b47361fce 100644 > > --- a/lib/lpm/meson.build > > +++ b/lib/lpm/meson.build > > @@ -14,6 +14,7 @@ headers = files('rte_lpm.h', 'rte_lpm6.h') > > indirect_headers += files( > > 'rte_lpm_altivec.h', > > 'rte_lpm_neon.h', > > + 'rte_lpm_scalar.h', > > 'rte_lpm_sse.h', > > 'rte_lpm_sve.h', > > ) > > diff --git a/lib/lpm/rte_lpm.h b/lib/lpm/rte_lpm.h > > index 1cf863a146..4f38864fde 100644 > > --- a/lib/lpm/rte_lpm.h > > +++ b/lib/lpm/rte_lpm.h > > @@ -405,8 +405,10 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], > > #endif > > #elif defined(RTE_ARCH_PPC_64) > > #include "rte_lpm_altivec.h" > > -#else > > +#elif defined(RTE_ARCH_X86) > > #include "rte_lpm_sse.h" > > +#else > > +#include "rte_lpm_scalar.h" > > #endif > > > > #ifdef __cplusplus > > diff --git a/lib/lpm/rte_lpm_scalar.h b/lib/lpm/rte_lpm_scalar.h > > new file mode 100644 > > index 0000000000..4ae1b6f0b8 > > --- /dev/null > > +++ b/lib/lpm/rte_lpm_scalar.h > > @@ -0,0 +1,40 @@ > > +/* SPDX-License-Identifier: BSD-3-Clause > > + * Copyright(c) 2022 StarFive > > + * Copyright(c) 2022 SiFive > > + * Copyright(c) 2022 Semihalf > > + */ > > + > > +#ifndef _RTE_LPM_SCALAR_H_ > > +#define _RTE_LPM_SCALAR_H_ > > + > > +#include <rte_branch_prediction.h> > > +#include <rte_byteorder.h> > > +#include <rte_common.h> > > Just a one nit, I think these 3 headers are not needed and can be > removed. Apart from it looks good to me. > Thanks! Thanks for catching this. I'll send a followup right away. 
> > > +#include <rte_vect.h> > > + > > +#ifdef __cplusplus > > +extern "C" { > > +#endif > > + > > +static inline void > > +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], > > + uint32_t defv) > > +{ > > + uint32_t nh; > > + int ret; > > + > > + ret = rte_lpm_lookup(lpm, ((rte_xmm_t)ip).u32[0], &nh); > > + hop[0] = (ret == 0) ? nh : defv; > > + ret = rte_lpm_lookup(lpm, ((rte_xmm_t)ip).u32[1], &nh); > > + hop[1] = (ret == 0) ? nh : defv; > > + ret = rte_lpm_lookup(lpm, ((rte_xmm_t)ip).u32[2], &nh); > > + hop[2] = (ret == 0) ? nh : defv; > > + ret = rte_lpm_lookup(lpm, ((rte_xmm_t)ip).u32[3], &nh); > > + hop[3] = (ret == 0) ? nh : defv; > > +} > > + > > +#ifdef __cplusplus > > +} > > +#endif > > + > > +#endif /* _RTE_LPM_SCALAR_H_ */ > > Acked-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com> > > -- > Regards, > Vladimir ^ permalink raw reply [flat|nested] 19+ messages in thread
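(For reference, the promised follow-up presumably just trims the scalar header's include list down to the one header it actually needs, i.e. something like:

	#include <rte_vect.h>	/* xmm_t / rte_xmm_t used by the scalar lookupx4 */

with rte_branch_prediction.h, rte_byteorder.h and rte_common.h dropped.)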
* Re: [PATCH v3 1/2] lpm: add const to lpm arg of rte_lpm_lookup 2022-05-30 18:24 ` [PATCH v3 1/2] lpm: add const to lpm arg of rte_lpm_lookup Stanislaw Kardach 2022-05-30 18:24 ` [PATCH v3 2/2] lpm: add a scalar version of lookupx4 function Stanislaw Kardach @ 2022-05-30 20:38 ` Stephen Hemminger 2022-06-01 9:35 ` Medvedkin, Vladimir 2 siblings, 0 replies; 19+ messages in thread From: Stephen Hemminger @ 2022-05-30 20:38 UTC (permalink / raw) To: Stanislaw Kardach Cc: Vladimir Medvedkin, dev, Frank Zhao, Sam Grove, mw, upstream On Mon, 30 May 2022 20:24:36 +0200 Stanislaw Kardach <kda@semihalf.com> wrote: > All other rte_lpm_lookup* functions take lpm argument as a const. As the > basic rte_lpm_lookup() performs the same function, it should also do > that. > > As this function is inline, no API/ABI change happens. > > Signed-off-by: Stanislaw Kardach <kda@semihalf.com> Acked-by: Stephen Hemminger <stephen@networkplumber.org> ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v3 1/2] lpm: add const to lpm arg of rte_lpm_lookup 2022-05-30 18:24 ` [PATCH v3 1/2] lpm: add const to lpm arg of rte_lpm_lookup Stanislaw Kardach 2022-05-30 18:24 ` [PATCH v3 2/2] lpm: add a scalar version of lookupx4 function Stanislaw Kardach 2022-05-30 20:38 ` [PATCH v3 1/2] lpm: add const to lpm arg of rte_lpm_lookup Stephen Hemminger @ 2022-06-01 9:35 ` Medvedkin, Vladimir 2 siblings, 0 replies; 19+ messages in thread From: Medvedkin, Vladimir @ 2022-06-01 9:35 UTC (permalink / raw) To: Stanislaw Kardach; +Cc: dev, Frank Zhao, Sam Grove, mw, upstream On 30/05/2022 19:24, Stanislaw Kardach wrote: > All other rte_lpm_lookup* functions take lpm argument as a const. As the > basic rte_lpm_lookup() performs the same function, it should also do > that. > > As this function is inline, no API/ABI change happens. > > Signed-off-by: Stanislaw Kardach <kda@semihalf.com> > --- > lib/lpm/rte_lpm.h | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/lib/lpm/rte_lpm.h b/lib/lpm/rte_lpm.h > index eb91960e81..1cf863a146 100644 > --- a/lib/lpm/rte_lpm.h > +++ b/lib/lpm/rte_lpm.h > @@ -279,7 +279,7 @@ rte_lpm_delete_all(struct rte_lpm *lpm); > * -EINVAL for incorrect arguments, -ENOENT on lookup miss, 0 on lookup hit > */ > static inline int > -rte_lpm_lookup(struct rte_lpm *lpm, uint32_t ip, uint32_t *next_hop) > +rte_lpm_lookup(const struct rte_lpm *lpm, uint32_t ip, uint32_t *next_hop) > { > unsigned tbl24_index = (ip >> 8); > uint32_t tbl_entry; Acked-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com> -- Regards, Vladimir ^ permalink raw reply [flat|nested] 19+ messages in thread