* [dpdk-dev] [PATCH 1/2] eal: introduce integer divide through reciprocal @ 2017-08-29 18:46 Pavan Nikhilesh 2017-08-29 18:46 ` [dpdk-dev] [PATCH 2/2] eal: add u64 bit variant for reciprocal Pavan Nikhilesh ` (2 more replies) 0 siblings, 3 replies; 19+ messages in thread From: Pavan Nikhilesh @ 2017-08-29 18:46 UTC (permalink / raw) To: dev; +Cc: cristian.dumitrescu, stephen, Pavan Nikhilesh In some use cases of integer division, denominator remains constant and numerator varies. It is possible to optimize division for such specific scenarios. The librte_sched uses rte_reciprocal to optimize division so, moving it to eal/common would allow other libraries and applications to use it. Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com> --- lib/librte_eal/bsdapp/eal/Makefile | 1 + lib/librte_eal/bsdapp/eal/rte_eal_version.map | 7 +++++++ lib/librte_eal/common/Makefile | 1 + lib/{librte_sched => librte_eal/common/include}/rte_reciprocal.h | 6 ++++-- lib/{librte_sched => librte_eal/common}/rte_reciprocal.c | 6 ++++-- lib/librte_eal/linuxapp/eal/Makefile | 1 + lib/librte_eal/linuxapp/eal/rte_eal_version.map | 7 +++++++ lib/librte_sched/Makefile | 2 -- lib/librte_sched/rte_sched.c | 2 +- 9 files changed, 26 insertions(+), 7 deletions(-) rename lib/{librte_sched => librte_eal/common/include}/rte_reciprocal.h (87%) rename lib/{librte_sched => librte_eal/common}/rte_reciprocal.c (96%) diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile index 005019e..56f9804 100644 --- a/lib/librte_eal/bsdapp/eal/Makefile +++ b/lib/librte_eal/bsdapp/eal/Makefile @@ -88,6 +88,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_elem.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_heap.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_keepalive.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_service.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_reciprocal.c # from arch dir SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_cpuflags.c diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map index 79e7d31..00d5f60 100644 --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map @@ -238,3 +238,10 @@ EXPERIMENTAL { rte_service_unregister; } DPDK_17.08; + +DPDK_17.11 { + global: + + rte_reciprocal_value; + +} DPDK_17.11; diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile index e8fd67a..a680b2d 100644 --- a/lib/librte_eal/common/Makefile +++ b/lib/librte_eal/common/Makefile @@ -42,6 +42,7 @@ INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h rte_vdev.h INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h INC += rte_malloc.h rte_keepalive.h rte_time.h INC += rte_service.h rte_service_component.h +INC += rte_reciprocal.h GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h diff --git a/lib/librte_sched/rte_reciprocal.h b/lib/librte_eal/common/include/rte_reciprocal.h similarity index 87% rename from lib/librte_sched/rte_reciprocal.h rename to lib/librte_eal/common/include/rte_reciprocal.h index 5e21f09..b6d752f 100644 --- a/lib/librte_sched/rte_reciprocal.h +++ b/lib/librte_eal/common/include/rte_reciprocal.h @@ -29,13 +29,15 @@ struct rte_reciprocal { uint8_t sh1, sh2; }; -static inline uint32_t rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R) +static inline uint32_t +rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R) { uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32); return (t + ((a - t) >> R.sh1)) >> R.sh2; } -struct rte_reciprocal rte_reciprocal_value(uint32_t d); +struct rte_reciprocal +rte_reciprocal_value(uint32_t d); #endif /* _RTE_RECIPROCAL_H_ */ diff --git a/lib/librte_sched/rte_reciprocal.c b/lib/librte_eal/common/rte_reciprocal.c similarity index 96% rename from lib/librte_sched/rte_reciprocal.c rename to lib/librte_eal/common/rte_reciprocal.c index 652f023..7ab99b4 100644 --- a/lib/librte_sched/rte_reciprocal.c +++ b/lib/librte_eal/common/rte_reciprocal.c @@ -41,7 +41,8 @@ /* find largest set bit. * portable and slow but does not matter for this usage. */ -static inline int fls(uint32_t x) +static inline int +fls(uint32_t x) { int b; @@ -53,7 +54,8 @@ static inline int fls(uint32_t x) return 0; } -struct rte_reciprocal rte_reciprocal_value(uint32_t d) +struct rte_reciprocal +rte_reciprocal_value(uint32_t d) { struct rte_reciprocal R; uint64_t m; diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile index 90bca4d..98f3b8e 100644 --- a/lib/librte_eal/linuxapp/eal/Makefile +++ b/lib/librte_eal/linuxapp/eal/Makefile @@ -100,6 +100,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_elem.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_heap.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_keepalive.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_service.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_reciprocal.c # from arch dir SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_cpuflags.c diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map index 468c706..ac68fb3 100644 --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map @@ -243,3 +243,10 @@ EXPERIMENTAL { rte_service_unregister; } DPDK_17.08; + +DPDK_17.11 { + global: + + rte_reciprocal_value; + +} DPDK_17.11; diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile index 18274e7..569656b 100644 --- a/lib/librte_sched/Makefile +++ b/lib/librte_sched/Makefile @@ -52,10 +52,8 @@ LIBABIVER := 1 # all source are stored in SRCS-y # SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c -SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_reciprocal.c # install includes SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h rte_sched_common.h rte_red.h rte_approx.h -SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_reciprocal.h include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c index b7cba11..3b8ccaa 100644 --- a/lib/librte_sched/rte_sched.c +++ b/lib/librte_sched/rte_sched.c @@ -42,12 +42,12 @@ #include <rte_prefetch.h> #include <rte_branch_prediction.h> #include <rte_mbuf.h> +#include <rte_reciprocal.h> #include "rte_sched.h" #include "rte_bitmap.h" #include "rte_sched_common.h" #include "rte_approx.h" -#include "rte_reciprocal.h" #ifdef __INTEL_COMPILER #pragma warning(disable:2259) /* conversion may lose significant bits */ -- 2.7.4 ^ permalink raw reply [flat|nested] 19+ messages in thread
* [dpdk-dev] [PATCH 2/2] eal: add u64 bit variant for reciprocal 2017-08-29 18:46 [dpdk-dev] [PATCH 1/2] eal: introduce integer divide through reciprocal Pavan Nikhilesh @ 2017-08-29 18:46 ` Pavan Nikhilesh 2017-08-29 19:35 ` Stephen Hemminger 2017-08-29 19:38 ` Stephen Hemminger 2017-12-04 13:22 ` [dpdk-dev] [PATCH v7 1/3] eal: introduce integer divide through reciprocal Pavan Nikhilesh 2018-01-26 5:04 ` [dpdk-dev] [PATCH v8 " Pavan Nikhilesh 2 siblings, 2 replies; 19+ messages in thread From: Pavan Nikhilesh @ 2017-08-29 18:46 UTC (permalink / raw) To: dev; +Cc: cristian.dumitrescu, stephen, Pavan Nikhilesh Currently, rte_reciprocal only supports unsigned 32bit divisors. This commit adds support for unsigned 64bit divisors. Rename unsigned 32bit specific functions appropriately and update librte_sched accordingly. Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com> --- lib/librte_eal/bsdapp/eal/rte_eal_version.map | 3 +- lib/librte_eal/common/include/rte_reciprocal.h | 111 ++++++++++++++++++++-- lib/librte_eal/common/rte_reciprocal.c | 120 +++++++++++++++++++++--- lib/librte_eal/linuxapp/eal/rte_eal_version.map | 3 +- lib/librte_sched/Makefile | 4 +- lib/librte_sched/rte_sched.c | 9 +- 6 files changed, 222 insertions(+), 28 deletions(-) diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map index 00d5f60..f7f1013 100644 --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map @@ -242,6 +242,7 @@ EXPERIMENTAL { DPDK_17.11 { global: - rte_reciprocal_value; + rte_reciprocal_value_u32; + rte_reciprocal_value_u64; } DPDK_17.11; diff --git a/lib/librte_eal/common/include/rte_reciprocal.h b/lib/librte_eal/common/include/rte_reciprocal.h index b6d752f..fccbc09 100644 --- a/lib/librte_eal/common/include/rte_reciprocal.h +++ b/lib/librte_eal/common/include/rte_reciprocal.h @@ -19,25 +19,120 @@ * to calculate the division A/B. */ +/* + * libdivide + * Copyright (C) 2010 ridiculous_fish + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + * + * libdivide@ridiculousfish.com + * + */ + #ifndef _RTE_RECIPROCAL_H_ #define _RTE_RECIPROCAL_H_ -#include <stdint.h> +#include <rte_memory.h> -struct rte_reciprocal { +/** + * Unsigned 32-bit divisor structure. + */ +struct rte_reciprocal_u32 { uint32_t m; uint8_t sh1, sh2; -}; +} __rte_cache_aligned; + +/** + * Unsigned 64-bit divisor structure. + */ +struct rte_reciprocal_u64 { + uint64_t m; + uint8_t sh1; +} __rte_cache_aligned; +/** + * Divide given unsigned 32-bit integer with pre calculated divisor. + * + * @param a + * The 32-bit dividend. + * @param R + * The pointer to pre calculated divisor reciprocal structure. + * + * @return + * The result of the division + */ static inline uint32_t -rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R) +rte_reciprocal_divide_u32(uint32_t a, struct rte_reciprocal_u32 *R) +{ + uint32_t t = (uint32_t)(((uint64_t)a * R->m) >> 32); + + return (t + ((a - t) >> R->sh1)) >> R->sh2; +} + +static inline uint64_t +mullhi_u64(uint64_t x, uint64_t y) { - uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32); + __uint128_t xl = x; + __uint128_t yl = y; + __uint128_t rl = xl * yl; + return (uint64_t)(rl >> 64); +} - return (t + ((a - t) >> R.sh1)) >> R.sh2; +/** + * Divide given unsigned 64-bit integer with pre calculated divisor. + * + * @param a + * The 64-bit dividend. + * @param R + * The pointer to pre calculated divisor reciprocal structure. + * + * @return + * The result of the division + */ +static inline uint64_t +rte_reciprocal_divide_u64(uint64_t a, struct rte_reciprocal_u64 *R) +{ + uint64_t q = mullhi_u64(R->m, a); + uint64_t t = ((a - q) >> 1) + q; + return t >> R->sh1; } -struct rte_reciprocal -rte_reciprocal_value(uint32_t d); +/** + * Generate pre calculated divisor structure. + * + * @param d + * The unsigned 32-bit divisor. + * + * @return + * Divisor structure. + */ +struct rte_reciprocal_u32 +rte_reciprocal_value_u32(uint32_t d); + +/** + * Generate pre calculated divisor structure. + * + * @param d + * The unsigned 64-bit divisor. + * + * @return + * Divisor structure. + */ +struct rte_reciprocal_u64 +rte_reciprocal_value_u64(uint64_t d); #endif /* _RTE_RECIPROCAL_H_ */ diff --git a/lib/librte_eal/common/rte_reciprocal.c b/lib/librte_eal/common/rte_reciprocal.c index 7ab99b4..5d7e367 100644 --- a/lib/librte_eal/common/rte_reciprocal.c +++ b/lib/librte_eal/common/rte_reciprocal.c @@ -31,18 +31,13 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include <stdio.h> -#include <stdint.h> - -#include <rte_common.h> - -#include "rte_reciprocal.h" +#include <rte_reciprocal.h> /* find largest set bit. * portable and slow but does not matter for this usage. */ static inline int -fls(uint32_t x) +fls_u32(uint32_t x) { int b; @@ -54,21 +49,120 @@ fls(uint32_t x) return 0; } -struct rte_reciprocal -rte_reciprocal_value(uint32_t d) +struct rte_reciprocal_u32 +rte_reciprocal_value_u32(uint32_t d) { - struct rte_reciprocal R; + struct rte_reciprocal_u32 R; uint64_t m; int l; - l = fls(d - 1); + l = fls_u32(d - 1); m = ((1ULL << 32) * ((1ULL << l) - d)); m /= d; ++m; R.m = m; - R.sh1 = RTE_MIN(l, 1); - R.sh2 = RTE_MAX(l - 1, 0); + R.sh1 = l > 1 ? 1 : l; + R.sh2 = (l - 1 > 0) ? l - 1 : 0; + + return R; +} + +/* Code taken from Hacker's Delight: + * http://www.hackersdelight.org/HDcode/divlu.c. + * License permits inclusion here per: + * http://www.hackersdelight.org/permissions.htm + */ +static inline uint64_t +divide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t *r) +{ + const uint64_t b = (1ULL << 32); /* Number base (16 bits). */ + uint64_t un1, un0, /* Norm. dividend LSD's. */ + vn1, vn0, /* Norm. divisor digits. */ + q1, q0, /* Quotient digits. */ + un64, un21, un10, /* Dividend digit pairs. */ + rhat; /* A remainder. */ + int s; /* Shift amount for norm. */ + + /* If overflow, set rem. to an impossible value. */ + if (u1 >= v) { + if (r != NULL) + *r = (uint64_t) -1; + return (uint64_t) -1; + } + + /* Count leading zeros. */ + s = __builtin_clzll(v); + if (s > 0) { + v = v << s; + un64 = (u1 << s) | ((u0 >> (64 - s)) & (-s >> 31)); + un10 = u0 << s; + } else { + + un64 = u1 | u0; + un10 = u0; + } + + vn1 = v >> 32; + vn0 = v & 0xFFFFFFFF; + + un1 = un10 >> 32; + un0 = un10 & 0xFFFFFFFF; + + q1 = un64/vn1; + rhat = un64 - q1*vn1; +again1: + if (q1 >= b || q1*vn0 > b*rhat + un1) { + q1 = q1 - 1; + rhat = rhat + vn1; + if (rhat < b) + goto again1; + } + + un21 = un64*b + un1 - q1*v; + + q0 = un21/vn1; + rhat = un21 - q0*vn1; +again2: + if (q0 >= b || q0*vn0 > b*rhat + un0) { + q0 = q0 - 1; + rhat = rhat + vn1; + if (rhat < b) + goto again2; + } + + if (r != NULL) + *r = (un21*b + un0 - q0*v) >> s; + return q1*b + q0; +} + +struct rte_reciprocal_u64 +rte_reciprocal_value_u64(uint64_t d) +{ + struct rte_reciprocal_u64 R; + + const uint32_t fld = 63 - __builtin_clzll(d); + + if ((d & (d - 1)) == 0) { + R.m = 0; + R.sh1 = (fld - 1) | 0x40; + } else { + uint64_t rem; + uint64_t multiplier; + uint8_t more; + + multiplier = divide_128_div_64_to_64(1ULL << fld, 0, d, &rem); + multiplier += multiplier; + + const uint64_t twice_rem = rem + rem; + if (twice_rem >= d || twice_rem < rem) + multiplier += 1; + more = fld; + R.m = 1 + multiplier; + R.sh1 = more | 0x40; + } + + R.sh1 &= 0x3F; return R; } diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map index ac68fb3..e64840c 100644 --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map @@ -247,6 +247,7 @@ EXPERIMENTAL { DPDK_17.11 { global: - rte_reciprocal_value; + rte_reciprocal_value_u32; + rte_reciprocal_value_u64; } DPDK_17.11; diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile index 569656b..a2fd6f3 100644 --- a/lib/librte_sched/Makefile +++ b/lib/librte_sched/Makefile @@ -54,6 +54,8 @@ LIBABIVER := 1 SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c # install includes -SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h rte_sched_common.h rte_red.h rte_approx.h +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_sched_common.h rte_red.h +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_approx.h include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c index 3b8ccaa..7bb6d51 100644 --- a/lib/librte_sched/rte_sched.c +++ b/lib/librte_sched/rte_sched.c @@ -228,7 +228,7 @@ struct rte_sched_port { uint64_t time_cpu_cycles; /* Current CPU time measured in CPU cyles */ uint64_t time_cpu_bytes; /* Current CPU time measured in bytes */ uint64_t time; /* Current NIC TX time measured in bytes */ - struct rte_reciprocal inv_cycles_per_byte; /* CPU cycles per byte */ + struct rte_reciprocal_u32 inv_cycles_per_byte; /* CPU cycles per byte */ /* Scheduling loop detection */ uint32_t pipe_loop; @@ -677,7 +677,7 @@ rte_sched_port_config(struct rte_sched_port_params *params) cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT) / params->rate; - port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte); + port->inv_cycles_per_byte = rte_reciprocal_value_u32(cycles_per_byte); /* Scheduling loop detection */ port->pipe_loop = RTE_SCHED_PIPE_INVALID; @@ -2147,8 +2147,9 @@ rte_sched_port_time_resync(struct rte_sched_port *port) uint64_t bytes_diff; /* Compute elapsed time in bytes */ - bytes_diff = rte_reciprocal_divide(cycles_diff << RTE_SCHED_TIME_SHIFT, - port->inv_cycles_per_byte); + bytes_diff = rte_reciprocal_divide_u32( + cycles_diff << RTE_SCHED_TIME_SHIFT, + &port->inv_cycles_per_byte); /* Advance port time */ port->time_cpu_cycles = cycles; -- 2.7.4 ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [dpdk-dev] [PATCH 2/2] eal: add u64 bit variant for reciprocal 2017-08-29 18:46 ` [dpdk-dev] [PATCH 2/2] eal: add u64 bit variant for reciprocal Pavan Nikhilesh @ 2017-08-29 19:35 ` Stephen Hemminger 2017-08-29 19:38 ` Stephen Hemminger 1 sibling, 0 replies; 19+ messages in thread From: Stephen Hemminger @ 2017-08-29 19:35 UTC (permalink / raw) To: Pavan Nikhilesh; +Cc: dev, cristian.dumitrescu On Wed, 30 Aug 2017 00:16:18 +0530 Pavan Nikhilesh <pbhagavatula@caviumnetworks.com> wrote: > +/* > + * libdivide > + * Copyright (C) 2010 ridiculous_fish > + * This software is provided 'as-is', without any express or implied > + * warranty. In no event will the authors be held liable for any damages > + * arising from the use of this software. > + * Permission is granted to anyone to use this software for any purpose, > + * including commercial applications, and to alter it and redistribute it > + * freely, subject to the following restrictions: > + * > + * 1. The origin of this software must not be misrepresented; you must not > + * claim that you wrote the original software. If you use this software > + * in a product, an acknowledgment in the product documentation would be > + * appreciated but is not required. > + * > + * 2. Altered source versions must be plainly marked as such, and must not be > + * misrepresented as being the original software. > + * > + * 3. This notice may not be removed or altered from any source distribution. > + * > + * libdivide@ridiculousfish.com > + * > + */ > + No new license, certainly no advertising clauses. ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [dpdk-dev] [PATCH 2/2] eal: add u64 bit variant for reciprocal 2017-08-29 18:46 ` [dpdk-dev] [PATCH 2/2] eal: add u64 bit variant for reciprocal Pavan Nikhilesh 2017-08-29 19:35 ` Stephen Hemminger @ 2017-08-29 19:38 ` Stephen Hemminger 2017-08-30 4:05 ` Pavan Nikhilesh Bhagavatula 1 sibling, 1 reply; 19+ messages in thread From: Stephen Hemminger @ 2017-08-29 19:38 UTC (permalink / raw) To: Pavan Nikhilesh; +Cc: dev, cristian.dumitrescu On Wed, 30 Aug 2017 00:16:18 +0530 Pavan Nikhilesh <pbhagavatula@caviumnetworks.com> wrote: > +static inline uint64_t > +mullhi_u64(uint64_t x, uint64_t y) > { > - uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32); > + __uint128_t xl = x; > + __uint128_t yl = y; > + __uint128_t rl = xl * yl; > + return (uint64_t)(rl >> 64); > +} Cast in return not necessary. And cast when setting t not necessary. Please blank line after declarations. Also you don't need to cast both sides of multiply. Some compilers maybe able to optimize 128 bit * 64 bit. ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [dpdk-dev] [PATCH 2/2] eal: add u64 bit variant for reciprocal 2017-08-29 19:38 ` Stephen Hemminger @ 2017-08-30 4:05 ` Pavan Nikhilesh Bhagavatula 0 siblings, 0 replies; 19+ messages in thread From: Pavan Nikhilesh Bhagavatula @ 2017-08-30 4:05 UTC (permalink / raw) To: Stephen Hemminger; +Cc: dev On Tue, Aug 29, 2017 at 12:38:00PM -0700, Stephen Hemminger wrote: > On Wed, 30 Aug 2017 00:16:18 +0530 > Pavan Nikhilesh <pbhagavatula@caviumnetworks.com> wrote: > Hi Stephen, > > +static inline uint64_t > > +mullhi_u64(uint64_t x, uint64_t y) > > { > > - uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32); > > + __uint128_t xl = x; > > + __uint128_t yl = y; > > + __uint128_t rl = xl * yl; > > + return (uint64_t)(rl >> 64); > > +} > > Cast in return not necessary. And cast when setting t not necessary. > Please blank line after declarations. > > Also you don't need to cast both sides of multiply. Some compilers maybe > able to optimize 128 bit * 64 bit. Thanks for the inputs, will do the required changes along with removing license in v2. -Pavan ^ permalink raw reply [flat|nested] 19+ messages in thread
* [dpdk-dev] [PATCH v7 1/3] eal: introduce integer divide through reciprocal 2017-08-29 18:46 [dpdk-dev] [PATCH 1/2] eal: introduce integer divide through reciprocal Pavan Nikhilesh 2017-08-29 18:46 ` [dpdk-dev] [PATCH 2/2] eal: add u64 bit variant for reciprocal Pavan Nikhilesh @ 2017-12-04 13:22 ` Pavan Nikhilesh 2017-12-04 13:22 ` [dpdk-dev] [PATCH v7 2/3] eal: add u64 bit variant for reciprocal Pavan Nikhilesh ` (2 more replies) 2018-01-26 5:04 ` [dpdk-dev] [PATCH v8 " Pavan Nikhilesh 2 siblings, 3 replies; 19+ messages in thread From: Pavan Nikhilesh @ 2017-12-04 13:22 UTC (permalink / raw) To: cristian.dumitrescu, stephen, ktraynor; +Cc: dev, Pavan Nikhilesh In some use cases of integer division, denominator remains constant and numerator varies. It is possible to optimize division for such specific scenarios. The librte_sched uses rte_reciprocal to optimize division so, moving it to eal/common would allow other libraries and applications to use it. Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com> --- v7 changes: - reworked u64 division (follows same flow as 32bit). - addressed review comments from v6 (Undo cosmetic changes to 32bit division) v6 changes: - remove cache alignment from rte_reciprocal_u{32/64}structures as they would be embedded in other structures. v5 changes: - fix test print strings v4 changes: - minor fix for test cases - fix u32 divisor generation v3 changes: - fix x86_32 compilation issue - fix improper licence in test v2 changes: - fix compilation issues with .map files - add test cases for correctness and performance - remove extra licence inclusion - fix coding style issues lib/librte_eal/bsdapp/eal/Makefile | 1 + lib/librte_eal/common/Makefile | 1 + lib/{librte_sched => librte_eal/common/include}/rte_reciprocal.h | 0 lib/{librte_sched => librte_eal/common}/rte_reciprocal.c | 0 lib/librte_eal/linuxapp/eal/Makefile | 1 + lib/librte_eal/rte_eal_version.map | 7 +++++++ lib/librte_sched/Makefile | 2 -- lib/librte_sched/rte_sched.c | 2 +- 8 files changed, 11 insertions(+), 3 deletions(-) rename lib/{librte_sched => librte_eal/common/include}/rte_reciprocal.h (100%) rename lib/{librte_sched => librte_eal/common}/rte_reciprocal.c (100%) diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile index afa117de4..b0442a75d 100644 --- a/lib/librte_eal/bsdapp/eal/Makefile +++ b/lib/librte_eal/bsdapp/eal/Makefile @@ -84,6 +84,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_elem.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_heap.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_keepalive.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_service.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_reciprocal.c # from arch dir SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_cpuflags.c diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile index 9effd0d45..ed0d3f80b 100644 --- a/lib/librte_eal/common/Makefile +++ b/lib/librte_eal/common/Makefile @@ -44,6 +44,7 @@ INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h INC += rte_malloc.h rte_keepalive.h rte_time.h INC += rte_service.h rte_service_component.h INC += rte_bitmap.h rte_vfio.h +INC += rte_reciprocal.h GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h diff --git a/lib/librte_sched/rte_reciprocal.h b/lib/librte_eal/common/include/rte_reciprocal.h similarity index 100% rename from lib/librte_sched/rte_reciprocal.h rename to lib/librte_eal/common/include/rte_reciprocal.h diff --git a/lib/librte_sched/rte_reciprocal.c b/lib/librte_eal/common/rte_reciprocal.c similarity index 100% rename from lib/librte_sched/rte_reciprocal.c rename to lib/librte_eal/common/rte_reciprocal.c diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile index 5a7b8b2ac..ce65d5ccc 100644 --- a/lib/librte_eal/linuxapp/eal/Makefile +++ b/lib/librte_eal/linuxapp/eal/Makefile @@ -91,6 +91,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_elem.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_heap.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_keepalive.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_service.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_reciprocal.c # from arch dir SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_cpuflags.c diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map index f4f46c1be..d8b968c5c 100644 --- a/lib/librte_eal/rte_eal_version.map +++ b/lib/librte_eal/rte_eal_version.map @@ -200,6 +200,13 @@ DPDK_17.11 { } DPDK_17.08; +DPDK_18.02 { + global: + + rte_reciprocal_value; + +} DPDK_17.11; + EXPERIMENTAL { global: diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile index 04785f720..535b23d76 100644 --- a/lib/librte_sched/Makefile +++ b/lib/librte_sched/Makefile @@ -54,10 +54,8 @@ LIBABIVER := 1 # all source are stored in SRCS-y # SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c -SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_reciprocal.c # install includes SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_sched_common.h rte_red.h rte_approx.h -SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_reciprocal.h include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c index 7252f850d..35c5c3cde 100644 --- a/lib/librte_sched/rte_sched.c +++ b/lib/librte_sched/rte_sched.c @@ -43,11 +43,11 @@ #include <rte_branch_prediction.h> #include <rte_mbuf.h> #include <rte_bitmap.h> +#include <rte_reciprocal.h> #include "rte_sched.h" #include "rte_sched_common.h" #include "rte_approx.h" -#include "rte_reciprocal.h" #ifdef __INTEL_COMPILER #pragma warning(disable:2259) /* conversion may lose significant bits */ -- 2.14.1 ^ permalink raw reply [flat|nested] 19+ messages in thread
* [dpdk-dev] [PATCH v7 2/3] eal: add u64 bit variant for reciprocal 2017-12-04 13:22 ` [dpdk-dev] [PATCH v7 1/3] eal: introduce integer divide through reciprocal Pavan Nikhilesh @ 2017-12-04 13:22 ` Pavan Nikhilesh 2017-12-04 13:22 ` [dpdk-dev] [PATCH v7 3/3] test: add tests for reciprocal based division Pavan Nikhilesh 2018-01-11 16:01 ` [dpdk-dev] [PATCH v7 1/3] eal: introduce integer divide through reciprocal Dumitrescu, Cristian 2 siblings, 0 replies; 19+ messages in thread From: Pavan Nikhilesh @ 2017-12-04 13:22 UTC (permalink / raw) To: cristian.dumitrescu, stephen, ktraynor; +Cc: dev, Pavan Nikhilesh Currently, rte_reciprocal only supports unsigned 32bit divisors. This commit adds support for unsigned 64bit divisors. Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com> --- lib/librte_eal/common/include/rte_reciprocal.h | 46 +++++++++++++ lib/librte_eal/common/rte_reciprocal.c | 89 ++++++++++++++++++++++++++ lib/librte_eal/rte_eal_version.map | 1 + 3 files changed, 136 insertions(+) diff --git a/lib/librte_eal/common/include/rte_reciprocal.h b/lib/librte_eal/common/include/rte_reciprocal.h index 5e21f096f..c583da189 100644 --- a/lib/librte_eal/common/include/rte_reciprocal.h +++ b/lib/librte_eal/common/include/rte_reciprocal.h @@ -29,6 +29,11 @@ struct rte_reciprocal { uint8_t sh1, sh2; }; +struct rte_reciprocal_u64 { + uint64_t m; + uint8_t sh1, sh2; +}; + static inline uint32_t rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R) { uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32); @@ -36,6 +41,47 @@ static inline uint32_t rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R return (t + ((a - t) >> R.sh1)) >> R.sh2; } +static __rte_always_inline uint64_t +mullhi_u64(uint64_t x, uint64_t y) +{ +#ifdef __SIZEOF_INT128__ + __uint128_t xl = x; + __uint128_t rl = xl * y; + + return (rl >> 64); +#else + uint64_t u0, u1, v0, v1, k, t; + uint64_t w1, w2; + uint64_t whi; + + u1 = x >> 32; u0 = x & 0xFFFFFFFF; + v1 = y >> 32; v0 = y & 0xFFFFFFFF; + + t = u0*v0; + k = t >> 32; + + t = u1*v0 + k; + w1 = t & 0xFFFFFFFF; + w2 = t >> 32; + + t = u0*v1 + w1; + k = t >> 32; + + whi = u1*v1 + w2 + k; + + return whi; +#endif +} + +static __rte_always_inline uint64_t +rte_reciprocal_divide_u64(uint64_t a, struct rte_reciprocal_u64 *R) +{ + uint64_t t = mullhi_u64(a, R->m); + + return (t + ((a - t) >> R->sh1)) >> R->sh2; +} + struct rte_reciprocal rte_reciprocal_value(uint32_t d); +struct rte_reciprocal_u64 rte_reciprocal_value_u64(uint64_t d); #endif /* _RTE_RECIPROCAL_H_ */ diff --git a/lib/librte_eal/common/rte_reciprocal.c b/lib/librte_eal/common/rte_reciprocal.c index 652f0237a..8d1fb7bff 100644 --- a/lib/librte_eal/common/rte_reciprocal.c +++ b/lib/librte_eal/common/rte_reciprocal.c @@ -70,3 +70,92 @@ struct rte_reciprocal rte_reciprocal_value(uint32_t d) return R; } + +/* + * Code taken from Hacker's Delight: + * http://www.hackersdelight.org/hdcodetxt/divlu.c.txt + * License permits inclusion here per: + * http://www.hackersdelight.org/permissions.htm + */ +static uint64_t +divide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t *r) +{ + const uint64_t b = (1ULL << 32); /* Number base (16 bits). */ + uint64_t un1, un0, /* Norm. dividend LSD's. */ + vn1, vn0, /* Norm. divisor digits. */ + q1, q0, /* Quotient digits. */ + un64, un21, un10, /* Dividend digit pairs. */ + rhat; /* A remainder. */ + int s; /* Shift amount for norm. */ + + /* If overflow, set rem. to an impossible value. */ + if (u1 >= v) { + if (r != NULL) + *r = (uint64_t) -1; + return (uint64_t) -1; + } + + /* Count leading zeros. */ + s = __builtin_clzll(v); + if (s > 0) { + v = v << s; + un64 = (u1 << s) | ((u0 >> (64 - s)) & (-s >> 31)); + un10 = u0 << s; + } else { + + un64 = u1 | u0; + un10 = u0; + } + + vn1 = v >> 32; + vn0 = v & 0xFFFFFFFF; + + un1 = un10 >> 32; + un0 = un10 & 0xFFFFFFFF; + + q1 = un64/vn1; + rhat = un64 - q1*vn1; +again1: + if (q1 >= b || q1*vn0 > b*rhat + un1) { + q1 = q1 - 1; + rhat = rhat + vn1; + if (rhat < b) + goto again1; + } + + un21 = un64*b + un1 - q1*v; + + q0 = un21/vn1; + rhat = un21 - q0*vn1; +again2: + if (q0 >= b || q0*vn0 > b*rhat + un0) { + q0 = q0 - 1; + rhat = rhat + vn1; + if (rhat < b) + goto again2; + } + + if (r != NULL) + *r = (un21*b + un0 - q0*v) >> s; + return q1*b + q0; +} + +struct rte_reciprocal_u64 +rte_reciprocal_value_u64(uint64_t d) +{ + struct rte_reciprocal_u64 R; + uint64_t m; + int l; + + l = 63 - __builtin_clzll(d); + + m = divide_128_div_64_to_64((1ULL << l), 0, d, NULL) << 1; + m = (1ULL << l) - d ? m + 2 : 1; + R.m = m; + + R.sh1 = l > 1 ? 1 : l; + R.sh2 = (l > 0) ? l : 0; + R.sh2 -= R.sh2 && (m == 1) ? 1 : 0; + + return R; +} diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map index d8b968c5c..ad3ce42ff 100644 --- a/lib/librte_eal/rte_eal_version.map +++ b/lib/librte_eal/rte_eal_version.map @@ -204,6 +204,7 @@ DPDK_18.02 { global: rte_reciprocal_value; + rte_reciprocal_value_u64; } DPDK_17.11; -- 2.14.1 ^ permalink raw reply [flat|nested] 19+ messages in thread
* [dpdk-dev] [PATCH v7 3/3] test: add tests for reciprocal based division 2017-12-04 13:22 ` [dpdk-dev] [PATCH v7 1/3] eal: introduce integer divide through reciprocal Pavan Nikhilesh 2017-12-04 13:22 ` [dpdk-dev] [PATCH v7 2/3] eal: add u64 bit variant for reciprocal Pavan Nikhilesh @ 2017-12-04 13:22 ` Pavan Nikhilesh 2018-01-18 23:49 ` Thomas Monjalon 2018-01-11 16:01 ` [dpdk-dev] [PATCH v7 1/3] eal: introduce integer divide through reciprocal Dumitrescu, Cristian 2 siblings, 1 reply; 19+ messages in thread From: Pavan Nikhilesh @ 2017-12-04 13:22 UTC (permalink / raw) To: cristian.dumitrescu, stephen, ktraynor; +Cc: dev, Pavan Nikhilesh This commit provides a set of tests for verifying the correctness and performance of both unsigned 32 and 64bit reciprocal based division. Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com> --- test/test/Makefile | 2 + test/test/test_reciprocal_division.c | 190 +++++++++++++++++++++++++ test/test/test_reciprocal_division_perf.c | 229 ++++++++++++++++++++++++++++++ 3 files changed, 421 insertions(+) create mode 100644 test/test/test_reciprocal_division.c create mode 100644 test/test/test_reciprocal_division_perf.c diff --git a/test/test/Makefile b/test/test/Makefile index bb54c9808..7f4aa67b9 100644 --- a/test/test/Makefile +++ b/test/test/Makefile @@ -95,6 +95,8 @@ SRCS-y += test_spinlock.c SRCS-y += test_memory.c SRCS-y += test_memzone.c SRCS-y += test_bitmap.c +SRCS-y += test_reciprocal_division.c +SRCS-y += test_reciprocal_division_perf.c SRCS-y += test_ring.c SRCS-y += test_ring_perf.c diff --git a/test/test/test_reciprocal_division.c b/test/test/test_reciprocal_division.c new file mode 100644 index 000000000..75313c486 --- /dev/null +++ b/test/test/test_reciprocal_division.c @@ -0,0 +1,190 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium, Inc. 2017. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium, Inc nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "test.h" + +#include <stdio.h> +#include <unistd.h> +#include <inttypes.h> + +#include <rte_common.h> +#include <rte_cycles.h> +#include <rte_random.h> +#include <rte_reciprocal.h> + +#define MAX_ITERATIONS (1ULL << 32) +#define DIVIDE_ITER (100) + +static int +test_reciprocal(void) +{ + int result = 0; + uint32_t divisor_u32 = 0; + uint32_t dividend_u32; + uint32_t nresult_u32; + uint32_t rresult_u32; + uint64_t i; + uint64_t divisor_u64 = 0; + uint64_t dividend_u64; + uint64_t nresult_u64; + uint64_t rresult_u64; + struct rte_reciprocal reci_u32 = {0}; + struct rte_reciprocal_u64 reci_u64 = {0}; + + rte_srand(rte_rdtsc()); + printf("Validating unsigned 32bit division.\n"); + for (i = 0; i < MAX_ITERATIONS; i++) { + /* Change divisor every DIVIDE_ITER iterations. */ + if (i % DIVIDE_ITER == 0) { + divisor_u32 = rte_rand(); + reci_u32 = rte_reciprocal_value(divisor_u32); + } + + dividend_u32 = rte_rand(); + nresult_u32 = dividend_u32 / divisor_u32; + rresult_u32 = rte_reciprocal_divide(dividend_u32, + reci_u32); + if (nresult_u32 != rresult_u32) { + printf("Division failed, %"PRIu32"/%"PRIu32" = " + "expected %"PRIu32" result %"PRIu32"\n", + dividend_u32, divisor_u32, + nresult_u32, rresult_u32); + result = 1; + break; + } + } + + printf("Validating unsigned 64bit division.\n"); + for (i = 0; i < MAX_ITERATIONS; i++) { + /* Change divisor every DIVIDE_ITER iterations. */ + if (i % DIVIDE_ITER == 0) { + divisor_u64 = rte_rand(); + reci_u64 = rte_reciprocal_value_u64(divisor_u64); + } + + dividend_u64 = rte_rand(); + nresult_u64 = dividend_u64 / divisor_u64; + rresult_u64 = rte_reciprocal_divide_u64(dividend_u64, + &reci_u64); + if (nresult_u64 != rresult_u64) { + printf("Division failed, %"PRIu64"/%"PRIu64" = " + "expected %"PRIu64" result %"PRIu64"\n", + dividend_u64, divisor_u64, + nresult_u64, rresult_u64); + result = 1; + break; + } + } + + printf("Validating unsigned 64bit division with 32bit divisor.\n"); + for (i = 0; i < MAX_ITERATIONS; i++) { + /* Change divisor every DIVIDE_ITER iterations. */ + if (i % DIVIDE_ITER == 0) { + divisor_u64 = rte_rand() >> 32; + reci_u64 = rte_reciprocal_value_u64(divisor_u64); + } + + dividend_u64 = rte_rand(); + + nresult_u64 = dividend_u64 / divisor_u64; + rresult_u64 = rte_reciprocal_divide_u64(dividend_u64, + &reci_u64); + + if (nresult_u64 != rresult_u64) { + printf("Division failed, %"PRIu64"/%"PRIu64" = " + "expected %"PRIu64" result %"PRIu64"\n", + dividend_u64, divisor_u64, + nresult_u64, rresult_u64); + result = 1; + break; + } + } + + printf("Validating division by power of 2.\n"); + for (i = 0; i < 32; i++) { + divisor_u64 = 1ull << i; + reci_u64 = rte_reciprocal_value_u64(divisor_u64); + reci_u32 = rte_reciprocal_value((uint32_t)divisor_u64); + + dividend_u64 = rte_rand(); + + nresult_u64 = dividend_u64 / divisor_u64; + rresult_u64 = rte_reciprocal_divide_u64(dividend_u64, + &reci_u64); + + if (nresult_u64 != rresult_u64) { + printf("Division 64 failed, %"PRIu64"/%"PRIu64" = " + "expected %"PRIu64" result %"PRIu64"\n", + dividend_u64, divisor_u64, + nresult_u64, rresult_u64); + result = 1; + } + + nresult_u32 = (dividend_u64 >> 32) / divisor_u64; + rresult_u32 = rte_reciprocal_divide((dividend_u64 >> 32), + reci_u32); + + if (nresult_u32 != rresult_u32) { + printf("Division 32 failed, %"PRIu64"/%"PRIu64" = " + "expected %"PRIu64" result %"PRIu64"\n", + dividend_u64 >> 32, divisor_u64, + nresult_u64, rresult_u64); + result = 1; + break; + } + } + + for (; i < 64; i++) { + divisor_u64 = 1ull << i; + reci_u64 = rte_reciprocal_value_u64(divisor_u64); + + dividend_u64 = rte_rand(); + + nresult_u64 = dividend_u64 / divisor_u64; + rresult_u64 = rte_reciprocal_divide_u64(dividend_u64, + &reci_u64); + + if (nresult_u64 != rresult_u64) { + printf("Division failed, %"PRIu64"/%"PRIu64" = " + "expected %"PRIu64" result %"PRIu64"\n", + dividend_u64, divisor_u64, + nresult_u64, rresult_u64); + result = 1; + break; + } + + } + + return result; +} + +REGISTER_TEST_COMMAND(reciprocal_division, test_reciprocal); diff --git a/test/test/test_reciprocal_division_perf.c b/test/test/test_reciprocal_division_perf.c new file mode 100644 index 000000000..e759331de --- /dev/null +++ b/test/test/test_reciprocal_division_perf.c @@ -0,0 +1,229 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium, Inc. 2017. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium, Inc nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "test.h" + +#include <stdio.h> +#include <unistd.h> +#include <inttypes.h> + +#include <rte_common.h> +#include <rte_cycles.h> +#include <rte_random.h> +#include <rte_reciprocal.h> + +#define MAX_ITERATIONS (1ULL << 32) +#define DIVIDE_ITER (1ULL << 28) + +static int +test_reciprocal_division_perf(void) +{ + int result = 0; + uint32_t divisor_u32 = 0; + uint32_t dividend_u32; + uint64_t divisor_u64 = 0; + uint64_t dividend_u64; + volatile uint32_t nresult_u32; + volatile uint32_t rresult_u32; + volatile uint64_t nresult_u64; + volatile uint64_t rresult_u64; + uint64_t start_cyc; + uint64_t split_cyc; + uint64_t end_cyc; + uint64_t tot_cyc_n = 0; + uint64_t tot_cyc_r = 0; + uint64_t i; + struct rte_reciprocal reci_u32 = {0}; + struct rte_reciprocal_u64 reci_u64 = {0}; + + rte_srand(rte_rdtsc()); + + printf("Validating unsigned 32bit division.\n"); + for (i = 0; i < MAX_ITERATIONS; i++) { + /* Change divisor every DIVIDE_ITER iterations. */ + if (i % DIVIDE_ITER == 0) { + divisor_u32 = rte_rand(); + reci_u32 = rte_reciprocal_value(divisor_u32); + } + + dividend_u32 = rte_rand(); + + start_cyc = rte_rdtsc(); + nresult_u32 = dividend_u32 / divisor_u32; + split_cyc = rte_rdtsc(); + rresult_u32 = rte_reciprocal_divide(dividend_u32, + reci_u32); + end_cyc = rte_rdtsc(); + + tot_cyc_n += split_cyc - start_cyc; + tot_cyc_r += end_cyc - split_cyc; + if (nresult_u32 != rresult_u32) { + printf("Division failed, expected %"PRIu32" " + "result %"PRIu32"", + nresult_u32, rresult_u32); + result = 1; + break; + } + } + printf("32bit Division results:\n"); + printf("Total number of cycles normal division : %"PRIu64"\n", + tot_cyc_n); + printf("Total number of cycles reciprocal division : %"PRIu64"\n", + tot_cyc_r); + printf("Cycles per division(normal) : %3.2f\n", + ((double)tot_cyc_n)/i); + printf("Cycles per division(reciprocal) : %3.2f\n\n", + ((double)tot_cyc_r)/i); + + tot_cyc_n = 0; + tot_cyc_r = 0; + + printf("Validating unsigned 64bit division.\n"); + for (i = 0; i < MAX_ITERATIONS; i++) { + /* Change divisor every DIVIDE_ITER iterations. */ + if (i % DIVIDE_ITER == 0) { + divisor_u64 = rte_rand(); + reci_u64 = rte_reciprocal_value_u64(divisor_u64); + } + + dividend_u64 = rte_rand(); + + start_cyc = rte_rdtsc(); + nresult_u64 = dividend_u64 / divisor_u64; + split_cyc = rte_rdtsc(); + rresult_u64 = rte_reciprocal_divide_u64(dividend_u64, + &reci_u64); + end_cyc = rte_rdtsc(); + + tot_cyc_n += split_cyc - start_cyc; + tot_cyc_r += end_cyc - split_cyc; + if (nresult_u64 != rresult_u64) { + printf("Division failed, expected %"PRIu64" " + "result %"PRIu64"", + nresult_u64, rresult_u64); + result = 1; + break; + } + } + printf("64bit Division results:\n"); + printf("Total number of cycles normal division : %"PRIu64"\n", + tot_cyc_n); + printf("Total number of cycles reciprocal division : %"PRIu64"\n", + tot_cyc_r); + printf("Cycles per division(normal) : %3.2f\n", + ((double)tot_cyc_n)/i); + printf("Cycles per division(reciprocal) : %3.2f\n\n", + ((double)tot_cyc_r)/i); + + tot_cyc_n = 0; + tot_cyc_r = 0; + + printf("Validating unsigned 64bit division with 32bit divisor.\n"); + for (i = 0; i < MAX_ITERATIONS; i++) { + /* Change divisor every DIVIDE_ITER iterations. */ + if (i % DIVIDE_ITER == 0) { + divisor_u64 = rte_rand() >> 32; + reci_u64 = rte_reciprocal_value_u64(divisor_u64); + } + + dividend_u64 = rte_rand(); + + start_cyc = rte_rdtsc(); + nresult_u64 = dividend_u64 / divisor_u64; + split_cyc = rte_rdtsc(); + rresult_u64 = rte_reciprocal_divide_u64(dividend_u64, + &reci_u64); + end_cyc = rte_rdtsc(); + + tot_cyc_n += split_cyc - start_cyc; + tot_cyc_r += end_cyc - split_cyc; + if (nresult_u64 != rresult_u64) { + printf("Division failed, expected %"PRIu64" " + "result %"PRIu64"", + nresult_u64, rresult_u64); + result = 1; + break; + } + } + + printf("64bit Division results:\n"); + printf("Total number of cycles normal division : %"PRIu64"\n", + tot_cyc_n); + printf("Total number of cycles reciprocal division : %"PRIu64"\n", + tot_cyc_r); + printf("Cycles per division(normal) : %3.2f\n", + ((double)tot_cyc_n)/i); + printf("Cycles per division(reciprocal) : %3.2f\n\n", + ((double)tot_cyc_r)/i); + + tot_cyc_n = 0; + tot_cyc_r = 0; + + printf("Validating division by power of 2.\n"); + for (i = 0; i < 64; i++) { + divisor_u64 = 1ull << i; + reci_u64 = rte_reciprocal_value_u64(divisor_u64); + + dividend_u64 = rte_rand(); + + start_cyc = rte_rdtsc(); + nresult_u64 = dividend_u64 / divisor_u64; + split_cyc = rte_rdtsc(); + rresult_u64 = rte_reciprocal_divide_u64(dividend_u64, + &reci_u64); + end_cyc = rte_rdtsc(); + + tot_cyc_n += split_cyc - start_cyc; + tot_cyc_r += end_cyc - split_cyc; + if (nresult_u64 != rresult_u64) { + printf("Division 64 failed, %"PRIu64"/%"PRIu64" = " + "expected %"PRIu64" result %"PRIu64"\n", + dividend_u64, divisor_u64, + nresult_u64, rresult_u64); + result = 1; + break; + } + } + printf("64bit Division results:\n"); + printf("Total number of cycles normal division : %"PRIu64"\n", + tot_cyc_n); + printf("Total number of cycles reciprocal division : %"PRIu64"\n", + tot_cyc_r); + printf("Cycles per division(normal) : %3.2f\n", + ((double)tot_cyc_n)/i); + printf("Cycles per division(reciprocal) : %3.2f\n", + ((double)tot_cyc_r)/i); + + return result; +} + +REGISTER_TEST_COMMAND(reciprocal_division_perf, test_reciprocal_division_perf); -- 2.14.1 ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [dpdk-dev] [PATCH v7 3/3] test: add tests for reciprocal based division 2017-12-04 13:22 ` [dpdk-dev] [PATCH v7 3/3] test: add tests for reciprocal based division Pavan Nikhilesh @ 2018-01-18 23:49 ` Thomas Monjalon 2018-01-25 22:35 ` Thomas Monjalon 0 siblings, 1 reply; 19+ messages in thread From: Thomas Monjalon @ 2018-01-18 23:49 UTC (permalink / raw) To: Pavan Nikhilesh; +Cc: dev, cristian.dumitrescu, stephen, ktraynor 04/12/2017 14:22, Pavan Nikhilesh: > +/* > + * BSD LICENSE > + * > + * Copyright (C) Cavium, Inc. 2017. > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * > + * * Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * * Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in > + * the documentation and/or other materials provided with the > + * distribution. > + * * Neither the name of Cavium, Inc nor the names of its > + * contributors may be used to endorse or promote products derived > + * from this software without specific prior written permission. > + * > + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > + */ Please re-send with SPDX tag. Thanks ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [dpdk-dev] [PATCH v7 3/3] test: add tests for reciprocal based division 2018-01-18 23:49 ` Thomas Monjalon @ 2018-01-25 22:35 ` Thomas Monjalon 0 siblings, 0 replies; 19+ messages in thread From: Thomas Monjalon @ 2018-01-25 22:35 UTC (permalink / raw) To: Pavan Nikhilesh; +Cc: dev, cristian.dumitrescu, stephen, ktraynor Ping 19/01/2018 00:49, Thomas Monjalon: > 04/12/2017 14:22, Pavan Nikhilesh: > > +/* > > + * BSD LICENSE > > + * > > + * Copyright (C) Cavium, Inc. 2017. > > + * > > + * Redistribution and use in source and binary forms, with or without > > + * modification, are permitted provided that the following conditions > > + * are met: > > + * > > + * * Redistributions of source code must retain the above copyright > > + * notice, this list of conditions and the following disclaimer. > > + * * Redistributions in binary form must reproduce the above copyright > > + * notice, this list of conditions and the following disclaimer in > > + * the documentation and/or other materials provided with the > > + * distribution. > > + * * Neither the name of Cavium, Inc nor the names of its > > + * contributors may be used to endorse or promote products derived > > + * from this software without specific prior written permission. > > + * > > + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > > + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > > + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > > + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > > + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > > + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > > + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > > + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > > + */ > > Please re-send with SPDX tag. > Thanks ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [dpdk-dev] [PATCH v7 1/3] eal: introduce integer divide through reciprocal 2017-12-04 13:22 ` [dpdk-dev] [PATCH v7 1/3] eal: introduce integer divide through reciprocal Pavan Nikhilesh 2017-12-04 13:22 ` [dpdk-dev] [PATCH v7 2/3] eal: add u64 bit variant for reciprocal Pavan Nikhilesh 2017-12-04 13:22 ` [dpdk-dev] [PATCH v7 3/3] test: add tests for reciprocal based division Pavan Nikhilesh @ 2018-01-11 16:01 ` Dumitrescu, Cristian 2 siblings, 0 replies; 19+ messages in thread From: Dumitrescu, Cristian @ 2018-01-11 16:01 UTC (permalink / raw) To: Pavan Nikhilesh, stephen, ktraynor; +Cc: dev > -----Original Message----- > From: Pavan Nikhilesh [mailto:pbhagavatula@caviumnetworks.com] > Sent: Monday, December 4, 2017 1:22 PM > To: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; > stephen@networkplumber.org; ktraynor@redhat.com > Cc: dev@dpdk.org; Pavan Nikhilesh <pbhagavatula@caviumnetworks.com> > Subject: [dpdk-dev] [PATCH v7 1/3] eal: introduce integer divide through > reciprocal > > In some use cases of integer division, denominator remains constant and > numerator varies. It is possible to optimize division for such specific > scenarios. > > The librte_sched uses rte_reciprocal to optimize division so, moving it to > eal/common would allow other libraries and applications to use it. > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com> > --- > > v7 changes: > - reworked u64 division (follows same flow as 32bit). > - addressed review comments from v6 (Undo cosmetic changes to 32bit > division) > > v6 changes: > - remove cache alignment from rte_reciprocal_u{32/64}structures as they > would > be embedded in other structures. > > v5 changes: > - fix test print strings > > v4 changes: > - minor fix for test cases > - fix u32 divisor generation > > v3 changes: > - fix x86_32 compilation issue > - fix improper licence in test > > v2 changes: > - fix compilation issues with .map files > - add test cases for correctness and performance > - remove extra licence inclusion > - fix coding style issues > > lib/librte_eal/bsdapp/eal/Makefile | 1 + > lib/librte_eal/common/Makefile | 1 + > lib/{librte_sched => librte_eal/common/include}/rte_reciprocal.h | 0 > lib/{librte_sched => librte_eal/common}/rte_reciprocal.c | 0 > lib/librte_eal/linuxapp/eal/Makefile | 1 + > lib/librte_eal/rte_eal_version.map | 7 +++++++ > lib/librte_sched/Makefile | 2 -- > lib/librte_sched/rte_sched.c | 2 +- > 8 files changed, 11 insertions(+), 3 deletions(-) > rename lib/{librte_sched => librte_eal/common/include}/rte_reciprocal.h > (100%) > rename lib/{librte_sched => librte_eal/common}/rte_reciprocal.c (100%) > > diff --git a/lib/librte_eal/bsdapp/eal/Makefile > b/lib/librte_eal/bsdapp/eal/Makefile > index afa117de4..b0442a75d 100644 > --- a/lib/librte_eal/bsdapp/eal/Makefile > +++ b/lib/librte_eal/bsdapp/eal/Makefile > @@ -84,6 +84,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += > malloc_elem.c > SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_heap.c > SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_keepalive.c > SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_service.c > +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_reciprocal.c > > # from arch dir > SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_cpuflags.c > diff --git a/lib/librte_eal/common/Makefile > b/lib/librte_eal/common/Makefile > index 9effd0d45..ed0d3f80b 100644 > --- a/lib/librte_eal/common/Makefile > +++ b/lib/librte_eal/common/Makefile > @@ -44,6 +44,7 @@ INC += rte_pci_dev_feature_defs.h > rte_pci_dev_features.h > INC += rte_malloc.h rte_keepalive.h rte_time.h > INC += rte_service.h rte_service_component.h > INC += rte_bitmap.h rte_vfio.h > +INC += rte_reciprocal.h > > GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h > GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h > diff --git a/lib/librte_sched/rte_reciprocal.h > b/lib/librte_eal/common/include/rte_reciprocal.h > similarity index 100% > rename from lib/librte_sched/rte_reciprocal.h > rename to lib/librte_eal/common/include/rte_reciprocal.h > diff --git a/lib/librte_sched/rte_reciprocal.c > b/lib/librte_eal/common/rte_reciprocal.c > similarity index 100% > rename from lib/librte_sched/rte_reciprocal.c > rename to lib/librte_eal/common/rte_reciprocal.c > diff --git a/lib/librte_eal/linuxapp/eal/Makefile > b/lib/librte_eal/linuxapp/eal/Makefile > index 5a7b8b2ac..ce65d5ccc 100644 > --- a/lib/librte_eal/linuxapp/eal/Makefile > +++ b/lib/librte_eal/linuxapp/eal/Makefile > @@ -91,6 +91,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += > malloc_elem.c > SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_heap.c > SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_keepalive.c > SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_service.c > +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_reciprocal.c > > # from arch dir > SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_cpuflags.c > diff --git a/lib/librte_eal/rte_eal_version.map > b/lib/librte_eal/rte_eal_version.map > index f4f46c1be..d8b968c5c 100644 > --- a/lib/librte_eal/rte_eal_version.map > +++ b/lib/librte_eal/rte_eal_version.map > @@ -200,6 +200,13 @@ DPDK_17.11 { > > } DPDK_17.08; > > +DPDK_18.02 { > + global: > + > + rte_reciprocal_value; > + > +} DPDK_17.11; > + > EXPERIMENTAL { > global: > > diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile > index 04785f720..535b23d76 100644 > --- a/lib/librte_sched/Makefile > +++ b/lib/librte_sched/Makefile > @@ -54,10 +54,8 @@ LIBABIVER := 1 > # all source are stored in SRCS-y > # > SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c > -SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_reciprocal.c > > # install includes > SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h > rte_sched_common.h rte_red.h rte_approx.h > -SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_reciprocal.h > > include $(RTE_SDK)/mk/rte.lib.mk > diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c > index 7252f850d..35c5c3cde 100644 > --- a/lib/librte_sched/rte_sched.c > +++ b/lib/librte_sched/rte_sched.c > @@ -43,11 +43,11 @@ > #include <rte_branch_prediction.h> > #include <rte_mbuf.h> > #include <rte_bitmap.h> > +#include <rte_reciprocal.h> > > #include "rte_sched.h" > #include "rte_sched_common.h" > #include "rte_approx.h" > -#include "rte_reciprocal.h" > > #ifdef __INTEL_COMPILER > #pragma warning(disable:2259) /* conversion may lose significant bits */ > -- > 2.14.1 Hi Pavan, To make progress on this patch set, I suggest you split and resend this exact patch as a standalone patch set, so we at least move the existing 32-bit version from librte_sched to EAL for other libs/apps could use it. Regards, Cristian ^ permalink raw reply [flat|nested] 19+ messages in thread
* [dpdk-dev] [PATCH v8 1/3] eal: introduce integer divide through reciprocal 2017-08-29 18:46 [dpdk-dev] [PATCH 1/2] eal: introduce integer divide through reciprocal Pavan Nikhilesh 2017-08-29 18:46 ` [dpdk-dev] [PATCH 2/2] eal: add u64 bit variant for reciprocal Pavan Nikhilesh 2017-12-04 13:22 ` [dpdk-dev] [PATCH v7 1/3] eal: introduce integer divide through reciprocal Pavan Nikhilesh @ 2018-01-26 5:04 ` Pavan Nikhilesh 2018-01-26 5:04 ` [dpdk-dev] [PATCH v8 2/3] eal: add u64 bit variant for reciprocal Pavan Nikhilesh ` (2 more replies) 2 siblings, 3 replies; 19+ messages in thread From: Pavan Nikhilesh @ 2018-01-26 5:04 UTC (permalink / raw) To: thomas, cristian.dumitrescu, stephen; +Cc: dev, Pavan Nikhilesh In some use cases of integer division, denominator remains constant and numerator varies. It is possible to optimize division for such specific scenarios. The librte_sched uses rte_reciprocal to optimize division so, moving it to eal/common would allow other libraries and applications to use it. Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com> --- v8 Changes: - add SPDX license tags. v7 changes: - reworked u64 division (follows same flow as 32bit). - addressed review comments from v6 (Undo cosmetic changes to 32bit division) v6 changes: - remove cache alignment from rte_reciprocal_u{32/64}structures as they would be embedded in other structures. v5 changes: - fix test print strings v4 changes: - minor fix for test cases - fix u32 divisor generation v3 changes: - fix x86_32 compilation issue - fix improper licence in test v2 changes: - fix compilation issues with .map files - add test cases for correctness and performance - remove extra licence inclusion - fix coding style issues lib/librte_eal/bsdapp/eal/Makefile | 1 + lib/librte_eal/common/Makefile | 1 + lib/{librte_sched => librte_eal/common/include}/rte_reciprocal.h | 0 lib/{librte_sched => librte_eal/common}/rte_reciprocal.c | 0 lib/librte_eal/linuxapp/eal/Makefile | 1 + lib/librte_eal/rte_eal_version.map | 2 +- lib/librte_sched/Makefile | 2 -- lib/librte_sched/rte_sched.c | 2 +- 8 files changed, 5 insertions(+), 4 deletions(-) rename lib/{librte_sched => librte_eal/common/include}/rte_reciprocal.h (100%) rename lib/{librte_sched => librte_eal/common}/rte_reciprocal.c (100%) diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile index c6940760f..028e81dd8 100644 --- a/lib/librte_eal/bsdapp/eal/Makefile +++ b/lib/librte_eal/bsdapp/eal/Makefile @@ -57,6 +57,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_elem.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_heap.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_keepalive.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_service.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_reciprocal.c # from arch dir SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_cpuflags.c diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile index 96904dea1..ea824a3a8 100644 --- a/lib/librte_eal/common/Makefile +++ b/lib/librte_eal/common/Makefile @@ -16,6 +16,7 @@ INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h INC += rte_malloc.h rte_keepalive.h rte_time.h INC += rte_service.h rte_service_component.h INC += rte_bitmap.h rte_vfio.h rte_hypervisor.h rte_test.h +INC += rte_reciprocal.h GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h diff --git a/lib/librte_sched/rte_reciprocal.h b/lib/librte_eal/common/include/rte_reciprocal.h similarity index 100% rename from lib/librte_sched/rte_reciprocal.h rename to lib/librte_eal/common/include/rte_reciprocal.h diff --git a/lib/librte_sched/rte_reciprocal.c b/lib/librte_eal/common/rte_reciprocal.c similarity index 100% rename from lib/librte_sched/rte_reciprocal.c rename to lib/librte_eal/common/rte_reciprocal.c diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile index 7bf278f3b..e50b73a69 100644 --- a/lib/librte_eal/linuxapp/eal/Makefile +++ b/lib/librte_eal/linuxapp/eal/Makefile @@ -64,6 +64,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_elem.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_heap.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_keepalive.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_service.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_reciprocal.c # from arch dir SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_cpuflags.c diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map index 7088b7230..730966407 100644 --- a/lib/librte_eal/rte_eal_version.map +++ b/lib/librte_eal/rte_eal_version.map @@ -206,7 +206,7 @@ DPDK_18.02 { rte_hypervisor_get; rte_hypervisor_get_name; rte_vfio_clear_group; - + rte_reciprocal_value; } DPDK_17.11; EXPERIMENTAL { diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile index 73c9d1a37..55d9c6989 100644 --- a/lib/librte_sched/Makefile +++ b/lib/librte_sched/Makefile @@ -26,10 +26,8 @@ LIBABIVER := 1 # all source are stored in SRCS-y # SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c -SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_reciprocal.c # install includes SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_sched_common.h rte_red.h rte_approx.h -SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_reciprocal.h include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c index ad2f7c6d5..634486c80 100644 --- a/lib/librte_sched/rte_sched.c +++ b/lib/librte_sched/rte_sched.c @@ -14,11 +14,11 @@ #include <rte_branch_prediction.h> #include <rte_mbuf.h> #include <rte_bitmap.h> +#include <rte_reciprocal.h> #include "rte_sched.h" #include "rte_sched_common.h" #include "rte_approx.h" -#include "rte_reciprocal.h" #ifdef __INTEL_COMPILER #pragma warning(disable:2259) /* conversion may lose significant bits */ -- 2.14.1 ^ permalink raw reply [flat|nested] 19+ messages in thread
* [dpdk-dev] [PATCH v8 2/3] eal: add u64 bit variant for reciprocal 2018-01-26 5:04 ` [dpdk-dev] [PATCH v8 " Pavan Nikhilesh @ 2018-01-26 5:04 ` Pavan Nikhilesh 2018-01-29 6:42 ` Hemant Agrawal 2018-10-28 0:06 ` Ferruh Yigit 2018-01-26 5:04 ` [dpdk-dev] [PATCH v8 3/3] test: add tests for reciprocal based division Pavan Nikhilesh 2018-01-27 21:39 ` [dpdk-dev] [PATCH v8 1/3] eal: introduce integer divide through reciprocal Thomas Monjalon 2 siblings, 2 replies; 19+ messages in thread From: Pavan Nikhilesh @ 2018-01-26 5:04 UTC (permalink / raw) To: thomas, cristian.dumitrescu, stephen; +Cc: dev, Pavan Nikhilesh Currently, rte_reciprocal only supports unsigned 32bit divisors. This commit adds support for unsigned 64bit divisors. Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com> --- lib/librte_eal/common/include/rte_reciprocal.h | 49 ++++++++++++++ lib/librte_eal/common/rte_reciprocal.c | 92 ++++++++++++++++++++++++++ lib/librte_eal/rte_eal_version.map | 4 +- 3 files changed, 144 insertions(+), 1 deletion(-) diff --git a/lib/librte_eal/common/include/rte_reciprocal.h b/lib/librte_eal/common/include/rte_reciprocal.h index 5e21f096f..3492c73ba 100644 --- a/lib/librte_eal/common/include/rte_reciprocal.h +++ b/lib/librte_eal/common/include/rte_reciprocal.h @@ -1,3 +1,6 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Cavium, Inc + */ /* * Reciprocal divide * @@ -29,6 +32,11 @@ struct rte_reciprocal { uint8_t sh1, sh2; }; +struct rte_reciprocal_u64 { + uint64_t m; + uint8_t sh1, sh2; +}; + static inline uint32_t rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R) { uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32); @@ -36,6 +44,47 @@ static inline uint32_t rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R return (t + ((a - t) >> R.sh1)) >> R.sh2; } +static __rte_always_inline uint64_t +mullhi_u64(uint64_t x, uint64_t y) +{ +#ifdef __SIZEOF_INT128__ + __uint128_t xl = x; + __uint128_t rl = xl * y; + + return (rl >> 64); +#else + uint64_t u0, u1, v0, v1, k, t; + uint64_t w1, w2; + uint64_t whi; + + u1 = x >> 32; u0 = x & 0xFFFFFFFF; + v1 = y >> 32; v0 = y & 0xFFFFFFFF; + + t = u0*v0; + k = t >> 32; + + t = u1*v0 + k; + w1 = t & 0xFFFFFFFF; + w2 = t >> 32; + + t = u0*v1 + w1; + k = t >> 32; + + whi = u1*v1 + w2 + k; + + return whi; +#endif +} + +static __rte_always_inline uint64_t +rte_reciprocal_divide_u64(uint64_t a, struct rte_reciprocal_u64 *R) +{ + uint64_t t = mullhi_u64(a, R->m); + + return (t + ((a - t) >> R->sh1)) >> R->sh2; +} + struct rte_reciprocal rte_reciprocal_value(uint32_t d); +struct rte_reciprocal_u64 rte_reciprocal_value_u64(uint64_t d); #endif /* _RTE_RECIPROCAL_H_ */ diff --git a/lib/librte_eal/common/rte_reciprocal.c b/lib/librte_eal/common/rte_reciprocal.c index 652f0237a..d81b11db6 100644 --- a/lib/librte_eal/common/rte_reciprocal.c +++ b/lib/librte_eal/common/rte_reciprocal.c @@ -1,3 +1,6 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Cavium, Inc + */ /*- * BSD LICENSE * @@ -70,3 +73,92 @@ struct rte_reciprocal rte_reciprocal_value(uint32_t d) return R; } + +/* + * Code taken from Hacker's Delight: + * http://www.hackersdelight.org/hdcodetxt/divlu.c.txt + * License permits inclusion here per: + * http://www.hackersdelight.org/permissions.htm + */ +static uint64_t +divide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t *r) +{ + const uint64_t b = (1ULL << 32); /* Number base (16 bits). */ + uint64_t un1, un0, /* Norm. dividend LSD's. */ + vn1, vn0, /* Norm. divisor digits. */ + q1, q0, /* Quotient digits. */ + un64, un21, un10, /* Dividend digit pairs. */ + rhat; /* A remainder. */ + int s; /* Shift amount for norm. */ + + /* If overflow, set rem. to an impossible value. */ + if (u1 >= v) { + if (r != NULL) + *r = (uint64_t) -1; + return (uint64_t) -1; + } + + /* Count leading zeros. */ + s = __builtin_clzll(v); + if (s > 0) { + v = v << s; + un64 = (u1 << s) | ((u0 >> (64 - s)) & (-s >> 31)); + un10 = u0 << s; + } else { + + un64 = u1 | u0; + un10 = u0; + } + + vn1 = v >> 32; + vn0 = v & 0xFFFFFFFF; + + un1 = un10 >> 32; + un0 = un10 & 0xFFFFFFFF; + + q1 = un64/vn1; + rhat = un64 - q1*vn1; +again1: + if (q1 >= b || q1*vn0 > b*rhat + un1) { + q1 = q1 - 1; + rhat = rhat + vn1; + if (rhat < b) + goto again1; + } + + un21 = un64*b + un1 - q1*v; + + q0 = un21/vn1; + rhat = un21 - q0*vn1; +again2: + if (q0 >= b || q0*vn0 > b*rhat + un0) { + q0 = q0 - 1; + rhat = rhat + vn1; + if (rhat < b) + goto again2; + } + + if (r != NULL) + *r = (un21*b + un0 - q0*v) >> s; + return q1*b + q0; +} + +struct rte_reciprocal_u64 +rte_reciprocal_value_u64(uint64_t d) +{ + struct rte_reciprocal_u64 R; + uint64_t m; + int l; + + l = 63 - __builtin_clzll(d); + + m = divide_128_div_64_to_64((1ULL << l), 0, d, NULL) << 1; + m = (1ULL << l) - d ? m + 2 : 1; + R.m = m; + + R.sh1 = l > 1 ? 1 : l; + R.sh2 = (l > 0) ? l : 0; + R.sh2 -= R.sh2 && (m == 1) ? 1 : 0; + + return R; +} diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map index 730966407..4355322e9 100644 --- a/lib/librte_eal/rte_eal_version.map +++ b/lib/librte_eal/rte_eal_version.map @@ -207,7 +207,9 @@ DPDK_18.02 { rte_hypervisor_get_name; rte_vfio_clear_group; rte_reciprocal_value; -} DPDK_17.11; + rte_reciprocal_value_u64; + +} DPDK_17.11; EXPERIMENTAL { global: -- 2.14.1 ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [dpdk-dev] [PATCH v8 2/3] eal: add u64 bit variant for reciprocal 2018-01-26 5:04 ` [dpdk-dev] [PATCH v8 2/3] eal: add u64 bit variant for reciprocal Pavan Nikhilesh @ 2018-01-29 6:42 ` Hemant Agrawal 2018-01-29 7:54 ` Pavan Nikhilesh 2018-10-28 0:06 ` Ferruh Yigit 1 sibling, 1 reply; 19+ messages in thread From: Hemant Agrawal @ 2018-01-29 6:42 UTC (permalink / raw) To: Pavan Nikhilesh, thomas, cristian.dumitrescu, stephen; +Cc: dev Hi Pavan, I am bit late in checking it. (series is already applied) Just few legal queries. > --- a/lib/librte_eal/common/rte_reciprocal.c > +++ b/lib/librte_eal/common/rte_reciprocal.c > @@ -1,3 +1,6 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2017 Cavium, Inc you have added Cavium's copyright here. > + > +/* > + * Code taken from Hacker's Delight: > + * http://www.hackersdelight.org/hdcodetxt/divlu.c.txt > + * License permits inclusion here per: > + * http://www.hackersdelight.org/permissions.htm > + */ Did you clarify it Cavium's legal team? The permissions states that you should not add it to another publication without written permission. DPDK can be considered a publication. Did you got written permission? If not please specify Cavium's legal team opinion or take permission from author. I believe that it is easy to obtain permission for this code. >>>>>> "You are free to use, copy, and distribute any of the code on this web site, whether modified by you or not. You need not give attribution. This includes the algorithms (some of which appear in Hacker's Delight), the Hacker's Assistant, and any code submitted by readers. Submitters implicitly agree to this. *The textural material and pictures are copyright by the author, and the usual copyright rules apply. E.g., you may store the material on your computer and make hard or soft copies for your own use. However, you may not incorporate this material into another publication without written permission from the author (which the author may give by email).* The author has taken care in the preparation of this material, but makes no expressed or implied warranty of any kind and assumes no responsibility for errors or omissions. No liability is assumed for incidental or consequential damages in connection with or arising out of the use of the information or programs contained herein." >>>>>>>>> regards, Hemant ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [dpdk-dev] [PATCH v8 2/3] eal: add u64 bit variant for reciprocal 2018-01-29 6:42 ` Hemant Agrawal @ 2018-01-29 7:54 ` Pavan Nikhilesh 2018-01-29 8:14 ` Hemant Agrawal 0 siblings, 1 reply; 19+ messages in thread From: Pavan Nikhilesh @ 2018-01-29 7:54 UTC (permalink / raw) To: Hemant Agrawal, thomas, cristian.dumitrescu, stephen; +Cc: dev Hi Hemant, On Mon, Jan 29, 2018 at 12:12:55PM +0530, Hemant Agrawal wrote: > Hi Pavan, > I am bit late in checking it. (series is already applied) > > Just few legal queries. > > > --- a/lib/librte_eal/common/rte_reciprocal.c > +++ > b/lib/librte_eal/common/rte_reciprocal.c > > @@ -1,3 +1,6 @@ > > +/* SPDX-License-Identifier: BSD-3-Clause > > + * Copyright(c) 2017 Cavium, Inc > > you have added Cavium's copyright here. > > > + > > +/* > > + * Code taken from Hacker's Delight: > > + * http://www.hackersdelight.org/hdcodetxt/divlu.c.txt > > + * License permits inclusion here per: > > + * http://www.hackersdelight.org/permissions.htm > > + */ > > Did you clarify it Cavium's legal team? > > The permissions states that you should not add it to another publication > without written permission. DPDK can be considered a publication. > Did you got written permission? The link specifically states that only the "The textural material and pictures are copyright by the author" and "You are free to use, copy, and distribute any of the code on this web site, whether modified by you or not". The textural material in this case mean any text/pictures taken from the Hacker's Delight book. The linux kernel also uses similar code as seen at https://github.com/torvalds/linux/blob/master/lib/div64.c. Let me know if any further clarification are needed. Regards, Pavan. > > If not please specify Cavium's legal team opinion or take permission from > author. I believe that it is easy to obtain permission for this code. > > >>>>>> > "You are free to use, copy, and distribute any of the code on this web site, > whether modified by you or not. You need not give attribution. This includes > the algorithms (some of which appear in Hacker's Delight), the Hacker's > Assistant, and any code submitted by readers. Submitters implicitly agree to > this. > > *The textural material and pictures are copyright by the author, and the > usual copyright rules apply. E.g., you may store the material on your > computer and make hard or soft copies for your own use. However, you may not > incorporate this material into another publication without written > permission from the author (which the author may give by email).* > > The author has taken care in the preparation of this material, but makes no > expressed or implied warranty of any kind and assumes no responsibility for > errors or omissions. No liability is assumed for incidental or consequential > damages in connection with or arising out of the use of the information or > programs contained herein." > >>>>>>>>> > regards, > Hemant > ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [dpdk-dev] [PATCH v8 2/3] eal: add u64 bit variant for reciprocal 2018-01-29 7:54 ` Pavan Nikhilesh @ 2018-01-29 8:14 ` Hemant Agrawal 0 siblings, 0 replies; 19+ messages in thread From: Hemant Agrawal @ 2018-01-29 8:14 UTC (permalink / raw) To: Pavan Nikhilesh, thomas, cristian.dumitrescu, stephen; +Cc: dev On 1/29/2018 1:24 PM, Pavan Nikhilesh wrote: > Hi Hemant, > > On Mon, Jan 29, 2018 at 12:12:55PM +0530, Hemant Agrawal wrote: >> Hi Pavan, >> I am bit late in checking it. (series is already applied) >> >> Just few legal queries. >> >>> --- a/lib/librte_eal/common/rte_reciprocal.c > +++ >> b/lib/librte_eal/common/rte_reciprocal.c >>> @@ -1,3 +1,6 @@ >>> +/* SPDX-License-Identifier: BSD-3-Clause >>> + * Copyright(c) 2017 Cavium, Inc >> >> you have added Cavium's copyright here. >> >>> + >>> +/* >>> + * Code taken from Hacker's Delight: >>> + * http://www.hackersdelight.org/hdcodetxt/divlu.c.txt >>> + * License permits inclusion here per: >>> + * http://www.hackersdelight.org/permissions.htm >>> + */ >> >> Did you clarify it Cavium's legal team? >> >> The permissions states that you should not add it to another publication >> without written permission. DPDK can be considered a publication. >> Did you got written permission? > > The link specifically states that only the "The textural material and pictures > are copyright by the author" and "You are free to use, copy, and distribute any > of the code on this web site, whether modified by you or not". > The textural material in this case mean any text/pictures taken from the > Hacker's Delight book. > > The linux kernel also uses similar code as seen at > https://github.com/torvalds/linux/blob/master/lib/div64.c. > > Let me know if any further clarification are needed. Thanks. It is clear now. > > Regards, > Pavan. > >> >> If not please specify Cavium's legal team opinion or take permission from >> author. I believe that it is easy to obtain permission for this code. >> >>>>>>>> >> "You are free to use, copy, and distribute any of the code on this web site, >> whether modified by you or not. You need not give attribution. This includes >> the algorithms (some of which appear in Hacker's Delight), the Hacker's >> Assistant, and any code submitted by readers. Submitters implicitly agree to >> this. >> >> *The textural material and pictures are copyright by the author, and the >> usual copyright rules apply. E.g., you may store the material on your >> computer and make hard or soft copies for your own use. However, you may not >> incorporate this material into another publication without written >> permission from the author (which the author may give by email).* >> >> The author has taken care in the preparation of this material, but makes no >> expressed or implied warranty of any kind and assumes no responsibility for >> errors or omissions. No liability is assumed for incidental or consequential >> damages in connection with or arising out of the use of the information or >> programs contained herein." >>>>>>>>>>> >> regards, >> Hemant >> > ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [dpdk-dev] [PATCH v8 2/3] eal: add u64 bit variant for reciprocal 2018-01-26 5:04 ` [dpdk-dev] [PATCH v8 2/3] eal: add u64 bit variant for reciprocal Pavan Nikhilesh 2018-01-29 6:42 ` Hemant Agrawal @ 2018-10-28 0:06 ` Ferruh Yigit 1 sibling, 0 replies; 19+ messages in thread From: Ferruh Yigit @ 2018-10-28 0:06 UTC (permalink / raw) To: Pavan Nikhilesh, thomas, cristian.dumitrescu, stephen; +Cc: dev On 1/26/2018 5:04 AM, Pavan Nikhilesh wrote: > Currently, rte_reciprocal only supports unsigned 32bit divisors. This > commit adds support for unsigned 64bit divisors. > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com> <...> > +static uint64_t > +divide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t *r) > +{ > + const uint64_t b = (1ULL << 32); /* Number base (16 bits). */ > + uint64_t un1, un0, /* Norm. dividend LSD's. */ > + vn1, vn0, /* Norm. divisor digits. */ > + q1, q0, /* Quotient digits. */ > + un64, un21, un10, /* Dividend digit pairs. */ > + rhat; /* A remainder. */ > + int s; /* Shift amount for norm. */ > + > + /* If overflow, set rem. to an impossible value. */ > + if (u1 >= v) { > + if (r != NULL) > + *r = (uint64_t) -1; > + return (uint64_t) -1; > + } > + > + /* Count leading zeros. */ > + s = __builtin_clzll(v); > + if (s > 0) { > + v = v << s; > + un64 = (u1 << s) | ((u0 >> (64 - s)) & (-s >> 31)); Hi Pavan, This is causing a cppcheck warning [1]. s is `int` and we know here s > 0, so `-s >> 31` should be 0xffffffff right, what is the point of this operation instead of using hardcoded value? [1] (error) Shifting signed 32-bit value by 31 bits is undefined behaviour ^ permalink raw reply [flat|nested] 19+ messages in thread
* [dpdk-dev] [PATCH v8 3/3] test: add tests for reciprocal based division 2018-01-26 5:04 ` [dpdk-dev] [PATCH v8 " Pavan Nikhilesh 2018-01-26 5:04 ` [dpdk-dev] [PATCH v8 2/3] eal: add u64 bit variant for reciprocal Pavan Nikhilesh @ 2018-01-26 5:04 ` Pavan Nikhilesh 2018-01-27 21:39 ` [dpdk-dev] [PATCH v8 1/3] eal: introduce integer divide through reciprocal Thomas Monjalon 2 siblings, 0 replies; 19+ messages in thread From: Pavan Nikhilesh @ 2018-01-26 5:04 UTC (permalink / raw) To: thomas, cristian.dumitrescu, stephen; +Cc: dev, Pavan Nikhilesh This commit provides a set of tests for verifying the correctness and performance of both unsigned 32 and 64bit reciprocal based division. Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com> --- test/test/Makefile | 2 + test/test/test_reciprocal_division.c | 167 +++++++++++++++++++++++++ test/test/test_reciprocal_division_perf.c | 201 ++++++++++++++++++++++++++++++ 3 files changed, 370 insertions(+) create mode 100644 test/test/test_reciprocal_division.c create mode 100644 test/test/test_reciprocal_division_perf.c diff --git a/test/test/Makefile b/test/test/Makefile index 5ba5a9ac7..c943a927a 100644 --- a/test/test/Makefile +++ b/test/test/Makefile @@ -67,6 +67,8 @@ SRCS-y += test_spinlock.c SRCS-y += test_memory.c SRCS-y += test_memzone.c SRCS-y += test_bitmap.c +SRCS-y += test_reciprocal_division.c +SRCS-y += test_reciprocal_division_perf.c SRCS-y += test_ring.c SRCS-y += test_ring_perf.c diff --git a/test/test/test_reciprocal_division.c b/test/test/test_reciprocal_division.c new file mode 100644 index 000000000..8ea9b1d24 --- /dev/null +++ b/test/test/test_reciprocal_division.c @@ -0,0 +1,167 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Cavium, Inc + */ + +#include "test.h" + +#include <stdio.h> +#include <unistd.h> +#include <inttypes.h> + +#include <rte_common.h> +#include <rte_cycles.h> +#include <rte_random.h> +#include <rte_reciprocal.h> + +#define MAX_ITERATIONS (1ULL << 32) +#define DIVIDE_ITER (100) + +static int +test_reciprocal(void) +{ + int result = 0; + uint32_t divisor_u32 = 0; + uint32_t dividend_u32; + uint32_t nresult_u32; + uint32_t rresult_u32; + uint64_t i, j; + uint64_t divisor_u64 = 0; + uint64_t dividend_u64; + uint64_t nresult_u64; + uint64_t rresult_u64; + struct rte_reciprocal reci_u32 = {0}; + struct rte_reciprocal_u64 reci_u64 = {0}; + + rte_srand(rte_rdtsc()); + printf("Validating unsigned 32bit division.\n"); + for (i = 0; i < MAX_ITERATIONS; i++) { + /* Change divisor every DIVIDE_ITER iterations. */ + if (i % DIVIDE_ITER == 0) { + divisor_u32 = rte_rand(); + reci_u32 = rte_reciprocal_value(divisor_u32); + } + + dividend_u32 = rte_rand(); + nresult_u32 = dividend_u32 / divisor_u32; + rresult_u32 = rte_reciprocal_divide(dividend_u32, + reci_u32); + if (nresult_u32 != rresult_u32) { + printf("Division failed, %"PRIu32"/%"PRIu32" = " + "expected %"PRIu32" result %"PRIu32"\n", + dividend_u32, divisor_u32, + nresult_u32, rresult_u32); + result = 1; + break; + } + } + + printf("Validating unsigned 64bit division.\n"); + for (i = 0; i < MAX_ITERATIONS; i++) { + /* Change divisor every DIVIDE_ITER iterations. */ + if (i % DIVIDE_ITER == 0) { + divisor_u64 = rte_rand(); + reci_u64 = rte_reciprocal_value_u64(divisor_u64); + } + + dividend_u64 = rte_rand(); + nresult_u64 = dividend_u64 / divisor_u64; + rresult_u64 = rte_reciprocal_divide_u64(dividend_u64, + &reci_u64); + if (nresult_u64 != rresult_u64) { + printf("Division failed, %"PRIu64"/%"PRIu64" = " + "expected %"PRIu64" result %"PRIu64"\n", + dividend_u64, divisor_u64, + nresult_u64, rresult_u64); + result = 1; + break; + } + } + + printf("Validating unsigned 64bit division with 32bit divisor.\n"); + for (i = 0; i < MAX_ITERATIONS; i++) { + /* Change divisor every DIVIDE_ITER iterations. */ + if (i % DIVIDE_ITER == 0) { + divisor_u64 = rte_rand() >> 32; + reci_u64 = rte_reciprocal_value_u64(divisor_u64); + } + + dividend_u64 = rte_rand(); + + nresult_u64 = dividend_u64 / divisor_u64; + rresult_u64 = rte_reciprocal_divide_u64(dividend_u64, + &reci_u64); + + if (nresult_u64 != rresult_u64) { + printf("Division failed, %"PRIu64"/%"PRIu64" = " + "expected %"PRIu64" result %"PRIu64"\n", + dividend_u64, divisor_u64, + nresult_u64, rresult_u64); + result = 1; + break; + } + } + + printf("Validating division by power of 2.\n"); + for (i = 0; i < 32; i++) { + divisor_u64 = 1ull << i; + reci_u64 = rte_reciprocal_value_u64(divisor_u64); + reci_u32 = rte_reciprocal_value((uint32_t)divisor_u64); + + for (j = 0; j < MAX_ITERATIONS >> 4; j++) { + dividend_u64 = rte_rand(); + + nresult_u64 = dividend_u64 / divisor_u64; + rresult_u64 = rte_reciprocal_divide_u64(dividend_u64, + &reci_u64); + + if (nresult_u64 != rresult_u64) { + printf( + "Division 64 failed, %"PRIu64"/%"PRIu64" = " + "expected %"PRIu64" result %"PRIu64"\n", + dividend_u64, divisor_u64, + nresult_u64, rresult_u64); + result = 1; + } + + nresult_u32 = (dividend_u64 >> 32) / divisor_u64; + rresult_u32 = rte_reciprocal_divide( + (dividend_u64 >> 32), reci_u32); + + if (nresult_u32 != rresult_u32) { + printf( + "Division 32 failed, %"PRIu64"/%"PRIu64" = " + "expected %"PRIu64" result %"PRIu64"\n", + dividend_u64 >> 32, divisor_u64, + nresult_u64, rresult_u64); + result = 1; + break; + } + } + } + + for (; i < 64; i++) { + divisor_u64 = 1ull << i; + reci_u64 = rte_reciprocal_value_u64(divisor_u64); + + for (j = 0; j < MAX_ITERATIONS >> 4; j++) { + dividend_u64 = rte_rand(); + + nresult_u64 = dividend_u64 / divisor_u64; + rresult_u64 = rte_reciprocal_divide_u64(dividend_u64, + &reci_u64); + + if (nresult_u64 != rresult_u64) { + printf("Division failed, %"PRIu64"/%"PRIu64" = " + "expected %"PRIu64" result %"PRIu64"\n", + dividend_u64, divisor_u64, + nresult_u64, rresult_u64); + result = 1; + break; + } + } + } + + return result; +} + +REGISTER_TEST_COMMAND(reciprocal_division, test_reciprocal); diff --git a/test/test/test_reciprocal_division_perf.c b/test/test/test_reciprocal_division_perf.c new file mode 100644 index 000000000..a7be8aa71 --- /dev/null +++ b/test/test/test_reciprocal_division_perf.c @@ -0,0 +1,201 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Cavium, Inc + */ + +#include "test.h" + +#include <stdio.h> +#include <unistd.h> +#include <inttypes.h> + +#include <rte_common.h> +#include <rte_cycles.h> +#include <rte_random.h> +#include <rte_reciprocal.h> + +#define MAX_ITERATIONS (1ULL << 32) +#define DIVIDE_ITER (1ULL << 28) + +static int +test_reciprocal_division_perf(void) +{ + int result = 0; + uint32_t divisor_u32 = 0; + uint32_t dividend_u32; + uint64_t divisor_u64 = 0; + uint64_t dividend_u64; + volatile uint32_t nresult_u32; + volatile uint32_t rresult_u32; + volatile uint64_t nresult_u64; + volatile uint64_t rresult_u64; + uint64_t start_cyc; + uint64_t split_cyc; + uint64_t end_cyc; + uint64_t tot_cyc_n = 0; + uint64_t tot_cyc_r = 0; + uint64_t i; + struct rte_reciprocal reci_u32 = {0}; + struct rte_reciprocal_u64 reci_u64 = {0}; + + rte_srand(rte_rdtsc()); + + printf("Validating unsigned 32bit division.\n"); + for (i = 0; i < MAX_ITERATIONS; i++) { + /* Change divisor every DIVIDE_ITER iterations. */ + if (i % DIVIDE_ITER == 0) { + divisor_u32 = rte_rand(); + reci_u32 = rte_reciprocal_value(divisor_u32); + } + + dividend_u32 = rte_rand(); + + start_cyc = rte_rdtsc(); + nresult_u32 = dividend_u32 / divisor_u32; + split_cyc = rte_rdtsc(); + rresult_u32 = rte_reciprocal_divide(dividend_u32, + reci_u32); + end_cyc = rte_rdtsc(); + + tot_cyc_n += split_cyc - start_cyc; + tot_cyc_r += end_cyc - split_cyc; + if (nresult_u32 != rresult_u32) { + printf("Division failed, expected %"PRIu32" " + "result %"PRIu32"", + nresult_u32, rresult_u32); + result = 1; + break; + } + } + printf("32bit Division results:\n"); + printf("Total number of cycles normal division : %"PRIu64"\n", + tot_cyc_n); + printf("Total number of cycles reciprocal division : %"PRIu64"\n", + tot_cyc_r); + printf("Cycles per division(normal) : %3.2f\n", + ((double)tot_cyc_n)/i); + printf("Cycles per division(reciprocal) : %3.2f\n\n", + ((double)tot_cyc_r)/i); + + tot_cyc_n = 0; + tot_cyc_r = 0; + + printf("Validating unsigned 64bit division.\n"); + for (i = 0; i < MAX_ITERATIONS; i++) { + /* Change divisor every DIVIDE_ITER iterations. */ + if (i % DIVIDE_ITER == 0) { + divisor_u64 = rte_rand(); + reci_u64 = rte_reciprocal_value_u64(divisor_u64); + } + + dividend_u64 = rte_rand(); + + start_cyc = rte_rdtsc(); + nresult_u64 = dividend_u64 / divisor_u64; + split_cyc = rte_rdtsc(); + rresult_u64 = rte_reciprocal_divide_u64(dividend_u64, + &reci_u64); + end_cyc = rte_rdtsc(); + + tot_cyc_n += split_cyc - start_cyc; + tot_cyc_r += end_cyc - split_cyc; + if (nresult_u64 != rresult_u64) { + printf("Division failed, expected %"PRIu64" " + "result %"PRIu64"", + nresult_u64, rresult_u64); + result = 1; + break; + } + } + printf("64bit Division results:\n"); + printf("Total number of cycles normal division : %"PRIu64"\n", + tot_cyc_n); + printf("Total number of cycles reciprocal division : %"PRIu64"\n", + tot_cyc_r); + printf("Cycles per division(normal) : %3.2f\n", + ((double)tot_cyc_n)/i); + printf("Cycles per division(reciprocal) : %3.2f\n\n", + ((double)tot_cyc_r)/i); + + tot_cyc_n = 0; + tot_cyc_r = 0; + + printf("Validating unsigned 64bit division with 32bit divisor.\n"); + for (i = 0; i < MAX_ITERATIONS; i++) { + /* Change divisor every DIVIDE_ITER iterations. */ + if (i % DIVIDE_ITER == 0) { + divisor_u64 = rte_rand() >> 32; + reci_u64 = rte_reciprocal_value_u64(divisor_u64); + } + + dividend_u64 = rte_rand(); + + start_cyc = rte_rdtsc(); + nresult_u64 = dividend_u64 / divisor_u64; + split_cyc = rte_rdtsc(); + rresult_u64 = rte_reciprocal_divide_u64(dividend_u64, + &reci_u64); + end_cyc = rte_rdtsc(); + + tot_cyc_n += split_cyc - start_cyc; + tot_cyc_r += end_cyc - split_cyc; + if (nresult_u64 != rresult_u64) { + printf("Division failed, expected %"PRIu64" " + "result %"PRIu64"", + nresult_u64, rresult_u64); + result = 1; + break; + } + } + + printf("64bit Division results:\n"); + printf("Total number of cycles normal division : %"PRIu64"\n", + tot_cyc_n); + printf("Total number of cycles reciprocal division : %"PRIu64"\n", + tot_cyc_r); + printf("Cycles per division(normal) : %3.2f\n", + ((double)tot_cyc_n)/i); + printf("Cycles per division(reciprocal) : %3.2f\n\n", + ((double)tot_cyc_r)/i); + + tot_cyc_n = 0; + tot_cyc_r = 0; + + printf("Validating division by power of 2.\n"); + for (i = 0; i < 64; i++) { + divisor_u64 = 1ull << i; + reci_u64 = rte_reciprocal_value_u64(divisor_u64); + + dividend_u64 = rte_rand(); + + start_cyc = rte_rdtsc(); + nresult_u64 = dividend_u64 / divisor_u64; + split_cyc = rte_rdtsc(); + rresult_u64 = rte_reciprocal_divide_u64(dividend_u64, + &reci_u64); + end_cyc = rte_rdtsc(); + + tot_cyc_n += split_cyc - start_cyc; + tot_cyc_r += end_cyc - split_cyc; + if (nresult_u64 != rresult_u64) { + printf("Division 64 failed, %"PRIu64"/%"PRIu64" = " + "expected %"PRIu64" result %"PRIu64"\n", + dividend_u64, divisor_u64, + nresult_u64, rresult_u64); + result = 1; + break; + } + } + printf("64bit Division results:\n"); + printf("Total number of cycles normal division : %"PRIu64"\n", + tot_cyc_n); + printf("Total number of cycles reciprocal division : %"PRIu64"\n", + tot_cyc_r); + printf("Cycles per division(normal) : %3.2f\n", + ((double)tot_cyc_n)/i); + printf("Cycles per division(reciprocal) : %3.2f\n", + ((double)tot_cyc_r)/i); + + return result; +} + +REGISTER_TEST_COMMAND(reciprocal_division_perf, test_reciprocal_division_perf); -- 2.14.1 ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [dpdk-dev] [PATCH v8 1/3] eal: introduce integer divide through reciprocal 2018-01-26 5:04 ` [dpdk-dev] [PATCH v8 " Pavan Nikhilesh 2018-01-26 5:04 ` [dpdk-dev] [PATCH v8 2/3] eal: add u64 bit variant for reciprocal Pavan Nikhilesh 2018-01-26 5:04 ` [dpdk-dev] [PATCH v8 3/3] test: add tests for reciprocal based division Pavan Nikhilesh @ 2018-01-27 21:39 ` Thomas Monjalon 2 siblings, 0 replies; 19+ messages in thread From: Thomas Monjalon @ 2018-01-27 21:39 UTC (permalink / raw) To: Pavan Nikhilesh; +Cc: dev, cristian.dumitrescu, stephen 26/01/2018 06:04, Pavan Nikhilesh: > In some use cases of integer division, denominator remains constant and > numerator varies. It is possible to optimize division for such specific > scenarios. > > The librte_sched uses rte_reciprocal to optimize division so, moving it to > eal/common would allow other libraries and applications to use it. > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com> Series applied, thanks ^ permalink raw reply [flat|nested] 19+ messages in thread
end of thread, other threads:[~2018-10-28 0:06 UTC | newest] Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2017-08-29 18:46 [dpdk-dev] [PATCH 1/2] eal: introduce integer divide through reciprocal Pavan Nikhilesh 2017-08-29 18:46 ` [dpdk-dev] [PATCH 2/2] eal: add u64 bit variant for reciprocal Pavan Nikhilesh 2017-08-29 19:35 ` Stephen Hemminger 2017-08-29 19:38 ` Stephen Hemminger 2017-08-30 4:05 ` Pavan Nikhilesh Bhagavatula 2017-12-04 13:22 ` [dpdk-dev] [PATCH v7 1/3] eal: introduce integer divide through reciprocal Pavan Nikhilesh 2017-12-04 13:22 ` [dpdk-dev] [PATCH v7 2/3] eal: add u64 bit variant for reciprocal Pavan Nikhilesh 2017-12-04 13:22 ` [dpdk-dev] [PATCH v7 3/3] test: add tests for reciprocal based division Pavan Nikhilesh 2018-01-18 23:49 ` Thomas Monjalon 2018-01-25 22:35 ` Thomas Monjalon 2018-01-11 16:01 ` [dpdk-dev] [PATCH v7 1/3] eal: introduce integer divide through reciprocal Dumitrescu, Cristian 2018-01-26 5:04 ` [dpdk-dev] [PATCH v8 " Pavan Nikhilesh 2018-01-26 5:04 ` [dpdk-dev] [PATCH v8 2/3] eal: add u64 bit variant for reciprocal Pavan Nikhilesh 2018-01-29 6:42 ` Hemant Agrawal 2018-01-29 7:54 ` Pavan Nikhilesh 2018-01-29 8:14 ` Hemant Agrawal 2018-10-28 0:06 ` Ferruh Yigit 2018-01-26 5:04 ` [dpdk-dev] [PATCH v8 3/3] test: add tests for reciprocal based division Pavan Nikhilesh 2018-01-27 21:39 ` [dpdk-dev] [PATCH v8 1/3] eal: introduce integer divide through reciprocal Thomas Monjalon
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).