Re: [dpdk-dev] [PATCH v3 2/3] eal: add u64 bit variant for reciprocal

DPDK patches and discussions
 help / color / mirror / Atom feed

From: "Burakov, Anatoly" <anatoly.burakov@intel.com>
To: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com>,
	"dev@dpdk.org" <dev@dpdk.org>
Cc: "Dumitrescu, Cristian" <cristian.dumitrescu@intel.com>,
	"stephen@networkplumber.org" <stephen@networkplumber.org>
Subject: Re: [dpdk-dev] [PATCH v3 2/3] eal: add u64 bit variant for reciprocal
Date: Mon, 4 Sep 2017 14:34:49 +0000	[thread overview]
Message-ID: <C6ECDF3AB251BE4894318F4E4512369782293905@IRSMSX109.ger.corp.intel.com> (raw)
In-Reply-To: <1504442189-4384-2-git-send-email-pbhagavatula@caviumnetworks.com>

> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Pavan Nikhilesh
> Sent: Sunday, September 3, 2017 1:36 PM
> To: dev@dpdk.org
> Cc: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>;
> stephen@networkplumber.org; Pavan Nikhilesh
> <pbhagavatula@caviumnetworks.com>
> Subject: [dpdk-dev] [PATCH v3 2/3] eal: add u64 bit variant for reciprocal
> 
> Currently, rte_reciprocal only supports unsigned 32bit divisors. This commit
> adds support for unsigned 64bit divisors.
> 
> Rename unsigned 32bit specific functions appropriately and update
> librte_sched accordingly.
> 
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com>
> ---
>  lib/librte_eal/bsdapp/eal/rte_eal_version.map   |   3 +-
>  lib/librte_eal/common/include/rte_reciprocal.h  | 111
> ++++++++++++++++++++--
>  lib/librte_eal/common/rte_reciprocal.c          | 120
> +++++++++++++++++++++---
>  lib/librte_eal/linuxapp/eal/rte_eal_version.map |   3 +-
>  lib/librte_sched/Makefile                       |   4 +-
>  lib/librte_sched/rte_sched.c                    |   9 +-
>  6 files changed, 222 insertions(+), 28 deletions(-)
> 
> diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> index d0bda66..5fd6101 100644
> --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> @@ -242,6 +242,7 @@ EXPERIMENTAL {
>  DPDK_17.11 {
>  	global:
> 
> -	rte_reciprocal_value;
> +	rte_reciprocal_value_u32;
> +	rte_reciprocal_value_u64;
> 
>  } DPDK_17.08;
> diff --git a/lib/librte_eal/common/include/rte_reciprocal.h
> b/lib/librte_eal/common/include/rte_reciprocal.h
> index b6d752f..801d1c8 100644
> --- a/lib/librte_eal/common/include/rte_reciprocal.h
> +++ b/lib/librte_eal/common/include/rte_reciprocal.h
> @@ -22,22 +22,117 @@
>  #ifndef _RTE_RECIPROCAL_H_
>  #define _RTE_RECIPROCAL_H_
> 
> -#include <stdint.h>
> +#include <rte_memory.h>
> 
> -struct rte_reciprocal {
> +/**
> + * Unsigned 32-bit divisor structure.
> + */
> +struct rte_reciprocal_u32 {
>  	uint32_t m;
>  	uint8_t sh1, sh2;
> -};
> +} __rte_cache_aligned;
> +
> +/**
> + * Unsigned 64-bit divisor structure.
> + */
> +struct rte_reciprocal_u64 {
> +	uint64_t m;
> +	uint8_t sh1;
> +} __rte_cache_aligned;
> 
> +/**
> + * Divide given unsigned 32-bit integer with pre calculated divisor.
> + *
> + * @param a
> + *   The 32-bit dividend.
> + * @param R
> + *   The pointer to pre calculated divisor reciprocal structure.
> + *
> + * @return
> + *   The result of the division
> + */
>  static inline uint32_t
> -rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R)
> +rte_reciprocal_divide_u32(uint32_t a, struct rte_reciprocal_u32 *R) {
> +	uint32_t t = (((uint64_t)a * R->m) >> 32);
> +
> +	return (t + ((a - t) >> R->sh1)) >> R->sh2; }
> +
> +static inline uint64_t
> +mullhi_u64(uint64_t x, uint64_t y)
> +{
> +#ifdef __SIZEOF_INT128__
> +	__uint128_t xl = x;
> +	__uint128_t rl = xl * y;
> +
> +	return (rl >> 64);
> +#else
> +	uint64_t u0, u1, v0, v1, k, t;
> +	uint64_t w1, w2;
> +	uint64_t whi;
> +
> +	u1 = x >> 32; u0 = x & 0xFFFFFFFF;
> +	v1 = y >> 32; v0 = y & 0xFFFFFFFF;
> +
> +	t = u0*v0;
> +	k = t >> 32;
> +
> +	t = u1*v0 + k;
> +	w1 = t & 0xFFFFFFFF;
> +	w2 = t >> 32;
> +
> +	t = u0*v1 + w1;
> +	k = t >> 32;
> +
> +	whi = u1*v1 + w2 + k;
> +
> +	return whi;
> +#endif
> +}
> +
> +/**
> + * Divide given unsigned 64-bit integer with pre calculated divisor.
> + *
> + * @param a
> + *   The 64-bit dividend.
> + * @param R
> + *   The pointer to pre calculated divisor reciprocal structure.
> + *
> + * @return
> + *   The result of the division
> + */
> +static inline uint64_t
> +rte_reciprocal_divide_u64(uint64_t a, struct rte_reciprocal_u64 *R)
>  {
> -	uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32);
> +	uint64_t q = mullhi_u64(R->m, a);
> +	uint64_t t = ((a - q) >> 1) + q;
> 
> -	return (t + ((a - t) >> R.sh1)) >> R.sh2;
> +	return t >> R->sh1;
>  }
> 
> -struct rte_reciprocal
> -rte_reciprocal_value(uint32_t d);
> +/**
> + * Generate pre calculated divisor structure.
> + *
> + * @param d
> + *   The unsigned 32-bit divisor.
> + *
> + * @return
> + *   Divisor structure.
> + */
> +struct rte_reciprocal_u32
> +rte_reciprocal_value_u32(uint32_t d);
> +
> +/**
> + * Generate pre calculated divisor structure.
> + *
> + * @param d
> + *   The unsigned 64-bit divisor.
> + *
> + * @return
> + *   Divisor structure.
> + */
> +struct rte_reciprocal_u64
> +rte_reciprocal_value_u64(uint64_t d);
> 
>  #endif /* _RTE_RECIPROCAL_H_ */
> diff --git a/lib/librte_eal/common/rte_reciprocal.c
> b/lib/librte_eal/common/rte_reciprocal.c
> index 7ab99b4..5d7e367 100644
> --- a/lib/librte_eal/common/rte_reciprocal.c
> +++ b/lib/librte_eal/common/rte_reciprocal.c
> @@ -31,18 +31,13 @@
>   *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
>   */
> 
> -#include <stdio.h>
> -#include <stdint.h>
> -
> -#include <rte_common.h>
> -
> -#include "rte_reciprocal.h"
> +#include <rte_reciprocal.h>
> 
>  /* find largest set bit.
>   * portable and slow but does not matter for this usage.
>   */
>  static inline int
> -fls(uint32_t x)
> +fls_u32(uint32_t x)
>  {
>  	int b;
> 
> @@ -54,21 +49,120 @@ fls(uint32_t x)
>  	return 0;
>  }
> 
> -struct rte_reciprocal
> -rte_reciprocal_value(uint32_t d)
> +struct rte_reciprocal_u32
> +rte_reciprocal_value_u32(uint32_t d)
>  {
> -	struct rte_reciprocal R;
> +	struct rte_reciprocal_u32 R;
>  	uint64_t m;
>  	int l;
> 
> -	l = fls(d - 1);
> +	l = fls_u32(d - 1);
>  	m = ((1ULL << 32) * ((1ULL << l) - d));
>  	m /= d;
> 
>  	++m;
>  	R.m = m;
> -	R.sh1 = RTE_MIN(l, 1);
> -	R.sh2 = RTE_MAX(l - 1, 0);
> +	R.sh1 = l > 1 ? 1 : l;
> +	R.sh2 = (l - 1 > 0) ? l - 1 : 0;

Is there a specific reason for changing from RTE_MIN to what you have? R.sh2 seems identical to the previous version (so reason for change is unclear), and R.sh1 changes behavior (R.sh1 can now potentially be zero, even if in practice it won't) without any explanation given.

Thanks,
Anatoly

> +
> +	return R;
> +}
> +
> +/* Code taken from Hacker's Delight:
> + * http://www.hackersdelight.org/HDcode/divlu.c.
> + * License permits inclusion here per:
> + * http://www.hackersdelight.org/permissions.htm
> + */
> +static inline uint64_t
> +divide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t
> +*r) {
> +	const uint64_t b = (1ULL << 32); /* Number base (16 bits). */
> +	uint64_t un1, un0,           /* Norm. dividend LSD's. */
> +			 vn1, vn0,           /* Norm. divisor digits. */
> +			 q1, q0,             /* Quotient digits. */
> +			 un64, un21, un10,   /* Dividend digit pairs. */
> +			 rhat;               /* A remainder. */
> +	int s;                       /* Shift amount for norm. */
> +
> +    /* If overflow, set rem. to an impossible value. */
> +	if (u1 >= v) {
> +		if (r != NULL)
> +			*r = (uint64_t) -1;
> +		return (uint64_t) -1;
> +	}
> +
> +	/* Count leading zeros. */
> +	s = __builtin_clzll(v);
> +	if (s > 0) {
> +		v = v << s;
> +		un64 = (u1 << s) | ((u0 >> (64 - s)) & (-s >> 31));
> +		un10 = u0 << s;
> +	} else {
> +
> +		un64 = u1 | u0;
> +		un10 = u0;
> +	}
> +
> +	vn1 = v >> 32;
> +	vn0 = v & 0xFFFFFFFF;
> +
> +	un1 = un10 >> 32;
> +	un0 = un10 & 0xFFFFFFFF;
> +
> +	q1 = un64/vn1;
> +	rhat = un64 - q1*vn1;
> +again1:
> +	if (q1 >= b || q1*vn0 > b*rhat + un1) {
> +		q1 = q1 - 1;
> +		rhat = rhat + vn1;
> +		if (rhat < b)
> +			goto again1;
> +	}
> +
> +	un21 = un64*b + un1 - q1*v;
> +
> +	q0 = un21/vn1;
> +	rhat = un21 - q0*vn1;
> +again2:
> +	if (q0 >= b || q0*vn0 > b*rhat + un0) {
> +		q0 = q0 - 1;
> +		rhat = rhat + vn1;
> +		if (rhat < b)
> +			goto again2;
> +	}
> +
> +	if (r != NULL)
> +		*r = (un21*b + un0 - q0*v) >> s;
> +	return q1*b + q0;
> +}
> +
> +struct rte_reciprocal_u64
> +rte_reciprocal_value_u64(uint64_t d)
> +{
> +	struct rte_reciprocal_u64 R;
> +
> +	const uint32_t fld = 63 - __builtin_clzll(d);
> +
> +	if ((d & (d - 1)) == 0) {
> +		R.m = 0;
> +		R.sh1 = (fld - 1) | 0x40;
> +	} else {
> +		uint64_t rem;
> +		uint64_t multiplier;
> +		uint8_t more;
> +
> +		multiplier = divide_128_div_64_to_64(1ULL << fld, 0, d,
> &rem);
> +		multiplier += multiplier;
> +
> +		const uint64_t twice_rem = rem + rem;
> +		if (twice_rem >= d || twice_rem < rem)
> +			multiplier += 1;
> +		more = fld;
> +		R.m = 1 + multiplier;
> +		R.sh1 = more | 0x40;
> +	}
> +
> +	R.sh1 &= 0x3F;
> 
>  	return R;
>  }
> diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> index 65117cb..63ff2b8 100644
> --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> @@ -247,6 +247,7 @@ EXPERIMENTAL {
>  DPDK_17.11 {
>  	global:
> 
> -	rte_reciprocal_value;
> +	rte_reciprocal_value_u32;
> +	rte_reciprocal_value_u64;
> 
>  } DPDK_17.08;
> diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile index
> 569656b..a2fd6f3 100644
> --- a/lib/librte_sched/Makefile
> +++ b/lib/librte_sched/Makefile
> @@ -54,6 +54,8 @@ LIBABIVER := 1
>  SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c
> 
>  # install includes
> -SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h
> rte_bitmap.h rte_sched_common.h rte_red.h rte_approx.h
> +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h
> rte_bitmap.h
> +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_sched_common.h
> +rte_red.h SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include +=
> rte_approx.h
> 
>  include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c index
> 3b8ccaa..7bb6d51 100644
> --- a/lib/librte_sched/rte_sched.c
> +++ b/lib/librte_sched/rte_sched.c
> @@ -228,7 +228,7 @@ struct rte_sched_port {
>  	uint64_t time_cpu_cycles;     /* Current CPU time measured in CPU
> cyles */
>  	uint64_t time_cpu_bytes;      /* Current CPU time measured in bytes
> */
>  	uint64_t time;                /* Current NIC TX time measured in bytes */
> -	struct rte_reciprocal inv_cycles_per_byte; /* CPU cycles per byte */
> +	struct rte_reciprocal_u32 inv_cycles_per_byte; /* CPU cycles per
> byte
> +*/
> 
>  	/* Scheduling loop detection */
>  	uint32_t pipe_loop;
> @@ -677,7 +677,7 @@ rte_sched_port_config(struct
> rte_sched_port_params *params)
> 
>  	cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT)
>  		/ params->rate;
> -	port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte);
> +	port->inv_cycles_per_byte =
> rte_reciprocal_value_u32(cycles_per_byte);
> 
>  	/* Scheduling loop detection */
>  	port->pipe_loop = RTE_SCHED_PIPE_INVALID; @@ -2147,8 +2147,9
> @@ rte_sched_port_time_resync(struct rte_sched_port *port)
>  	uint64_t bytes_diff;
> 
>  	/* Compute elapsed time in bytes */
> -	bytes_diff = rte_reciprocal_divide(cycles_diff <<
> RTE_SCHED_TIME_SHIFT,
> -					   port->inv_cycles_per_byte);
> +	bytes_diff = rte_reciprocal_divide_u32(
> +			cycles_diff << RTE_SCHED_TIME_SHIFT,
> +			&port->inv_cycles_per_byte);
> 
>  	/* Advance port time */
>  	port->time_cpu_cycles = cycles;
> --
> 2.7.4

next prev parent reply	other threads:[~2017-09-04 14:34 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-09-03 12:36 [dpdk-dev] [PATCH v3 1/3] eal: introduce integer divide through reciprocal Pavan Nikhilesh
2017-09-03 12:36 ` [dpdk-dev] [PATCH v3 2/3] eal: add u64 bit variant for reciprocal Pavan Nikhilesh
2017-09-04 14:34   ` Burakov, Anatoly [this message]
2017-09-04 14:55     ` Pavan Nikhilesh Bhagavatula
2017-09-03 12:36 ` [dpdk-dev] [PATCH v3 3/3] test: add tests for reciprocal based division Pavan Nikhilesh
2017-09-04  9:18   ` Burakov, Anatoly
2017-09-04  9:49     ` Pavan Nikhilesh Bhagavatula
2017-09-04 14:01 ` [dpdk-dev] [PATCH v3 1/3] eal: introduce integer divide through reciprocal Burakov, Anatoly

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=C6ECDF3AB251BE4894318F4E4512369782293905@IRSMSX109.ger.corp.intel.com \
    --to=anatoly.burakov@intel.com \
    --cc=cristian.dumitrescu@intel.com \
    --cc=dev@dpdk.org \
    --cc=pbhagavatula@caviumnetworks.com \
    --cc=stephen@networkplumber.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).