DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH] hash: update jhash function with the latest available
@ 2015-04-16 13:26 Pablo de Lara
  2015-04-16 14:01 ` Bruce Richardson
  2015-04-24 11:23 ` [dpdk-dev] [PATCH v2 0/6] update jhash function Pablo de Lara
  0 siblings, 2 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-04-16 13:26 UTC (permalink / raw)
  To: dev

Jenkins hash function was developed originally in 1996,
and was integrated in first versions of DPDK.
The function has been improved in 2006,
achieving up to 60% better performance, compared to the original one.

Check out: http://burtleburtle.net/bob/c/lookup3.c

This patch integrates that code in the rte_jhash library,
adding also a new function rte_jhash_word2,
that returns two different hash values, for a single key.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 lib/librte_hash/rte_jhash.h |  407 ++++++++++++++++++++++++++++++++++++-------
 1 files changed, 347 insertions(+), 60 deletions(-)

diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index a4bf5a1..3de006d 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -45,38 +45,51 @@ extern "C" {
 #endif
 
 #include <stdint.h>
+#include <rte_byteorder.h>
 
 /* jhash.h: Jenkins hash support.
  *
- * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
+ * Copyright (C) 2006 Bob Jenkins (bob_jenkins@burtleburtle.net)
  *
  * http://burtleburtle.net/bob/hash/
  *
  * These are the credits from Bob's sources:
  *
- * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
- * hash(), hash2(), hash3, and mix() are externally useful functions.
- * Routines to test the hash are included if SELF_TEST is defined.
- * You can use this free for any purpose.  It has no warranty.
+ * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+ *
+ * These are functions for producing 32-bit hashes for hash table lookup.
+ * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
+ * are externally useful functions.  Routines to test the hash are included
+ * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
+ * the public domain.  It has no warranty.
  *
  * $FreeBSD$
  */
 
+#define rot(x, k) (((x)<<(k)) | ((x)>>(32-(k))))
+
 /** @internal Internal function. NOTE: Arguments are modified. */
 #define __rte_jhash_mix(a, b, c) do { \
-	a -= b; a -= c; a ^= (c>>13); \
-	b -= c; b -= a; b ^= (a<<8); \
-	c -= a; c -= b; c ^= (b>>13); \
-	a -= b; a -= c; a ^= (c>>12); \
-	b -= c; b -= a; b ^= (a<<16); \
-	c -= a; c -= b; c ^= (b>>5); \
-	a -= b; a -= c; a ^= (c>>3); \
-	b -= c; b -= a; b ^= (a<<10); \
-	c -= a; c -= b; c ^= (b>>15); \
+	a -= c; a ^= rot(c, 4); c += b; \
+	b -= a; b ^= rot(a, 6); a += c; \
+	c -= b; c ^= rot(b, 8); b += a; \
+	a -= c; a ^= rot(c, 16); c += b; \
+	b -= a; b ^= rot(a, 19); a += c; \
+	c -= b; c ^= rot(b, 4); b += a; \
+} while (0)
+
+#define __rte_jhash_final(a, b, c) do { \
+	c ^= b; c -= rot(b, 14); \
+	a ^= c; a -= rot(c, 11); \
+	b ^= a; b -= rot(a, 25); \
+	c ^= b; c -= rot(b, 16); \
+	a ^= c; a -= rot(c, 4);  \
+	b ^= a; b -= rot(a, 14); \
+	c ^= b; c -= rot(b, 24); \
 } while (0)
 
 /** The golden ratio: an arbitrary value. */
-#define RTE_JHASH_GOLDEN_RATIO      0x9e3779b9
+#define RTE_JHASH_GOLDEN_RATIO      0xdeadbeef
 
 /**
  * The most generic version, hashes an arbitrary sequence
@@ -95,42 +108,256 @@ extern "C" {
 static inline uint32_t
 rte_jhash(const void *key, uint32_t length, uint32_t initval)
 {
-	uint32_t a, b, c, len;
-	const uint8_t *k = (const uint8_t *)key;
-	const uint32_t *k32 = (const uint32_t *)key;
+	uint32_t a, b, c;
+	union {
+		const void *ptr;
+		size_t i;
+	} u;
 
-	len = length;
-	a = b = RTE_JHASH_GOLDEN_RATIO;
-	c = initval;
+	/* Set up the internal state */
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + initval;
 
-	while (len >= 12) {
-		a += k32[0];
-		b += k32[1];
-		c += k32[2];
+	u.ptr = key;
 
-		__rte_jhash_mix(a,b,c);
+	if ((u.i & 0x3) == 0) {
+		const uint32_t *k = (const uint32_t *)key;
 
-		k += (3 * sizeof(uint32_t)), k32 += 3;
-		len -= (3 * sizeof(uint32_t));
-	}
+		while (length > 12) {
+			a += k[0];
+			b += k[1];
+			c += k[2];
 
-	c += length;
-	switch (len) {
-		case 11: c += ((uint32_t)k[10] << 24);
-		case 10: c += ((uint32_t)k[9] << 16);
-		case 9 : c += ((uint32_t)k[8] << 8);
-		case 8 : b += ((uint32_t)k[7] << 24);
-		case 7 : b += ((uint32_t)k[6] << 16);
-		case 6 : b += ((uint32_t)k[5] << 8);
-		case 5 : b += k[4];
-		case 4 : a += ((uint32_t)k[3] << 24);
-		case 3 : a += ((uint32_t)k[2] << 16);
-		case 2 : a += ((uint32_t)k[1] << 8);
-		case 1 : a += k[0];
-		default: break;
-	};
+			__rte_jhash_mix(a, b, c);
+
+			k += 3;
+			length -= 12;
+		}
+
+		switch (length) {
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+		case 12:
+			c += k[2]; b += k[1]; a += k[0]; break;
+		case 11:
+			c += k[2]&0xffffff; b += k[1]; a += k[0]; break;
+		case 10:
+			c += k[2]&0xffff; b += k[1]; a += k[0]; break;
+		case 9:
+			c += k[2]&0xff; b += k[1]; a += k[0]; break;
+		case 8:
+			b += k[1]; a += k[0]; break;
+		case 7:
+			b += k[1]&0xffffff; a += k[0]; break;
+		case 6:
+			b += k[1]&0xffff; a += k[0]; break;
+		case 5:
+			b += k[1]&0xff; a += k[0]; break;
+		case 4:
+			a += k[0]; break;
+		case 3:
+			a += k[0]&0xffffff; break;
+		case 2:
+			a += k[0]&0xffff; break;
+		case 1:
+			a += k[0]&0xff; break;
+#else
+		case 12:
+			c += k[2]; b += k[1]; a += k[0]; break;
+		case 11:
+			c += k[2]&0xffffff00; b += k[1]; a += k[0]; break;
+		case 10:
+			c += k[2]&0xffff0000; b += k[1]; a += k[0]; break;
+		case 9:
+			c += k[2]&0xff000000; b += k[1]; a += k[0]; break;
+		case 8:
+			b += k[1]; a += k[0]; break;
+		case 7:
+			b += k[1]&0xffffff00; a += k[0]; break;
+		case 6:
+			b += k[1]&0xffff0000; a += k[0]; break;
+		case 5:
+			b += k[1]&0xff000000; a += k[0]; break;
+		case 4:
+			a += k[0]; break;
+		case 3:
+			a += k[0]&0xffffff00; break;
+		case 2:
+			a += k[0]&0xffff0000; break;
+		case 1:
+			a += k[0]&0xff000000; break;
+#endif
+		/* zero length strings require no mixing */
+		case 0:
+			return c;
+		};
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+	} else if ((u.i & 0x1) == 0) {
+		/* read 16-bit chunks */
+		const uint16_t *k = (const uint16_t *)key;
+		const uint8_t  *k8;
+
+		/* all but last block: aligned reads and different mixing */
+		while (length > 12) {
+			a += k[0] + (((uint32_t)k[1])<<16);
+			b += k[2] + (((uint32_t)k[3])<<16);
+			c += k[4] + (((uint32_t)k[5])<<16);
+
+			__rte_jhash_mix(a, b, c);
+
+			k += 6;
+			length -= 12;
+		}
+
+		/* handle the last (probably partial) block */
+		k8 = (const uint8_t *)k;
+		switch (length) {
+		case 12:
+			c += k[4]+(((uint32_t)k[5])<<16);
+			b += k[2]+(((uint32_t)k[3])<<16);
+			a += k[0]+(((uint32_t)k[1])<<16);
+			break;
+		case 11:
+			/* fall through */
+			c += ((uint32_t)k8[10])<<16;
+		case 10:
+			c += k[4];
+			b += k[2]+(((uint32_t)k[3])<<16);
+			a += k[0]+(((uint32_t)k[1])<<16);
+			break;
+		case 9:
+			/* fall through */
+			c += k8[8];
+		case 8:
+			b += k[2]+(((uint32_t)k[3])<<16);
+			a += k[0]+(((uint32_t)k[1])<<16);
+			break;
+		case 7:
+			/* fall through */
+			b += ((uint32_t)k8[6])<<16;
+		case 6:
+			b += k[2];
+			a += k[0]+(((uint32_t)k[1])<<16);
+			break;
+		case 5:
+			/* fall through */
+			b += k8[4];
+		case 4:
+			a += k[0]+(((uint32_t)k[1])<<16);
+			break;
+		case 3:
+			/* fall through */
+			a += ((uint32_t)k8[2])<<16;
+		case 2:
+			a += k[0];
+			break;
+		case 1:
+			a += k8[0];
+			break;
+		case 0:
+			/* zero length requires no mixing */
+			return c;
+		}
+#endif
+	} else {
+		const uint8_t *k = (const uint8_t *)key;
+
+		/* all but the last block: affect some 32 bits of (a, b, c) */
+		while (length > 12) {
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+			a += k[0];
+			a += ((uint32_t)k[1])<<8;
+			a += ((uint32_t)k[2])<<16;
+			a += ((uint32_t)k[3])<<24;
+			b += k[4];
+			b += ((uint32_t)k[5])<<8;
+			b += ((uint32_t)k[6])<<16;
+			b += ((uint32_t)k[7])<<24;
+			c += k[8];
+			c += ((uint32_t)k[9])<<8;
+			c += ((uint32_t)k[10])<<16;
+			c += ((uint32_t)k[11])<<24;
+#else
+			a += ((uint32_t)k[0])<<24;
+			a += ((uint32_t)k[1])<<16;
+			a += ((uint32_t)k[2])<<8;
+			a += ((uint32_t)k[3]);
+			b += ((uint32_t)k[4])<<24;
+			b += ((uint32_t)k[5])<<16;
+			b += ((uint32_t)k[6])<<8;
+			b += ((uint32_t)k[7]);
+			c += ((uint32_t)k[8])<<24;
+			c += ((uint32_t)k[9])<<16;
+			c += ((uint32_t)k[10])<<8;
+			c += ((uint32_t)k[11]);
+#endif
+
+			__rte_jhash_mix(a, b, c);
 
-	__rte_jhash_mix(a,b,c);
+			k += 12;
+			length -= 12;
+		}
+
+		/* last block: affect all 32 bits of (c) */
+		/* all the case statements fall through */
+		switch (length) {
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+		case 12:
+			c += ((uint32_t)k[11])<<24;
+		case 11:
+			c += ((uint32_t)k[10])<<16;
+		case 10:
+			c += ((uint32_t)k[9])<<8;
+		case 9:
+			c += k[8];
+		case 8:
+			b += ((uint32_t)k[7])<<24;
+		case 7:
+			b += ((uint32_t)k[6])<<16;
+		case 6:
+			b += ((uint32_t)k[5])<<8;
+		case 5:
+			b += k[4];
+		case 4:
+			a += ((uint32_t)k[3])<<24;
+		case 3:
+			a += ((uint32_t)k[2])<<16;
+		case 2:
+			a += ((uint32_t)k[1])<<8;
+		case 1:
+			a += k[0];
+		break;
+#else
+		case 12:
+			c += k[11];
+		case 11:
+			c += ((uint32_t)k[10])<<8;
+		case 10:
+			c += ((uint32_t)k[9])<<16;
+		case 9:
+			c += ((uint32_t)k[8])<<24;
+		case 8:
+			b += k[7];
+		case 7:
+			b += ((uint32_t)k[6])<<8;
+		case 6:
+			b += ((uint32_t)k[5])<<16;
+		case 5:
+			b += ((uint32_t)k[4])<<24;
+		case 4:
+			a += k[3];
+		case 3:
+			a += ((uint32_t)k[2])<<8;
+		case 2:
+			a += ((uint32_t)k[1])<<16;
+		case 1:
+			a += ((uint32_t)k[0])<<24;
+		break;
+#endif
+		case 0:
+			return c;
+		}
+	}
+
+	__rte_jhash_final(a, b, c);
 
 	return c;
 }
@@ -151,33 +378,93 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
 static inline uint32_t
 rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
 {
-	uint32_t a, b, c, len;
+	uint32_t a, b, c;
 
-	a = b = RTE_JHASH_GOLDEN_RATIO;
-	c = initval;
-	len = length;
+	/* Set up the internal state */
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length)<<2) + initval;
 
-	while (len >= 3) {
+	/* Handle most of the key */
+	while (length > 3) {
 		a += k[0];
 		b += k[1];
 		c += k[2];
+
 		__rte_jhash_mix(a, b, c);
-		k += 3; len -= 3;
-	}
 
-	c += length * 4;
+		k += 3;
+		length -= 3;
+	}
 
-	switch (len) {
-		case 2 : b += k[1];
-		case 1 : a += k[0];
-		default: break;
+	/* Handle the last 3 uint32_t's */
+	switch (length) {
+	case 3:
+		c += k[2];
+	case 2:
+		b += k[1];
+	case 1:
+		a += k[0];
+		__rte_jhash_final(a, b, c);
+	/* case 0: nothing left to add */
+	case 0:
+		break;
 	};
 
-	__rte_jhash_mix(a,b,c);
-
 	return c;
 }
 
+/**
+ * Same as rte_jhash2, but take two seeds and return two uint32_ts.
+ * pc and pb must be non-null, and *pc and *pb must both be initialized
+ * with seeds. If you pass in (*pb)=0, the output (*pc) will be
+ * the same as the return value from rte_jhash.
+ *
+ * @param k
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in units of 4 bytes.
+ * @param pc
+ *   IN: seed OUT: primary hash value.
+ * @param pb
+ *   IN: second seed OUT: secondary hash value.
+ */
+static inline void
+rte_jhash_word2(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb)
+{
+	uint32_t a, b, c;
+
+	/* Set up the internal state */
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length)<<2) + *pc;
+	c += *pb;
+
+	/* Handle most of the key */
+	while (length > 3) {
+		a += k[0];
+		b += k[1];
+		c += k[2];
+
+		__rte_jhash_mix(a, b, c);
+
+		k += 3;
+		length -= 3;
+	}
+
+	/* Handle the last 3 uint32_t's */
+	switch (length) {
+	case 3:
+		c += k[2];
+	case 2:
+		b += k[1];
+	case 1:
+		a += k[0];
+		__rte_jhash_final(a, b, c);
+	/* case 0: nothing left to add */
+	case 0:
+		break;
+	};
+
+	*pc = c;
+	*pb = b;
+}
 
 /**
  * A special ultra-optimized versions that knows it is hashing exactly
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [dpdk-dev] [PATCH] hash: update jhash function with the latest available
  2015-04-16 13:26 [dpdk-dev] [PATCH] hash: update jhash function with the latest available Pablo de Lara
@ 2015-04-16 14:01 ` Bruce Richardson
  2015-04-17 16:03   ` De Lara Guarch, Pablo
  2015-04-24 11:23 ` [dpdk-dev] [PATCH v2 0/6] update jhash function Pablo de Lara
  1 sibling, 1 reply; 62+ messages in thread
From: Bruce Richardson @ 2015-04-16 14:01 UTC (permalink / raw)
  To: Pablo de Lara; +Cc: dev

On Thu, Apr 16, 2015 at 02:26:59PM +0100, Pablo de Lara wrote:
> Jenkins hash function was developed originally in 1996,
> and was integrated in first versions of DPDK.
> The function has been improved in 2006,
> achieving up to 60% better performance, compared to the original one.
> 
> Check out: http://burtleburtle.net/bob/c/lookup3.c
> 
> This patch integrates that code in the rte_jhash library,
> adding also a new function rte_jhash_word2,
> that returns two different hash values, for a single key.
> 

Should the addition of the new functionality not be a separate patch from the
update to the existing code?
Also, do the new functions return the exact same values as the previous versions,
just faster?

> Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
> ---
>  lib/librte_hash/rte_jhash.h |  407 ++++++++++++++++++++++++++++++++++++-------
>  1 files changed, 347 insertions(+), 60 deletions(-)
> 
> diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
> index a4bf5a1..3de006d 100644
> --- a/lib/librte_hash/rte_jhash.h
> +++ b/lib/librte_hash/rte_jhash.h
> @@ -1,7 +1,7 @@
>  /*-
>   *   BSD LICENSE
>   *
> - *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
>   *   All rights reserved.
>   *
>   *   Redistribution and use in source and binary forms, with or without
> @@ -45,38 +45,51 @@ extern "C" {
>  #endif
>  
>  #include <stdint.h>
> +#include <rte_byteorder.h>
>  
>  /* jhash.h: Jenkins hash support.
>   *
> - * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
> + * Copyright (C) 2006 Bob Jenkins (bob_jenkins@burtleburtle.net)
>   *
>   * http://burtleburtle.net/bob/hash/
>   *
>   * These are the credits from Bob's sources:
>   *
> - * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
> - * hash(), hash2(), hash3, and mix() are externally useful functions.
> - * Routines to test the hash are included if SELF_TEST is defined.
> - * You can use this free for any purpose.  It has no warranty.
> + * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
> + *
> + * These are functions for producing 32-bit hashes for hash table lookup.
> + * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
> + * are externally useful functions.  Routines to test the hash are included
> + * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
> + * the public domain.  It has no warranty.
>   *
>   * $FreeBSD$
>   */
>  
> +#define rot(x, k) (((x)<<(k)) | ((x)>>(32-(k))))
> +
>  /** @internal Internal function. NOTE: Arguments are modified. */
>  #define __rte_jhash_mix(a, b, c) do { \
> -	a -= b; a -= c; a ^= (c>>13); \
> -	b -= c; b -= a; b ^= (a<<8); \
> -	c -= a; c -= b; c ^= (b>>13); \
> -	a -= b; a -= c; a ^= (c>>12); \
> -	b -= c; b -= a; b ^= (a<<16); \
> -	c -= a; c -= b; c ^= (b>>5); \
> -	a -= b; a -= c; a ^= (c>>3); \
> -	b -= c; b -= a; b ^= (a<<10); \
> -	c -= a; c -= b; c ^= (b>>15); \
> +	a -= c; a ^= rot(c, 4); c += b; \
> +	b -= a; b ^= rot(a, 6); a += c; \
> +	c -= b; c ^= rot(b, 8); b += a; \
> +	a -= c; a ^= rot(c, 16); c += b; \
> +	b -= a; b ^= rot(a, 19); a += c; \
> +	c -= b; c ^= rot(b, 4); b += a; \
> +} while (0)
> +
> +#define __rte_jhash_final(a, b, c) do { \
> +	c ^= b; c -= rot(b, 14); \
> +	a ^= c; a -= rot(c, 11); \
> +	b ^= a; b -= rot(a, 25); \
> +	c ^= b; c -= rot(b, 16); \
> +	a ^= c; a -= rot(c, 4);  \
> +	b ^= a; b -= rot(a, 14); \
> +	c ^= b; c -= rot(b, 24); \
>  } while (0)
>  
>  /** The golden ratio: an arbitrary value. */
> -#define RTE_JHASH_GOLDEN_RATIO      0x9e3779b9
> +#define RTE_JHASH_GOLDEN_RATIO      0xdeadbeef
>  
>  /**
>   * The most generic version, hashes an arbitrary sequence
> @@ -95,42 +108,256 @@ extern "C" {
>  static inline uint32_t
>  rte_jhash(const void *key, uint32_t length, uint32_t initval)
>  {
> -	uint32_t a, b, c, len;
> -	const uint8_t *k = (const uint8_t *)key;
> -	const uint32_t *k32 = (const uint32_t *)key;
> +	uint32_t a, b, c;
> +	union {
> +		const void *ptr;
> +		size_t i;
> +	} u;
>  
> -	len = length;
> -	a = b = RTE_JHASH_GOLDEN_RATIO;
> -	c = initval;
> +	/* Set up the internal state */
> +	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + initval;
>  
> -	while (len >= 12) {
> -		a += k32[0];
> -		b += k32[1];
> -		c += k32[2];
> +	u.ptr = key;
>  
> -		__rte_jhash_mix(a,b,c);
> +	if ((u.i & 0x3) == 0) {
> +		const uint32_t *k = (const uint32_t *)key;
>  
> -		k += (3 * sizeof(uint32_t)), k32 += 3;
> -		len -= (3 * sizeof(uint32_t));
> -	}
> +		while (length > 12) {
> +			a += k[0];
> +			b += k[1];
> +			c += k[2];
>  
> -	c += length;
> -	switch (len) {
> -		case 11: c += ((uint32_t)k[10] << 24);
> -		case 10: c += ((uint32_t)k[9] << 16);
> -		case 9 : c += ((uint32_t)k[8] << 8);
> -		case 8 : b += ((uint32_t)k[7] << 24);
> -		case 7 : b += ((uint32_t)k[6] << 16);
> -		case 6 : b += ((uint32_t)k[5] << 8);
> -		case 5 : b += k[4];
> -		case 4 : a += ((uint32_t)k[3] << 24);
> -		case 3 : a += ((uint32_t)k[2] << 16);
> -		case 2 : a += ((uint32_t)k[1] << 8);
> -		case 1 : a += k[0];
> -		default: break;
> -	};
> +			__rte_jhash_mix(a, b, c);
> +
> +			k += 3;
> +			length -= 12;
> +		}
> +
> +		switch (length) {
> +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
> +		case 12:
> +			c += k[2]; b += k[1]; a += k[0]; break;
> +		case 11:
> +			c += k[2]&0xffffff; b += k[1]; a += k[0]; break;
> +		case 10:
> +			c += k[2]&0xffff; b += k[1]; a += k[0]; break;
> +		case 9:
> +			c += k[2]&0xff; b += k[1]; a += k[0]; break;
> +		case 8:
> +			b += k[1]; a += k[0]; break;
> +		case 7:
> +			b += k[1]&0xffffff; a += k[0]; break;
> +		case 6:
> +			b += k[1]&0xffff; a += k[0]; break;
> +		case 5:
> +			b += k[1]&0xff; a += k[0]; break;
> +		case 4:
> +			a += k[0]; break;
> +		case 3:
> +			a += k[0]&0xffffff; break;
> +		case 2:
> +			a += k[0]&0xffff; break;
> +		case 1:
> +			a += k[0]&0xff; break;
> +#else
> +		case 12:
> +			c += k[2]; b += k[1]; a += k[0]; break;
> +		case 11:
> +			c += k[2]&0xffffff00; b += k[1]; a += k[0]; break;
> +		case 10:
> +			c += k[2]&0xffff0000; b += k[1]; a += k[0]; break;
> +		case 9:
> +			c += k[2]&0xff000000; b += k[1]; a += k[0]; break;
> +		case 8:
> +			b += k[1]; a += k[0]; break;
> +		case 7:
> +			b += k[1]&0xffffff00; a += k[0]; break;
> +		case 6:
> +			b += k[1]&0xffff0000; a += k[0]; break;
> +		case 5:
> +			b += k[1]&0xff000000; a += k[0]; break;
> +		case 4:
> +			a += k[0]; break;
> +		case 3:
> +			a += k[0]&0xffffff00; break;
> +		case 2:
> +			a += k[0]&0xffff0000; break;
> +		case 1:
> +			a += k[0]&0xff000000; break;
> +#endif

Only the constants seem different in this block. Can we get rid of the
#ifdefs using rte_XX_to_cpu() calls instead?

> +		/* zero length strings require no mixing */
> +		case 0:
> +			return c;
> +		};
> +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
> +	} else if ((u.i & 0x1) == 0) {
> +		/* read 16-bit chunks */
> +		const uint16_t *k = (const uint16_t *)key;
> +		const uint8_t  *k8;
> +
> +		/* all but last block: aligned reads and different mixing */
> +		while (length > 12) {
> +			a += k[0] + (((uint32_t)k[1])<<16);
> +			b += k[2] + (((uint32_t)k[3])<<16);
> +			c += k[4] + (((uint32_t)k[5])<<16);
> +
> +			__rte_jhash_mix(a, b, c);
> +
> +			k += 6;
> +			length -= 12;
> +		}
> +
> +		/* handle the last (probably partial) block */
> +		k8 = (const uint8_t *)k;
> +		switch (length) {
> +		case 12:
> +			c += k[4]+(((uint32_t)k[5])<<16);
> +			b += k[2]+(((uint32_t)k[3])<<16);
> +			a += k[0]+(((uint32_t)k[1])<<16);
> +			break;
> +		case 11:
> +			/* fall through */
> +			c += ((uint32_t)k8[10])<<16;
> +		case 10:
> +			c += k[4];
> +			b += k[2]+(((uint32_t)k[3])<<16);
> +			a += k[0]+(((uint32_t)k[1])<<16);
> +			break;
> +		case 9:
> +			/* fall through */
> +			c += k8[8];
> +		case 8:
> +			b += k[2]+(((uint32_t)k[3])<<16);
> +			a += k[0]+(((uint32_t)k[1])<<16);
> +			break;
> +		case 7:
> +			/* fall through */
> +			b += ((uint32_t)k8[6])<<16;
> +		case 6:
> +			b += k[2];
> +			a += k[0]+(((uint32_t)k[1])<<16);
> +			break;
> +		case 5:
> +			/* fall through */
> +			b += k8[4];
> +		case 4:
> +			a += k[0]+(((uint32_t)k[1])<<16);
> +			break;
> +		case 3:
> +			/* fall through */
> +			a += ((uint32_t)k8[2])<<16;
> +		case 2:
> +			a += k[0];
> +			break;
> +		case 1:
> +			a += k8[0];
> +			break;
> +		case 0:
> +			/* zero length requires no mixing */
> +			return c;
> +		}
> +#endif

No else block for this ifdef?

> +	} else {
> +		const uint8_t *k = (const uint8_t *)key;
> +
> +		/* all but the last block: affect some 32 bits of (a, b, c) */
> +		while (length > 12) {
> +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
> +			a += k[0];
> +			a += ((uint32_t)k[1])<<8;
> +			a += ((uint32_t)k[2])<<16;
> +			a += ((uint32_t)k[3])<<24;
> +			b += k[4];
> +			b += ((uint32_t)k[5])<<8;
> +			b += ((uint32_t)k[6])<<16;
> +			b += ((uint32_t)k[7])<<24;
> +			c += k[8];
> +			c += ((uint32_t)k[9])<<8;
> +			c += ((uint32_t)k[10])<<16;
> +			c += ((uint32_t)k[11])<<24;
> +#else
> +			a += ((uint32_t)k[0])<<24;
> +			a += ((uint32_t)k[1])<<16;
> +			a += ((uint32_t)k[2])<<8;
> +			a += ((uint32_t)k[3]);
> +			b += ((uint32_t)k[4])<<24;
> +			b += ((uint32_t)k[5])<<16;
> +			b += ((uint32_t)k[6])<<8;
> +			b += ((uint32_t)k[7]);
> +			c += ((uint32_t)k[8])<<24;
> +			c += ((uint32_t)k[9])<<16;
> +			c += ((uint32_t)k[10])<<8;
> +			c += ((uint32_t)k[11]);
> +#endif

Maybe find a better way to shorten/remove this #ifdef also. E.g. shorter
ifdef defining macros for the different shift amounts, 0, 8, 16, 24.

> +
> +			__rte_jhash_mix(a, b, c);
>  
> -	__rte_jhash_mix(a,b,c);
> +			k += 12;
> +			length -= 12;
> +		}
> +
> +		/* last block: affect all 32 bits of (c) */
> +		/* all the case statements fall through */
> +		switch (length) {
> +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
> +		case 12:
> +			c += ((uint32_t)k[11])<<24;
> +		case 11:
> +			c += ((uint32_t)k[10])<<16;
> +		case 10:
> +			c += ((uint32_t)k[9])<<8;
> +		case 9:
> +			c += k[8];
> +		case 8:
> +			b += ((uint32_t)k[7])<<24;
> +		case 7:
> +			b += ((uint32_t)k[6])<<16;
> +		case 6:
> +			b += ((uint32_t)k[5])<<8;
> +		case 5:
> +			b += k[4];
> +		case 4:
> +			a += ((uint32_t)k[3])<<24;
> +		case 3:
> +			a += ((uint32_t)k[2])<<16;
> +		case 2:
> +			a += ((uint32_t)k[1])<<8;
> +		case 1:
> +			a += k[0];
> +		break;
> +#else
> +		case 12:
> +			c += k[11];
> +		case 11:
> +			c += ((uint32_t)k[10])<<8;
> +		case 10:
> +			c += ((uint32_t)k[9])<<16;
> +		case 9:
> +			c += ((uint32_t)k[8])<<24;
> +		case 8:
> +			b += k[7];
> +		case 7:
> +			b += ((uint32_t)k[6])<<8;
> +		case 6:
> +			b += ((uint32_t)k[5])<<16;
> +		case 5:
> +			b += ((uint32_t)k[4])<<24;
> +		case 4:
> +			a += k[3];
> +		case 3:
> +			a += ((uint32_t)k[2])<<8;
> +		case 2:
> +			a += ((uint32_t)k[1])<<16;
> +		case 1:
> +			a += ((uint32_t)k[0])<<24;
> +		break;
> +#endif
> +		case 0:
> +			return c;
> +		}
> +	}
> +
> +	__rte_jhash_final(a, b, c);
>  
>  	return c;
>  }
> @@ -151,33 +378,93 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
>  static inline uint32_t
>  rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
>  {
> -	uint32_t a, b, c, len;
> +	uint32_t a, b, c;
>  
> -	a = b = RTE_JHASH_GOLDEN_RATIO;
> -	c = initval;
> -	len = length;
> +	/* Set up the internal state */
> +	a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length)<<2) + initval;
>  
> -	while (len >= 3) {
> +	/* Handle most of the key */
> +	while (length > 3) {
>  		a += k[0];
>  		b += k[1];
>  		c += k[2];
> +
>  		__rte_jhash_mix(a, b, c);
> -		k += 3; len -= 3;
> -	}
>  
> -	c += length * 4;
> +		k += 3;
> +		length -= 3;
> +	}
>  
> -	switch (len) {
> -		case 2 : b += k[1];
> -		case 1 : a += k[0];
> -		default: break;
> +	/* Handle the last 3 uint32_t's */
> +	switch (length) {
> +	case 3:
> +		c += k[2];
> +	case 2:
> +		b += k[1];
> +	case 1:
> +		a += k[0];
> +		__rte_jhash_final(a, b, c);
> +	/* case 0: nothing left to add */
> +	case 0:
> +		break;
>  	};
>  
> -	__rte_jhash_mix(a,b,c);
> -
>  	return c;
>  }
>  
> +/**
> + * Same as rte_jhash2, but take two seeds and return two uint32_ts.
> + * pc and pb must be non-null, and *pc and *pb must both be initialized
> + * with seeds. If you pass in (*pb)=0, the output (*pc) will be
> + * the same as the return value from rte_jhash.
> + *
> + * @param k
> + *   Key to calculate hash of.
> + * @param length
> + *   Length of key in units of 4 bytes.
> + * @param pc
> + *   IN: seed OUT: primary hash value.
> + * @param pb
> + *   IN: second seed OUT: secondary hash value.
> + */
> +static inline void
> +rte_jhash_word2(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb)
> +{
> +	uint32_t a, b, c;
> +
> +	/* Set up the internal state */
> +	a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length)<<2) + *pc;
> +	c += *pb;
> +
> +	/* Handle most of the key */
> +	while (length > 3) {
> +		a += k[0];
> +		b += k[1];
> +		c += k[2];
> +
> +		__rte_jhash_mix(a, b, c);
> +
> +		k += 3;
> +		length -= 3;
> +	}
> +
> +	/* Handle the last 3 uint32_t's */
> +	switch (length) {
> +	case 3:
> +		c += k[2];
> +	case 2:
> +		b += k[1];
> +	case 1:
> +		a += k[0];
> +		__rte_jhash_final(a, b, c);
> +	/* case 0: nothing left to add */
> +	case 0:
> +		break;
> +	};
> +
> +	*pc = c;
> +	*pb = b;
> +}
>  
>  /**
>   * A special ultra-optimized versions that knows it is hashing exactly
> -- 
> 1.7.4.1
> 

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [dpdk-dev] [PATCH] hash: update jhash function with the latest available
  2015-04-16 14:01 ` Bruce Richardson
@ 2015-04-17 16:03   ` De Lara Guarch, Pablo
  0 siblings, 0 replies; 62+ messages in thread
From: De Lara Guarch, Pablo @ 2015-04-17 16:03 UTC (permalink / raw)
  To: Richardson, Bruce; +Cc: dev



> -----Original Message-----
> From: Richardson, Bruce
> Sent: Thursday, April 16, 2015 3:01 PM
> To: De Lara Guarch, Pablo
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] hash: update jhash function with the latest
> available
> 
> On Thu, Apr 16, 2015 at 02:26:59PM +0100, Pablo de Lara wrote:
> > Jenkins hash function was developed originally in 1996,
> > and was integrated in first versions of DPDK.
> > The function has been improved in 2006,
> > achieving up to 60% better performance, compared to the original one.
> >
> > Check out: http://burtleburtle.net/bob/c/lookup3.c
> >
> > This patch integrates that code in the rte_jhash library,
> > adding also a new function rte_jhash_word2,
> > that returns two different hash values, for a single key.
> >
> 
> Should the addition of the new functionality not be a separate patch from
> the
> update to the existing code?

True. Actually, I am missing one extra function (2 in total).

> Also, do the new functions return the exact same values as the previous
> versions,
> just faster?

The new functions return different values from the previous version
AND are faster (in some cases, MUCH faster).

[...]

> > +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
> > +		case 12:
> > +			c += k[2]; b += k[1]; a += k[0]; break;
> > +		case 11:
> > +			c += k[2]&0xffffff; b += k[1]; a += k[0]; break;
> > +		case 10:
> > +			c += k[2]&0xffff; b += k[1]; a += k[0]; break;
> > +		case 9:
> > +			c += k[2]&0xff; b += k[1]; a += k[0]; break;
> > +		case 8:
> > +			b += k[1]; a += k[0]; break;
> > +		case 7:
> > +			b += k[1]&0xffffff; a += k[0]; break;
> > +		case 6:
> > +			b += k[1]&0xffff; a += k[0]; break;
> > +		case 5:
> > +			b += k[1]&0xff; a += k[0]; break;
> > +		case 4:
> > +			a += k[0]; break;
> > +		case 3:
> > +			a += k[0]&0xffffff; break;
> > +		case 2:
> > +			a += k[0]&0xffff; break;
> > +		case 1:
> > +			a += k[0]&0xff; break;
> > +#else
> > +		case 12:
> > +			c += k[2]; b += k[1]; a += k[0]; break;
> > +		case 11:
> > +			c += k[2]&0xffffff00; b += k[1]; a += k[0]; break;
> > +		case 10:
> > +			c += k[2]&0xffff0000; b += k[1]; a += k[0]; break;
> > +		case 9:
> > +			c += k[2]&0xff000000; b += k[1]; a += k[0]; break;
> > +		case 8:
> > +			b += k[1]; a += k[0]; break;
> > +		case 7:
> > +			b += k[1]&0xffffff00; a += k[0]; break;
> > +		case 6:
> > +			b += k[1]&0xffff0000; a += k[0]; break;
> > +		case 5:
> > +			b += k[1]&0xff000000; a += k[0]; break;
> > +		case 4:
> > +			a += k[0]; break;
> > +		case 3:
> > +			a += k[0]&0xffffff00; break;
> > +		case 2:
> > +			a += k[0]&0xffff0000; break;
> > +		case 1:
> > +			a += k[0]&0xff000000; break;
> > +#endif
> 
> Only the constants seem different in this block. Can we get rid of the
> #ifdefs using rte_XX_to_cpu() calls instead?

Will add that in next version.

> 
> > +		/* zero length strings require no mixing */
> > +		case 0:
> > +			return c;
> > +		};
> > +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
> > +	} else if ((u.i & 0x1) == 0) {
> > +		/* read 16-bit chunks */
> > +		const uint16_t *k = (const uint16_t *)key;
> > +		const uint8_t  *k8;
> > +
> > +		/* all but last block: aligned reads and different mixing */
> > +		while (length > 12) {
> > +			a += k[0] + (((uint32_t)k[1])<<16);
> > +			b += k[2] + (((uint32_t)k[3])<<16);
> > +			c += k[4] + (((uint32_t)k[5])<<16);
> > +
> > +			__rte_jhash_mix(a, b, c);
> > +
> > +			k += 6;
> > +			length -= 12;
> > +		}
> > +
> > +		/* handle the last (probably partial) block */
> > +		k8 = (const uint8_t *)k;
> > +		switch (length) {
> > +		case 12:
> > +			c += k[4]+(((uint32_t)k[5])<<16);
> > +			b += k[2]+(((uint32_t)k[3])<<16);
> > +			a += k[0]+(((uint32_t)k[1])<<16);
> > +			break;
> > +		case 11:
> > +			/* fall through */
> > +			c += ((uint32_t)k8[10])<<16;
> > +		case 10:
> > +			c += k[4];
> > +			b += k[2]+(((uint32_t)k[3])<<16);
> > +			a += k[0]+(((uint32_t)k[1])<<16);
> > +			break;
> > +		case 9:
> > +			/* fall through */
> > +			c += k8[8];
> > +		case 8:
> > +			b += k[2]+(((uint32_t)k[3])<<16);
> > +			a += k[0]+(((uint32_t)k[1])<<16);
> > +			break;
> > +		case 7:
> > +			/* fall through */
> > +			b += ((uint32_t)k8[6])<<16;
> > +		case 6:
> > +			b += k[2];
> > +			a += k[0]+(((uint32_t)k[1])<<16);
> > +			break;
> > +		case 5:
> > +			/* fall through */
> > +			b += k8[4];
> > +		case 4:
> > +			a += k[0]+(((uint32_t)k[1])<<16);
> > +			break;
> > +		case 3:
> > +			/* fall through */
> > +			a += ((uint32_t)k8[2])<<16;
> > +		case 2:
> > +			a += k[0];
> > +			break;
> > +		case 1:
> > +			a += k8[0];
> > +			break;
> > +		case 0:
> > +			/* zero length requires no mixing */
> > +			return c;
> > +		}
> > +#endif
> 
> No else block for this ifdef?

According to the code, for big endian it only covers the 4-byte-aligned case and the rest of the cases.

> 
> > +	} else {
> > +		const uint8_t *k = (const uint8_t *)key;
> > +
> > +		/* all but the last block: affect some 32 bits of (a, b, c) */
> > +		while (length > 12) {
> > +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
> > +			a += k[0];
> > +			a += ((uint32_t)k[1])<<8;
> > +			a += ((uint32_t)k[2])<<16;
> > +			a += ((uint32_t)k[3])<<24;
> > +			b += k[4];
> > +			b += ((uint32_t)k[5])<<8;
> > +			b += ((uint32_t)k[6])<<16;
> > +			b += ((uint32_t)k[7])<<24;
> > +			c += k[8];
> > +			c += ((uint32_t)k[9])<<8;
> > +			c += ((uint32_t)k[10])<<16;
> > +			c += ((uint32_t)k[11])<<24;
> > +#else
> > +			a += ((uint32_t)k[0])<<24;
> > +			a += ((uint32_t)k[1])<<16;
> > +			a += ((uint32_t)k[2])<<8;
> > +			a += ((uint32_t)k[3]);
> > +			b += ((uint32_t)k[4])<<24;
> > +			b += ((uint32_t)k[5])<<16;
> > +			b += ((uint32_t)k[6])<<8;
> > +			b += ((uint32_t)k[7]);
> > +			c += ((uint32_t)k[8])<<24;
> > +			c += ((uint32_t)k[9])<<16;
> > +			c += ((uint32_t)k[10])<<8;
> > +			c += ((uint32_t)k[11]);
> > +#endif
> 
> Maybe find a better way to shorten/remove this #ifdef also. E.g. shorter
> ifdef defining macros for the different shift amounts, 0, 8, 16, 24.

Agree. Will change that in v2.

[...]

Thanks for the comments. I will send a v2 soon.

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v2 0/6] update jhash function
  2015-04-16 13:26 [dpdk-dev] [PATCH] hash: update jhash function with the latest available Pablo de Lara
  2015-04-16 14:01 ` Bruce Richardson
@ 2015-04-24 11:23 ` Pablo de Lara
  2015-04-24 11:23   ` [dpdk-dev] [PATCH v2 1/6] test/hash: move hash function perf tests to separate file Pablo de Lara
                     ` (6 more replies)
  1 sibling, 7 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-04-24 11:23 UTC (permalink / raw)
  To: dev

Jenkins hash function was developed originally in 1996,
and was integrated in first versions of DPDK.
The function has been improved in 2006,
achieving up to 60% better performance, compared to the original one.

This patchset updates the current jhash in DPDK,
including two new functions that generate two hashes from a single key.

It also moves the existing hash function performance tests into
a separate file, to make them quicker to run.

changes in v2:

- Split single commit into three commits: one that updates the existing functions,
  another that adds two new functions, and a third that uses one of those
  functions as a base to be called by the other ones.
- Remove some unnecessary ifdefs in the code.
- Add new macros to help with the reuse of constants
- Separate hash function performance tests to another file
  and improve cycle measurements.
- Rename existing function rte_jhash2 to rte_jhash_32b
  (something more meaningful) and mark rte_jhash2 as
  deprecated

Pablo de Lara (6):
  test/hash: move hash function perf tests to separate file
  test/hash: improve accuracy on cycle measurements
  hash: update jhash function with the latest available
  hash: add two new functions to jhash library
  hash: remove duplicated code
  hash: rename rte_jhash2 to rte_jhash_32b

 app/test/Makefile               |    1 +
 app/test/test_func_reentrancy.c |    2 +-
 app/test/test_hash.c            |    4 +-
 app/test/test_hash_func_perf.c  |  145 ++++++++++++++++++
 app/test/test_hash_perf.c       |   71 +---------
 lib/librte_hash/rte_jhash.h     |  313 +++++++++++++++++++++++++++++----------
 6 files changed, 387 insertions(+), 149 deletions(-)
 create mode 100644 app/test/test_hash_func_perf.c

-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v2 1/6] test/hash: move hash function perf tests to separate file
  2015-04-24 11:23 ` [dpdk-dev] [PATCH v2 0/6] update jhash function Pablo de Lara
@ 2015-04-24 11:23   ` Pablo de Lara
  2015-04-24 11:23   ` [dpdk-dev] [PATCH v2 2/6] test/hash: improve accuracy on cycle measurements Pablo de Lara
                     ` (5 subsequent siblings)
  6 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-04-24 11:23 UTC (permalink / raw)
  To: dev

This patch moves the hash function performance tests to a separate file,
so the user can check the performance of the existing hash functions more quickly,
without having to run all the other hash operation performance tests,
which take some time.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 app/test/Makefile              |    1 +
 app/test/test_hash_func_perf.c |  145 ++++++++++++++++++++++++++++++++++++++++
 app/test/test_hash_perf.c      |   71 +-------------------
 3 files changed, 147 insertions(+), 70 deletions(-)
 create mode 100644 app/test/test_hash_func_perf.c

diff --git a/app/test/Makefile b/app/test/Makefile
index 4aca77c..77a9c42 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -83,6 +83,7 @@ SRCS-y += test_memcpy_perf.c
 
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash.c
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_perf.c
+SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_func_perf.c
 
 SRCS-$(CONFIG_RTE_LIBRTE_LPM) += test_lpm.c
 SRCS-$(CONFIG_RTE_LIBRTE_LPM) += test_lpm6.c
diff --git a/app/test/test_hash_func_perf.c b/app/test/test_hash_func_perf.c
new file mode 100644
index 0000000..ba31c53
--- /dev/null
+++ b/app/test/test_hash_func_perf.c
@@ -0,0 +1,145 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_cycles.h>
+#include <rte_random.h>
+
+#include "test.h"
+
+#include <rte_hash.h>
+#include <rte_jhash.h>
+#include <rte_hash_crc.h>
+
+/*******************************************************************************
+ * Hash function performance test configuration section. Each performance test
+ * will be performed HASHTEST_ITERATIONS times.
+ *
+ * The three arrays below control what tests are performed. Every combination
+ * from the array entries is tested.
+ */
+#define HASHTEST_ITERATIONS 1000000
+
+static rte_hash_function hashtest_funcs[] = {rte_jhash, rte_hash_crc};
+static uint32_t hashtest_initvals[] = {0};
+static uint32_t hashtest_key_lens[] = {2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, 31, 32, 33, 63, 64};
+/******************************************************************************/
+
+/*
+ * To help print out name of hash functions.
+ */
+static const char *get_hash_name(rte_hash_function f)
+{
+	if (f == rte_jhash)
+		return "jhash";
+
+	if (f == rte_hash_crc)
+		return "rte_hash_crc";
+
+	return "UnknownHash";
+}
+
+/*
+ * Test a hash function.
+ */
+static void run_hash_func_test(rte_hash_function f, uint32_t init_val,
+		uint32_t key_len)
+{
+	static uint8_t key[RTE_HASH_KEY_LENGTH_MAX];
+	uint64_t ticks = 0, start, end;
+	unsigned i, j;
+
+	for (i = 0; i < HASHTEST_ITERATIONS; i++) {
+
+		for (j = 0; j < key_len; j++)
+			key[j] = (uint8_t) rte_rand();
+
+		start = rte_rdtsc();
+		f(key, key_len, init_val);
+		end = rte_rdtsc();
+		ticks += end - start;
+	}
+
+	printf("%-12s, %-18u, %-13u, %.02f\n", get_hash_name(f), (unsigned) key_len,
+			(unsigned) init_val, (double)ticks / HASHTEST_ITERATIONS);
+}
+
+/*
+ * Test all hash functions.
+ */
+static void run_hash_func_tests(void)
+{
+	unsigned i, j, k;
+
+	printf(" *** Hash function performance test results ***\n");
+	printf(" Number of iterations for each test = %d\n",
+			HASHTEST_ITERATIONS);
+	printf("Hash Func.  , Key Length (bytes), Initial value, Ticks/Op.\n");
+
+	for (i = 0;
+	     i < sizeof(hashtest_funcs) / sizeof(rte_hash_function);
+	     i++) {
+		for (j = 0;
+		     j < sizeof(hashtest_initvals) / sizeof(uint32_t);
+		     j++) {
+			for (k = 0;
+			     k < sizeof(hashtest_key_lens) / sizeof(uint32_t);
+			     k++) {
+				run_hash_func_test(hashtest_funcs[i],
+						hashtest_initvals[j],
+						hashtest_key_lens[k]);
+			}
+		}
+	}
+}
+
+static int
+test_hash_func_perf(void)
+{
+	run_hash_func_tests();
+
+	return 0;
+}
+
+static struct test_command hash_func_perf_cmd = {
+	.command = "hash_func_perf_autotest",
+	.callback = test_hash_func_perf,
+};
+REGISTER_TEST_COMMAND(hash_func_perf_cmd);
diff --git a/app/test/test_hash_perf.c b/app/test/test_hash_perf.c
index 6eabb21..d0e5ce0 100644
--- a/app/test/test_hash_perf.c
+++ b/app/test/test_hash_perf.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -85,20 +85,6 @@ struct tbl_perf_test_params {
 #define LOCAL_FBK_HASH_ENTRIES_MAX (1 << 15)
 
 /*******************************************************************************
- * Hash function performance test configuration section. Each performance test
- * will be performed HASHTEST_ITERATIONS times.
- *
- * The five arrays below control what tests are performed. Every combination
- * from the array entries is tested.
- */
-#define HASHTEST_ITERATIONS 1000000
-
-static rte_hash_function hashtest_funcs[] = {rte_jhash, rte_hash_crc};
-static uint32_t hashtest_initvals[] = {0};
-static uint32_t hashtest_key_lens[] = {2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, 31, 32, 33, 63, 64};
-/******************************************************************************/
-
-/*******************************************************************************
  * Hash table performance test configuration section.
  */
 struct tbl_perf_test_params tbl_perf_params[] =
@@ -617,60 +603,6 @@ static int run_all_tbl_perf_tests(void)
 	return 0;
 }
 
-/*
- * Test a hash function.
- */
-static void run_hash_func_test(rte_hash_function f, uint32_t init_val,
-		uint32_t key_len)
-{
-	static uint8_t key[RTE_HASH_KEY_LENGTH_MAX];
-	uint64_t ticks = 0, start, end;
-	unsigned i, j;
-
-	for (i = 0; i < HASHTEST_ITERATIONS; i++) {
-
-		for (j = 0; j < key_len; j++)
-			key[j] = (uint8_t) rte_rand();
-
-		start = rte_rdtsc();
-		f(key, key_len, init_val);
-		end = rte_rdtsc();
-		ticks += end - start;
-	}
-
-	printf("%-12s, %-18u, %-13u, %.02f\n", get_hash_name(f), (unsigned) key_len,
-			(unsigned) init_val, (double)ticks / HASHTEST_ITERATIONS);
-}
-
-/*
- * Test all hash functions.
- */
-static void run_hash_func_tests(void)
-{
-	unsigned i, j, k;
-
-	printf("\n\n *** Hash function performance test results ***\n");
-	printf(" Number of iterations for each test = %d\n",
-			HASHTEST_ITERATIONS);
-	printf("Hash Func.  , Key Length (bytes), Initial value, Ticks/Op.\n");
-
-	for (i = 0;
-	     i < sizeof(hashtest_funcs) / sizeof(rte_hash_function);
-	     i++) {
-		for (j = 0;
-		     j < sizeof(hashtest_initvals) / sizeof(uint32_t);
-		     j++) {
-			for (k = 0;
-			     k < sizeof(hashtest_key_lens) / sizeof(uint32_t);
-			     k++) {
-				run_hash_func_test(hashtest_funcs[i],
-						hashtest_initvals[j],
-						hashtest_key_lens[k]);
-			}
-		}
-	}
-}
-
 /* Control operation of performance testing of fbk hash. */
 #define LOAD_FACTOR 0.667	/* How full to make the hash table. */
 #define TEST_SIZE 1000000	/* How many operations to time. */
@@ -757,7 +689,6 @@ test_hash_perf(void)
 {
 	if (run_all_tbl_perf_tests() < 0)
 		return -1;
-	run_hash_func_tests();
 
 	if (fbk_hash_perf_test() < 0)
 		return -1;
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v2 2/6] test/hash: improve accuracy on cycle measurements
  2015-04-24 11:23 ` [dpdk-dev] [PATCH v2 0/6] update jhash function Pablo de Lara
  2015-04-24 11:23   ` [dpdk-dev] [PATCH v2 1/6] test/hash: move hash function perf tests to separate file Pablo de Lara
@ 2015-04-24 11:23   ` Pablo de Lara
  2015-04-24 11:23   ` [dpdk-dev] [PATCH v2 3/6] hash: update jhash function with the latest available Pablo de Lara
                     ` (4 subsequent siblings)
  6 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-04-24 11:23 UTC (permalink / raw)
  To: dev

Cycles per hash calculation were measured per single operation.
It is much more accurate to run several iterations between measurements
and divide by number of iterations.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 app/test/test_hash_func_perf.c |   18 +++++++++---------
 1 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/app/test/test_hash_func_perf.c b/app/test/test_hash_func_perf.c
index ba31c53..004c9be 100644
--- a/app/test/test_hash_func_perf.c
+++ b/app/test/test_hash_func_perf.c
@@ -82,21 +82,21 @@ static const char *get_hash_name(rte_hash_function f)
 static void run_hash_func_test(rte_hash_function f, uint32_t init_val,
 		uint32_t key_len)
 {
-	static uint8_t key[RTE_HASH_KEY_LENGTH_MAX];
-	uint64_t ticks = 0, start, end;
+	static uint8_t key[HASHTEST_ITERATIONS][RTE_HASH_KEY_LENGTH_MAX];
+	uint64_t ticks, start, end;
 	unsigned i, j;
 
 	for (i = 0; i < HASHTEST_ITERATIONS; i++) {
-
 		for (j = 0; j < key_len; j++)
-			key[j] = (uint8_t) rte_rand();
-
-		start = rte_rdtsc();
-		f(key, key_len, init_val);
-		end = rte_rdtsc();
-		ticks += end - start;
+			key[i][j] = (uint8_t) rte_rand();
 	}
 
+	start = rte_rdtsc();
+	for (i = 0; i < HASHTEST_ITERATIONS; i++)
+		f(key[i], key_len, init_val);
+	end = rte_rdtsc();
+	ticks = end - start;
+
 	printf("%-12s, %-18u, %-13u, %.02f\n", get_hash_name(f), (unsigned) key_len,
 			(unsigned) init_val, (double)ticks / HASHTEST_ITERATIONS);
 }
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v2 3/6] hash: update jhash function with the latest available
  2015-04-24 11:23 ` [dpdk-dev] [PATCH v2 0/6] update jhash function Pablo de Lara
  2015-04-24 11:23   ` [dpdk-dev] [PATCH v2 1/6] test/hash: move hash function perf tests to separate file Pablo de Lara
  2015-04-24 11:23   ` [dpdk-dev] [PATCH v2 2/6] test/hash: improve accuracy on cycle measurements Pablo de Lara
@ 2015-04-24 11:23   ` Pablo de Lara
  2015-04-24 11:23   ` [dpdk-dev] [PATCH v2 4/6] hash: add two new functions to jhash library Pablo de Lara
                     ` (3 subsequent siblings)
  6 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-04-24 11:23 UTC (permalink / raw)
  To: dev

Jenkins hash function was developed originally in 1996,
and was integrated in first versions of DPDK.
The function has been improved in 2006,
achieving up to 60% better performance, compared to the original one.

This patch integrates that code into the rte_jhash library.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 lib/librte_hash/rte_jhash.h |  234 ++++++++++++++++++++++++++++++++-----------
 1 files changed, 174 insertions(+), 60 deletions(-)

diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index a4bf5a1..4ec1c39 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -45,38 +45,68 @@ extern "C" {
 #endif
 
 #include <stdint.h>
+#include <string.h>
+#include <rte_byteorder.h>
 
 /* jhash.h: Jenkins hash support.
  *
- * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
+ * Copyright (C) 2006 Bob Jenkins (bob_jenkins@burtleburtle.net)
  *
  * http://burtleburtle.net/bob/hash/
  *
  * These are the credits from Bob's sources:
  *
- * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
- * hash(), hash2(), hash3, and mix() are externally useful functions.
- * Routines to test the hash are included if SELF_TEST is defined.
- * You can use this free for any purpose.  It has no warranty.
+ * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+ *
+ * These are functions for producing 32-bit hashes for hash table lookup.
+ * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
+ * are externally useful functions.  Routines to test the hash are included
+ * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
+ * the public domain.  It has no warranty.
  *
  * $FreeBSD$
  */
 
+#define rot(x, k) (((x) << (k)) | ((x) >> (32-(k))))
+
 /** @internal Internal function. NOTE: Arguments are modified. */
 #define __rte_jhash_mix(a, b, c) do { \
-	a -= b; a -= c; a ^= (c>>13); \
-	b -= c; b -= a; b ^= (a<<8); \
-	c -= a; c -= b; c ^= (b>>13); \
-	a -= b; a -= c; a ^= (c>>12); \
-	b -= c; b -= a; b ^= (a<<16); \
-	c -= a; c -= b; c ^= (b>>5); \
-	a -= b; a -= c; a ^= (c>>3); \
-	b -= c; b -= a; b ^= (a<<10); \
-	c -= a; c -= b; c ^= (b>>15); \
+	a -= c; a ^= rot(c, 4); c += b; \
+	b -= a; b ^= rot(a, 6); a += c; \
+	c -= b; c ^= rot(b, 8); b += a; \
+	a -= c; a ^= rot(c, 16); c += b; \
+	b -= a; b ^= rot(a, 19); a += c; \
+	c -= b; c ^= rot(b, 4); b += a; \
+} while (0)
+
+#define __rte_jhash_final(a, b, c) do { \
+	c ^= b; c -= rot(b, 14); \
+	a ^= c; a -= rot(c, 11); \
+	b ^= a; b -= rot(a, 25); \
+	c ^= b; c -= rot(b, 16); \
+	a ^= c; a -= rot(c, 4);  \
+	b ^= a; b -= rot(a, 14); \
+	c ^= b; c -= rot(b, 24); \
 } while (0)
 
 /** The golden ratio: an arbitrary value. */
-#define RTE_JHASH_GOLDEN_RATIO      0x9e3779b9
+#define RTE_JHASH_GOLDEN_RATIO      0xdeadbeef
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+#define RTE_JHASH_BYTE0_SHIFT 0
+#define RTE_JHASH_BYTE1_SHIFT 8
+#define RTE_JHASH_BYTE2_SHIFT 16
+#define RTE_JHASH_BYTE3_SHIFT 24
+#else
+#define RTE_JHASH_BYTE0_SHIFT 24
+#define RTE_JHASH_BYTE1_SHIFT 16
+#define RTE_JHASH_BYTE2_SHIFT 8
+#define RTE_JHASH_BYTE3_SHIFT 0
+#endif
+
+#define LOWER8b_MASK rte_le_to_cpu_32(0xff)
+#define LOWER16b_MASK rte_le_to_cpu_32(0xffff)
+#define LOWER24b_MASK rte_le_to_cpu_32(0xffffff)
 
 /**
  * The most generic version, hashes an arbitrary sequence
@@ -95,42 +125,119 @@ extern "C" {
 static inline uint32_t
 rte_jhash(const void *key, uint32_t length, uint32_t initval)
 {
-	uint32_t a, b, c, len;
-	const uint8_t *k = (const uint8_t *)key;
-	const uint32_t *k32 = (const uint32_t *)key;
+	uint32_t a, b, c;
+	union {
+		const void *ptr;
+		size_t i;
+	} u;
 
-	len = length;
-	a = b = RTE_JHASH_GOLDEN_RATIO;
-	c = initval;
+	/* Set up the internal state */
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + initval;
 
-	while (len >= 12) {
-		a += k32[0];
-		b += k32[1];
-		c += k32[2];
+	u.ptr = key;
 
-		__rte_jhash_mix(a,b,c);
+	/* Check key alignment. For x86 architecture, first case is always optimal */
+	if (!strcmp(RTE_ARCH,"x86_64") || !strcmp(RTE_ARCH,"i686") || (u.i & 0x3) == 0) {
+		const uint32_t *k = (const uint32_t *)key;
 
-		k += (3 * sizeof(uint32_t)), k32 += 3;
-		len -= (3 * sizeof(uint32_t));
-	}
+		while (length > 12) {
+			a += k[0];
+			b += k[1];
+			c += k[2];
 
-	c += length;
-	switch (len) {
-		case 11: c += ((uint32_t)k[10] << 24);
-		case 10: c += ((uint32_t)k[9] << 16);
-		case 9 : c += ((uint32_t)k[8] << 8);
-		case 8 : b += ((uint32_t)k[7] << 24);
-		case 7 : b += ((uint32_t)k[6] << 16);
-		case 6 : b += ((uint32_t)k[5] << 8);
-		case 5 : b += k[4];
-		case 4 : a += ((uint32_t)k[3] << 24);
-		case 3 : a += ((uint32_t)k[2] << 16);
-		case 2 : a += ((uint32_t)k[1] << 8);
-		case 1 : a += k[0];
-		default: break;
-	};
+			__rte_jhash_mix(a, b, c);
+
+			k += 3;
+			length -= 12;
+		}
+
+		switch (length) {
+		case 12:
+			c += k[2]; b += k[1]; a += k[0]; break;
+		case 11:
+			c += k[2] & LOWER24b_MASK; b += k[1]; a += k[0]; break;
+		case 10:
+			c += k[2] & LOWER16b_MASK; b += k[1]; a += k[0]; break;
+		case 9:
+			c += k[2] & LOWER8b_MASK; b += k[1]; a += k[0]; break;
+		case 8:
+			b += k[1]; a += k[0]; break;
+		case 7:
+			b += k[1] & LOWER24b_MASK; a += k[0]; break;
+		case 6:
+			b += k[1] & LOWER16b_MASK; a += k[0]; break;
+		case 5:
+			b += k[1] & LOWER8b_MASK; a += k[0]; break;
+		case 4:
+			a += k[0]; break;
+		case 3:
+			a += k[0] & LOWER24b_MASK; break;
+		case 2:
+			a += k[0] & LOWER16b_MASK; break;
+		case 1:
+			a += k[0] & LOWER8b_MASK; break;
+		/* zero length strings require no mixing */
+		case 0:
+			return c;
+		};
+	} else {
+		const uint8_t *k = (const uint8_t *)key;
+
+		/* all but the last block: affect some 32 bits of (a, b, c) */
+		while (length > 12) {
+			a += ((uint32_t)k[0]) << RTE_JHASH_BYTE0_SHIFT;
+			a += ((uint32_t)k[1]) << RTE_JHASH_BYTE1_SHIFT;
+			a += ((uint32_t)k[2]) << RTE_JHASH_BYTE2_SHIFT;
+			a += ((uint32_t)k[3]) << RTE_JHASH_BYTE3_SHIFT;
+			b += ((uint32_t)k[4]) << RTE_JHASH_BYTE0_SHIFT;
+			b += ((uint32_t)k[5]) << RTE_JHASH_BYTE1_SHIFT;
+			b += ((uint32_t)k[6]) << RTE_JHASH_BYTE2_SHIFT;
+			b += ((uint32_t)k[7]) << RTE_JHASH_BYTE3_SHIFT;
+			c += ((uint32_t)k[8]) << RTE_JHASH_BYTE0_SHIFT;
+			c += ((uint32_t)k[9]) << RTE_JHASH_BYTE1_SHIFT;
+			c += ((uint32_t)k[10]) << RTE_JHASH_BYTE2_SHIFT;
+			c += ((uint32_t)k[11]) << RTE_JHASH_BYTE3_SHIFT;
 
-	__rte_jhash_mix(a,b,c);
+			__rte_jhash_mix(a, b, c);
+
+			k += 12;
+			length -= 12;
+		}
+
+		/* last block: affect all 32 bits of (c) */
+		/* all the case statements fall through */
+		switch (length) {
+		case 12:
+			c += ((uint32_t)k[11]) << RTE_JHASH_BYTE3_SHIFT;
+		case 11:
+			c += ((uint32_t)k[10]) << RTE_JHASH_BYTE2_SHIFT;
+		case 10:
+			c += ((uint32_t)k[9]) << RTE_JHASH_BYTE1_SHIFT;
+		case 9:
+			c += ((uint32_t)k[8]) << RTE_JHASH_BYTE0_SHIFT;
+		case 8:
+			b += ((uint32_t)k[7]) << RTE_JHASH_BYTE3_SHIFT;
+		case 7:
+			b += ((uint32_t)k[6]) << RTE_JHASH_BYTE2_SHIFT;
+		case 6:
+			b += ((uint32_t)k[5]) << RTE_JHASH_BYTE1_SHIFT;
+		case 5:
+			b += ((uint32_t)k[4]) << RTE_JHASH_BYTE0_SHIFT;
+		case 4:
+			a += ((uint32_t)k[3]) << RTE_JHASH_BYTE3_SHIFT;
+		case 3:
+			a += ((uint32_t)k[2]) << RTE_JHASH_BYTE2_SHIFT;
+		case 2:
+			a += ((uint32_t)k[1]) << RTE_JHASH_BYTE1_SHIFT;
+		case 1:
+			a += ((uint32_t)k[0]) << RTE_JHASH_BYTE0_SHIFT;
+		break;
+		case 0:
+			return c;
+		}
+	}
+
+	__rte_jhash_final(a, b, c);
 
 	return c;
 }
@@ -151,30 +258,37 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
 static inline uint32_t
 rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
 {
-	uint32_t a, b, c, len;
+	uint32_t a, b, c;
 
-	a = b = RTE_JHASH_GOLDEN_RATIO;
-	c = initval;
-	len = length;
+	/* Set up the internal state */
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length) << 2) + initval;
 
-	while (len >= 3) {
+	/* Handle most of the key */
+	while (length > 3) {
 		a += k[0];
 		b += k[1];
 		c += k[2];
+
 		__rte_jhash_mix(a, b, c);
-		k += 3; len -= 3;
-	}
 
-	c += length * 4;
+		k += 3;
+		length -= 3;
+	}
 
-	switch (len) {
-		case 2 : b += k[1];
-		case 1 : a += k[0];
-		default: break;
+	/* Handle the last 3 uint32_t's */
+	switch (length) {
+	case 3:
+		c += k[2];
+	case 2:
+		b += k[1];
+	case 1:
+		a += k[0];
+		__rte_jhash_final(a, b, c);
+	/* case 0: nothing left to add */
+	case 0:
+		break;
 	};
 
-	__rte_jhash_mix(a,b,c);
-
 	return c;
 }
 
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v2 4/6] hash: add two new functions to jhash library
  2015-04-24 11:23 ` [dpdk-dev] [PATCH v2 0/6] update jhash function Pablo de Lara
                     ` (2 preceding siblings ...)
  2015-04-24 11:23   ` [dpdk-dev] [PATCH v2 3/6] hash: update jhash function with the latest available Pablo de Lara
@ 2015-04-24 11:23   ` Pablo de Lara
  2015-04-24 11:23   ` [dpdk-dev] [PATCH v2 5/6] hash: remove duplicated code Pablo de Lara
                     ` (2 subsequent siblings)
  6 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-04-24 11:23 UTC (permalink / raw)
  To: dev

With the jhash update, two new functions were introduced:

- rte_jhash_2hashes: Same as rte_jhash, but takes two seeds
                     and returns two hashes (uint32_ts)

- rte_jhash2_2hashes: Same as rte_jhash2, but takes two seeds
                     and returns two hashes (uint32_ts)

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 lib/librte_hash/rte_jhash.h |  194 +++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 194 insertions(+), 0 deletions(-)

diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index 4ec1c39..1e69e40 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -292,6 +292,200 @@ rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
 	return c;
 }
 
+/**
+ * Same as rte_jhash, but takes two seeds and returns two uint32_ts.
+ * pc and pb must be non-null, and *pc and *pb must both be initialized
+ * with seeds. If you pass in (*pb)=0, the output (*pc) will be
+ * the same as the return value from rte_jhash.
+ *
+ * @param key
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in bytes.
+ * @param pc
+ *   IN: seed OUT: primary hash value.
+ * @param pb
+ *   IN: second seed OUT: secondary hash value.
+ */
+static inline void
+rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb)
+{
+	uint32_t a, b, c;
+	union {
+		const void *ptr;
+		size_t i;
+	} u;
+
+	/* Set up the internal state */
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + *pc;
+	c += *pb;
+
+	u.ptr = key;
+
+	/* Check key alignment. For x86 architecture, first case is always optimal */
+	if (!strcmp(RTE_ARCH,"x86_64") || !strcmp(RTE_ARCH,"i686") || (u.i & 0x3) == 0) {
+		const uint32_t *k = (const uint32_t *)key;
+
+		while (length > 12) {
+			a += k[0];
+			b += k[1];
+			c += k[2];
+
+			__rte_jhash_mix(a, b, c);
+
+			k += 3;
+			length -= 12;
+		}
+
+		switch (length) {
+		case 12:
+			c += k[2]; b += k[1]; a += k[0]; break;
+		case 11:
+			c += k[2] & LOWER24b_MASK; b += k[1]; a += k[0]; break;
+		case 10:
+			c += k[2] & LOWER16b_MASK; b += k[1]; a += k[0]; break;
+		case 9:
+			c += k[2] & LOWER8b_MASK; b += k[1]; a += k[0]; break;
+		case 8:
+			b += k[1]; a += k[0]; break;
+		case 7:
+			b += k[1] & LOWER24b_MASK; a += k[0]; break;
+		case 6:
+			b += k[1] & LOWER16b_MASK; a += k[0]; break;
+		case 5:
+			b += k[1] & LOWER8b_MASK; a += k[0]; break;
+		case 4:
+			a += k[0]; break;
+		case 3:
+			a += k[0] & LOWER24b_MASK; break;
+		case 2:
+			a += k[0] & LOWER16b_MASK; break;
+		case 1:
+			a += k[0] & LOWER8b_MASK; break;
+		/* zero length strings require no mixing */
+		case 0:
+			*pc = c;
+			*pb = b;
+			return;
+		};
+	} else {
+		const uint8_t *k = (const uint8_t *)key;
+
+		/* all but the last block: affect some 32 bits of (a, b, c) */
+		while (length > 12) {
+			a += ((uint32_t)k[0]) << RTE_JHASH_BYTE0_SHIFT;
+			a += ((uint32_t)k[1]) << RTE_JHASH_BYTE1_SHIFT;
+			a += ((uint32_t)k[2]) << RTE_JHASH_BYTE2_SHIFT;
+			a += ((uint32_t)k[3]) << RTE_JHASH_BYTE3_SHIFT;
+			b += ((uint32_t)k[4]) << RTE_JHASH_BYTE0_SHIFT;
+			b += ((uint32_t)k[5]) << RTE_JHASH_BYTE1_SHIFT;
+			b += ((uint32_t)k[6]) << RTE_JHASH_BYTE2_SHIFT;
+			b += ((uint32_t)k[7]) << RTE_JHASH_BYTE3_SHIFT;
+			c += ((uint32_t)k[8]) << RTE_JHASH_BYTE0_SHIFT;
+			c += ((uint32_t)k[9]) << RTE_JHASH_BYTE1_SHIFT;
+			c += ((uint32_t)k[10]) << RTE_JHASH_BYTE2_SHIFT;
+			c += ((uint32_t)k[11]) << RTE_JHASH_BYTE3_SHIFT;
+
+			__rte_jhash_mix(a, b, c);
+
+			k += 12;
+			length -= 12;
+		}
+
+		/* last block: affect all 32 bits of (c) */
+		/* all the case statements fall through */
+		switch (length) {
+		case 12:
+			c += ((uint32_t)k[11]) << RTE_JHASH_BYTE3_SHIFT;
+		case 11:
+			c += ((uint32_t)k[10]) << RTE_JHASH_BYTE2_SHIFT;
+		case 10:
+			c += ((uint32_t)k[9]) << RTE_JHASH_BYTE1_SHIFT;
+		case 9:
+			c += ((uint32_t)k[8]) << RTE_JHASH_BYTE0_SHIFT;
+		case 8:
+			b += ((uint32_t)k[7]) << RTE_JHASH_BYTE3_SHIFT;
+		case 7:
+			b += ((uint32_t)k[6]) << RTE_JHASH_BYTE2_SHIFT;
+		case 6:
+			b += ((uint32_t)k[5]) << RTE_JHASH_BYTE1_SHIFT;
+		case 5:
+			b += ((uint32_t)k[4]) << RTE_JHASH_BYTE0_SHIFT;
+		case 4:
+			a += ((uint32_t)k[3]) << RTE_JHASH_BYTE3_SHIFT;
+		case 3:
+			a += ((uint32_t)k[2]) << RTE_JHASH_BYTE2_SHIFT;
+		case 2:
+			a += ((uint32_t)k[1]) << RTE_JHASH_BYTE1_SHIFT;
+		case 1:
+			a += ((uint32_t)k[0]) << RTE_JHASH_BYTE0_SHIFT;
+		break;
+		case 0:
+			*pc = c;
+			*pb = b;
+			return;
+		}
+	}
+
+	__rte_jhash_final(a, b, c);
+
+	*pc = c;
+	*pb = b;
+}
+
+/**
+ * Same as rte_jhash2, but takes two seeds and returns two uint32_ts.
+ * pc and pb must be non-null, and *pc and *pb must both be initialized
+ * with seeds. If you pass in (*pb)=0, the output (*pc) will be
+ * the same as the return value from rte_jhash2.
+ *
+ * @param k
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in units of 4 bytes.
+ * @param pc
+ *   IN: seed OUT: primary hash value.
+ * @param pb
+ *   IN: second seed OUT: secondary hash value.
+ */
+static inline void
+rte_jhash2_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb)
+{
+	uint32_t a, b, c;
+
+	/* Set up the internal state */
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length) << 2) + *pc;
+	c += *pb;
+
+	/* Handle most of the key */
+	while (length > 3) {
+		a += k[0];
+		b += k[1];
+		c += k[2];
+
+		__rte_jhash_mix(a, b, c);
+
+		k += 3;
+		length -= 3;
+	}
+
+	/* Handle the last 3 uint32_t's */
+	switch (length) {
+	case 3:
+		c += k[2];
+	case 2:
+		b += k[1];
+	case 1:
+		a += k[0];
+		__rte_jhash_final(a, b, c);
+	/* case 0: nothing left to add */
+	case 0:
+		break;
+	};
+
+	*pc = c;
+	*pb = b;
+}
 
 /**
  * A special ultra-optimized versions that knows it is hashing exactly
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v2 5/6] hash: remove duplicated code
  2015-04-24 11:23 ` [dpdk-dev] [PATCH v2 0/6] update jhash function Pablo de Lara
                     ` (3 preceding siblings ...)
  2015-04-24 11:23   ` [dpdk-dev] [PATCH v2 4/6] hash: add two new functions to jhash library Pablo de Lara
@ 2015-04-24 11:23   ` Pablo de Lara
  2015-04-24 11:23   ` [dpdk-dev] [PATCH v2 6/6] hash: rename rte_jhash2 to rte_jhash_32b Pablo de Lara
  2015-05-05 14:43   ` [dpdk-dev] [PATCH v3 0/6] update jhash function Pablo de Lara
  6 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-04-24 11:23 UTC (permalink / raw)
  To: dev

rte_jhash is basically like __rte_jhash_2hashes, but it returns only one hash instead of two.
In order to remove duplicated code, rte_jhash calls __rte_jhash_2hashes,
passing 0 as the second seed and returning just the first hash value
(the performance penalty is negligible).

The same is done with rte_jhash2. Also, rte_jhash2 is just a specific case
where keys are multiples of 32 bits, and where no key alignment check is required.
So, to avoid duplicated code, the function calls __rte_jhash_2hashes with check_align = 0
(to use the optimal path).

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 lib/librte_hash/rte_jhash.h |  283 ++++++++++---------------------------------
 1 files changed, 62 insertions(+), 221 deletions(-)

diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index 1e69e40..9f5b833 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -108,22 +108,8 @@ extern "C" {
 #define LOWER16b_MASK rte_le_to_cpu_32(0xffff)
 #define LOWER24b_MASK rte_le_to_cpu_32(0xffffff)
 
-/**
- * The most generic version, hashes an arbitrary sequence
- * of bytes.  No alignment or length assumptions are made about
- * the input key.
- *
- * @param key
- *   Key to calculate hash of.
- * @param length
- *   Length of key in bytes.
- * @param initval
- *   Initialising value of hash.
- * @return
- *   Calculated hash value.
- */
-static inline uint32_t
-rte_jhash(const void *key, uint32_t length, uint32_t initval)
+static inline void
+__rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb, unsigned check_align)
 {
 	uint32_t a, b, c;
 	union {
@@ -132,12 +118,18 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
 	} u;
 
 	/* Set up the internal state */
-	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + initval;
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + *pc;
+	c += *pb;
 
 	u.ptr = key;
 
-	/* Check key alignment. For x86 architecture, first case is always optimal */
-	if (!strcmp(RTE_ARCH,"x86_64") || !strcmp(RTE_ARCH,"i686") || (u.i & 0x3) == 0) {
+	/*
+	 * Check key alignment. For x86 architecture, first case is always optimal
+	 * If check_align is not set, first case will be used
+	 */
+
+	if ((!strcmp(RTE_ARCH,"x86_64") || !strcmp(RTE_ARCH,"i686")
+		|| (!check_align) || (u.i & 0x3) == 0)) {
 		const uint32_t *k = (const uint32_t *)key;
 
 		while (length > 12) {
@@ -178,7 +170,9 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
 			a += k[0] & LOWER8b_MASK; break;
 		/* zero length strings require no mixing */
 		case 0:
-			return c;
+			*pc = c;
+			*pb = b;
+			return;
 		};
 	} else {
 		const uint8_t *k = (const uint8_t *)key;
@@ -233,63 +227,16 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
 			a += ((uint32_t)k[0]) << RTE_JHASH_BYTE0_SHIFT;
 		break;
 		case 0:
-			return c;
+			*pc = c;
+			*pb = b;
+			return;
 		}
 	}
 
 	__rte_jhash_final(a, b, c);
 
-	return c;
-}
-
-/**
- * A special optimized version that handles 1 or more of uint32_ts.
- * The length parameter here is the number of uint32_ts in the key.
- *
- * @param k
- *   Key to calculate hash of.
- * @param length
- *   Length of key in units of 4 bytes.
- * @param initval
- *   Initialising value of hash.
- * @return
- *   Calculated hash value.
- */
-static inline uint32_t
-rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
-{
-	uint32_t a, b, c;
-
-	/* Set up the internal state */
-	a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length) << 2) + initval;
-
-	/* Handle most of the key */
-	while (length > 3) {
-		a += k[0];
-		b += k[1];
-		c += k[2];
-
-		__rte_jhash_mix(a, b, c);
-
-		k += 3;
-		length -= 3;
-	}
-
-	/* Handle the last 3 uint32_t's */
-	switch (length) {
-	case 3:
-		c += k[2];
-	case 2:
-		b += k[1];
-	case 1:
-		a += k[0];
-		__rte_jhash_final(a, b, c);
-	/* case 0: nothing left to add */
-	case 0:
-		break;
-	};
-
-	return c;
+	*pc = c;
+	*pb = b;
 }
 
 /**
@@ -310,127 +257,7 @@ rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
 static inline void
 rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb)
 {
-	uint32_t a, b, c;
-	union {
-		const void *ptr;
-		size_t i;
-	} u;
-
-	/* Set up the internal state */
-	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + *pc;
-	c += *pb;
-
-	u.ptr = key;
-
-	/* Check key alignment. For x86 architecture, first case is always optimal */
-	if (!strcmp(RTE_ARCH,"x86_64") || !strcmp(RTE_ARCH,"i686") || (u.i & 0x3) == 0) {
-		const uint32_t *k = (const uint32_t *)key;
-
-		while (length > 12) {
-			a += k[0];
-			b += k[1];
-			c += k[2];
-
-			__rte_jhash_mix(a, b, c);
-
-			k += 3;
-			length -= 12;
-		}
-
-		switch (length) {
-		case 12:
-			c += k[2]; b += k[1]; a += k[0]; break;
-		case 11:
-			c += k[2] & LOWER24b_MASK; b += k[1]; a += k[0]; break;
-		case 10:
-			c += k[2] & LOWER16b_MASK; b += k[1]; a += k[0]; break;
-		case 9:
-			c += k[2] & LOWER8b_MASK; b += k[1]; a += k[0]; break;
-		case 8:
-			b += k[1]; a += k[0]; break;
-		case 7:
-			b += k[1] & LOWER24b_MASK; a += k[0]; break;
-		case 6:
-			b += k[1] & LOWER16b_MASK; a += k[0]; break;
-		case 5:
-			b += k[1] & LOWER8b_MASK; a += k[0]; break;
-		case 4:
-			a += k[0]; break;
-		case 3:
-			a += k[0] & LOWER24b_MASK; break;
-		case 2:
-			a += k[0] & LOWER16b_MASK; break;
-		case 1:
-			a += k[0] & LOWER8b_MASK; break;
-		/* zero length strings require no mixing */
-		case 0:
-			*pc = c;
-			*pb = b;
-			return;
-		};
-	} else {
-		const uint8_t *k = (const uint8_t *)key;
-
-		/* all but the last block: affect some 32 bits of (a, b, c) */
-		while (length > 12) {
-			a += ((uint32_t)k[0]) << RTE_JHASH_BYTE0_SHIFT;
-			a += ((uint32_t)k[1]) << RTE_JHASH_BYTE1_SHIFT;
-			a += ((uint32_t)k[2]) << RTE_JHASH_BYTE2_SHIFT;
-			a += ((uint32_t)k[3]) << RTE_JHASH_BYTE3_SHIFT;
-			b += ((uint32_t)k[4]) << RTE_JHASH_BYTE0_SHIFT;
-			b += ((uint32_t)k[5]) << RTE_JHASH_BYTE1_SHIFT;
-			b += ((uint32_t)k[6]) << RTE_JHASH_BYTE2_SHIFT;
-			b += ((uint32_t)k[7]) << RTE_JHASH_BYTE3_SHIFT;
-			c += ((uint32_t)k[8]) << RTE_JHASH_BYTE0_SHIFT;
-			c += ((uint32_t)k[9]) << RTE_JHASH_BYTE1_SHIFT;
-			c += ((uint32_t)k[10]) << RTE_JHASH_BYTE2_SHIFT;
-			c += ((uint32_t)k[11]) << RTE_JHASH_BYTE3_SHIFT;
-
-			__rte_jhash_mix(a, b, c);
-
-			k += 12;
-			length -= 12;
-		}
-
-		/* last block: affect all 32 bits of (c) */
-		/* all the case statements fall through */
-		switch (length) {
-		case 12:
-			c += ((uint32_t)k[11]) << RTE_JHASH_BYTE3_SHIFT;
-		case 11:
-			c += ((uint32_t)k[10]) << RTE_JHASH_BYTE2_SHIFT;
-		case 10:
-			c += ((uint32_t)k[9]) << RTE_JHASH_BYTE1_SHIFT;
-		case 9:
-			c += ((uint32_t)k[8]) << RTE_JHASH_BYTE0_SHIFT;
-		case 8:
-			b += ((uint32_t)k[7]) << RTE_JHASH_BYTE3_SHIFT;
-		case 7:
-			b += ((uint32_t)k[6]) << RTE_JHASH_BYTE2_SHIFT;
-		case 6:
-			b += ((uint32_t)k[5]) << RTE_JHASH_BYTE1_SHIFT;
-		case 5:
-			b += ((uint32_t)k[4]) << RTE_JHASH_BYTE0_SHIFT;
-		case 4:
-			a += ((uint32_t)k[3]) << RTE_JHASH_BYTE3_SHIFT;
-		case 3:
-			a += ((uint32_t)k[2]) << RTE_JHASH_BYTE2_SHIFT;
-		case 2:
-			a += ((uint32_t)k[1]) << RTE_JHASH_BYTE1_SHIFT;
-		case 1:
-			a += ((uint32_t)k[0]) << RTE_JHASH_BYTE0_SHIFT;
-		break;
-		case 0:
-			*pc = c;
-			*pb = b;
-			return;
-		}
-	}
-
-	__rte_jhash_final(a, b, c);
-
-	*pc = c;
-	*pb = b;
+	__rte_jhash_2hashes(key, length, pc, pb, 1);
 }
 
 /**
@@ -451,40 +278,54 @@ rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb)
 static inline void
 rte_jhash2_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb)
 {
-	uint32_t a, b, c;
+	__rte_jhash_2hashes((const void *) k, (length << 2), pc, pb, 0);
+}
 
-	/* Set up the internal state */
-	a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length) << 2) + *pc;
-	c += *pb;
+/**
+ * The most generic version, hashes an arbitrary sequence
+ * of bytes.  No alignment or length assumptions are made about
+ * the input key.
+ *
+ * @param key
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in bytes.
+ * @param initval
+ *   Initialising value of hash.
+ * @return
+ *   Calculated hash value.
+ */
+static inline uint32_t
+rte_jhash(const void *key, uint32_t length, uint32_t initval)
+{
+	uint32_t initval2 = 0;
 
-	/* Handle most of the key */
-	while (length > 3) {
-		a += k[0];
-		b += k[1];
-		c += k[2];
+	rte_jhash_2hashes(key, length, &initval, &initval2);
 
-		__rte_jhash_mix(a, b, c);
+	return initval;
+}
 
-		k += 3;
-		length -= 3;
-	}
+/**
+ * A special optimized version that handles 1 or more of uint32_ts.
+ * The length parameter here is the number of uint32_ts in the key.
+ *
+ * @param k
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in units of 4 bytes.
+ * @param initval
+ *   Initialising value of hash.
+ * @return
+ *   Calculated hash value.
+ */
+static inline uint32_t
+rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
+{
+	uint32_t initval2 = 0;
 
-	/* Handle the last 3 uint32_t's */
-	switch (length) {
-	case 3:
-		c += k[2];
-	case 2:
-		b += k[1];
-	case 1:
-		a += k[0];
-		__rte_jhash_final(a, b, c);
-	/* case 0: nothing left to add */
-	case 0:
-		break;
-	};
+	rte_jhash2_2hashes(k, length, &initval, &initval2);
 
-	*pc = c;
-	*pb = b;
+	return initval;
 }
 
 /**
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v2 6/6] hash: rename rte_jhash2 to rte_jhash_32b
  2015-04-24 11:23 ` [dpdk-dev] [PATCH v2 0/6] update jhash function Pablo de Lara
                     ` (4 preceding siblings ...)
  2015-04-24 11:23   ` [dpdk-dev] [PATCH v2 5/6] hash: remove duplicated code Pablo de Lara
@ 2015-04-24 11:23   ` Pablo de Lara
  2015-05-05 14:43   ` [dpdk-dev] [PATCH v3 0/6] update jhash function Pablo de Lara
  6 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-04-24 11:23 UTC (permalink / raw)
  To: dev

Changed the name to something more meaningful,
and marked rte_jhash2 as deprecated.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 app/test/test_func_reentrancy.c |    2 +-
 app/test/test_hash.c            |    4 ++--
 lib/librte_hash/rte_jhash.h     |   18 +++++++++++++++---
 3 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/app/test/test_func_reentrancy.c b/app/test/test_func_reentrancy.c
index dc070af..85504c0 100644
--- a/app/test/test_func_reentrancy.c
+++ b/app/test/test_func_reentrancy.c
@@ -228,7 +228,7 @@ hash_create_free(__attribute__((unused)) void *arg)
 		.entries = 16,
 		.bucket_entries = 4,
 		.key_len = 4,
-		.hash_func = (rte_hash_function)rte_jhash2,
+		.hash_func = (rte_hash_function)rte_jhash_32b,
 		.hash_func_init_val = 0,
 		.socket_id = 0,
 	};
diff --git a/app/test/test_hash.c b/app/test/test_hash.c
index 1da27c5..4ecb11b 100644
--- a/app/test/test_hash.c
+++ b/app/test/test_hash.c
@@ -1177,7 +1177,7 @@ test_hash_add_delete_jhash2(void)
 
 	hash_params_ex.name = "hash_test_jhash2";
 	hash_params_ex.key_len = 4;
-	hash_params_ex.hash_func = (rte_hash_function)rte_jhash2;
+	hash_params_ex.hash_func = (rte_hash_function)rte_jhash_32b;
 
 	handle = rte_hash_create(&hash_params_ex);
 	if (handle == NULL) {
@@ -1216,7 +1216,7 @@ test_hash_add_delete_2_jhash2(void)
 
 	hash_params_ex.name = "hash_test_2_jhash2";
 	hash_params_ex.key_len = 8;
-	hash_params_ex.hash_func = (rte_hash_function)rte_jhash2;
+	hash_params_ex.hash_func = (rte_hash_function)rte_jhash_32b;
 
 	handle = rte_hash_create(&hash_params_ex);
 	if (handle == NULL)
diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index 9f5b833..549e95c 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -46,6 +46,8 @@ extern "C" {
 
 #include <stdint.h>
 #include <string.h>
+
+#include <rte_log.h>
 #include <rte_byteorder.h>
 
 /* jhash.h: Jenkins hash support.
@@ -276,7 +278,7 @@ rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb)
  *   IN: second seed OUT: secondary hash value.
  */
 static inline void
-rte_jhash2_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb)
+rte_jhash_32b_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb)
 {
 	__rte_jhash_2hashes((const void *) k, (length << 2), pc, pb, 0);
 }
@@ -319,15 +321,25 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
  *   Calculated hash value.
  */
 static inline uint32_t
-rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
+rte_jhash_32b(const uint32_t *k, uint32_t length, uint32_t initval)
 {
 	uint32_t initval2 = 0;
 
-	rte_jhash2_2hashes(k, length, &initval, &initval2);
+	rte_jhash_32b_2hashes(k, length, &initval, &initval2);
 
 	return initval;
 }
 
+static inline uint32_t
+rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
+{
+	uint32_t initval2 = 0;
+
+	RTE_LOG(WARNING, HASH, "rte_jhash2 is deprecated\n");
+	rte_jhash_32b_2hashes(k, length, &initval, &initval2);
+
+	return initval;
+}
 /**
  * A special ultra-optimized versions that knows it is hashing exactly
  * 3 words.
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v3 0/6] update jhash function
  2015-04-24 11:23 ` [dpdk-dev] [PATCH v2 0/6] update jhash function Pablo de Lara
                     ` (5 preceding siblings ...)
  2015-04-24 11:23   ` [dpdk-dev] [PATCH v2 6/6] hash: rename rte_jhash2 to rte_jhash_32b Pablo de Lara
@ 2015-05-05 14:43   ` Pablo de Lara
  2015-05-05 14:43     ` [dpdk-dev] [PATCH v3 1/6] test/hash: move hash function perf tests to separate file Pablo de Lara
                       ` (6 more replies)
  6 siblings, 7 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-05-05 14:43 UTC (permalink / raw)
  To: dev

The Jenkins hash function was originally developed in 1996,
and was integrated in the first versions of DPDK.
The function was improved in 2006,
achieving up to 60% better performance compared to the original one.

This patchset updates the current jhash in DPDK,
including two new functions that generate two hashes from a single key.

It also moves the existing hash function performance tests into
another file, to make them quicker to run.

changes in v3:

- Update rte_jhash_1word, rte_jhash_2words and rte_jhash_3words
  functions

changes in v2:

- Split the single commit into three commits: one that updates the existing
  functions, another that adds two new functions, and a third that uses one
  of those functions as a base to be called by the other ones.
- Remove some unnecessary ifdefs in the code.
- Add new macros to help with the reuse of constants
- Separate hash function performance tests to another file
  and improve cycle measurements.
- Rename existing function rte_jhash2 to rte_jhash_32b
  (something more meaningful) and mark rte_jhash2 as
  deprecated

De Lara Guarch, Pablo (1):
  hash: rename rte_jhash2 to rte_jhash_32b

Pablo de Lara (5):
  test/hash: move hash function perf tests to separate file
  test/hash: improve accuracy on cycle measurements
  hash: update jhash function with the latest available
  hash: add two new functions to jhash library
  hash: remove duplicated code

 app/test/Makefile               |    1 +
 app/test/test_func_reentrancy.c |    2 +-
 app/test/test_hash.c            |    4 +-
 app/test/test_hash_func_perf.c  |  145 +++++++++++++++++
 app/test/test_hash_perf.c       |   71 +--------
 lib/librte_hash/rte_jhash.h     |  336 +++++++++++++++++++++++++++++----------
 6 files changed, 400 insertions(+), 159 deletions(-)
 create mode 100644 app/test/test_hash_func_perf.c

-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v3 1/6] test/hash: move hash function perf tests to separate file
  2015-05-05 14:43   ` [dpdk-dev] [PATCH v3 0/6] update jhash function Pablo de Lara
@ 2015-05-05 14:43     ` Pablo de Lara
  2015-05-05 14:43     ` [dpdk-dev] [PATCH v3 2/6] test/hash: improve accuracy on cycle measurements Pablo de Lara
                       ` (5 subsequent siblings)
  6 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-05-05 14:43 UTC (permalink / raw)
  To: dev

This patch moves the hash function performance tests to a separate file,
so users can check the performance of the existing hash functions more quickly,
without having to run all the other hash operation performance tests,
which take some time.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 app/test/Makefile              |    1 +
 app/test/test_hash_func_perf.c |  145 ++++++++++++++++++++++++++++++++++++++++
 app/test/test_hash_perf.c      |   71 +-------------------
 3 files changed, 147 insertions(+), 70 deletions(-)
 create mode 100644 app/test/test_hash_func_perf.c

diff --git a/app/test/Makefile b/app/test/Makefile
index 4aca77c..77a9c42 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -83,6 +83,7 @@ SRCS-y += test_memcpy_perf.c
 
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash.c
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_perf.c
+SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_func_perf.c
 
 SRCS-$(CONFIG_RTE_LIBRTE_LPM) += test_lpm.c
 SRCS-$(CONFIG_RTE_LIBRTE_LPM) += test_lpm6.c
diff --git a/app/test/test_hash_func_perf.c b/app/test/test_hash_func_perf.c
new file mode 100644
index 0000000..ba31c53
--- /dev/null
+++ b/app/test/test_hash_func_perf.c
@@ -0,0 +1,145 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_cycles.h>
+#include <rte_random.h>
+
+#include "test.h"
+
+#include <rte_hash.h>
+#include <rte_jhash.h>
+#include <rte_hash_crc.h>
+
+/*******************************************************************************
+ * Hash function performance test configuration section. Each performance test
+ * will be performed HASHTEST_ITERATIONS times.
+ *
+ * The three arrays below control what tests are performed. Every combination
+ * from the array entries is tested.
+ */
+#define HASHTEST_ITERATIONS 1000000
+
+static rte_hash_function hashtest_funcs[] = {rte_jhash, rte_hash_crc};
+static uint32_t hashtest_initvals[] = {0};
+static uint32_t hashtest_key_lens[] = {2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, 31, 32, 33, 63, 64};
+/******************************************************************************/
+
+/*
+ * To help print out name of hash functions.
+ */
+static const char *get_hash_name(rte_hash_function f)
+{
+	if (f == rte_jhash)
+		return "jhash";
+
+	if (f == rte_hash_crc)
+		return "rte_hash_crc";
+
+	return "UnknownHash";
+}
+
+/*
+ * Test a hash function.
+ */
+static void run_hash_func_test(rte_hash_function f, uint32_t init_val,
+		uint32_t key_len)
+{
+	static uint8_t key[RTE_HASH_KEY_LENGTH_MAX];
+	uint64_t ticks = 0, start, end;
+	unsigned i, j;
+
+	for (i = 0; i < HASHTEST_ITERATIONS; i++) {
+
+		for (j = 0; j < key_len; j++)
+			key[j] = (uint8_t) rte_rand();
+
+		start = rte_rdtsc();
+		f(key, key_len, init_val);
+		end = rte_rdtsc();
+		ticks += end - start;
+	}
+
+	printf("%-12s, %-18u, %-13u, %.02f\n", get_hash_name(f), (unsigned) key_len,
+			(unsigned) init_val, (double)ticks / HASHTEST_ITERATIONS);
+}
+
+/*
+ * Test all hash functions.
+ */
+static void run_hash_func_tests(void)
+{
+	unsigned i, j, k;
+
+	printf(" *** Hash function performance test results ***\n");
+	printf(" Number of iterations for each test = %d\n",
+			HASHTEST_ITERATIONS);
+	printf("Hash Func.  , Key Length (bytes), Initial value, Ticks/Op.\n");
+
+	for (i = 0;
+	     i < sizeof(hashtest_funcs) / sizeof(rte_hash_function);
+	     i++) {
+		for (j = 0;
+		     j < sizeof(hashtest_initvals) / sizeof(uint32_t);
+		     j++) {
+			for (k = 0;
+			     k < sizeof(hashtest_key_lens) / sizeof(uint32_t);
+			     k++) {
+				run_hash_func_test(hashtest_funcs[i],
+						hashtest_initvals[j],
+						hashtest_key_lens[k]);
+			}
+		}
+	}
+}
+
+static int
+test_hash_func_perf(void)
+{
+	run_hash_func_tests();
+
+	return 0;
+}
+
+static struct test_command hash_func_perf_cmd = {
+	.command = "hash_func_perf_autotest",
+	.callback = test_hash_func_perf,
+};
+REGISTER_TEST_COMMAND(hash_func_perf_cmd);
diff --git a/app/test/test_hash_perf.c b/app/test/test_hash_perf.c
index 6eabb21..d0e5ce0 100644
--- a/app/test/test_hash_perf.c
+++ b/app/test/test_hash_perf.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -85,20 +85,6 @@ struct tbl_perf_test_params {
 #define LOCAL_FBK_HASH_ENTRIES_MAX (1 << 15)
 
 /*******************************************************************************
- * Hash function performance test configuration section. Each performance test
- * will be performed HASHTEST_ITERATIONS times.
- *
- * The five arrays below control what tests are performed. Every combination
- * from the array entries is tested.
- */
-#define HASHTEST_ITERATIONS 1000000
-
-static rte_hash_function hashtest_funcs[] = {rte_jhash, rte_hash_crc};
-static uint32_t hashtest_initvals[] = {0};
-static uint32_t hashtest_key_lens[] = {2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, 31, 32, 33, 63, 64};
-/******************************************************************************/
-
-/*******************************************************************************
  * Hash table performance test configuration section.
  */
 struct tbl_perf_test_params tbl_perf_params[] =
@@ -617,60 +603,6 @@ static int run_all_tbl_perf_tests(void)
 	return 0;
 }
 
-/*
- * Test a hash function.
- */
-static void run_hash_func_test(rte_hash_function f, uint32_t init_val,
-		uint32_t key_len)
-{
-	static uint8_t key[RTE_HASH_KEY_LENGTH_MAX];
-	uint64_t ticks = 0, start, end;
-	unsigned i, j;
-
-	for (i = 0; i < HASHTEST_ITERATIONS; i++) {
-
-		for (j = 0; j < key_len; j++)
-			key[j] = (uint8_t) rte_rand();
-
-		start = rte_rdtsc();
-		f(key, key_len, init_val);
-		end = rte_rdtsc();
-		ticks += end - start;
-	}
-
-	printf("%-12s, %-18u, %-13u, %.02f\n", get_hash_name(f), (unsigned) key_len,
-			(unsigned) init_val, (double)ticks / HASHTEST_ITERATIONS);
-}
-
-/*
- * Test all hash functions.
- */
-static void run_hash_func_tests(void)
-{
-	unsigned i, j, k;
-
-	printf("\n\n *** Hash function performance test results ***\n");
-	printf(" Number of iterations for each test = %d\n",
-			HASHTEST_ITERATIONS);
-	printf("Hash Func.  , Key Length (bytes), Initial value, Ticks/Op.\n");
-
-	for (i = 0;
-	     i < sizeof(hashtest_funcs) / sizeof(rte_hash_function);
-	     i++) {
-		for (j = 0;
-		     j < sizeof(hashtest_initvals) / sizeof(uint32_t);
-		     j++) {
-			for (k = 0;
-			     k < sizeof(hashtest_key_lens) / sizeof(uint32_t);
-			     k++) {
-				run_hash_func_test(hashtest_funcs[i],
-						hashtest_initvals[j],
-						hashtest_key_lens[k]);
-			}
-		}
-	}
-}
-
 /* Control operation of performance testing of fbk hash. */
 #define LOAD_FACTOR 0.667	/* How full to make the hash table. */
 #define TEST_SIZE 1000000	/* How many operations to time. */
@@ -757,7 +689,6 @@ test_hash_perf(void)
 {
 	if (run_all_tbl_perf_tests() < 0)
 		return -1;
-	run_hash_func_tests();
 
 	if (fbk_hash_perf_test() < 0)
 		return -1;
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v3 2/6] test/hash: improve accuracy on cycle measurements
  2015-05-05 14:43   ` [dpdk-dev] [PATCH v3 0/6] update jhash function Pablo de Lara
  2015-05-05 14:43     ` [dpdk-dev] [PATCH v3 1/6] test/hash: move hash function perf tests to separate file Pablo de Lara
@ 2015-05-05 14:43     ` Pablo de Lara
  2015-05-05 14:43     ` [dpdk-dev] [PATCH v3 3/6] hash: update jhash function with the latest available Pablo de Lara
                       ` (4 subsequent siblings)
  6 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-05-05 14:43 UTC (permalink / raw)
  To: dev

Cycles per hash calculation were measured per single operation.
It is much more accurate to run several iterations between measurements
and divide by the number of iterations.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 app/test/test_hash_func_perf.c |   18 +++++++++---------
 1 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/app/test/test_hash_func_perf.c b/app/test/test_hash_func_perf.c
index ba31c53..004c9be 100644
--- a/app/test/test_hash_func_perf.c
+++ b/app/test/test_hash_func_perf.c
@@ -82,21 +82,21 @@ static const char *get_hash_name(rte_hash_function f)
 static void run_hash_func_test(rte_hash_function f, uint32_t init_val,
 		uint32_t key_len)
 {
-	static uint8_t key[RTE_HASH_KEY_LENGTH_MAX];
-	uint64_t ticks = 0, start, end;
+	static uint8_t key[HASHTEST_ITERATIONS][RTE_HASH_KEY_LENGTH_MAX];
+	uint64_t ticks, start, end;
 	unsigned i, j;
 
 	for (i = 0; i < HASHTEST_ITERATIONS; i++) {
-
 		for (j = 0; j < key_len; j++)
-			key[j] = (uint8_t) rte_rand();
-
-		start = rte_rdtsc();
-		f(key, key_len, init_val);
-		end = rte_rdtsc();
-		ticks += end - start;
+			key[i][j] = (uint8_t) rte_rand();
 	}
 
+	start = rte_rdtsc();
+	for (i = 0; i < HASHTEST_ITERATIONS; i++)
+		f(key[i], key_len, init_val);
+	end = rte_rdtsc();
+	ticks = end - start;
+
 	printf("%-12s, %-18u, %-13u, %.02f\n", get_hash_name(f), (unsigned) key_len,
 			(unsigned) init_val, (double)ticks / HASHTEST_ITERATIONS);
 }
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v3 3/6] hash: update jhash function with the latest available
  2015-05-05 14:43   ` [dpdk-dev] [PATCH v3 0/6] update jhash function Pablo de Lara
  2015-05-05 14:43     ` [dpdk-dev] [PATCH v3 1/6] test/hash: move hash function perf tests to separate file Pablo de Lara
  2015-05-05 14:43     ` [dpdk-dev] [PATCH v3 2/6] test/hash: improve accuracy on cycle measurements Pablo de Lara
@ 2015-05-05 14:43     ` Pablo de Lara
  2015-05-06  0:35       ` Ananyev, Konstantin
  2015-05-05 14:43     ` [dpdk-dev] [PATCH v3 4/6] hash: add two new functions to jhash library Pablo de Lara
                       ` (3 subsequent siblings)
  6 siblings, 1 reply; 62+ messages in thread
From: Pablo de Lara @ 2015-05-05 14:43 UTC (permalink / raw)
  To: dev

The Jenkins hash function was originally developed in 1996,
and was integrated in the first versions of DPDK.
The function was improved in 2006,
achieving up to 60% better performance compared to the original one.

This patch integrates that code into the rte_jhash library.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 lib/librte_hash/rte_jhash.h |  261 +++++++++++++++++++++++++++++++------------
 1 files changed, 188 insertions(+), 73 deletions(-)

diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index a4bf5a1..0e96b7c 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -45,38 +45,68 @@ extern "C" {
 #endif
 
 #include <stdint.h>
+#include <string.h>
+#include <rte_byteorder.h>
 
 /* jhash.h: Jenkins hash support.
  *
- * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
+ * Copyright (C) 2006 Bob Jenkins (bob_jenkins@burtleburtle.net)
  *
  * http://burtleburtle.net/bob/hash/
  *
  * These are the credits from Bob's sources:
  *
- * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
- * hash(), hash2(), hash3, and mix() are externally useful functions.
- * Routines to test the hash are included if SELF_TEST is defined.
- * You can use this free for any purpose.  It has no warranty.
+ * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+ *
+ * These are functions for producing 32-bit hashes for hash table lookup.
+ * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
+ * are externally useful functions.  Routines to test the hash are included
+ * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
+ * the public domain.  It has no warranty.
  *
  * $FreeBSD$
  */
 
+#define rot(x, k) (((x) << (k)) | ((x) >> (32-(k))))
+
 /** @internal Internal function. NOTE: Arguments are modified. */
 #define __rte_jhash_mix(a, b, c) do { \
-	a -= b; a -= c; a ^= (c>>13); \
-	b -= c; b -= a; b ^= (a<<8); \
-	c -= a; c -= b; c ^= (b>>13); \
-	a -= b; a -= c; a ^= (c>>12); \
-	b -= c; b -= a; b ^= (a<<16); \
-	c -= a; c -= b; c ^= (b>>5); \
-	a -= b; a -= c; a ^= (c>>3); \
-	b -= c; b -= a; b ^= (a<<10); \
-	c -= a; c -= b; c ^= (b>>15); \
+	a -= c; a ^= rot(c, 4); c += b; \
+	b -= a; b ^= rot(a, 6); a += c; \
+	c -= b; c ^= rot(b, 8); b += a; \
+	a -= c; a ^= rot(c, 16); c += b; \
+	b -= a; b ^= rot(a, 19); a += c; \
+	c -= b; c ^= rot(b, 4); b += a; \
+} while (0)
+
+#define __rte_jhash_final(a, b, c) do { \
+	c ^= b; c -= rot(b, 14); \
+	a ^= c; a -= rot(c, 11); \
+	b ^= a; b -= rot(a, 25); \
+	c ^= b; c -= rot(b, 16); \
+	a ^= c; a -= rot(c, 4);  \
+	b ^= a; b -= rot(a, 14); \
+	c ^= b; c -= rot(b, 24); \
 } while (0)
 
 /** The golden ratio: an arbitrary value. */
-#define RTE_JHASH_GOLDEN_RATIO      0x9e3779b9
+#define RTE_JHASH_GOLDEN_RATIO      0xdeadbeef
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+#define RTE_JHASH_BYTE0_SHIFT 0
+#define RTE_JHASH_BYTE1_SHIFT 8
+#define RTE_JHASH_BYTE2_SHIFT 16
+#define RTE_JHASH_BYTE3_SHIFT 24
+#else
+#define RTE_JHASH_BYTE0_SHIFT 24
+#define RTE_JHASH_BYTE1_SHIFT 16
+#define RTE_JHASH_BYTE2_SHIFT 8
+#define RTE_JHASH_BYTE3_SHIFT 0
+#endif
+
+#define LOWER8b_MASK rte_le_to_cpu_32(0xff)
+#define LOWER16b_MASK rte_le_to_cpu_32(0xffff)
+#define LOWER24b_MASK rte_le_to_cpu_32(0xffffff)
 
 /**
  * The most generic version, hashes an arbitrary sequence
@@ -95,42 +125,119 @@ extern "C" {
 static inline uint32_t
 rte_jhash(const void *key, uint32_t length, uint32_t initval)
 {
-	uint32_t a, b, c, len;
-	const uint8_t *k = (const uint8_t *)key;
-	const uint32_t *k32 = (const uint32_t *)key;
+	uint32_t a, b, c;
+	union {
+		const void *ptr;
+		size_t i;
+	} u;
 
-	len = length;
-	a = b = RTE_JHASH_GOLDEN_RATIO;
-	c = initval;
+	/* Set up the internal state */
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + initval;
 
-	while (len >= 12) {
-		a += k32[0];
-		b += k32[1];
-		c += k32[2];
+	u.ptr = key;
 
-		__rte_jhash_mix(a,b,c);
+	/* Check key alignment. For x86 architecture, first case is always optimal */
+	if (!strcmp(RTE_ARCH,"x86_64") || !strcmp(RTE_ARCH,"i686") || (u.i & 0x3) == 0) {
+		const uint32_t *k = (const uint32_t *)key;
 
-		k += (3 * sizeof(uint32_t)), k32 += 3;
-		len -= (3 * sizeof(uint32_t));
-	}
+		while (length > 12) {
+			a += k[0];
+			b += k[1];
+			c += k[2];
 
-	c += length;
-	switch (len) {
-		case 11: c += ((uint32_t)k[10] << 24);
-		case 10: c += ((uint32_t)k[9] << 16);
-		case 9 : c += ((uint32_t)k[8] << 8);
-		case 8 : b += ((uint32_t)k[7] << 24);
-		case 7 : b += ((uint32_t)k[6] << 16);
-		case 6 : b += ((uint32_t)k[5] << 8);
-		case 5 : b += k[4];
-		case 4 : a += ((uint32_t)k[3] << 24);
-		case 3 : a += ((uint32_t)k[2] << 16);
-		case 2 : a += ((uint32_t)k[1] << 8);
-		case 1 : a += k[0];
-		default: break;
-	};
+			__rte_jhash_mix(a, b, c);
+
+			k += 3;
+			length -= 12;
+		}
+
+		switch (length) {
+		case 12:
+			c += k[2]; b += k[1]; a += k[0]; break;
+		case 11:
+			c += k[2] & LOWER24b_MASK; b += k[1]; a += k[0]; break;
+		case 10:
+			c += k[2] & LOWER16b_MASK; b += k[1]; a += k[0]; break;
+		case 9:
+			c += k[2] & LOWER8b_MASK; b += k[1]; a += k[0]; break;
+		case 8:
+			b += k[1]; a += k[0]; break;
+		case 7:
+			b += k[1] & LOWER24b_MASK; a += k[0]; break;
+		case 6:
+			b += k[1] & LOWER16b_MASK; a += k[0]; break;
+		case 5:
+			b += k[1] & LOWER8b_MASK; a += k[0]; break;
+		case 4:
+			a += k[0]; break;
+		case 3:
+			a += k[0] & LOWER24b_MASK; break;
+		case 2:
+			a += k[0] & LOWER16b_MASK; break;
+		case 1:
+			a += k[0] & LOWER8b_MASK; break;
+		/* zero length strings require no mixing */
+		case 0:
+			return c;
+		};
+	} else {
+		const uint8_t *k = (const uint8_t *)key;
+
+		/* all but the last block: affect some 32 bits of (a, b, c) */
+		while (length > 12) {
+			a += ((uint32_t)k[0]) << RTE_JHASH_BYTE0_SHIFT;
+			a += ((uint32_t)k[1]) << RTE_JHASH_BYTE1_SHIFT;
+			a += ((uint32_t)k[2]) << RTE_JHASH_BYTE2_SHIFT;
+			a += ((uint32_t)k[3]) << RTE_JHASH_BYTE3_SHIFT;
+			b += ((uint32_t)k[4]) << RTE_JHASH_BYTE0_SHIFT;
+			b += ((uint32_t)k[5]) << RTE_JHASH_BYTE1_SHIFT;
+			b += ((uint32_t)k[6]) << RTE_JHASH_BYTE2_SHIFT;
+			b += ((uint32_t)k[7]) << RTE_JHASH_BYTE3_SHIFT;
+			c += ((uint32_t)k[8]) << RTE_JHASH_BYTE0_SHIFT;
+			c += ((uint32_t)k[9]) << RTE_JHASH_BYTE1_SHIFT;
+			c += ((uint32_t)k[10]) << RTE_JHASH_BYTE2_SHIFT;
+			c += ((uint32_t)k[11]) << RTE_JHASH_BYTE3_SHIFT;
+
+			__rte_jhash_mix(a, b, c);
+
+			k += 12;
+			length -= 12;
+		}
+
+		/* last block: affect all 32 bits of (c) */
+		/* all the case statements fall through */
+		switch (length) {
+		case 12:
+			c += ((uint32_t)k[11]) << RTE_JHASH_BYTE3_SHIFT;
+		case 11:
+			c += ((uint32_t)k[10]) << RTE_JHASH_BYTE2_SHIFT;
+		case 10:
+			c += ((uint32_t)k[9]) << RTE_JHASH_BYTE1_SHIFT;
+		case 9:
+			c += ((uint32_t)k[8]) << RTE_JHASH_BYTE0_SHIFT;
+		case 8:
+			b += ((uint32_t)k[7]) << RTE_JHASH_BYTE3_SHIFT;
+		case 7:
+			b += ((uint32_t)k[6]) << RTE_JHASH_BYTE2_SHIFT;
+		case 6:
+			b += ((uint32_t)k[5]) << RTE_JHASH_BYTE1_SHIFT;
+		case 5:
+			b += ((uint32_t)k[4]) << RTE_JHASH_BYTE0_SHIFT;
+		case 4:
+			a += ((uint32_t)k[3]) << RTE_JHASH_BYTE3_SHIFT;
+		case 3:
+			a += ((uint32_t)k[2]) << RTE_JHASH_BYTE2_SHIFT;
+		case 2:
+			a += ((uint32_t)k[1]) << RTE_JHASH_BYTE1_SHIFT;
+		case 1:
+			a += ((uint32_t)k[0]) << RTE_JHASH_BYTE0_SHIFT;
+		break;
+		case 0:
+			return c;
+		}
+	}
 
-	__rte_jhash_mix(a,b,c);
+	__rte_jhash_final(a, b, c);
 
 	return c;
 }
@@ -151,33 +258,51 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
 static inline uint32_t
 rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
 {
-	uint32_t a, b, c, len;
+	uint32_t a, b, c;
 
-	a = b = RTE_JHASH_GOLDEN_RATIO;
-	c = initval;
-	len = length;
+	/* Set up the internal state */
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length) << 2) + initval;
 
-	while (len >= 3) {
+	/* Handle most of the key */
+	while (length > 3) {
 		a += k[0];
 		b += k[1];
 		c += k[2];
+
 		__rte_jhash_mix(a, b, c);
-		k += 3; len -= 3;
-	}
 
-	c += length * 4;
+		k += 3;
+		length -= 3;
+	}
 
-	switch (len) {
-		case 2 : b += k[1];
-		case 1 : a += k[0];
-		default: break;
+	/* Handle the last 3 uint32_t's */
+	switch (length) {
+	case 3:
+		c += k[2];
+	case 2:
+		b += k[1];
+	case 1:
+		a += k[0];
+		__rte_jhash_final(a, b, c);
+	/* case 0: nothing left to add */
+	case 0:
+		break;
 	};
 
-	__rte_jhash_mix(a,b,c);
-
 	return c;
 }
 
+static inline uint32_t
+__rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
+{
+	a += RTE_JHASH_GOLDEN_RATIO + initval;
+	b += RTE_JHASH_GOLDEN_RATIO + initval;
+	c += RTE_JHASH_GOLDEN_RATIO + initval;
+
+	__rte_jhash_final(a, b, c);
+
+	return c;
+}
 
 /**
  * A special ultra-optimized versions that knows it is hashing exactly
@@ -197,17 +322,7 @@ rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
 static inline uint32_t
 rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
 {
-	a += RTE_JHASH_GOLDEN_RATIO;
-	b += RTE_JHASH_GOLDEN_RATIO;
-	c += initval;
-
-	__rte_jhash_mix(a, b, c);
-
-	/*
-	 * NOTE: In particular the "c += length; __rte_jhash_mix(a,b,c);"
-	 *       normally done at the end is not done here.
-	 */
-	return c;
+	return __rte_jhash_3words(a + 12, b + 12, c + 12, initval);
 }
 
 /**
@@ -226,7 +341,7 @@ rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
 static inline uint32_t
 rte_jhash_2words(uint32_t a, uint32_t b, uint32_t initval)
 {
-	return rte_jhash_3words(a, b, 0, initval);
+	return __rte_jhash_3words(a + 8, b + 8, 8, initval);
 }
 
 /**
@@ -243,7 +358,7 @@ rte_jhash_2words(uint32_t a, uint32_t b, uint32_t initval)
 static inline uint32_t
 rte_jhash_1word(uint32_t a, uint32_t initval)
 {
-	return rte_jhash_3words(a, 0, 0, initval);
+	return __rte_jhash_3words(a + 4, 4, 4, initval);
 }
 
 #ifdef __cplusplus
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v3 4/6] hash: add two new functions to jhash library
  2015-05-05 14:43   ` [dpdk-dev] [PATCH v3 0/6] update jhash function Pablo de Lara
                       ` (2 preceding siblings ...)
  2015-05-05 14:43     ` [dpdk-dev] [PATCH v3 3/6] hash: update jhash function with the latest available Pablo de Lara
@ 2015-05-05 14:43     ` Pablo de Lara
  2015-05-05 14:43     ` [dpdk-dev] [PATCH v3 5/6] hash: remove duplicated code Pablo de Lara
                       ` (2 subsequent siblings)
  6 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-05-05 14:43 UTC (permalink / raw)
  To: dev

With the jhash update, two new functions were introduced:

- rte_jhash_2hashes: Same as rte_jhash, but takes two seeds
                     and returns two hashes (uint32_ts)

- rte_jhash2_2hashes: Same as rte_jhash2, but takes two seeds
                     and returns two hashes (uint32_ts)

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 lib/librte_hash/rte_jhash.h |  195 +++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 195 insertions(+), 0 deletions(-)

diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index 0e96b7c..9e82d06 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -292,6 +292,201 @@ rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
 	return c;
 }
 
+/**
+ * Same as rte_jhash, but takes two seeds and returns two uint32_ts.
+ * pc and pb must be non-null, and *pc and *pb must both be initialized
+ * with seeds. If you pass in (*pb)=0, the output (*pc) will be
+ * the same as the return value from rte_jhash.
+ *
+ * @param key
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in bytes.
+ * @param pc
+ *   IN: seed OUT: primary hash value.
+ * @param pb
+ *   IN: second seed OUT: secondary hash value.
+ */
+static inline void
+rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb)
+{
+	uint32_t a, b, c;
+	union {
+		const void *ptr;
+		size_t i;
+	} u;
+
+	/* Set up the internal state */
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + *pc;
+	c += *pb;
+
+	u.ptr = key;
+
+	/* Check key alignment. For x86 architecture, first case is always optimal */
+	if (!strcmp(RTE_ARCH,"x86_64") || !strcmp(RTE_ARCH,"i686") || (u.i & 0x3) == 0) {
+		const uint32_t *k = (const uint32_t *)key;
+
+		while (length > 12) {
+			a += k[0];
+			b += k[1];
+			c += k[2];
+
+			__rte_jhash_mix(a, b, c);
+
+			k += 3;
+			length -= 12;
+		}
+
+		switch (length) {
+		case 12:
+			c += k[2]; b += k[1]; a += k[0]; break;
+		case 11:
+			c += k[2] & LOWER24b_MASK; b += k[1]; a += k[0]; break;
+		case 10:
+			c += k[2] & LOWER16b_MASK; b += k[1]; a += k[0]; break;
+		case 9:
+			c += k[2] & LOWER8b_MASK; b += k[1]; a += k[0]; break;
+		case 8:
+			b += k[1]; a += k[0]; break;
+		case 7:
+			b += k[1] & LOWER24b_MASK; a += k[0]; break;
+		case 6:
+			b += k[1] & LOWER16b_MASK; a += k[0]; break;
+		case 5:
+			b += k[1] & LOWER8b_MASK; a += k[0]; break;
+		case 4:
+			a += k[0]; break;
+		case 3:
+			a += k[0] & LOWER24b_MASK; break;
+		case 2:
+			a += k[0] & LOWER16b_MASK; break;
+		case 1:
+			a += k[0] & LOWER8b_MASK; break;
+		/* zero length strings require no mixing */
+		case 0:
+			*pc = c;
+			*pb = b;
+			return;
+		};
+	} else {
+		const uint8_t *k = (const uint8_t *)key;
+
+		/* all but the last block: affect some 32 bits of (a, b, c) */
+		while (length > 12) {
+			a += ((uint32_t)k[0]) << RTE_JHASH_BYTE0_SHIFT;
+			a += ((uint32_t)k[1]) << RTE_JHASH_BYTE1_SHIFT;
+			a += ((uint32_t)k[2]) << RTE_JHASH_BYTE2_SHIFT;
+			a += ((uint32_t)k[3]) << RTE_JHASH_BYTE3_SHIFT;
+			b += ((uint32_t)k[4]) << RTE_JHASH_BYTE0_SHIFT;
+			b += ((uint32_t)k[5]) << RTE_JHASH_BYTE1_SHIFT;
+			b += ((uint32_t)k[6]) << RTE_JHASH_BYTE2_SHIFT;
+			b += ((uint32_t)k[7]) << RTE_JHASH_BYTE3_SHIFT;
+			c += ((uint32_t)k[8]) << RTE_JHASH_BYTE0_SHIFT;
+			c += ((uint32_t)k[9]) << RTE_JHASH_BYTE1_SHIFT;
+			c += ((uint32_t)k[10]) << RTE_JHASH_BYTE2_SHIFT;
+			c += ((uint32_t)k[11]) << RTE_JHASH_BYTE3_SHIFT;
+
+			__rte_jhash_mix(a, b, c);
+
+			k += 12;
+			length -= 12;
+		}
+
+		/* last block: affect all 32 bits of (c) */
+		/* all the case statements fall through */
+		switch (length) {
+		case 12:
+			c += ((uint32_t)k[11]) << RTE_JHASH_BYTE3_SHIFT;
+		case 11:
+			c += ((uint32_t)k[10]) << RTE_JHASH_BYTE2_SHIFT;
+		case 10:
+			c += ((uint32_t)k[9]) << RTE_JHASH_BYTE1_SHIFT;
+		case 9:
+			c += ((uint32_t)k[8]) << RTE_JHASH_BYTE0_SHIFT;
+		case 8:
+			b += ((uint32_t)k[7]) << RTE_JHASH_BYTE3_SHIFT;
+		case 7:
+			b += ((uint32_t)k[6]) << RTE_JHASH_BYTE2_SHIFT;
+		case 6:
+			b += ((uint32_t)k[5]) << RTE_JHASH_BYTE1_SHIFT;
+		case 5:
+			b += ((uint32_t)k[4]) << RTE_JHASH_BYTE0_SHIFT;
+		case 4:
+			a += ((uint32_t)k[3]) << RTE_JHASH_BYTE3_SHIFT;
+		case 3:
+			a += ((uint32_t)k[2]) << RTE_JHASH_BYTE2_SHIFT;
+		case 2:
+			a += ((uint32_t)k[1]) << RTE_JHASH_BYTE1_SHIFT;
+		case 1:
+			a += ((uint32_t)k[0]) << RTE_JHASH_BYTE0_SHIFT;
+		break;
+		case 0:
+			*pc = c;
+			*pb = b;
+			return;
+		}
+	}
+
+	__rte_jhash_final(a, b, c);
+
+	*pc = c;
+	*pb = b;
+}
+
+/**
+ * Same as rte_jhash2, but takes two seeds and returns two uint32_ts.
+ * pc and pb must be non-null, and *pc and *pb must both be initialized
+ * with seeds. If you pass in (*pb)=0, the output (*pc) will be
+ * the same as the return value from rte_jhash2.
+ *
+ * @param k
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in units of 4 bytes.
+ * @param pc
+ *   IN: seed OUT: primary hash value.
+ * @param pb
+ *   IN: second seed OUT: secondary hash value.
+ */
+static inline void
+rte_jhash2_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb)
+{
+	uint32_t a, b, c;
+
+	/* Set up the internal state */
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length) << 2) + *pc;
+	c += *pb;
+
+	/* Handle most of the key */
+	while (length > 3) {
+		a += k[0];
+		b += k[1];
+		c += k[2];
+
+		__rte_jhash_mix(a, b, c);
+
+		k += 3;
+		length -= 3;
+	}
+
+	/* Handle the last 3 uint32_t's */
+	switch (length) {
+	case 3:
+		c += k[2];
+	case 2:
+		b += k[1];
+	case 1:
+		a += k[0];
+		__rte_jhash_final(a, b, c);
+	/* case 0: nothing left to add */
+	case 0:
+		break;
+	};
+
+	*pc = c;
+	*pb = b;
+}
+
 static inline uint32_t
 __rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
 {
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v3 5/6] hash: remove duplicated code
  2015-05-05 14:43   ` [dpdk-dev] [PATCH v3 0/6] update jhash function Pablo de Lara
                       ` (3 preceding siblings ...)
  2015-05-05 14:43     ` [dpdk-dev] [PATCH v3 4/6] hash: add two new functions to jhash library Pablo de Lara
@ 2015-05-05 14:43     ` Pablo de Lara
  2015-05-05 14:43     ` [dpdk-dev] [PATCH v3 6/6] hash: rename rte_jhash2 to rte_jhash_32b Pablo de Lara
  2015-05-12 11:02     ` [dpdk-dev] [PATCH v4 0/6] update jhash function Pablo de Lara
  6 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-05-05 14:43 UTC (permalink / raw)
  To: dev

rte_jhash is basically the same as rte_jhash_2hashes, but it returns only one hash instead of two.
To remove the duplicated code, rte_jhash now calls rte_jhash_2hashes,
passing 0 as the second seed and returning just the first hash value.
(The performance penalty is negligible.)

The same is done with rte_jhash2. Also, rte_jhash2 is just a specific case
where key lengths are a multiple of 32 bits and no key alignment check is required.
So, to avoid duplicated code, the function ends up calling __rte_jhash_2hashes with check_align = 0
(to use the optimal path).

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 lib/librte_hash/rte_jhash.h |  283 ++++++++++---------------------------------
 1 files changed, 62 insertions(+), 221 deletions(-)

diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index 9e82d06..6f05c4c 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -108,22 +108,8 @@ extern "C" {
 #define LOWER16b_MASK rte_le_to_cpu_32(0xffff)
 #define LOWER24b_MASK rte_le_to_cpu_32(0xffffff)
 
-/**
- * The most generic version, hashes an arbitrary sequence
- * of bytes.  No alignment or length assumptions are made about
- * the input key.
- *
- * @param key
- *   Key to calculate hash of.
- * @param length
- *   Length of key in bytes.
- * @param initval
- *   Initialising value of hash.
- * @return
- *   Calculated hash value.
- */
-static inline uint32_t
-rte_jhash(const void *key, uint32_t length, uint32_t initval)
+static inline void
+__rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb, unsigned check_align)
 {
 	uint32_t a, b, c;
 	union {
@@ -132,12 +118,18 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
 	} u;
 
 	/* Set up the internal state */
-	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + initval;
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + *pc;
+	c += *pb;
 
 	u.ptr = key;
 
-	/* Check key alignment. For x86 architecture, first case is always optimal */
-	if (!strcmp(RTE_ARCH,"x86_64") || !strcmp(RTE_ARCH,"i686") || (u.i & 0x3) == 0) {
+	/*
+	 * Check key alignment. For x86 architecture, first case is always optimal
+	 * If check_align is not set, first case will be used
+	 */
+
+	if ((!strcmp(RTE_ARCH,"x86_64") || !strcmp(RTE_ARCH,"i686")
+		|| (!check_align) || (u.i & 0x3) == 0)) {
 		const uint32_t *k = (const uint32_t *)key;
 
 		while (length > 12) {
@@ -178,7 +170,9 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
 			a += k[0] & LOWER8b_MASK; break;
 		/* zero length strings require no mixing */
 		case 0:
-			return c;
+			*pc = c;
+			*pb = b;
+			return;
 		};
 	} else {
 		const uint8_t *k = (const uint8_t *)key;
@@ -233,63 +227,16 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
 			a += ((uint32_t)k[0]) << RTE_JHASH_BYTE0_SHIFT;
 		break;
 		case 0:
-			return c;
+			*pc = c;
+			*pb = b;
+			return;
 		}
 	}
 
 	__rte_jhash_final(a, b, c);
 
-	return c;
-}
-
-/**
- * A special optimized version that handles 1 or more of uint32_ts.
- * The length parameter here is the number of uint32_ts in the key.
- *
- * @param k
- *   Key to calculate hash of.
- * @param length
- *   Length of key in units of 4 bytes.
- * @param initval
- *   Initialising value of hash.
- * @return
- *   Calculated hash value.
- */
-static inline uint32_t
-rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
-{
-	uint32_t a, b, c;
-
-	/* Set up the internal state */
-	a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length) << 2) + initval;
-
-	/* Handle most of the key */
-	while (length > 3) {
-		a += k[0];
-		b += k[1];
-		c += k[2];
-
-		__rte_jhash_mix(a, b, c);
-
-		k += 3;
-		length -= 3;
-	}
-
-	/* Handle the last 3 uint32_t's */
-	switch (length) {
-	case 3:
-		c += k[2];
-	case 2:
-		b += k[1];
-	case 1:
-		a += k[0];
-		__rte_jhash_final(a, b, c);
-	/* case 0: nothing left to add */
-	case 0:
-		break;
-	};
-
-	return c;
+	*pc = c;
+	*pb = b;
 }
 
 /**
@@ -310,127 +257,7 @@ rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
 static inline void
 rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb)
 {
-	uint32_t a, b, c;
-	union {
-		const void *ptr;
-		size_t i;
-	} u;
-
-	/* Set up the internal state */
-	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + *pc;
-	c += *pb;
-
-	u.ptr = key;
-
-	/* Check key alignment. For x86 architecture, first case is always optimal */
-	if (!strcmp(RTE_ARCH,"x86_64") || !strcmp(RTE_ARCH,"i686") || (u.i & 0x3) == 0) {
-		const uint32_t *k = (const uint32_t *)key;
-
-		while (length > 12) {
-			a += k[0];
-			b += k[1];
-			c += k[2];
-
-			__rte_jhash_mix(a, b, c);
-
-			k += 3;
-			length -= 12;
-		}
-
-		switch (length) {
-		case 12:
-			c += k[2]; b += k[1]; a += k[0]; break;
-		case 11:
-			c += k[2] & LOWER24b_MASK; b += k[1]; a += k[0]; break;
-		case 10:
-			c += k[2] & LOWER16b_MASK; b += k[1]; a += k[0]; break;
-		case 9:
-			c += k[2] & LOWER8b_MASK; b += k[1]; a += k[0]; break;
-		case 8:
-			b += k[1]; a += k[0]; break;
-		case 7:
-			b += k[1] & LOWER24b_MASK; a += k[0]; break;
-		case 6:
-			b += k[1] & LOWER16b_MASK; a += k[0]; break;
-		case 5:
-			b += k[1] & LOWER8b_MASK; a += k[0]; break;
-		case 4:
-			a += k[0]; break;
-		case 3:
-			a += k[0] & LOWER24b_MASK; break;
-		case 2:
-			a += k[0] & LOWER16b_MASK; break;
-		case 1:
-			a += k[0] & LOWER8b_MASK; break;
-		/* zero length strings require no mixing */
-		case 0:
-			*pc = c;
-			*pb = b;
-			return;
-		};
-	} else {
-		const uint8_t *k = (const uint8_t *)key;
-
-		/* all but the last block: affect some 32 bits of (a, b, c) */
-		while (length > 12) {
-			a += ((uint32_t)k[0]) << RTE_JHASH_BYTE0_SHIFT;
-			a += ((uint32_t)k[1]) << RTE_JHASH_BYTE1_SHIFT;
-			a += ((uint32_t)k[2]) << RTE_JHASH_BYTE2_SHIFT;
-			a += ((uint32_t)k[3]) << RTE_JHASH_BYTE3_SHIFT;
-			b += ((uint32_t)k[4]) << RTE_JHASH_BYTE0_SHIFT;
-			b += ((uint32_t)k[5]) << RTE_JHASH_BYTE1_SHIFT;
-			b += ((uint32_t)k[6]) << RTE_JHASH_BYTE2_SHIFT;
-			b += ((uint32_t)k[7]) << RTE_JHASH_BYTE3_SHIFT;
-			c += ((uint32_t)k[8]) << RTE_JHASH_BYTE0_SHIFT;
-			c += ((uint32_t)k[9]) << RTE_JHASH_BYTE1_SHIFT;
-			c += ((uint32_t)k[10]) << RTE_JHASH_BYTE2_SHIFT;
-			c += ((uint32_t)k[11]) << RTE_JHASH_BYTE3_SHIFT;
-
-			__rte_jhash_mix(a, b, c);
-
-			k += 12;
-			length -= 12;
-		}
-
-		/* last block: affect all 32 bits of (c) */
-		/* all the case statements fall through */
-		switch (length) {
-		case 12:
-			c += ((uint32_t)k[11]) << RTE_JHASH_BYTE3_SHIFT;
-		case 11:
-			c += ((uint32_t)k[10]) << RTE_JHASH_BYTE2_SHIFT;
-		case 10:
-			c += ((uint32_t)k[9]) << RTE_JHASH_BYTE1_SHIFT;
-		case 9:
-			c += ((uint32_t)k[8]) << RTE_JHASH_BYTE0_SHIFT;
-		case 8:
-			b += ((uint32_t)k[7]) << RTE_JHASH_BYTE3_SHIFT;
-		case 7:
-			b += ((uint32_t)k[6]) << RTE_JHASH_BYTE2_SHIFT;
-		case 6:
-			b += ((uint32_t)k[5]) << RTE_JHASH_BYTE1_SHIFT;
-		case 5:
-			b += ((uint32_t)k[4]) << RTE_JHASH_BYTE0_SHIFT;
-		case 4:
-			a += ((uint32_t)k[3]) << RTE_JHASH_BYTE3_SHIFT;
-		case 3:
-			a += ((uint32_t)k[2]) << RTE_JHASH_BYTE2_SHIFT;
-		case 2:
-			a += ((uint32_t)k[1]) << RTE_JHASH_BYTE1_SHIFT;
-		case 1:
-			a += ((uint32_t)k[0]) << RTE_JHASH_BYTE0_SHIFT;
-		break;
-		case 0:
-			*pc = c;
-			*pb = b;
-			return;
-		}
-	}
-
-	__rte_jhash_final(a, b, c);
-
-	*pc = c;
-	*pb = b;
+	__rte_jhash_2hashes(key, length, pc, pb, 1);
 }
 
 /**
@@ -451,40 +278,54 @@ rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb)
 static inline void
 rte_jhash2_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb)
 {
-	uint32_t a, b, c;
+	__rte_jhash_2hashes((const void *) k, (length << 2), pc, pb, 0);
+}
 
-	/* Set up the internal state */
-	a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length) << 2) + *pc;
-	c += *pb;
+/**
+ * The most generic version, hashes an arbitrary sequence
+ * of bytes.  No alignment or length assumptions are made about
+ * the input key.
+ *
+ * @param key
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in bytes.
+ * @param initval
+ *   Initialising value of hash.
+ * @return
+ *   Calculated hash value.
+ */
+static inline uint32_t
+rte_jhash(const void *key, uint32_t length, uint32_t initval)
+{
+	uint32_t initval2 = 0;
 
-	/* Handle most of the key */
-	while (length > 3) {
-		a += k[0];
-		b += k[1];
-		c += k[2];
+	rte_jhash_2hashes(key, length, &initval, &initval2);
 
-		__rte_jhash_mix(a, b, c);
+	return initval;
+}
 
-		k += 3;
-		length -= 3;
-	}
+/**
+ * A special optimized version that handles 1 or more of uint32_ts.
+ * The length parameter here is the number of uint32_ts in the key.
+ *
+ * @param k
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in units of 4 bytes.
+ * @param initval
+ *   Initialising value of hash.
+ * @return
+ *   Calculated hash value.
+ */
+static inline uint32_t
+rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
+{
+	uint32_t initval2 = 0;
 
-	/* Handle the last 3 uint32_t's */
-	switch (length) {
-	case 3:
-		c += k[2];
-	case 2:
-		b += k[1];
-	case 1:
-		a += k[0];
-		__rte_jhash_final(a, b, c);
-	/* case 0: nothing left to add */
-	case 0:
-		break;
-	};
+	rte_jhash2_2hashes(k, length, &initval, &initval2);
 
-	*pc = c;
-	*pb = b;
+	return initval;
 }
 
 static inline uint32_t
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v3 6/6] hash: rename rte_jhash2 to rte_jhash_32b
  2015-05-05 14:43   ` [dpdk-dev] [PATCH v3 0/6] update jhash function Pablo de Lara
                       ` (4 preceding siblings ...)
  2015-05-05 14:43     ` [dpdk-dev] [PATCH v3 5/6] hash: remove duplicated code Pablo de Lara
@ 2015-05-05 14:43     ` Pablo de Lara
  2015-05-12 11:02     ` [dpdk-dev] [PATCH v4 0/6] update jhash function Pablo de Lara
  6 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-05-05 14:43 UTC (permalink / raw)
  To: dev

From: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>

Rename rte_jhash2 to something more meaningful (rte_jhash_32b),
and mark rte_jhash2 as deprecated.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 app/test/test_func_reentrancy.c |    2 +-
 app/test/test_hash.c            |    4 ++--
 lib/librte_hash/rte_jhash.h     |   17 +++++++++++++++--
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/app/test/test_func_reentrancy.c b/app/test/test_func_reentrancy.c
index dc070af..85504c0 100644
--- a/app/test/test_func_reentrancy.c
+++ b/app/test/test_func_reentrancy.c
@@ -228,7 +228,7 @@ hash_create_free(__attribute__((unused)) void *arg)
 		.entries = 16,
 		.bucket_entries = 4,
 		.key_len = 4,
-		.hash_func = (rte_hash_function)rte_jhash2,
+		.hash_func = (rte_hash_function)rte_jhash_32b,
 		.hash_func_init_val = 0,
 		.socket_id = 0,
 	};
diff --git a/app/test/test_hash.c b/app/test/test_hash.c
index 1da27c5..4ecb11b 100644
--- a/app/test/test_hash.c
+++ b/app/test/test_hash.c
@@ -1177,7 +1177,7 @@ test_hash_add_delete_jhash2(void)
 
 	hash_params_ex.name = "hash_test_jhash2";
 	hash_params_ex.key_len = 4;
-	hash_params_ex.hash_func = (rte_hash_function)rte_jhash2;
+	hash_params_ex.hash_func = (rte_hash_function)rte_jhash_32b;
 
 	handle = rte_hash_create(&hash_params_ex);
 	if (handle == NULL) {
@@ -1216,7 +1216,7 @@ test_hash_add_delete_2_jhash2(void)
 
 	hash_params_ex.name = "hash_test_2_jhash2";
 	hash_params_ex.key_len = 8;
-	hash_params_ex.hash_func = (rte_hash_function)rte_jhash2;
+	hash_params_ex.hash_func = (rte_hash_function)rte_jhash_32b;
 
 	handle = rte_hash_create(&hash_params_ex);
 	if (handle == NULL)
diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index 6f05c4c..3a6e3f2 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -46,6 +46,8 @@ extern "C" {
 
 #include <stdint.h>
 #include <string.h>
+
+#include <rte_log.h>
 #include <rte_byteorder.h>
 
 /* jhash.h: Jenkins hash support.
@@ -276,7 +278,7 @@ rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb)
  *   IN: second seed OUT: secondary hash value.
  */
 static inline void
-rte_jhash2_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb)
+rte_jhash_32b_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb)
 {
 	__rte_jhash_2hashes((const void *) k, (length << 2), pc, pb, 0);
 }
@@ -319,11 +321,22 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
  *   Calculated hash value.
  */
 static inline uint32_t
+rte_jhash_32b(const uint32_t *k, uint32_t length, uint32_t initval)
+{
+	uint32_t initval2 = 0;
+
+	rte_jhash_32b_2hashes(k, length, &initval, &initval2);
+
+	return initval;
+}
+
+static inline uint32_t
 rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
 {
 	uint32_t initval2 = 0;
 
-	rte_jhash2_2hashes(k, length, &initval, &initval2);
+	RTE_LOG(WARNING, HASH, "rte_jhash2 is deprecated\n");
+	rte_jhash_32b_2hashes(k, length, &initval, &initval2);
 
 	return initval;
 }
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [dpdk-dev] [PATCH v3 3/6] hash: update jhash function with the latest available
  2015-05-05 14:43     ` [dpdk-dev] [PATCH v3 3/6] hash: update jhash function with the latest available Pablo de Lara
@ 2015-05-06  0:35       ` Ananyev, Konstantin
  2015-05-06  9:36         ` De Lara Guarch, Pablo
  0 siblings, 1 reply; 62+ messages in thread
From: Ananyev, Konstantin @ 2015-05-06  0:35 UTC (permalink / raw)
  To: De Lara Guarch, Pablo, dev


Hi Pablo,

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Pablo de Lara
> Sent: Tuesday, May 05, 2015 3:44 PM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v3 3/6] hash: update jhash function with the latest available
> 
> Jenkins hash function was developed originally in 1996,
> and was integrated in first versions of DPDK.
> The function has been improved in 2006,
> achieving up to 60% better performance, compared to the original one.
> 
> This patch integrates that code into the rte_jhash library.
> 
> Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
> ---
>  lib/librte_hash/rte_jhash.h |  261 +++++++++++++++++++++++++++++++------------
>  1 files changed, 188 insertions(+), 73 deletions(-)
> 
> diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
> index a4bf5a1..0e96b7c 100644
> --- a/lib/librte_hash/rte_jhash.h
> +++ b/lib/librte_hash/rte_jhash.h
> @@ -1,7 +1,7 @@
>  /*-
>   *   BSD LICENSE
>   *
> - *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
>   *   All rights reserved.
>   *
>   *   Redistribution and use in source and binary forms, with or without
> @@ -45,38 +45,68 @@ extern "C" {
>  #endif
> 
>  #include <stdint.h>
> +#include <string.h>
> +#include <rte_byteorder.h>
> 
>  /* jhash.h: Jenkins hash support.
>   *
> - * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
> + * Copyright (C) 2006 Bob Jenkins (bob_jenkins@burtleburtle.net)
>   *
>   * http://burtleburtle.net/bob/hash/
>   *
>   * These are the credits from Bob's sources:
>   *
> - * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
> - * hash(), hash2(), hash3, and mix() are externally useful functions.
> - * Routines to test the hash are included if SELF_TEST is defined.
> - * You can use this free for any purpose.  It has no warranty.
> + * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
> + *
> + * These are functions for producing 32-bit hashes for hash table lookup.
> + * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
> + * are externally useful functions.  Routines to test the hash are included
> + * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
> + * the public domain.  It has no warranty.
>   *
>   * $FreeBSD$
>   */
> 
> +#define rot(x, k) (((x) << (k)) | ((x) >> (32-(k))))
> +
>  /** @internal Internal function. NOTE: Arguments are modified. */
>  #define __rte_jhash_mix(a, b, c) do { \
> -	a -= b; a -= c; a ^= (c>>13); \
> -	b -= c; b -= a; b ^= (a<<8); \
> -	c -= a; c -= b; c ^= (b>>13); \
> -	a -= b; a -= c; a ^= (c>>12); \
> -	b -= c; b -= a; b ^= (a<<16); \
> -	c -= a; c -= b; c ^= (b>>5); \
> -	a -= b; a -= c; a ^= (c>>3); \
> -	b -= c; b -= a; b ^= (a<<10); \
> -	c -= a; c -= b; c ^= (b>>15); \
> +	a -= c; a ^= rot(c, 4); c += b; \
> +	b -= a; b ^= rot(a, 6); a += c; \
> +	c -= b; c ^= rot(b, 8); b += a; \
> +	a -= c; a ^= rot(c, 16); c += b; \
> +	b -= a; b ^= rot(a, 19); a += c; \
> +	c -= b; c ^= rot(b, 4); b += a; \
> +} while (0)
> +
> +#define __rte_jhash_final(a, b, c) do { \
> +	c ^= b; c -= rot(b, 14); \
> +	a ^= c; a -= rot(c, 11); \
> +	b ^= a; b -= rot(a, 25); \
> +	c ^= b; c -= rot(b, 16); \
> +	a ^= c; a -= rot(c, 4);  \
> +	b ^= a; b -= rot(a, 14); \
> +	c ^= b; c -= rot(b, 24); \
>  } while (0)
> 
>  /** The golden ratio: an arbitrary value. */
> -#define RTE_JHASH_GOLDEN_RATIO      0x9e3779b9
> +#define RTE_JHASH_GOLDEN_RATIO      0xdeadbeef
> +
> +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
> +#define RTE_JHASH_BYTE0_SHIFT 0
> +#define RTE_JHASH_BYTE1_SHIFT 8
> +#define RTE_JHASH_BYTE2_SHIFT 16
> +#define RTE_JHASH_BYTE3_SHIFT 24
> +#else
> +#define RTE_JHASH_BYTE0_SHIFT 24
> +#define RTE_JHASH_BYTE1_SHIFT 16
> +#define RTE_JHASH_BYTE2_SHIFT 8
> +#define RTE_JHASH_BYTE3_SHIFT 0
> +#endif
> +
> +#define LOWER8b_MASK rte_le_to_cpu_32(0xff)
> +#define LOWER16b_MASK rte_le_to_cpu_32(0xffff)
> +#define LOWER24b_MASK rte_le_to_cpu_32(0xffffff)
> 
>  /**
>   * The most generic version, hashes an arbitrary sequence
> @@ -95,42 +125,119 @@ extern "C" {
>  static inline uint32_t
>  rte_jhash(const void *key, uint32_t length, uint32_t initval)
>  {
> -	uint32_t a, b, c, len;
> -	const uint8_t *k = (const uint8_t *)key;
> -	const uint32_t *k32 = (const uint32_t *)key;
> +	uint32_t a, b, c;
> +	union {
> +		const void *ptr;
> +		size_t i;
> +	} u;
> 
> -	len = length;
> -	a = b = RTE_JHASH_GOLDEN_RATIO;
> -	c = initval;
> +	/* Set up the internal state */
> +	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + initval;
> 
> -	while (len >= 12) {
> -		a += k32[0];
> -		b += k32[1];
> -		c += k32[2];
> +	u.ptr = key;
> 
> -		__rte_jhash_mix(a,b,c);
> +	/* Check key alignment. For x86 architecture, first case is always optimal */
> +	if (!strcmp(RTE_ARCH,"x86_64") || !strcmp(RTE_ARCH,"i686") || (u.i & 0x3) == 0) {

I wonder why strcmp() is used here; why not something like: 'if defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_64)', as in all other places?
Another question: what would happen in the case of RTE_ARCH="x86_x32"?
Konstantin

> +		const uint32_t *k = (const uint32_t *)key;
> 
> -		k += (3 * sizeof(uint32_t)), k32 += 3;
> -		len -= (3 * sizeof(uint32_t));
> -	}
> +		while (length > 12) {
> +			a += k[0];
> +			b += k[1];
> +			c += k[2];
> 
> -	c += length;
> -	switch (len) {
> -		case 11: c += ((uint32_t)k[10] << 24);
> -		case 10: c += ((uint32_t)k[9] << 16);
> -		case 9 : c += ((uint32_t)k[8] << 8);
> -		case 8 : b += ((uint32_t)k[7] << 24);
> -		case 7 : b += ((uint32_t)k[6] << 16);
> -		case 6 : b += ((uint32_t)k[5] << 8);
> -		case 5 : b += k[4];
> -		case 4 : a += ((uint32_t)k[3] << 24);
> -		case 3 : a += ((uint32_t)k[2] << 16);
> -		case 2 : a += ((uint32_t)k[1] << 8);
> -		case 1 : a += k[0];
> -		default: break;
> -	};
> +			__rte_jhash_mix(a, b, c);
> +
> +			k += 3;
> +			length -= 12;
> +		}
> +
> +		switch (length) {
> +		case 12:
> +			c += k[2]; b += k[1]; a += k[0]; break;
> +		case 11:
> +			c += k[2] & LOWER24b_MASK; b += k[1]; a += k[0]; break;
> +		case 10:
> +			c += k[2] & LOWER16b_MASK; b += k[1]; a += k[0]; break;
> +		case 9:
> +			c += k[2] & LOWER8b_MASK; b += k[1]; a += k[0]; break;
> +		case 8:
> +			b += k[1]; a += k[0]; break;
> +		case 7:
> +			b += k[1] & LOWER24b_MASK; a += k[0]; break;
> +		case 6:
> +			b += k[1] & LOWER16b_MASK; a += k[0]; break;
> +		case 5:
> +			b += k[1] & LOWER8b_MASK; a += k[0]; break;
> +		case 4:
> +			a += k[0]; break;
> +		case 3:
> +			a += k[0] & LOWER24b_MASK; break;
> +		case 2:
> +			a += k[0] & LOWER16b_MASK; break;
> +		case 1:
> +			a += k[0] & LOWER8b_MASK; break;
> +		/* zero length strings require no mixing */
> +		case 0:
> +			return c;
> +		};
> +	} else {
> +		const uint8_t *k = (const uint8_t *)key;
> +
> +		/* all but the last block: affect some 32 bits of (a, b, c) */
> +		while (length > 12) {
> +			a += ((uint32_t)k[0]) << RTE_JHASH_BYTE0_SHIFT;
> +			a += ((uint32_t)k[1]) << RTE_JHASH_BYTE1_SHIFT;
> +			a += ((uint32_t)k[2]) << RTE_JHASH_BYTE2_SHIFT;
> +			a += ((uint32_t)k[3]) << RTE_JHASH_BYTE3_SHIFT;
> +			b += ((uint32_t)k[4]) << RTE_JHASH_BYTE0_SHIFT;
> +			b += ((uint32_t)k[5]) << RTE_JHASH_BYTE1_SHIFT;
> +			b += ((uint32_t)k[6]) << RTE_JHASH_BYTE2_SHIFT;
> +			b += ((uint32_t)k[7]) << RTE_JHASH_BYTE3_SHIFT;
> +			c += ((uint32_t)k[8]) << RTE_JHASH_BYTE0_SHIFT;
> +			c += ((uint32_t)k[9]) << RTE_JHASH_BYTE1_SHIFT;
> +			c += ((uint32_t)k[10]) << RTE_JHASH_BYTE2_SHIFT;
> +			c += ((uint32_t)k[11]) << RTE_JHASH_BYTE3_SHIFT;
> +
> +			__rte_jhash_mix(a, b, c);
> +
> +			k += 12;
> +			length -= 12;
> +		}
> +
> +		/* last block: affect all 32 bits of (c) */
> +		/* all the case statements fall through */
> +		switch (length) {
> +		case 12:
> +			c += ((uint32_t)k[11]) << RTE_JHASH_BYTE3_SHIFT;
> +		case 11:
> +			c += ((uint32_t)k[10]) << RTE_JHASH_BYTE2_SHIFT;
> +		case 10:
> +			c += ((uint32_t)k[9]) << RTE_JHASH_BYTE1_SHIFT;
> +		case 9:
> +			c += ((uint32_t)k[8]) << RTE_JHASH_BYTE0_SHIFT;
> +		case 8:
> +			b += ((uint32_t)k[7]) << RTE_JHASH_BYTE3_SHIFT;
> +		case 7:
> +			b += ((uint32_t)k[6]) << RTE_JHASH_BYTE2_SHIFT;
> +		case 6:
> +			b += ((uint32_t)k[5]) << RTE_JHASH_BYTE1_SHIFT;
> +		case 5:
> +			b += ((uint32_t)k[4]) << RTE_JHASH_BYTE0_SHIFT;
> +		case 4:
> +			a += ((uint32_t)k[3]) << RTE_JHASH_BYTE3_SHIFT;
> +		case 3:
> +			a += ((uint32_t)k[2]) << RTE_JHASH_BYTE2_SHIFT;
> +		case 2:
> +			a += ((uint32_t)k[1]) << RTE_JHASH_BYTE1_SHIFT;
> +		case 1:
> +			a += ((uint32_t)k[0]) << RTE_JHASH_BYTE0_SHIFT;
> +		break;
> +		case 0:
> +			return c;
> +		}
> +	}
> 
> -	__rte_jhash_mix(a,b,c);
> +	__rte_jhash_final(a, b, c);
> 
>  	return c;
>  }
> @@ -151,33 +258,51 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
>  static inline uint32_t
>  rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
>  {
> -	uint32_t a, b, c, len;
> +	uint32_t a, b, c;
> 
> -	a = b = RTE_JHASH_GOLDEN_RATIO;
> -	c = initval;
> -	len = length;
> +	/* Set up the internal state */
> +	a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length) << 2) + initval;
> 
> -	while (len >= 3) {
> +	/* Handle most of the key */
> +	while (length > 3) {
>  		a += k[0];
>  		b += k[1];
>  		c += k[2];
> +
>  		__rte_jhash_mix(a, b, c);
> -		k += 3; len -= 3;
> -	}
> 
> -	c += length * 4;
> +		k += 3;
> +		length -= 3;
> +	}
> 
> -	switch (len) {
> -		case 2 : b += k[1];
> -		case 1 : a += k[0];
> -		default: break;
> +	/* Handle the last 3 uint32_t's */
> +	switch (length) {
> +	case 3:
> +		c += k[2];
> +	case 2:
> +		b += k[1];
> +	case 1:
> +		a += k[0];
> +		__rte_jhash_final(a, b, c);
> +	/* case 0: nothing left to add */
> +	case 0:
> +		break;
>  	};
> 
> -	__rte_jhash_mix(a,b,c);
> -
>  	return c;
>  }
> 
> +static inline uint32_t
> +__rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
> +{
> +	a += RTE_JHASH_GOLDEN_RATIO + initval;
> +	b += RTE_JHASH_GOLDEN_RATIO + initval;
> +	c += RTE_JHASH_GOLDEN_RATIO + initval;
> +
> +	__rte_jhash_final(a, b, c);
> +
> +	return c;
> +}
> 
>  /**
>   * A special ultra-optimized versions that knows it is hashing exactly
> @@ -197,17 +322,7 @@ rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
>  static inline uint32_t
>  rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
>  {
> -	a += RTE_JHASH_GOLDEN_RATIO;
> -	b += RTE_JHASH_GOLDEN_RATIO;
> -	c += initval;
> -
> -	__rte_jhash_mix(a, b, c);
> -
> -	/*
> -	 * NOTE: In particular the "c += length; __rte_jhash_mix(a,b,c);"
> -	 *       normally done at the end is not done here.
> -	 */
> -	return c;
> +	return __rte_jhash_3words(a + 12, b + 12, c + 12, initval);
>  }
> 
>  /**
> @@ -226,7 +341,7 @@ rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
>  static inline uint32_t
>  rte_jhash_2words(uint32_t a, uint32_t b, uint32_t initval)
>  {
> -	return rte_jhash_3words(a, b, 0, initval);
> +	return __rte_jhash_3words(a + 8, b + 8, 8, initval);
>  }
> 
>  /**
> @@ -243,7 +358,7 @@ rte_jhash_2words(uint32_t a, uint32_t b, uint32_t initval)
>  static inline uint32_t
>  rte_jhash_1word(uint32_t a, uint32_t initval)
>  {
> -	return rte_jhash_3words(a, 0, 0, initval);
> +	return __rte_jhash_3words(a + 4, 4, 4, initval);
>  }
> 
>  #ifdef __cplusplus
> --
> 1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [dpdk-dev] [PATCH v3 3/6] hash: update jhash function with the latest available
  2015-05-06  0:35       ` Ananyev, Konstantin
@ 2015-05-06  9:36         ` De Lara Guarch, Pablo
  2015-05-06 16:11           ` Ananyev, Konstantin
  2015-05-07 11:11           ` Ananyev, Konstantin
  0 siblings, 2 replies; 62+ messages in thread
From: De Lara Guarch, Pablo @ 2015-05-06  9:36 UTC (permalink / raw)
  To: Ananyev, Konstantin, dev

Hi Konstantin,

> -----Original Message-----
> From: Ananyev, Konstantin
> Sent: Wednesday, May 06, 2015 1:36 AM
> To: De Lara Guarch, Pablo; dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH v3 3/6] hash: update jhash function with the
> latest available
> 
> 
> Hi Pablo,
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Pablo de Lara
> > Sent: Tuesday, May 05, 2015 3:44 PM
> > To: dev@dpdk.org
> > Subject: [dpdk-dev] [PATCH v3 3/6] hash: update jhash function with the
> latest available
> >
> > Jenkins hash function was developed originally in 1996,
> > and was integrated in first versions of DPDK.
> > The function has been improved in 2006,
> > achieving up to 60% better performance, compared to the original one.
> >
> > This patch integrates that code into the rte_jhash library.
> >
> > Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
> > ---
> >  lib/librte_hash/rte_jhash.h |  261
> +++++++++++++++++++++++++++++++------------
> >  1 files changed, 188 insertions(+), 73 deletions(-)
> >
> > diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
> > index a4bf5a1..0e96b7c 100644
> > --- a/lib/librte_hash/rte_jhash.h
> > +++ b/lib/librte_hash/rte_jhash.h
> > @@ -1,7 +1,7 @@
> >  /*-
> >   *   BSD LICENSE
> >   *
> > - *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> > + *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
> >   *   All rights reserved.
> >   *
> >   *   Redistribution and use in source and binary forms, with or without
> > @@ -45,38 +45,68 @@ extern "C" {
> >  #endif
> >
> >  #include <stdint.h>
> > +#include <string.h>
> > +#include <rte_byteorder.h>
> >
> >  /* jhash.h: Jenkins hash support.
> >   *
> > - * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
> > + * Copyright (C) 2006 Bob Jenkins (bob_jenkins@burtleburtle.net)
> >   *
> >   * http://burtleburtle.net/bob/hash/
> >   *
> >   * These are the credits from Bob's sources:
> >   *
> > - * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
> > - * hash(), hash2(), hash3, and mix() are externally useful functions.
> > - * Routines to test the hash are included if SELF_TEST is defined.
> > - * You can use this free for any purpose.  It has no warranty.
> > + * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
> > + *
> > + * These are functions for producing 32-bit hashes for hash table lookup.
> > + * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
> > + * are externally useful functions.  Routines to test the hash are included
> > + * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
> > + * the public domain.  It has no warranty.
> >   *
> >   * $FreeBSD$
> >   */
> >
> > +#define rot(x, k) (((x) << (k)) | ((x) >> (32-(k))))
> > +
> >  /** @internal Internal function. NOTE: Arguments are modified. */
> >  #define __rte_jhash_mix(a, b, c) do { \
> > -	a -= b; a -= c; a ^= (c>>13); \
> > -	b -= c; b -= a; b ^= (a<<8); \
> > -	c -= a; c -= b; c ^= (b>>13); \
> > -	a -= b; a -= c; a ^= (c>>12); \
> > -	b -= c; b -= a; b ^= (a<<16); \
> > -	c -= a; c -= b; c ^= (b>>5); \
> > -	a -= b; a -= c; a ^= (c>>3); \
> > -	b -= c; b -= a; b ^= (a<<10); \
> > -	c -= a; c -= b; c ^= (b>>15); \
> > +	a -= c; a ^= rot(c, 4); c += b; \
> > +	b -= a; b ^= rot(a, 6); a += c; \
> > +	c -= b; c ^= rot(b, 8); b += a; \
> > +	a -= c; a ^= rot(c, 16); c += b; \
> > +	b -= a; b ^= rot(a, 19); a += c; \
> > +	c -= b; c ^= rot(b, 4); b += a; \
> > +} while (0)
> > +
> > +#define __rte_jhash_final(a, b, c) do { \
> > +	c ^= b; c -= rot(b, 14); \
> > +	a ^= c; a -= rot(c, 11); \
> > +	b ^= a; b -= rot(a, 25); \
> > +	c ^= b; c -= rot(b, 16); \
> > +	a ^= c; a -= rot(c, 4);  \
> > +	b ^= a; b -= rot(a, 14); \
> > +	c ^= b; c -= rot(b, 24); \
> >  } while (0)
> >
> >  /** The golden ratio: an arbitrary value. */
> > -#define RTE_JHASH_GOLDEN_RATIO      0x9e3779b9
> > +#define RTE_JHASH_GOLDEN_RATIO      0xdeadbeef
> > +
> > +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
> > +#define RTE_JHASH_BYTE0_SHIFT 0
> > +#define RTE_JHASH_BYTE1_SHIFT 8
> > +#define RTE_JHASH_BYTE2_SHIFT 16
> > +#define RTE_JHASH_BYTE3_SHIFT 24
> > +#else
> > +#define RTE_JHASH_BYTE0_SHIFT 24
> > +#define RTE_JHASH_BYTE1_SHIFT 16
> > +#define RTE_JHASH_BYTE2_SHIFT 8
> > +#define RTE_JHASH_BYTE3_SHIFT 0
> > +#endif
> > +
> > +#define LOWER8b_MASK rte_le_to_cpu_32(0xff)
> > +#define LOWER16b_MASK rte_le_to_cpu_32(0xffff)
> > +#define LOWER24b_MASK rte_le_to_cpu_32(0xffffff)
> >
> >  /**
> >   * The most generic version, hashes an arbitrary sequence
> > @@ -95,42 +125,119 @@ extern "C" {
> >  static inline uint32_t
> >  rte_jhash(const void *key, uint32_t length, uint32_t initval)
> >  {
> > -	uint32_t a, b, c, len;
> > -	const uint8_t *k = (const uint8_t *)key;
> > -	const uint32_t *k32 = (const uint32_t *)key;
> > +	uint32_t a, b, c;
> > +	union {
> > +		const void *ptr;
> > +		size_t i;
> > +	} u;
> >
> > -	len = length;
> > -	a = b = RTE_JHASH_GOLDEN_RATIO;
> > -	c = initval;
> > +	/* Set up the internal state */
> > +	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + initval;
> >
> > -	while (len >= 12) {
> > -		a += k32[0];
> > -		b += k32[1];
> > -		c += k32[2];
> > +	u.ptr = key;
> >
> > -		__rte_jhash_mix(a,b,c);
> > +	/* Check key alignment. For x86 architecture, first case is always
> optimal */
> > +	if (!strcmp(RTE_ARCH,"x86_64") || !strcmp(RTE_ARCH,"i686") || (u.i
> & 0x3) == 0) {
> 
> Wonder why strcmp(), why not something like: 'if defined(RTE_ARCH_I686)
> || defined(RTE_ARCH_X86_64)' as in all other places?
> Another question what would be in case of RTE_ARCH="x86_x32"?
> Konstantin

Functionally it is the same, and using this method I can integrate all the conditions in one line, so it takes less code.
I also checked the assembly code, and the compiler removes the check if it is Intel architecture, so performance remains the same.

Re x86_x32: you are right, I probably need to include it, although I just realized that it is not used in any other place.
I wonder if we should include it somewhere else as well, e.g. in rte_hash_crc.h?

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [dpdk-dev] [PATCH v3 3/6] hash: update jhash function with the latest available
  2015-05-06  9:36         ` De Lara Guarch, Pablo
@ 2015-05-06 16:11           ` Ananyev, Konstantin
  2015-05-07 11:11           ` Ananyev, Konstantin
  1 sibling, 0 replies; 62+ messages in thread
From: Ananyev, Konstantin @ 2015-05-06 16:11 UTC (permalink / raw)
  To: De Lara Guarch, Pablo, dev

Hi Pablo,

> -----Original Message-----
> From: De Lara Guarch, Pablo
> Sent: Wednesday, May 06, 2015 10:36 AM
> To: Ananyev, Konstantin; dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH v3 3/6] hash: update jhash function with the latest available
> 
> Hi Konstantin,
> 
> > -----Original Message-----
> > From: Ananyev, Konstantin
> > Sent: Wednesday, May 06, 2015 1:36 AM
> > To: De Lara Guarch, Pablo; dev@dpdk.org
> > Subject: RE: [dpdk-dev] [PATCH v3 3/6] hash: update jhash function with the
> > latest available
> >
> >
> > Hi Pablo,
> >
> > > -----Original Message-----
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Pablo de Lara
> > > Sent: Tuesday, May 05, 2015 3:44 PM
> > > To: dev@dpdk.org
> > > Subject: [dpdk-dev] [PATCH v3 3/6] hash: update jhash function with the
> > latest available
> > >
> > > Jenkins hash function was developed originally in 1996,
> > > and was integrated in first versions of DPDK.
> > > The function has been improved in 2006,
> > > achieving up to 60% better performance, compared to the original one.
> > >
> > > This patch integrates that code into the rte_jhash library.
> > >
> > > Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
> > > ---
> > >  lib/librte_hash/rte_jhash.h |  261
> > +++++++++++++++++++++++++++++++------------
> > >  1 files changed, 188 insertions(+), 73 deletions(-)
> > >
> > > diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
> > > index a4bf5a1..0e96b7c 100644
> > > --- a/lib/librte_hash/rte_jhash.h
> > > +++ b/lib/librte_hash/rte_jhash.h
> > > @@ -1,7 +1,7 @@
> > >  /*-
> > >   *   BSD LICENSE
> > >   *
> > > - *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> > > + *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
> > >   *   All rights reserved.
> > >   *
> > >   *   Redistribution and use in source and binary forms, with or without
> > > @@ -45,38 +45,68 @@ extern "C" {
> > >  #endif
> > >
> > >  #include <stdint.h>
> > > +#include <string.h>
> > > +#include <rte_byteorder.h>
> > >
> > >  /* jhash.h: Jenkins hash support.
> > >   *
> > > - * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
> > > + * Copyright (C) 2006 Bob Jenkins (bob_jenkins@burtleburtle.net)
> > >   *
> > >   * http://burtleburtle.net/bob/hash/
> > >   *
> > >   * These are the credits from Bob's sources:
> > >   *
> > > - * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
> > > - * hash(), hash2(), hash3, and mix() are externally useful functions.
> > > - * Routines to test the hash are included if SELF_TEST is defined.
> > > - * You can use this free for any purpose.  It has no warranty.
> > > + * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
> > > + *
> > > + * These are functions for producing 32-bit hashes for hash table lookup.
> > > + * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
> > > + * are externally useful functions.  Routines to test the hash are included
> > > + * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
> > > + * the public domain.  It has no warranty.
> > >   *
> > >   * $FreeBSD$
> > >   */
> > >
> > > +#define rot(x, k) (((x) << (k)) | ((x) >> (32-(k))))
> > > +
> > >  /** @internal Internal function. NOTE: Arguments are modified. */
> > >  #define __rte_jhash_mix(a, b, c) do { \
> > > -	a -= b; a -= c; a ^= (c>>13); \
> > > -	b -= c; b -= a; b ^= (a<<8); \
> > > -	c -= a; c -= b; c ^= (b>>13); \
> > > -	a -= b; a -= c; a ^= (c>>12); \
> > > -	b -= c; b -= a; b ^= (a<<16); \
> > > -	c -= a; c -= b; c ^= (b>>5); \
> > > -	a -= b; a -= c; a ^= (c>>3); \
> > > -	b -= c; b -= a; b ^= (a<<10); \
> > > -	c -= a; c -= b; c ^= (b>>15); \
> > > +	a -= c; a ^= rot(c, 4); c += b; \
> > > +	b -= a; b ^= rot(a, 6); a += c; \
> > > +	c -= b; c ^= rot(b, 8); b += a; \
> > > +	a -= c; a ^= rot(c, 16); c += b; \
> > > +	b -= a; b ^= rot(a, 19); a += c; \
> > > +	c -= b; c ^= rot(b, 4); b += a; \
> > > +} while (0)
> > > +
> > > +#define __rte_jhash_final(a, b, c) do { \
> > > +	c ^= b; c -= rot(b, 14); \
> > > +	a ^= c; a -= rot(c, 11); \
> > > +	b ^= a; b -= rot(a, 25); \
> > > +	c ^= b; c -= rot(b, 16); \
> > > +	a ^= c; a -= rot(c, 4);  \
> > > +	b ^= a; b -= rot(a, 14); \
> > > +	c ^= b; c -= rot(b, 24); \
> > >  } while (0)
> > >
> > >  /** The golden ratio: an arbitrary value. */
> > > -#define RTE_JHASH_GOLDEN_RATIO      0x9e3779b9
> > > +#define RTE_JHASH_GOLDEN_RATIO      0xdeadbeef
> > > +
> > > +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
> > > +#define RTE_JHASH_BYTE0_SHIFT 0
> > > +#define RTE_JHASH_BYTE1_SHIFT 8
> > > +#define RTE_JHASH_BYTE2_SHIFT 16
> > > +#define RTE_JHASH_BYTE3_SHIFT 24
> > > +#else
> > > +#define RTE_JHASH_BYTE0_SHIFT 24
> > > +#define RTE_JHASH_BYTE1_SHIFT 16
> > > +#define RTE_JHASH_BYTE2_SHIFT 8
> > > +#define RTE_JHASH_BYTE3_SHIFT 0
> > > +#endif
> > > +
> > > +#define LOWER8b_MASK rte_le_to_cpu_32(0xff)
> > > +#define LOWER16b_MASK rte_le_to_cpu_32(0xffff)
> > > +#define LOWER24b_MASK rte_le_to_cpu_32(0xffffff)
> > >
> > >  /**
> > >   * The most generic version, hashes an arbitrary sequence
> > > @@ -95,42 +125,119 @@ extern "C" {
> > >  static inline uint32_t
> > >  rte_jhash(const void *key, uint32_t length, uint32_t initval)
> > >  {
> > > -	uint32_t a, b, c, len;
> > > -	const uint8_t *k = (const uint8_t *)key;
> > > -	const uint32_t *k32 = (const uint32_t *)key;
> > > +	uint32_t a, b, c;
> > > +	union {
> > > +		const void *ptr;
> > > +		size_t i;
> > > +	} u;
> > >
> > > -	len = length;
> > > -	a = b = RTE_JHASH_GOLDEN_RATIO;
> > > -	c = initval;
> > > +	/* Set up the internal state */
> > > +	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + initval;
> > >
> > > -	while (len >= 12) {
> > > -		a += k32[0];
> > > -		b += k32[1];
> > > -		c += k32[2];
> > > +	u.ptr = key;
> > >
> > > -		__rte_jhash_mix(a,b,c);
> > > +	/* Check key alignment. For x86 architecture, first case is always
> > optimal */
> > > +	if (!strcmp(RTE_ARCH,"x86_64") || !strcmp(RTE_ARCH,"i686") || (u.i
> > & 0x3) == 0) {
> >
> > Wonder why strcmp(), why not something like: 'if defined(RTE_ARCH_I686)
> > || defined(RTE_ARCH_X86_64)' as in all other places?
> > Another question what would be in case of RTE_ARCH="x86_x32"?
> > Konstantin
> 
> Functionally is the same and using this method, I can integrate all conditions in one line, so it takes less code.
> I also checked the assembly code, and the compiler removes the check if it is Intel architecture, so performance remains the same.

Well, yes, I think most modern compilers treat strcmp() as a builtin function and are able to optimise these strcmp() calls away in that case.
But we probably can't guarantee that this will always be the case for all the different compiler/libc combinations.
Also, for some reason a user might need to use the '-fno-builtin' flag while building their code.
So I would use pre-processor macros here; it is more predictable.
And that way it is consistent with other places.
 
Actually, I wonder whether you really need this sort of split between the aligned and non-aligned cases.
Wouldn't something like this work for you:

#ifndef RTE_ARCH_X86
        const uint32_t *k = (uint32_t *)((uintptr_t)key & (uintptr_t)~3);
        const uint32_t s = ((uintptr_t)key & 3) * CHAR_BIT;
#else /*X86*/
        const uint32_t *k = key;
        const uint32_t s = 0;
#endif

  while (len > 12) {
                a += k[0] >> s | (uint64_t)k[1] << (32 - s);
                b += k[1] >> s | (uint64_t)k[2] << (32 - s);
                c += k[2] >> s | (uint64_t)k[3] << (32 - s);
                k += 3;
                length -= 12;
}

switch (length) {
case 12:
    a += k[0] >> s | (uint64_t)k[1] << (32 - s);
    b += k[1] >> s | (uint64_t)k[2] << (32 - s);
    c += k[2] >> s | (uint64_t)k[3] << (32 - s);
    break;
case 11:
    a += k[0] >> s | (uint64_t)k[1] << (32 - s);
    b += k[1] >> s | (uint64_t)k[2] << (32 - s);
    c += (k[2] >> s | (uint64_t)k[3] << (32 - s)) & LOWER24b_MASK;
    break;
...
case 1:
   a += (k[0] >> s | (uint64_t)k[1] << (32 - s)) & LOWER8b_MASK;
   break;
...

In that way, even for non-aligned you don't need do 4B reads.
For x86, the compiler would do its optimisation work and strip off '>> s | (uint64_t)k[..] << (32 - s);'.

> 
> Re x86_x32, you are right, probably I need to include it. Although, I just realized that it is not used in any other place.
> Wonder if we should include it somewhere else? E.g. rte_hash_crc.h

Yep, that's true, we are not doing it for hash_crc either...
It would probably be good to have some sort of 'RTE_ARCH_X86' that would be defined for all x86 targets, and to use it wherever applicable.
But I suppose that's a subject for another patch.

Konstantin

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [dpdk-dev] [PATCH v3 3/6] hash: update jhash function with the latest available
  2015-05-06  9:36         ` De Lara Guarch, Pablo
  2015-05-06 16:11           ` Ananyev, Konstantin
@ 2015-05-07 11:11           ` Ananyev, Konstantin
  1 sibling, 0 replies; 62+ messages in thread
From: Ananyev, Konstantin @ 2015-05-07 11:11 UTC (permalink / raw)
  To: De Lara Guarch, Pablo, dev



> -----Original Message-----
> From: Ananyev, Konstantin
> Sent: Wednesday, May 06, 2015 5:11 PM
> To: De Lara Guarch, Pablo; dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH v3 3/6] hash: update jhash function with the latest available
> 
> Hi Pablo,
> 
> > -----Original Message-----
> > From: De Lara Guarch, Pablo
> > Sent: Wednesday, May 06, 2015 10:36 AM
> > To: Ananyev, Konstantin; dev@dpdk.org
> > Subject: RE: [dpdk-dev] [PATCH v3 3/6] hash: update jhash function with the latest available
> >
> > Hi Konstantin,
> >
> > > -----Original Message-----
> > > From: Ananyev, Konstantin
> > > Sent: Wednesday, May 06, 2015 1:36 AM
> > > To: De Lara Guarch, Pablo; dev@dpdk.org
> > > Subject: RE: [dpdk-dev] [PATCH v3 3/6] hash: update jhash function with the
> > > latest available
> > >
> > >
> > > Hi Pablo,
> > >
> > > > -----Original Message-----
> > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Pablo de Lara
> > > > Sent: Tuesday, May 05, 2015 3:44 PM
> > > > To: dev@dpdk.org
> > > > Subject: [dpdk-dev] [PATCH v3 3/6] hash: update jhash function with the
> > > latest available
> > > >
> > > > Jenkins hash function was developed originally in 1996,
> > > > and was integrated in first versions of DPDK.
> > > > The function has been improved in 2006,
> > > > achieving up to 60% better performance, compared to the original one.
> > > >
> > > > This patch integrates that code into the rte_jhash library.
> > > >
> > > > Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
> > > > ---
> > > >  lib/librte_hash/rte_jhash.h |  261
> > > +++++++++++++++++++++++++++++++------------
> > > >  1 files changed, 188 insertions(+), 73 deletions(-)
> > > >
> > > > diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
> > > > index a4bf5a1..0e96b7c 100644
> > > > --- a/lib/librte_hash/rte_jhash.h
> > > > +++ b/lib/librte_hash/rte_jhash.h
> > > > @@ -1,7 +1,7 @@
> > > >  /*-
> > > >   *   BSD LICENSE
> > > >   *
> > > > - *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> > > > + *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
> > > >   *   All rights reserved.
> > > >   *
> > > >   *   Redistribution and use in source and binary forms, with or without
> > > > @@ -45,38 +45,68 @@ extern "C" {
> > > >  #endif
> > > >
> > > >  #include <stdint.h>
> > > > +#include <string.h>
> > > > +#include <rte_byteorder.h>
> > > >
> > > >  /* jhash.h: Jenkins hash support.
> > > >   *
> > > > - * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
> > > > + * Copyright (C) 2006 Bob Jenkins (bob_jenkins@burtleburtle.net)
> > > >   *
> > > >   * http://burtleburtle.net/bob/hash/
> > > >   *
> > > >   * These are the credits from Bob's sources:
> > > >   *
> > > > - * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
> > > > - * hash(), hash2(), hash3, and mix() are externally useful functions.
> > > > - * Routines to test the hash are included if SELF_TEST is defined.
> > > > - * You can use this free for any purpose.  It has no warranty.
> > > > + * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
> > > > + *
> > > > + * These are functions for producing 32-bit hashes for hash table lookup.
> > > > + * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
> > > > + * are externally useful functions.  Routines to test the hash are included
> > > > + * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
> > > > + * the public domain.  It has no warranty.
> > > >   *
> > > >   * $FreeBSD$
> > > >   */
> > > >
> > > > +#define rot(x, k) (((x) << (k)) | ((x) >> (32-(k))))
> > > > +
> > > >  /** @internal Internal function. NOTE: Arguments are modified. */
> > > >  #define __rte_jhash_mix(a, b, c) do { \
> > > > -	a -= b; a -= c; a ^= (c>>13); \
> > > > -	b -= c; b -= a; b ^= (a<<8); \
> > > > -	c -= a; c -= b; c ^= (b>>13); \
> > > > -	a -= b; a -= c; a ^= (c>>12); \
> > > > -	b -= c; b -= a; b ^= (a<<16); \
> > > > -	c -= a; c -= b; c ^= (b>>5); \
> > > > -	a -= b; a -= c; a ^= (c>>3); \
> > > > -	b -= c; b -= a; b ^= (a<<10); \
> > > > -	c -= a; c -= b; c ^= (b>>15); \
> > > > +	a -= c; a ^= rot(c, 4); c += b; \
> > > > +	b -= a; b ^= rot(a, 6); a += c; \
> > > > +	c -= b; c ^= rot(b, 8); b += a; \
> > > > +	a -= c; a ^= rot(c, 16); c += b; \
> > > > +	b -= a; b ^= rot(a, 19); a += c; \
> > > > +	c -= b; c ^= rot(b, 4); b += a; \
> > > > +} while (0)
> > > > +
> > > > +#define __rte_jhash_final(a, b, c) do { \
> > > > +	c ^= b; c -= rot(b, 14); \
> > > > +	a ^= c; a -= rot(c, 11); \
> > > > +	b ^= a; b -= rot(a, 25); \
> > > > +	c ^= b; c -= rot(b, 16); \
> > > > +	a ^= c; a -= rot(c, 4);  \
> > > > +	b ^= a; b -= rot(a, 14); \
> > > > +	c ^= b; c -= rot(b, 24); \
> > > >  } while (0)
> > > >
> > > >  /** The golden ratio: an arbitrary value. */
> > > > -#define RTE_JHASH_GOLDEN_RATIO      0x9e3779b9
> > > > +#define RTE_JHASH_GOLDEN_RATIO      0xdeadbeef
> > > > +
> > > > +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
> > > > +#define RTE_JHASH_BYTE0_SHIFT 0
> > > > +#define RTE_JHASH_BYTE1_SHIFT 8
> > > > +#define RTE_JHASH_BYTE2_SHIFT 16
> > > > +#define RTE_JHASH_BYTE3_SHIFT 24
> > > > +#else
> > > > +#define RTE_JHASH_BYTE0_SHIFT 24
> > > > +#define RTE_JHASH_BYTE1_SHIFT 16
> > > > +#define RTE_JHASH_BYTE2_SHIFT 8
> > > > +#define RTE_JHASH_BYTE3_SHIFT 0
> > > > +#endif
> > > > +
> > > > +#define LOWER8b_MASK rte_le_to_cpu_32(0xff)
> > > > +#define LOWER16b_MASK rte_le_to_cpu_32(0xffff)
> > > > +#define LOWER24b_MASK rte_le_to_cpu_32(0xffffff)
> > > >
> > > >  /**
> > > >   * The most generic version, hashes an arbitrary sequence
> > > > @@ -95,42 +125,119 @@ extern "C" {
> > > >  static inline uint32_t
> > > >  rte_jhash(const void *key, uint32_t length, uint32_t initval)
> > > >  {
> > > > -	uint32_t a, b, c, len;
> > > > -	const uint8_t *k = (const uint8_t *)key;
> > > > -	const uint32_t *k32 = (const uint32_t *)key;
> > > > +	uint32_t a, b, c;
> > > > +	union {
> > > > +		const void *ptr;
> > > > +		size_t i;
> > > > +	} u;
> > > >
> > > > -	len = length;
> > > > -	a = b = RTE_JHASH_GOLDEN_RATIO;
> > > > -	c = initval;
> > > > +	/* Set up the internal state */
> > > > +	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + initval;
> > > >
> > > > -	while (len >= 12) {
> > > > -		a += k32[0];
> > > > -		b += k32[1];
> > > > -		c += k32[2];
> > > > +	u.ptr = key;
> > > >
> > > > -		__rte_jhash_mix(a,b,c);
> > > > +	/* Check key alignment. For x86 architecture, first case is always
> > > optimal */
> > > > +	if (!strcmp(RTE_ARCH,"x86_64") || !strcmp(RTE_ARCH,"i686") || (u.i
> > > & 0x3) == 0) {
> > >
> > > Wonder why strcmp(), why not something like: 'if defined(RTE_ARCH_I686)
> > > || defined(RTE_ARCH_X86_64)' as in all other places?
> > > Another question what would be in case of RTE_ARCH="x86_x32"?
> > > Konstantin
> >
> > Functionally is the same and using this method, I can integrate all conditions in one line, so it takes less code.
> > I also checked the assembly code, and the compiler removes the check if it is Intel architecture, so performance remains the same.
> 
> Well,  yes I think most modern compilers  treat strcmp() as a builtin function and are able to optimise these strcmp() calls off for that
> case.
> But  we probably can't guarantee that it would always be the case for all different compiler/libc combinations.
> Again, by some reason user might need to use ' -fno-builtin' flag while building his stuff.
> So I would use pre-processor macros here, it is more predictable.
> Again, that way it is consistent with other places.
> 
> Actually I wonder do you really need such sort of diversity for aligned/non-aligned case?
> Wonder wouldn't something like that work for you:
> 
> #infdef  RTE_ARCH_X86
>         const uint32_t *k = (uint32_t *)((uintptr_t)key & (uintptr_t)~3);
>         const uint32_t s = ((uintptr_t)key & 3) * CHAR_BIT;
> #else /*X86*/
>         const uint32_t *k = key;
>         const uint32_t s = 0;
> #endif
> 
>   while (len > 12) {
>                 a += k[0] >> s | (uint64_t)k[1] << (32 - s);
>                 b += k[1] >> s | (uint64_t)k[2] << (32 - s);
>                 c += k[2] >> s | (uint64_t)k[3] << (32 - s);
>                 k += 3;
>                 length -= 12;
> }
> 
> switch (length) {
> case 12:
>     a += k[0] >> s | (uint64_t)k[1] << (32 - s);
>     b += k[1] >> s | (uint64_t)k[2] << (32 - s);
>     c += k[2] >> s | (uint64_t)k[3] << (32 - s);
>     break;
> case 11:
>     a += k[0] >> s | (uint64_t)k[1] << (32 - s);
>     b += k[1] >> s | (uint64_t)k[2] << (32 - s);
>     c += (k[2] >> s | (uint64_t)k[3] << (32 - s)) & & LOWER24b_MASK;
>     break;
> ...
> case 1:
>    a += (k[0] >> s | (uint64_t)k[1] << (32 - s)) & LOWER8b_MASK;
>    break;
> ...
> 
> In that way, even for non-aligned you don't need do 4B reads.
> For x86, compiler would do it's optimisation work and strip off '>> s | (uint64_t)k[..] << (32 - s);'.
> 

Actually, as Sergio pointed out, that approach might penalise the non-x86 4B-aligned case.
So a special path for s == 0 is probably still needed, i.e.:
if (s==0) {...; a += k[0]; ...} else {...; a += k[0] >> s | (uint64_t)k[1] << (32 - s);...}
Konstantin

> >
> > Re x86_x32, you are right, probably I need to include it. Although, I just realized that it is not used in any other place.
> > Wonder if we should include it somewhere else? E.g. rte_hash_crc.h
> 
> Yep, that's true we are not doing it for hash_crc also...
> Would probably good to have some sort of ' RTE_ARCH_X86' - that would be defined for all x86 targets and use it whenever applicable.
> But I suppose, that's a subject for another patch.
> 
> Konstantin
> 

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v4 0/6] update jhash function
  2015-05-05 14:43   ` [dpdk-dev] [PATCH v3 0/6] update jhash function Pablo de Lara
                       ` (5 preceding siblings ...)
  2015-05-05 14:43     ` [dpdk-dev] [PATCH v3 6/6] hash: rename rte_jhash2 to rte_jhash_32b Pablo de Lara
@ 2015-05-12 11:02     ` Pablo de Lara
  2015-05-12 11:02       ` [dpdk-dev] [PATCH v4 1/6] test/hash: move hash function perf tests to separate file Pablo de Lara
                         ` (8 more replies)
  6 siblings, 9 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-05-12 11:02 UTC (permalink / raw)
  To: dev

The Jenkins hash function was originally developed in 1996,
and was integrated in the first versions of DPDK.
The function was improved in 2006,
achieving up to 60% better performance compared to the original one.

This patchset updates the current jhash in DPDK,
including two new functions that generate two hashes from a single key.

It also moves the existing hash function performance tests into
a separate file, so that they are quicker to run.

changes in v4:
- Simplify key alignment checks
- Include missing x86 arch check

changes in v3:

- Update rte_jhash_1word, rte_jhash_2words and rte_jhash_3words
  functions

changes in v2:

- Split single commit in three commits, one that updates the existing functions
  and another that adds two new functions and use one of those functions
  as a base to be called by the other ones.
- Remove some unnecessary ifdefs in the code.
- Add new macros to help on the reutilization of constants
- Separate hash function performance tests to another file
  and improve cycle measurements.
- Rename existing function rte_jhash2 to rte_jhash_32b
  (something more meaningful) and mark rte_jhash2 as
  deprecated

Pablo de Lara (6):
  test/hash: move hash function perf tests to separate file
  test/hash: improve accuracy on cycle measurements
  hash: update jhash function with the latest available
  hash: add two new functions to jhash library
  hash: remove duplicated code
  hash: rename rte_jhash2 to rte_jhash_32b

 app/test/Makefile               |    1 +
 app/test/test_func_reentrancy.c |    2 +-
 app/test/test_hash.c            |    4 +-
 app/test/test_hash_func_perf.c  |  145 +++++++++++++++++
 app/test/test_hash_perf.c       |   71 +--------
 lib/librte_hash/rte_jhash.h     |  338 +++++++++++++++++++++++++++++----------
 6 files changed, 402 insertions(+), 159 deletions(-)
 create mode 100644 app/test/test_hash_func_perf.c

-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v4 1/6] test/hash: move hash function perf tests to separate file
  2015-05-12 11:02     ` [dpdk-dev] [PATCH v4 0/6] update jhash function Pablo de Lara
@ 2015-05-12 11:02       ` Pablo de Lara
  2015-05-12 11:02       ` [dpdk-dev] [PATCH v4 2/6] test/hash: improve accuracy on cycle measurements Pablo de Lara
                         ` (7 subsequent siblings)
  8 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-05-12 11:02 UTC (permalink / raw)
  To: dev

This patch moves the hash function performance tests to a separate file,
so users can check the performance of the existing hash functions more quickly,
without having to run all the other hash operation performance tests,
which take some time.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 app/test/Makefile              |    1 +
 app/test/test_hash_func_perf.c |  145 ++++++++++++++++++++++++++++++++++++++++
 app/test/test_hash_perf.c      |   71 +-------------------
 3 files changed, 147 insertions(+), 70 deletions(-)
 create mode 100644 app/test/test_hash_func_perf.c

diff --git a/app/test/Makefile b/app/test/Makefile
index 4aca77c..77a9c42 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -83,6 +83,7 @@ SRCS-y += test_memcpy_perf.c
 
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash.c
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_perf.c
+SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_func_perf.c
 
 SRCS-$(CONFIG_RTE_LIBRTE_LPM) += test_lpm.c
 SRCS-$(CONFIG_RTE_LIBRTE_LPM) += test_lpm6.c
diff --git a/app/test/test_hash_func_perf.c b/app/test/test_hash_func_perf.c
new file mode 100644
index 0000000..ba31c53
--- /dev/null
+++ b/app/test/test_hash_func_perf.c
@@ -0,0 +1,145 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_cycles.h>
+#include <rte_random.h>
+
+#include "test.h"
+
+#include <rte_hash.h>
+#include <rte_jhash.h>
+#include <rte_hash_crc.h>
+
+/*******************************************************************************
+ * Hash function performance test configuration section. Each performance test
+ * will be performed HASHTEST_ITERATIONS times.
+ *
+ * The three arrays below control what tests are performed. Every combination
+ * from the array entries is tested.
+ */
+#define HASHTEST_ITERATIONS 1000000
+
+static rte_hash_function hashtest_funcs[] = {rte_jhash, rte_hash_crc};
+static uint32_t hashtest_initvals[] = {0};
+static uint32_t hashtest_key_lens[] = {2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, 31, 32, 33, 63, 64};
+/******************************************************************************/
+
+/*
+ * To help print out name of hash functions.
+ */
+static const char *get_hash_name(rte_hash_function f)
+{
+	if (f == rte_jhash)
+		return "jhash";
+
+	if (f == rte_hash_crc)
+		return "rte_hash_crc";
+
+	return "UnknownHash";
+}
+
+/*
+ * Test a hash function.
+ */
+static void run_hash_func_test(rte_hash_function f, uint32_t init_val,
+		uint32_t key_len)
+{
+	static uint8_t key[RTE_HASH_KEY_LENGTH_MAX];
+	uint64_t ticks = 0, start, end;
+	unsigned i, j;
+
+	for (i = 0; i < HASHTEST_ITERATIONS; i++) {
+
+		for (j = 0; j < key_len; j++)
+			key[j] = (uint8_t) rte_rand();
+
+		start = rte_rdtsc();
+		f(key, key_len, init_val);
+		end = rte_rdtsc();
+		ticks += end - start;
+	}
+
+	printf("%-12s, %-18u, %-13u, %.02f\n", get_hash_name(f), (unsigned) key_len,
+			(unsigned) init_val, (double)ticks / HASHTEST_ITERATIONS);
+}
+
+/*
+ * Test all hash functions.
+ */
+static void run_hash_func_tests(void)
+{
+	unsigned i, j, k;
+
+	printf(" *** Hash function performance test results ***\n");
+	printf(" Number of iterations for each test = %d\n",
+			HASHTEST_ITERATIONS);
+	printf("Hash Func.  , Key Length (bytes), Initial value, Ticks/Op.\n");
+
+	for (i = 0;
+	     i < sizeof(hashtest_funcs) / sizeof(rte_hash_function);
+	     i++) {
+		for (j = 0;
+		     j < sizeof(hashtest_initvals) / sizeof(uint32_t);
+		     j++) {
+			for (k = 0;
+			     k < sizeof(hashtest_key_lens) / sizeof(uint32_t);
+			     k++) {
+				run_hash_func_test(hashtest_funcs[i],
+						hashtest_initvals[j],
+						hashtest_key_lens[k]);
+			}
+		}
+	}
+}
+
+static int
+test_hash_func_perf(void)
+{
+	run_hash_func_tests();
+
+	return 0;
+}
+
+static struct test_command hash_func_perf_cmd = {
+	.command = "hash_func_perf_autotest",
+	.callback = test_hash_func_perf,
+};
+REGISTER_TEST_COMMAND(hash_func_perf_cmd);
diff --git a/app/test/test_hash_perf.c b/app/test/test_hash_perf.c
index 6eabb21..d0e5ce0 100644
--- a/app/test/test_hash_perf.c
+++ b/app/test/test_hash_perf.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -85,20 +85,6 @@ struct tbl_perf_test_params {
 #define LOCAL_FBK_HASH_ENTRIES_MAX (1 << 15)
 
 /*******************************************************************************
- * Hash function performance test configuration section. Each performance test
- * will be performed HASHTEST_ITERATIONS times.
- *
- * The five arrays below control what tests are performed. Every combination
- * from the array entries is tested.
- */
-#define HASHTEST_ITERATIONS 1000000
-
-static rte_hash_function hashtest_funcs[] = {rte_jhash, rte_hash_crc};
-static uint32_t hashtest_initvals[] = {0};
-static uint32_t hashtest_key_lens[] = {2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, 31, 32, 33, 63, 64};
-/******************************************************************************/
-
-/*******************************************************************************
  * Hash table performance test configuration section.
  */
 struct tbl_perf_test_params tbl_perf_params[] =
@@ -617,60 +603,6 @@ static int run_all_tbl_perf_tests(void)
 	return 0;
 }
 
-/*
- * Test a hash function.
- */
-static void run_hash_func_test(rte_hash_function f, uint32_t init_val,
-		uint32_t key_len)
-{
-	static uint8_t key[RTE_HASH_KEY_LENGTH_MAX];
-	uint64_t ticks = 0, start, end;
-	unsigned i, j;
-
-	for (i = 0; i < HASHTEST_ITERATIONS; i++) {
-
-		for (j = 0; j < key_len; j++)
-			key[j] = (uint8_t) rte_rand();
-
-		start = rte_rdtsc();
-		f(key, key_len, init_val);
-		end = rte_rdtsc();
-		ticks += end - start;
-	}
-
-	printf("%-12s, %-18u, %-13u, %.02f\n", get_hash_name(f), (unsigned) key_len,
-			(unsigned) init_val, (double)ticks / HASHTEST_ITERATIONS);
-}
-
-/*
- * Test all hash functions.
- */
-static void run_hash_func_tests(void)
-{
-	unsigned i, j, k;
-
-	printf("\n\n *** Hash function performance test results ***\n");
-	printf(" Number of iterations for each test = %d\n",
-			HASHTEST_ITERATIONS);
-	printf("Hash Func.  , Key Length (bytes), Initial value, Ticks/Op.\n");
-
-	for (i = 0;
-	     i < sizeof(hashtest_funcs) / sizeof(rte_hash_function);
-	     i++) {
-		for (j = 0;
-		     j < sizeof(hashtest_initvals) / sizeof(uint32_t);
-		     j++) {
-			for (k = 0;
-			     k < sizeof(hashtest_key_lens) / sizeof(uint32_t);
-			     k++) {
-				run_hash_func_test(hashtest_funcs[i],
-						hashtest_initvals[j],
-						hashtest_key_lens[k]);
-			}
-		}
-	}
-}
-
 /* Control operation of performance testing of fbk hash. */
 #define LOAD_FACTOR 0.667	/* How full to make the hash table. */
 #define TEST_SIZE 1000000	/* How many operations to time. */
@@ -757,7 +689,6 @@ test_hash_perf(void)
 {
 	if (run_all_tbl_perf_tests() < 0)
 		return -1;
-	run_hash_func_tests();
 
 	if (fbk_hash_perf_test() < 0)
 		return -1;
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v4 2/6] test/hash: improve accuracy on cycle measurements
  2015-05-12 11:02     ` [dpdk-dev] [PATCH v4 0/6] update jhash function Pablo de Lara
  2015-05-12 11:02       ` [dpdk-dev] [PATCH v4 1/6] test/hash: move hash function perf tests to separate file Pablo de Lara
@ 2015-05-12 11:02       ` Pablo de Lara
  2015-05-12 11:02       ` [dpdk-dev] [PATCH v4 3/6] hash: update jhash function with the latest available Pablo de Lara
                         ` (6 subsequent siblings)
  8 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-05-12 11:02 UTC (permalink / raw)
  To: dev

Cycles per hash calculation were measured per single operation.
It is much more accurate to run several iterations between measurements
and divide by the number of iterations.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 app/test/test_hash_func_perf.c |   18 +++++++++---------
 1 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/app/test/test_hash_func_perf.c b/app/test/test_hash_func_perf.c
index ba31c53..004c9be 100644
--- a/app/test/test_hash_func_perf.c
+++ b/app/test/test_hash_func_perf.c
@@ -82,21 +82,21 @@ static const char *get_hash_name(rte_hash_function f)
 static void run_hash_func_test(rte_hash_function f, uint32_t init_val,
 		uint32_t key_len)
 {
-	static uint8_t key[RTE_HASH_KEY_LENGTH_MAX];
-	uint64_t ticks = 0, start, end;
+	static uint8_t key[HASHTEST_ITERATIONS][RTE_HASH_KEY_LENGTH_MAX];
+	uint64_t ticks, start, end;
 	unsigned i, j;
 
 	for (i = 0; i < HASHTEST_ITERATIONS; i++) {
-
 		for (j = 0; j < key_len; j++)
-			key[j] = (uint8_t) rte_rand();
-
-		start = rte_rdtsc();
-		f(key, key_len, init_val);
-		end = rte_rdtsc();
-		ticks += end - start;
+			key[i][j] = (uint8_t) rte_rand();
 	}
 
+	start = rte_rdtsc();
+	for (i = 0; i < HASHTEST_ITERATIONS; i++)
+		f(key[i], key_len, init_val);
+	end = rte_rdtsc();
+	ticks = end - start;
+
 	printf("%-12s, %-18u, %-13u, %.02f\n", get_hash_name(f), (unsigned) key_len,
 			(unsigned) init_val, (double)ticks / HASHTEST_ITERATIONS);
 }
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v4 3/6] hash: update jhash function with the latest available
  2015-05-12 11:02     ` [dpdk-dev] [PATCH v4 0/6] update jhash function Pablo de Lara
  2015-05-12 11:02       ` [dpdk-dev] [PATCH v4 1/6] test/hash: move hash function perf tests to separate file Pablo de Lara
  2015-05-12 11:02       ` [dpdk-dev] [PATCH v4 2/6] test/hash: improve accuracy on cycle measurements Pablo de Lara
@ 2015-05-12 11:02       ` Pablo de Lara
  2015-05-12 11:02       ` [dpdk-dev] [PATCH v4 4/6] hash: add two new functions to jhash library Pablo de Lara
                         ` (5 subsequent siblings)
  8 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-05-12 11:02 UTC (permalink / raw)
  To: dev

The Jenkins hash function was originally developed in 1996,
and was integrated in the first versions of DPDK.
The function was improved in 2006,
achieving up to 60% better performance compared to the original one.

This patch integrates that code into the rte_jhash library.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 lib/librte_hash/rte_jhash.h |  266 +++++++++++++++++++++++++++++++------------
 1 files changed, 193 insertions(+), 73 deletions(-)

diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index a4bf5a1..41297ab 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -45,38 +45,62 @@ extern "C" {
 #endif
 
 #include <stdint.h>
+#include <string.h>
+#include <rte_byteorder.h>
 
 /* jhash.h: Jenkins hash support.
  *
- * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
+ * Copyright (C) 2006 Bob Jenkins (bob_jenkins@burtleburtle.net)
  *
  * http://burtleburtle.net/bob/hash/
  *
  * These are the credits from Bob's sources:
  *
- * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
- * hash(), hash2(), hash3, and mix() are externally useful functions.
- * Routines to test the hash are included if SELF_TEST is defined.
- * You can use this free for any purpose.  It has no warranty.
+ * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+ *
+ * These are functions for producing 32-bit hashes for hash table lookup.
+ * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
+ * are externally useful functions.  Routines to test the hash are included
+ * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
+ * the public domain.  It has no warranty.
  *
  * $FreeBSD$
  */
 
+#define rot(x, k) (((x) << (k)) | ((x) >> (32-(k))))
+
 /** @internal Internal function. NOTE: Arguments are modified. */
 #define __rte_jhash_mix(a, b, c) do { \
-	a -= b; a -= c; a ^= (c>>13); \
-	b -= c; b -= a; b ^= (a<<8); \
-	c -= a; c -= b; c ^= (b>>13); \
-	a -= b; a -= c; a ^= (c>>12); \
-	b -= c; b -= a; b ^= (a<<16); \
-	c -= a; c -= b; c ^= (b>>5); \
-	a -= b; a -= c; a ^= (c>>3); \
-	b -= c; b -= a; b ^= (a<<10); \
-	c -= a; c -= b; c ^= (b>>15); \
+	a -= c; a ^= rot(c, 4); c += b; \
+	b -= a; b ^= rot(a, 6); a += c; \
+	c -= b; c ^= rot(b, 8); b += a; \
+	a -= c; a ^= rot(c, 16); c += b; \
+	b -= a; b ^= rot(a, 19); a += c; \
+	c -= b; c ^= rot(b, 4); b += a; \
+} while (0)
+
+#define __rte_jhash_final(a, b, c) do { \
+	c ^= b; c -= rot(b, 14); \
+	a ^= c; a -= rot(c, 11); \
+	b ^= a; b -= rot(a, 25); \
+	c ^= b; c -= rot(b, 16); \
+	a ^= c; a -= rot(c, 4);  \
+	b ^= a; b -= rot(a, 14); \
+	c ^= b; c -= rot(b, 24); \
 } while (0)
 
 /** The golden ratio: an arbitrary value. */
-#define RTE_JHASH_GOLDEN_RATIO      0x9e3779b9
+#define RTE_JHASH_GOLDEN_RATIO      0xdeadbeef
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+#define BIT_SHIFT(x, y, k) (((x) >> (k)) | ((uint64_t)(y) << (32-(k))))
+#else
+#define BIT_SHIFT(x, y, k) (((uint64_t)(x) << (k)) | ((y) >> (32-(k))))
+#endif
+
+#define LOWER8b_MASK rte_le_to_cpu_32(0xff)
+#define LOWER16b_MASK rte_le_to_cpu_32(0xffff)
+#define LOWER24b_MASK rte_le_to_cpu_32(0xffffff)
 
 /**
  * The most generic version, hashes an arbitrary sequence
@@ -95,42 +119,130 @@ extern "C" {
 static inline uint32_t
 rte_jhash(const void *key, uint32_t length, uint32_t initval)
 {
-	uint32_t a, b, c, len;
-	const uint8_t *k = (const uint8_t *)key;
-	const uint32_t *k32 = (const uint32_t *)key;
+	uint32_t a, b, c;
 
-	len = length;
-	a = b = RTE_JHASH_GOLDEN_RATIO;
-	c = initval;
+	/* Set up the internal state */
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + initval;
 
-	while (len >= 12) {
-		a += k32[0];
-		b += k32[1];
-		c += k32[2];
+	/* Check key alignment. For x86 architecture, first case is always optimal */
+#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_X32)
+	const uint32_t *k = key;
+	const uint32_t s = 0;
+#else
+	const uint32_t *k = (uint32_t *)(uintptr_t)key & (uintptr_t)~3);
+	const uint32_t s = ((uintptr_t)key & 3) * CHAR_BIT;
+#endif
 
-		__rte_jhash_mix(a,b,c);
+	if (s == 0) {
+		while (length > 12) {
+			a += k[0];
+			b += k[1];
+			c += k[2];
 
-		k += (3 * sizeof(uint32_t)), k32 += 3;
-		len -= (3 * sizeof(uint32_t));
-	}
+			__rte_jhash_mix(a, b, c);
 
-	c += length;
-	switch (len) {
-		case 11: c += ((uint32_t)k[10] << 24);
-		case 10: c += ((uint32_t)k[9] << 16);
-		case 9 : c += ((uint32_t)k[8] << 8);
-		case 8 : b += ((uint32_t)k[7] << 24);
-		case 7 : b += ((uint32_t)k[6] << 16);
-		case 6 : b += ((uint32_t)k[5] << 8);
-		case 5 : b += k[4];
-		case 4 : a += ((uint32_t)k[3] << 24);
-		case 3 : a += ((uint32_t)k[2] << 16);
-		case 2 : a += ((uint32_t)k[1] << 8);
-		case 1 : a += k[0];
-		default: break;
-	};
+			k += 3;
+			length -= 12;
+		}
 
-	__rte_jhash_mix(a,b,c);
+		switch (length) {
+		case 12:
+			c += k[2]; b += k[1]; a += k[0]; break;
+		case 11:
+			c += k[2] & LOWER24b_MASK; b += k[1]; a += k[0]; break;
+		case 10:
+			c += k[2] & LOWER16b_MASK; b += k[1]; a += k[0]; break;
+		case 9:
+			c += k[2] & LOWER8b_MASK; b += k[1]; a += k[0]; break;
+		case 8:
+			b += k[1]; a += k[0]; break;
+		case 7:
+			b += k[1] & LOWER24b_MASK; a += k[0]; break;
+		case 6:
+			b += k[1] & LOWER16b_MASK; a += k[0]; break;
+		case 5:
+			b += k[1] & LOWER8b_MASK; a += k[0]; break;
+		case 4:
+			a += k[0]; break;
+		case 3:
+			a += k[0] & LOWER24b_MASK; break;
+		case 2:
+			a += k[0] & LOWER16b_MASK; break;
+		case 1:
+			a += k[0] & LOWER8b_MASK; break;
+		/* zero length strings require no mixing */
+		case 0:
+			return c;
+		};
+	} else {
+		/* all but the last block: affect some 32 bits of (a, b, c) */
+		while (length > 12) {
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s);
+			__rte_jhash_mix(a, b, c);
+
+			k += 3;
+			length -= 12;
+		}
+
+		/* last block: affect all 32 bits of (c) */
+		switch (length) {
+		case 12:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s);
+			break;
+		case 11:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s) & LOWER24b_MASK;
+			break;
+		case 10:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s) & LOWER16b_MASK;
+			break;
+		case 9:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s) & LOWER8b_MASK;
+			break;
+		case 8:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			break;
+		case 7:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s) & LOWER24b_MASK;
+			break;
+		case 6:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s) & LOWER16b_MASK;
+			break;
+		case 5:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s) & LOWER8b_MASK;
+			break;
+		case 4:
+			a += BIT_SHIFT(k[0], k[1], s);
+			break;
+		case 3:
+			a += BIT_SHIFT(k[0], k[1], s) & LOWER24b_MASK;
+			break;
+		case 2:
+			a += BIT_SHIFT(k[0], k[1], s) & LOWER16b_MASK;
+			break;
+		case 1:
+			a += BIT_SHIFT(k[0], k[1], s) & LOWER8b_MASK;
+			break;
+		/* zero length strings require no mixing */
+		case 0:
+			return c;
+		}
+	}
+
+	__rte_jhash_final(a, b, c);
 
 	return c;
 }
@@ -151,33 +263,51 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
 static inline uint32_t
 rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
 {
-	uint32_t a, b, c, len;
+	uint32_t a, b, c;
 
-	a = b = RTE_JHASH_GOLDEN_RATIO;
-	c = initval;
-	len = length;
+	/* Set up the internal state */
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length) << 2) + initval;
 
-	while (len >= 3) {
+	/* Handle most of the key */
+	while (length > 3) {
 		a += k[0];
 		b += k[1];
 		c += k[2];
+
 		__rte_jhash_mix(a, b, c);
-		k += 3; len -= 3;
-	}
 
-	c += length * 4;
+		k += 3;
+		length -= 3;
+	}
 
-	switch (len) {
-		case 2 : b += k[1];
-		case 1 : a += k[0];
-		default: break;
+	/* Handle the last 3 uint32_t's */
+	switch (length) {
+	case 3:
+		c += k[2];
+	case 2:
+		b += k[1];
+	case 1:
+		a += k[0];
+		__rte_jhash_final(a, b, c);
+	/* case 0: nothing left to add */
+	case 0:
+		break;
 	};
 
-	__rte_jhash_mix(a,b,c);
-
 	return c;
 }
 
+static inline uint32_t
+__rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
+{
+	a += RTE_JHASH_GOLDEN_RATIO + initval;
+	b += RTE_JHASH_GOLDEN_RATIO + initval;
+	c += RTE_JHASH_GOLDEN_RATIO + initval;
+
+	__rte_jhash_final(a, b, c);
+
+	return c;
+}
 
 /**
  * A special ultra-optimized versions that knows it is hashing exactly
@@ -197,17 +327,7 @@ rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
 static inline uint32_t
 rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
 {
-	a += RTE_JHASH_GOLDEN_RATIO;
-	b += RTE_JHASH_GOLDEN_RATIO;
-	c += initval;
-
-	__rte_jhash_mix(a, b, c);
-
-	/*
-	 * NOTE: In particular the "c += length; __rte_jhash_mix(a,b,c);"
-	 *       normally done at the end is not done here.
-	 */
-	return c;
+	return __rte_jhash_3words(a + 12, b + 12, c + 12, initval);
 }
 
 /**
@@ -226,7 +346,7 @@ rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
 static inline uint32_t
 rte_jhash_2words(uint32_t a, uint32_t b, uint32_t initval)
 {
-	return rte_jhash_3words(a, b, 0, initval);
+	return __rte_jhash_3words(a + 8, b + 8, 8, initval);
 }
 
 /**
@@ -243,7 +363,7 @@ rte_jhash_2words(uint32_t a, uint32_t b, uint32_t initval)
 static inline uint32_t
 rte_jhash_1word(uint32_t a, uint32_t initval)
 {
-	return rte_jhash_3words(a, 0, 0, initval);
+	return __rte_jhash_3words(a + 4, 4, 4, initval);
 }
 
 #ifdef __cplusplus
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v4 4/6] hash: add two new functions to jhash library
  2015-05-12 11:02     ` [dpdk-dev] [PATCH v4 0/6] update jhash function Pablo de Lara
                         ` (2 preceding siblings ...)
  2015-05-12 11:02       ` [dpdk-dev] [PATCH v4 3/6] hash: update jhash function with the latest available Pablo de Lara
@ 2015-05-12 11:02       ` Pablo de Lara
  2015-05-12 11:02       ` [dpdk-dev] [PATCH v4 5/6] hash: remove duplicated code Pablo de Lara
                         ` (4 subsequent siblings)
  8 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-05-12 11:02 UTC (permalink / raw)
  To: dev

With the jhash update, two new functions were introduced:

- rte_jhash_2hashes: Same as rte_jhash, but takes two seeds
                     and returns two hashes (uint32_ts)

- rte_jhash2_2hashes: Same as rte_jhash2, but takes two seeds
                     and returns two hashes (uint32_ts)

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 lib/librte_hash/rte_jhash.h |  206 +++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 206 insertions(+), 0 deletions(-)

diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index 41297ab..f1c0a8a 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -297,6 +297,212 @@ rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
 	return c;
 }
 
+/**
+ * Same as rte_jhash, but takes two seeds and return two uint32_ts.
+ * pc and pb must be non-null, and *pc and *pb must both be initialized
+ * with seeds. If you pass in (*pb)=0, the output (*pc) will be
+ * the same as the return value from rte_jhash.
+ *
+ * @param k
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in bytes.
+ * @param pc
+ *   IN: seed OUT: primary hash value.
+ * @param pb
+ *   IN: second seed OUT: secondary hash value.
+ */
+static inline void
+rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb)
+{
+	uint32_t a, b, c;
+
+	/* Set up the internal state */
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + *pc;
+	c += *pb;
+
+	/* Check key alignment. For x86 architecture, first case is always optimal */
+#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_X32)
+	const uint32_t *k = key;
+	const uint32_t s = 0;
+#else
+	const uint32_t *k = (uint32_t *)(uintptr_t)key & (uintptr_t)~3);
+	const uint32_t s = ((uintptr_t)key & 3) * CHAR_BIT;
+#endif
+
+	if (s == 0) {
+		while (length > 12) {
+			a += k[0];
+			b += k[1];
+			c += k[2];
+
+			__rte_jhash_mix(a, b, c);
+
+			k += 3;
+			length -= 12;
+		}
+
+		switch (length) {
+		case 12:
+			c += k[2]; b += k[1]; a += k[0]; break;
+		case 11:
+			c += k[2] & LOWER24b_MASK; b += k[1]; a += k[0]; break;
+		case 10:
+			c += k[2] & LOWER16b_MASK; b += k[1]; a += k[0]; break;
+		case 9:
+			c += k[2] & LOWER8b_MASK; b += k[1]; a += k[0]; break;
+		case 8:
+			b += k[1]; a += k[0]; break;
+		case 7:
+			b += k[1] & LOWER24b_MASK; a += k[0]; break;
+		case 6:
+			b += k[1] & LOWER16b_MASK; a += k[0]; break;
+		case 5:
+			b += k[1] & LOWER8b_MASK; a += k[0]; break;
+		case 4:
+			a += k[0]; break;
+		case 3:
+			a += k[0] & LOWER24b_MASK; break;
+		case 2:
+			a += k[0] & LOWER16b_MASK; break;
+		case 1:
+			a += k[0] & LOWER8b_MASK; break;
+		/* zero length strings require no mixing */
+		case 0:
+			*pc = c;
+			*pb = b;
+			return;
+		};
+	} else {
+		/* all but the last block: affect some 32 bits of (a, b, c) */
+		while (length > 12) {
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s);
+			__rte_jhash_mix(a, b, c);
+
+			k += 3;
+			length -= 12;
+		}
+
+		/* last block: affect all 32 bits of (c) */
+		switch (length) {
+		case 12:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s);
+			break;
+		case 11:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s) & LOWER24b_MASK;
+			break;
+		case 10:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s) & LOWER16b_MASK;
+			break;
+		case 9:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s) & LOWER8b_MASK;
+			break;
+		case 8:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			break;
+		case 7:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s) & LOWER24b_MASK;
+			break;
+		case 6:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s) & LOWER16b_MASK;
+			break;
+		case 5:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s) & LOWER8b_MASK;
+			break;
+		case 4:
+			a += BIT_SHIFT(k[0], k[1], s);
+			break;
+		case 3:
+			a += BIT_SHIFT(k[0], k[1], s) & LOWER24b_MASK;
+			break;
+		case 2:
+			a += BIT_SHIFT(k[0], k[1], s) & LOWER16b_MASK;
+			break;
+		case 1:
+			a += BIT_SHIFT(k[0], k[1], s) & LOWER8b_MASK;
+			break;
+		/* zero length strings require no mixing */
+		case 0:
+			*pc = c;
+			*pb = b;
+			return;
+		}
+	}
+
+	__rte_jhash_final(a, b, c);
+
+	*pc = c;
+	*pb = b;
+}
+
+/**
+ * Same as rte_jhash2, but takes two seeds and return two uint32_ts.
+ * pc and pb must be non-null, and *pc and *pb must both be initialized
+ * with seeds. If you pass in (*pb)=0, the output (*pc) will be
+ * the same as the return value from rte_jhash2.
+ *
+ * @param k
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in units of 4 bytes.
+ * @param pc
+ *   IN: seed OUT: primary hash value.
+ * @param pb
+ *   IN: second seed OUT: secondary hash value.
+ */
+static inline void
+rte_jhash2_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb)
+{
+	uint32_t a, b, c;
+
+	/* Set up the internal state */
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length) << 2) + *pc;
+	c += *pb;
+
+	/* Handle most of the key */
+	while (length > 3) {
+		a += k[0];
+		b += k[1];
+		c += k[2];
+
+		__rte_jhash_mix(a, b, c);
+
+		k += 3;
+		length -= 3;
+	}
+
+	/* Handle the last 3 uint32_t's */
+	switch (length) {
+	case 3:
+		c += k[2];
+	case 2:
+		b += k[1];
+	case 1:
+		a += k[0];
+		__rte_jhash_final(a, b, c);
+	/* case 0: nothing left to add */
+	case 0:
+		break;
+	};
+
+	*pc = c;
+	*pb = b;
+}
+
 static inline uint32_t
 __rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
 {
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v4 5/6] hash: remove duplicated code
  2015-05-12 11:02     ` [dpdk-dev] [PATCH v4 0/6] update jhash function Pablo de Lara
                         ` (3 preceding siblings ...)
  2015-05-12 11:02       ` [dpdk-dev] [PATCH v4 4/6] hash: add two new functions to jhash library Pablo de Lara
@ 2015-05-12 11:02       ` Pablo de Lara
  2015-05-12 11:02       ` [dpdk-dev] [PATCH v4 6/6] hash: rename rte_jhash2 to rte_jhash_32b Pablo de Lara
                         ` (3 subsequent siblings)
  8 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-05-12 11:02 UTC (permalink / raw)
  To: dev

rte_jhash is basically the same as __rte_jhash_2hashes, but it returns only one hash instead of two.
In order to remove duplicated code, rte_jhash calls __rte_jhash_2hashes (via rte_jhash_2hashes),
passing 0 as the second seed and returning just the first hash value.
(The performance penalty is negligible.)

The same is done with rte_jhash2. Also, rte_jhash2 is just a specific case
where key lengths are multiples of 32 bits, and where no key alignment check is required.
So, to avoid duplicated code, the function calls __rte_jhash_2hashes with check_align = 0
(to use the optimal path).

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 lib/librte_hash/rte_jhash.h |  293 +++++++++----------------------------------
 1 files changed, 60 insertions(+), 233 deletions(-)

diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index f1c0a8a..aa5bb2d 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -102,29 +102,19 @@ extern "C" {
 #define LOWER16b_MASK rte_le_to_cpu_32(0xffff)
 #define LOWER24b_MASK rte_le_to_cpu_32(0xffffff)
 
-/**
- * The most generic version, hashes an arbitrary sequence
- * of bytes.  No alignment or length assumptions are made about
- * the input key.
- *
- * @param key
- *   Key to calculate hash of.
- * @param length
- *   Length of key in bytes.
- * @param initval
- *   Initialising value of hash.
- * @return
- *   Calculated hash value.
- */
-static inline uint32_t
-rte_jhash(const void *key, uint32_t length, uint32_t initval)
+static inline void
+__rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb, unsigned check_align)
 {
 	uint32_t a, b, c;
 
 	/* Set up the internal state */
-	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + initval;
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + *pc;
+	c += *pb;
 
-	/* Check key alignment. For x86 architecture, first case is always optimal */
+	/*
+	 * Check key alignment. For x86 architecture, first case is always optimal
+	 * If check_align is not set, first case will be used
+	 */
 #if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_X32)
 	const uint32_t *k = key;
 	const uint32_t s = 0;
@@ -132,8 +122,7 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
 	const uint32_t *k = (uint32_t *)(uintptr_t)key & (uintptr_t)~3);
 	const uint32_t s = ((uintptr_t)key & 3) * CHAR_BIT;
 #endif
-
-	if (s == 0) {
+	if (!check_align || s == 0) {
 		while (length > 12) {
 			a += k[0];
 			b += k[1];
@@ -172,7 +161,9 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
 			a += k[0] & LOWER8b_MASK; break;
 		/* zero length strings require no mixing */
 		case 0:
-			return c;
+			*pc = c;
+			*pb = b;
+			return;
 		};
 	} else {
 		/* all but the last block: affect some 32 bits of (a, b, c) */
@@ -238,63 +229,16 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
 			break;
 		/* zero length strings require no mixing */
 		case 0:
-			return c;
+			*pc = c;
+			*pb = b;
+			return;
 		}
 	}
 
 	__rte_jhash_final(a, b, c);
 
-	return c;
-}
-
-/**
- * A special optimized version that handles 1 or more of uint32_ts.
- * The length parameter here is the number of uint32_ts in the key.
- *
- * @param k
- *   Key to calculate hash of.
- * @param length
- *   Length of key in units of 4 bytes.
- * @param initval
- *   Initialising value of hash.
- * @return
- *   Calculated hash value.
- */
-static inline uint32_t
-rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
-{
-	uint32_t a, b, c;
-
-	/* Set up the internal state */
-	a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length) << 2) + initval;
-
-	/* Handle most of the key */
-	while (length > 3) {
-		a += k[0];
-		b += k[1];
-		c += k[2];
-
-		__rte_jhash_mix(a, b, c);
-
-		k += 3;
-		length -= 3;
-	}
-
-	/* Handle the last 3 uint32_t's */
-	switch (length) {
-	case 3:
-		c += k[2];
-	case 2:
-		b += k[1];
-	case 1:
-		a += k[0];
-		__rte_jhash_final(a, b, c);
-	/* case 0: nothing left to add */
-	case 0:
-		break;
-	};
-
-	return c;
+	*pc = c;
+	*pb = b;
 }
 
 /**
@@ -315,138 +259,7 @@ rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
 static inline void
 rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb)
 {
-	uint32_t a, b, c;
-
-	/* Set up the internal state */
-	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + *pc;
-	c += *pb;
-
-	/* Check key alignment. For x86 architecture, first case is always optimal */
-#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_X32)
-	const uint32_t *k = key;
-	const uint32_t s = 0;
-#else
-	const uint32_t *k = (uint32_t *)(uintptr_t)key & (uintptr_t)~3);
-	const uint32_t s = ((uintptr_t)key & 3) * CHAR_BIT;
-#endif
-
-	if (s == 0) {
-		while (length > 12) {
-			a += k[0];
-			b += k[1];
-			c += k[2];
-
-			__rte_jhash_mix(a, b, c);
-
-			k += 3;
-			length -= 12;
-		}
-
-		switch (length) {
-		case 12:
-			c += k[2]; b += k[1]; a += k[0]; break;
-		case 11:
-			c += k[2] & LOWER24b_MASK; b += k[1]; a += k[0]; break;
-		case 10:
-			c += k[2] & LOWER16b_MASK; b += k[1]; a += k[0]; break;
-		case 9:
-			c += k[2] & LOWER8b_MASK; b += k[1]; a += k[0]; break;
-		case 8:
-			b += k[1]; a += k[0]; break;
-		case 7:
-			b += k[1] & LOWER24b_MASK; a += k[0]; break;
-		case 6:
-			b += k[1] & LOWER16b_MASK; a += k[0]; break;
-		case 5:
-			b += k[1] & LOWER8b_MASK; a += k[0]; break;
-		case 4:
-			a += k[0]; break;
-		case 3:
-			a += k[0] & LOWER24b_MASK; break;
-		case 2:
-			a += k[0] & LOWER16b_MASK; break;
-		case 1:
-			a += k[0] & LOWER8b_MASK; break;
-		/* zero length strings require no mixing */
-		case 0:
-			*pc = c;
-			*pb = b;
-			return;
-		};
-	} else {
-		/* all but the last block: affect some 32 bits of (a, b, c) */
-		while (length > 12) {
-			a += BIT_SHIFT(k[0], k[1], s);
-			b += BIT_SHIFT(k[1], k[2], s);
-			c += BIT_SHIFT(k[2], k[3], s);
-			__rte_jhash_mix(a, b, c);
-
-			k += 3;
-			length -= 12;
-		}
-
-		/* last block: affect all 32 bits of (c) */
-		switch (length) {
-		case 12:
-			a += BIT_SHIFT(k[0], k[1], s);
-			b += BIT_SHIFT(k[1], k[2], s);
-			c += BIT_SHIFT(k[2], k[3], s);
-			break;
-		case 11:
-			a += BIT_SHIFT(k[0], k[1], s);
-			b += BIT_SHIFT(k[1], k[2], s);
-			c += BIT_SHIFT(k[2], k[3], s) & LOWER24b_MASK;
-			break;
-		case 10:
-			a += BIT_SHIFT(k[0], k[1], s);
-			b += BIT_SHIFT(k[1], k[2], s);
-			c += BIT_SHIFT(k[2], k[3], s) & LOWER16b_MASK;
-			break;
-		case 9:
-			a += BIT_SHIFT(k[0], k[1], s);
-			b += BIT_SHIFT(k[1], k[2], s);
-			c += BIT_SHIFT(k[2], k[3], s) & LOWER8b_MASK;
-			break;
-		case 8:
-			a += BIT_SHIFT(k[0], k[1], s);
-			b += BIT_SHIFT(k[1], k[2], s);
-			break;
-		case 7:
-			a += BIT_SHIFT(k[0], k[1], s);
-			b += BIT_SHIFT(k[1], k[2], s) & LOWER24b_MASK;
-			break;
-		case 6:
-			a += BIT_SHIFT(k[0], k[1], s);
-			b += BIT_SHIFT(k[1], k[2], s) & LOWER16b_MASK;
-			break;
-		case 5:
-			a += BIT_SHIFT(k[0], k[1], s);
-			b += BIT_SHIFT(k[1], k[2], s) & LOWER8b_MASK;
-			break;
-		case 4:
-			a += BIT_SHIFT(k[0], k[1], s);
-			break;
-		case 3:
-			a += BIT_SHIFT(k[0], k[1], s) & LOWER24b_MASK;
-			break;
-		case 2:
-			a += BIT_SHIFT(k[0], k[1], s) & LOWER16b_MASK;
-			break;
-		case 1:
-			a += BIT_SHIFT(k[0], k[1], s) & LOWER8b_MASK;
-			break;
-		/* zero length strings require no mixing */
-		case 0:
-			*pc = c;
-			*pb = b;
-			return;
-		}
-	}
-
-	__rte_jhash_final(a, b, c);
-
-	*pc = c;
-	*pb = b;
+	__rte_jhash_2hashes(key, length, pc, pb, 1);
 }
 
 /**
@@ -467,40 +280,54 @@ rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb)
 static inline void
 rte_jhash2_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb)
 {
-	uint32_t a, b, c;
+	__rte_jhash_2hashes((const void *) k, (length << 2), pc, pb, 0);
+}
 
-	/* Set up the internal state */
-	a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length) << 2) + *pc;
-	c += *pb;
+/**
+ * The most generic version, hashes an arbitrary sequence
+ * of bytes.  No alignment or length assumptions are made about
+ * the input key.
+ *
+ * @param key
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in bytes.
+ * @param initval
+ *   Initialising value of hash.
+ * @return
+ *   Calculated hash value.
+ */
+static inline uint32_t
+rte_jhash(const void *key, uint32_t length, uint32_t initval)
+{
+	uint32_t initval2 = 0;
 
-	/* Handle most of the key */
-	while (length > 3) {
-		a += k[0];
-		b += k[1];
-		c += k[2];
+	rte_jhash_2hashes(key, length, &initval, &initval2);
 
-		__rte_jhash_mix(a, b, c);
+	return initval;
+}
 
-		k += 3;
-		length -= 3;
-	}
+/**
+ * A special optimized version that handles 1 or more of uint32_ts.
+ * The length parameter here is the number of uint32_ts in the key.
+ *
+ * @param k
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in units of 4 bytes.
+ * @param initval
+ *   Initialising value of hash.
+ * @return
+ *   Calculated hash value.
+ */
+static inline uint32_t
+rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
+{
+	uint32_t initval2 = 0;
 
-	/* Handle the last 3 uint32_t's */
-	switch (length) {
-	case 3:
-		c += k[2];
-	case 2:
-		b += k[1];
-	case 1:
-		a += k[0];
-		__rte_jhash_final(a, b, c);
-	/* case 0: nothing left to add */
-	case 0:
-		break;
-	};
+	rte_jhash2_2hashes(k, length, &initval, &initval2);
 
-	*pc = c;
-	*pb = b;
+	return initval;
 }
 
 static inline uint32_t
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v4 6/6] hash: rename rte_jhash2 to rte_jhash_32b
  2015-05-12 11:02     ` [dpdk-dev] [PATCH v4 0/6] update jhash function Pablo de Lara
                         ` (4 preceding siblings ...)
  2015-05-12 11:02       ` [dpdk-dev] [PATCH v4 5/6] hash: remove duplicated code Pablo de Lara
@ 2015-05-12 11:02       ` Pablo de Lara
  2015-05-12 15:33       ` [dpdk-dev] [PATCH v4 0/6] update jhash function Neil Horman
                         ` (2 subsequent siblings)
  8 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-05-12 11:02 UTC (permalink / raw)
  To: dev

Changed the name to something more meaningful,
and marked rte_jhash2 as deprecated.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 app/test/test_func_reentrancy.c |    2 +-
 app/test/test_hash.c            |    4 ++--
 lib/librte_hash/rte_jhash.h     |   17 +++++++++++++++--
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/app/test/test_func_reentrancy.c b/app/test/test_func_reentrancy.c
index dc070af..85504c0 100644
--- a/app/test/test_func_reentrancy.c
+++ b/app/test/test_func_reentrancy.c
@@ -228,7 +228,7 @@ hash_create_free(__attribute__((unused)) void *arg)
 		.entries = 16,
 		.bucket_entries = 4,
 		.key_len = 4,
-		.hash_func = (rte_hash_function)rte_jhash2,
+		.hash_func = (rte_hash_function)rte_jhash_32b,
 		.hash_func_init_val = 0,
 		.socket_id = 0,
 	};
diff --git a/app/test/test_hash.c b/app/test/test_hash.c
index 1da27c5..4ecb11b 100644
--- a/app/test/test_hash.c
+++ b/app/test/test_hash.c
@@ -1177,7 +1177,7 @@ test_hash_add_delete_jhash2(void)
 
 	hash_params_ex.name = "hash_test_jhash2";
 	hash_params_ex.key_len = 4;
-	hash_params_ex.hash_func = (rte_hash_function)rte_jhash2;
+	hash_params_ex.hash_func = (rte_hash_function)rte_jhash_32b;
 
 	handle = rte_hash_create(&hash_params_ex);
 	if (handle == NULL) {
@@ -1216,7 +1216,7 @@ test_hash_add_delete_2_jhash2(void)
 
 	hash_params_ex.name = "hash_test_2_jhash2";
 	hash_params_ex.key_len = 8;
-	hash_params_ex.hash_func = (rte_hash_function)rte_jhash2;
+	hash_params_ex.hash_func = (rte_hash_function)rte_jhash_32b;
 
 	handle = rte_hash_create(&hash_params_ex);
 	if (handle == NULL)
diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index aa5bb2d..01b1795 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -46,6 +46,8 @@ extern "C" {
 
 #include <stdint.h>
 #include <string.h>
+
+#include <rte_log.h>
 #include <rte_byteorder.h>
 
 /* jhash.h: Jenkins hash support.
@@ -278,7 +280,7 @@ rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb)
  *   IN: second seed OUT: secondary hash value.
  */
 static inline void
-rte_jhash2_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb)
+rte_jhash_32b_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb)
 {
 	__rte_jhash_2hashes((const void *) k, (length << 2), pc, pb, 0);
 }
@@ -321,11 +323,22 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
  *   Calculated hash value.
  */
 static inline uint32_t
+rte_jhash_32b(const uint32_t *k, uint32_t length, uint32_t initval)
+{
+	uint32_t initval2 = 0;
+
+	rte_jhash_32b_2hashes(k, length, &initval, &initval2);
+
+	return initval;
+}
+
+static inline uint32_t
 rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
 {
 	uint32_t initval2 = 0;
 
-	rte_jhash2_2hashes(k, length, &initval, &initval2);
+	RTE_LOG(WARNING, HASH, "rte_jhash2 is deprecated\n");
+	rte_jhash_32b_2hashes(k, length, &initval, &initval2);
 
 	return initval;
 }
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [dpdk-dev] [PATCH v4 0/6] update jhash function
  2015-05-12 11:02     ` [dpdk-dev] [PATCH v4 0/6] update jhash function Pablo de Lara
                         ` (5 preceding siblings ...)
  2015-05-12 11:02       ` [dpdk-dev] [PATCH v4 6/6] hash: rename rte_jhash2 to rte_jhash_32b Pablo de Lara
@ 2015-05-12 15:33       ` Neil Horman
  2015-05-13 13:52         ` De Lara Guarch, Pablo
  2015-05-18 16:14       ` Bruce Richardson
  2015-05-22 10:16       ` [dpdk-dev] [PATCH v5 00/10] " Pablo de Lara
  8 siblings, 1 reply; 62+ messages in thread
From: Neil Horman @ 2015-05-12 15:33 UTC (permalink / raw)
  To: Pablo de Lara; +Cc: dev

On Tue, May 12, 2015 at 12:02:32PM +0100, Pablo de Lara wrote:
> Jenkins hash function was developed originally in 1996,
> and was integrated in first versions of DPDK.
> The function has been improved in 2006,
> achieving up to 60% better performance, compared to the original one.
> 
> This patchset updates the current jhash in DPDK,
> including two new functions that generate two hashes from a single key.
> 
> It also separates the existing hash function performance tests to
> another file, to make it quicker to run.
> 
> changes in v4:
> - Simplify key alignment checks
> - Include missing x86 arch check
> 
> changes in v3:
> 
> - Update rte_jhash_1word, rte_jhash_2words and rte_jhash_3words
>   functions
> 
> changes in v2:
> 
> - Split single commit in three commits, one that updates the existing functions
>   and another that adds two new functions and use one of those functions
>   as a base to be called by the other ones.
> - Remove some unnecessary ifdefs in the code.
> - Add new macros to help on the reutilization of constants
> - Separate hash function performance tests to another file
>   and improve cycle measurements.
> - Rename existing function rte_jhash2 to rte_jhash_32b
>   (something more meaninful) and mark rte_jhash2 as
>   deprecated
> 
> Pablo de Lara (6):
>   test/hash: move hash function perf tests to separate file
>   test/hash: improve accuracy on cycle measurements
>   hash: update jhash function with the latest available
>   hash: add two new functions to jhash library
>   hash: remove duplicated code
>   hash: rename rte_jhash2 to rte_jhash_32b
> 
>  app/test/Makefile               |    1 +
>  app/test/test_func_reentrancy.c |    2 +-
>  app/test/test_hash.c            |    4 +-
>  app/test/test_hash_func_perf.c  |  145 +++++++++++++++++
>  app/test/test_hash_perf.c       |   71 +--------
>  lib/librte_hash/rte_jhash.h     |  338 +++++++++++++++++++++++++++++----------
>  6 files changed, 402 insertions(+), 159 deletions(-)
>  create mode 100644 app/test/test_hash_func_perf.c
> 
> -- 
> 1.7.4.1
> 
> 
did you run this through the ABI checker?  I see you're removing several symbols
that will likely need to go through the ABI deprecation process.

Neil

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [dpdk-dev] [PATCH v4 0/6] update jhash function
  2015-05-12 15:33       ` [dpdk-dev] [PATCH v4 0/6] update jhash function Neil Horman
@ 2015-05-13 13:52         ` De Lara Guarch, Pablo
  2015-05-13 14:20           ` Neil Horman
  0 siblings, 1 reply; 62+ messages in thread
From: De Lara Guarch, Pablo @ 2015-05-13 13:52 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

Hi Neil,

> -----Original Message-----
> From: Neil Horman [mailto:nhorman@tuxdriver.com]
> Sent: Tuesday, May 12, 2015 4:33 PM
> To: De Lara Guarch, Pablo
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 0/6] update jhash function
> 
> On Tue, May 12, 2015 at 12:02:32PM +0100, Pablo de Lara wrote:
> > Jenkins hash function was developed originally in 1996,
> > and was integrated in first versions of DPDK.
> > The function has been improved in 2006,
> > achieving up to 60% better performance, compared to the original one.
> >
> > This patchset updates the current jhash in DPDK,
> > including two new functions that generate two hashes from a single key.
> >
> > It also separates the existing hash function performance tests to
> > another file, to make it quicker to run.
> >
> > changes in v4:
> > - Simplify key alignment checks
> > - Include missing x86 arch check
> >
> > changes in v3:
> >
> > - Update rte_jhash_1word, rte_jhash_2words and rte_jhash_3words
> >   functions
> >
> > changes in v2:
> >
> > - Split single commit in three commits, one that updates the existing
> functions
> >   and another that adds two new functions and use one of those functions
> >   as a base to be called by the other ones.
> > - Remove some unnecessary ifdefs in the code.
> > - Add new macros to help on the reutilization of constants
> > - Separate hash function performance tests to another file
> >   and improve cycle measurements.
> > - Rename existing function rte_jhash2 to rte_jhash_32b
> >   (something more meaninful) and mark rte_jhash2 as
> >   deprecated
> >
> > Pablo de Lara (6):
> >   test/hash: move hash function perf tests to separate file
> >   test/hash: improve accuracy on cycle measurements
> >   hash: update jhash function with the latest available
> >   hash: add two new functions to jhash library
> >   hash: remove duplicated code
> >   hash: rename rte_jhash2 to rte_jhash_32b
> >
> >  app/test/Makefile               |    1 +
> >  app/test/test_func_reentrancy.c |    2 +-
> >  app/test/test_hash.c            |    4 +-
> >  app/test/test_hash_func_perf.c  |  145 +++++++++++++++++
> >  app/test/test_hash_perf.c       |   71 +--------
> >  lib/librte_hash/rte_jhash.h     |  338 +++++++++++++++++++++++++++++-
> ---------
> >  6 files changed, 402 insertions(+), 159 deletions(-)
> >  create mode 100644 app/test/test_hash_func_perf.c
> >
> > --
> > 1.7.4.1
> >
> >
> did you run this through the ABI checker?  I see you're removing several
> symbols
> that will likely need to go through the ABI deprecation process.
> 
> Neil

I had not run it, but I just did. I see no problems on librte_hash
(but I see some on rte_ethdev.h, due to another commit).

Anyway, I renamed two functions to make their names more meaningful, but those functions are "static inline",
so I am not sure exactly what the deprecation process is for those.
What I did was leave the original function in place, calling the same function as the new renamed one,
but adding a line that warns that the function is deprecated.

Is that OK or should I do it differently?

Thanks!
Pablo

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [dpdk-dev] [PATCH v4 0/6] update jhash function
  2015-05-13 13:52         ` De Lara Guarch, Pablo
@ 2015-05-13 14:20           ` Neil Horman
  0 siblings, 0 replies; 62+ messages in thread
From: Neil Horman @ 2015-05-13 14:20 UTC (permalink / raw)
  To: De Lara Guarch, Pablo; +Cc: dev

On Wed, May 13, 2015 at 01:52:33PM +0000, De Lara Guarch, Pablo wrote:
> Hi Neil,
> 
> > -----Original Message-----
> > From: Neil Horman [mailto:nhorman@tuxdriver.com]
> > Sent: Tuesday, May 12, 2015 4:33 PM
> > To: De Lara Guarch, Pablo
> > Cc: dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH v4 0/6] update jhash function
> > 
> > On Tue, May 12, 2015 at 12:02:32PM +0100, Pablo de Lara wrote:
> > > Jenkins hash function was developed originally in 1996,
> > > and was integrated in first versions of DPDK.
> > > The function has been improved in 2006,
> > > achieving up to 60% better performance, compared to the original one.
> > >
> > > This patchset updates the current jhash in DPDK,
> > > including two new functions that generate two hashes from a single key.
> > >
> > > It also separates the existing hash function performance tests to
> > > another file, to make it quicker to run.
> > >
> > > changes in v4:
> > > - Simplify key alignment checks
> > > - Include missing x86 arch check
> > >
> > > changes in v3:
> > >
> > > - Update rte_jhash_1word, rte_jhash_2words and rte_jhash_3words
> > >   functions
> > >
> > > changes in v2:
> > >
> > > - Split single commit in three commits, one that updates the existing
> > functions
> > >   and another that adds two new functions and use one of those functions
> > >   as a base to be called by the other ones.
> > > - Remove some unnecessary ifdefs in the code.
> > > - Add new macros to help on the reutilization of constants
> > > - Separate hash function performance tests to another file
> > >   and improve cycle measurements.
> > > - Rename existing function rte_jhash2 to rte_jhash_32b
> > >   (something more meaninful) and mark rte_jhash2 as
> > >   deprecated
> > >
> > > Pablo de Lara (6):
> > >   test/hash: move hash function perf tests to separate file
> > >   test/hash: improve accuracy on cycle measurements
> > >   hash: update jhash function with the latest available
> > >   hash: add two new functions to jhash library
> > >   hash: remove duplicated code
> > >   hash: rename rte_jhash2 to rte_jhash_32b
> > >
> > >  app/test/Makefile               |    1 +
> > >  app/test/test_func_reentrancy.c |    2 +-
> > >  app/test/test_hash.c            |    4 +-
> > >  app/test/test_hash_func_perf.c  |  145 +++++++++++++++++
> > >  app/test/test_hash_perf.c       |   71 +--------
> > >  lib/librte_hash/rte_jhash.h     |  338 +++++++++++++++++++++++++++++-
> > ---------
> > >  6 files changed, 402 insertions(+), 159 deletions(-)
> > >  create mode 100644 app/test/test_hash_func_perf.c
> > >
> > > --
> > > 1.7.4.1
> > >
> > >
> > did you run this through the ABI checker?  I see you're removing several
> > symbols
> > that will likely need to go through the ABI deprecation process.
> > 
> > Neil
> 
> I had not run it, but I just did. I see no problems on librte_hash
> (but I see some on rte_ethdev.h, due to another commit).
> 
> Anyway, I renamed two functions to be more meaningful, but those functions are "static inline", 
> so I am not sure exactly what the deprecation process is for those.
> What I did was leaving the original function that calls the same function as the new renamed one,
> but adds a line warning that the functions is deprecated.
> 
> Is that OK or should I do it differently?
> 
As long as they're all static inline and binaries that are already compiled can
continue to access the data structures they reference at the member offsets
encoded into them at compile time, you should be ok.

Thanks!
Neil

> Thanks!
> Pablo
> 

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [dpdk-dev] [PATCH v4 0/6] update jhash function
  2015-05-12 11:02     ` [dpdk-dev] [PATCH v4 0/6] update jhash function Pablo de Lara
                         ` (6 preceding siblings ...)
  2015-05-12 15:33       ` [dpdk-dev] [PATCH v4 0/6] update jhash function Neil Horman
@ 2015-05-18 16:14       ` Bruce Richardson
  2015-05-22 10:16       ` [dpdk-dev] [PATCH v5 00/10] " Pablo de Lara
  8 siblings, 0 replies; 62+ messages in thread
From: Bruce Richardson @ 2015-05-18 16:14 UTC (permalink / raw)
  To: Pablo de Lara; +Cc: dev

On Tue, May 12, 2015 at 12:02:32PM +0100, Pablo de Lara wrote:
> Jenkins hash function was developed originally in 1996,
> and was integrated in first versions of DPDK.
> The function has been improved in 2006,
> achieving up to 60% better performance, compared to the original one.
> 
> This patchset updates the current jhash in DPDK,
> including two new functions that generate two hashes from a single key.
> 
> It also separates the existing hash function performance tests to
> another file, to make it quicker to run.
> 
> changes in v4:
> - Simplify key alignment checks
> - Include missing x86 arch check
> 
> changes in v3:
> 
> - Update rte_jhash_1word, rte_jhash_2words and rte_jhash_3words
>   functions
> 
> changes in v2:
> 
> - Split single commit in three commits, one that updates the existing functions
>   and another that adds two new functions and use one of those functions
>   as a base to be called by the other ones.
> - Remove some unnecessary ifdefs in the code.
> - Add new macros to help with the reuse of constants
> - Separate hash function performance tests to another file
>   and improve cycle measurements.
> - Rename existing function rte_jhash2 to rte_jhash_32b
>   (something more meaningful) and mark rte_jhash2 as
>   deprecated
> 

Hi Pablo,

Patchset looks good to me, and unit tests all pass across the set. Some general
comments or suggestions though - particularly about testing.

1. The set of lengths used when testing the functions looks strange and rather
arbitrary. Perhaps we could have a set of key lengths which are documented. E.g.

	lengths[] = {
		4, 8, 16, 48, 64, /* standard key sizes */
		9,                /* IPv4 SRC + DST + protocol, unpadded */
		13,               /* IPv4 5-tuple, unpadded */
		37,               /* IPv6 5-tuple, unpadded */
		40,               /* IPv6 5-tuple, padded to 8-byte boundary */
	}

2. When testing multiple algorithms, it might be nice to change the order of the
loops so that we test all algorithms with the same key lengths first, and then
change length, rather than running the same algorithm with multiple lengths and
then changing algorithm. The output would be clearer and easier to see which
algorithm performs best for a given key-length.

3. For sanity checking across the patches making changes to the jhash functions,
I think it would be nice to have an initial sanity test with a set of known
keys and hash results in it. That way we can verify that the actual calculation
result never changes as the functions are modified. This would also be a big
help for future work changing the code. [As far as I can see, we don't ever check
in the algorithm checks that we are ever getting the right answer :-)]

All the above suggestions could perhaps go in a patch (or 2/3 patches) after the
first two, which splits out the algorithm tests, and before the actual changes
to the jhash implementation.

Regards,
/Bruce

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v5 00/10] update jhash function
  2015-05-12 11:02     ` [dpdk-dev] [PATCH v4 0/6] update jhash function Pablo de Lara
                         ` (7 preceding siblings ...)
  2015-05-18 16:14       ` Bruce Richardson
@ 2015-05-22 10:16       ` Pablo de Lara
  2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 01/10] test/hash: move hash function perf tests to separate file Pablo de Lara
                           ` (10 more replies)
  8 siblings, 11 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-05-22 10:16 UTC (permalink / raw)
  To: dev

Jenkins hash function was developed originally in 1996,
and was integrated in first versions of DPDK.
The function has been improved in 2006,
achieving up to 60% better performance, compared to the original one.

This patchset updates the current jhash in DPDK,
including two new functions that generate two hashes from a single key.

It also separates the existing hash function performance tests into
another file, to make them quicker to run, and adds new unit tests.

changes in v5:
- Add functional tests (mainly to test that all functions 
  return the expected hash values)
- Modify range of key sizes to test
- Change order of output for perf tests, so it is clearer
  to compare different hash functions for same key size/initial value
- Add new initial value to test in the hash functions
- Fix some errors caught by checkpatch
 
changes in v4:
- Simplify key alignment checks
- Include missing x86 arch check

changes in v3:

- Update rte_jhash_1word, rte_jhash_2words and rte_jhash_3words
  functions

changes in v2:

- Split single commit in three commits, one that updates the existing functions
  and another that adds two new functions and use one of those functions
  as a base to be called by the other ones.
- Remove some unnecessary ifdefs in the code.
- Add new macros to help with the reuse of constants
- Separate hash function performance tests to another file
  and improve cycle measurements.
- Rename existing function rte_jhash2 to rte_jhash_32b
  (something more meaningful) and mark rte_jhash2 as
  deprecated

De Lara Guarch, Pablo (3):
  test/hash: move hash function perf tests to separate file
  test/hash: improve accuracy on cycle measurements
  hash: add two new functions to jhash library

Pablo de Lara (7):
  test/hash: update key size range and initial values for testing
  test/hash: change order of loops in hash function tests
  test/hash: add new functional tests for hash functions
  hash: update jhash function with the latest available
  hash: remove duplicated code
  hash: rename rte_jhash2 to rte_jhash_32b
  test/hash: verify rte_jhash_1word/2words/3words

 app/test/Makefile               |    1 +
 app/test/test_func_reentrancy.c |    2 +-
 app/test/test_hash.c            |    4 +-
 app/test/test_hash_functions.c  |  322 +++++++++++++++++++++++++++++++++++++
 app/test/test_hash_perf.c       |   71 +--------
 lib/librte_hash/rte_jhash.h     |  338 +++++++++++++++++++++++++++++----------
 6 files changed, 579 insertions(+), 159 deletions(-)
 create mode 100644 app/test/test_hash_functions.c

-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v5 01/10] test/hash: move hash function perf tests to separate file
  2015-05-22 10:16       ` [dpdk-dev] [PATCH v5 00/10] " Pablo de Lara
@ 2015-05-22 10:16         ` Pablo de Lara
  2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 02/10] test/hash: improve accuracy on cycle measurements Pablo de Lara
                           ` (9 subsequent siblings)
  10 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-05-22 10:16 UTC (permalink / raw)
  To: dev

From: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>

This patch moves hash function performance tests to a separate file,
so users can check the performance of the existing hash functions more quickly,
without having to run all the other hash operation performance tests,
which take some time.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 app/test/Makefile              |    1 +
 app/test/test_hash_functions.c |  147 ++++++++++++++++++++++++++++++++++++++++
 app/test/test_hash_perf.c      |   71 +-------------------
 3 files changed, 149 insertions(+), 70 deletions(-)
 create mode 100644 app/test/test_hash_functions.c

diff --git a/app/test/Makefile b/app/test/Makefile
index 4aca77c..2311574 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -83,6 +83,7 @@ SRCS-y += test_memcpy_perf.c
 
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash.c
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_perf.c
+SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_functions.c
 
 SRCS-$(CONFIG_RTE_LIBRTE_LPM) += test_lpm.c
 SRCS-$(CONFIG_RTE_LIBRTE_LPM) += test_lpm6.c
diff --git a/app/test/test_hash_functions.c b/app/test/test_hash_functions.c
new file mode 100644
index 0000000..4efb3cd
--- /dev/null
+++ b/app/test/test_hash_functions.c
@@ -0,0 +1,147 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_cycles.h>
+#include <rte_random.h>
+#include <rte_hash.h>
+#include <rte_jhash.h>
+#include <rte_hash_crc.h>
+
+#include "test.h"
+
+/*******************************************************************************
+ * Hash function performance test configuration section. Each performance test
+ * will be performed HASHTEST_ITERATIONS times.
+ *
+ * The three arrays below control what tests are performed. Every combination
+ * from the array entries is tested.
+ */
+#define HASHTEST_ITERATIONS 1000000
+
+static rte_hash_function hashtest_funcs[] = {rte_jhash, rte_hash_crc};
+static uint32_t hashtest_initvals[] = {0};
+static uint32_t hashtest_key_lens[] = {2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, 31, 32, 33, 63, 64};
+/******************************************************************************/
+
+/*
+ * To help print out name of hash functions.
+ */
+static const char *
+get_hash_name(rte_hash_function f)
+{
+	if (f == rte_jhash)
+		return "jhash";
+
+	if (f == rte_hash_crc)
+		return "rte_hash_crc";
+
+	return "UnknownHash";
+}
+
+/*
+ * Test a hash function.
+ */
+static void
+run_hash_func_perf_test(rte_hash_function f, uint32_t init_val,
+		uint32_t key_len)
+{
+	static uint8_t key[RTE_HASH_KEY_LENGTH_MAX];
+	uint64_t ticks = 0, start, end;
+	unsigned i, j;
+
+	for (i = 0; i < HASHTEST_ITERATIONS; i++) {
+
+		for (j = 0; j < key_len; j++)
+			key[j] = (uint8_t) rte_rand();
+
+		start = rte_rdtsc();
+		f(key, key_len, init_val);
+		end = rte_rdtsc();
+		ticks += end - start;
+	}
+
+	printf("%-12s, %-18u, %-13u, %.02f\n", get_hash_name(f), (unsigned) key_len,
+			(unsigned) init_val, (double)ticks / HASHTEST_ITERATIONS);
+}
+
+/*
+ * Test all hash functions.
+ */
+static void
+run_hash_func_perf_tests(void)
+{
+	unsigned i, j, k;
+
+	printf(" *** Hash function performance test results ***\n");
+	printf(" Number of iterations for each test = %d\n",
+			HASHTEST_ITERATIONS);
+	printf("Hash Func.  , Key Length (bytes), Initial value, Ticks/Op.\n");
+
+	for (i = 0;
+	     i < sizeof(hashtest_funcs) / sizeof(rte_hash_function);
+	     i++) {
+		for (j = 0;
+		     j < sizeof(hashtest_initvals) / sizeof(uint32_t);
+		     j++) {
+			for (k = 0;
+			     k < sizeof(hashtest_key_lens) / sizeof(uint32_t);
+			     k++) {
+				run_hash_func_perf_test(hashtest_funcs[i],
+						hashtest_initvals[j],
+						hashtest_key_lens[k]);
+			}
+		}
+	}
+}
+
+static int
+test_hash_functions(void)
+{
+	run_hash_func_perf_tests();
+
+	return 0;
+}
+
+static struct test_command hash_functions_cmd = {
+	.command = "hash_functions_autotest",
+	.callback = test_hash_functions,
+};
+REGISTER_TEST_COMMAND(hash_functions_cmd);
diff --git a/app/test/test_hash_perf.c b/app/test/test_hash_perf.c
index 6eabb21..d0e5ce0 100644
--- a/app/test/test_hash_perf.c
+++ b/app/test/test_hash_perf.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -85,20 +85,6 @@ struct tbl_perf_test_params {
 #define LOCAL_FBK_HASH_ENTRIES_MAX (1 << 15)
 
 /*******************************************************************************
- * Hash function performance test configuration section. Each performance test
- * will be performed HASHTEST_ITERATIONS times.
- *
- * The five arrays below control what tests are performed. Every combination
- * from the array entries is tested.
- */
-#define HASHTEST_ITERATIONS 1000000
-
-static rte_hash_function hashtest_funcs[] = {rte_jhash, rte_hash_crc};
-static uint32_t hashtest_initvals[] = {0};
-static uint32_t hashtest_key_lens[] = {2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, 31, 32, 33, 63, 64};
-/******************************************************************************/
-
-/*******************************************************************************
  * Hash table performance test configuration section.
  */
 struct tbl_perf_test_params tbl_perf_params[] =
@@ -617,60 +603,6 @@ static int run_all_tbl_perf_tests(void)
 	return 0;
 }
 
-/*
- * Test a hash function.
- */
-static void run_hash_func_test(rte_hash_function f, uint32_t init_val,
-		uint32_t key_len)
-{
-	static uint8_t key[RTE_HASH_KEY_LENGTH_MAX];
-	uint64_t ticks = 0, start, end;
-	unsigned i, j;
-
-	for (i = 0; i < HASHTEST_ITERATIONS; i++) {
-
-		for (j = 0; j < key_len; j++)
-			key[j] = (uint8_t) rte_rand();
-
-		start = rte_rdtsc();
-		f(key, key_len, init_val);
-		end = rte_rdtsc();
-		ticks += end - start;
-	}
-
-	printf("%-12s, %-18u, %-13u, %.02f\n", get_hash_name(f), (unsigned) key_len,
-			(unsigned) init_val, (double)ticks / HASHTEST_ITERATIONS);
-}
-
-/*
- * Test all hash functions.
- */
-static void run_hash_func_tests(void)
-{
-	unsigned i, j, k;
-
-	printf("\n\n *** Hash function performance test results ***\n");
-	printf(" Number of iterations for each test = %d\n",
-			HASHTEST_ITERATIONS);
-	printf("Hash Func.  , Key Length (bytes), Initial value, Ticks/Op.\n");
-
-	for (i = 0;
-	     i < sizeof(hashtest_funcs) / sizeof(rte_hash_function);
-	     i++) {
-		for (j = 0;
-		     j < sizeof(hashtest_initvals) / sizeof(uint32_t);
-		     j++) {
-			for (k = 0;
-			     k < sizeof(hashtest_key_lens) / sizeof(uint32_t);
-			     k++) {
-				run_hash_func_test(hashtest_funcs[i],
-						hashtest_initvals[j],
-						hashtest_key_lens[k]);
-			}
-		}
-	}
-}
-
 /* Control operation of performance testing of fbk hash. */
 #define LOAD_FACTOR 0.667	/* How full to make the hash table. */
 #define TEST_SIZE 1000000	/* How many operations to time. */
@@ -757,7 +689,6 @@ test_hash_perf(void)
 {
 	if (run_all_tbl_perf_tests() < 0)
 		return -1;
-	run_hash_func_tests();
 
 	if (fbk_hash_perf_test() < 0)
 		return -1;
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v5 02/10] test/hash: improve accuracy on cycle measurements
  2015-05-22 10:16       ` [dpdk-dev] [PATCH v5 00/10] " Pablo de Lara
  2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 01/10] test/hash: move hash function perf tests to separate file Pablo de Lara
@ 2015-05-22 10:16         ` Pablo de Lara
  2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 03/10] test/hash: update key size range and initial values for testing Pablo de Lara
                           ` (8 subsequent siblings)
  10 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-05-22 10:16 UTC (permalink / raw)
  To: dev

From: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>

Cycles per hash calculation were measured per single operation.
It is much more accurate to run several iterations between measurements
and divide by number of iterations.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 app/test/test_hash_functions.c |   18 +++++++++---------
 1 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/app/test/test_hash_functions.c b/app/test/test_hash_functions.c
index 4efb3cd..767b2bc 100644
--- a/app/test/test_hash_functions.c
+++ b/app/test/test_hash_functions.c
@@ -83,21 +83,21 @@ static void
 run_hash_func_perf_test(rte_hash_function f, uint32_t init_val,
 		uint32_t key_len)
 {
-	static uint8_t key[RTE_HASH_KEY_LENGTH_MAX];
-	uint64_t ticks = 0, start, end;
+	static uint8_t key[HASHTEST_ITERATIONS][RTE_HASH_KEY_LENGTH_MAX];
+	uint64_t ticks, start, end;
 	unsigned i, j;
 
 	for (i = 0; i < HASHTEST_ITERATIONS; i++) {
-
 		for (j = 0; j < key_len; j++)
-			key[j] = (uint8_t) rte_rand();
-
-		start = rte_rdtsc();
-		f(key, key_len, init_val);
-		end = rte_rdtsc();
-		ticks += end - start;
+			key[i][j] = (uint8_t) rte_rand();
 	}
 
+	start = rte_rdtsc();
+	for (i = 0; i < HASHTEST_ITERATIONS; i++)
+		f(key[i], key_len, init_val);
+	end = rte_rdtsc();
+	ticks = end - start;
+
 	printf("%-12s, %-18u, %-13u, %.02f\n", get_hash_name(f), (unsigned) key_len,
 			(unsigned) init_val, (double)ticks / HASHTEST_ITERATIONS);
 }
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v5 03/10] test/hash: update key size range and initial values for testing
  2015-05-22 10:16       ` [dpdk-dev] [PATCH v5 00/10] " Pablo de Lara
  2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 01/10] test/hash: move hash function perf tests to separate file Pablo de Lara
  2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 02/10] test/hash: improve accuracy on cycle measurements Pablo de Lara
@ 2015-05-22 10:16         ` Pablo de Lara
  2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 04/10] test/hash: change order of loops in hash function tests Pablo de Lara
                           ` (7 subsequent siblings)
  10 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-05-22 10:16 UTC (permalink / raw)
  To: dev

Previous key sizes used for testing did not have much purpose.
This patch substitutes them with some more meaningful ones
(standard multiple of 2 key sizes, plus IPv4/v6 tuple and others)

Also an arbitrary initial value has been added
to increase the test coverage.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 app/test/test_hash_functions.c |   10 ++++++++--
 1 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/app/test/test_hash_functions.c b/app/test/test_hash_functions.c
index 767b2bc..973fbe8 100644
--- a/app/test/test_hash_functions.c
+++ b/app/test/test_hash_functions.c
@@ -57,8 +57,14 @@
 #define HASHTEST_ITERATIONS 1000000
 
 static rte_hash_function hashtest_funcs[] = {rte_jhash, rte_hash_crc};
-static uint32_t hashtest_initvals[] = {0};
-static uint32_t hashtest_key_lens[] = {2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, 31, 32, 33, 63, 64};
+static uint32_t hashtest_initvals[] = {0, 0xdeadbeef};
+static uint32_t hashtest_key_lens[] = {
+ 4, 8, 16, 32, 48, 64, /* standard key sizes */
+ 9,                    /* IPv4 SRC + DST + protocol, unpadded */
+ 13,                   /* IPv4 5-tuple, unpadded */
+ 37,                   /* IPv6 5-tuple, unpadded */
+ 40                    /* IPv6 5-tuple, padded to 8-byte boundary */
+};
 /******************************************************************************/
 
 /*
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v5 04/10] test/hash: change order of loops in hash function tests
  2015-05-22 10:16       ` [dpdk-dev] [PATCH v5 00/10] " Pablo de Lara
                           ` (2 preceding siblings ...)
  2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 03/10] test/hash: update key size range and initial values for testing Pablo de Lara
@ 2015-05-22 10:16         ` Pablo de Lara
  2015-06-10 11:05           ` Bruce Richardson
  2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 05/10] test/hash: add new functional tests for hash functions Pablo de Lara
                           ` (6 subsequent siblings)
  10 siblings, 1 reply; 62+ messages in thread
From: Pablo de Lara @ 2015-05-22 10:16 UTC (permalink / raw)
  To: dev

In order to see more clearly the performance difference
between different hash functions, the order of the loops
has been changed, so it iterates first through initial values,
then key sizes and then the hash functions.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 app/test/test_hash_functions.c |   20 ++++++++++----------
 1 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/app/test/test_hash_functions.c b/app/test/test_hash_functions.c
index 973fbe8..3b72e8f 100644
--- a/app/test/test_hash_functions.c
+++ b/app/test/test_hash_functions.c
@@ -86,8 +86,8 @@ get_hash_name(rte_hash_function f)
  * Test a hash function.
  */
 static void
-run_hash_func_perf_test(rte_hash_function f, uint32_t init_val,
-		uint32_t key_len)
+run_hash_func_perf_test(uint32_t key_len, uint32_t init_val,
+		rte_hash_function f)
 {
 	static uint8_t key[HASHTEST_ITERATIONS][RTE_HASH_KEY_LENGTH_MAX];
 	uint64_t ticks, start, end;
@@ -122,17 +122,17 @@ run_hash_func_perf_tests(void)
 	printf("Hash Func.  , Key Length (bytes), Initial value, Ticks/Op.\n");
 
 	for (i = 0;
-	     i < sizeof(hashtest_funcs) / sizeof(rte_hash_function);
+	     i < sizeof(hashtest_initvals) / sizeof(uint32_t);
 	     i++) {
 		for (j = 0;
-		     j < sizeof(hashtest_initvals) / sizeof(uint32_t);
-		     j++) {
+		     j < sizeof(hashtest_key_lens) / sizeof(uint32_t);
+	             j++) {
 			for (k = 0;
-			     k < sizeof(hashtest_key_lens) / sizeof(uint32_t);
-			     k++) {
-				run_hash_func_perf_test(hashtest_funcs[i],
-						hashtest_initvals[j],
-						hashtest_key_lens[k]);
+		             k < sizeof(hashtest_funcs) / sizeof(rte_hash_function);
+		             k++) {
+				run_hash_func_perf_test(hashtest_key_lens[j],
+						hashtest_initvals[i],
+						hashtest_funcs[k]);
 			}
 		}
 	}
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v5 05/10] test/hash: add new functional tests for hash functions
  2015-05-22 10:16       ` [dpdk-dev] [PATCH v5 00/10] " Pablo de Lara
                           ` (3 preceding siblings ...)
  2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 04/10] test/hash: change order of loops in hash function tests Pablo de Lara
@ 2015-05-22 10:16         ` Pablo de Lara
  2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 06/10] hash: update jhash function with the latest available Pablo de Lara
                           ` (5 subsequent siblings)
  10 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-05-22 10:16 UTC (permalink / raw)
  To: dev

In order to make sure that the hash functions are returning
the correct values, new tests have been added:

- First test compares precalculated hash values with values calculated
from the existing hash functions.
- Second test compares values returned from rte_jhash2 and rte_jhash,
expecting same return (only for multiple of 4 bytes keys)

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 app/test/test_hash_functions.c |  121 ++++++++++++++++++++++++++++++++++++++++
 1 files changed, 121 insertions(+), 0 deletions(-)

diff --git a/app/test/test_hash_functions.c b/app/test/test_hash_functions.c
index 3b72e8f..97a8fbd 100644
--- a/app/test/test_hash_functions.c
+++ b/app/test/test_hash_functions.c
@@ -47,6 +47,36 @@
 
 #include "test.h"
 
+/*
+ * Hash values calculated for key sizes from array "hashtest_key_lens"
+ * and for initial values from array "hashtest_initvals.
+ * Each key will be formed by increasing each byte by 1:
+ * e.g.: key size = 4, key = 0x03020100
+ *       key size = 8, key = 0x0706050403020100
+ */
+static uint32_t hash_values_jhash[2][10] = {{
+ 0x821cc2db, 0xa491f494, 0xace4cd87, 0x9e867842,
+ 0xd32442d6, 0x5fbafeab, 0x9cac434c, 0xecad9b0d,
+ 0x2dcf235e, 0xaab655d0
+},
+{
+ 0xc1111b14, 0x9a95039e, 0x84f208a0, 0xfa28f3fb,
+ 0xfa13f7d3, 0xc7aed470, 0x74caa938, 0xa9288066,
+ 0xd0140735, 0xbf00519d
+}
+};
+static uint32_t hash_values_crc[2][10] = {{
+ 0x91545164, 0x06040eb1, 0x9bb99201, 0xcc4c4fe4,
+ 0x14a90993, 0xf8a5dd8c, 0xc62beb31, 0x32bf340e,
+ 0x72f9d22b, 0x4a11475e
+},
+{
+ 0x98cd4c70, 0xd52c702f, 0x41fc0e1c, 0x3905f65c,
+ 0x94bff47f, 0x1bab102d, 0xd2911ed7, 0xe8faa813,
+ 0x6bea184b, 0x53028d3e
+}
+};
+
 /*******************************************************************************
  * Hash function performance test configuration section. Each performance test
  * will be performed HASHTEST_ITERATIONS times.
@@ -138,9 +168,100 @@ run_hash_func_perf_tests(void)
 	}
 }
 
+/*
+ * Verify that hash functions return what they are expected to return
+ * (using precalculated values stored above)
+ */
+static int
+verify_precalculated_hash_func_tests(void)
+{
+	unsigned i, j;
+	uint8_t key[64];
+	uint32_t hash;
+
+	for (i = 0; i < 64; i++)
+		key[i] = (uint8_t) i;
+
+	for (i = 0; i < sizeof(hashtest_key_lens) / sizeof(uint32_t); i++) {
+		for (j = 0; j < sizeof(hashtest_initvals) / sizeof(uint32_t); j++) {
+			hash = rte_jhash(key, hashtest_key_lens[i], hashtest_initvals[j]);
+			if (hash != hash_values_jhash[j][i]) {
+				printf("jhash for %u bytes with initial value 0x%x."
+				       "Expected 0x%x, but got 0x%x\n",
+				       hashtest_key_lens[i], hashtest_initvals[j],
+				       hash_values_jhash[j][i], hash);
+				return -1;
+			}
+
+			hash = rte_hash_crc(key, hashtest_key_lens[i], hashtest_initvals[j]);
+			if (hash != hash_values_crc[j][i]) {
+				printf("CRC for %u bytes with initial value 0x%x."
+				       "Expected 0x%x, but got 0x%x\n",
+				       hashtest_key_lens[i], hashtest_initvals[j],
+				       hash_values_crc[j][i], hash);
+				return -1;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Verify that rte_jhash and rte_jhash2 return the same
+ */
+static int
+verify_jhash_32bits(void)
+{
+	unsigned i, j;
+	uint8_t key[64];
+	uint32_t hash, hash32;
+
+	for (i = 0; i < 64; i++)
+		key[i] = rand() & 0xff;
+
+	for (i = 0; i < sizeof(hashtest_key_lens) / sizeof(uint32_t); i++) {
+		for (j = 0; j < sizeof(hashtest_initvals) / sizeof(uint32_t); j++) {
+			/* Key size must be multiple of 4 (32 bits) */
+			if ((hashtest_key_lens[i] & 0x3) == 0) {
+				hash = rte_jhash(key, hashtest_key_lens[i], hashtest_initvals[j]);
+				/* Divide key length by 4 in rte_jhash for 32 bits */
+				hash32 = rte_jhash2((const uint32_t *)key, hashtest_key_lens[i] >> 2, hashtest_initvals[j]);
+				if (hash != hash32) {
+					printf("rte_jhash returns different value (0x%x)"
+					       "than rte_jhash2 (0x%x)\n",
+					       hash, hash32);
+					return -1;
+				}
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Run all functional tests for hash functions
+ */
+static int
+run_hash_func_tests(void)
+{
+	if (verify_precalculated_hash_func_tests() != 0)
+		return -1;
+
+	if (verify_jhash_32bits() != 0)
+		return -1;
+
+	return 0;
+
+}
+
 static int
 test_hash_functions(void)
 {
+	if (run_hash_func_tests() != 0)
+		return -1;
+
 	run_hash_func_perf_tests();
 
 	return 0;
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v5 06/10] hash: update jhash function with the latest available
  2015-05-22 10:16       ` [dpdk-dev] [PATCH v5 00/10] " Pablo de Lara
                           ` (4 preceding siblings ...)
  2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 05/10] test/hash: add new functional tests for hash functions Pablo de Lara
@ 2015-05-22 10:16         ` Pablo de Lara
  2015-06-10 11:07           ` Bruce Richardson
  2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 07/10] hash: add two new functions to jhash library Pablo de Lara
                           ` (4 subsequent siblings)
  10 siblings, 1 reply; 62+ messages in thread
From: Pablo de Lara @ 2015-05-22 10:16 UTC (permalink / raw)
  To: dev

Jenkins hash function was developed originally in 1996,
and was integrated in first versions of DPDK.
The function has been improved in 2006,
achieving up to 60% better performance, compared to the original one.

This patch integrates that code into the rte_jhash library.
It also updates the precalculated hash values in the unit test,
as the code now returns different values (expected)

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 app/test/test_hash_functions.c |   12 +-
 lib/librte_hash/rte_jhash.h    |  266 +++++++++++++++++++++++++++++-----------
 2 files changed, 199 insertions(+), 79 deletions(-)

diff --git a/app/test/test_hash_functions.c b/app/test/test_hash_functions.c
index 97a8fbd..976dd95 100644
--- a/app/test/test_hash_functions.c
+++ b/app/test/test_hash_functions.c
@@ -55,14 +55,14 @@
  *       key size = 8, key = 0x0706050403020100
  */
 static uint32_t hash_values_jhash[2][10] = {{
- 0x821cc2db, 0xa491f494, 0xace4cd87, 0x9e867842,
- 0xd32442d6, 0x5fbafeab, 0x9cac434c, 0xecad9b0d,
- 0x2dcf235e, 0xaab655d0
+ 0xe4cf1d42, 0xd4ccb93c, 0x5e84eafc, 0x21362cfe,
+ 0x2f4775ab, 0x9ff036cc, 0xeca51474, 0xbc9d6816,
+ 0x12926a31, 0x1c9fa888
 },
 {
- 0xc1111b14, 0x9a95039e, 0x84f208a0, 0xfa28f3fb,
- 0xfa13f7d3, 0xc7aed470, 0x74caa938, 0xa9288066,
- 0xd0140735, 0xbf00519d
+ 0x8270ac65, 0x05fa6668, 0x762df861, 0xda088f2f,
+ 0x59614cd4, 0x7a94f690, 0xdc1e4993, 0x30825494,
+ 0x91d0e462, 0x768087fc
 }
 };
 static uint32_t hash_values_crc[2][10] = {{
diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index a4bf5a1..41297ab 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -45,38 +45,62 @@ extern "C" {
 #endif
 
 #include <stdint.h>
+#include <string.h>
+#include <rte_byteorder.h>
 
 /* jhash.h: Jenkins hash support.
  *
- * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
+ * Copyright (C) 2006 Bob Jenkins (bob_jenkins@burtleburtle.net)
  *
  * http://burtleburtle.net/bob/hash/
  *
  * These are the credits from Bob's sources:
  *
- * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
- * hash(), hash2(), hash3, and mix() are externally useful functions.
- * Routines to test the hash are included if SELF_TEST is defined.
- * You can use this free for any purpose.  It has no warranty.
+ * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+ *
+ * These are functions for producing 32-bit hashes for hash table lookup.
+ * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
+ * are externally useful functions.  Routines to test the hash are included
+ * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
+ * the public domain.  It has no warranty.
  *
  * $FreeBSD$
  */
 
+#define rot(x, k) (((x) << (k)) | ((x) >> (32-(k))))
+
 /** @internal Internal function. NOTE: Arguments are modified. */
 #define __rte_jhash_mix(a, b, c) do { \
-	a -= b; a -= c; a ^= (c>>13); \
-	b -= c; b -= a; b ^= (a<<8); \
-	c -= a; c -= b; c ^= (b>>13); \
-	a -= b; a -= c; a ^= (c>>12); \
-	b -= c; b -= a; b ^= (a<<16); \
-	c -= a; c -= b; c ^= (b>>5); \
-	a -= b; a -= c; a ^= (c>>3); \
-	b -= c; b -= a; b ^= (a<<10); \
-	c -= a; c -= b; c ^= (b>>15); \
+	a -= c; a ^= rot(c, 4); c += b; \
+	b -= a; b ^= rot(a, 6); a += c; \
+	c -= b; c ^= rot(b, 8); b += a; \
+	a -= c; a ^= rot(c, 16); c += b; \
+	b -= a; b ^= rot(a, 19); a += c; \
+	c -= b; c ^= rot(b, 4); b += a; \
+} while (0)
+
+#define __rte_jhash_final(a, b, c) do { \
+	c ^= b; c -= rot(b, 14); \
+	a ^= c; a -= rot(c, 11); \
+	b ^= a; b -= rot(a, 25); \
+	c ^= b; c -= rot(b, 16); \
+	a ^= c; a -= rot(c, 4);  \
+	b ^= a; b -= rot(a, 14); \
+	c ^= b; c -= rot(b, 24); \
 } while (0)
 
 /** The golden ratio: an arbitrary value. */
-#define RTE_JHASH_GOLDEN_RATIO      0x9e3779b9
+#define RTE_JHASH_GOLDEN_RATIO      0xdeadbeef
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+#define BIT_SHIFT(x, y, k) (((x) >> (k)) | ((uint64_t)(y) << (32-(k))))
+#else
+#define BIT_SHIFT(x, y, k) (((uint64_t)(x) << (k)) | ((y) >> (32-(k))))
+#endif
+
+#define LOWER8b_MASK rte_le_to_cpu_32(0xff)
+#define LOWER16b_MASK rte_le_to_cpu_32(0xffff)
+#define LOWER24b_MASK rte_le_to_cpu_32(0xffffff)
 
 /**
  * The most generic version, hashes an arbitrary sequence
@@ -95,42 +119,130 @@ extern "C" {
 static inline uint32_t
 rte_jhash(const void *key, uint32_t length, uint32_t initval)
 {
-	uint32_t a, b, c, len;
-	const uint8_t *k = (const uint8_t *)key;
-	const uint32_t *k32 = (const uint32_t *)key;
+	uint32_t a, b, c;
 
-	len = length;
-	a = b = RTE_JHASH_GOLDEN_RATIO;
-	c = initval;
+	/* Set up the internal state */
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + initval;
 
-	while (len >= 12) {
-		a += k32[0];
-		b += k32[1];
-		c += k32[2];
+	/* Check key alignment. For x86 architecture, first case is always optimal */
+#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_X32)
+	const uint32_t *k = key;
+	const uint32_t s = 0;
+#else
+	const uint32_t *k = (uint32_t *)(uintptr_t)key & (uintptr_t)~3);
+	const uint32_t s = ((uintptr_t)key & 3) * CHAR_BIT;
+#endif
 
-		__rte_jhash_mix(a,b,c);
+	if (s == 0) {
+		while (length > 12) {
+			a += k[0];
+			b += k[1];
+			c += k[2];
 
-		k += (3 * sizeof(uint32_t)), k32 += 3;
-		len -= (3 * sizeof(uint32_t));
-	}
+			__rte_jhash_mix(a, b, c);
 
-	c += length;
-	switch (len) {
-		case 11: c += ((uint32_t)k[10] << 24);
-		case 10: c += ((uint32_t)k[9] << 16);
-		case 9 : c += ((uint32_t)k[8] << 8);
-		case 8 : b += ((uint32_t)k[7] << 24);
-		case 7 : b += ((uint32_t)k[6] << 16);
-		case 6 : b += ((uint32_t)k[5] << 8);
-		case 5 : b += k[4];
-		case 4 : a += ((uint32_t)k[3] << 24);
-		case 3 : a += ((uint32_t)k[2] << 16);
-		case 2 : a += ((uint32_t)k[1] << 8);
-		case 1 : a += k[0];
-		default: break;
-	};
+			k += 3;
+			length -= 12;
+		}
 
-	__rte_jhash_mix(a,b,c);
+		switch (length) {
+		case 12:
+			c += k[2]; b += k[1]; a += k[0]; break;
+		case 11:
+			c += k[2] & LOWER24b_MASK; b += k[1]; a += k[0]; break;
+		case 10:
+			c += k[2] & LOWER16b_MASK; b += k[1]; a += k[0]; break;
+		case 9:
+			c += k[2] & LOWER8b_MASK; b += k[1]; a += k[0]; break;
+		case 8:
+			b += k[1]; a += k[0]; break;
+		case 7:
+			b += k[1] & LOWER24b_MASK; a += k[0]; break;
+		case 6:
+			b += k[1] & LOWER16b_MASK; a += k[0]; break;
+		case 5:
+			b += k[1] & LOWER8b_MASK; a += k[0]; break;
+		case 4:
+			a += k[0]; break;
+		case 3:
+			a += k[0] & LOWER24b_MASK; break;
+		case 2:
+			a += k[0] & LOWER16b_MASK; break;
+		case 1:
+			a += k[0] & LOWER8b_MASK; break;
+		/* zero length strings require no mixing */
+		case 0:
+			return c;
+		};
+	} else {
+		/* all but the last block: affect some 32 bits of (a, b, c) */
+		while (length > 12) {
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s);
+			__rte_jhash_mix(a, b, c);
+
+			k += 3;
+			length -= 12;
+		}
+
+		/* last block: affect all 32 bits of (c) */
+		switch (length) {
+		case 12:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s);
+			break;
+		case 11:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s) & LOWER24b_MASK;
+			break;
+		case 10:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s) & LOWER16b_MASK;
+			break;
+		case 9:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s) & LOWER8b_MASK;
+			break;
+		case 8:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			break;
+		case 7:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s) & LOWER24b_MASK;
+			break;
+		case 6:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s) & LOWER16b_MASK;
+			break;
+		case 5:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s) & LOWER8b_MASK;
+			break;
+		case 4:
+			a += BIT_SHIFT(k[0], k[1], s);
+			break;
+		case 3:
+			a += BIT_SHIFT(k[0], k[1], s) & LOWER24b_MASK;
+			break;
+		case 2:
+			a += BIT_SHIFT(k[0], k[1], s) & LOWER16b_MASK;
+			break;
+		case 1:
+			a += BIT_SHIFT(k[0], k[1], s) & LOWER8b_MASK;
+			break;
+		/* zero length strings require no mixing */
+		case 0:
+			return c;
+		}
+	}
+
+	__rte_jhash_final(a, b, c);
 
 	return c;
 }
@@ -151,33 +263,51 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
 static inline uint32_t
 rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
 {
-	uint32_t a, b, c, len;
+	uint32_t a, b, c;
 
-	a = b = RTE_JHASH_GOLDEN_RATIO;
-	c = initval;
-	len = length;
+	/* Set up the internal state */
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length) << 2) + initval;
 
-	while (len >= 3) {
+	/* Handle most of the key */
+	while (length > 3) {
 		a += k[0];
 		b += k[1];
 		c += k[2];
+
 		__rte_jhash_mix(a, b, c);
-		k += 3; len -= 3;
-	}
 
-	c += length * 4;
+		k += 3;
+		length -= 3;
+	}
 
-	switch (len) {
-		case 2 : b += k[1];
-		case 1 : a += k[0];
-		default: break;
+	/* Handle the last 3 uint32_t's */
+	switch (length) {
+	case 3:
+		c += k[2];
+	case 2:
+		b += k[1];
+	case 1:
+		a += k[0];
+		__rte_jhash_final(a, b, c);
+	/* case 0: nothing left to add */
+	case 0:
+		break;
 	};
 
-	__rte_jhash_mix(a,b,c);
-
 	return c;
 }
 
+static inline uint32_t
+__rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
+{
+	a += RTE_JHASH_GOLDEN_RATIO + initval;
+	b += RTE_JHASH_GOLDEN_RATIO + initval;
+	c += RTE_JHASH_GOLDEN_RATIO + initval;
+
+	__rte_jhash_final(a, b, c);
+
+	return c;
+}
 
 /**
  * A special ultra-optimized versions that knows it is hashing exactly
@@ -197,17 +327,7 @@ rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
 static inline uint32_t
 rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
 {
-	a += RTE_JHASH_GOLDEN_RATIO;
-	b += RTE_JHASH_GOLDEN_RATIO;
-	c += initval;
-
-	__rte_jhash_mix(a, b, c);
-
-	/*
-	 * NOTE: In particular the "c += length; __rte_jhash_mix(a,b,c);"
-	 *       normally done at the end is not done here.
-	 */
-	return c;
+	return __rte_jhash_3words(a + 12, b + 12, c + 12, initval);
 }
 
 /**
@@ -226,7 +346,7 @@ rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
 static inline uint32_t
 rte_jhash_2words(uint32_t a, uint32_t b, uint32_t initval)
 {
-	return rte_jhash_3words(a, b, 0, initval);
+	return __rte_jhash_3words(a + 8, b + 8, 8, initval);
 }
 
 /**
@@ -243,7 +363,7 @@ rte_jhash_2words(uint32_t a, uint32_t b, uint32_t initval)
 static inline uint32_t
 rte_jhash_1word(uint32_t a, uint32_t initval)
 {
-	return rte_jhash_3words(a, 0, 0, initval);
+	return __rte_jhash_3words(a + 4, 4, 4, initval);
 }
 
 #ifdef __cplusplus
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v5 07/10] hash: add two new functions to jhash library
  2015-05-22 10:16       ` [dpdk-dev] [PATCH v5 00/10] " Pablo de Lara
                           ` (5 preceding siblings ...)
  2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 06/10] hash: update jhash function with the latest available Pablo de Lara
@ 2015-05-22 10:16         ` Pablo de Lara
  2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 08/10] hash: remove duplicated code Pablo de Lara
                           ` (3 subsequent siblings)
  10 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-05-22 10:16 UTC (permalink / raw)
  To: dev

From: De Lara Guarch, Pablo <pablo.de.lara.guarch@intel.com>

With the jhash update, two new functions were introduced:

- rte_jhash_2hashes: Same as rte_jhash, but takes two seeds
                     and returns two hashes (uint32_ts)

- rte_jhash2_2hashes: Same as rte_jhash2, but takes two seeds
                     and returns two hashes (uint32_ts)

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 lib/librte_hash/rte_jhash.h |  207 +++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 207 insertions(+), 0 deletions(-)

diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index 41297ab..75fc596 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -297,6 +297,213 @@ rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
 	return c;
 }
 
+/**
+ * Same as rte_jhash, but takes two seeds and return two uint32_ts.
+ * pc and pb must be non-null, and *pc and *pb must both be initialized
+ * with seeds. If you pass in (*pb)=0, the output (*pc) will be
+ * the same as the return value from rte_jhash.
+ *
+ * @param k
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in bytes.
+ * @param pc
+ *   IN: seed OUT: primary hash value.
+ * @param pc
+ *   IN: second seed OUT: secondary hash value.
+ */
+static inline void
+rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb)
+{
+	uint32_t a, b, c;
+
+	/* Set up the internal state */
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + *pc;
+	c += *pb;
+
+	/* Check key alignment. For x86 architecture, first case is always optimal */
+#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_X32)
+	const uint32_t *k = key;
+	const uint32_t s = 0;
+#else
+	const uint32_t *k = (uint32_t *)(uintptr_t)key & (uintptr_t)~3);
+	const uint32_t s = ((uintptr_t)key & 3) * CHAR_BIT;
+#endif
+
+	if (s == 0) {
+		while (length > 12) {
+			a += k[0];
+			b += k[1];
+			c += k[2];
+
+			__rte_jhash_mix(a, b, c);
+
+			k += 3;
+			length -= 12;
+		}
+
+		switch (length) {
+		case 12:
+			c += k[2]; b += k[1]; a += k[0]; break;
+		case 11:
+			c += k[2] & LOWER24b_MASK; b += k[1]; a += k[0]; break;
+		case 10:
+			c += k[2] & LOWER16b_MASK; b += k[1]; a += k[0]; break;
+		case 9:
+			c += k[2] & LOWER8b_MASK; b += k[1]; a += k[0]; break;
+		case 8:
+			b += k[1]; a += k[0]; break;
+		case 7:
+			b += k[1] & LOWER24b_MASK; a += k[0]; break;
+		case 6:
+			b += k[1] & LOWER16b_MASK; a += k[0]; break;
+		case 5:
+			b += k[1] & LOWER8b_MASK; a += k[0]; break;
+		case 4:
+			a += k[0]; break;
+		case 3:
+			a += k[0] & LOWER24b_MASK; break;
+		case 2:
+			a += k[0] & LOWER16b_MASK; break;
+		case 1:
+			a += k[0] & LOWER8b_MASK; break;
+		/* zero length strings require no mixing */
+		case 0:
+			*pc = c;
+			*pb = b;
+			return;
+		};
+	} else {
+		/* all but the last block: affect some 32 bits of (a, b, c) */
+		while (length > 12) {
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s);
+			__rte_jhash_mix(a, b, c);
+
+			k += 3;
+			length -= 12;
+		}
+
+		/* last block: affect all 32 bits of (c) */
+		switch (length) {
+		case 12:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s);
+			break;
+		case 11:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s) & LOWER24b_MASK;
+			break;
+		case 10:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s) & LOWER16b_MASK;
+			break;
+		case 9:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s) & LOWER8b_MASK;
+			break;
+		case 8:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			break;
+		case 7:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s) & LOWER24b_MASK;
+			break;
+		case 6:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s) & LOWER16b_MASK;
+			break;
+		case 5:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s) & LOWER8b_MASK;
+			break;
+		case 4:
+			a += BIT_SHIFT(k[0], k[1], s);
+			break;
+		case 3:
+			a += BIT_SHIFT(k[0], k[1], s) & LOWER24b_MASK;
+			break;
+		case 2:
+			a += BIT_SHIFT(k[0], k[1], s) & LOWER16b_MASK;
+			break;
+		case 1:
+			a += BIT_SHIFT(k[0], k[1], s) & LOWER8b_MASK;
+			break;
+		/* zero length strings require no mixing */
+		case 0:
+			*pc = c;
+			*pb = b;
+			return;
+		}
+	}
+
+	__rte_jhash_final(a, b, c);
+
+	*pc = c;
+	*pb = b;
+}
+
+/**
+ * Same as rte_jhash2, but takes two seeds and return two uint32_ts.
+ * pc and pb must be non-null, and *pc and *pb must both be initialized
+ * with seeds. If you pass in (*pb)=0, the output (*pc) will be
+ * the same as the return value from rte_jhash2.
+ *
+ * @param k
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in units of 4 bytes.
+ * @param pc
+ *   IN: seed OUT: primary hash value.
+ * @param pc
+ *   IN: second seed OUT: secondary hash value.
+ */
+static inline void
+rte_jhash2_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb)
+{
+	uint32_t a, b, c;
+
+	/* Set up the internal state */
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length) << 2) + *pc;
+	c += *pb;
+
+	/* Handle most of the key */
+	while (length > 3) {
+		a += k[0];
+		b += k[1];
+		c += k[2];
+
+		__rte_jhash_mix(a, b, c);
+
+		k += 3;
+		length -= 3;
+	}
+
+	/* Handle the last 3 uint32_t's */
+	switch (length) {
+	/* fallthrough */
+	case 3:
+		c += k[2];
+	case 2:
+		b += k[1];
+	case 1:
+		a += k[0];
+		__rte_jhash_final(a, b, c);
+	/* case 0: nothing left to add */
+	case 0:
+		break;
+	};
+
+	*pc = c;
+	*pb = b;
+}
+
 static inline uint32_t
 __rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
 {
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v5 08/10] hash: remove duplicated code
  2015-05-22 10:16       ` [dpdk-dev] [PATCH v5 00/10] " Pablo de Lara
                           ` (6 preceding siblings ...)
  2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 07/10] hash: add two new functions to jhash library Pablo de Lara
@ 2015-05-22 10:16         ` Pablo de Lara
  2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 09/10] hash: rename rte_jhash2 to rte_jhash_32b Pablo de Lara
                           ` (2 subsequent siblings)
  10 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-05-22 10:16 UTC (permalink / raw)
  To: dev

rte_jhash is basically like __rte_jhash_2hashes, but it returns only 1 hash instead of 2.
In order to remove duplicated code, rte_jhash calls __rte_jhash_2hashes,
passing 0 as the second seed and returning just the first hash value
(the performance penalty is negligible).

The same is done with rte_jhash2. Also, rte_jhash2 is just a specific case
where keys are a multiple of 32 bits, and where no key alignment check is required.
So, to avoid duplicated code, the function calls __rte_jhash_2hashes with check_align = 0
(to use the optimal path).

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 lib/librte_hash/rte_jhash.h |  294 +++++++++----------------------------------
 1 files changed, 60 insertions(+), 234 deletions(-)

diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index 75fc596..aa5bb2d 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -102,29 +102,19 @@ extern "C" {
 #define LOWER16b_MASK rte_le_to_cpu_32(0xffff)
 #define LOWER24b_MASK rte_le_to_cpu_32(0xffffff)
 
-/**
- * The most generic version, hashes an arbitrary sequence
- * of bytes.  No alignment or length assumptions are made about
- * the input key.
- *
- * @param key
- *   Key to calculate hash of.
- * @param length
- *   Length of key in bytes.
- * @param initval
- *   Initialising value of hash.
- * @return
- *   Calculated hash value.
- */
-static inline uint32_t
-rte_jhash(const void *key, uint32_t length, uint32_t initval)
+static inline void
+__rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb, unsigned check_align)
 {
 	uint32_t a, b, c;
 
 	/* Set up the internal state */
-	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + initval;
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + *pc;
+	c += *pb;
 
-	/* Check key alignment. For x86 architecture, first case is always optimal */
+	/*
+	 * Check key alignment. For x86 architecture, first case is always optimal
+	 * If check_align is not set, first case will be used
+	 */
 #if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_X32)
 	const uint32_t *k = key;
 	const uint32_t s = 0;
@@ -132,8 +122,7 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
 	const uint32_t *k = (uint32_t *)(uintptr_t)key & (uintptr_t)~3);
 	const uint32_t s = ((uintptr_t)key & 3) * CHAR_BIT;
 #endif
-
-	if (s == 0) {
+	if (!check_align || s == 0) {
 		while (length > 12) {
 			a += k[0];
 			b += k[1];
@@ -172,7 +161,9 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
 			a += k[0] & LOWER8b_MASK; break;
 		/* zero length strings require no mixing */
 		case 0:
-			return c;
+			*pc = c;
+			*pb = b;
+			return;
 		};
 	} else {
 		/* all but the last block: affect some 32 bits of (a, b, c) */
@@ -238,63 +229,16 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
 			break;
 		/* zero length strings require no mixing */
 		case 0:
-			return c;
+			*pc = c;
+			*pb = b;
+			return;
 		}
 	}
 
 	__rte_jhash_final(a, b, c);
 
-	return c;
-}
-
-/**
- * A special optimized version that handles 1 or more of uint32_ts.
- * The length parameter here is the number of uint32_ts in the key.
- *
- * @param k
- *   Key to calculate hash of.
- * @param length
- *   Length of key in units of 4 bytes.
- * @param initval
- *   Initialising value of hash.
- * @return
- *   Calculated hash value.
- */
-static inline uint32_t
-rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
-{
-	uint32_t a, b, c;
-
-	/* Set up the internal state */
-	a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length) << 2) + initval;
-
-	/* Handle most of the key */
-	while (length > 3) {
-		a += k[0];
-		b += k[1];
-		c += k[2];
-
-		__rte_jhash_mix(a, b, c);
-
-		k += 3;
-		length -= 3;
-	}
-
-	/* Handle the last 3 uint32_t's */
-	switch (length) {
-	case 3:
-		c += k[2];
-	case 2:
-		b += k[1];
-	case 1:
-		a += k[0];
-		__rte_jhash_final(a, b, c);
-	/* case 0: nothing left to add */
-	case 0:
-		break;
-	};
-
-	return c;
+	*pc = c;
+	*pb = b;
 }
 
 /**
@@ -315,138 +259,7 @@ rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
 static inline void
 rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb)
 {
-	uint32_t a, b, c;
-
-	/* Set up the internal state */
-	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + *pc;
-	c += *pb;
-
-	/* Check key alignment. For x86 architecture, first case is always optimal */
-#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_X32)
-	const uint32_t *k = key;
-	const uint32_t s = 0;
-#else
-	const uint32_t *k = (uint32_t *)(uintptr_t)key & (uintptr_t)~3);
-	const uint32_t s = ((uintptr_t)key & 3) * CHAR_BIT;
-#endif
-
-	if (s == 0) {
-		while (length > 12) {
-			a += k[0];
-			b += k[1];
-			c += k[2];
-
-			__rte_jhash_mix(a, b, c);
-
-			k += 3;
-			length -= 12;
-		}
-
-		switch (length) {
-		case 12:
-			c += k[2]; b += k[1]; a += k[0]; break;
-		case 11:
-			c += k[2] & LOWER24b_MASK; b += k[1]; a += k[0]; break;
-		case 10:
-			c += k[2] & LOWER16b_MASK; b += k[1]; a += k[0]; break;
-		case 9:
-			c += k[2] & LOWER8b_MASK; b += k[1]; a += k[0]; break;
-		case 8:
-			b += k[1]; a += k[0]; break;
-		case 7:
-			b += k[1] & LOWER24b_MASK; a += k[0]; break;
-		case 6:
-			b += k[1] & LOWER16b_MASK; a += k[0]; break;
-		case 5:
-			b += k[1] & LOWER8b_MASK; a += k[0]; break;
-		case 4:
-			a += k[0]; break;
-		case 3:
-			a += k[0] & LOWER24b_MASK; break;
-		case 2:
-			a += k[0] & LOWER16b_MASK; break;
-		case 1:
-			a += k[0] & LOWER8b_MASK; break;
-		/* zero length strings require no mixing */
-		case 0:
-			*pc = c;
-			*pb = b;
-			return;
-		};
-	} else {
-		/* all but the last block: affect some 32 bits of (a, b, c) */
-		while (length > 12) {
-			a += BIT_SHIFT(k[0], k[1], s);
-			b += BIT_SHIFT(k[1], k[2], s);
-			c += BIT_SHIFT(k[2], k[3], s);
-			__rte_jhash_mix(a, b, c);
-
-			k += 3;
-			length -= 12;
-		}
-
-		/* last block: affect all 32 bits of (c) */
-		switch (length) {
-		case 12:
-			a += BIT_SHIFT(k[0], k[1], s);
-			b += BIT_SHIFT(k[1], k[2], s);
-			c += BIT_SHIFT(k[2], k[3], s);
-			break;
-		case 11:
-			a += BIT_SHIFT(k[0], k[1], s);
-			b += BIT_SHIFT(k[1], k[2], s);
-			c += BIT_SHIFT(k[2], k[3], s) & LOWER24b_MASK;
-			break;
-		case 10:
-			a += BIT_SHIFT(k[0], k[1], s);
-			b += BIT_SHIFT(k[1], k[2], s);
-			c += BIT_SHIFT(k[2], k[3], s) & LOWER16b_MASK;
-			break;
-		case 9:
-			a += BIT_SHIFT(k[0], k[1], s);
-			b += BIT_SHIFT(k[1], k[2], s);
-			c += BIT_SHIFT(k[2], k[3], s) & LOWER8b_MASK;
-			break;
-		case 8:
-			a += BIT_SHIFT(k[0], k[1], s);
-			b += BIT_SHIFT(k[1], k[2], s);
-			break;
-		case 7:
-			a += BIT_SHIFT(k[0], k[1], s);
-			b += BIT_SHIFT(k[1], k[2], s) & LOWER24b_MASK;
-			break;
-		case 6:
-			a += BIT_SHIFT(k[0], k[1], s);
-			b += BIT_SHIFT(k[1], k[2], s) & LOWER16b_MASK;
-			break;
-		case 5:
-			a += BIT_SHIFT(k[0], k[1], s);
-			b += BIT_SHIFT(k[1], k[2], s) & LOWER8b_MASK;
-			break;
-		case 4:
-			a += BIT_SHIFT(k[0], k[1], s);
-			break;
-		case 3:
-			a += BIT_SHIFT(k[0], k[1], s) & LOWER24b_MASK;
-			break;
-		case 2:
-			a += BIT_SHIFT(k[0], k[1], s) & LOWER16b_MASK;
-			break;
-		case 1:
-			a += BIT_SHIFT(k[0], k[1], s) & LOWER8b_MASK;
-			break;
-		/* zero length strings require no mixing */
-		case 0:
-			*pc = c;
-			*pb = b;
-			return;
-		}
-	}
-
-	__rte_jhash_final(a, b, c);
-
-	*pc = c;
-	*pb = b;
+	__rte_jhash_2hashes(key, length, pc, pb, 1);
 }
 
 /**
@@ -467,41 +280,54 @@ rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb)
 static inline void
 rte_jhash2_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb)
 {
-	uint32_t a, b, c;
+	__rte_jhash_2hashes((const void *) k, (length << 2), pc, pb, 0);
+}
 
-	/* Set up the internal state */
-	a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length) << 2) + *pc;
-	c += *pb;
+/**
+ * The most generic version, hashes an arbitrary sequence
+ * of bytes.  No alignment or length assumptions are made about
+ * the input key.
+ *
+ * @param key
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in bytes.
+ * @param initval
+ *   Initialising value of hash.
+ * @return
+ *   Calculated hash value.
+ */
+static inline uint32_t
+rte_jhash(const void *key, uint32_t length, uint32_t initval)
+{
+	uint32_t initval2 = 0;
 
-	/* Handle most of the key */
-	while (length > 3) {
-		a += k[0];
-		b += k[1];
-		c += k[2];
+	rte_jhash_2hashes(key, length, &initval, &initval2);
 
-		__rte_jhash_mix(a, b, c);
+	return initval;
+}
 
-		k += 3;
-		length -= 3;
-	}
+/**
+ * A special optimized version that handles 1 or more of uint32_ts.
+ * The length parameter here is the number of uint32_ts in the key.
+ *
+ * @param k
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in units of 4 bytes.
+ * @param initval
+ *   Initialising value of hash.
+ * @return
+ *   Calculated hash value.
+ */
+static inline uint32_t
+rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
+{
+	uint32_t initval2 = 0;
 
-	/* Handle the last 3 uint32_t's */
-	switch (length) {
-	/* fallthrough */
-	case 3:
-		c += k[2];
-	case 2:
-		b += k[1];
-	case 1:
-		a += k[0];
-		__rte_jhash_final(a, b, c);
-	/* case 0: nothing left to add */
-	case 0:
-		break;
-	};
+	rte_jhash2_2hashes(k, length, &initval, &initval2);
 
-	*pc = c;
-	*pb = b;
+	return initval;
 }
 
 static inline uint32_t
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v5 09/10] hash: rename rte_jhash2 to rte_jhash_32b
  2015-05-22 10:16       ` [dpdk-dev] [PATCH v5 00/10] " Pablo de Lara
                           ` (7 preceding siblings ...)
  2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 08/10] hash: remove duplicated code Pablo de Lara
@ 2015-05-22 10:16         ` Pablo de Lara
  2015-06-10 11:09           ` Bruce Richardson
  2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 10/10] test/hash: verify rte_jhash_1word/2words/3words Pablo de Lara
  2015-06-10 15:25         ` [dpdk-dev] [PATCH v6 00/10] update jhash function Pablo de Lara
  10 siblings, 1 reply; 62+ messages in thread
From: Pablo de Lara @ 2015-05-22 10:16 UTC (permalink / raw)
  To: dev

Changed the name to something more meaningful,
and marked rte_jhash2 as deprecated.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 app/test/test_func_reentrancy.c |    2 +-
 app/test/test_hash.c            |    4 ++--
 app/test/test_hash_functions.c  |    6 +++---
 lib/librte_hash/rte_jhash.h     |   17 +++++++++++++++--
 4 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/app/test/test_func_reentrancy.c b/app/test/test_func_reentrancy.c
index dc070af..85504c0 100644
--- a/app/test/test_func_reentrancy.c
+++ b/app/test/test_func_reentrancy.c
@@ -228,7 +228,7 @@ hash_create_free(__attribute__((unused)) void *arg)
 		.entries = 16,
 		.bucket_entries = 4,
 		.key_len = 4,
-		.hash_func = (rte_hash_function)rte_jhash2,
+		.hash_func = (rte_hash_function)rte_jhash_32b,
 		.hash_func_init_val = 0,
 		.socket_id = 0,
 	};
diff --git a/app/test/test_hash.c b/app/test/test_hash.c
index 1da27c5..4ecb11b 100644
--- a/app/test/test_hash.c
+++ b/app/test/test_hash.c
@@ -1177,7 +1177,7 @@ test_hash_add_delete_jhash2(void)
 
 	hash_params_ex.name = "hash_test_jhash2";
 	hash_params_ex.key_len = 4;
-	hash_params_ex.hash_func = (rte_hash_function)rte_jhash2;
+	hash_params_ex.hash_func = (rte_hash_function)rte_jhash_32b;
 
 	handle = rte_hash_create(&hash_params_ex);
 	if (handle == NULL) {
@@ -1216,7 +1216,7 @@ test_hash_add_delete_2_jhash2(void)
 
 	hash_params_ex.name = "hash_test_2_jhash2";
 	hash_params_ex.key_len = 8;
-	hash_params_ex.hash_func = (rte_hash_function)rte_jhash2;
+	hash_params_ex.hash_func = (rte_hash_function)rte_jhash_32b;
 
 	handle = rte_hash_create(&hash_params_ex);
 	if (handle == NULL)
diff --git a/app/test/test_hash_functions.c b/app/test/test_hash_functions.c
index 976dd95..04b2862 100644
--- a/app/test/test_hash_functions.c
+++ b/app/test/test_hash_functions.c
@@ -208,7 +208,7 @@ verify_precalculated_hash_func_tests(void)
 }
 
 /*
- * Verify that rte_jhash and rte_jhash2 return the same
+ * Verify that rte_jhash and rte_jhash_32b return the same
  */
 static int
 verify_jhash_32bits(void)
@@ -226,10 +226,10 @@ verify_jhash_32bits(void)
 			if ((hashtest_key_lens[i] & 0x3) == 0) {
 				hash = rte_jhash(key, hashtest_key_lens[i], hashtest_initvals[j]);
 				/* Divide key length by 4 in rte_jhash for 32 bits */
-				hash32 = rte_jhash2((const uint32_t *)key, hashtest_key_lens[i] >> 2, hashtest_initvals[j]);
+				hash32 = rte_jhash_32b((const uint32_t *)key, hashtest_key_lens[i] >> 2, hashtest_initvals[j]);
 				if (hash != hash32) {
 					printf("rte_jhash returns different value (0x%x)"
-					       "than rte_jhash2 (0x%x)\n",
+					       "than rte_jhash_32b (0x%x)\n",
 					       hash, hash32);
 					return -1;
 				}
diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index aa5bb2d..01b1795 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -46,6 +46,8 @@ extern "C" {
 
 #include <stdint.h>
 #include <string.h>
+
+#include <rte_log.h>
 #include <rte_byteorder.h>
 
 /* jhash.h: Jenkins hash support.
@@ -278,7 +280,7 @@ rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb)
  *   IN: second seed OUT: secondary hash value.
  */
 static inline void
-rte_jhash2_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb)
+rte_jhash_32b_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb)
 {
 	__rte_jhash_2hashes((const void *) k, (length << 2), pc, pb, 0);
 }
@@ -321,11 +323,22 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
  *   Calculated hash value.
  */
 static inline uint32_t
+rte_jhash_32b(const uint32_t *k, uint32_t length, uint32_t initval)
+{
+	uint32_t initval2 = 0;
+
+	rte_jhash_32b_2hashes(k, length, &initval, &initval2);
+
+	return initval;
+}
+
+static inline uint32_t
 rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
 {
 	uint32_t initval2 = 0;
 
-	rte_jhash2_2hashes(k, length, &initval, &initval2);
+	RTE_LOG(WARNING, HASH, "rte_jhash2 is deprecated\n");
+	rte_jhash_32b_2hashes(k, length, &initval, &initval2);
 
 	return initval;
 }
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v5 10/10] test/hash: verify rte_jhash_1word/2words/3words
  2015-05-22 10:16       ` [dpdk-dev] [PATCH v5 00/10] " Pablo de Lara
                           ` (8 preceding siblings ...)
  2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 09/10] hash: rename rte_jhash2 to rte_jhash_32b Pablo de Lara
@ 2015-05-22 10:16         ` Pablo de Lara
  2015-06-10 15:25         ` [dpdk-dev] [PATCH v6 00/10] update jhash function Pablo de Lara
  10 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-05-22 10:16 UTC (permalink / raw)
  To: dev

Added a new test that verifies that rte_jhash_1word,
rte_jhash_2words and rte_jhash_3words return the same
values as rte_jhash.

Note that this patch has been added after the update
of the jhash function because these 3 functions did not
return the same values as rte_jhash before.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 app/test/test_hash_functions.c |   48 ++++++++++++++++++++++++++++++++++++++++
 1 files changed, 48 insertions(+), 0 deletions(-)

diff --git a/app/test/test_hash_functions.c b/app/test/test_hash_functions.c
index 04b2862..7c830b2 100644
--- a/app/test/test_hash_functions.c
+++ b/app/test/test_hash_functions.c
@@ -241,6 +241,51 @@ verify_jhash_32bits(void)
 }
 
 /*
+ * Verify that rte_jhash and rte_jhash_1word, rte_jhash_2words
+ * and rte_jhash_3words return the same
+ */
+static int
+verify_jhash_words(void)
+{
+	unsigned i;
+	uint32_t key[3];
+	uint32_t hash, hash_words;
+
+	for (i = 0; i < 3; i++)
+		key[i] = rand();
+
+	/* Test rte_jhash_1word */
+	hash = rte_jhash(key, 4, 0);
+	hash_words = rte_jhash_1word(key[0], 0);
+	if (hash != hash_words) {
+		printf("rte_jhash returns different value (0x%x)"
+		       "than rte_jhash_1word (0x%x)\n",
+		       hash, hash_words);
+		return -1;
+	}
+	/* Test rte_jhash_2words */
+	hash = rte_jhash(key, 8, 0);
+	hash_words = rte_jhash_2words(key[0], key[1], 0);
+	if (hash != hash_words) {
+		printf("rte_jhash returns different value (0x%x)"
+		       "than rte_jhash_2words (0x%x)\n",
+		       hash, hash_words);
+		return -1;
+	}
+	/* Test rte_jhash_3words */
+	hash = rte_jhash(key, 12, 0);
+	hash_words = rte_jhash_3words(key[0], key[1], key[2], 0);
+	if (hash != hash_words) {
+		printf("rte_jhash returns different value (0x%x)"
+		       "than rte_jhash_3words (0x%x)\n",
+		       hash, hash_words);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
  * Run all functional tests for hash functions
  */
 static int
@@ -252,6 +297,9 @@ run_hash_func_tests(void)
 	if (verify_jhash_32bits() != 0)
 		return -1;
 
+	if (verify_jhash_words() != 0)
+		return -1;
+
 	return 0;
 
 }
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [dpdk-dev] [PATCH v5 04/10] test/hash: change order of loops in hash function tests
  2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 04/10] test/hash: change order of loops in hash function tests Pablo de Lara
@ 2015-06-10 11:05           ` Bruce Richardson
  0 siblings, 0 replies; 62+ messages in thread
From: Bruce Richardson @ 2015-06-10 11:05 UTC (permalink / raw)
  To: Pablo de Lara; +Cc: dev

On Fri, May 22, 2015 at 11:16:05AM +0100, Pablo de Lara wrote:
> In order to see more clearly the performance difference
> between different hash functions, order of the loops
> have been changed, so it iterates first through initial values,
> then key sizes and then the hash functions.
> 
> Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
> ---
>  app/test/test_hash_functions.c |   20 ++++++++++----------
>  1 files changed, 10 insertions(+), 10 deletions(-)
> 
> diff --git a/app/test/test_hash_functions.c b/app/test/test_hash_functions.c
> index 973fbe8..3b72e8f 100644
> --- a/app/test/test_hash_functions.c
> +++ b/app/test/test_hash_functions.c
> @@ -86,8 +86,8 @@ get_hash_name(rte_hash_function f)
>   * Test a hash function.
>   */
>  static void
> -run_hash_func_perf_test(rte_hash_function f, uint32_t init_val,
> -		uint32_t key_len)
> +run_hash_func_perf_test(uint32_t key_len, uint32_t init_val,
> +		rte_hash_function f)
>  {
>  	static uint8_t key[HASHTEST_ITERATIONS][RTE_HASH_KEY_LENGTH_MAX];
>  	uint64_t ticks, start, end;
> @@ -122,17 +122,17 @@ run_hash_func_perf_tests(void)
>  	printf("Hash Func.  , Key Length (bytes), Initial value, Ticks/Op.\n");
>  
>  	for (i = 0;
> -	     i < sizeof(hashtest_funcs) / sizeof(rte_hash_function);
> +	     i < sizeof(hashtest_initvals) / sizeof(uint32_t);
>  	     i++) {
>  		for (j = 0;
> -		     j < sizeof(hashtest_initvals) / sizeof(uint32_t);
> -		     j++) {
> +		     j < sizeof(hashtest_key_lens) / sizeof(uint32_t);
> +	             j++) {
>  			for (k = 0;
> -			     k < sizeof(hashtest_key_lens) / sizeof(uint32_t);
> -			     k++) {

These for loops should be changed to use RTE_DIM() macro when possible. It should
allow each loop to just take up one line instead of three, as well as avoiding
changes to the loops if the type of value ever changes from uint32_t.

/Bruce

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [dpdk-dev] [PATCH v5 06/10] hash: update jhash function with the latest available
  2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 06/10] hash: update jhash function with the latest available Pablo de Lara
@ 2015-06-10 11:07           ` Bruce Richardson
  0 siblings, 0 replies; 62+ messages in thread
From: Bruce Richardson @ 2015-06-10 11:07 UTC (permalink / raw)
  To: Pablo de Lara; +Cc: dev

On Fri, May 22, 2015 at 11:16:07AM +0100, Pablo de Lara wrote:
> Jenkins hash function was developed originally in 1996,
> and was integrated in first versions of DPDK.
> The function has been improved in 2006,
> achieving up to 60% better performance, compared to the original one.
> 
> This patch integrates that code into the rte_jhash library.
> It also updates the precalculated hash values in the unit test,
> as the code now returns different values (expected)
> 
This should be clearly called out in the release notes for 2.1. I don't expect
that too many people are relying on the specific return values from the hash
function, but just in case, we need to call this out.

/Bruce

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [dpdk-dev] [PATCH v5 09/10] hash: rename rte_jhash2 to rte_jhash_32b
  2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 09/10] hash: rename rte_jhash2 to rte_jhash_32b Pablo de Lara
@ 2015-06-10 11:09           ` Bruce Richardson
  0 siblings, 0 replies; 62+ messages in thread
From: Bruce Richardson @ 2015-06-10 11:09 UTC (permalink / raw)
  To: Pablo de Lara; +Cc: dev

On Fri, May 22, 2015 at 11:16:10AM +0100, Pablo de Lara wrote:
> Changed name to something more meaningful,
> and mark rte_jhash2 as deprecated.
> 
> Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
> ---
>  app/test/test_func_reentrancy.c |    2 +-
>  app/test/test_hash.c            |    4 ++--
>  app/test/test_hash_functions.c  |    6 +++---
>  lib/librte_hash/rte_jhash.h     |   17 +++++++++++++++--
>  4 files changed, 21 insertions(+), 8 deletions(-)
> 
<snip>
> @@ -278,7 +280,7 @@ rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb)
>   *   IN: second seed OUT: secondary hash value.
>   */
>  static inline void
> -rte_jhash2_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb)
> +rte_jhash_32b_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb)
>  {
>  	__rte_jhash_2hashes((const void *) k, (length << 2), pc, pb, 0);
>  }
> @@ -321,11 +323,22 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
>   *   Calculated hash value.
>   */
>  static inline uint32_t
> +rte_jhash_32b(const uint32_t *k, uint32_t length, uint32_t initval)
> +{
> +	uint32_t initval2 = 0;
> +
> +	rte_jhash_32b_2hashes(k, length, &initval, &initval2);
> +
> +	return initval;
> +}
> +
> +static inline uint32_t
>  rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
>  {
>  	uint32_t initval2 = 0;
>  
> -	rte_jhash2_2hashes(k, length, &initval, &initval2);
> +	RTE_LOG(WARNING, HASH, "rte_jhash2 is deprecated\n");
> +	rte_jhash_32b_2hashes(k, length, &initval, &initval2);
>  
>  	return initval;
>  }

To deprecate this, rather than printing a message each time it is called, just
add "__attribute__((deprecated))" to the definition, and let the compiler do the
work of flagging this to the user.

/Bruce

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v6 00/10] update jhash function
  2015-05-22 10:16       ` [dpdk-dev] [PATCH v5 00/10] " Pablo de Lara
                           ` (9 preceding siblings ...)
  2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 10/10] test/hash: verify rte_jhash_1word/2words/3words Pablo de Lara
@ 2015-06-10 15:25         ` Pablo de Lara
  2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 01/10] test/hash: move hash function perf tests to separate file Pablo de Lara
                             ` (10 more replies)
  10 siblings, 11 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-06-10 15:25 UTC (permalink / raw)
  To: dev

Jenkins hash function was developed originally in 1996,
and was integrated in first versions of DPDK.
The function has been improved in 2006,
achieving up to 35% better performance, compared to the original one.

This patchset updates the current jhash in DPDK,
including two new functions that generate two hashes from a single key.

It also separates the existing hash function performance tests into
another file, to make them quicker to run, and adds new unit tests.

changes in v6:
- Use RTE_DIM macro, so it saves lines of code
- Correct mistaken performance improvement
- Add deprecated attribute to rte_jhash2, instead of printing a message each time it is called
- Add note stating the changes in release notes

changes in v5:
- Add functional tests (mainly to test that all functions 
  return the expected hash values)
- Modify range of key sizes to test
- Change order of output for perf tests, so it is clearer
  to compare different hash functions for same key size/initial value
- Add new initial value to test in the hash functions
- Fix some errors caught by checkpatch
 
changes in v4:
- Simplify key alignment checks
- Include missing x86 arch check

changes in v3:

- Update rte_jhash_1word, rte_jhash_2words and rte_jhash_3words
  functions

changes in v2:

- Split single commit in three commits, one that updates the existing functions
  and another that adds two new functions and use one of those functions
  as a base to be called by the other ones.
- Remove some unnecessary ifdefs in the code.
- Add new macros to help with the reuse of constants
- Separate hash function performance tests to another file
  and improve cycle measurements.
- Rename existing function rte_jhash2 to rte_jhash_32b
  (something more meaningful) and mark rte_jhash2 as
  deprecated

De Lara Guarch, Pablo (10):
  test/hash: move hash function perf tests to separate file
  test/hash: improve accuracy on cycle measurements
  test/hash: update key size range and initial values for testing
  test/hash: add new functional tests for hash functions
  hash: add two new functions to jhash library
  test/hash: verify rte_jhash_1word/2words/3words
  test/hash: change order of loops in hash function tests
  hash: update jhash function with the latest available
  hash: remove duplicated code
  hash: rename rte_jhash2 to rte_jhash_32b

 app/test/Makefile                     |   1 +
 app/test/test_func_reentrancy.c       |   2 +-
 app/test/test_hash.c                  |   4 +-
 app/test/test_hash_functions.c        | 321 ++++++++++++++++++++++++++++++++
 app/test/test_hash_perf.c             |  71 +------
 doc/guides/rel_notes/new_features.rst |   5 +
 lib/librte_hash/rte_jhash.h           | 339 +++++++++++++++++++++++++---------
 7 files changed, 584 insertions(+), 159 deletions(-)
 create mode 100644 app/test/test_hash_functions.c

-- 
2.4.2

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v6 01/10] test/hash: move hash function perf tests to separate file
  2015-06-10 15:25         ` [dpdk-dev] [PATCH v6 00/10] update jhash function Pablo de Lara
@ 2015-06-10 15:25           ` Pablo de Lara
  2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 02/10] test/hash: improve accuracy on cycle measurements Pablo de Lara
                             ` (9 subsequent siblings)
  10 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-06-10 15:25 UTC (permalink / raw)
  To: dev

This patch moves hash function performance tests to a separate file,
so users can check the performance of the existing hash functions more quickly,
without having to run all the other hash operation performance tests,
which take some time.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 app/test/Makefile              |   1 +
 app/test/test_hash_functions.c | 147 +++++++++++++++++++++++++++++++++++++++++
 app/test/test_hash_perf.c      |  71 +-------------------
 3 files changed, 149 insertions(+), 70 deletions(-)
 create mode 100644 app/test/test_hash_functions.c

diff --git a/app/test/Makefile b/app/test/Makefile
index 3c777bf..5cf8296 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -83,6 +83,7 @@ SRCS-y += test_memcpy_perf.c
 
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash.c
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_perf.c
+SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_functions.c
 
 SRCS-$(CONFIG_RTE_LIBRTE_LPM) += test_lpm.c
 SRCS-$(CONFIG_RTE_LIBRTE_LPM) += test_lpm6.c
diff --git a/app/test/test_hash_functions.c b/app/test/test_hash_functions.c
new file mode 100644
index 0000000..4efb3cd
--- /dev/null
+++ b/app/test/test_hash_functions.c
@@ -0,0 +1,147 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_cycles.h>
+#include <rte_random.h>
+#include <rte_hash.h>
+#include <rte_jhash.h>
+#include <rte_hash_crc.h>
+
+#include "test.h"
+
+/*******************************************************************************
+ * Hash function performance test configuration section. Each performance test
+ * will be performed HASHTEST_ITERATIONS times.
+ *
+ * The three arrays below control what tests are performed. Every combination
+ * from the array entries is tested.
+ */
+#define HASHTEST_ITERATIONS 1000000
+
+static rte_hash_function hashtest_funcs[] = {rte_jhash, rte_hash_crc};
+static uint32_t hashtest_initvals[] = {0};
+static uint32_t hashtest_key_lens[] = {2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, 31, 32, 33, 63, 64};
+/******************************************************************************/
+
+/*
+ * To help print out name of hash functions.
+ */
+static const char *
+get_hash_name(rte_hash_function f)
+{
+	if (f == rte_jhash)
+		return "jhash";
+
+	if (f == rte_hash_crc)
+		return "rte_hash_crc";
+
+	return "UnknownHash";
+}
+
+/*
+ * Test a hash function.
+ */
+static void
+run_hash_func_perf_test(rte_hash_function f, uint32_t init_val,
+		uint32_t key_len)
+{
+	static uint8_t key[RTE_HASH_KEY_LENGTH_MAX];
+	uint64_t ticks = 0, start, end;
+	unsigned i, j;
+
+	for (i = 0; i < HASHTEST_ITERATIONS; i++) {
+
+		for (j = 0; j < key_len; j++)
+			key[j] = (uint8_t) rte_rand();
+
+		start = rte_rdtsc();
+		f(key, key_len, init_val);
+		end = rte_rdtsc();
+		ticks += end - start;
+	}
+
+	printf("%-12s, %-18u, %-13u, %.02f\n", get_hash_name(f), (unsigned) key_len,
+			(unsigned) init_val, (double)ticks / HASHTEST_ITERATIONS);
+}
+
+/*
+ * Test all hash functions.
+ */
+static void
+run_hash_func_perf_tests(void)
+{
+	unsigned i, j, k;
+
+	printf(" *** Hash function performance test results ***\n");
+	printf(" Number of iterations for each test = %d\n",
+			HASHTEST_ITERATIONS);
+	printf("Hash Func.  , Key Length (bytes), Initial value, Ticks/Op.\n");
+
+	for (i = 0;
+	     i < sizeof(hashtest_funcs) / sizeof(rte_hash_function);
+	     i++) {
+		for (j = 0;
+		     j < sizeof(hashtest_initvals) / sizeof(uint32_t);
+		     j++) {
+			for (k = 0;
+			     k < sizeof(hashtest_key_lens) / sizeof(uint32_t);
+			     k++) {
+				run_hash_func_perf_test(hashtest_funcs[i],
+						hashtest_initvals[j],
+						hashtest_key_lens[k]);
+			}
+		}
+	}
+}
+
+static int
+test_hash_functions(void)
+{
+	run_hash_func_perf_tests();
+
+	return 0;
+}
+
+static struct test_command hash_functions_cmd = {
+	.command = "hash_functions_autotest",
+	.callback = test_hash_functions,
+};
+REGISTER_TEST_COMMAND(hash_functions_cmd);
diff --git a/app/test/test_hash_perf.c b/app/test/test_hash_perf.c
index 6eabb21..d0e5ce0 100644
--- a/app/test/test_hash_perf.c
+++ b/app/test/test_hash_perf.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -85,20 +85,6 @@ struct tbl_perf_test_params {
 #define LOCAL_FBK_HASH_ENTRIES_MAX (1 << 15)
 
 /*******************************************************************************
- * Hash function performance test configuration section. Each performance test
- * will be performed HASHTEST_ITERATIONS times.
- *
- * The five arrays below control what tests are performed. Every combination
- * from the array entries is tested.
- */
-#define HASHTEST_ITERATIONS 1000000
-
-static rte_hash_function hashtest_funcs[] = {rte_jhash, rte_hash_crc};
-static uint32_t hashtest_initvals[] = {0};
-static uint32_t hashtest_key_lens[] = {2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, 31, 32, 33, 63, 64};
-/******************************************************************************/
-
-/*******************************************************************************
  * Hash table performance test configuration section.
  */
 struct tbl_perf_test_params tbl_perf_params[] =
@@ -617,60 +603,6 @@ static int run_all_tbl_perf_tests(void)
 	return 0;
 }
 
-/*
- * Test a hash function.
- */
-static void run_hash_func_test(rte_hash_function f, uint32_t init_val,
-		uint32_t key_len)
-{
-	static uint8_t key[RTE_HASH_KEY_LENGTH_MAX];
-	uint64_t ticks = 0, start, end;
-	unsigned i, j;
-
-	for (i = 0; i < HASHTEST_ITERATIONS; i++) {
-
-		for (j = 0; j < key_len; j++)
-			key[j] = (uint8_t) rte_rand();
-
-		start = rte_rdtsc();
-		f(key, key_len, init_val);
-		end = rte_rdtsc();
-		ticks += end - start;
-	}
-
-	printf("%-12s, %-18u, %-13u, %.02f\n", get_hash_name(f), (unsigned) key_len,
-			(unsigned) init_val, (double)ticks / HASHTEST_ITERATIONS);
-}
-
-/*
- * Test all hash functions.
- */
-static void run_hash_func_tests(void)
-{
-	unsigned i, j, k;
-
-	printf("\n\n *** Hash function performance test results ***\n");
-	printf(" Number of iterations for each test = %d\n",
-			HASHTEST_ITERATIONS);
-	printf("Hash Func.  , Key Length (bytes), Initial value, Ticks/Op.\n");
-
-	for (i = 0;
-	     i < sizeof(hashtest_funcs) / sizeof(rte_hash_function);
-	     i++) {
-		for (j = 0;
-		     j < sizeof(hashtest_initvals) / sizeof(uint32_t);
-		     j++) {
-			for (k = 0;
-			     k < sizeof(hashtest_key_lens) / sizeof(uint32_t);
-			     k++) {
-				run_hash_func_test(hashtest_funcs[i],
-						hashtest_initvals[j],
-						hashtest_key_lens[k]);
-			}
-		}
-	}
-}
-
 /* Control operation of performance testing of fbk hash. */
 #define LOAD_FACTOR 0.667	/* How full to make the hash table. */
 #define TEST_SIZE 1000000	/* How many operations to time. */
@@ -757,7 +689,6 @@ test_hash_perf(void)
 {
 	if (run_all_tbl_perf_tests() < 0)
 		return -1;
-	run_hash_func_tests();
 
 	if (fbk_hash_perf_test() < 0)
 		return -1;
-- 
2.4.2

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v6 02/10] test/hash: improve accuracy on cycle measurements
  2015-06-10 15:25         ` [dpdk-dev] [PATCH v6 00/10] update jhash function Pablo de Lara
  2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 01/10] test/hash: move hash function perf tests to separate file Pablo de Lara
@ 2015-06-10 15:25           ` Pablo de Lara
  2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 03/10] test/hash: update key size range and initial values for testing Pablo de Lara
                             ` (8 subsequent siblings)
  10 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-06-10 15:25 UTC (permalink / raw)
  To: dev

Cycles per hash calculation were measured per single operation.
It is much more accurate to run several iterations between measurements
and divide by number of iterations.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 app/test/test_hash_functions.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/app/test/test_hash_functions.c b/app/test/test_hash_functions.c
index 4efb3cd..767b2bc 100644
--- a/app/test/test_hash_functions.c
+++ b/app/test/test_hash_functions.c
@@ -83,21 +83,21 @@ static void
 run_hash_func_perf_test(rte_hash_function f, uint32_t init_val,
 		uint32_t key_len)
 {
-	static uint8_t key[RTE_HASH_KEY_LENGTH_MAX];
-	uint64_t ticks = 0, start, end;
+	static uint8_t key[HASHTEST_ITERATIONS][RTE_HASH_KEY_LENGTH_MAX];
+	uint64_t ticks, start, end;
 	unsigned i, j;
 
 	for (i = 0; i < HASHTEST_ITERATIONS; i++) {
-
 		for (j = 0; j < key_len; j++)
-			key[j] = (uint8_t) rte_rand();
-
-		start = rte_rdtsc();
-		f(key, key_len, init_val);
-		end = rte_rdtsc();
-		ticks += end - start;
+			key[i][j] = (uint8_t) rte_rand();
 	}
 
+	start = rte_rdtsc();
+	for (i = 0; i < HASHTEST_ITERATIONS; i++)
+		f(key[i], key_len, init_val);
+	end = rte_rdtsc();
+	ticks = end - start;
+
 	printf("%-12s, %-18u, %-13u, %.02f\n", get_hash_name(f), (unsigned) key_len,
 			(unsigned) init_val, (double)ticks / HASHTEST_ITERATIONS);
 }
-- 
2.4.2

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v6 03/10] test/hash: update key size range and initial values for testing
  2015-06-10 15:25         ` [dpdk-dev] [PATCH v6 00/10] update jhash function Pablo de Lara
  2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 01/10] test/hash: move hash function perf tests to separate file Pablo de Lara
  2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 02/10] test/hash: improve accuracy on cycle measurements Pablo de Lara
@ 2015-06-10 15:25           ` Pablo de Lara
  2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 04/10] test/hash: change order of loops in hash function tests Pablo de Lara
                             ` (7 subsequent siblings)
  10 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-06-10 15:25 UTC (permalink / raw)
  To: dev

Previous key sizes used for testing did not have much purpose.
This patch substitutes them with some more meaningful ones
(standard multiple of 2 key sizes, plus IPv4/v6 tuple and others).

Also an arbitrary initial value has been added to increase
the test coverage, and RTE_DIM macro is used to iterate the loops.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 app/test/test_hash_functions.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/app/test/test_hash_functions.c b/app/test/test_hash_functions.c
index 767b2bc..aff5e6d 100644
--- a/app/test/test_hash_functions.c
+++ b/app/test/test_hash_functions.c
@@ -57,8 +57,14 @@
 #define HASHTEST_ITERATIONS 1000000
 
 static rte_hash_function hashtest_funcs[] = {rte_jhash, rte_hash_crc};
-static uint32_t hashtest_initvals[] = {0};
-static uint32_t hashtest_key_lens[] = {2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, 31, 32, 33, 63, 64};
+static uint32_t hashtest_initvals[] = {0, 0xdeadbeef};
+static uint32_t hashtest_key_lens[] = {
+	4, 8, 16, 32, 48, 64, /* standard key sizes */
+	9,                    /* IPv4 SRC + DST + protocol, unpadded */
+	13,                   /* IPv4 5-tuple, unpadded */
+	37,                   /* IPv6 5-tuple, unpadded */
+	40                    /* IPv6 5-tuple, padded to 8-byte boundary */
+};
 /******************************************************************************/
 
 /*
@@ -115,15 +121,9 @@ run_hash_func_perf_tests(void)
 			HASHTEST_ITERATIONS);
 	printf("Hash Func.  , Key Length (bytes), Initial value, Ticks/Op.\n");
 
-	for (i = 0;
-	     i < sizeof(hashtest_funcs) / sizeof(rte_hash_function);
-	     i++) {
-		for (j = 0;
-		     j < sizeof(hashtest_initvals) / sizeof(uint32_t);
-		     j++) {
-			for (k = 0;
-			     k < sizeof(hashtest_key_lens) / sizeof(uint32_t);
-			     k++) {
+	for (i = 0; i < RTE_DIM(hashtest_funcs); i++) {
+		for (j = 0; j < RTE_DIM(hashtest_initvals); j++) {
+			for (k = 0; k < RTE_DIM(hashtest_key_lens); k++) {
 				run_hash_func_perf_test(hashtest_funcs[i],
 						hashtest_initvals[j],
 						hashtest_key_lens[k]);
-- 
2.4.2

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v6 04/10] test/hash: change order of loops in hash function tests
  2015-06-10 15:25         ` [dpdk-dev] [PATCH v6 00/10] update jhash function Pablo de Lara
                             ` (2 preceding siblings ...)
  2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 03/10] test/hash: update key size range and initial values for testing Pablo de Lara
@ 2015-06-10 15:25           ` Pablo de Lara
  2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 05/10] test/hash: add new functional tests for hash functions Pablo de Lara
                             ` (6 subsequent siblings)
  10 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-06-10 15:25 UTC (permalink / raw)
  To: dev

In order to see more clearly the performance difference
between different hash functions, the order of the loops
has been changed, so that it iterates first through initial values,
then key sizes and then the hash functions.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 app/test/test_hash_functions.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/app/test/test_hash_functions.c b/app/test/test_hash_functions.c
index aff5e6d..5156dfc 100644
--- a/app/test/test_hash_functions.c
+++ b/app/test/test_hash_functions.c
@@ -86,8 +86,8 @@ get_hash_name(rte_hash_function f)
  * Test a hash function.
  */
 static void
-run_hash_func_perf_test(rte_hash_function f, uint32_t init_val,
-		uint32_t key_len)
+run_hash_func_perf_test(uint32_t key_len, uint32_t init_val,
+		rte_hash_function f)
 {
 	static uint8_t key[HASHTEST_ITERATIONS][RTE_HASH_KEY_LENGTH_MAX];
 	uint64_t ticks, start, end;
@@ -121,12 +121,12 @@ run_hash_func_perf_tests(void)
 			HASHTEST_ITERATIONS);
 	printf("Hash Func.  , Key Length (bytes), Initial value, Ticks/Op.\n");
 
-	for (i = 0; i < RTE_DIM(hashtest_funcs); i++) {
-		for (j = 0; j < RTE_DIM(hashtest_initvals); j++) {
-			for (k = 0; k < RTE_DIM(hashtest_key_lens); k++) {
-				run_hash_func_perf_test(hashtest_funcs[i],
-						hashtest_initvals[j],
-						hashtest_key_lens[k]);
+	for (i = 0; i < RTE_DIM(hashtest_initvals); i++) {
+		for (j = 0; j < RTE_DIM(hashtest_key_lens); j++) {
+			for (k = 0; k < RTE_DIM(hashtest_funcs); k++) {
+				run_hash_func_perf_test(hashtest_key_lens[j],
+						hashtest_initvals[i],
+						hashtest_funcs[k]);
 			}
 		}
 	}
-- 
2.4.2

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v6 05/10] test/hash: add new functional tests for hash functions
  2015-06-10 15:25         ` [dpdk-dev] [PATCH v6 00/10] update jhash function Pablo de Lara
                             ` (3 preceding siblings ...)
  2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 04/10] test/hash: change order of loops in hash function tests Pablo de Lara
@ 2015-06-10 15:25           ` Pablo de Lara
  2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 06/10] hash: update jhash function with the latest available Pablo de Lara
                             ` (5 subsequent siblings)
  10 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-06-10 15:25 UTC (permalink / raw)
  To: dev

In order to make sure that the hash functions are returning
the correct values, new tests have been added:

- First test compares precalculated hash values with values calculated
from the existing hash functions.
- Second test compares values returned from rte_jhash2 and rte_jhash,
expecting same return (only for multiple of 4 bytes keys)

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 app/test/test_hash_functions.c | 126 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 126 insertions(+)

diff --git a/app/test/test_hash_functions.c b/app/test/test_hash_functions.c
index 5156dfc..fcc9d05 100644
--- a/app/test/test_hash_functions.c
+++ b/app/test/test_hash_functions.c
@@ -47,6 +47,36 @@
 
 #include "test.h"
 
+/*
+ * Hash values calculated for key sizes from array "hashtest_key_lens"
+ * and for initial values from array "hashtest_initvals".
+ * Each key will be formed by increasing each byte by 1:
+ * e.g.: key size = 4, key = 0x03020100
+ *       key size = 8, key = 0x0706050403020100
+ */
+static uint32_t hash_values_jhash[2][10] = {{
+	0x821cc2db, 0xa491f494, 0xace4cd87, 0x9e867842,
+	0xd32442d6, 0x5fbafeab, 0x9cac434c, 0xecad9b0d,
+	0x2dcf235e, 0xaab655d0
+},
+{
+	0xc1111b14, 0x9a95039e, 0x84f208a0, 0xfa28f3fb,
+	0xfa13f7d3, 0xc7aed470, 0x74caa938, 0xa9288066,
+	0xd0140735, 0xbf00519d
+}
+};
+static uint32_t hash_values_crc[2][10] = {{
+	0x91545164, 0x06040eb1, 0x9bb99201, 0xcc4c4fe4,
+	0x14a90993, 0xf8a5dd8c, 0xc62beb31, 0x32bf340e,
+	0x72f9d22b, 0x4a11475e
+},
+{
+	0x98cd4c70, 0xd52c702f, 0x41fc0e1c, 0x3905f65c,
+	0x94bff47f, 0x1bab102d, 0xd2911ed7, 0xe8faa813,
+	0x6bea184b, 0x53028d3e
+}
+};
+
 /*******************************************************************************
  * Hash function performance test configuration section. Each performance test
  * will be performed HASHTEST_ITERATIONS times.
@@ -132,9 +162,105 @@ run_hash_func_perf_tests(void)
 	}
 }
 
+/*
+ * Verify that hash functions return what they are expected to return
+ * (using precalculated values stored above)
+ */
+static int
+verify_precalculated_hash_func_tests(void)
+{
+	unsigned i, j;
+	uint8_t key[64];
+	uint32_t hash;
+
+	for (i = 0; i < 64; i++)
+		key[i] = (uint8_t) i;
+
+	for (i = 0; i < sizeof(hashtest_key_lens) / sizeof(uint32_t); i++) {
+		for (j = 0; j < sizeof(hashtest_initvals) / sizeof(uint32_t); j++) {
+			hash = rte_jhash(key, hashtest_key_lens[i],
+					hashtest_initvals[j]);
+			if (hash != hash_values_jhash[j][i]) {
+				printf("jhash for %u bytes with initial value 0x%x."
+				       "Expected 0x%x, but got 0x%x\n",
+				       hashtest_key_lens[i], hashtest_initvals[j],
+				       hash_values_jhash[j][i], hash);
+				return -1;
+			}
+
+			hash = rte_hash_crc(key, hashtest_key_lens[i],
+					hashtest_initvals[j]);
+			if (hash != hash_values_crc[j][i]) {
+				printf("CRC for %u bytes with initial value 0x%x."
+				       "Expected 0x%x, but got 0x%x\n",
+				       hashtest_key_lens[i], hashtest_initvals[j],
+				       hash_values_crc[j][i], hash);
+				return -1;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Verify that rte_jhash and rte_jhash2 return the same
+ */
+static int
+verify_jhash_32bits(void)
+{
+	unsigned i, j;
+	uint8_t key[64];
+	uint32_t hash, hash32;
+
+	for (i = 0; i < 64; i++)
+		key[i] = rand() & 0xff;
+
+	for (i = 0; i < sizeof(hashtest_key_lens) / sizeof(uint32_t); i++) {
+		for (j = 0; j < sizeof(hashtest_initvals) / sizeof(uint32_t); j++) {
+			/* Key size must be multiple of 4 (32 bits) */
+			if ((hashtest_key_lens[i] & 0x3) == 0) {
+				hash = rte_jhash(key, hashtest_key_lens[i],
+						hashtest_initvals[j]);
+				/* Divide key length by 4 in rte_jhash for 32 bits */
+				hash32 = rte_jhash2((const uint32_t *)key,
+						hashtest_key_lens[i] >> 2,
+						hashtest_initvals[j]);
+				if (hash != hash32) {
+					printf("rte_jhash returns different value (0x%x)"
+					       "than rte_jhash2 (0x%x)\n",
+					       hash, hash32);
+					return -1;
+				}
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Run all functional tests for hash functions
+ */
+static int
+run_hash_func_tests(void)
+{
+	if (verify_precalculated_hash_func_tests() != 0)
+		return -1;
+
+	if (verify_jhash_32bits() != 0)
+		return -1;
+
+	return 0;
+
+}
+
 static int
 test_hash_functions(void)
 {
+	if (run_hash_func_tests() != 0)
+		return -1;
+
 	run_hash_func_perf_tests();
 
 	return 0;
-- 
2.4.2

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v6 06/10] hash: update jhash function with the latest available
  2015-06-10 15:25         ` [dpdk-dev] [PATCH v6 00/10] update jhash function Pablo de Lara
                             ` (4 preceding siblings ...)
  2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 05/10] test/hash: add new functional tests for hash functions Pablo de Lara
@ 2015-06-10 15:25           ` Pablo de Lara
  2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 07/10] hash: add two new functions to jhash library Pablo de Lara
                             ` (4 subsequent siblings)
  10 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-06-10 15:25 UTC (permalink / raw)
  To: dev

Jenkins hash function was developed originally in 1996,
and was integrated in first versions of DPDK.
The function has been improved in 2006,
achieving up to 35% better performance, compared to the original one.

This patch integrates that code into the rte_jhash library.
It also updates the precalculated hash values in the unit test,
as the code now returns different values (expected).

A note has also been added to the release notes
describing the change.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 app/test/test_hash_functions.c        |  12 +-
 doc/guides/rel_notes/new_features.rst |   5 +
 lib/librte_hash/rte_jhash.h           | 269 +++++++++++++++++++++++++---------
 3 files changed, 207 insertions(+), 79 deletions(-)

diff --git a/app/test/test_hash_functions.c b/app/test/test_hash_functions.c
index fcc9d05..c6cdccf 100644
--- a/app/test/test_hash_functions.c
+++ b/app/test/test_hash_functions.c
@@ -55,14 +55,14 @@
  *       key size = 8, key = 0x0706050403020100
  */
 static uint32_t hash_values_jhash[2][10] = {{
-	0x821cc2db, 0xa491f494, 0xace4cd87, 0x9e867842,
-	0xd32442d6, 0x5fbafeab, 0x9cac434c, 0xecad9b0d,
-	0x2dcf235e, 0xaab655d0
+	0xe4cf1d42, 0xd4ccb93c, 0x5e84eafc, 0x21362cfe,
+	0x2f4775ab, 0x9ff036cc, 0xeca51474, 0xbc9d6816,
+	0x12926a31, 0x1c9fa888
 },
 {
-	0xc1111b14, 0x9a95039e, 0x84f208a0, 0xfa28f3fb,
-	0xfa13f7d3, 0xc7aed470, 0x74caa938, 0xa9288066,
-	0xd0140735, 0xbf00519d
+	0x8270ac65, 0x05fa6668, 0x762df861, 0xda088f2f,
+	0x59614cd4, 0x7a94f690, 0xdc1e4993, 0x30825494,
+	0x91d0e462, 0x768087fc
 }
 };
 static uint32_t hash_values_crc[2][10] = {{
diff --git a/doc/guides/rel_notes/new_features.rst b/doc/guides/rel_notes/new_features.rst
index 4b4974d..5b724ab 100644
--- a/doc/guides/rel_notes/new_features.rst
+++ b/doc/guides/rel_notes/new_features.rst
@@ -121,4 +121,9 @@ New Features
 
 *   Job Stats library and Sample Application.
 
+*   Enhanced Jenkins hash (jhash) library
+
+.. note:: The hash values returned by the new jhash library are different
+          from the ones returned by the previous library.
+
 For further features supported in this release, see Chapter 3 Supported Features.
diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index a4bf5a1..1cb5c44 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -45,38 +45,62 @@ extern "C" {
 #endif
 
 #include <stdint.h>
+#include <string.h>
+#include <rte_byteorder.h>
 
 /* jhash.h: Jenkins hash support.
  *
- * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
+ * Copyright (C) 2006 Bob Jenkins (bob_jenkins@burtleburtle.net)
  *
  * http://burtleburtle.net/bob/hash/
  *
  * These are the credits from Bob's sources:
  *
- * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
- * hash(), hash2(), hash3, and mix() are externally useful functions.
- * Routines to test the hash are included if SELF_TEST is defined.
- * You can use this free for any purpose.  It has no warranty.
+ * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+ *
+ * These are functions for producing 32-bit hashes for hash table lookup.
+ * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
+ * are externally useful functions.  Routines to test the hash are included
+ * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
+ * the public domain.  It has no warranty.
  *
  * $FreeBSD$
  */
 
+#define rot(x, k) (((x) << (k)) | ((x) >> (32-(k))))
+
 /** @internal Internal function. NOTE: Arguments are modified. */
 #define __rte_jhash_mix(a, b, c) do { \
-	a -= b; a -= c; a ^= (c>>13); \
-	b -= c; b -= a; b ^= (a<<8); \
-	c -= a; c -= b; c ^= (b>>13); \
-	a -= b; a -= c; a ^= (c>>12); \
-	b -= c; b -= a; b ^= (a<<16); \
-	c -= a; c -= b; c ^= (b>>5); \
-	a -= b; a -= c; a ^= (c>>3); \
-	b -= c; b -= a; b ^= (a<<10); \
-	c -= a; c -= b; c ^= (b>>15); \
+	a -= c; a ^= rot(c, 4); c += b; \
+	b -= a; b ^= rot(a, 6); a += c; \
+	c -= b; c ^= rot(b, 8); b += a; \
+	a -= c; a ^= rot(c, 16); c += b; \
+	b -= a; b ^= rot(a, 19); a += c; \
+	c -= b; c ^= rot(b, 4); b += a; \
+} while (0)
+
+#define __rte_jhash_final(a, b, c) do { \
+	c ^= b; c -= rot(b, 14); \
+	a ^= c; a -= rot(c, 11); \
+	b ^= a; b -= rot(a, 25); \
+	c ^= b; c -= rot(b, 16); \
+	a ^= c; a -= rot(c, 4);  \
+	b ^= a; b -= rot(a, 14); \
+	c ^= b; c -= rot(b, 24); \
 } while (0)
 
 /** The golden ratio: an arbitrary value. */
-#define RTE_JHASH_GOLDEN_RATIO      0x9e3779b9
+#define RTE_JHASH_GOLDEN_RATIO      0xdeadbeef
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+#define BIT_SHIFT(x, y, k) (((x) >> (k)) | ((uint64_t)(y) << (32-(k))))
+#else
+#define BIT_SHIFT(x, y, k) (((uint64_t)(x) << (k)) | ((y) >> (32-(k))))
+#endif
+
+#define LOWER8b_MASK rte_le_to_cpu_32(0xff)
+#define LOWER16b_MASK rte_le_to_cpu_32(0xffff)
+#define LOWER24b_MASK rte_le_to_cpu_32(0xffffff)
 
 /**
  * The most generic version, hashes an arbitrary sequence
@@ -95,42 +119,130 @@ extern "C" {
 static inline uint32_t
 rte_jhash(const void *key, uint32_t length, uint32_t initval)
 {
-	uint32_t a, b, c, len;
-	const uint8_t *k = (const uint8_t *)key;
-	const uint32_t *k32 = (const uint32_t *)key;
+	uint32_t a, b, c;
 
-	len = length;
-	a = b = RTE_JHASH_GOLDEN_RATIO;
-	c = initval;
+	/* Set up the internal state */
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + initval;
 
-	while (len >= 12) {
-		a += k32[0];
-		b += k32[1];
-		c += k32[2];
+	/* Check key alignment. For x86 architecture, first case is always optimal */
+#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_X32)
+	const uint32_t *k = key;
+	const uint32_t s = 0;
+#else
+	const uint32_t *k = (const uint32_t *)((uintptr_t)key & (uintptr_t)~3);
+	const uint32_t s = ((uintptr_t)key & 3) * CHAR_BIT;
+#endif
 
-		__rte_jhash_mix(a,b,c);
+	if (s == 0) {
+		while (length > 12) {
+			a += k[0];
+			b += k[1];
+			c += k[2];
 
-		k += (3 * sizeof(uint32_t)), k32 += 3;
-		len -= (3 * sizeof(uint32_t));
-	}
+			__rte_jhash_mix(a, b, c);
 
-	c += length;
-	switch (len) {
-		case 11: c += ((uint32_t)k[10] << 24);
-		case 10: c += ((uint32_t)k[9] << 16);
-		case 9 : c += ((uint32_t)k[8] << 8);
-		case 8 : b += ((uint32_t)k[7] << 24);
-		case 7 : b += ((uint32_t)k[6] << 16);
-		case 6 : b += ((uint32_t)k[5] << 8);
-		case 5 : b += k[4];
-		case 4 : a += ((uint32_t)k[3] << 24);
-		case 3 : a += ((uint32_t)k[2] << 16);
-		case 2 : a += ((uint32_t)k[1] << 8);
-		case 1 : a += k[0];
-		default: break;
-	};
+			k += 3;
+			length -= 12;
+		}
 
-	__rte_jhash_mix(a,b,c);
+		switch (length) {
+		case 12:
+			c += k[2]; b += k[1]; a += k[0]; break;
+		case 11:
+			c += k[2] & LOWER24b_MASK; b += k[1]; a += k[0]; break;
+		case 10:
+			c += k[2] & LOWER16b_MASK; b += k[1]; a += k[0]; break;
+		case 9:
+			c += k[2] & LOWER8b_MASK; b += k[1]; a += k[0]; break;
+		case 8:
+			b += k[1]; a += k[0]; break;
+		case 7:
+			b += k[1] & LOWER24b_MASK; a += k[0]; break;
+		case 6:
+			b += k[1] & LOWER16b_MASK; a += k[0]; break;
+		case 5:
+			b += k[1] & LOWER8b_MASK; a += k[0]; break;
+		case 4:
+			a += k[0]; break;
+		case 3:
+			a += k[0] & LOWER24b_MASK; break;
+		case 2:
+			a += k[0] & LOWER16b_MASK; break;
+		case 1:
+			a += k[0] & LOWER8b_MASK; break;
+		/* zero length strings require no mixing */
+		case 0:
+			return c;
+		};
+	} else {
+		/* all but the last block: affect some 32 bits of (a, b, c) */
+		while (length > 12) {
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s);
+			__rte_jhash_mix(a, b, c);
+
+			k += 3;
+			length -= 12;
+		}
+
+		/* last block: affect all 32 bits of (c) */
+		switch (length) {
+		case 12:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s);
+			break;
+		case 11:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s) & LOWER24b_MASK;
+			break;
+		case 10:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s) & LOWER16b_MASK;
+			break;
+		case 9:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s) & LOWER8b_MASK;
+			break;
+		case 8:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			break;
+		case 7:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s) & LOWER24b_MASK;
+			break;
+		case 6:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s) & LOWER16b_MASK;
+			break;
+		case 5:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s) & LOWER8b_MASK;
+			break;
+		case 4:
+			a += BIT_SHIFT(k[0], k[1], s);
+			break;
+		case 3:
+			a += BIT_SHIFT(k[0], k[1], s) & LOWER24b_MASK;
+			break;
+		case 2:
+			a += BIT_SHIFT(k[0], k[1], s) & LOWER16b_MASK;
+			break;
+		case 1:
+			a += BIT_SHIFT(k[0], k[1], s) & LOWER8b_MASK;
+			break;
+		/* zero length strings require no mixing */
+		case 0:
+			return c;
+		}
+	}
+
+	__rte_jhash_final(a, b, c);
 
 	return c;
 }
@@ -151,33 +263,54 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
 static inline uint32_t
 rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
 {
-	uint32_t a, b, c, len;
+	uint32_t a, b, c;
 
-	a = b = RTE_JHASH_GOLDEN_RATIO;
-	c = initval;
-	len = length;
+	/* Set up the internal state */
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length) << 2) + initval;
 
-	while (len >= 3) {
+	/* Handle most of the key */
+	while (length > 3) {
 		a += k[0];
 		b += k[1];
 		c += k[2];
+
 		__rte_jhash_mix(a, b, c);
-		k += 3; len -= 3;
-	}
 
-	c += length * 4;
+		k += 3;
+		length -= 3;
+	}
 
-	switch (len) {
-		case 2 : b += k[1];
-		case 1 : a += k[0];
-		default: break;
+	/* Handle the last 3 uint32_t's */
+	switch (length) {
+	case 3:
+		c += k[2];
+		/* Fallthrough */
+	case 2:
+		b += k[1];
+		/* Fallthrough */
+	case 1:
+		a += k[0];
+		__rte_jhash_final(a, b, c);
+		/* Fallthrough */
+	/* case 0: nothing left to add */
+	case 0:
+		break;
 	};
 
-	__rte_jhash_mix(a,b,c);
-
 	return c;
 }
 
+static inline uint32_t
+__rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
+{
+	a += RTE_JHASH_GOLDEN_RATIO + initval;
+	b += RTE_JHASH_GOLDEN_RATIO + initval;
+	c += RTE_JHASH_GOLDEN_RATIO + initval;
+
+	__rte_jhash_final(a, b, c);
+
+	return c;
+}
 
 /**
  * A special ultra-optimized versions that knows it is hashing exactly
@@ -197,17 +330,7 @@ rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
 static inline uint32_t
 rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
 {
-	a += RTE_JHASH_GOLDEN_RATIO;
-	b += RTE_JHASH_GOLDEN_RATIO;
-	c += initval;
-
-	__rte_jhash_mix(a, b, c);
-
-	/*
-	 * NOTE: In particular the "c += length; __rte_jhash_mix(a,b,c);"
-	 *       normally done at the end is not done here.
-	 */
-	return c;
+	return __rte_jhash_3words(a + 12, b + 12, c + 12, initval);
 }
 
 /**
@@ -226,7 +349,7 @@ rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
 static inline uint32_t
 rte_jhash_2words(uint32_t a, uint32_t b, uint32_t initval)
 {
-	return rte_jhash_3words(a, b, 0, initval);
+	return __rte_jhash_3words(a + 8, b + 8, 8, initval);
 }
 
 /**
@@ -243,7 +366,7 @@ rte_jhash_2words(uint32_t a, uint32_t b, uint32_t initval)
 static inline uint32_t
 rte_jhash_1word(uint32_t a, uint32_t initval)
 {
-	return rte_jhash_3words(a, 0, 0, initval);
+	return __rte_jhash_3words(a + 4, 4, 4, initval);
 }
 
 #ifdef __cplusplus
-- 
2.4.2

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v6 07/10] hash: add two new functions to jhash library
  2015-06-10 15:25         ` [dpdk-dev] [PATCH v6 00/10] update jhash function Pablo de Lara
                             ` (5 preceding siblings ...)
  2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 06/10] hash: update jhash function with the latest available Pablo de Lara
@ 2015-06-10 15:25           ` Pablo de Lara
  2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 08/10] hash: remove duplicated code Pablo de Lara
                             ` (3 subsequent siblings)
  10 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-06-10 15:25 UTC (permalink / raw)
  To: dev

With the jhash update, two new functions were introduced:

- rte_jhash_2hashes: Same as rte_jhash, but takes two seeds
                     and returns two hashes (uint32_ts)

- rte_jhash2_2hashes: Same as rte_jhash2, but takes two seeds
                     and returns two hashes (uint32_ts)

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 lib/librte_hash/rte_jhash.h | 209 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 209 insertions(+)

diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index 1cb5c44..adfcef7 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -300,6 +300,215 @@ rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
 	return c;
 }
 
+/**
+ * Same as rte_jhash, but takes two seeds and returns two uint32_ts.
+ * pc and pb must be non-null, and *pc and *pb must both be initialized
+ * with seeds. If you pass in (*pb)=0, the output (*pc) will be
+ * the same as the return value from rte_jhash.
+ *
+ * @param key
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in bytes.
+ * @param pc
+ *   IN: seed OUT: primary hash value.
+ * @param pb
+ *   IN: second seed OUT: secondary hash value.
+ */
+static inline void
+rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb)
+{
+	uint32_t a, b, c;
+
+	/* Set up the internal state */
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + *pc;
+	c += *pb;
+
+	/* Check key alignment. For x86 architecture, first case is always optimal */
+#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_X32)
+	const uint32_t *k = key;
+	const uint32_t s = 0;
+#else
+	const uint32_t *k = (const uint32_t *)((uintptr_t)key & (uintptr_t)~3);
+	const uint32_t s = ((uintptr_t)key & 3) * CHAR_BIT;
+#endif
+
+	if (s == 0) {
+		while (length > 12) {
+			a += k[0];
+			b += k[1];
+			c += k[2];
+
+			__rte_jhash_mix(a, b, c);
+
+			k += 3;
+			length -= 12;
+		}
+
+		switch (length) {
+		case 12:
+			c += k[2]; b += k[1]; a += k[0]; break;
+		case 11:
+			c += k[2] & LOWER24b_MASK; b += k[1]; a += k[0]; break;
+		case 10:
+			c += k[2] & LOWER16b_MASK; b += k[1]; a += k[0]; break;
+		case 9:
+			c += k[2] & LOWER8b_MASK; b += k[1]; a += k[0]; break;
+		case 8:
+			b += k[1]; a += k[0]; break;
+		case 7:
+			b += k[1] & LOWER24b_MASK; a += k[0]; break;
+		case 6:
+			b += k[1] & LOWER16b_MASK; a += k[0]; break;
+		case 5:
+			b += k[1] & LOWER8b_MASK; a += k[0]; break;
+		case 4:
+			a += k[0]; break;
+		case 3:
+			a += k[0] & LOWER24b_MASK; break;
+		case 2:
+			a += k[0] & LOWER16b_MASK; break;
+		case 1:
+			a += k[0] & LOWER8b_MASK; break;
+		/* zero length strings require no mixing */
+		case 0:
+			*pc = c;
+			*pb = b;
+			return;
+		};
+	} else {
+		/* all but the last block: affect some 32 bits of (a, b, c) */
+		while (length > 12) {
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s);
+			__rte_jhash_mix(a, b, c);
+
+			k += 3;
+			length -= 12;
+		}
+
+		/* last block: affect all 32 bits of (c) */
+		switch (length) {
+		case 12:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s);
+			break;
+		case 11:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s) & LOWER24b_MASK;
+			break;
+		case 10:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s) & LOWER16b_MASK;
+			break;
+		case 9:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s) & LOWER8b_MASK;
+			break;
+		case 8:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			break;
+		case 7:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s) & LOWER24b_MASK;
+			break;
+		case 6:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s) & LOWER16b_MASK;
+			break;
+		case 5:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s) & LOWER8b_MASK;
+			break;
+		case 4:
+			a += BIT_SHIFT(k[0], k[1], s);
+			break;
+		case 3:
+			a += BIT_SHIFT(k[0], k[1], s) & LOWER24b_MASK;
+			break;
+		case 2:
+			a += BIT_SHIFT(k[0], k[1], s) & LOWER16b_MASK;
+			break;
+		case 1:
+			a += BIT_SHIFT(k[0], k[1], s) & LOWER8b_MASK;
+			break;
+		/* zero length strings require no mixing */
+		case 0:
+			*pc = c;
+			*pb = b;
+			return;
+		}
+	}
+
+	__rte_jhash_final(a, b, c);
+
+	*pc = c;
+	*pb = b;
+}
+
+/**
+ * Same as rte_jhash2, but takes two seeds and returns two uint32_ts.
+ * pc and pb must be non-null, and *pc and *pb must both be initialized
+ * with seeds. If you pass in (*pb)=0, the output (*pc) will be
+ * the same as the return value from rte_jhash2.
+ *
+ * @param k
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in units of 4 bytes.
+ * @param pc
+ *   IN: seed OUT: primary hash value.
+ * @param pb
+ *   IN: second seed OUT: secondary hash value.
+ */
+static inline void
+rte_jhash2_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb)
+{
+	uint32_t a, b, c;
+
+	/* Set up the internal state */
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length) << 2) + *pc;
+	c += *pb;
+
+	/* Handle most of the key */
+	while (length > 3) {
+		a += k[0];
+		b += k[1];
+		c += k[2];
+
+		__rte_jhash_mix(a, b, c);
+
+		k += 3;
+		length -= 3;
+	}
+
+	/* Handle the last 3 uint32_t's */
+	switch (length) {
+	case 3:
+		c += k[2];
+		/* Fallthrough */
+	case 2:
+		b += k[1];
+		/* Fallthrough */
+	case 1:
+		a += k[0];
+		__rte_jhash_final(a, b, c);
+		/* Fallthrough */
+	/* case 0: nothing left to add */
+	case 0:
+		break;
+	};
+
+	*pc = c;
+	*pb = b;
+}
+
 static inline uint32_t
 __rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
 {
-- 
2.4.2

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v6 08/10] hash: remove duplicated code
  2015-06-10 15:25         ` [dpdk-dev] [PATCH v6 00/10] update jhash function Pablo de Lara
                             ` (6 preceding siblings ...)
  2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 07/10] hash: add two new functions to jhash library Pablo de Lara
@ 2015-06-10 15:25           ` Pablo de Lara
  2015-06-16  9:33             ` Thomas Monjalon
  2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 09/10] hash: rename rte_jhash2 to rte_jhash_32b Pablo de Lara
                             ` (2 subsequent siblings)
  10 siblings, 1 reply; 62+ messages in thread
From: Pablo de Lara @ 2015-06-10 15:25 UTC (permalink / raw)
  To: dev

rte_jhash is basically like __rte_jhash_2hashes, but
it returns only 1 hash instead of 2.
In order to remove duplicated code, rte_jhash calls __rte_jhash_2hashes,
passing 0 as the second seed and returning just the first hash value
(the performance penalty is negligible).

The same is done with rte_jhash2. Also, rte_jhash2 is just a specific case
where key lengths are multiples of 32 bits and no key alignment check is required.
So, to avoid duplicated code, the function calls __rte_jhash_2hashes
with check_align = 0 (to use the optimal path).

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 lib/librte_hash/rte_jhash.h | 300 +++++++++-----------------------------------
 1 file changed, 61 insertions(+), 239 deletions(-)

diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index adfcef7..dd19ce0 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -102,29 +102,20 @@ extern "C" {
 #define LOWER16b_MASK rte_le_to_cpu_32(0xffff)
 #define LOWER24b_MASK rte_le_to_cpu_32(0xffffff)
 
-/**
- * The most generic version, hashes an arbitrary sequence
- * of bytes.  No alignment or length assumptions are made about
- * the input key.
- *
- * @param key
- *   Key to calculate hash of.
- * @param length
- *   Length of key in bytes.
- * @param initval
- *   Initialising value of hash.
- * @return
- *   Calculated hash value.
- */
-static inline uint32_t
-rte_jhash(const void *key, uint32_t length, uint32_t initval)
+static inline void
+__rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc,
+		uint32_t *pb, unsigned check_align)
 {
 	uint32_t a, b, c;
 
 	/* Set up the internal state */
-	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + initval;
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + *pc;
+	c += *pb;
 
-	/* Check key alignment. For x86 architecture, first case is always optimal */
+	/*
+	 * Check key alignment. For x86 architecture, first case is always optimal
+	 * If check_align is not set, first case will be used
+	 */
 #if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_X32)
 	const uint32_t *k = key;
 	const uint32_t s = 0;
@@ -132,8 +123,7 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
 	const uint32_t *k = (uint32_t *)(uintptr_t)key & (uintptr_t)~3);
 	const uint32_t s = ((uintptr_t)key & 3) * CHAR_BIT;
 #endif
-
-	if (s == 0) {
+	if (!check_align || s == 0) {
 		while (length > 12) {
 			a += k[0];
 			b += k[1];
@@ -172,7 +162,9 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
 			a += k[0] & LOWER8b_MASK; break;
 		/* zero length strings require no mixing */
 		case 0:
-			return c;
+			*pc = c;
+			*pb = b;
+			return;
 		};
 	} else {
 		/* all but the last block: affect some 32 bits of (a, b, c) */
@@ -238,66 +230,16 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
 			break;
 		/* zero length strings require no mixing */
 		case 0:
-			return c;
+			*pc = c;
+			*pb = b;
+			return;
 		}
 	}
 
 	__rte_jhash_final(a, b, c);
 
-	return c;
-}
-
-/**
- * A special optimized version that handles 1 or more of uint32_ts.
- * The length parameter here is the number of uint32_ts in the key.
- *
- * @param k
- *   Key to calculate hash of.
- * @param length
- *   Length of key in units of 4 bytes.
- * @param initval
- *   Initialising value of hash.
- * @return
- *   Calculated hash value.
- */
-static inline uint32_t
-rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
-{
-	uint32_t a, b, c;
-
-	/* Set up the internal state */
-	a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length) << 2) + initval;
-
-	/* Handle most of the key */
-	while (length > 3) {
-		a += k[0];
-		b += k[1];
-		c += k[2];
-
-		__rte_jhash_mix(a, b, c);
-
-		k += 3;
-		length -= 3;
-	}
-
-	/* Handle the last 3 uint32_t's */
-	switch (length) {
-	case 3:
-		c += k[2];
-		/* Fallthrough */
-	case 2:
-		b += k[1];
-		/* Fallthrough */
-	case 1:
-		a += k[0];
-		__rte_jhash_final(a, b, c);
-		/* Fallthrough */
-	/* case 0: nothing left to add */
-	case 0:
-		break;
-	};
-
-	return c;
+	*pc = c;
+	*pb = b;
 }
 
 /**
@@ -318,138 +260,7 @@ rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
 static inline void
 rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb)
 {
-	uint32_t a, b, c;
-
-	/* Set up the internal state */
-	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + *pc;
-	c += *pb;
-
-	/* Check key alignment. For x86 architecture, first case is always optimal */
-#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_X32)
-	const uint32_t *k = key;
-	const uint32_t s = 0;
-#else
-	const uint32_t *k = (uint32_t *)(uintptr_t)key & (uintptr_t)~3);
-	const uint32_t s = ((uintptr_t)key & 3) * CHAR_BIT;
-#endif
-
-	if (s == 0) {
-		while (length > 12) {
-			a += k[0];
-			b += k[1];
-			c += k[2];
-
-			__rte_jhash_mix(a, b, c);
-
-			k += 3;
-			length -= 12;
-		}
-
-		switch (length) {
-		case 12:
-			c += k[2]; b += k[1]; a += k[0]; break;
-		case 11:
-			c += k[2] & LOWER24b_MASK; b += k[1]; a += k[0]; break;
-		case 10:
-			c += k[2] & LOWER16b_MASK; b += k[1]; a += k[0]; break;
-		case 9:
-			c += k[2] & LOWER8b_MASK; b += k[1]; a += k[0]; break;
-		case 8:
-			b += k[1]; a += k[0]; break;
-		case 7:
-			b += k[1] & LOWER24b_MASK; a += k[0]; break;
-		case 6:
-			b += k[1] & LOWER16b_MASK; a += k[0]; break;
-		case 5:
-			b += k[1] & LOWER8b_MASK; a += k[0]; break;
-		case 4:
-			a += k[0]; break;
-		case 3:
-			a += k[0] & LOWER24b_MASK; break;
-		case 2:
-			a += k[0] & LOWER16b_MASK; break;
-		case 1:
-			a += k[0] & LOWER8b_MASK; break;
-		/* zero length strings require no mixing */
-		case 0:
-			*pc = c;
-			*pb = b;
-			return;
-		};
-	} else {
-		/* all but the last block: affect some 32 bits of (a, b, c) */
-		while (length > 12) {
-			a += BIT_SHIFT(k[0], k[1], s);
-			b += BIT_SHIFT(k[1], k[2], s);
-			c += BIT_SHIFT(k[2], k[3], s);
-			__rte_jhash_mix(a, b, c);
-
-			k += 3;
-			length -= 12;
-		}
-
-		/* last block: affect all 32 bits of (c) */
-		switch (length) {
-		case 12:
-			a += BIT_SHIFT(k[0], k[1], s);
-			b += BIT_SHIFT(k[1], k[2], s);
-			c += BIT_SHIFT(k[2], k[3], s);
-			break;
-		case 11:
-			a += BIT_SHIFT(k[0], k[1], s);
-			b += BIT_SHIFT(k[1], k[2], s);
-			c += BIT_SHIFT(k[2], k[3], s) & LOWER24b_MASK;
-			break;
-		case 10:
-			a += BIT_SHIFT(k[0], k[1], s);
-			b += BIT_SHIFT(k[1], k[2], s);
-			c += BIT_SHIFT(k[2], k[3], s) & LOWER16b_MASK;
-			break;
-		case 9:
-			a += BIT_SHIFT(k[0], k[1], s);
-			b += BIT_SHIFT(k[1], k[2], s);
-			c += BIT_SHIFT(k[2], k[3], s) & LOWER8b_MASK;
-			break;
-		case 8:
-			a += BIT_SHIFT(k[0], k[1], s);
-			b += BIT_SHIFT(k[1], k[2], s);
-			break;
-		case 7:
-			a += BIT_SHIFT(k[0], k[1], s);
-			b += BIT_SHIFT(k[1], k[2], s) & LOWER24b_MASK;
-			break;
-		case 6:
-			a += BIT_SHIFT(k[0], k[1], s);
-			b += BIT_SHIFT(k[1], k[2], s) & LOWER16b_MASK;
-			break;
-		case 5:
-			a += BIT_SHIFT(k[0], k[1], s);
-			b += BIT_SHIFT(k[1], k[2], s) & LOWER8b_MASK;
-			break;
-		case 4:
-			a += BIT_SHIFT(k[0], k[1], s);
-			break;
-		case 3:
-			a += BIT_SHIFT(k[0], k[1], s) & LOWER24b_MASK;
-			break;
-		case 2:
-			a += BIT_SHIFT(k[0], k[1], s) & LOWER16b_MASK;
-			break;
-		case 1:
-			a += BIT_SHIFT(k[0], k[1], s) & LOWER8b_MASK;
-			break;
-		/* zero length strings require no mixing */
-		case 0:
-			*pc = c;
-			*pb = b;
-			return;
-		}
-	}
-
-	__rte_jhash_final(a, b, c);
-
-	*pc = c;
-	*pb = b;
+	__rte_jhash_2hashes(key, length, pc, pb, 1);
 }
 
 /**
@@ -470,43 +281,54 @@ rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb)
 static inline void
 rte_jhash2_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb)
 {
-	uint32_t a, b, c;
+	__rte_jhash_2hashes((const void *) k, (length << 2), pc, pb, 0);
+}
 
-	/* Set up the internal state */
-	a = b = c = RTE_JHASH_GOLDEN_RATIO + (((uint32_t)length) << 2) + *pc;
-	c += *pb;
+/**
+ * The most generic version, hashes an arbitrary sequence
+ * of bytes.  No alignment or length assumptions are made about
+ * the input key.
+ *
+ * @param key
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in bytes.
+ * @param initval
+ *   Initialising value of hash.
+ * @return
+ *   Calculated hash value.
+ */
+static inline uint32_t
+rte_jhash(const void *key, uint32_t length, uint32_t initval)
+{
+	uint32_t initval2 = 0;
 
-	/* Handle most of the key */
-	while (length > 3) {
-		a += k[0];
-		b += k[1];
-		c += k[2];
+	rte_jhash_2hashes(key, length, &initval, &initval2);
 
-		__rte_jhash_mix(a, b, c);
+	return initval;
+}
 
-		k += 3;
-		length -= 3;
-	}
+/**
+ * A special optimized version that handles 1 or more of uint32_ts.
+ * The length parameter here is the number of uint32_ts in the key.
+ *
+ * @param k
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in units of 4 bytes.
+ * @param initval
+ *   Initialising value of hash.
+ * @return
+ *   Calculated hash value.
+ */
+static inline uint32_t
+rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
+{
+	uint32_t initval2 = 0;
 
-	/* Handle the last 3 uint32_t's */
-	switch (length) {
-	case 3:
-		c += k[2];
-		/* Fallthrough */
-	case 2:
-		b += k[1];
-		/* Fallthrough */
-	case 1:
-		a += k[0];
-		__rte_jhash_final(a, b, c);
-		/* Fallthrough */
-	/* case 0: nothing left to add */
-	case 0:
-		break;
-	};
+	rte_jhash2_2hashes(k, length, &initval, &initval2);
 
-	*pc = c;
-	*pb = b;
+	return initval;
 }
 
 static inline uint32_t
-- 
2.4.2

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v6 09/10] hash: rename rte_jhash2 to rte_jhash_32b
  2015-06-10 15:25         ` [dpdk-dev] [PATCH v6 00/10] update jhash function Pablo de Lara
                             ` (7 preceding siblings ...)
  2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 08/10] hash: remove duplicated code Pablo de Lara
@ 2015-06-10 15:25           ` Pablo de Lara
  2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 10/10] test/hash: verify rte_jhash_1word/2words/3words Pablo de Lara
  2015-06-12 10:37           ` [dpdk-dev] [PATCH v6 00/10] update jhash function Bruce Richardson
  10 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-06-10 15:25 UTC (permalink / raw)
  To: dev

Renamed rte_jhash2 to rte_jhash_32b, which is more meaningful,
and marked rte_jhash2 as deprecated.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 app/test/test_func_reentrancy.c |  2 +-
 app/test/test_hash.c            |  4 ++--
 app/test/test_hash_functions.c  |  6 +++---
 lib/librte_hash/rte_jhash.h     | 17 +++++++++++++++--
 4 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/app/test/test_func_reentrancy.c b/app/test/test_func_reentrancy.c
index dc070af..85504c0 100644
--- a/app/test/test_func_reentrancy.c
+++ b/app/test/test_func_reentrancy.c
@@ -228,7 +228,7 @@ hash_create_free(__attribute__((unused)) void *arg)
 		.entries = 16,
 		.bucket_entries = 4,
 		.key_len = 4,
-		.hash_func = (rte_hash_function)rte_jhash2,
+		.hash_func = (rte_hash_function)rte_jhash_32b,
 		.hash_func_init_val = 0,
 		.socket_id = 0,
 	};
diff --git a/app/test/test_hash.c b/app/test/test_hash.c
index 1da27c5..4ecb11b 100644
--- a/app/test/test_hash.c
+++ b/app/test/test_hash.c
@@ -1177,7 +1177,7 @@ test_hash_add_delete_jhash2(void)
 
 	hash_params_ex.name = "hash_test_jhash2";
 	hash_params_ex.key_len = 4;
-	hash_params_ex.hash_func = (rte_hash_function)rte_jhash2;
+	hash_params_ex.hash_func = (rte_hash_function)rte_jhash_32b;
 
 	handle = rte_hash_create(&hash_params_ex);
 	if (handle == NULL) {
@@ -1216,7 +1216,7 @@ test_hash_add_delete_2_jhash2(void)
 
 	hash_params_ex.name = "hash_test_2_jhash2";
 	hash_params_ex.key_len = 8;
-	hash_params_ex.hash_func = (rte_hash_function)rte_jhash2;
+	hash_params_ex.hash_func = (rte_hash_function)rte_jhash_32b;
 
 	handle = rte_hash_create(&hash_params_ex);
 	if (handle == NULL)
diff --git a/app/test/test_hash_functions.c b/app/test/test_hash_functions.c
index c6cdccf..8af8601 100644
--- a/app/test/test_hash_functions.c
+++ b/app/test/test_hash_functions.c
@@ -204,7 +204,7 @@ verify_precalculated_hash_func_tests(void)
 }
 
 /*
- * Verify that rte_jhash and rte_jhash2 return the same
+ * Verify that rte_jhash and rte_jhash_32b return the same
  */
 static int
 verify_jhash_32bits(void)
@@ -223,12 +223,12 @@ verify_jhash_32bits(void)
 				hash = rte_jhash(key, hashtest_key_lens[i],
 						hashtest_initvals[j]);
 				/* Divide key length by 4 in rte_jhash for 32 bits */
-				hash32 = rte_jhash2((const uint32_t *)key,
+				hash32 = rte_jhash_32b((const uint32_t *)key,
 						hashtest_key_lens[i] >> 2,
 						hashtest_initvals[j]);
 				if (hash != hash32) {
 					printf("rte_jhash returns different value (0x%x)"
-					       "than rte_jhash2 (0x%x)\n",
+					       "than rte_jhash_32b (0x%x)\n",
 					       hash, hash32);
 					return -1;
 				}
diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index dd19ce0..649d7c7 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -46,6 +46,8 @@ extern "C" {
 
 #include <stdint.h>
 #include <string.h>
+
+#include <rte_log.h>
 #include <rte_byteorder.h>
 
 /* jhash.h: Jenkins hash support.
@@ -279,7 +281,7 @@ rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb)
  *   IN: second seed OUT: secondary hash value.
  */
 static inline void
-rte_jhash2_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb)
+rte_jhash_32b_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb)
 {
 	__rte_jhash_2hashes((const void *) k, (length << 2), pc, pb, 0);
 }
@@ -322,11 +324,22 @@ rte_jhash(const void *key, uint32_t length, uint32_t initval)
  *   Calculated hash value.
  */
 static inline uint32_t
+rte_jhash_32b(const uint32_t *k, uint32_t length, uint32_t initval)
+{
+	uint32_t initval2 = 0;
+
+	rte_jhash_32b_2hashes(k, length, &initval, &initval2);
+
+	return initval;
+}
+
+static inline uint32_t
+__attribute__ ((deprecated))
 rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
 {
 	uint32_t initval2 = 0;
 
-	rte_jhash2_2hashes(k, length, &initval, &initval2);
+	rte_jhash_32b_2hashes(k, length, &initval, &initval2);
 
 	return initval;
 }
-- 
2.4.2

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [dpdk-dev] [PATCH v6 10/10] test/hash: verify rte_jhash_1word/2words/3words
  2015-06-10 15:25         ` [dpdk-dev] [PATCH v6 00/10] update jhash function Pablo de Lara
                             ` (8 preceding siblings ...)
  2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 09/10] hash: rename rte_jhash2 to rte_jhash_32b Pablo de Lara
@ 2015-06-10 15:25           ` Pablo de Lara
  2015-06-12 10:37           ` [dpdk-dev] [PATCH v6 00/10] update jhash function Bruce Richardson
  10 siblings, 0 replies; 62+ messages in thread
From: Pablo de Lara @ 2015-06-10 15:25 UTC (permalink / raw)
  To: dev

Added new test that verifies that rte_jhash_1word,
rte_jhash_2words and rte_jhash_3words return the same
values as rte_jhash.

Note that this patch has been added after the update
of the jhash function because these 3 functions did not
return the same values as rte_jhash before.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 app/test/test_hash_functions.c | 48 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/app/test/test_hash_functions.c b/app/test/test_hash_functions.c
index 8af8601..df7c909 100644
--- a/app/test/test_hash_functions.c
+++ b/app/test/test_hash_functions.c
@@ -240,6 +240,51 @@ verify_jhash_32bits(void)
 }
 
 /*
+ * Verify that rte_jhash and rte_jhash_1word, rte_jhash_2words
+ * and rte_jhash_3words return the same
+ */
+static int
+verify_jhash_words(void)
+{
+	unsigned i;
+	uint32_t key[3];
+	uint32_t hash, hash_words;
+
+	for (i = 0; i < 3; i++)
+		key[i] = rand();
+
+	/* Test rte_jhash_1word */
+	hash = rte_jhash(key, 4, 0);
+	hash_words = rte_jhash_1word(key[0], 0);
+	if (hash != hash_words) {
+		printf("rte_jhash returns different value (0x%x)"
+		       "than rte_jhash_1word (0x%x)\n",
+		       hash, hash_words);
+		return -1;
+	}
+	/* Test rte_jhash_2words */
+	hash = rte_jhash(key, 8, 0);
+	hash_words = rte_jhash_2words(key[0], key[1], 0);
+	if (hash != hash_words) {
+		printf("rte_jhash returns different value (0x%x)"
+		       "than rte_jhash_2words (0x%x)\n",
+		       hash, hash_words);
+		return -1;
+	}
+	/* Test rte_jhash_3words */
+	hash = rte_jhash(key, 12, 0);
+	hash_words = rte_jhash_3words(key[0], key[1], key[2], 0);
+	if (hash != hash_words) {
+		printf("rte_jhash returns different value (0x%x)"
+		       "than rte_jhash_3words (0x%x)\n",
+		       hash, hash_words);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
  * Run all functional tests for hash functions
  */
 static int
@@ -251,6 +296,9 @@ run_hash_func_tests(void)
 	if (verify_jhash_32bits() != 0)
 		return -1;
 
+	if (verify_jhash_words() != 0)
+		return -1;
+
 	return 0;
 
 }
-- 
2.4.2

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [dpdk-dev] [PATCH v6 00/10] update jhash function
  2015-06-10 15:25         ` [dpdk-dev] [PATCH v6 00/10] update jhash function Pablo de Lara
                             ` (9 preceding siblings ...)
  2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 10/10] test/hash: verify rte_jhash_1word/2words/3words Pablo de Lara
@ 2015-06-12 10:37           ` Bruce Richardson
  2015-06-16 10:22             ` Thomas Monjalon
  10 siblings, 1 reply; 62+ messages in thread
From: Bruce Richardson @ 2015-06-12 10:37 UTC (permalink / raw)
  To: Pablo de Lara; +Cc: dev

On Wed, Jun 10, 2015 at 04:25:17PM +0100, Pablo de Lara wrote:
> Jenkins hash function was developed originally in 1996,
> and was integrated in first versions of DPDK.
> The function has been improved in 2006,
> achieving up to 35% better performance, compared to the original one.
> 
> This patchset updates the current jhash in DPDK,
> including two new functions that generate two hashes from a single key.
> 
> It also separates the existing hash function performance tests to
> another file, to make it quicker to run, and add new unit tests.
> 
> changes in v6:
> - Use RTE_DIM macro, so it saves lines of code
> - Correct mistaken performance improvement
> - Add deprecated attribute, instead of printing a message calling it
> - Add note stating the changes in release notes
> 
> changes in v5:
> - Add functional tests (mainly to test that all functions 
>   return the expected hash values)
> - Modify range of key sizes to test
> - Change order of output for perf tests, so it is clearer
>   to compare different hash functions for same key size/initial value
> - Add new initial value to test in the hash functions
> - Fix some errors caught by checkpatch
>  
> changes in v4:
> - Simplify key alignment checks
> - Include missing x86 arch check
> 
> changes in v3:
> 
> - Update rte_jhash_1word, rte_jhash_2words and rte_jhash_3words
>   functions
> 
> changes in v2:
> 
> - Split single commit in three commits, one that updates the existing functions
>   and another that adds two new functions and use one of those functions
>   as a base to be called by the other ones.
> - Remove some unnecessary ifdefs in the code.
> - Add new macros to help on the reutilization of constants
> - Separate hash function performance tests to another file
>   and improve cycle measurements.
> - Rename existing function rte_jhash2 to rte_jhash_32b
>   (something more meaningful) and mark rte_jhash2 as
>   deprecated
>
Thanks for all the work, and rework, Pablo.

Series Acked-by: Bruce Richardson <bruce.richardson@intel.com>

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [dpdk-dev] [PATCH v6 08/10] hash: remove duplicated code
  2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 08/10] hash: remove duplicated code Pablo de Lara
@ 2015-06-16  9:33             ` Thomas Monjalon
  2015-06-16 10:31               ` De Lara Guarch, Pablo
  0 siblings, 1 reply; 62+ messages in thread
From: Thomas Monjalon @ 2015-06-16  9:33 UTC (permalink / raw)
  To: Pablo de Lara; +Cc: dev

The following patch is needed and will be squashed to fix some doxygen issues:

--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -306,13 +306,13 @@ rte_jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
  * with seeds. If you pass in (*pb)=0, the output (*pc) will be
  * the same as the return value from rte_jhash.
  *
- * @param k
+ * @param key
  *   Key to calculate hash of.
  * @param length
  *   Length of key in bytes.
  * @param pc
  *   IN: seed OUT: primary hash value.
- * @param pc
+ * @param pb
  *   IN: second seed OUT: secondary hash value.
  */
 static inline void
@@ -464,7 +464,7 @@ rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb)
  *   Length of key in units of 4 bytes.
  * @param pc
  *   IN: seed OUT: primary hash value.
- * @param pc
+ * @param pb
  *   IN: second seed OUT: secondary hash value.
  */
 static inline void

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [dpdk-dev] [PATCH v6 00/10] update jhash function
  2015-06-12 10:37           ` [dpdk-dev] [PATCH v6 00/10] update jhash function Bruce Richardson
@ 2015-06-16 10:22             ` Thomas Monjalon
  0 siblings, 0 replies; 62+ messages in thread
From: Thomas Monjalon @ 2015-06-16 10:22 UTC (permalink / raw)
  To: Pablo de Lara; +Cc: dev

2015-06-12 11:37, Bruce Richardson:
> On Wed, Jun 10, 2015 at 04:25:17PM +0100, Pablo de Lara wrote:
> > Jenkins hash function was developed originally in 1996,
> > and was integrated in first versions of DPDK.
> > The function has been improved in 2006,
> > achieving up to 35% better performance, compared to the original one.
> > 
> > This patchset updates the current jhash in DPDK,
> > including two new functions that generate two hashes from a single key.
> > 
> > It also separates the existing hash function performance tests to
> > another file, to make it quicker to run, and add new unit tests.
> > 
> > changes in v6:
> > - Use RTE_DIM macro, so it saves lines of code
> > - Correct mistaken performance improvement
> > - Add deprecated attribute, instead of printing a message calling it
> > - Add note stating the changes in release notes
> > 
> > changes in v5:
> > - Add functional tests (mainly to test that all functions 
> >   return the expected hash values)
> > - Modify range of key sizes to test
> > - Change order of output for perf tests, so it is clearer
> >   to compare different hash functions for same key size/initial value
> > - Add new initial value to test in the hash functions
> > - Fix some errors caught by checkpatch
> >  
> > changes in v4:
> > - Simplify key alignment checks
> > - Include missing x86 arch check
> > 
> > changes in v3:
> > 
> > - Update rte_jhash_1word, rte_jhash_2words and rte_jhash_3words
> >   functions
> > 
> > changes in v2:
> > 
> > - Split single commit in three commits, one that updates the existing functions
> >   and another that adds two new functions and use one of those functions
> >   as a base to be called by the other ones.
> > - Remove some unnecessary ifdefs in the code.
> > - Add new macros to help on the reutilization of constants
> > - Separate hash function performance tests to another file
> >   and improve cycle measurements.
> > - Rename existing function rte_jhash2 to rte_jhash_32b
> >   (something more meaningful) and mark rte_jhash2 as
> >   deprecated
> >
> Thanks for all the work, and rework, Pablo.
> 
> Series Acked-by: Bruce Richardson <bruce.richardson@intel.com>

Applied, thanks
Some doxygen typos have been fixed on the fly.

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [dpdk-dev] [PATCH v6 08/10] hash: remove duplicated code
  2015-06-16  9:33             ` Thomas Monjalon
@ 2015-06-16 10:31               ` De Lara Guarch, Pablo
  2015-06-16 13:08                 ` Thomas Monjalon
  0 siblings, 1 reply; 62+ messages in thread
From: De Lara Guarch, Pablo @ 2015-06-16 10:31 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev

Hi Thomas,

> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> Sent: Tuesday, June 16, 2015 10:33 AM
> To: De Lara Guarch, Pablo
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v6 08/10] hash: remove duplicated code
> 
> The following patch is needed and will be squashed to fix some doxygen
> issues:
> 
> --- a/lib/librte_hash/rte_jhash.h
> +++ b/lib/librte_hash/rte_jhash.h
> @@ -306,13 +306,13 @@ rte_jhash2(const uint32_t *k, uint32_t length,
> uint32_t initval)
>   * with seeds. If you pass in (*pb)=0, the output (*pc) will be
>   * the same as the return value from rte_jhash.
>   *
> - * @param k
> + * @param key
>   *   Key to calculate hash of.
>   * @param length
>   *   Length of key in bytes.
>   * @param pc
>   *   IN: seed OUT: primary hash value.
> - * @param pc
> + * @param pb
>   *   IN: second seed OUT: secondary hash value.
>   */
>  static inline void
> @@ -464,7 +464,7 @@ rte_jhash_2hashes(const void *key, uint32_t length,
> uint32_t *pc, uint32_t *pb)
>   *   Length of key in units of 4 bytes.
>   * @param pc
>   *   IN: seed OUT: primary hash value.
> - * @param pc
> + * @param pb
>   *   IN: second seed OUT: secondary hash value.
>   */
>  static inline void

Thanks for spotting this!
Are you going to do it yourself, or do you want me to do it and send a v7?

Pablo

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [dpdk-dev] [PATCH v6 08/10] hash: remove duplicated code
  2015-06-16 10:31               ` De Lara Guarch, Pablo
@ 2015-06-16 13:08                 ` Thomas Monjalon
  0 siblings, 0 replies; 62+ messages in thread
From: Thomas Monjalon @ 2015-06-16 13:08 UTC (permalink / raw)
  To: De Lara Guarch, Pablo; +Cc: dev

2015-06-16 10:31, De Lara Guarch, Pablo:
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > The following patch is needed and will be squashed to fix some doxygen
> > issues:
[...]
> Thanks for spotting this!
> Are you going to do it yourself, or do you want me to do it and send a v7?

Already squashed with patch 7:
	http://dpdk.org/browse/dpdk/commit/?id=8718219a8737b8

^ permalink raw reply	[flat|nested] 62+ messages in thread

end of thread, other threads:[~2015-06-16 13:09 UTC | newest]

Thread overview: 62+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-04-16 13:26 [dpdk-dev] [PATCH] hash: update jhash function with the latest available Pablo de Lara
2015-04-16 14:01 ` Bruce Richardson
2015-04-17 16:03   ` De Lara Guarch, Pablo
2015-04-24 11:23 ` [dpdk-dev] [PATCH v2 0/6] update jhash function Pablo de Lara
2015-04-24 11:23   ` [dpdk-dev] [PATCH v2 1/6] test/hash: move hash function perf tests to separate file Pablo de Lara
2015-04-24 11:23   ` [dpdk-dev] [PATCH v2 2/6] test/hash: improve accuracy on cycle measurements Pablo de Lara
2015-04-24 11:23   ` [dpdk-dev] [PATCH v2 3/6] hash: update jhash function with the latest available Pablo de Lara
2015-04-24 11:23   ` [dpdk-dev] [PATCH v2 4/6] hash: add two new functions to jhash library Pablo de Lara
2015-04-24 11:23   ` [dpdk-dev] [PATCH v2 5/6] hash: remove duplicated code Pablo de Lara
2015-04-24 11:23   ` [dpdk-dev] [PATCH v2 6/6] hash: rename rte_jhash2 to rte_jhash_32b Pablo de Lara
2015-05-05 14:43   ` [dpdk-dev] [PATCH v3 0/6] update jhash function Pablo de Lara
2015-05-05 14:43     ` [dpdk-dev] [PATCH v3 1/6] test/hash: move hash function perf tests to separate file Pablo de Lara
2015-05-05 14:43     ` [dpdk-dev] [PATCH v3 2/6] test/hash: improve accuracy on cycle measurements Pablo de Lara
2015-05-05 14:43     ` [dpdk-dev] [PATCH v3 3/6] hash: update jhash function with the latest available Pablo de Lara
2015-05-06  0:35       ` Ananyev, Konstantin
2015-05-06  9:36         ` De Lara Guarch, Pablo
2015-05-06 16:11           ` Ananyev, Konstantin
2015-05-07 11:11           ` Ananyev, Konstantin
2015-05-05 14:43     ` [dpdk-dev] [PATCH v3 4/6] hash: add two new functions to jhash library Pablo de Lara
2015-05-05 14:43     ` [dpdk-dev] [PATCH v3 5/6] hash: remove duplicated code Pablo de Lara
2015-05-05 14:43     ` [dpdk-dev] [PATCH v3 6/6] hash: rename rte_jhash2 to rte_jhash_32b Pablo de Lara
2015-05-12 11:02     ` [dpdk-dev] [PATCH v4 0/6] update jhash function Pablo de Lara
2015-05-12 11:02       ` [dpdk-dev] [PATCH v4 1/6] test/hash: move hash function perf tests to separate file Pablo de Lara
2015-05-12 11:02       ` [dpdk-dev] [PATCH v4 2/6] test/hash: improve accuracy on cycle measurements Pablo de Lara
2015-05-12 11:02       ` [dpdk-dev] [PATCH v4 3/6] hash: update jhash function with the latest available Pablo de Lara
2015-05-12 11:02       ` [dpdk-dev] [PATCH v4 4/6] hash: add two new functions to jhash library Pablo de Lara
2015-05-12 11:02       ` [dpdk-dev] [PATCH v4 5/6] hash: remove duplicated code Pablo de Lara
2015-05-12 11:02       ` [dpdk-dev] [PATCH v4 6/6] hash: rename rte_jhash2 to rte_jhash_32b Pablo de Lara
2015-05-12 15:33       ` [dpdk-dev] [PATCH v4 0/6] update jhash function Neil Horman
2015-05-13 13:52         ` De Lara Guarch, Pablo
2015-05-13 14:20           ` Neil Horman
2015-05-18 16:14       ` Bruce Richardson
2015-05-22 10:16       ` [dpdk-dev] [PATCH v5 00/10] " Pablo de Lara
2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 01/10] test/hash: move hash function perf tests to separate file Pablo de Lara
2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 02/10] test/hash: improve accuracy on cycle measurements Pablo de Lara
2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 03/10] test/hash: update key size range and initial values for testing Pablo de Lara
2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 04/10] test/hash: change order of loops in hash function tests Pablo de Lara
2015-06-10 11:05           ` Bruce Richardson
2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 05/10] test/hash: add new functional tests for hash functions Pablo de Lara
2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 06/10] hash: update jhash function with the latest available Pablo de Lara
2015-06-10 11:07           ` Bruce Richardson
2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 07/10] hash: add two new functions to jhash library Pablo de Lara
2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 08/10] hash: remove duplicated code Pablo de Lara
2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 09/10] hash: rename rte_jhash2 to rte_jhash_32b Pablo de Lara
2015-06-10 11:09           ` Bruce Richardson
2015-05-22 10:16         ` [dpdk-dev] [PATCH v5 10/10] test/hash: verify rte_jhash_1word/2words/3words Pablo de Lara
2015-06-10 15:25         ` [dpdk-dev] [PATCH v6 00/10] update jhash function Pablo de Lara
2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 01/10] test/hash: move hash function perf tests to separate file Pablo de Lara
2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 02/10] test/hash: improve accuracy on cycle measurements Pablo de Lara
2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 03/10] test/hash: update key size range and initial values for testing Pablo de Lara
2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 04/10] test/hash: change order of loops in hash function tests Pablo de Lara
2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 05/10] test/hash: add new functional tests for hash functions Pablo de Lara
2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 06/10] hash: update jhash function with the latest available Pablo de Lara
2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 07/10] hash: add two new functions to jhash library Pablo de Lara
2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 08/10] hash: remove duplicated code Pablo de Lara
2015-06-16  9:33             ` Thomas Monjalon
2015-06-16 10:31               ` De Lara Guarch, Pablo
2015-06-16 13:08                 ` Thomas Monjalon
2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 09/10] hash: rename rte_jhash2 to rte_jhash_32b Pablo de Lara
2015-06-10 15:25           ` [dpdk-dev] [PATCH v6 10/10] test/hash: verify rte_jhash_1word/2words/3words Pablo de Lara
2015-06-12 10:37           ` [dpdk-dev] [PATCH v6 00/10] update jhash function Bruce Richardson
2015-06-16 10:22             ` Thomas Monjalon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).