From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <michael.qiu@intel.com>
Received: from mga11.intel.com (mga11.intel.com [192.55.52.93])
 by dpdk.org (Postfix) with ESMTP id 870A75936
 for <dev@dpdk.org>; Wed, 23 Dec 2015 10:12:52 +0100 (CET)
Received: from fmsmga003.fm.intel.com ([10.253.24.29])
 by fmsmga102.fm.intel.com with ESMTP; 23 Dec 2015 01:12:51 -0800
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="5.20,468,1444719600"; d="scan'208";a="622667623"
Received: from fmsmsx106.amr.corp.intel.com ([10.18.124.204])
 by FMSMGA003.fm.intel.com with ESMTP; 23 Dec 2015 01:12:38 -0800
Received: from fmsmsx154.amr.corp.intel.com (10.18.116.70) by
 FMSMSX106.amr.corp.intel.com (10.18.124.204) with Microsoft SMTP Server (TLS)
 id 14.3.248.2; Wed, 23 Dec 2015 01:12:38 -0800
Received: from shsmsx152.ccr.corp.intel.com (10.239.6.52) by
 FMSMSX154.amr.corp.intel.com (10.18.116.70) with Microsoft SMTP Server (TLS)
 id 14.3.248.2; Wed, 23 Dec 2015 01:12:37 -0800
Received: from shsmsx101.ccr.corp.intel.com ([169.254.1.190]) by
 SHSMSX152.ccr.corp.intel.com ([169.254.6.105]) with mapi id 14.03.0248.002;
 Wed, 23 Dec 2015 17:12:35 +0800
From: "Qiu, Michael" <michael.qiu@intel.com>
To: Didier Pallard <didier.pallard@6wind.com>, "dev@dpdk.org" <dev@dpdk.org>, 
 "Richardson, Bruce" <bruce.richardson@intel.com>, "De Lara Guarch, Pablo"
 <pablo.de.lara.guarch@intel.com>
Thread-Topic: [dpdk-dev] [PATCH] hash: fix CRC32c computation
Thread-Index: AQHRPJwuz2yLNzBdWUKQmyU6cTYNEQ==
Date: Wed, 23 Dec 2015 09:12:35 +0000
Message-ID: <533710CFB86FA344BFBF2D6802E6028622EFE9FB@SHSMSX101.ccr.corp.intel.com>
References: <1450776898-8951-1-git-send-email-didier.pallard@6wind.com>
Accept-Language: en-US
Content-Language: en-US
X-MS-Has-Attach: 
X-MS-TNEF-Correlator: 
x-originating-ip: [10.239.127.40]
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: quoted-printable
MIME-Version: 1.0
Subject: Re: [dpdk-dev] [PATCH] hash: fix CRC32c computation
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: patches and discussions about DPDK <dev.dpdk.org>
List-Unsubscribe: <http://dpdk.org/ml/options/dev>,
 <mailto:dev-request@dpdk.org?subject=unsubscribe>
List-Archive: <http://dpdk.org/ml/archives/dev/>
List-Post: <mailto:dev@dpdk.org>
List-Help: <mailto:dev-request@dpdk.org?subject=help>
List-Subscribe: <http://dpdk.org/ml/listinfo/dev>,
 <mailto:dev-request@dpdk.org?subject=subscribe>
X-List-Received-Date: Wed, 23 Dec 2015 09:12:53 -0000

Is it suitable to put so many code in commit log?=0A=
=0A=
Thanks,=0A=
Michael=0A=
On 12/22/2015 5:36 PM, Didier Pallard wrote:=0A=
> As demonstrated by the following code, CRC32c computation is not valid=0A=
> when buffer length is not a multiple of 4 bytes:=0A=
> (Output obtained by code below)=0A=
>=0A=
> CRC of 1 NULL bytes expected: 0x527d5351=0A=
>     soft: 527d5351=0A=
>     rte accelerated: 48674bc7=0A=
>     rte soft: 48674bc7=0A=
> CRC of 2 NULL bytes expected: 0xf16177d2=0A=
>     soft: f16177d2=0A=
>     rte accelerated: 48674bc7=0A=
>     rte soft: 48674bc7=0A=
> CRC of 2x1 NULL bytes expected: 0xf16177d2=0A=
>     soft: f16177d2=0A=
>     rte accelerated: 8c28b28a=0A=
>     rte soft: 8c28b28a=0A=
> CRC of 3 NULL bytes expected: 0x6064a37a=0A=
>     soft: 6064a37a=0A=
>     rte accelerated: 48674bc7=0A=
>     rte soft: 48674bc7=0A=
> CRC of 4 NULL bytes expected: 0x48674bc7=0A=
>     soft: 48674bc7=0A=
>     rte accelerated: 48674bc7=0A=
>     rte soft: 48674bc7=0A=
>=0A=
> Values returned by rte_hash_crc functions does not match the one=0A=
> computed by a trivial crc32c implementation.=0A=
>=0A=
> ARM code is a guess, it is not tested, neither compiled.=0A=
>=0A=
> code showing the problem:=0A=
>=0A=
> uint8_t null_test[32] =3D {0};=0A=
>=0A=
> static uint32_t crc32c_trivial(uint8_t *buffer, uint32_t length, uint32_t=
 crc)=0A=
> {=0A=
>     uint32_t i, j;=0A=
>     for (i =3D 0; i < length; ++i)=0A=
>     {=0A=
>         crc =3D crc ^ buffer[i];=0A=
>         for (j =3D 0; j < 8; j++)=0A=
>             crc =3D (crc >> 1) ^ 0x80000000 ^ ((~crc & 1) * 0x82f63b78);=
=0A=
>     }=0A=
>     return crc;=0A=
> }=0A=
>=0A=
> void hash_test(void);=0A=
> void hash_test(void)=0A=
> {=0A=
> 	printf("CRC of 1 nul byte expected: 0x527d5351\n");=0A=
> 	printf("    soft: %08x\n", crc32c_trivial(null_test, 1, 0));=0A=
> 	rte_hash_crc_init_alg();=0A=
> 	printf("    rte accelerated: %08x\n", ~rte_hash_crc(null_test, 1, 0xFFFF=
FFFF));=0A=
> 	rte_hash_crc_set_alg(CRC32_SW);=0A=
> 	printf("    rte soft: %08x\n", ~rte_hash_crc(null_test, 1, 0xFFFFFFFF));=
=0A=
>=0A=
> 	printf("CRC of 2 nul bytes expected: 0xf16177d2\n");=0A=
> 	printf("    soft: %08x\n", crc32c_trivial(null_test, 2, 0));=0A=
> 	rte_hash_crc_init_alg();=0A=
> 	printf("    rte accelerated: %08x\n", ~rte_hash_crc(null_test, 2, 0xFFFF=
FFFF));=0A=
> 	rte_hash_crc_set_alg(CRC32_SW);=0A=
> 	printf("    rte soft: %08x\n", ~rte_hash_crc(null_test, 2, 0xFFFFFFFF));=
=0A=
>=0A=
> 	printf("CRC of 2x1 nul bytes expected: 0xf16177d2\n");=0A=
> 	printf("    soft: %08x\n", crc32c_trivial(null_test, 1, crc32c_trivial(n=
ull_test, 1, 0)));=0A=
> 	rte_hash_crc_init_alg();=0A=
> 	printf("    rte accelerated: %08x\n", ~rte_hash_crc(null_test, 1, rte_ha=
sh_crc(null_test, 1, 0xFFFFFFFF)));=0A=
> 	rte_hash_crc_set_alg(CRC32_SW);=0A=
> 	printf("    rte soft: %08x\n", ~rte_hash_crc(null_test, 1, rte_hash_crc(=
null_test, 1, 0xFFFFFFFF)));=0A=
>=0A=
> 	printf("CRC of 3 nul bytes expected: 0x6064a37a\n");=0A=
> 	printf("    soft: %08x\n", crc32c_trivial(null_test, 3, 0));=0A=
> 	rte_hash_crc_init_alg();=0A=
> 	printf("    rte accelerated: %08x\n", ~rte_hash_crc(null_test, 3, 0xFFFF=
FFFF));=0A=
> 	rte_hash_crc_set_alg(CRC32_SW);=0A=
> 	printf("    rte soft: %08x\n", ~rte_hash_crc(null_test, 3, 0xFFFFFFFF));=
=0A=
>=0A=
> 	printf("CRC of 4 nul bytes expected: 0x48674bc7\n");=0A=
> 	printf("    soft: %08x\n", crc32c_trivial(null_test, 4, 0));=0A=
> 	rte_hash_crc_init_alg();=0A=
> 	printf("    rte accelerated: %08x\n", ~rte_hash_crc(null_test, 4, 0xFFFF=
FFFF));=0A=
> 	rte_hash_crc_set_alg(CRC32_SW);=0A=
> 	printf("    rte soft: %08x\n", ~rte_hash_crc(null_test, 4, 0xFFFFFFFF));=
=0A=
> }=0A=
>=0A=
> Signed-off-by: Didier Pallard <didier.pallard@6wind.com>=0A=
> Acked-by: David Marchand <david.marchand@6wind.com>=0A=
> ---=0A=
>  lib/librte_hash/rte_crc_arm64.h |  64 ++++++++++++++++++++=0A=
>  lib/librte_hash/rte_hash_crc.h  | 125 +++++++++++++++++++++++++++++++---=
------=0A=
>  2 files changed, 162 insertions(+), 27 deletions(-)=0A=
>=0A=
> diff --git a/lib/librte_hash/rte_crc_arm64.h b/lib/librte_hash/rte_crc_ar=
m64.h=0A=
> index 02e26bc..44ef460 100644=0A=
> --- a/lib/librte_hash/rte_crc_arm64.h=0A=
> +++ b/lib/librte_hash/rte_crc_arm64.h=0A=
> @@ -50,6 +50,28 @@ extern "C" {=0A=
>  #include <rte_common.h>=0A=
>  =0A=
>  static inline uint32_t=0A=
> +crc32c_arm64_u8(uint8_t data, uint32_t init_val)=0A=
> +{=0A=
> +	asm(".arch armv8-a+crc");=0A=
> +	__asm__ volatile(=0A=
> +			"crc32cb %w[crc], %w[crc], %b[value]"=0A=
> +			: [crc] "+r" (init_val)=0A=
> +			: [value] "r" (data));=0A=
> +	return init_val;=0A=
> +}=0A=
> +=0A=
> +static inline uint32_t=0A=
> +crc32c_arm64_u16(uint16_t data, uint32_t init_val)=0A=
> +{=0A=
> +	asm(".arch armv8-a+crc");=0A=
> +	__asm__ volatile(=0A=
> +			"crc32ch %w[crc], %w[crc], %h[value]"=0A=
> +			: [crc] "+r" (init_val)=0A=
> +			: [value] "r" (data));=0A=
> +	return init_val;=0A=
> +}=0A=
> +=0A=
> +static inline uint32_t=0A=
>  crc32c_arm64_u32(uint32_t data, uint32_t init_val)=0A=
>  {=0A=
>  	asm(".arch armv8-a+crc");=0A=
> @@ -103,6 +125,48 @@ rte_hash_crc_init_alg(void)=0A=
>  }=0A=
>  =0A=
>  /**=0A=
> + * Use single crc32 instruction to perform a hash on a 1 byte value.=0A=
> + * Fall back to software crc32 implementation in case arm64 crc intrinsi=
cs is=0A=
> + * not supported=0A=
> + *=0A=
> + * @param data=0A=
> + *   Data to perform hash on.=0A=
> + * @param init_val=0A=
> + *   Value to initialise hash generator.=0A=
> + * @return=0A=
> + *   32bit calculated hash value.=0A=
> + */=0A=
> +static inline uint32_t=0A=
> +rte_hash_crc_1byte(uint8_t data, uint32_t init_val)=0A=
> +{=0A=
> +	if (likely(crc32_alg & CRC32_ARM64))=0A=
> +		return crc32c_arm64_u8(data, init_val);=0A=
> +=0A=
> +	return crc32c_1byte(data, init_val);=0A=
> +}=0A=
> +=0A=
> +/**=0A=
> + * Use single crc32 instruction to perform a hash on a 2 bytes value.=0A=
> + * Fall back to software crc32 implementation in case arm64 crc intrinsi=
cs is=0A=
> + * not supported=0A=
> + *=0A=
> + * @param data=0A=
> + *   Data to perform hash on.=0A=
> + * @param init_val=0A=
> + *   Value to initialise hash generator.=0A=
> + * @return=0A=
> + *   32bit calculated hash value.=0A=
> + */=0A=
> +static inline uint32_t=0A=
> +rte_hash_crc_2byte(uint16_t data, uint32_t init_val)=0A=
> +{=0A=
> +	if (likely(crc32_alg & CRC32_ARM64))=0A=
> +		return crc32c_arm64_u16(data, init_val);=0A=
> +=0A=
> +	return crc32c_2bytes(data, init_val);=0A=
> +}=0A=
> +=0A=
> +/**=0A=
>   * Use single crc32 instruction to perform a hash on a 4 byte value.=0A=
>   * Fall back to software crc32 implementation in case arm64 crc intrinsi=
cs is=0A=
>   * not supported=0A=
> diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_cr=
c.h=0A=
> index 78a34b7..485f8a2 100644=0A=
> --- a/lib/librte_hash/rte_hash_crc.h=0A=
> +++ b/lib/librte_hash/rte_hash_crc.h=0A=
> @@ -328,6 +328,28 @@ static const uint32_t crc32c_tables[8][256] =3D {{=
=0A=
>  	 crc32c_tables[(n)-1][((crc) >> 8) & 0xFF])=0A=
>  =0A=
>  static inline uint32_t=0A=
> +crc32c_1byte(uint8_t data, uint32_t init_val)=0A=
> +{=0A=
> +	uint32_t crc;=0A=
> +	crc =3D init_val;=0A=
> +	crc ^=3D data;=0A=
> +=0A=
> +	return crc32c_tables[0][crc & 0xff] ^ (crc >> 8);=0A=
> +}=0A=
> +=0A=
> +static inline uint32_t=0A=
> +crc32c_2bytes(uint16_t data, uint32_t init_val)=0A=
> +{=0A=
> +	uint32_t crc;=0A=
> +	crc =3D init_val;=0A=
> +	crc ^=3D data;=0A=
> +=0A=
> +	crc =3D CRC32_UPD(crc, 1) ^ (crc >> 16);=0A=
> +=0A=
> +	return crc;=0A=
> +}=0A=
> +=0A=
> +static inline uint32_t=0A=
>  crc32c_1word(uint32_t data, uint32_t init_val)=0A=
>  {=0A=
>  	uint32_t crc, term1, term2;=0A=
> @@ -367,6 +389,26 @@ crc32c_2words(uint64_t data, uint32_t init_val)=0A=
>  =0A=
>  #if defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_64)=0A=
>  static inline uint32_t=0A=
> +crc32c_sse42_u8(uint8_t data, uint32_t init_val)=0A=
> +{=0A=
> +	__asm__ volatile(=0A=
> +			"crc32b %[data], %[init_val];"=0A=
> +			: [init_val] "+r" (init_val)=0A=
> +			: [data] "rm" (data));=0A=
> +	return init_val;=0A=
> +}=0A=
> +=0A=
> +static inline uint32_t=0A=
> +crc32c_sse42_u16(uint16_t data, uint32_t init_val)=0A=
> +{=0A=
> +	__asm__ volatile(=0A=
> +			"crc32w %[data], %[init_val];"=0A=
> +			: [init_val] "+r" (init_val)=0A=
> +			: [data] "rm" (data));=0A=
> +	return init_val;=0A=
> +}=0A=
> +=0A=
> +static inline uint32_t=0A=
>  crc32c_sse42_u32(uint32_t data, uint32_t init_val)=0A=
>  {=0A=
>  	__asm__ volatile(=0A=
> @@ -453,6 +495,52 @@ rte_hash_crc_init_alg(void)=0A=
>  }=0A=
>  =0A=
>  /**=0A=
> + * Use single crc32 instruction to perform a hash on a byte value.=0A=
> + * Fall back to software crc32 implementation in case SSE4.2 is=0A=
> + * not supported=0A=
> + *=0A=
> + * @param data=0A=
> + *   Data to perform hash on.=0A=
> + * @param init_val=0A=
> + *   Value to initialise hash generator.=0A=
> + * @return=0A=
> + *   32bit calculated hash value.=0A=
> + */=0A=
> +static inline uint32_t=0A=
> +rte_hash_crc_1byte(uint8_t data, uint32_t init_val)=0A=
> +{=0A=
> +#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64=0A=
> +	if (likely(crc32_alg & CRC32_SSE42))=0A=
> +		return crc32c_sse42_u8(data, init_val);=0A=
> +#endif=0A=
> +=0A=
> +	return crc32c_1byte(data, init_val);=0A=
> +}=0A=
> +=0A=
> +/**=0A=
> + * Use single crc32 instruction to perform a hash on a byte value.=0A=
> + * Fall back to software crc32 implementation in case SSE4.2 is=0A=
> + * not supported=0A=
> + *=0A=
> + * @param data=0A=
> + *   Data to perform hash on.=0A=
> + * @param init_val=0A=
> + *   Value to initialise hash generator.=0A=
> + * @return=0A=
> + *   32bit calculated hash value.=0A=
> + */=0A=
> +static inline uint32_t=0A=
> +rte_hash_crc_2byte(uint16_t data, uint32_t init_val)=0A=
> +{=0A=
> +#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64=0A=
> +	if (likely(crc32_alg & CRC32_SSE42))=0A=
> +		return crc32c_sse42_u16(data, init_val);=0A=
> +#endif=0A=
> +=0A=
> +	return crc32c_2bytes(data, init_val);=0A=
> +}=0A=
> +=0A=
> +/**=0A=
>   * Use single crc32 instruction to perform a hash on a 4 byte value.=0A=
>   * Fall back to software crc32 implementation in case SSE4.2 is=0A=
>   * not supported=0A=
> @@ -521,7 +609,6 @@ static inline uint32_t=0A=
>  rte_hash_crc(const void *data, uint32_t data_len, uint32_t init_val)=0A=
>  {=0A=
>  	unsigned i;=0A=
> -	uint64_t temp =3D 0;=0A=
>  	uintptr_t pd =3D (uintptr_t) data;=0A=
>  =0A=
>  	for (i =3D 0; i < data_len / 8; i++) {=0A=
> @@ -529,35 +616,19 @@ rte_hash_crc(const void *data, uint32_t data_len, u=
int32_t init_val)=0A=
>  		pd +=3D 8;=0A=
>  	}=0A=
>  =0A=
> -	switch (7 - (data_len & 0x07)) {=0A=
> -	case 0:=0A=
> -		temp |=3D (uint64_t) *((const uint8_t *)pd + 6) << 48;=0A=
> -		/* Fallthrough */=0A=
> -	case 1:=0A=
> -		temp |=3D (uint64_t) *((const uint8_t *)pd + 5) << 40;=0A=
> -		/* Fallthrough */=0A=
> -	case 2:=0A=
> -		temp |=3D (uint64_t) *((const uint8_t *)pd + 4) << 32;=0A=
> -		temp |=3D *(const uint32_t *)pd;=0A=
> -		init_val =3D rte_hash_crc_8byte(temp, init_val);=0A=
> -		break;=0A=
> -	case 3:=0A=
> +	if (data_len & 0x4) {=0A=
>  		init_val =3D rte_hash_crc_4byte(*(const uint32_t *)pd, init_val);=0A=
> -		break;=0A=
> -	case 4:=0A=
> -		temp |=3D *((const uint8_t *)pd + 2) << 16;=0A=
> -		/* Fallthrough */=0A=
> -	case 5:=0A=
> -		temp |=3D *((const uint8_t *)pd + 1) << 8;=0A=
> -		/* Fallthrough */=0A=
> -	case 6:=0A=
> -		temp |=3D *(const uint8_t *)pd;=0A=
> -		init_val =3D rte_hash_crc_4byte(temp, init_val);=0A=
> -		/* Fallthrough */=0A=
> -	default:=0A=
> -		break;=0A=
> +		pd +=3D 4;=0A=
> +	}=0A=
> +=0A=
> +	if (data_len & 0x2) {=0A=
> +		init_val =3D rte_hash_crc_2byte(*(const uint16_t *)pd, init_val);=0A=
> +		pd +=3D 2;=0A=
>  	}=0A=
>  =0A=
> +	if (data_len & 0x1)=0A=
> +		init_val =3D rte_hash_crc_1byte(*(const uint8_t *)pd, init_val);=0A=
> +=0A=
>  	return init_val;=0A=
>  }=0A=
>  =0A=
=0A=