From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga11.intel.com (mga11.intel.com [192.55.52.93]) by dpdk.org (Postfix) with ESMTP id 870A75936 for ; Wed, 23 Dec 2015 10:12:52 +0100 (CET) Received: from fmsmga003.fm.intel.com ([10.253.24.29]) by fmsmga102.fm.intel.com with ESMTP; 23 Dec 2015 01:12:51 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.20,468,1444719600"; d="scan'208";a="622667623" Received: from fmsmsx106.amr.corp.intel.com ([10.18.124.204]) by FMSMGA003.fm.intel.com with ESMTP; 23 Dec 2015 01:12:38 -0800 Received: from fmsmsx154.amr.corp.intel.com (10.18.116.70) by FMSMSX106.amr.corp.intel.com (10.18.124.204) with Microsoft SMTP Server (TLS) id 14.3.248.2; Wed, 23 Dec 2015 01:12:38 -0800 Received: from shsmsx152.ccr.corp.intel.com (10.239.6.52) by FMSMSX154.amr.corp.intel.com (10.18.116.70) with Microsoft SMTP Server (TLS) id 14.3.248.2; Wed, 23 Dec 2015 01:12:37 -0800 Received: from shsmsx101.ccr.corp.intel.com ([169.254.1.190]) by SHSMSX152.ccr.corp.intel.com ([169.254.6.105]) with mapi id 14.03.0248.002; Wed, 23 Dec 2015 17:12:35 +0800 From: "Qiu, Michael" To: Didier Pallard , "dev@dpdk.org" , "Richardson, Bruce" , "De Lara Guarch, Pablo" Thread-Topic: [dpdk-dev] [PATCH] hash: fix CRC32c computation Thread-Index: AQHRPJwuz2yLNzBdWUKQmyU6cTYNEQ== Date: Wed, 23 Dec 2015 09:12:35 +0000 Message-ID: <533710CFB86FA344BFBF2D6802E6028622EFE9FB@SHSMSX101.ccr.corp.intel.com> References: <1450776898-8951-1-git-send-email-didier.pallard@6wind.com> Accept-Language: en-US Content-Language: en-US X-MS-Has-Attach: X-MS-TNEF-Correlator: x-originating-ip: [10.239.127.40] Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Subject: Re: [dpdk-dev] [PATCH] hash: fix CRC32c computation X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 23 Dec 2015 09:12:53 -0000 Is it suitable to put so many code in commit log?=0A= =0A= Thanks,=0A= Michael=0A= On 12/22/2015 5:36 PM, Didier Pallard wrote:=0A= > As demonstrated by the following code, CRC32c computation is not valid=0A= > when buffer length is not a multiple of 4 bytes:=0A= > (Output obtained by code below)=0A= >=0A= > CRC of 1 NULL bytes expected: 0x527d5351=0A= > soft: 527d5351=0A= > rte accelerated: 48674bc7=0A= > rte soft: 48674bc7=0A= > CRC of 2 NULL bytes expected: 0xf16177d2=0A= > soft: f16177d2=0A= > rte accelerated: 48674bc7=0A= > rte soft: 48674bc7=0A= > CRC of 2x1 NULL bytes expected: 0xf16177d2=0A= > soft: f16177d2=0A= > rte accelerated: 8c28b28a=0A= > rte soft: 8c28b28a=0A= > CRC of 3 NULL bytes expected: 0x6064a37a=0A= > soft: 6064a37a=0A= > rte accelerated: 48674bc7=0A= > rte soft: 48674bc7=0A= > CRC of 4 NULL bytes expected: 0x48674bc7=0A= > soft: 48674bc7=0A= > rte accelerated: 48674bc7=0A= > rte soft: 48674bc7=0A= >=0A= > Values returned by rte_hash_crc functions does not match the one=0A= > computed by a trivial crc32c implementation.=0A= >=0A= > ARM code is a guess, it is not tested, neither compiled.=0A= >=0A= > code showing the problem:=0A= >=0A= > uint8_t null_test[32] =3D {0};=0A= >=0A= > static uint32_t crc32c_trivial(uint8_t *buffer, uint32_t length, uint32_t= crc)=0A= > {=0A= > uint32_t i, j;=0A= > for (i =3D 0; i < length; ++i)=0A= > {=0A= > crc =3D crc ^ buffer[i];=0A= > for (j =3D 0; j < 8; j++)=0A= > crc =3D (crc >> 1) ^ 0x80000000 ^ ((~crc & 1) * 0x82f63b78);= =0A= > }=0A= > return crc;=0A= > }=0A= >=0A= > void hash_test(void);=0A= > void hash_test(void)=0A= > {=0A= > printf("CRC of 1 nul byte expected: 0x527d5351\n");=0A= > printf(" soft: %08x\n", crc32c_trivial(null_test, 1, 0));=0A= > rte_hash_crc_init_alg();=0A= > printf(" rte accelerated: %08x\n", ~rte_hash_crc(null_test, 1, 0xFFFF= FFFF));=0A= > rte_hash_crc_set_alg(CRC32_SW);=0A= > printf(" rte soft: %08x\n", ~rte_hash_crc(null_test, 1, 0xFFFFFFFF));= =0A= >=0A= > printf("CRC of 2 nul bytes expected: 0xf16177d2\n");=0A= > printf(" soft: %08x\n", crc32c_trivial(null_test, 2, 0));=0A= > rte_hash_crc_init_alg();=0A= > printf(" rte accelerated: %08x\n", ~rte_hash_crc(null_test, 2, 0xFFFF= FFFF));=0A= > rte_hash_crc_set_alg(CRC32_SW);=0A= > printf(" rte soft: %08x\n", ~rte_hash_crc(null_test, 2, 0xFFFFFFFF));= =0A= >=0A= > printf("CRC of 2x1 nul bytes expected: 0xf16177d2\n");=0A= > printf(" soft: %08x\n", crc32c_trivial(null_test, 1, crc32c_trivial(n= ull_test, 1, 0)));=0A= > rte_hash_crc_init_alg();=0A= > printf(" rte accelerated: %08x\n", ~rte_hash_crc(null_test, 1, rte_ha= sh_crc(null_test, 1, 0xFFFFFFFF)));=0A= > rte_hash_crc_set_alg(CRC32_SW);=0A= > printf(" rte soft: %08x\n", ~rte_hash_crc(null_test, 1, rte_hash_crc(= null_test, 1, 0xFFFFFFFF)));=0A= >=0A= > printf("CRC of 3 nul bytes expected: 0x6064a37a\n");=0A= > printf(" soft: %08x\n", crc32c_trivial(null_test, 3, 0));=0A= > rte_hash_crc_init_alg();=0A= > printf(" rte accelerated: %08x\n", ~rte_hash_crc(null_test, 3, 0xFFFF= FFFF));=0A= > rte_hash_crc_set_alg(CRC32_SW);=0A= > printf(" rte soft: %08x\n", ~rte_hash_crc(null_test, 3, 0xFFFFFFFF));= =0A= >=0A= > printf("CRC of 4 nul bytes expected: 0x48674bc7\n");=0A= > printf(" soft: %08x\n", crc32c_trivial(null_test, 4, 0));=0A= > rte_hash_crc_init_alg();=0A= > printf(" rte accelerated: %08x\n", ~rte_hash_crc(null_test, 4, 0xFFFF= FFFF));=0A= > rte_hash_crc_set_alg(CRC32_SW);=0A= > printf(" rte soft: %08x\n", ~rte_hash_crc(null_test, 4, 0xFFFFFFFF));= =0A= > }=0A= >=0A= > Signed-off-by: Didier Pallard =0A= > Acked-by: David Marchand =0A= > ---=0A= > lib/librte_hash/rte_crc_arm64.h | 64 ++++++++++++++++++++=0A= > lib/librte_hash/rte_hash_crc.h | 125 +++++++++++++++++++++++++++++++---= ------=0A= > 2 files changed, 162 insertions(+), 27 deletions(-)=0A= >=0A= > diff --git a/lib/librte_hash/rte_crc_arm64.h b/lib/librte_hash/rte_crc_ar= m64.h=0A= > index 02e26bc..44ef460 100644=0A= > --- a/lib/librte_hash/rte_crc_arm64.h=0A= > +++ b/lib/librte_hash/rte_crc_arm64.h=0A= > @@ -50,6 +50,28 @@ extern "C" {=0A= > #include =0A= > =0A= > static inline uint32_t=0A= > +crc32c_arm64_u8(uint8_t data, uint32_t init_val)=0A= > +{=0A= > + asm(".arch armv8-a+crc");=0A= > + __asm__ volatile(=0A= > + "crc32cb %w[crc], %w[crc], %b[value]"=0A= > + : [crc] "+r" (init_val)=0A= > + : [value] "r" (data));=0A= > + return init_val;=0A= > +}=0A= > +=0A= > +static inline uint32_t=0A= > +crc32c_arm64_u16(uint16_t data, uint32_t init_val)=0A= > +{=0A= > + asm(".arch armv8-a+crc");=0A= > + __asm__ volatile(=0A= > + "crc32ch %w[crc], %w[crc], %h[value]"=0A= > + : [crc] "+r" (init_val)=0A= > + : [value] "r" (data));=0A= > + return init_val;=0A= > +}=0A= > +=0A= > +static inline uint32_t=0A= > crc32c_arm64_u32(uint32_t data, uint32_t init_val)=0A= > {=0A= > asm(".arch armv8-a+crc");=0A= > @@ -103,6 +125,48 @@ rte_hash_crc_init_alg(void)=0A= > }=0A= > =0A= > /**=0A= > + * Use single crc32 instruction to perform a hash on a 1 byte value.=0A= > + * Fall back to software crc32 implementation in case arm64 crc intrinsi= cs is=0A= > + * not supported=0A= > + *=0A= > + * @param data=0A= > + * Data to perform hash on.=0A= > + * @param init_val=0A= > + * Value to initialise hash generator.=0A= > + * @return=0A= > + * 32bit calculated hash value.=0A= > + */=0A= > +static inline uint32_t=0A= > +rte_hash_crc_1byte(uint8_t data, uint32_t init_val)=0A= > +{=0A= > + if (likely(crc32_alg & CRC32_ARM64))=0A= > + return crc32c_arm64_u8(data, init_val);=0A= > +=0A= > + return crc32c_1byte(data, init_val);=0A= > +}=0A= > +=0A= > +/**=0A= > + * Use single crc32 instruction to perform a hash on a 2 bytes value.=0A= > + * Fall back to software crc32 implementation in case arm64 crc intrinsi= cs is=0A= > + * not supported=0A= > + *=0A= > + * @param data=0A= > + * Data to perform hash on.=0A= > + * @param init_val=0A= > + * Value to initialise hash generator.=0A= > + * @return=0A= > + * 32bit calculated hash value.=0A= > + */=0A= > +static inline uint32_t=0A= > +rte_hash_crc_2byte(uint16_t data, uint32_t init_val)=0A= > +{=0A= > + if (likely(crc32_alg & CRC32_ARM64))=0A= > + return crc32c_arm64_u16(data, init_val);=0A= > +=0A= > + return crc32c_2bytes(data, init_val);=0A= > +}=0A= > +=0A= > +/**=0A= > * Use single crc32 instruction to perform a hash on a 4 byte value.=0A= > * Fall back to software crc32 implementation in case arm64 crc intrinsi= cs is=0A= > * not supported=0A= > diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_cr= c.h=0A= > index 78a34b7..485f8a2 100644=0A= > --- a/lib/librte_hash/rte_hash_crc.h=0A= > +++ b/lib/librte_hash/rte_hash_crc.h=0A= > @@ -328,6 +328,28 @@ static const uint32_t crc32c_tables[8][256] =3D {{= =0A= > crc32c_tables[(n)-1][((crc) >> 8) & 0xFF])=0A= > =0A= > static inline uint32_t=0A= > +crc32c_1byte(uint8_t data, uint32_t init_val)=0A= > +{=0A= > + uint32_t crc;=0A= > + crc =3D init_val;=0A= > + crc ^=3D data;=0A= > +=0A= > + return crc32c_tables[0][crc & 0xff] ^ (crc >> 8);=0A= > +}=0A= > +=0A= > +static inline uint32_t=0A= > +crc32c_2bytes(uint16_t data, uint32_t init_val)=0A= > +{=0A= > + uint32_t crc;=0A= > + crc =3D init_val;=0A= > + crc ^=3D data;=0A= > +=0A= > + crc =3D CRC32_UPD(crc, 1) ^ (crc >> 16);=0A= > +=0A= > + return crc;=0A= > +}=0A= > +=0A= > +static inline uint32_t=0A= > crc32c_1word(uint32_t data, uint32_t init_val)=0A= > {=0A= > uint32_t crc, term1, term2;=0A= > @@ -367,6 +389,26 @@ crc32c_2words(uint64_t data, uint32_t init_val)=0A= > =0A= > #if defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_64)=0A= > static inline uint32_t=0A= > +crc32c_sse42_u8(uint8_t data, uint32_t init_val)=0A= > +{=0A= > + __asm__ volatile(=0A= > + "crc32b %[data], %[init_val];"=0A= > + : [init_val] "+r" (init_val)=0A= > + : [data] "rm" (data));=0A= > + return init_val;=0A= > +}=0A= > +=0A= > +static inline uint32_t=0A= > +crc32c_sse42_u16(uint16_t data, uint32_t init_val)=0A= > +{=0A= > + __asm__ volatile(=0A= > + "crc32w %[data], %[init_val];"=0A= > + : [init_val] "+r" (init_val)=0A= > + : [data] "rm" (data));=0A= > + return init_val;=0A= > +}=0A= > +=0A= > +static inline uint32_t=0A= > crc32c_sse42_u32(uint32_t data, uint32_t init_val)=0A= > {=0A= > __asm__ volatile(=0A= > @@ -453,6 +495,52 @@ rte_hash_crc_init_alg(void)=0A= > }=0A= > =0A= > /**=0A= > + * Use single crc32 instruction to perform a hash on a byte value.=0A= > + * Fall back to software crc32 implementation in case SSE4.2 is=0A= > + * not supported=0A= > + *=0A= > + * @param data=0A= > + * Data to perform hash on.=0A= > + * @param init_val=0A= > + * Value to initialise hash generator.=0A= > + * @return=0A= > + * 32bit calculated hash value.=0A= > + */=0A= > +static inline uint32_t=0A= > +rte_hash_crc_1byte(uint8_t data, uint32_t init_val)=0A= > +{=0A= > +#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64=0A= > + if (likely(crc32_alg & CRC32_SSE42))=0A= > + return crc32c_sse42_u8(data, init_val);=0A= > +#endif=0A= > +=0A= > + return crc32c_1byte(data, init_val);=0A= > +}=0A= > +=0A= > +/**=0A= > + * Use single crc32 instruction to perform a hash on a byte value.=0A= > + * Fall back to software crc32 implementation in case SSE4.2 is=0A= > + * not supported=0A= > + *=0A= > + * @param data=0A= > + * Data to perform hash on.=0A= > + * @param init_val=0A= > + * Value to initialise hash generator.=0A= > + * @return=0A= > + * 32bit calculated hash value.=0A= > + */=0A= > +static inline uint32_t=0A= > +rte_hash_crc_2byte(uint16_t data, uint32_t init_val)=0A= > +{=0A= > +#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64=0A= > + if (likely(crc32_alg & CRC32_SSE42))=0A= > + return crc32c_sse42_u16(data, init_val);=0A= > +#endif=0A= > +=0A= > + return crc32c_2bytes(data, init_val);=0A= > +}=0A= > +=0A= > +/**=0A= > * Use single crc32 instruction to perform a hash on a 4 byte value.=0A= > * Fall back to software crc32 implementation in case SSE4.2 is=0A= > * not supported=0A= > @@ -521,7 +609,6 @@ static inline uint32_t=0A= > rte_hash_crc(const void *data, uint32_t data_len, uint32_t init_val)=0A= > {=0A= > unsigned i;=0A= > - uint64_t temp =3D 0;=0A= > uintptr_t pd =3D (uintptr_t) data;=0A= > =0A= > for (i =3D 0; i < data_len / 8; i++) {=0A= > @@ -529,35 +616,19 @@ rte_hash_crc(const void *data, uint32_t data_len, u= int32_t init_val)=0A= > pd +=3D 8;=0A= > }=0A= > =0A= > - switch (7 - (data_len & 0x07)) {=0A= > - case 0:=0A= > - temp |=3D (uint64_t) *((const uint8_t *)pd + 6) << 48;=0A= > - /* Fallthrough */=0A= > - case 1:=0A= > - temp |=3D (uint64_t) *((const uint8_t *)pd + 5) << 40;=0A= > - /* Fallthrough */=0A= > - case 2:=0A= > - temp |=3D (uint64_t) *((const uint8_t *)pd + 4) << 32;=0A= > - temp |=3D *(const uint32_t *)pd;=0A= > - init_val =3D rte_hash_crc_8byte(temp, init_val);=0A= > - break;=0A= > - case 3:=0A= > + if (data_len & 0x4) {=0A= > init_val =3D rte_hash_crc_4byte(*(const uint32_t *)pd, init_val);=0A= > - break;=0A= > - case 4:=0A= > - temp |=3D *((const uint8_t *)pd + 2) << 16;=0A= > - /* Fallthrough */=0A= > - case 5:=0A= > - temp |=3D *((const uint8_t *)pd + 1) << 8;=0A= > - /* Fallthrough */=0A= > - case 6:=0A= > - temp |=3D *(const uint8_t *)pd;=0A= > - init_val =3D rte_hash_crc_4byte(temp, init_val);=0A= > - /* Fallthrough */=0A= > - default:=0A= > - break;=0A= > + pd +=3D 4;=0A= > + }=0A= > +=0A= > + if (data_len & 0x2) {=0A= > + init_val =3D rte_hash_crc_2byte(*(const uint16_t *)pd, init_val);=0A= > + pd +=3D 2;=0A= > }=0A= > =0A= > + if (data_len & 0x1)=0A= > + init_val =3D rte_hash_crc_1byte(*(const uint8_t *)pd, init_val);=0A= > +=0A= > return init_val;=0A= > }=0A= > =0A= =0A=