From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-wm0-f52.google.com (mail-wm0-f52.google.com [74.125.82.52]) by dpdk.org (Postfix) with ESMTP id 2508DA6A for ; Tue, 22 Dec 2015 10:35:19 +0100 (CET) Received: by mail-wm0-f52.google.com with SMTP id p187so100369656wmp.0 for ; Tue, 22 Dec 2015 01:35:19 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=6wind-com.20150623.gappssmtp.com; s=20150623; h=from:to:cc:subject:date:message-id; bh=Bi/F6cdBsQ1EuRTArdidwsyCwVkEAFeowCmh+Pg8eyc=; b=W6BSNb/RHgNkxLyHokDNXfk+80RSOC/0BSMQoN8bNJQFl2RJpFJHV7bUR2gd7AXqR3 fLY3NEgSHEq+1ECgbiQIcXeUprZj38Reowg2laJSATZClgoalS3k89deZTb1L+MXxDfa Hj1L7bEr6NlX7j0mB+hGTjEgqzZLtHLivKqInu5wfEaF5ln4tyuIKe0CE8mwqB4TaL2p cuvwhpo6sbGNi+nQDl6gGz7rKWuW1NrEhaszmJ7C/22SG7r8LLZg5D9od28JuuDxGhRj t8LRC955K8E2duLIODr9nk8T4NeKNlMw9CawOU3YO23kduFmbHw+tIUxGfkkjNoTi+r3 j6cQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20130820; h=x-gm-message-state:from:to:cc:subject:date:message-id; bh=Bi/F6cdBsQ1EuRTArdidwsyCwVkEAFeowCmh+Pg8eyc=; b=KQNab1yNuEUhLQZCJsk7Qp6ZofUy/hUWdfjnOu2q5NMXB7T3zxWw+Cy9nbuBdBN0QU dMoqU53EWj9bh8o/VoNqtTjTVLHrUg3rKORB+7DY1pIHzcYpuwfJSTIMj7UQsrU82Epb rLv3A0UySKgouAvZZRmH1E5XreHlOOJzjEw4IKofpVAyKZT8g/LaPpalwGvfCZdhxxKa wlcghe085QfIr2qFWNekvwG+yQB0ppfrVjzHHS3hR1aFLpljZh8Dz8+85tAfFKDAKV6d mqm479yBrLT8y+IMUwIH6z8t2sOqBkxjQGy71/FkInIxM2PiyHj0XUGycaQMggRD575G N9aw== X-Gm-Message-State: ALoCoQnmTY9MpuhJqV90bK7ZzdMneIlC8w8Q7eZedwLOjrpZwXc/oSsX2U3fwd6Mp9CLFO2q2BPg1qsZsNxSiiCSOY4nNalvcw== X-Received: by 10.28.30.210 with SMTP id e201mr12323249wme.42.1450776918957; Tue, 22 Dec 2015 01:35:18 -0800 (PST) Received: from 6wind.com (guy78-3-82-239-227-177.fbx.proxad.net. [82.239.227.177]) by smtp.gmail.com with ESMTPSA id c203sm7617482wmd.5.2015.12.22.01.35.16 (version=TLSv1/SSLv3 cipher=OTHER); Tue, 22 Dec 2015 01:35:17 -0800 (PST) Received: by 6wind.com (sSMTP sendmail emulation); Tue, 22 Dec 2015 10:35:00 +0100 From: Didier Pallard To: dev@dpdk.org, bruce.richardson@intel.com, pablo.de.lara.guarch@intel.com Date: Tue, 22 Dec 2015 10:34:58 +0100 Message-Id: <1450776898-8951-1-git-send-email-didier.pallard@6wind.com> X-Mailer: git-send-email 2.1.4 Subject: [dpdk-dev] [PATCH] hash: fix CRC32c computation X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 22 Dec 2015 09:35:20 -0000 As demonstrated by the following code, CRC32c computation is not valid when buffer length is not a multiple of 4 bytes: (Output obtained by code below) CRC of 1 NULL bytes expected: 0x527d5351 soft: 527d5351 rte accelerated: 48674bc7 rte soft: 48674bc7 CRC of 2 NULL bytes expected: 0xf16177d2 soft: f16177d2 rte accelerated: 48674bc7 rte soft: 48674bc7 CRC of 2x1 NULL bytes expected: 0xf16177d2 soft: f16177d2 rte accelerated: 8c28b28a rte soft: 8c28b28a CRC of 3 NULL bytes expected: 0x6064a37a soft: 6064a37a rte accelerated: 48674bc7 rte soft: 48674bc7 CRC of 4 NULL bytes expected: 0x48674bc7 soft: 48674bc7 rte accelerated: 48674bc7 rte soft: 48674bc7 Values returned by rte_hash_crc functions does not match the one computed by a trivial crc32c implementation. ARM code is a guess, it is not tested, neither compiled. code showing the problem: uint8_t null_test[32] = {0}; static uint32_t crc32c_trivial(uint8_t *buffer, uint32_t length, uint32_t crc) { uint32_t i, j; for (i = 0; i < length; ++i) { crc = crc ^ buffer[i]; for (j = 0; j < 8; j++) crc = (crc >> 1) ^ 0x80000000 ^ ((~crc & 1) * 0x82f63b78); } return crc; } void hash_test(void); void hash_test(void) { printf("CRC of 1 nul byte expected: 0x527d5351\n"); printf(" soft: %08x\n", crc32c_trivial(null_test, 1, 0)); rte_hash_crc_init_alg(); printf(" rte accelerated: %08x\n", ~rte_hash_crc(null_test, 1, 0xFFFFFFFF)); rte_hash_crc_set_alg(CRC32_SW); printf(" rte soft: %08x\n", ~rte_hash_crc(null_test, 1, 0xFFFFFFFF)); printf("CRC of 2 nul bytes expected: 0xf16177d2\n"); printf(" soft: %08x\n", crc32c_trivial(null_test, 2, 0)); rte_hash_crc_init_alg(); printf(" rte accelerated: %08x\n", ~rte_hash_crc(null_test, 2, 0xFFFFFFFF)); rte_hash_crc_set_alg(CRC32_SW); printf(" rte soft: %08x\n", ~rte_hash_crc(null_test, 2, 0xFFFFFFFF)); printf("CRC of 2x1 nul bytes expected: 0xf16177d2\n"); printf(" soft: %08x\n", crc32c_trivial(null_test, 1, crc32c_trivial(null_test, 1, 0))); rte_hash_crc_init_alg(); printf(" rte accelerated: %08x\n", ~rte_hash_crc(null_test, 1, rte_hash_crc(null_test, 1, 0xFFFFFFFF))); rte_hash_crc_set_alg(CRC32_SW); printf(" rte soft: %08x\n", ~rte_hash_crc(null_test, 1, rte_hash_crc(null_test, 1, 0xFFFFFFFF))); printf("CRC of 3 nul bytes expected: 0x6064a37a\n"); printf(" soft: %08x\n", crc32c_trivial(null_test, 3, 0)); rte_hash_crc_init_alg(); printf(" rte accelerated: %08x\n", ~rte_hash_crc(null_test, 3, 0xFFFFFFFF)); rte_hash_crc_set_alg(CRC32_SW); printf(" rte soft: %08x\n", ~rte_hash_crc(null_test, 3, 0xFFFFFFFF)); printf("CRC of 4 nul bytes expected: 0x48674bc7\n"); printf(" soft: %08x\n", crc32c_trivial(null_test, 4, 0)); rte_hash_crc_init_alg(); printf(" rte accelerated: %08x\n", ~rte_hash_crc(null_test, 4, 0xFFFFFFFF)); rte_hash_crc_set_alg(CRC32_SW); printf(" rte soft: %08x\n", ~rte_hash_crc(null_test, 4, 0xFFFFFFFF)); } Signed-off-by: Didier Pallard Acked-by: David Marchand --- lib/librte_hash/rte_crc_arm64.h | 64 ++++++++++++++++++++ lib/librte_hash/rte_hash_crc.h | 125 +++++++++++++++++++++++++++++++--------- 2 files changed, 162 insertions(+), 27 deletions(-) diff --git a/lib/librte_hash/rte_crc_arm64.h b/lib/librte_hash/rte_crc_arm64.h index 02e26bc..44ef460 100644 --- a/lib/librte_hash/rte_crc_arm64.h +++ b/lib/librte_hash/rte_crc_arm64.h @@ -50,6 +50,28 @@ extern "C" { #include static inline uint32_t +crc32c_arm64_u8(uint8_t data, uint32_t init_val) +{ + asm(".arch armv8-a+crc"); + __asm__ volatile( + "crc32cb %w[crc], %w[crc], %b[value]" + : [crc] "+r" (init_val) + : [value] "r" (data)); + return init_val; +} + +static inline uint32_t +crc32c_arm64_u16(uint16_t data, uint32_t init_val) +{ + asm(".arch armv8-a+crc"); + __asm__ volatile( + "crc32ch %w[crc], %w[crc], %h[value]" + : [crc] "+r" (init_val) + : [value] "r" (data)); + return init_val; +} + +static inline uint32_t crc32c_arm64_u32(uint32_t data, uint32_t init_val) { asm(".arch armv8-a+crc"); @@ -103,6 +125,48 @@ rte_hash_crc_init_alg(void) } /** + * Use single crc32 instruction to perform a hash on a 1 byte value. + * Fall back to software crc32 implementation in case arm64 crc intrinsics is + * not supported + * + * @param data + * Data to perform hash on. + * @param init_val + * Value to initialise hash generator. + * @return + * 32bit calculated hash value. + */ +static inline uint32_t +rte_hash_crc_1byte(uint8_t data, uint32_t init_val) +{ + if (likely(crc32_alg & CRC32_ARM64)) + return crc32c_arm64_u8(data, init_val); + + return crc32c_1byte(data, init_val); +} + +/** + * Use single crc32 instruction to perform a hash on a 2 bytes value. + * Fall back to software crc32 implementation in case arm64 crc intrinsics is + * not supported + * + * @param data + * Data to perform hash on. + * @param init_val + * Value to initialise hash generator. + * @return + * 32bit calculated hash value. + */ +static inline uint32_t +rte_hash_crc_2byte(uint16_t data, uint32_t init_val) +{ + if (likely(crc32_alg & CRC32_ARM64)) + return crc32c_arm64_u16(data, init_val); + + return crc32c_2bytes(data, init_val); +} + +/** * Use single crc32 instruction to perform a hash on a 4 byte value. * Fall back to software crc32 implementation in case arm64 crc intrinsics is * not supported diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h index 78a34b7..485f8a2 100644 --- a/lib/librte_hash/rte_hash_crc.h +++ b/lib/librte_hash/rte_hash_crc.h @@ -328,6 +328,28 @@ static const uint32_t crc32c_tables[8][256] = {{ crc32c_tables[(n)-1][((crc) >> 8) & 0xFF]) static inline uint32_t +crc32c_1byte(uint8_t data, uint32_t init_val) +{ + uint32_t crc; + crc = init_val; + crc ^= data; + + return crc32c_tables[0][crc & 0xff] ^ (crc >> 8); +} + +static inline uint32_t +crc32c_2bytes(uint16_t data, uint32_t init_val) +{ + uint32_t crc; + crc = init_val; + crc ^= data; + + crc = CRC32_UPD(crc, 1) ^ (crc >> 16); + + return crc; +} + +static inline uint32_t crc32c_1word(uint32_t data, uint32_t init_val) { uint32_t crc, term1, term2; @@ -367,6 +389,26 @@ crc32c_2words(uint64_t data, uint32_t init_val) #if defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_64) static inline uint32_t +crc32c_sse42_u8(uint8_t data, uint32_t init_val) +{ + __asm__ volatile( + "crc32b %[data], %[init_val];" + : [init_val] "+r" (init_val) + : [data] "rm" (data)); + return init_val; +} + +static inline uint32_t +crc32c_sse42_u16(uint16_t data, uint32_t init_val) +{ + __asm__ volatile( + "crc32w %[data], %[init_val];" + : [init_val] "+r" (init_val) + : [data] "rm" (data)); + return init_val; +} + +static inline uint32_t crc32c_sse42_u32(uint32_t data, uint32_t init_val) { __asm__ volatile( @@ -453,6 +495,52 @@ rte_hash_crc_init_alg(void) } /** + * Use single crc32 instruction to perform a hash on a byte value. + * Fall back to software crc32 implementation in case SSE4.2 is + * not supported + * + * @param data + * Data to perform hash on. + * @param init_val + * Value to initialise hash generator. + * @return + * 32bit calculated hash value. + */ +static inline uint32_t +rte_hash_crc_1byte(uint8_t data, uint32_t init_val) +{ +#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64 + if (likely(crc32_alg & CRC32_SSE42)) + return crc32c_sse42_u8(data, init_val); +#endif + + return crc32c_1byte(data, init_val); +} + +/** + * Use single crc32 instruction to perform a hash on a byte value. + * Fall back to software crc32 implementation in case SSE4.2 is + * not supported + * + * @param data + * Data to perform hash on. + * @param init_val + * Value to initialise hash generator. + * @return + * 32bit calculated hash value. + */ +static inline uint32_t +rte_hash_crc_2byte(uint16_t data, uint32_t init_val) +{ +#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64 + if (likely(crc32_alg & CRC32_SSE42)) + return crc32c_sse42_u16(data, init_val); +#endif + + return crc32c_2bytes(data, init_val); +} + +/** * Use single crc32 instruction to perform a hash on a 4 byte value. * Fall back to software crc32 implementation in case SSE4.2 is * not supported @@ -521,7 +609,6 @@ static inline uint32_t rte_hash_crc(const void *data, uint32_t data_len, uint32_t init_val) { unsigned i; - uint64_t temp = 0; uintptr_t pd = (uintptr_t) data; for (i = 0; i < data_len / 8; i++) { @@ -529,35 +616,19 @@ rte_hash_crc(const void *data, uint32_t data_len, uint32_t init_val) pd += 8; } - switch (7 - (data_len & 0x07)) { - case 0: - temp |= (uint64_t) *((const uint8_t *)pd + 6) << 48; - /* Fallthrough */ - case 1: - temp |= (uint64_t) *((const uint8_t *)pd + 5) << 40; - /* Fallthrough */ - case 2: - temp |= (uint64_t) *((const uint8_t *)pd + 4) << 32; - temp |= *(const uint32_t *)pd; - init_val = rte_hash_crc_8byte(temp, init_val); - break; - case 3: + if (data_len & 0x4) { init_val = rte_hash_crc_4byte(*(const uint32_t *)pd, init_val); - break; - case 4: - temp |= *((const uint8_t *)pd + 2) << 16; - /* Fallthrough */ - case 5: - temp |= *((const uint8_t *)pd + 1) << 8; - /* Fallthrough */ - case 6: - temp |= *(const uint8_t *)pd; - init_val = rte_hash_crc_4byte(temp, init_val); - /* Fallthrough */ - default: - break; + pd += 4; + } + + if (data_len & 0x2) { + init_val = rte_hash_crc_2byte(*(const uint16_t *)pd, init_val); + pd += 2; } + if (data_len & 0x1) + init_val = rte_hash_crc_1byte(*(const uint8_t *)pd, init_val); + return init_val; } -- 2.1.4