From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pa0-f47.google.com (mail-pa0-f47.google.com [209.85.220.47]) by dpdk.org (Postfix) with ESMTP id 781E45A67 for ; Wed, 22 Apr 2015 17:33:56 +0200 (CEST) Received: by pacyx8 with SMTP id yx8so275890143pac.1 for ; Wed, 22 Apr 2015 08:33:55 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=from:to:cc:subject:date:message-id:in-reply-to:references; bh=Aah3JX8tRSp7oIVuckA+lUvsCt5+82DDv0stqb6qX28=; b=KCte27mTBy7bVp5XaNF11vqZnsGNzNSBZO4ukVW3FmLecpGbS1xY/LyTHUjFITheXX HkfNG2CotgVGxKL+yJfPMnUMktLwF0aWaopabKaBfmwNrVh65yEgHw0O8sQDCIRRWNtJ uT8BH+l0+yfjdp8UD01GUYFFl9AVQs41Z4kXGNAMleEn78p7WXddHMlHk71AzJz54Zma QcvZrVKK5sMP1WPo0WBiq0dY1vfElHSOijUl4o1mTOZiBU9mOCw+5C6crOfR7OD9+3wI J0YvwCdzHdNITf+OHGjwtNhd58qon+D24UGBzsWAfBc5qnW3gif18TgL9PjIkaOFv/LR 24NA== X-Received: by 10.67.24.33 with SMTP id if1mr9259540pad.24.1429716835909; Wed, 22 Apr 2015 08:33:55 -0700 (PDT) Received: from user-PC.hsd1.ca.comcast.net (c-98-234-176-9.hsd1.ca.comcast.net. [98.234.176.9]) by mx.google.com with ESMTPSA id eo5sm5386869pbb.51.2015.04.22.08.33.54 (version=TLSv1.2 cipher=ECDHE-RSA-AES128-SHA bits=128/128); Wed, 22 Apr 2015 08:33:55 -0700 (PDT) From: Ravi Kerur To: dev@dpdk.org Date: Wed, 22 Apr 2015 08:33:48 -0700 Message-Id: <1429716828-19012-2-git-send-email-rkerur@gmail.com> X-Mailer: git-send-email 1.9.1 In-Reply-To: <1429716828-19012-1-git-send-email-rkerur@gmail.com> References: <1429716828-19012-1-git-send-email-rkerur@gmail.com> Subject: [dpdk-dev] [PATCH] Implement memcmp using AVX/SSE instructio X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 22 Apr 2015 15:33:57 -0000 This patch replaces memcmp and strncmp in librte_hash with rte_memcmp which is implemented with AVX/SSE instructions. 
Preliminary results on Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz, Ubuntu 14.04 x86_64 show a 1-second improvement when hash key length <= 64 and a 4-second improvement when hash key length <= 128. This patch is an RFC to engage the team and improve performance further. Signed-off-by: Ravi Kerur --- app/test/test_hash.c | 2 +- app/test/test_hash_perf.c | 302 +++++++------ .../common/include/arch/ppc_64/rte_memcmp.h | 62 +++ .../common/include/arch/x86/rte_memcmp.h | 479 +++++++++++++++++++++ lib/librte_eal/common/include/generic/rte_memcmp.h | 119 +++++ lib/librte_hash/rte_fbk_hash.c | 7 +- lib/librte_hash/rte_hash.c | 15 +- lib/librte_hash/rte_hash.h | 2 +- 8 files changed, 857 insertions(+), 131 deletions(-) create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_memcmp.h create mode 100644 lib/librte_eal/common/include/generic/rte_memcmp.h diff --git a/app/test/test_hash.c b/app/test/test_hash.c index 1da27c5..719c135 100644 --- a/app/test/test_hash.c +++ b/app/test/test_hash.c @@ -65,7 +65,7 @@ */ static rte_hash_function hashtest_funcs[] = {rte_jhash, rte_hash_crc}; static uint32_t hashtest_initvals[] = {0}; -static uint32_t hashtest_key_lens[] = {0, 2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, 31, 32, 33, 63, 64}; +static uint32_t hashtest_key_lens[] = {0, 2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, 31, 32, 33, 40, 42, 48, 54, 60, 63, 64, 128}; /******************************************************************************/ #define LOCAL_FBK_HASH_ENTRIES_MAX (1 << 15) diff --git a/app/test/test_hash_perf.c b/app/test/test_hash_perf.c index 6eabb21..cccf70f 100644 --- a/app/test/test_hash_perf.c +++ b/app/test/test_hash_perf.c @@ -95,7 +95,7 @@ struct tbl_perf_test_params { static rte_hash_function hashtest_funcs[] = {rte_jhash, rte_hash_crc}; static uint32_t hashtest_initvals[] = {0}; -static uint32_t hashtest_key_lens[] = {2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, 31, 32, 33, 63, 64}; +static uint32_t 
hashtest_key_lens[] = {2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, 31, 32, 33, 40, 42, 48, 54, 60, 63, 64, 128}; /******************************************************************************/ /******************************************************************************* @@ -125,6 +125,11 @@ struct tbl_perf_test_params tbl_perf_params[] = { ADD_ON_EMPTY, 1024, 1024, 4, 64, rte_jhash, 0}, { ADD_ON_EMPTY, 1024, 1024, 8, 64, rte_jhash, 0}, { ADD_ON_EMPTY, 1024, 1024, 16, 64, rte_jhash, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 1, 128, rte_jhash, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 2, 128, rte_jhash, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 4, 128, rte_jhash, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 8, 128, rte_jhash, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 16, 128, rte_jhash, 0}, /* Small table, update */ /* Test type | Iterations | Entries | BucketSize | KeyLen | HashFunc | InitVal */ { ADD_UPDATE, ITERATIONS, 1024, 1, 16, rte_jhash, 0}, @@ -147,6 +152,11 @@ struct tbl_perf_test_params tbl_perf_params[] = { ADD_UPDATE, ITERATIONS, 1024, 4, 64, rte_jhash, 0}, { ADD_UPDATE, ITERATIONS, 1024, 8, 64, rte_jhash, 0}, { ADD_UPDATE, ITERATIONS, 1024, 16, 64, rte_jhash, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 1, 128, rte_jhash, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 2, 128, rte_jhash, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 4, 128, rte_jhash, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 8, 128, rte_jhash, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 16, 128, rte_jhash, 0}, /* Small table, lookup */ /* Test type | Iterations | Entries | BucketSize | KeyLen | HashFunc | InitVal */ { LOOKUP, ITERATIONS, 1024, 1, 16, rte_jhash, 0}, @@ -169,6 +179,11 @@ struct tbl_perf_test_params tbl_perf_params[] = { LOOKUP, ITERATIONS, 1024, 4, 64, rte_jhash, 0}, { LOOKUP, ITERATIONS, 1024, 8, 64, rte_jhash, 0}, { LOOKUP, ITERATIONS, 1024, 16, 64, rte_jhash, 0}, +{ LOOKUP, ITERATIONS, 1024, 1, 128, rte_jhash, 0}, +{ LOOKUP, ITERATIONS, 1024, 2, 128, rte_jhash, 0}, +{ LOOKUP, ITERATIONS, 1024, 4, 128, rte_jhash, 0}, +{ LOOKUP, ITERATIONS, 1024, 8, 128, 
rte_jhash, 0}, +{ LOOKUP, ITERATIONS, 1024, 16, 128, rte_jhash, 0}, /* Big table, add */ /* Test type | Iterations | Entries | BucketSize | KeyLen | HashFunc | InitVal */ { ADD_ON_EMPTY, 1048576, 1048576, 1, 16, rte_jhash, 0}, @@ -191,6 +206,11 @@ struct tbl_perf_test_params tbl_perf_params[] = { ADD_ON_EMPTY, 1048576, 1048576, 4, 64, rte_jhash, 0}, { ADD_ON_EMPTY, 1048576, 1048576, 8, 64, rte_jhash, 0}, { ADD_ON_EMPTY, 1048576, 1048576, 16, 64, rte_jhash, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 1, 128, rte_jhash, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 2, 128, rte_jhash, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 4, 128, rte_jhash, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 8, 128, rte_jhash, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 16, 128, rte_jhash, 0}, /* Big table, update */ /* Test type | Iterations | Entries | BucketSize | KeyLen | HashFunc | InitVal */ { ADD_UPDATE, ITERATIONS, 1048576, 1, 16, rte_jhash, 0}, @@ -213,6 +233,11 @@ struct tbl_perf_test_params tbl_perf_params[] = { ADD_UPDATE, ITERATIONS, 1048576, 4, 64, rte_jhash, 0}, { ADD_UPDATE, ITERATIONS, 1048576, 8, 64, rte_jhash, 0}, { ADD_UPDATE, ITERATIONS, 1048576, 16, 64, rte_jhash, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 1, 128, rte_jhash, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 2, 128, rte_jhash, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 4, 128, rte_jhash, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 8, 128, rte_jhash, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 16, 128, rte_jhash, 0}, /* Big table, lookup */ /* Test type | Iterations | Entries | BucketSize | KeyLen | HashFunc | InitVal */ { LOOKUP, ITERATIONS, 1048576, 1, 16, rte_jhash, 0}, @@ -235,138 +260,173 @@ struct tbl_perf_test_params tbl_perf_params[] = { LOOKUP, ITERATIONS, 1048576, 4, 64, rte_jhash, 0}, { LOOKUP, ITERATIONS, 1048576, 8, 64, rte_jhash, 0}, { LOOKUP, ITERATIONS, 1048576, 16, 64, rte_jhash, 0}, +{ LOOKUP, ITERATIONS, 1048576, 1, 128, rte_jhash, 0}, +{ LOOKUP, ITERATIONS, 1048576, 2, 128, rte_jhash, 0}, +{ LOOKUP, ITERATIONS, 1048576, 4, 128, 
rte_jhash, 0}, +{ LOOKUP, ITERATIONS, 1048576, 8, 128, rte_jhash, 0}, +{ LOOKUP, ITERATIONS, 1048576, 16, 128, rte_jhash, 0}, /* Small table, add */ /* Test type | Iterations | Entries | BucketSize | KeyLen | HashFunc | InitVal */ -{ ADD_ON_EMPTY, 1024, 1024, 1, 16, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1024, 1024, 2, 16, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1024, 1024, 4, 16, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1024, 1024, 8, 16, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1024, 1024, 16, 16, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1024, 1024, 1, 32, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1024, 1024, 2, 32, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1024, 1024, 4, 32, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1024, 1024, 8, 32, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1024, 1024, 16, 32, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1024, 1024, 1, 48, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1024, 1024, 2, 48, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1024, 1024, 4, 48, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1024, 1024, 8, 48, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1024, 1024, 16, 48, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1024, 1024, 1, 64, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1024, 1024, 2, 64, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1024, 1024, 4, 64, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1024, 1024, 8, 64, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1024, 1024, 16, 64, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 1, 16, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 2, 16, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 4, 16, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 8, 16, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 16, 16, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 1, 32, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 2, 32, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 4, 32, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 8, 32, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 16, 32, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 1, 48, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 2, 48, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 4, 
48, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 8, 48, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 16, 48, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 1, 64, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 2, 64, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 4, 64, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 8, 64, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 16, 64, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 1, 128, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 2, 128, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 4, 128, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 8, 128, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1024, 1024, 16, 128, rte_hash_crc, 0}, /* Small table, update */ /* Test type | Iterations | Entries | BucketSize | KeyLen | HashFunc | InitVal */ -{ ADD_UPDATE, ITERATIONS, 1024, 1, 16, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1024, 2, 16, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1024, 4, 16, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1024, 8, 16, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1024, 16, 16, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1024, 1, 32, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1024, 2, 32, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1024, 4, 32, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1024, 8, 32, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1024, 16, 32, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1024, 1, 48, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1024, 2, 48, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1024, 4, 48, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1024, 8, 48, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1024, 16, 48, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1024, 1, 64, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1024, 2, 64, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1024, 4, 64, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1024, 8, 64, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1024, 16, 64, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 1, 16, 
rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 2, 16, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 4, 16, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 8, 16, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 16, 16, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 1, 32, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 2, 32, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 4, 32, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 8, 32, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 16, 32, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 1, 48, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 2, 48, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 4, 48, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 8, 48, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 16, 48, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 1, 64, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 2, 64, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 4, 64, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 8, 64, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 16, 64, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 1, 128, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 2, 128, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 4, 128, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 8, 128, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1024, 16, 128, rte_hash_crc, 0}, /* Small table, lookup */ /* Test type | Iterations | Entries | BucketSize | KeyLen | HashFunc | InitVal */ -{ LOOKUP, ITERATIONS, 1024, 1, 16, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1024, 2, 16, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1024, 4, 16, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1024, 8, 16, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1024, 16, 16, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1024, 1, 32, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1024, 2, 32, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1024, 4, 32, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1024, 8, 32, 
rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1024, 16, 32, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1024, 1, 48, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1024, 2, 48, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1024, 4, 48, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1024, 8, 48, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1024, 16, 48, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1024, 1, 64, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1024, 2, 64, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1024, 4, 64, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1024, 8, 64, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1024, 16, 64, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1024, 1, 16, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1024, 2, 16, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1024, 4, 16, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1024, 8, 16, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1024, 16, 16, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1024, 1, 32, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1024, 2, 32, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1024, 4, 32, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1024, 8, 32, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1024, 16, 32, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1024, 1, 48, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1024, 2, 48, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1024, 4, 48, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1024, 8, 48, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1024, 16, 48, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1024, 1, 64, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1024, 2, 64, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1024, 4, 64, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1024, 8, 64, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1024, 16, 64, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1024, 1, 128, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1024, 2, 128, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1024, 4, 128, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1024, 8, 128, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1024, 16, 128, rte_hash_crc, 0}, /* Big table, add */ /* 
Test type | Iterations | Entries | BucketSize | KeyLen | HashFunc | InitVal */ -{ ADD_ON_EMPTY, 1048576, 1048576, 1, 16, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1048576, 1048576, 2, 16, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1048576, 1048576, 4, 16, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1048576, 1048576, 8, 16, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1048576, 1048576, 16, 16, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1048576, 1048576, 1, 32, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1048576, 1048576, 2, 32, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1048576, 1048576, 4, 32, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1048576, 1048576, 8, 32, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1048576, 1048576, 16, 32, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1048576, 1048576, 1, 48, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1048576, 1048576, 2, 48, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1048576, 1048576, 4, 48, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1048576, 1048576, 8, 48, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1048576, 1048576, 16, 48, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1048576, 1048576, 1, 64, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1048576, 1048576, 2, 64, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1048576, 1048576, 4, 64, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1048576, 1048576, 8, 64, rte_hash_crc, 0}, -{ ADD_ON_EMPTY, 1048576, 1048576, 16, 64, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 1, 16, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 2, 16, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 4, 16, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 8, 16, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 16, 16, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 1, 32, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 2, 32, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 4, 32, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 8, 32, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 16, 32, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 1, 48, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 2, 48, 
rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 4, 48, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 8, 48, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 16, 48, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 1, 64, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 2, 64, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 4, 64, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 8, 64, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 16, 64, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 1, 128, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 2, 128, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 4, 128, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 8, 128, rte_hash_crc, 0}, +{ ADD_ON_EMPTY, 1048576, 1048576, 16, 128, rte_hash_crc, 0}, /* Big table, update */ /* Test type | Iterations | Entries | BucketSize | KeyLen | HashFunc | InitVal */ -{ ADD_UPDATE, ITERATIONS, 1048576, 1, 16, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1048576, 2, 16, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1048576, 4, 16, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1048576, 8, 16, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1048576, 16, 16, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1048576, 1, 32, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1048576, 2, 32, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1048576, 4, 32, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1048576, 8, 32, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1048576, 16, 32, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1048576, 1, 48, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1048576, 2, 48, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1048576, 4, 48, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1048576, 8, 48, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1048576, 16, 48, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1048576, 1, 64, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1048576, 2, 64, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1048576, 4, 64, 
rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1048576, 8, 64, rte_hash_crc, 0}, -{ ADD_UPDATE, ITERATIONS, 1048576, 16, 64, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 1, 16, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 2, 16, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 4, 16, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 8, 16, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 16, 16, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 1, 32, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 2, 32, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 4, 32, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 8, 32, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 16, 32, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 1, 48, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 2, 48, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 4, 48, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 8, 48, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 16, 48, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 1, 64, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 2, 64, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 4, 64, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 8, 64, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 16, 64, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 1, 128, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 2, 128, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 4, 128, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 8, 128, rte_hash_crc, 0}, +{ ADD_UPDATE, ITERATIONS, 1048576, 16, 128, rte_hash_crc, 0}, /* Big table, lookup */ /* Test type | Iterations | Entries | BucketSize | KeyLen | HashFunc | InitVal */ -{ LOOKUP, ITERATIONS, 1048576, 1, 16, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1048576, 2, 16, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1048576, 4, 16, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1048576, 8, 16, 
rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1048576, 16, 16, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1048576, 1, 32, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1048576, 2, 32, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1048576, 4, 32, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1048576, 8, 32, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1048576, 16, 32, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1048576, 1, 48, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1048576, 2, 48, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1048576, 4, 48, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1048576, 8, 48, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1048576, 16, 48, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1048576, 1, 64, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1048576, 2, 64, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1048576, 4, 64, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1048576, 8, 64, rte_hash_crc, 0}, -{ LOOKUP, ITERATIONS, 1048576, 16, 64, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1048576, 1, 16, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1048576, 2, 16, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1048576, 4, 16, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1048576, 8, 16, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1048576, 16, 16, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1048576, 1, 32, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1048576, 2, 32, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1048576, 4, 32, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1048576, 8, 32, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1048576, 16, 32, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1048576, 1, 48, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1048576, 2, 48, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1048576, 4, 48, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1048576, 8, 48, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1048576, 16, 48, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1048576, 1, 64, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1048576, 2, 64, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1048576, 4, 64, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1048576, 8, 
64, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1048576, 16, 64, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1048576, 1, 128, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1048576, 2, 128, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1048576, 4, 128, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1048576, 8, 128, rte_hash_crc, 0}, +{ LOOKUP, ITERATIONS, 1048576, 16, 128, rte_hash_crc, 0}, }; /******************************************************************************/ diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h b/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h new file mode 100644 index 0000000..7f99ee1 --- /dev/null +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h @@ -0,0 +1,62 @@ +/* + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2014. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of IBM Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_MEMCMP_PPC_64_H_ +#define _RTE_MEMCMP_PPC_64_H_ + +#include +#include +/*To include altivec.h, GCC version must >= 4.8 */ +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_memcmp.h" + +#define rte_memcmp(dst, src, n) \ + ({ (__builtin_constant_p(n)) ? \ + memcmp((dst), (src), (n)) : \ + rte_memcmp_func((dst), (src), (n)); }) + +static inline bool +rte_memcmp_func(void *dst, const void *src, size_t n) +{ + return memcmp(dst, src, n); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMCMP_PPC_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcmp.h b/lib/librte_eal/common/include/arch/x86/rte_memcmp.h new file mode 100644 index 0000000..8ea34c0 --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_memcmp.h @@ -0,0 +1,479 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMCMP_X86_64_H_ +#define _RTE_MEMCMP_X86_64_H_ + +/** + * @file + * + * Functions for SSE/AVX/AVX2 implementation of memcmp(). + */ + +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Compare bytes between two locations. The locations must not overlap. + * + * @note This is implemented as a macro, so its address should not be taken + * and care is needed as parameter expressions may be evaluated multiple times. + * + * @param src_1 + * Pointer to the first source of the data. + * @param src_2 + * Pointer to the second source of the data. + * @param n + * Number of bytes to compare. + * @return + * false (0) if the compared bytes are equal, true if they differ. + */ +static inline bool +rte_memcmp(const void *src_1, const void *src, + size_t n) __attribute__((always_inline)); + +#ifdef RTE_MACHINE_CPUFLAG_AVX2 + +/** + * AVX2 implementation below + */ + +/** + * Compare 16 bytes between two locations. 
+ * locations should not overlap. + */ +static inline bool +rte_cmp16(const uint8_t *src_1, const uint8_t *src_2) +{ + __m128i xmm0; + __m128i xmm1; + __m128i vcmp; + uint32_t vmask; + + xmm0 = _mm_loadu_si128((const __m128i *)src_1); + xmm1 = _mm_loadu_si128((const __m128i *)src_2); + + vcmp = _mm_cmpeq_epi16(xmm0, xmm1); + vmask = _mm_movemask_epi8(vcmp); + return (!(vmask == 0xffffU)); +} + +/** + * Compare 32 bytes between two locations. + * Locations should not overlap. + */ +static inline bool +rte_cmp32(const uint8_t *src_1, const uint8_t *src_2) +{ + __m256i xmm0; + __m256i xmm1; + __m256i vcmp; + uint64_t vmask; + + xmm0 = _mm256_loadu_si256((const __m256i *)src_1); + xmm1 = _mm256_loadu_si256((const __m256i *)src_2); + + vcmp = _mm256_cmpeq_epi32(xmm0, xmm1); + vmask = _mm256_movemask_epi8(vcmp); + return (!(vmask == 0xffffffffU)); +} + +/** + * Compare 64 bytes between two locations. + * Locations should not overlap. + */ +static inline bool +rte_cmp64(const uint8_t *src_1, const uint8_t *src_2) +{ + bool ret; + + ret = rte_cmp32(src_1 + 0 * 32, src_2 + 0 * 32); + + if (likely(ret == 0)) + ret = rte_cmp32(src_1 + 1 * 32, src_2 + 1 * 32); + + return ret; +} + +/** + * Compare 128 bytes between two locations. + * Locations should not overlap. 
+ */
+static inline bool
+rte_cmp128(const uint8_t *src_1, const uint8_t *src_2)
+{
+	bool ret;
+
+	/* Four 32-byte compares; later ones are skipped after a mismatch */
+	ret = rte_cmp32(src_1 + 0 * 32, src_2 + 0 * 32);
+
+	if (likely(ret == 0))
+		ret = rte_cmp32(src_1 + 1 * 32, src_2 + 1 * 32);
+
+	if (likely(ret == 0))
+		ret = rte_cmp32(src_1 + 2 * 32, src_2 + 2 * 32);
+
+	if (likely(ret == 0))
+		ret = rte_cmp32(src_1 + 3 * 32, src_2 + 3 * 32);
+
+	return ret;
+}
+
+/**
+ * Compare the trailing (n & 0x0f) bytes of two locations.
+ *
+ * Only the low four bits of n are examined: one 1-, 2-, 4- and 8-byte
+ * load is issued for each bit that is set. Callers must therefore have
+ * consumed every whole 16-byte chunk before calling this helper.
+ *
+ * @param _src_1
+ *   Pointer to the first source of the data.
+ * @param _src_2
+ *   Pointer to the second source of the data.
+ * @param n
+ *   Number of bytes to compare; bits above 0x0f are ignored.
+ * @return
+ *   0 if the bytes are equal, 1 otherwise.
+ */
+static inline bool
+rte_memcmp_remainder(const void *_src_1, const void *_src_2, size_t n)
+{
+	uintptr_t src_1u = (uintptr_t)_src_1;
+	uintptr_t src_2u = (uintptr_t)_src_2;
+
+	bool ret_1 = 1, ret_2 = 1, ret_4 = 1, ret_8 = 1;
+
+	/**
+	 * Compare less than 16 bytes; the wider loads may be unaligned
+	 */
+	if (n & 0x01) {
+		ret_1 = (*(const uint8_t *)src_1u ==
+				*(const uint8_t *)src_2u);
+		src_1u = (uintptr_t)((const uint8_t *)src_1u + 1);
+		src_2u = (uintptr_t)((const uint8_t *)src_2u + 1);
+	}
+	if (n & 0x02) {
+		ret_2 = (*(const uint16_t *)src_1u ==
+				*(const uint16_t *)src_2u);
+		src_1u = (uintptr_t)((const uint16_t *)src_1u + 1);
+		src_2u = (uintptr_t)((const uint16_t *)src_2u + 1);
+	}
+	if (n & 0x04) {
+		ret_4 = (*(const uint32_t *)src_1u ==
+				*(const uint32_t *)src_2u);
+		src_1u = (uintptr_t)((const uint32_t *)src_1u + 1);
+		src_2u = (uintptr_t)((const uint32_t *)src_2u + 1);
+	}
+	if (n & 0x08) {
+		ret_8 = (*(const uint64_t *)src_1u ==
+				*(const uint64_t *)src_2u);
+	}
+	return (!(ret_1 && ret_2 && ret_4 && ret_8));
+}
+
+/**
+ * Compare n bytes between two locations for equality.
+ *
+ * The buffers are walked front to back in 128-, 64-, 32- and 16-byte
+ * chunks and the final (n & 0x0f) bytes are handed to
+ * rte_memcmp_remainder(), so every byte is compared exactly once for
+ * any value of n, including n > 128.
+ *
+ * @param _src_1
+ *   Pointer to the first source of the data.
+ * @param _src_2
+ *   Pointer to the second source of the data.
+ * @param n
+ *   Number of bytes to compare.
+ * @return
+ *   0 if the two regions are equal, 1 otherwise. Unlike memcmp(), the
+ *   result carries no ordering information.
+ */
+static inline bool
+rte_memcmp(const void *_src_1, const void *_src_2, size_t n)
+{
+	const uint8_t *src_1 = (const uint8_t *)_src_1;
+	const uint8_t *src_2 = (const uint8_t *)_src_2;
+	bool ret = 0;
+
+	/* Whole 128-byte chunks; this loop also covers n > 128 */
+	while (n >= 128 && likely(ret == 0)) {
+		ret = rte_cmp128(src_1, src_2);
+		src_1 += 128;
+		src_2 += 128;
+		n -= 128;
+	}
+
+	/* While still equal, at most one chunk of each smaller size is left */
+	if ((n & 0x40) && likely(ret == 0)) {
+		ret = rte_cmp64(src_1, src_2);
+		src_1 += 64;
+		src_2 += 64;
+	}
+
+	if ((n & 0x20) && likely(ret == 0)) {
+		ret = rte_cmp32(src_1, src_2);
+		src_1 += 32;
+		src_2 += 32;
+	}
+
+	if ((n & 0x10) && likely(ret == 0)) {
+		ret = rte_cmp16(src_1, src_2);
+		src_1 += 16;
+		src_2 += 16;
+	}
+
+	/* Fewer than 16 bytes remain */
+	if (likely(ret == 0))
+		ret = rte_memcmp_remainder(src_1, src_2, n);
+
+	return ret;
+}
+
+#else /* RTE_MACHINE_CPUFLAG_AVX2 */
+
+/**
+ * SSE & AVX implementation below
+ */
+
+/**
+ * Compare 16 bytes between two locations.
+ * Locations should not overlap.
+ */
+static inline bool
+rte_cmp16(const uint8_t *src_1, const uint8_t *src_2)
+{
+	__m128i xmm0;
+	__m128i xmm1;
+	__m128i vcmp;
+	uint32_t vmask;
+
+	xmm0 = _mm_loadu_si128((const __m128i *)src_1);
+	xmm1 = _mm_loadu_si128((const __m128i *)src_2);
+
+	/*
+	 * A 16-bit lane compares equal only when both of its bytes match,
+	 * so the byte mask is 0xffff exactly when all 16 bytes are equal.
+	 */
+	vcmp = _mm_cmpeq_epi16(xmm0, xmm1);
+	vmask = _mm_movemask_epi8(vcmp);
+	return (!(vmask == 0xffffU));
+}
+
+/**
+ * Compare 32 bytes between two locations.
+ * Locations should not overlap.
+ */
+static inline bool
+rte_cmp32(const uint8_t *src_1, const uint8_t *src_2)
+{
+	bool ret;
+
+	ret = rte_cmp16(src_1 + 0 * 16, src_2 + 0 * 16);
+
+	if (likely(ret == 0))
+		ret = rte_cmp16(src_1 + 1 * 16, src_2 + 1 * 16);
+
+	return ret;
+}
+
+/**
+ * Compare 64 bytes between two locations.
+ * Locations should not overlap.
+ */
+static inline bool
+rte_cmp64(const uint8_t *src_1, const uint8_t *src_2)
+{
+	bool ret;
+
+	/* Four 16-byte compares; later ones are skipped after a mismatch */
+	ret = rte_cmp16(src_1 + 0 * 16, src_2 + 0 * 16);
+
+	if (likely(ret == 0))
+		ret = rte_cmp16(src_1 + 1 * 16, src_2 + 1 * 16);
+
+	if (likely(ret == 0))
+		ret = rte_cmp16(src_1 + 2 * 16, src_2 + 2 * 16);
+
+	if (likely(ret == 0))
+		ret = rte_cmp16(src_1 + 3 * 16, src_2 + 3 * 16);
+
+	return ret;
+}
+
+/**
+ * Compare 128 bytes between two locations.
+ * Locations should not overlap.
+ */
+static inline bool
+rte_cmp128(const uint8_t *src_1, const uint8_t *src_2)
+{
+	bool ret;
+
+	ret = rte_cmp64(src_1 + 0 * 64, src_2 + 0 * 64);
+
+	if (likely(ret == 0))
+		ret = rte_cmp64(src_1 + 1 * 64, src_2 + 1 * 64);
+
+	return ret;
+}
+
+/**
+ * Compare the trailing (n & 0x0f) bytes of two locations.
+ *
+ * Only the low four bits of n are examined: one 1-, 2-, 4- and 8-byte
+ * load is issued for each bit that is set. Callers must therefore have
+ * consumed every whole 16-byte chunk before calling this helper.
+ *
+ * @param _src_1
+ *   Pointer to the first source of the data.
+ * @param _src_2
+ *   Pointer to the second source of the data.
+ * @param n
+ *   Number of bytes to compare; bits above 0x0f are ignored.
+ * @return
+ *   0 if the bytes are equal, 1 otherwise.
+ */
+static inline bool
+rte_memcmp_remainder(const void *_src_1, const void *_src_2, size_t n)
+{
+	uintptr_t src_1u = (uintptr_t)_src_1;
+	uintptr_t src_2u = (uintptr_t)_src_2;
+
+	bool ret_1 = 1, ret_2 = 1, ret_4 = 1, ret_8 = 1;
+
+	/**
+	 * Compare less than 16 bytes; the wider loads may be unaligned
+	 */
+	if (n & 0x01) {
+		ret_1 = (*(const uint8_t *)src_1u ==
+				*(const uint8_t *)src_2u);
+		src_1u = (uintptr_t)((const uint8_t *)src_1u + 1);
+		src_2u = (uintptr_t)((const uint8_t *)src_2u + 1);
+	}
+	if (n & 0x02) {
+		ret_2 = (*(const uint16_t *)src_1u ==
+				*(const uint16_t *)src_2u);
+		src_1u = (uintptr_t)((const uint16_t *)src_1u + 1);
+		src_2u = (uintptr_t)((const uint16_t *)src_2u + 1);
+	}
+	if (n & 0x04) {
+		ret_4 = (*(const uint32_t *)src_1u ==
+				*(const uint32_t *)src_2u);
+		src_1u = (uintptr_t)((const uint32_t *)src_1u + 1);
+		src_2u = (uintptr_t)((const uint32_t *)src_2u + 1);
+	}
+	if (n & 0x08) {
+		ret_8 = (*(const uint64_t *)src_1u ==
+				*(const uint64_t *)src_2u);
+	}
+	return (!(ret_1 && ret_2 && ret_4 && ret_8));
+}
+
+/**
+ * Compare n bytes between two locations for equality.
+ *
+ * The buffers are walked front to back in 128-, 64-, 32- and 16-byte
+ * chunks and the final (n & 0x0f) bytes are handed to
+ * rte_memcmp_remainder(), so every byte is compared exactly once for
+ * any value of n, including n > 128.
+ *
+ * @param _src_1
+ *   Pointer to the first source of the data.
+ * @param _src_2
+ *   Pointer to the second source of the data.
+ * @param n
+ *   Number of bytes to compare.
+ * @return
+ *   0 if the two regions are equal, 1 otherwise. Unlike memcmp(), the
+ *   result carries no ordering information.
+ */
+static inline bool
+rte_memcmp(const void *_src_1, const void *_src_2, size_t n)
+{
+	const uint8_t *src_1 = (const uint8_t *)_src_1;
+	const uint8_t *src_2 = (const uint8_t *)_src_2;
+	bool ret = 0;
+
+	/* Whole 128-byte chunks; this loop also covers n > 128 */
+	while (n >= 128 && likely(ret == 0)) {
+		ret = rte_cmp128(src_1, src_2);
+		src_1 += 128;
+		src_2 += 128;
+		n -= 128;
+	}
+
+	/* While still equal, at most one chunk of each smaller size is left */
+	if ((n & 0x40) && likely(ret == 0)) {
+		ret = rte_cmp64(src_1, src_2);
+		src_1 += 64;
+		src_2 += 64;
+	}
+
+	if ((n & 0x20) && likely(ret == 0)) {
+		ret = rte_cmp32(src_1, src_2);
+		src_1 += 32;
+		src_2 += 32;
+	}
+
+	if ((n & 0x10) && likely(ret == 0)) {
+		ret = rte_cmp16(src_1, src_2);
+		src_1 += 16;
+		src_2 += 16;
+	}
+
+	/* Fewer than 16 bytes remain */
+	if (likely(ret == 0))
+		ret = rte_memcmp_remainder(src_1, src_2, n);
+
+	return ret;
+}
+
+#endif /* RTE_MACHINE_CPUFLAG_AVX2 */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCMP_X86_64_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_memcmp.h b/lib/librte_eal/common/include/generic/rte_memcmp.h
new file mode 100644
index 0000000..694c659
--- /dev/null
+++ b/lib/librte_eal/common/include/generic/rte_memcmp.h
@@ -0,0 +1,119 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMCMP_H_ +#define _RTE_MEMCMP_H_ + +/** + * @file + * + * Functions for vectorised implementation of memcmp(). + */ + +/** + * Compare 16 bytes between two locations using optimised + * instructions. The locations should not overlap. + * + * @param src_1 + * Pointer to the first source of the data. + * @param src_2 + * Pointer to the second source of the data. 
+ */ +static inline bool +rte_cmp16(const uint8_t *src_1, const uint8_t *src_2); + +/** + * Compare 32 bytes between two locations using optimised + * instructions. The locations should not overlap. + * + * @param src_1 + * Pointer to the first source of the data. + * @param src_2 + * Pointer to the second source of the data. + */ +static inline bool +rte_cmp32(const uint8_t *src_1, const uint8_t *src_2); + +/** + * Compare 64 bytes between two locations using optimised + * instructions. The locations should not overlap. + * + * @param src_1 + * Pointer to the first source of the data. + * @param src_2 + * Pointer to the second source of the data. + */ +static inline bool +rte_cmp64(const uint8_t *src_1, const uint8_t *src_2); + +/** + * Compare 128 bytes between two locations using optimised + * instructions. The locations should not overlap. + * + * @param src_1 + * Pointer to the first source of the data. + * @param src_2 + * Pointer to the second source of the data. + */ +static inline bool +rte_cmp128(const uint8_t *src_1, const uint8_t *src_2); + +#ifdef __DOXYGEN__ + +/** + * Compare bytes between two locations. The locations must not overlap. + * + * @note This is implemented as a macro, so its address should not be taken + * and care is needed as parameter expressions may be evaluated multiple times. + * + * @param src_1 + * Pointer to the first source of the data. + * @param src_2 + * Pointer to the second source of the data. + * @param n + * Number of bytes to compare. + * @return + * false (0) if the two regions match, true otherwise. 
+ */ +static bool +rte_memcmp(const void *dst, const void *src, size_t n); + +#endif /* __DOXYGEN__ */ + +/* + * memcmp() function used by rte_memcmp macro + */ +static inline bool +rte_memcmp_func(void *dst, const void *src, size_t n) __attribute__((always_inline)); + +#endif /* _RTE_MEMCMP_H_ */ diff --git a/lib/librte_hash/rte_fbk_hash.c b/lib/librte_hash/rte_fbk_hash.c index 356ddfe..5e796c9 100644 --- a/lib/librte_hash/rte_fbk_hash.c +++ b/lib/librte_hash/rte_fbk_hash.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -83,7 +84,8 @@ rte_fbk_hash_find_existing(const char *name) rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK); TAILQ_FOREACH(te, fbk_hash_list, next) { h = (struct rte_fbk_hash_table *) te->data; - if (strncmp(name, h->name, RTE_FBK_HASH_NAMESIZE) == 0) + if (rte_memcmp(name, h->name, + RTE_MIN(strlen(name), strlen(h->name)) + 1) == 0) break; } rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK); @@ -137,7 +139,8 @@ rte_fbk_hash_create(const struct rte_fbk_hash_params *params) /* guarantee there's no existing */ TAILQ_FOREACH(te, fbk_hash_list, next) { ht = (struct rte_fbk_hash_table *) te->data; - if (strncmp(params->name, ht->name, RTE_FBK_HASH_NAMESIZE) == 0) + if (rte_memcmp(params->name, ht->name, + RTE_MIN(strlen(params->name), strlen(ht->name)) + 1) == 0) break; } if (te != NULL) diff --git a/lib/librte_hash/rte_hash.c b/lib/librte_hash/rte_hash.c index 9245716..bd14f2b 100644 --- a/lib/librte_hash/rte_hash.c +++ b/lib/librte_hash/rte_hash.c @@ -42,6 +42,7 @@ #include /* for definition of RTE_CACHE_LINE_SIZE */ #include #include +#include #include #include #include @@ -153,7 +154,8 @@ rte_hash_find_existing(const char *name) rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK); TAILQ_FOREACH(te, hash_list, next) { h = (struct rte_hash *) te->data; - if (strncmp(name, h->name, RTE_HASH_NAMESIZE) == 0) + if (rte_memcmp(name, h->name, + RTE_MIN(strlen(name), strlen(h->name)) + 1) == 0) break; } 
rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK); @@ -213,7 +215,8 @@ rte_hash_create(const struct rte_hash_parameters *params) /* guarantee there's no existing */ TAILQ_FOREACH(te, hash_list, next) { h = (struct rte_hash *) te->data; - if (strncmp(params->name, h->name, RTE_HASH_NAMESIZE) == 0) + if (rte_memcmp(params->name, h->name, + RTE_MIN(strlen(params->name), strlen(h->name)) + 1) == 0) break; } if (te != NULL) @@ -309,7 +312,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, /* Check if key is already present in the hash */ for (i = 0; i < h->bucket_entries; i++) { if ((sig == sig_bucket[i]) && - likely(memcmp(key, get_key_from_bucket(h, key_bucket, i), + likely(rte_memcmp(key, get_key_from_bucket(h, key_bucket, i), h->key_len) == 0)) { return bucket_index * h->bucket_entries + i; } @@ -359,7 +362,7 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, /* Check if key is already present in the hash */ for (i = 0; i < h->bucket_entries; i++) { if ((sig == sig_bucket[i]) && - likely(memcmp(key, get_key_from_bucket(h, key_bucket, i), + likely(rte_memcmp(key, get_key_from_bucket(h, key_bucket, i), h->key_len) == 0)) { sig_bucket[i] = NULL_SIGNATURE; return bucket_index * h->bucket_entries + i; @@ -401,7 +404,7 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, /* Check if key is already present in the hash */ for (i = 0; i < h->bucket_entries; i++) { if ((sig == sig_bucket[i]) && - likely(memcmp(key, get_key_from_bucket(h, key_bucket, i), + likely(rte_memcmp(key, get_key_from_bucket(h, key_bucket, i), h->key_len) == 0)) { return bucket_index * h->bucket_entries + i; } @@ -457,7 +460,7 @@ rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, for (j = 0; j < h->bucket_entries; j++) { if ((sigs[i] == sig_bucket[j]) && - likely(memcmp(keys[i], + likely(rte_memcmp(keys[i], get_key_from_bucket(h, key_bucket, j), h->key_len) == 0)) { positions[i] = bucket_index * diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h index 
821a9d4..d335d0b 100644 --- a/lib/librte_hash/rte_hash.h +++ b/lib/librte_hash/rte_hash.h @@ -54,7 +54,7 @@ extern "C" { #define RTE_HASH_BUCKET_ENTRIES_MAX 16 /** Maximum length of key that can be used. */ -#define RTE_HASH_KEY_LENGTH_MAX 64 +#define RTE_HASH_KEY_LENGTH_MAX 128 /** Max number of keys that can be searched for using rte_hash_lookup_multi. */ #define RTE_HASH_LOOKUP_BULK_MAX 16 -- 1.9.1