From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 1EC3CA0C4B; Tue, 2 Nov 2021 19:38:46 +0100 (CET) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 0696641123; Tue, 2 Nov 2021 19:38:36 +0100 (CET) Received: from mga12.intel.com (mga12.intel.com [192.55.52.136]) by mails.dpdk.org (Postfix) with ESMTP id 2BE144111B for ; Tue, 2 Nov 2021 19:38:34 +0100 (CET) X-IronPort-AV: E=McAfee;i="6200,9189,10156"; a="211397029" X-IronPort-AV: E=Sophos;i="5.87,203,1631602800"; d="scan'208";a="211397029" Received: from orsmga007.jf.intel.com ([10.7.209.58]) by fmsmga106.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 02 Nov 2021 11:38:33 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.87,203,1631602800"; d="scan'208";a="489227698" Received: from silpixa00400072.ir.intel.com ([10.237.222.213]) by orsmga007.jf.intel.com with ESMTP; 02 Nov 2021 11:38:31 -0700 From: Vladimir Medvedkin To: dev@dpdk.org Cc: yipeng1.wang@intel.com, sameh.gobriel@intel.com, bruce.richardson@intel.com, konstantin.ananyev@intel.com, stephen@networkplumber.org, thomas@monjalon.net Date: Tue, 2 Nov 2021 18:38:23 +0000 Message-Id: <1635878305-102888-3-git-send-email-vladimir.medvedkin@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1635878305-102888-1-git-send-email-vladimir.medvedkin@intel.com> References: <1635878305-102888-1-git-send-email-vladimir.medvedkin@intel.com> In-Reply-To: <1630944239-363648-1-git-send-email-vladimir.medvedkin@intel.com> References: <1630944239-363648-1-git-send-email-vladimir.medvedkin@intel.com> Subject: [dpdk-dev] [PATCH v8 2/4] hash: add bulk toeplitz hash implementation X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" This patch 
adds a bulk version for the Toeplitz hash implemented with Galois Fields New Instructions (GFNI). Signed-off-by: Vladimir Medvedkin Acked-by: Konstantin Ananyev --- app/test/test_thash.c | 67 ++++++++++++++++++++++++++++- doc/guides/prog_guide/toeplitz_hash_lib.rst | 20 ++++++--- lib/hash/rte_thash_gfni.h | 33 ++++++++++++++ lib/hash/rte_thash_x86_gfni.h | 40 +++++++++++++++++ 4 files changed, 154 insertions(+), 6 deletions(-) diff --git a/app/test/test_thash.c b/app/test/test_thash.c index 22d784e..a625306 100644 --- a/app/test/test_thash.c +++ b/app/test/test_thash.c @@ -230,6 +230,8 @@ enum { SCALAR_DATA_BUF_2_HASH_IDX, GFNI_DATA_BUF_1_HASH_IDX, GFNI_DATA_BUF_2_HASH_IDX, + GFNI_BULK_DATA_BUF_1_HASH_IDX, + GFNI_BULK_DATA_BUF_2_HASH_IDX, HASH_IDXES }; @@ -241,6 +243,7 @@ test_toeplitz_hash_rand_data(void) uint32_t hash[HASH_IDXES] = { 0 }; uint64_t rss_key_matrixes[RTE_DIM(default_rss_key)]; int i, j; + uint8_t *bulk_data[2]; if (!rte_thash_gfni_supported()) return TEST_SKIPPED; @@ -248,6 +251,9 @@ test_toeplitz_hash_rand_data(void) rte_thash_complete_matrix(rss_key_matrixes, default_rss_key, RTE_DIM(default_rss_key)); + for (i = 0; i < 2; i++) + bulk_data[i] = (uint8_t *)data[i]; + for (i = 0; i < ITER; i++) { for (j = 0; j < DATA_SZ; j++) { data[0][j] = rte_rand(); @@ -266,11 +272,18 @@ test_toeplitz_hash_rand_data(void) hash[GFNI_DATA_BUF_2_HASH_IDX] = rte_thash_gfni( rss_key_matrixes, (uint8_t *)data[1], DATA_SZ * sizeof(uint32_t)); + rte_thash_gfni_bulk(rss_key_matrixes, + DATA_SZ * sizeof(uint32_t), bulk_data, + &hash[GFNI_BULK_DATA_BUF_1_HASH_IDX], 2); if ((hash[SCALAR_DATA_BUF_1_HASH_IDX] != hash[GFNI_DATA_BUF_1_HASH_IDX]) || + (hash[SCALAR_DATA_BUF_1_HASH_IDX] != + hash[GFNI_BULK_DATA_BUF_1_HASH_IDX]) || (hash[SCALAR_DATA_BUF_2_HASH_IDX] != - hash[GFNI_DATA_BUF_2_HASH_IDX])) + hash[GFNI_DATA_BUF_2_HASH_IDX]) || + (hash[SCALAR_DATA_BUF_2_HASH_IDX] != + hash[GFNI_BULK_DATA_BUF_2_HASH_IDX])) return -TEST_FAILED; } @@ -284,6 +297,57 @@ enum { }; static int 
+test_toeplitz_hash_gfni_bulk(void) +{ + uint32_t i, j; + union rte_thash_tuple tuple[2]; + uint8_t *tuples[2]; + uint32_t rss[2] = { 0 }; + uint64_t rss_key_matrixes[RTE_DIM(default_rss_key)]; + + if (!rte_thash_gfni_supported()) + return TEST_SKIPPED; + + /* Convert RSS key into matrixes */ + rte_thash_complete_matrix(rss_key_matrixes, default_rss_key, + RTE_DIM(default_rss_key)); + + for (i = 0; i < RTE_DIM(tuples); i++) { + /* allocate memory enough for a biggest tuple */ + tuples[i] = rte_zmalloc(NULL, RTE_THASH_V6_L4_LEN * 4, 0); + if (tuples[i] == NULL) + return -TEST_FAILED; + } + + for (i = 0; i < RTE_MIN(RTE_DIM(v4_tbl), RTE_DIM(v6_tbl)); i++) { + /*Load IPv4 headers and copy it into the corresponding tuple*/ + tuple[0].v4.src_addr = rte_cpu_to_be_32(v4_tbl[i].src_ip); + tuple[0].v4.dst_addr = rte_cpu_to_be_32(v4_tbl[i].dst_ip); + tuple[0].v4.sport = rte_cpu_to_be_16(v4_tbl[i].dst_port); + tuple[0].v4.dport = rte_cpu_to_be_16(v4_tbl[i].src_port); + rte_memcpy(tuples[0], &tuple[0], RTE_THASH_V4_L4_LEN * 4); + + /*Load IPv6 headers and copy it into the corresponding tuple*/ + for (j = 0; j < RTE_DIM(tuple[1].v6.src_addr); j++) + tuple[1].v6.src_addr[j] = v6_tbl[i].src_ip[j]; + for (j = 0; j < RTE_DIM(tuple[1].v6.dst_addr); j++) + tuple[1].v6.dst_addr[j] = v6_tbl[i].dst_ip[j]; + tuple[1].v6.sport = rte_cpu_to_be_16(v6_tbl[i].dst_port); + tuple[1].v6.dport = rte_cpu_to_be_16(v6_tbl[i].src_port); + rte_memcpy(tuples[1], &tuple[1], RTE_THASH_V6_L4_LEN * 4); + + rte_thash_gfni_bulk(rss_key_matrixes, RTE_THASH_V6_L4_LEN * 4, + tuples, rss, 2); + + if ((rss[RSS_V4_IDX] != v4_tbl[i].hash_l3l4) || + (rss[RSS_V6_IDX] != v6_tbl[i].hash_l3l4)) + return -TEST_FAILED; + } + + return TEST_SUCCESS; +} + +static int test_big_tuple_gfni(void) { uint32_t arr[16]; @@ -748,6 +812,7 @@ static struct unit_test_suite thash_tests = { TEST_CASE(test_toeplitz_hash_calc), TEST_CASE(test_toeplitz_hash_gfni), TEST_CASE(test_toeplitz_hash_rand_data), + 
TEST_CASE(test_toeplitz_hash_gfni_bulk), TEST_CASE(test_big_tuple_gfni), TEST_CASE(test_create_invalid), TEST_CASE(test_multiple_create), diff --git a/doc/guides/prog_guide/toeplitz_hash_lib.rst b/doc/guides/prog_guide/toeplitz_hash_lib.rst index acdd8c3..61eaafd 100644 --- a/doc/guides/prog_guide/toeplitz_hash_lib.rst +++ b/doc/guides/prog_guide/toeplitz_hash_lib.rst @@ -19,11 +19,12 @@ to calculate the RSS hash sum to spread the traffic among the queues. Toeplitz hash function API -------------------------- -There are three functions that provide calculation of the Toeplitz hash sum: +There are four functions that provide calculation of the Toeplitz hash sum: * ``rte_softrss()`` * ``rte_softrss_be()`` * ``rte_thash_gfni()`` +* ``rte_thash_gfni_bulk()`` First two functions are scalar implementation and take the parameters: @@ -38,11 +39,12 @@ to be exactly the same as the one installed on the NIC. The ``rte_softrss_be`` function is a faster implementation, but it expects ``rss_key`` to be converted to the host byte order. -The last function is vectorized implementation using -Galois Fields New Instructions. Could be used if ``rte_thash_gfni_supported`` returns true. -It expects the tuple to be in network byte order. +The last two functions are vectorized implementations using +Galois Fields New Instructions. They can be used if ``rte_thash_gfni_supported()`` returns true. +They expect the tuple to be in network byte order. -``rte_thash_gfni()`` calculates the hash value for a single tuple +``rte_thash_gfni()`` calculates the hash value for a single tuple, and +``rte_thash_gfni_bulk()`` is a bulk implementation of ``rte_thash_gfni()``. ``rte_thash_gfni()`` takes the parameters: @@ -50,6 +52,14 @@ It expects the tuple to be in network byte order. * A pointer to the tuple. * A length of the tuple in bytes. +``rte_thash_gfni_bulk()`` takes the parameters: + +* A pointer to the matrices derived from the RSS hash key using ``rte_thash_complete_matrix()``. 
+* A length of the longest tuple in bytes. +* Array of pointers to the data to be hashed. +* Array of ``uint32_t`` where to put calculated Toeplitz hash values. +* Number of tuples in a bulk. + ``rte_thash_complete_matrix()`` is a function that calculates matrices required by GFNI implementations from the RSS hash key. It takes the parameters: diff --git a/lib/hash/rte_thash_gfni.h b/lib/hash/rte_thash_gfni.h index bbacd41..e97d912 100644 --- a/lib/hash/rte_thash_gfni.h +++ b/lib/hash/rte_thash_gfni.h @@ -45,6 +45,39 @@ rte_thash_gfni(const uint64_t *mtrx __rte_unused, return 0; } +/** + * Bulk implementation for Toeplitz hash. + * Dummy implementation. + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * @param mtrx + * Pointer to the matrices generated from the corresponding + * RSS hash key using rte_thash_complete_matrix(). + * @param len + * Length of the largest data buffer to be hashed. + * @param tuple + * Array of pointers to the data to be hashed. + * Data must be in network byte order. + * @param val + * Array of uint32_t where to put calculated Toeplitz hash values. + * @param num + * Number of tuples to hash. + */ +__rte_experimental +static inline void +rte_thash_gfni_bulk(const uint64_t *mtrx __rte_unused, + int len __rte_unused, uint8_t *tuple[] __rte_unused, + uint32_t val[], uint32_t num) +{ + unsigned int i; + + RTE_LOG(ERR, HASH, "%s is undefined under given arch\n", __func__); + for (i = 0; i < num; i++) + val[i] = 0; +} + #endif /* RTE_THASH_GFNI_DEFINED */ #ifdef __cplusplus diff --git a/lib/hash/rte_thash_x86_gfni.h b/lib/hash/rte_thash_x86_gfni.h index 53486b6..c2889c3 100644 --- a/lib/hash/rte_thash_x86_gfni.h +++ b/lib/hash/rte_thash_x86_gfni.h @@ -174,6 +174,46 @@ rte_thash_gfni(const uint64_t *m, const uint8_t *tuple, int len) return val; } +/** + * Bulk implementation for Toeplitz hash. + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. 
+ * + * @param mtrx + * Pointer to the matrices generated from the corresponding + * RSS hash key using rte_thash_complete_matrix(). + * Note that @p len should not exceed the length of the rss_key minus 4. + * @param len + * Length of the largest data buffer to be hashed. + * @param tuple + * Array of pointers to the data to be hashed. + * Data must be in network byte order. + * @param val + * Array of uint32_t where to put calculated Toeplitz hash values. + * @param num + * Number of tuples to hash. + */ +__rte_experimental +static inline void +rte_thash_gfni_bulk(const uint64_t *mtrx, int len, uint8_t *tuple[], + uint32_t val[], uint32_t num) +{ + uint32_t i; + uint32_t val_zero; + __m512i xor_acc; + + for (i = 0; i != (num & ~1); i += 2) { + xor_acc = __rte_thash_gfni(mtrx, tuple[i], tuple[i + 1], len); + __rte_thash_xor_reduce(xor_acc, val + i, val + i + 1); + } + + if (num & 1) { + xor_acc = __rte_thash_gfni(mtrx, tuple[i], NULL, len); + __rte_thash_xor_reduce(xor_acc, val + i, &val_zero); + } +} + #endif /* _GFNI_ */ #ifdef __cplusplus -- 2.7.4