DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH v1 0/3] Predictable RSS feature
@ 2021-03-16 18:24 Vladimir Medvedkin
  2021-03-16 18:24 ` [dpdk-dev] [PATCH v1 1/3] hash: add predictable RSS API Vladimir Medvedkin
                   ` (6 more replies)
  0 siblings, 7 replies; 23+ messages in thread
From: Vladimir Medvedkin @ 2021-03-16 18:24 UTC (permalink / raw)
  To: dev
  Cc: konstantin.ananyev, andrey.chilikin, ray.kinsella, yipeng1.wang,
	sameh.gobriel, bruce.richardson

This patch series introduces predictable RSS feature.
It is based on the idea of searching for partial hash collisions
within Toeplitz hash.

The Toeplitz hash function is a homomorphism between (G, ^) and (H, ^),
where (G, ^) - is a group of tuples and (H, ^) is a group of hashes
with respect to XOR operation. So tuples and hashes could be treated as
n-dimension and 32-dimension vector spaces over GF(2).
So, f(x ^ y) == f(x) ^ f(y)
where f - is the toeplitz hash function and x, y are tuples.

The ability to predict partial collisions allows a user to compute an
input hash value with the desired LSB values.
Usually the number of LSBs is defined by the size of the RSS Redirection Table.

There are a number of use cases, for example:
1) NAT. Using this library it is possible to select a new port number
for a translation such that the RSS hash for the original tuple will have
the same LSBs as the RSS hash for the reverse tuple.
2) IPSec/MPLS/Vxlan. It is possible to choose tunnel id to be pinned to
a desired queue.
3) TCP stack. It is possible to choose a source port number for outgoing
connections such that received replies will be assigned to the
desired queue.
4) RSS hash key generation. Hash key initialization with random values
does not guarantee a uniform distribution amongst queues. This library
uses a mathematically proven algorithm to complete the RSS hash key to
provide the best distribution.

Vladimir Medvedkin (3):
  hash: add predictable RSS API
  hash: add predictable RSS implementation
  test/hash: add additional thash tests

 app/test/test_thash.c       | 383 +++++++++++++++++++++++++++-
 lib/librte_hash/meson.build |   3 +-
 lib/librte_hash/rte_thash.c | 596 ++++++++++++++++++++++++++++++++++++++++++++
 lib/librte_hash/rte_thash.h | 138 ++++++++++
 lib/librte_hash/version.map |   7 +
 5 files changed, 1120 insertions(+), 7 deletions(-)
 create mode 100644 lib/librte_hash/rte_thash.c

-- 
2.7.4


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [dpdk-dev] [PATCH v1 1/3] hash: add predictable RSS API
  2021-03-16 18:24 [dpdk-dev] [PATCH v1 0/3] Predictable RSS feature Vladimir Medvedkin
@ 2021-03-16 18:24 ` Vladimir Medvedkin
  2021-03-16 18:24 ` [dpdk-dev] [PATCH v1 2/3] hash: add predictable RSS implementation Vladimir Medvedkin
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 23+ messages in thread
From: Vladimir Medvedkin @ 2021-03-16 18:24 UTC (permalink / raw)
  To: dev
  Cc: konstantin.ananyev, andrey.chilikin, ray.kinsella, yipeng1.wang,
	sameh.gobriel, bruce.richardson

This patch adds predictable RSS API.
It is based on the idea of searching partial Toeplitz hash collisions.

Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
---
 lib/librte_hash/meson.build |   3 +-
 lib/librte_hash/rte_thash.c |  96 ++++++++++++++++++++++++++++++
 lib/librte_hash/rte_thash.h | 138 ++++++++++++++++++++++++++++++++++++++++++++
 lib/librte_hash/version.map |   7 +++
 4 files changed, 243 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_hash/rte_thash.c

diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build
index 242859f..3546014 100644
--- a/lib/librte_hash/meson.build
+++ b/lib/librte_hash/meson.build
@@ -8,6 +8,7 @@ headers = files('rte_fbk_hash.h',
 	'rte_thash.h')
 indirect_headers += files('rte_crc_arm64.h')
 
-sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c')
+sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c', 'rte_thash.c')
+deps += ['net']
 deps += ['ring']
 deps += ['rcu']
diff --git a/lib/librte_hash/rte_thash.c b/lib/librte_hash/rte_thash.c
new file mode 100644
index 0000000..79e8724
--- /dev/null
+++ b/lib/librte_hash/rte_thash.c
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Intel Corporation
+ */
+
+#include <rte_thash.h>
+#include <rte_tailq.h>
+#include <rte_random.h>
+#include <rte_memcpy.h>
+#include <rte_errno.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_malloc.h>
+
+#define THASH_NAME_LEN		64
+
+struct thash_lfsr {
+	uint32_t	ref_cnt;
+	uint32_t	poly;
+	/**< polynomial associated with the lfsr */
+	uint32_t	rev_poly;
+	/**< polynomial to generate the sequence in reverse direction */
+	uint32_t	state;
+	/**< current state of the lfsr */
+	uint32_t	rev_state;
+	/**< current state of the lfsr for reverse direction */
+	uint32_t	deg;	/**< polynomial degree*/
+	uint32_t	bits_cnt;  /**< number of bits generated by lfsr*/
+};
+
+struct rte_thash_subtuple_helper {
+	char	name[THASH_NAME_LEN];	/** < Name of subtuple configuration */
+	LIST_ENTRY(rte_thash_subtuple_helper)	next;
+	struct thash_lfsr	*lfsr;
+	uint32_t	offset;		/** < Offset in bits of the subtuple */
+	uint32_t	len;		/** < Length in bits of the subtuple */
+	uint32_t	lsb_msk;	/** < (1 << reta_sz_log) - 1 */
+	__extension__ uint32_t	compl_table[0] __rte_cache_aligned;
+	/** < Complimentary table */
+};
+
+struct rte_thash_ctx {
+	char		name[THASH_NAME_LEN];
+	LIST_HEAD(, rte_thash_subtuple_helper) head;
+	uint32_t	key_len;	/** < Length of the NIC RSS hash key */
+	uint32_t	reta_sz_log;	/** < size of the RSS ReTa in bits */
+	uint32_t	subtuples_nb;	/** < number of subtuples */
+	uint32_t	flags;
+	uint8_t		hash_key[0];
+};
+
+struct rte_thash_ctx *
+rte_thash_init_ctx(const char *name __rte_unused,
+	uint32_t key_len __rte_unused, uint32_t reta_sz __rte_unused,
+	uint8_t *key __rte_unused, uint32_t flags __rte_unused)
+{
+	return NULL;
+}
+
+struct rte_thash_ctx *
+rte_thash_find_existing(const char *name __rte_unused)
+{
+	return NULL;
+}
+
+void
+rte_thash_free_ctx(struct rte_thash_ctx *ctx __rte_unused)
+{
+}
+
+int
+rte_thash_add_helper(struct rte_thash_ctx *ctx __rte_unused,
+	const char *name __rte_unused, uint32_t len __rte_unused,
+	uint32_t offset __rte_unused)
+{
+	return 0;
+}
+
+struct rte_thash_subtuple_helper *
+rte_thash_get_helper(struct rte_thash_ctx *ctx __rte_unused,
+	const char *name __rte_unused)
+{
+	return NULL;
+}
+
+uint32_t
+rte_thash_get_compliment(struct rte_thash_subtuple_helper *h __rte_unused,
+	uint32_t hash __rte_unused, uint32_t desired_hash __rte_unused)
+{
+	return 0;
+}
+
+const uint8_t *
+rte_thash_get_key(struct rte_thash_ctx *ctx __rte_unused)
+{
+	return NULL;
+}
diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h
index 061efa2..b30a85c 100644
--- a/lib/librte_hash/rte_thash.h
+++ b/lib/librte_hash/rte_thash.h
@@ -1,5 +1,6 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright(c) 2015-2019 Vladimir Medvedkin <medvedkinv@gmail.com>
+ * Copyright(c) 2021 Intel Corporation
  */
 
 #ifndef _RTE_THASH_H
@@ -222,6 +223,143 @@ rte_softrss_be(uint32_t *input_tuple, uint32_t input_len,
 	return ret;
 }
 
+/**
+ * LFSR will ignore if the generated m-sequence has more than 2^n - 1 bits
+ */
+#define RTE_THASH_IGNORE_PERIOD_OVERFLOW	0x1
+/**
+ * Generate minimal required bit (equal to ReTa LSB) sequence into
+ * the hash_key
+ */
+#define RTE_THASH_MINIMAL_SEQ			0x2
+
+/** @internal thash context structure. */
+struct rte_thash_ctx;
+/** @internal thash helper structure. */
+struct rte_thash_subtuple_helper;
+
+/**
+ * Create a new thash context.
+ *
+ * @param name
+ *  context name
+ * @param key_len
+ *  length of the toeplitz hash key
+ * @param reta_sz
+ *  logarithm of the NIC's Redirection Table (ReTa) size,
+ *  i.e. the number of LSBs of the hash used to determine
+ *  the ReTa entry.
+ * @param key
+ *  pointer to the key used to init an internal key state.
+ *  Could be NULL; in this case the internal key will be initialized with random values.
+ * @param flags
+ *  supported flags are:
+ *   RTE_THASH_IGNORE_PERIOD_OVERFLOW
+ *   RTE_THASH_MINIMAL_SEQ
+ * @return
+ *  A pointer to the created context on success
+ *  NULL otherwise
+ */
+__rte_experimental
+struct rte_thash_ctx *
+rte_thash_init_ctx(const char *name, uint32_t key_len, uint32_t reta_sz,
+	uint8_t *key, uint32_t flags);
+
+/**
+ * Find an existing thash context and return a pointer to it.
+ *
+ * @param name
+ *  Name of the thash context
+ * @return
+ *  Pointer to the thash context or NULL if it was not found with rte_errno
+ *  set appropriately. Possible rte_errno values include:
+ *   - ENOENT - required entry not available to return.
+ */
+__rte_experimental
+struct rte_thash_ctx *
+rte_thash_find_existing(const char *name);
+
+/**
+ * Free a thash context object
+ *
+ * @param ctx
+ *  thash context
+ * @return
+ *  None
+ */
+__rte_experimental
+void
+rte_thash_free_ctx(struct rte_thash_ctx *ctx);
+
+/**
+ * Add special properties to the toeplitz hash key inside a thash context.
+ * Creates an internal helper struct which has a complimentary table
+ * to calculate toeplitz hash collisions.
+ *
+ * @param ctx
+ *  thash context
+ * @param name
+ *  name of the helper
+ * @param len
+ *  length in bits of the target subtuple
+ * @param offset
+ *  offset in bits of the subtuple
+ * @return
+ *  0 on success
+ *  negative on error
+ */
+__rte_experimental
+int
+rte_thash_add_helper(struct rte_thash_ctx *ctx, const char *name, uint32_t len,
+	uint32_t offset);
+
+/**
+ * Find a helper in the context by the given name
+ *
+ * @param ctx
+ *  thash context
+ * @param name
+ *  name of the helper
+ * @return
+ *  Pointer to the thash helper or NULL if it was not found.
+ */
+__rte_experimental
+struct rte_thash_subtuple_helper *
+rte_thash_get_helper(struct rte_thash_ctx *ctx, const char *name);
+
+/**
+ * Get a complimentary value for the subtuple to produce a
+ * partial toeplitz hash collision. It must be XOR'ed with the
+ * subtuple to produce the hash value with the desired hash LSB's
+ *
+ * @param h
+ *  Pointer to the helper struct
+ * @param hash
+ *  toeplitz hash value calculated for the given tuple
+ * @param desired_hash
+ *  desired hash value to find a collision for
+ * @return
+ *  A complimentary value which must be xored with the corresponding subtuple
+ */
+__rte_experimental
+uint32_t
+rte_thash_get_compliment(struct rte_thash_subtuple_helper *h,
+	uint32_t hash, uint32_t desired_hash);
+
+/**
+ * Get a pointer to the toeplitz hash key contained in the context.
+ * It changes after each addition of a helper. It should be installed to
+ * the NIC.
+ *
+ * @param ctx
+ *  thash context
+ * @return
+ *  A pointer to the toeplitz hash key
+ */
+__rte_experimental
+const uint8_t *
+rte_thash_get_key(struct rte_thash_ctx *ctx);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_hash/version.map b/lib/librte_hash/version.map
index c6d7308..93cb230 100644
--- a/lib/librte_hash/version.map
+++ b/lib/librte_hash/version.map
@@ -37,4 +37,11 @@ EXPERIMENTAL {
 	rte_hash_lookup_with_hash_bulk_data;
 	rte_hash_max_key_id;
 	rte_hash_rcu_qsbr_add;
+	rte_thash_add_helper;
+	rte_thash_find_existing;
+	rte_thash_free_ctx;
+	rte_thash_get_compliment;
+	rte_thash_get_helper;
+	rte_thash_get_key;
+	rte_thash_init_ctx;
 };
-- 
2.7.4


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [dpdk-dev] [PATCH v1 2/3] hash: add predictable RSS implementation
  2021-03-16 18:24 [dpdk-dev] [PATCH v1 0/3] Predictable RSS feature Vladimir Medvedkin
  2021-03-16 18:24 ` [dpdk-dev] [PATCH v1 1/3] hash: add predictable RSS API Vladimir Medvedkin
@ 2021-03-16 18:24 ` Vladimir Medvedkin
  2021-03-16 18:24 ` [dpdk-dev] [PATCH v1 3/3] test/hash: add additional thash tests Vladimir Medvedkin
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 23+ messages in thread
From: Vladimir Medvedkin @ 2021-03-16 18:24 UTC (permalink / raw)
  To: dev
  Cc: konstantin.ananyev, andrey.chilikin, ray.kinsella, yipeng1.wang,
	sameh.gobriel, bruce.richardson

This patch implements predictable RSS functionality.

Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
---
 lib/librte_hash/rte_thash.c | 532 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 516 insertions(+), 16 deletions(-)

diff --git a/lib/librte_hash/rte_thash.c b/lib/librte_hash/rte_thash.c
index 79e8724..d267a7a 100644
--- a/lib/librte_hash/rte_thash.c
+++ b/lib/librte_hash/rte_thash.c
@@ -12,6 +12,45 @@
 #include <rte_malloc.h>
 
 #define THASH_NAME_LEN		64
+#define TOEPLITZ_HASH_LEN	32
+
+#define	RETA_SZ_MIN	2U
+#define	RETA_SZ_MAX	16U
+#define RETA_SZ_IN_RANGE(reta_sz)	((reta_sz >= RETA_SZ_MIN) && \
+					(reta_sz <= RETA_SZ_MAX))
+
+TAILQ_HEAD(rte_thash_list, rte_tailq_entry);
+static struct rte_tailq_elem rte_thash_tailq = {
+	.name = "RTE_THASH",
+};
+EAL_REGISTER_TAILQ(rte_thash_tailq)
+
+/**
+ * Table of some irreducible polynomials over GF(2).
+ * For lfsr they are represented in BE bit order, and
+ * x^0 is masked out.
+ * For example, poly x^5 + x^2 + 1 will be represented
+ * as (101001b & 11111b) = 01001b = 0x9
+ */
+static const uint32_t irreducible_poly_table[][4] = {
+	{0, 0, 0, 0},	/** < degree 0 */
+	{1, 1, 1, 1},	/** < degree 1 */
+	{0x3, 0x3, 0x3, 0x3},	/** < degree 2 and so on... */
+	{0x5, 0x3, 0x5, 0x3},
+	{0x9, 0x3, 0x9, 0x3},
+	{0x9, 0x1b, 0xf, 0x5},
+	{0x21, 0x33, 0x1b, 0x2d},
+	{0x41, 0x11, 0x71, 0x9},
+	{0x71, 0xa9, 0xf5, 0x8d},
+	{0x21, 0xd1, 0x69, 0x1d9},
+	{0x81, 0x2c1, 0x3b1, 0x185},
+	{0x201, 0x541, 0x341, 0x461},
+	{0x941, 0x609, 0xe19, 0x45d},
+	{0x1601, 0x1f51, 0x1171, 0x359},
+	{0x2141, 0x2111, 0x2db1, 0x2109},
+	{0x4001, 0x801, 0x101, 0x7301},
+	{0x7781, 0xa011, 0x4211, 0x86d9},
+};
 
 struct thash_lfsr {
 	uint32_t	ref_cnt;
@@ -48,49 +87,510 @@ struct rte_thash_ctx {
 	uint8_t		hash_key[0];
 };
 
+static inline uint32_t
+get_bit_lfsr(struct thash_lfsr *lfsr)
+{
+	uint32_t bit, ret;
+
+	/*
+	 * masking the TAP bits defined by the polynomial and
+	 * calculating parity
+	 */
+	bit = __builtin_popcount(lfsr->state & lfsr->poly) & 0x1;
+	ret = lfsr->state & 0x1;
+	lfsr->state = ((lfsr->state >> 1) | (bit << (lfsr->deg - 1))) &
+		((1 << lfsr->deg) - 1);
+
+	lfsr->bits_cnt++;
+	return ret;
+}
+
+static inline uint32_t
+get_rev_bit_lfsr(struct thash_lfsr *lfsr)
+{
+	uint32_t bit, ret;
+
+	bit = __builtin_popcount(lfsr->rev_state & lfsr->rev_poly) & 0x1;
+	ret = lfsr->rev_state & (1 << (lfsr->deg - 1));
+	lfsr->rev_state = ((lfsr->rev_state << 1) | bit) &
+		((1 << lfsr->deg) - 1);
+
+	lfsr->bits_cnt++;
+	return ret;
+}
+
+static inline uint32_t
+thash_get_rand_poly(uint32_t poly_degree)
+{
+	return irreducible_poly_table[poly_degree][rte_rand() %
+		RTE_DIM(irreducible_poly_table[poly_degree])];
+}
+
+static struct thash_lfsr *
+alloc_lfsr(struct rte_thash_ctx *ctx)
+{
+	struct thash_lfsr *lfsr;
+	uint32_t i;
+
+	if (ctx == NULL)
+		return NULL;
+
+	lfsr = rte_zmalloc(NULL, sizeof(struct thash_lfsr), 0);
+	if (lfsr == NULL)
+		return NULL;
+
+	lfsr->deg = ctx->reta_sz_log;
+	lfsr->poly = thash_get_rand_poly(lfsr->deg);
+	do {
+		lfsr->state = rte_rand() & ((1 << lfsr->deg) - 1);
+	} while (lfsr->state == 0);
+	/* init reverse order polynomial */
+	lfsr->rev_poly = (lfsr->poly >> 1) | (1 << (lfsr->deg - 1));
+	/* init proper rev_state*/
+	lfsr->rev_state = lfsr->state;
+	for (i = 0; i <= lfsr->deg; i++)
+		get_rev_bit_lfsr(lfsr);
+
+	/* clear bits_cnt after rev_state was inited */
+	lfsr->bits_cnt = 0;
+	lfsr->ref_cnt = 1;
+
+	return lfsr;
+}
+
+static void
+attach_lfsr(struct rte_thash_subtuple_helper *h, struct thash_lfsr *lfsr)
+{
+	lfsr->ref_cnt++;
+	h->lfsr = lfsr;
+}
+
+static void
+free_lfsr(struct thash_lfsr *lfsr)
+{
+	lfsr->ref_cnt--;
+	if (lfsr->ref_cnt == 0)
+		rte_free(lfsr);
+}
+
 struct rte_thash_ctx *
-rte_thash_init_ctx(const char *name __rte_unused,
-	uint32_t key_len __rte_unused, uint32_t reta_sz __rte_unused,
-	uint8_t *key __rte_unused, uint32_t flags __rte_unused)
+rte_thash_init_ctx(const char *name, uint32_t key_len, uint32_t reta_sz,
+	uint8_t *key, uint32_t flags)
 {
+	struct rte_thash_ctx *ctx;
+	struct rte_tailq_entry *te;
+	struct rte_thash_list *thash_list;
+	uint32_t i;
+	if ((name == NULL) || (key_len == 0) || !RETA_SZ_IN_RANGE(reta_sz)) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
+
+	rte_mcfg_tailq_write_lock();
+
+	/* guarantee there's no existing */
+	TAILQ_FOREACH(te, thash_list, next) {
+		ctx = (struct rte_thash_ctx *)te->data;
+		if (strncmp(name, ctx->name, sizeof(ctx->name)) == 0)
+			break;
+	}
+	ctx = NULL;
+	if (te != NULL) {
+		rte_errno = EEXIST;
+		goto exit;
+	}
+
+	/* allocate tailq entry */
+	te = rte_zmalloc("THASH_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL) {
+		RTE_LOG(ERR, HASH,
+			"Can not allocate tailq entry for thash context %s\n",
+			name);
+		rte_errno = ENOMEM;
+		goto exit;
+	}
+
+	ctx = rte_zmalloc(NULL, sizeof(struct rte_thash_ctx) + key_len, 0);
+	if (ctx == NULL) {
+		RTE_LOG(ERR, HASH, "thash ctx %s memory allocation failed\n",
+			name);
+		rte_errno = ENOMEM;
+		goto free_te;
+	}
+
+	rte_strlcpy(ctx->name, name, sizeof(ctx->name));
+	ctx->key_len = key_len;
+	ctx->reta_sz_log = reta_sz;
+	LIST_INIT(&ctx->head);
+	ctx->flags = flags;
+
+	if (key)
+		rte_memcpy(ctx->hash_key, key, key_len);
+	else {
+		for (i = 0; i < key_len; i++)
+			ctx->hash_key[i] = rte_rand();
+	}
+
+	te->data = (void *)ctx;
+	TAILQ_INSERT_TAIL(thash_list, te, next);
+
+	rte_mcfg_tailq_write_unlock();
+
+	return ctx;
+free_te:
+	rte_free(te);
+exit:
+	rte_mcfg_tailq_write_unlock();
 	return NULL;
 }
 
 struct rte_thash_ctx *
-rte_thash_find_existing(const char *name __rte_unused)
+rte_thash_find_existing(const char *name)
 {
-	return NULL;
+	struct rte_thash_ctx *ctx;
+	struct rte_tailq_entry *te;
+	struct rte_thash_list *thash_list;
+
+	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
+
+	rte_mcfg_tailq_read_lock();
+	TAILQ_FOREACH(te, thash_list, next) {
+		ctx = (struct rte_thash_ctx *)te->data;
+		if (strncmp(name, ctx->name, sizeof(ctx->name)) == 0)
+			break;
+	}
+
+	rte_mcfg_tailq_read_unlock();
+
+	if (te == NULL) {
+		rte_errno = ENOENT;
+		return NULL;
+	}
+
+	return ctx;
 }
 
 void
-rte_thash_free_ctx(struct rte_thash_ctx *ctx __rte_unused)
+rte_thash_free_ctx(struct rte_thash_ctx *ctx)
 {
+	struct rte_tailq_entry *te;
+	struct rte_thash_list *thash_list;
+	struct rte_thash_subtuple_helper *ent, *tmp;
+
+	if (ctx == NULL)
+		return;
+
+	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
+	rte_mcfg_tailq_write_lock();
+	TAILQ_FOREACH(te, thash_list, next) {
+		if (te->data == (void *)ctx)
+			break;
+	}
+
+	if (te != NULL)
+		TAILQ_REMOVE(thash_list, te, next);
+
+	rte_mcfg_tailq_write_unlock();
+	ent = LIST_FIRST(&(ctx->head));
+	while (ent) {
+		free_lfsr(ent->lfsr);
+		tmp = ent;
+		ent = LIST_NEXT(ent, next);
+		LIST_REMOVE(tmp, next);
+		rte_free(tmp);
+	}
+
+	rte_free(ctx);
+	rte_free(te);
+}
+
+static inline void
+set_bit(uint8_t *ptr, uint32_t bit, uint32_t pos)
+{
+	uint32_t byte_idx = pos >> 3;
+	uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1));
+	uint8_t tmp;
+
+	tmp = ptr[byte_idx];
+	tmp &= ~(1 << bit_idx);
+	tmp |= bit << bit_idx;
+	ptr[byte_idx] = tmp;
+}
+
+/**
+ * writes m-sequence to the hash_key for range [start, end]
+ * (i.e. including start and end positions)
+ */
+static int
+generate_subkey(struct rte_thash_ctx *ctx, struct thash_lfsr *lfsr,
+	uint32_t start, uint32_t end)
+{
+	uint32_t i;
+	uint32_t req_bits = (start < end) ? (end - start) : (start - end);
+	req_bits++; /* due to including end */
+
+	/* check if lfsr overflow period of the m-sequence */
+	if (((lfsr->bits_cnt + req_bits) > (1ULL << lfsr->deg) - 1) &&
+			((ctx->flags & RTE_THASH_IGNORE_PERIOD_OVERFLOW) !=
+			RTE_THASH_IGNORE_PERIOD_OVERFLOW))
+		return -ENOSPC;
+
+	if (start < end) {
+		/* original direction (from left to right)*/
+		for (i = start; i <= end; i++)
+			set_bit(ctx->hash_key, get_bit_lfsr(lfsr), i);
+
+	} else {
+		/* reverse direction (from right to left) */
+		for (i = end; i >= start; i--)
+			set_bit(ctx->hash_key, get_rev_bit_lfsr(lfsr), i);
+	}
+
+	return 0;
+}
+
+static inline uint32_t
+get_subvalue(struct rte_thash_ctx *ctx, uint32_t offset)
+{
+	uint32_t *tmp, val;
+
+	tmp = (uint32_t *)(&ctx->hash_key[offset >> 3]);
+	val = rte_be_to_cpu_32(*tmp);
+	val >>= (TOEPLITZ_HASH_LEN - ((offset & (CHAR_BIT - 1)) +
+		ctx->reta_sz_log));
+
+	return val & ((1 << ctx->reta_sz_log) - 1);
+}
+
+static inline void
+generate_compliment_table(struct rte_thash_ctx *ctx,
+	struct rte_thash_subtuple_helper *h)
+{
+	int i, j, k;
+	uint32_t val;
+	uint32_t start;
+
+	start = h->offset + h->len - (2 * ctx->reta_sz_log - 1);
+
+	for (i = 1; i < (1 << ctx->reta_sz_log); i++) {
+		val = 0;
+		for (j = i; j; j &= (j - 1)) {
+			k = rte_bsf32(j);
+			val ^= get_subvalue(ctx, start - k +
+				ctx->reta_sz_log - 1);
+		}
+		h->compl_table[val] = i;
+	}
+}
+
+static inline int
+insert_before(struct rte_thash_ctx *ctx,
+	struct rte_thash_subtuple_helper *ent,
+	struct rte_thash_subtuple_helper *cur_ent,
+	struct rte_thash_subtuple_helper *next_ent,
+	uint32_t start, uint32_t end, uint32_t range_end)
+{
+	int ret;
+
+	if (end < cur_ent->offset) {
+		ent->lfsr = alloc_lfsr(ctx);
+		if (ent->lfsr == NULL) {
+			rte_free(ent);
+			return -ENOMEM;
+		}
+		/* generate nonoverlapping range [start, end) */
+		ret = generate_subkey(ctx, ent->lfsr, start, end - 1);
+		if (ret != 0) {
+			free_lfsr(ent->lfsr);
+			rte_free(ent);
+			return ret;
+		}
+	} else if ((next_ent != NULL) && (end > next_ent->offset)) {
+		rte_free(ent);
+		return -ENOSPC;
+	}
+	attach_lfsr(ent, cur_ent->lfsr);
+
+	/**
+	 * generate partially overlapping range
+	 * [start, cur_ent->start) in reverse order
+	 */
+	ret = generate_subkey(ctx, ent->lfsr, cur_ent->offset - 1, start);
+	if (ret != 0) {
+		free_lfsr(ent->lfsr);
+		rte_free(ent);
+		return ret;
+	}
+
+	if (end > range_end) {
+		/**
+		 * generate partially overlapping range
+		 * (range_end, end)
+		 */
+		ret = generate_subkey(ctx, ent->lfsr, range_end, end - 1);
+		if (ret != 0) {
+			free_lfsr(ent->lfsr);
+			rte_free(ent);
+			return ret;
+		}
+	}
+
+	LIST_INSERT_BEFORE(cur_ent, ent, next);
+	generate_compliment_table(ctx, ent);
+	ctx->subtuples_nb++;
+	return 0;
+}
+
+static inline int
+insert_after(struct rte_thash_ctx *ctx,
+	struct rte_thash_subtuple_helper *ent,
+	struct rte_thash_subtuple_helper *cur_ent,
+	struct rte_thash_subtuple_helper *next_ent,
+	struct rte_thash_subtuple_helper *prev_ent,
+	uint32_t end, uint32_t range_end)
+{
+	int ret;
+
+	if ((next_ent != NULL) && (end > next_ent->offset)) {
+		rte_free(ent);
+		return -EEXIST;
+	}
+
+	attach_lfsr(ent, cur_ent->lfsr);
+	if (end > range_end) {
+		/**
+		 * generate partially overlapping range
+		 * (range_end, end)
+		 */
+		ret = generate_subkey(ctx, ent->lfsr, range_end, end - 1);
+		if (ret != 0) {
+			free_lfsr(ent->lfsr);
+			rte_free(ent);
+			return ret;
+		}
+	}
+
+	LIST_INSERT_AFTER(prev_ent, ent, next);
+	generate_compliment_table(ctx, ent);
+	ctx->subtuples_nb++;
+
+	return 0;
 }
 
 int
-rte_thash_add_helper(struct rte_thash_ctx *ctx __rte_unused,
-	const char *name __rte_unused, uint32_t len __rte_unused,
-	uint32_t offset __rte_unused)
+rte_thash_add_helper(struct rte_thash_ctx *ctx, const char *name, uint32_t len,
+	uint32_t offset)
 {
+	struct rte_thash_subtuple_helper *ent, *cur_ent, *prev_ent, *next_ent;
+	uint32_t start, end;
+	int ret;
+
+	if ((ctx == NULL) || (name == NULL) || (len < ctx->reta_sz_log) ||
+			((offset + len + TOEPLITZ_HASH_LEN - 1) >
+			ctx->key_len * CHAR_BIT))
+		return -EINVAL;
+
+	/* Check for existing name*/
+	LIST_FOREACH(cur_ent, &ctx->head, next) {
+		if (strncmp(name, cur_ent->name, sizeof(cur_ent->name)) == 0)
+			return -EEXIST;
+	}
+
+	end = offset + len + TOEPLITZ_HASH_LEN - 1;
+	start = ((ctx->flags & RTE_THASH_MINIMAL_SEQ) ==
+		RTE_THASH_MINIMAL_SEQ) ? (end - (2 * ctx->reta_sz_log - 1)) :
+		offset;
+
+	ent = rte_zmalloc(NULL, sizeof(struct rte_thash_subtuple_helper) +
+		sizeof(uint32_t) * (1 << ctx->reta_sz_log), 0);
+	if (ent == NULL)
+		return -ENOMEM;
+
+	rte_strlcpy(ent->name, name, sizeof(ent->name));
+	ent->offset = start;
+	ent->len = end - start;
+	ent->lsb_msk = (1 << ctx->reta_sz_log) - 1;
+
+	cur_ent = LIST_FIRST(&ctx->head);
+	while (cur_ent) {
+		uint32_t range_end = cur_ent->offset + cur_ent->len;
+		next_ent = LIST_NEXT(cur_ent, next);
+		prev_ent = cur_ent;
+		/* Iterate through overlapping ranges */
+		while ((next_ent != NULL) && (next_ent->offset < range_end)) {
+			range_end = RTE_MAX(next_ent->offset + next_ent->len,
+				range_end);
+			if (start > next_ent->offset)
+				prev_ent = next_ent;
+
+			next_ent = LIST_NEXT(next_ent, next);
+		}
+
+		if (start < cur_ent->offset)
+			return insert_before(ctx, ent, cur_ent, next_ent,
+				start, end, range_end);
+		else if (start < range_end)
+			return insert_after(ctx, ent, cur_ent, next_ent,
+				prev_ent, end, range_end);
+
+		cur_ent = next_ent;
+		continue;
+	}
+
+	ent->lfsr = alloc_lfsr(ctx);
+	if (ent->lfsr == NULL) {
+		rte_free(ent);
+		return -ENOMEM;
+	}
+
+	/* generate nonoverlapping range [start, end) */
+	ret = generate_subkey(ctx, ent->lfsr, start, end - 1);
+	if (ret != 0) {
+		free_lfsr(ent->lfsr);
+		rte_free(ent);
+		return ret;
+	}
+	if (LIST_EMPTY(&ctx->head)) {
+		LIST_INSERT_HEAD(&ctx->head, ent, next);
+	} else {
+		LIST_FOREACH(next_ent, &ctx->head, next)
+			prev_ent = next_ent;
+
+		LIST_INSERT_AFTER(prev_ent, ent, next);
+	}
+	generate_compliment_table(ctx, ent);
+	ctx->subtuples_nb++;
+
 	return 0;
 }
 
 struct rte_thash_subtuple_helper *
-rte_thash_get_helper(struct rte_thash_ctx *ctx __rte_unused,
-	const char *name __rte_unused)
+rte_thash_get_helper(struct rte_thash_ctx *ctx, const char *name)
 {
+	struct rte_thash_subtuple_helper *ent;
+
+	if ((ctx == NULL) || (name == NULL))
+		return NULL;
+
+	LIST_FOREACH(ent, &ctx->head, next) {
+		if (strncmp(name, ent->name, sizeof(ent->name)) == 0)
+			return ent;
+	}
+
 	return NULL;
 }
 
 uint32_t
-rte_thash_get_compliment(struct rte_thash_subtuple_helper *h __rte_unused,
-	uint32_t hash __rte_unused, uint32_t desired_hash __rte_unused)
+rte_thash_get_compliment(struct rte_thash_subtuple_helper *h,
+	uint32_t hash, uint32_t desired_hash)
 {
-	return 0;
+	return h->compl_table[(hash ^ desired_hash) & h->lsb_msk];
 }
 
 const uint8_t *
-rte_thash_get_key(struct rte_thash_ctx *ctx __rte_unused)
+rte_thash_get_key(struct rte_thash_ctx *ctx)
 {
-	return NULL;
+	return ctx->hash_key;
 }
-- 
2.7.4


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [dpdk-dev] [PATCH v1 3/3] test/hash: add additional thash tests
  2021-03-16 18:24 [dpdk-dev] [PATCH v1 0/3] Predictable RSS feature Vladimir Medvedkin
  2021-03-16 18:24 ` [dpdk-dev] [PATCH v1 1/3] hash: add predictable RSS API Vladimir Medvedkin
  2021-03-16 18:24 ` [dpdk-dev] [PATCH v1 2/3] hash: add predictable RSS implementation Vladimir Medvedkin
@ 2021-03-16 18:24 ` Vladimir Medvedkin
  2021-04-06 19:50 ` [dpdk-dev] [PATCH v2 0/3] Predictable RSS feature Vladimir Medvedkin
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 23+ messages in thread
From: Vladimir Medvedkin @ 2021-03-16 18:24 UTC (permalink / raw)
  To: dev
  Cc: konstantin.ananyev, andrey.chilikin, ray.kinsella, yipeng1.wang,
	sameh.gobriel, bruce.richardson

This patch adds tests for predictable RSS feature

Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
---
 app/test/test_thash.c | 383 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 377 insertions(+), 6 deletions(-)

diff --git a/app/test/test_thash.c b/app/test/test_thash.c
index a6aadd1..e55c0f7 100644
--- a/app/test/test_thash.c
+++ b/app/test/test_thash.c
@@ -5,11 +5,14 @@
 #include <rte_common.h>
 #include <rte_eal.h>
 #include <rte_ip.h>
+#include <rte_random.h>
 
 #include "test.h"
 
 #include <rte_thash.h>
 
+#define HASH_MSK(reta_sz)	((1 << reta_sz) - 1)
+
 struct test_thash_v4 {
 	uint32_t	dst_ip;
 	uint32_t	src_ip;
@@ -75,7 +78,7 @@ uint8_t default_rss_key[] = {
 };
 
 static int
-test_thash(void)
+test_toeplitz_hash_calc(void)
 {
 	uint32_t i, j;
 	union rte_thash_tuple tuple;
@@ -100,7 +103,7 @@ test_thash(void)
 				RTE_THASH_V4_L4_LEN, default_rss_key);
 		if ((rss_l3 != v4_tbl[i].hash_l3) ||
 				(rss_l3l4 != v4_tbl[i].hash_l3l4))
-			return -1;
+			return -TEST_FAILED;
 		/*Calculate hash with converted key*/
 		rss_l3 = rte_softrss_be((uint32_t *)&tuple,
 				RTE_THASH_V4_L3_LEN, rss_key_be);
@@ -108,7 +111,7 @@ test_thash(void)
 				RTE_THASH_V4_L4_LEN, rss_key_be);
 		if ((rss_l3 != v4_tbl[i].hash_l3) ||
 				(rss_l3l4 != v4_tbl[i].hash_l3l4))
-			return -1;
+			return -TEST_FAILED;
 	}
 	for (i = 0; i < RTE_DIM(v6_tbl); i++) {
 		/*Fill ipv6 hdr*/
@@ -127,7 +130,7 @@ test_thash(void)
 				RTE_THASH_V6_L4_LEN, default_rss_key);
 		if ((rss_l3 != v6_tbl[i].hash_l3) ||
 				(rss_l3l4 != v6_tbl[i].hash_l3l4))
-			return -1;
+			return -TEST_FAILED;
 		/*Calculate hash with converted key*/
 		rss_l3 = rte_softrss_be((uint32_t *)&tuple,
 				RTE_THASH_V6_L3_LEN, rss_key_be);
@@ -135,9 +138,377 @@ test_thash(void)
 				RTE_THASH_V6_L4_LEN, rss_key_be);
 		if ((rss_l3 != v6_tbl[i].hash_l3) ||
 				(rss_l3l4 != v6_tbl[i].hash_l3l4))
-			return -1;
+			return -TEST_FAILED;
 	}
-	return 0;
+	return TEST_SUCCESS;
+}
+
+static int
+test_create_invalid(void)
+{
+	struct rte_thash_ctx *ctx;
+	int key_len = 40;
+	int reta_sz = 7;
+
+	ctx = rte_thash_init_ctx(NULL, key_len, reta_sz, NULL, 0);
+	RTE_TEST_ASSERT(ctx == NULL,
+		"Call succeeded with invalid parameters\n");
+
+	ctx = rte_thash_init_ctx("test", 0, reta_sz, NULL, 0);
+	RTE_TEST_ASSERT(ctx == NULL,
+		"Call succeeded with invalid parameters\n");
+
+	ctx = rte_thash_init_ctx(NULL, key_len, 1, NULL, 0);
+	RTE_TEST_ASSERT(ctx == NULL,
+		"Call succeeded with invalid parameters\n");
+
+	ctx = rte_thash_init_ctx(NULL, key_len, 17, NULL, 0);
+	RTE_TEST_ASSERT(ctx == NULL,
+		"Call succeeded with invalid parameters\n");
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_multiple_create(void)
+{
+	struct rte_thash_ctx *ctx;
+	int key_len = 40;
+	int reta_sz = 7;
+	int i;
+
+	for (i = 0; i < 100; i++) {
+		ctx = rte_thash_init_ctx("test", key_len, reta_sz, NULL, 0);
+		RTE_TEST_ASSERT(ctx != NULL, "Can not create CTX\n");
+
+		rte_thash_free_ctx(ctx);
+	}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_free_null(void)
+{
+	struct rte_thash_ctx *ctx;
+
+	ctx = rte_thash_init_ctx("test", 40, 7, NULL, 0);
+	RTE_TEST_ASSERT(ctx != NULL, "Can not create CTX\n");
+
+	rte_thash_free_ctx(ctx);
+	rte_thash_free_ctx(NULL);
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_add_invalid_helper(void)
+{
+	struct rte_thash_ctx *ctx;
+	const int key_len = 40;
+	int reta_sz = 7;
+	int ret;
+
+	ctx = rte_thash_init_ctx("test", key_len, reta_sz, NULL, 0);
+	RTE_TEST_ASSERT(ctx != NULL, "can not create thash ctx\n");
+
+	ret = rte_thash_add_helper(NULL, "test", reta_sz, 0);
+	RTE_TEST_ASSERT(ret == -EINVAL,
+		"Call succeeded with invalid parameters\n");
+
+	ret = rte_thash_add_helper(ctx, NULL, reta_sz, 0);
+	RTE_TEST_ASSERT(ret == -EINVAL,
+		"Call succeeded with invalid parameters\n");
+
+	ret = rte_thash_add_helper(ctx, "test", reta_sz - 1, 0);
+	RTE_TEST_ASSERT(ret == -EINVAL,
+		"Call succeeded with invalid parameters\n");
+
+	ret = rte_thash_add_helper(ctx, "test", reta_sz, key_len * 8);
+	RTE_TEST_ASSERT(ret == -EINVAL,
+		"Call succeeded with invalid parameters\n");
+
+	ret = rte_thash_add_helper(ctx, "first_range", reta_sz, 0);
+	RTE_TEST_ASSERT(ret == 0, "Can not create helper\n");
+
+	ret = rte_thash_add_helper(ctx, "first_range", reta_sz, 0);
+	RTE_TEST_ASSERT(ret == -EEXIST,
+		"Call succeeded with duplicated name\n");
+
+	/*
+	 * Create second helper with offset 3 * reta_sz.
+	 * Note first_range helper created range in key:
+	 * [0, 32 + length{= reta_sz} - 1), i.e [0, 37).
+	 * second range is [44, 81)
+	 */
+	ret = rte_thash_add_helper(ctx, "second_range", reta_sz,
+		32 +  2 * reta_sz);
+	RTE_TEST_ASSERT(ret == 0, "Can not create helper\n");
+
+	/*
+	 * Try to create overlapping with first_ and second_ ranges,
+	 * i.e. [6, 49)
+	 */
+	ret = rte_thash_add_helper(ctx, "third_range", 2 * reta_sz, reta_sz);
+	RTE_TEST_ASSERT(ret == -EEXIST,
+		"Call succeeded with overlapping ranges\n");
+
+	rte_thash_free_ctx(ctx);
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_find_existing(void)
+{
+	struct rte_thash_ctx *ctx, *ret_ctx;
+
+	ctx = rte_thash_init_ctx("test", 40, 7, NULL, 0);
+	RTE_TEST_ASSERT(ctx != NULL, "can not create thash ctx\n");
+
+	ret_ctx = rte_thash_find_existing("test");
+	RTE_TEST_ASSERT(ret_ctx != NULL, "can not find existing ctx\n");
+
+	rte_thash_free_ctx(ctx);
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_get_helper(void)
+{
+	struct rte_thash_ctx *ctx;
+	struct rte_thash_subtuple_helper *h;
+	int ret;
+
+	ctx = rte_thash_init_ctx("test", 40, 7, NULL, 0);
+	RTE_TEST_ASSERT(ctx != NULL, "Can not create thash ctx\n");
+
+	h = rte_thash_get_helper(NULL, "first_range");
+	RTE_TEST_ASSERT(h == NULL, "Call succeeded with invalid parameters\n");
+
+	h = rte_thash_get_helper(ctx, NULL);
+	RTE_TEST_ASSERT(h == NULL, "Call succeeded with invalid parameters\n");
+
+	ret = rte_thash_add_helper(ctx, "first_range", 8, 0);
+	RTE_TEST_ASSERT(ret == 0, "Can not create helper\n");
+
+	h = rte_thash_get_helper(ctx, "first_range");
+	RTE_TEST_ASSERT(h != NULL, "Can not find helper\n");
+
+	rte_thash_free_ctx(ctx);
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_period_overflow(void)
+{
+	struct rte_thash_ctx *ctx;
+	int reta_sz = 7; /* reflects polynomial degree */
+	int ret;
+
+	/* first create without RTE_THASH_IGNORE_PERIOD_OVERFLOW flag */
+	ctx = rte_thash_init_ctx("test", 40, reta_sz, NULL, 0);
+	RTE_TEST_ASSERT(ctx != NULL, "Can not create thash ctx\n");
+
+	/* requested range > (2^reta_sz) - 1 */
+	ret = rte_thash_add_helper(ctx, "test", (1 << reta_sz), 0);
+	RTE_TEST_ASSERT(ret == -ENOSPC,
+		"Call succeeded with invalid parameters\n");
+
+	/* requested range == len + 32 - 1, smaller than (2^reta_sz) - 1 */
+	ret = rte_thash_add_helper(ctx, "test", (1 << reta_sz) - 32, 0);
+	RTE_TEST_ASSERT(ret == 0, "Can not create helper\n");
+
+	rte_thash_free_ctx(ctx);
+
+	/* create with RTE_THASH_IGNORE_PERIOD_OVERFLOW flag */
+	ctx = rte_thash_init_ctx("test", 40, reta_sz, NULL,
+		RTE_THASH_IGNORE_PERIOD_OVERFLOW);
+	RTE_TEST_ASSERT(ctx != NULL, "Can not create thash ctx\n");
+
+	/* requested range > (2^reta_sz - 1) */
+	ret = rte_thash_add_helper(ctx, "test", (1 << reta_sz) + 10, 0);
+	RTE_TEST_ASSERT(ret == 0, "Can not create helper\n");
+
+	rte_thash_free_ctx(ctx);
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_predictable_rss_min_seq(void)
+{
+	struct rte_thash_ctx *ctx;
+	struct rte_thash_subtuple_helper *h;
+	const int key_len = 40;
+	int reta_sz = 6;
+	uint8_t initial_key[key_len];
+	const uint8_t *new_key;
+	int ret;
+	union rte_thash_tuple tuple;
+	uint32_t orig_hash, adj_hash, adj;
+	unsigned int desired_value = 27 & HASH_MSK(reta_sz);
+	uint16_t port_value = 22;
+
+	memset(initial_key, 0, key_len);
+
+	ctx = rte_thash_init_ctx("test", key_len, reta_sz, initial_key,
+		RTE_THASH_MINIMAL_SEQ);
+	RTE_TEST_ASSERT(ctx != NULL, "can not create thash ctx\n");
+
+	ret = rte_thash_add_helper(ctx, "snat", sizeof(uint16_t) * 8,
+		offsetof(union rte_thash_tuple, v4.sport) * 8);
+	RTE_TEST_ASSERT(ret == 0, "can not add helper, ret %d\n", ret);
+
+	h = rte_thash_get_helper(ctx, "snat");
+	RTE_TEST_ASSERT(h != NULL, "can not find helper\n");
+
+	new_key = rte_thash_get_key(ctx);
+	tuple.v4.src_addr = RTE_IPV4(0, 0, 0, 0);
+	tuple.v4.dst_addr = RTE_IPV4(0, 0, 0, 0);
+	tuple.v4.sport = 0;
+	tuple.v4.sport = rte_cpu_to_be_16(port_value);
+	tuple.v4.dport = 0;
+	tuple.v4.sctp_tag = rte_be_to_cpu_32(tuple.v4.sctp_tag);
+
+	orig_hash = rte_softrss((uint32_t *)&tuple,
+		RTE_THASH_V4_L4_LEN, new_key);
+	adj = rte_thash_get_compliment(h, orig_hash, desired_value);
+
+	tuple.v4.sctp_tag = rte_cpu_to_be_32(tuple.v4.sctp_tag);
+	tuple.v4.sport ^= rte_cpu_to_be_16(adj);
+	tuple.v4.sctp_tag = rte_be_to_cpu_32(tuple.v4.sctp_tag);
+
+	adj_hash = rte_softrss((uint32_t *)&tuple,
+		RTE_THASH_V4_L4_LEN, new_key);
+	RTE_TEST_ASSERT((adj_hash & HASH_MSK(reta_sz)) ==
+		desired_value, "bad desired value\n");
+
+	rte_thash_free_ctx(ctx);
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * This test creates 7 subranges in the following order:
+ * range_one	= [56, 95),	len = 8, offset = 56
+ * range_two	= [64, 103),	len = 8, offset = 64
+ * range_three	= [120, 159),	len = 8, offset = 120
+ * range_four	= [48, 87),	len = 8, offset = 48
+ * range_five	= [57, 95),	len = 7, offset = 57
+ * range_six	= [40, 111),	len = 40, offset = 40
+ * range_seven	= [0, 39),	len = 8, offset = 0
+ */
+struct range {
+	const char *name;
+	int len;
+	int offset;
+	int byte_idx;
+};
+
+struct range rng_arr[] = {
+	{"one",   8,  56,  7},
+	{"two",   8,  64,  8},
+	{"three", 8,  120, 15},
+	{"four",  8,  48,  6},
+	{"six",   40, 40,  9},
+	{"five",  7,  57,  7},
+	{"seven", 8,  0,   0}
+};
+
+static int
+test_predictable_rss_multirange(void)
+{
+	struct rte_thash_ctx *ctx;
+	struct rte_thash_subtuple_helper *h[RTE_DIM(rng_arr)];
+	const uint8_t *new_key;
+	const int key_len = 40;
+	int reta_sz = 7;
+	unsigned int i, j, k;
+	int ret;
+	uint32_t desired_value = rte_rand() & HASH_MSK(reta_sz);
+	uint8_t tuples[RTE_DIM(rng_arr)][16] = { {0} };
+	uint32_t *ptr;
+	uint32_t hashes[RTE_DIM(rng_arr)];
+	uint32_t adj_hashes[RTE_DIM(rng_arr)];
+	uint32_t adj;
+
+	ctx = rte_thash_init_ctx("test", key_len, reta_sz, NULL, 0);
+	RTE_TEST_ASSERT(ctx != NULL, "can not create thash ctx\n");
+
+	for (i = 0; i < RTE_DIM(rng_arr); i++) {
+		ret = rte_thash_add_helper(ctx, rng_arr[i].name,
+			rng_arr[i].len, rng_arr[i].offset);
+		RTE_TEST_ASSERT(ret == 0, "can not add helper\n");
+
+		h[i] = rte_thash_get_helper(ctx, rng_arr[i].name);
+		RTE_TEST_ASSERT(h[i] != NULL, "can not find helper\n");
+	}
+	new_key = rte_thash_get_key(ctx);
+
+	/*
+	 * calculate hashes, compliments, then adjust keys with
+	 * compliments and recalculate hashes
+	 */
+	for (i = 0; i < RTE_DIM(rng_arr); i++) {
+		for (k = 0; k < 100; k++) {
+			/* init with random keys */
+			ptr = (uint32_t *)&tuples[i][0];
+			for (j = 0; j < 4; j++)
+				ptr[j] = rte_rand();
+			/* convert keys from BE to CPU byte order */
+			for (j = 0; j < 4; j++)
+				ptr[j] = rte_be_to_cpu_32(ptr[j]);
+
+			hashes[i] = rte_softrss(ptr, 4, new_key);
+			adj = rte_thash_get_compliment(h[i], hashes[i],
+				desired_value);
+			/* convert back to BE to adjust the value */
+			for (j = 0; j < 4; j++)
+				ptr[j] = rte_cpu_to_be_32(ptr[j]);
+
+			tuples[i][rng_arr[i].byte_idx] ^= adj;
+
+			for (j = 0; j < 4; j++)
+				ptr[j] = rte_be_to_cpu_32(ptr[j]);
+
+			adj_hashes[i] = rte_softrss(ptr, 4, new_key);
+			RTE_TEST_ASSERT((adj_hashes[i] & HASH_MSK(reta_sz)) ==
+				desired_value,
+				"bad desired value for %d tuple\n", i);
+		}
+	}
+
+	rte_thash_free_ctx(ctx);
+
+	return TEST_SUCCESS;
+}
+
+static struct unit_test_suite thash_tests = {
+	.suite_name = "thash autotest",
+	.setup = NULL,
+	.teardown = NULL,
+	.unit_test_cases = {
+	TEST_CASE(test_toeplitz_hash_calc),
+	TEST_CASE(test_create_invalid),
+	TEST_CASE(test_multiple_create),
+	TEST_CASE(test_free_null),
+	TEST_CASE(test_add_invalid_helper),
+	TEST_CASE(test_find_existing),
+	TEST_CASE(test_get_helper),
+	TEST_CASE(test_period_overflow),
+	TEST_CASE(test_predictable_rss_min_seq),
+	TEST_CASE(test_predictable_rss_multirange),
+	TEST_CASES_END()
+	}
+};
+
+static int
+test_thash(void)
+{
+	return unit_test_suite_runner(&thash_tests);
 }
 
 REGISTER_TEST_COMMAND(thash_autotest, test_thash);
-- 
2.7.4


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [dpdk-dev] [PATCH v2 0/3] Predictable RSS feature
  2021-03-16 18:24 [dpdk-dev] [PATCH v1 0/3] Predictable RSS feature Vladimir Medvedkin
                   ` (2 preceding siblings ...)
  2021-03-16 18:24 ` [dpdk-dev] [PATCH v1 3/3] test/hash: add additional thash tests Vladimir Medvedkin
@ 2021-04-06 19:50 ` Vladimir Medvedkin
  2021-04-08 15:56   ` Stephen Hemminger
                     ` (5 more replies)
  2021-04-06 19:50 ` [dpdk-dev] [PATCH v2 1/3] hash: add predictable RSS API Vladimir Medvedkin
                   ` (2 subsequent siblings)
  6 siblings, 6 replies; 23+ messages in thread
From: Vladimir Medvedkin @ 2021-04-06 19:50 UTC (permalink / raw)
  To: dev
  Cc: konstantin.ananyev, andrey.chilikin, ray.kinsella, yipeng1.wang,
	sameh.gobriel, bruce.richardson

This patch series introduces predictable RSS feature.
It is based on the idea of searching for partial hash collisions
within Toeplitz hash.

The Toeplitz hash function is a homomorphism between (G, ^) and (H, ^),
where (G, ^) - is a group of tuples and (H, ^) is a group of hashes
with respect to XOR operation. So tuples and hashes could be treated as
n-dimension and 32-dimension vector spaces over GF(2).
So, f(x ^ y) == f(x) ^ f(y)
where f - is the toeplitz hash function and x, y are tuples.

The ability to predict partial collisions allows user to compute
input hash value with desired LSB values.
Usually number of LSB's are defined by the size of RSS Redirection Table.

There could be number of use cases, for example:
1) NAT. Using this library it is possible to select a new port number
on a translation in the way that rss hash for original tuple will have
the same LSB's as rss hash for reverse tuple.
2) IPSec/MPLS/Vxlan. It is possible to choose tunnel id to be pinned to
a desired queue.
3) TCP stack. It is possible to choose a source port number for outgoing
connections in the way that received replies will be assigned to
desired queue.
4) RSS hash key generation. Hash key initialization with random values
does not guarantee an uniform distribution amongst queues. This library
uses mathematically proved algorithm to complete the rss hash key to
provide the best distribution.

v2:
- added extra API rte_thash_adjust_tuple()
- added extra tests for rte_thash_adjust_tuple()
- added extra fields to rte_thash_subtuple_helper struct
- fixed typos 

Vladimir Medvedkin (3):
  hash: add predictable RSS API
  hash: add predictable RSS implementation
  test/hash: add additional thash tests

 app/test/test_thash.c       | 468 +++++++++++++++++++++++++++++++-
 lib/librte_hash/meson.build |   3 +-
 lib/librte_hash/rte_thash.c | 637 ++++++++++++++++++++++++++++++++++++++++++++
 lib/librte_hash/rte_thash.h | 180 +++++++++++++
 lib/librte_hash/version.map |   8 +
 5 files changed, 1289 insertions(+), 7 deletions(-)
 create mode 100644 lib/librte_hash/rte_thash.c

-- 
2.7.4


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [dpdk-dev] [PATCH v2 1/3] hash: add predictable RSS API
  2021-03-16 18:24 [dpdk-dev] [PATCH v1 0/3] Predictable RSS feature Vladimir Medvedkin
                   ` (3 preceding siblings ...)
  2021-04-06 19:50 ` [dpdk-dev] [PATCH v2 0/3] Predictable RSS feature Vladimir Medvedkin
@ 2021-04-06 19:50 ` Vladimir Medvedkin
  2021-04-10  0:05   ` Wang, Yipeng1
  2021-04-06 19:50 ` [dpdk-dev] [PATCH v2 2/3] hash: add predictable RSS implementation Vladimir Medvedkin
  2021-04-06 19:50 ` [dpdk-dev] [PATCH v2 3/3] test/hash: add additional thash tests Vladimir Medvedkin
  6 siblings, 1 reply; 23+ messages in thread
From: Vladimir Medvedkin @ 2021-04-06 19:50 UTC (permalink / raw)
  To: dev
  Cc: konstantin.ananyev, andrey.chilikin, ray.kinsella, yipeng1.wang,
	sameh.gobriel, bruce.richardson

This patch adds predictable RSS API.
It is based on the idea of searching partial Toeplitz hash collisions.

Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
---
 lib/librte_hash/meson.build |   3 +-
 lib/librte_hash/rte_thash.c |  96 ++++++++++++++++++++++++++++++
 lib/librte_hash/rte_thash.h | 138 ++++++++++++++++++++++++++++++++++++++++++++
 lib/librte_hash/version.map |   7 +++
 4 files changed, 243 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_hash/rte_thash.c

diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build
index 242859f..3546014 100644
--- a/lib/librte_hash/meson.build
+++ b/lib/librte_hash/meson.build
@@ -8,6 +8,7 @@ headers = files('rte_fbk_hash.h',
 	'rte_thash.h')
 indirect_headers += files('rte_crc_arm64.h')
 
-sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c')
+sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c', 'rte_thash.c')
+deps += ['net']
 deps += ['ring']
 deps += ['rcu']
diff --git a/lib/librte_hash/rte_thash.c b/lib/librte_hash/rte_thash.c
new file mode 100644
index 0000000..79e8724
--- /dev/null
+++ b/lib/librte_hash/rte_thash.c
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Intel Corporation
+ */
+
+#include <rte_thash.h>
+#include <rte_tailq.h>
+#include <rte_random.h>
+#include <rte_memcpy.h>
+#include <rte_errno.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_malloc.h>
+
+#define THASH_NAME_LEN		64
+
+struct thash_lfsr {
+	uint32_t	ref_cnt;
+	uint32_t	poly;
+	/**< polynomial associated with the lfsr */
+	uint32_t	rev_poly;
+	/**< polynomial to generate the sequence in reverse direction */
+	uint32_t	state;
+	/**< current state of the lfsr */
+	uint32_t	rev_state;
+	/**< current state of the lfsr for reverse direction */
+	uint32_t	deg;	/**< polynomial degree*/
+	uint32_t	bits_cnt;  /**< number of bits generated by lfsr*/
+};
+
+struct rte_thash_subtuple_helper {
+	char	name[THASH_NAME_LEN];	/** < Name of subtuple configuration */
+	LIST_ENTRY(rte_thash_subtuple_helper)	next;
+	struct thash_lfsr	*lfsr;
+	uint32_t	offset;		/** < Offset in bits of the subtuple */
+	uint32_t	len;		/** < Length in bits of the subtuple */
+	uint32_t	lsb_msk;	/** < (1 << reta_sz_log) - 1 */
+	__extension__ uint32_t	compl_table[0] __rte_cache_aligned;
+	/** < Complimentary table */
+};
+
+struct rte_thash_ctx {
+	char		name[THASH_NAME_LEN];
+	LIST_HEAD(, rte_thash_subtuple_helper) head;
+	uint32_t	key_len;	/** < Length of the NIC RSS hash key */
+	uint32_t	reta_sz_log;	/** < size of the RSS ReTa in bits */
+	uint32_t	subtuples_nb;	/** < number of subtuples */
+	uint32_t	flags;
+	uint8_t		hash_key[0];
+};
+
+struct rte_thash_ctx *
+rte_thash_init_ctx(const char *name __rte_unused,
+	uint32_t key_len __rte_unused, uint32_t reta_sz __rte_unused,
+	uint8_t *key __rte_unused, uint32_t flags __rte_unused)
+{
+	return NULL;
+}
+
+struct rte_thash_ctx *
+rte_thash_find_existing(const char *name __rte_unused)
+{
+	return NULL;
+}
+
+void
+rte_thash_free_ctx(struct rte_thash_ctx *ctx __rte_unused)
+{
+}
+
+int
+rte_thash_add_helper(struct rte_thash_ctx *ctx __rte_unused,
+	const char *name __rte_unused, uint32_t len __rte_unused,
+	uint32_t offset __rte_unused)
+{
+	return 0;
+}
+
+struct rte_thash_subtuple_helper *
+rte_thash_get_helper(struct rte_thash_ctx *ctx __rte_unused,
+	const char *name __rte_unused)
+{
+	return NULL;
+}
+
+uint32_t
+rte_thash_get_compliment(struct rte_thash_subtuple_helper *h __rte_unused,
+	uint32_t hash __rte_unused, uint32_t desired_hash __rte_unused)
+{
+	return 0;
+}
+
+const uint8_t *
+rte_thash_get_key(struct rte_thash_ctx *ctx __rte_unused)
+{
+	return NULL;
+}
diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h
index 061efa2..38a641b 100644
--- a/lib/librte_hash/rte_thash.h
+++ b/lib/librte_hash/rte_thash.h
@@ -1,5 +1,6 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright(c) 2015-2019 Vladimir Medvedkin <medvedkinv@gmail.com>
+ * Copyright(c) 2021 Intel Corporation
  */
 
 #ifndef _RTE_THASH_H
@@ -222,6 +223,143 @@ rte_softrss_be(uint32_t *input_tuple, uint32_t input_len,
 	return ret;
 }
 
+/**
+ * LFSR will ignore if the generated m-sequence has more than 2^n - 1 bits
+ */
+#define RTE_THASH_IGNORE_PERIOD_OVERFLOW	0x1
+/**
+ * Generate minimal required bit (equal to ReTa LSB) sequence into
+ * the hash_key
+ */
+#define RTE_THASH_MINIMAL_SEQ			0x2
+
+/** @internal thash context structure. */
+struct rte_thash_ctx;
+/** @internal thash helper structure. */
+struct rte_thash_subtuple_helper;
+
+/**
+ * Create a new thash context.
+ *
+ * @param name
+ *  context name
+ * @param key_len
+ *  length of the toeplitz hash key
+ * @param reta_sz
+ *  logarithm of the NIC's Redirection Table (ReTa) size,
+ *  i.e. number of the LSBs of the hash used to determine
+ *  the reta entry.
+ * @param key
+ *  pointer to the key used to init an internal key state.
+ *  Could be NULL, in this case internal key will be inited with random.
+ * @param flags
+ *  supported flags are:
+ *   RTE_THASH_IGNORE_PERIOD_OVERFLOW
+ *   RTE_THASH_MINIMAL_SEQ
+ * @return
+ *  A pointer to the created context on success
+ *  NULL otherwise
+ */
+__rte_experimental
+struct rte_thash_ctx *
+rte_thash_init_ctx(const char *name, uint32_t key_len, uint32_t reta_sz,
+	uint8_t *key, uint32_t flags);
+
+/**
+ * Find an existing thash context and return a pointer to it.
+ *
+ * @param name
+ *  Name of the thash context
+ * @return
+ *  Pointer to the thash context or NULL if it was not found with rte_errno
+ *  set appropriately. Possible rte_errno values include:
+ *   - ENOENT - required entry not available to return.
+ */
+__rte_experimental
+struct rte_thash_ctx *
+rte_thash_find_existing(const char *name);
+
+/**
+ * Free a thash context object
+ *
+ * @param ctx
+ *  thash context
+ * @return
+ *  None
+ */
+__rte_experimental
+void
+rte_thash_free_ctx(struct rte_thash_ctx *ctx);
+
+/**
+ * Add special properties to the toeplitz hash key inside a thash context.
+ * Creates an internal helper struct which has a complimentary table
+ * to calculate toeplitz hash collisions.
+ *
+ * @param ctx
+ *  thash context
+ * @param name
+ *  name of the helper
+ * @param len
+ *  length in bits of the target subtuple
+ * @param offset
+ *  offset in bits of the subtuple
+ * @return
+ *  0 on success
+ *  negative on error
+ */
+__rte_experimental
+int
+rte_thash_add_helper(struct rte_thash_ctx *ctx, const char *name, uint32_t len,
+	uint32_t offset);
+
+/**
+ * Find a helper in the context by the given name
+ *
+ * @param ctx
+ *  thash context
+ * @param name
+ *  name of the helper
+ * @return
+ *  Pointer to the thash helper or NULL if it was not found.
+ */
+__rte_experimental
+struct rte_thash_subtuple_helper *
+rte_thash_get_helper(struct rte_thash_ctx *ctx, const char *name);
+
+/**
+ * Get a complimentary value for the subtuple to produce a
+ * partial toeplitz hash collision. It must be XOR'ed with the
+ * subtuple to produce the hash value with the desired hash LSB's
+ *
+ * @param h
+ *  Pointer to the helper struct
+ * @param hash
+ *  toeplitz hash value calculated for the given tuple
+ * @param desired_hash
+ *  desired hash value to find a collision for
+ * @return
+ *  A complimentary value which must be xored with the corresponding subtuple
+ */
+__rte_experimental
+uint32_t
+rte_thash_get_compliment(struct rte_thash_subtuple_helper *h,
+	uint32_t hash, uint32_t desired_hash);
+
+/**
+ * Get a pointer to the toeplitz hash key contained in the context.
+ * It changes after each addition of a helper. It should be installed to
+ * the NIC.
+ *
+ * @param ctx
+ *  thash context
+ * @return
+ *  A pointer to the toeplitz hash key
+ */
+__rte_experimental
+const uint8_t *
+rte_thash_get_key(struct rte_thash_ctx *ctx);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_hash/version.map b/lib/librte_hash/version.map
index c6d7308..93cb230 100644
--- a/lib/librte_hash/version.map
+++ b/lib/librte_hash/version.map
@@ -37,4 +37,11 @@ EXPERIMENTAL {
 	rte_hash_lookup_with_hash_bulk_data;
 	rte_hash_max_key_id;
 	rte_hash_rcu_qsbr_add;
+	rte_thash_add_helper;
+	rte_thash_find_existing;
+	rte_thash_free_ctx;
+	rte_thash_get_compliment;
+	rte_thash_get_helper;
+	rte_thash_get_key;
+	rte_thash_init_ctx;
 };
-- 
2.7.4


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [dpdk-dev] [PATCH v2 2/3] hash: add predictable RSS implementation
  2021-03-16 18:24 [dpdk-dev] [PATCH v1 0/3] Predictable RSS feature Vladimir Medvedkin
                   ` (4 preceding siblings ...)
  2021-04-06 19:50 ` [dpdk-dev] [PATCH v2 1/3] hash: add predictable RSS API Vladimir Medvedkin
@ 2021-04-06 19:50 ` Vladimir Medvedkin
  2021-04-07 12:53   ` Ananyev, Konstantin
  2021-04-10  0:10   ` Wang, Yipeng1
  2021-04-06 19:50 ` [dpdk-dev] [PATCH v2 3/3] test/hash: add additional thash tests Vladimir Medvedkin
  6 siblings, 2 replies; 23+ messages in thread
From: Vladimir Medvedkin @ 2021-04-06 19:50 UTC (permalink / raw)
  To: dev
  Cc: konstantin.ananyev, andrey.chilikin, ray.kinsella, yipeng1.wang,
	sameh.gobriel, bruce.richardson

This patch implements predictable RSS functionality.

Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
---
 lib/librte_hash/rte_thash.c | 577 ++++++++++++++++++++++++++++++++++++++++++--
 lib/librte_hash/rte_thash.h |  42 ++++
 lib/librte_hash/version.map |   1 +
 3 files changed, 602 insertions(+), 18 deletions(-)

diff --git a/lib/librte_hash/rte_thash.c b/lib/librte_hash/rte_thash.c
index 79e8724..cc60ada 100644
--- a/lib/librte_hash/rte_thash.c
+++ b/lib/librte_hash/rte_thash.c
@@ -12,6 +12,45 @@
 #include <rte_malloc.h>
 
 #define THASH_NAME_LEN		64
+#define TOEPLITZ_HASH_LEN	32
+
+#define	RETA_SZ_MIN	2U
+#define	RETA_SZ_MAX	16U
+#define RETA_SZ_IN_RANGE(reta_sz)	((reta_sz >= RETA_SZ_MIN) && \
+					(reta_sz <= RETA_SZ_MAX))
+
+TAILQ_HEAD(rte_thash_list, rte_tailq_entry);
+static struct rte_tailq_elem rte_thash_tailq = {
+	.name = "RTE_THASH",
+};
+EAL_REGISTER_TAILQ(rte_thash_tailq)
+
+/**
+ * Table of some irreducible polynomials over GF(2).
+ * For lfsr they are represented in BE bit order, and
+ * x^0 is masked out.
+ * For example, poly x^5 + x^2 + 1 will be represented
+ * as (101001b & 11111b) = 01001b = 0x9
+ */
+static const uint32_t irreducible_poly_table[][4] = {
+	{0, 0, 0, 0},	/** < degree 0 */
+	{1, 1, 1, 1},	/** < degree 1 */
+	{0x3, 0x3, 0x3, 0x3},	/** < degree 2 and so on... */
+	{0x5, 0x3, 0x5, 0x3},
+	{0x9, 0x3, 0x9, 0x3},
+	{0x9, 0x1b, 0xf, 0x5},
+	{0x21, 0x33, 0x1b, 0x2d},
+	{0x41, 0x11, 0x71, 0x9},
+	{0x71, 0xa9, 0xf5, 0x8d},
+	{0x21, 0xd1, 0x69, 0x1d9},
+	{0x81, 0x2c1, 0x3b1, 0x185},
+	{0x201, 0x541, 0x341, 0x461},
+	{0x941, 0x609, 0xe19, 0x45d},
+	{0x1601, 0x1f51, 0x1171, 0x359},
+	{0x2141, 0x2111, 0x2db1, 0x2109},
+	{0x4001, 0x801, 0x101, 0x7301},
+	{0x7781, 0xa011, 0x4211, 0x86d9},
+};
 
 struct thash_lfsr {
 	uint32_t	ref_cnt;
@@ -31,8 +70,10 @@ struct rte_thash_subtuple_helper {
 	char	name[THASH_NAME_LEN];	/** < Name of subtuple configuration */
 	LIST_ENTRY(rte_thash_subtuple_helper)	next;
 	struct thash_lfsr	*lfsr;
-	uint32_t	offset;		/** < Offset in bits of the subtuple */
-	uint32_t	len;		/** < Length in bits of the subtuple */
+	uint32_t	offset;		/** < Offset of the m-sequence */
+	uint32_t	len;		/** < Length of the m-sequence */
+	uint32_t	tuple_offset;	/** < Offset in bits of the subtuple */
+	uint32_t	tuple_len;	/** < Length in bits of the subtuple */
 	uint32_t	lsb_msk;	/** < (1 << reta_sz_log) - 1 */
 	__extension__ uint32_t	compl_table[0] __rte_cache_aligned;
 	/** < Complimentary table */
@@ -48,49 +89,549 @@ struct rte_thash_ctx {
 	uint8_t		hash_key[0];
 };
 
+static inline uint32_t
+get_bit_lfsr(struct thash_lfsr *lfsr)
+{
+	uint32_t bit, ret;
+
+	/*
+	 * masking the TAP bits defined by the polynomial and
+	 * calculating parity
+	 */
+	bit = __builtin_popcount(lfsr->state & lfsr->poly) & 0x1;
+	ret = lfsr->state & 0x1;
+	lfsr->state = ((lfsr->state >> 1) | (bit << (lfsr->deg - 1))) &
+		((1 << lfsr->deg) - 1);
+
+	lfsr->bits_cnt++;
+	return ret;
+}
+
+static inline uint32_t
+get_rev_bit_lfsr(struct thash_lfsr *lfsr)
+{
+	uint32_t bit, ret;
+
+	bit = __builtin_popcount(lfsr->rev_state & lfsr->rev_poly) & 0x1;
+	ret = lfsr->rev_state & (1 << (lfsr->deg - 1));
+	lfsr->rev_state = ((lfsr->rev_state << 1) | bit) &
+		((1 << lfsr->deg) - 1);
+
+	lfsr->bits_cnt++;
+	return ret;
+}
+
+static inline uint32_t
+thash_get_rand_poly(uint32_t poly_degree)
+{
+	return irreducible_poly_table[poly_degree][rte_rand() %
+		RTE_DIM(irreducible_poly_table[poly_degree])];
+}
+
+static struct thash_lfsr *
+alloc_lfsr(struct rte_thash_ctx *ctx)
+{
+	struct thash_lfsr *lfsr;
+	uint32_t i;
+
+	if (ctx == NULL)
+		return NULL;
+
+	lfsr = rte_zmalloc(NULL, sizeof(struct thash_lfsr), 0);
+	if (lfsr == NULL)
+		return NULL;
+
+	lfsr->deg = ctx->reta_sz_log;
+	lfsr->poly = thash_get_rand_poly(lfsr->deg);
+	do {
+		lfsr->state = rte_rand() & ((1 << lfsr->deg) - 1);
+	} while (lfsr->state == 0);
+	/* init reverse order polynomial */
+	lfsr->rev_poly = (lfsr->poly >> 1) | (1 << (lfsr->deg - 1));
+	/* init proper rev_state*/
+	lfsr->rev_state = lfsr->state;
+	for (i = 0; i <= lfsr->deg; i++)
+		get_rev_bit_lfsr(lfsr);
+
+	/* clear bits_cnt after rev_state was inited */
+	lfsr->bits_cnt = 0;
+	lfsr->ref_cnt = 1;
+
+	return lfsr;
+}
+
+static void
+attach_lfsr(struct rte_thash_subtuple_helper *h, struct thash_lfsr *lfsr)
+{
+	lfsr->ref_cnt++;
+	h->lfsr = lfsr;
+}
+
+static void
+free_lfsr(struct thash_lfsr *lfsr)
+{
+	lfsr->ref_cnt--;
+	if (lfsr->ref_cnt == 0)
+		rte_free(lfsr);
+}
+
 struct rte_thash_ctx *
-rte_thash_init_ctx(const char *name __rte_unused,
-	uint32_t key_len __rte_unused, uint32_t reta_sz __rte_unused,
-	uint8_t *key __rte_unused, uint32_t flags __rte_unused)
+rte_thash_init_ctx(const char *name, uint32_t key_len, uint32_t reta_sz,
+	uint8_t *key, uint32_t flags)
 {
+	struct rte_thash_ctx *ctx;
+	struct rte_tailq_entry *te;
+	struct rte_thash_list *thash_list;
+	uint32_t i;
+	if ((name == NULL) || (key_len == 0) || !RETA_SZ_IN_RANGE(reta_sz)) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
+
+	rte_mcfg_tailq_write_lock();
+
+	/* guarantee there's no existing */
+	TAILQ_FOREACH(te, thash_list, next) {
+		ctx = (struct rte_thash_ctx *)te->data;
+		if (strncmp(name, ctx->name, sizeof(ctx->name)) == 0)
+			break;
+	}
+	ctx = NULL;
+	if (te != NULL) {
+		rte_errno = EEXIST;
+		goto exit;
+	}
+
+	/* allocate tailq entry */
+	te = rte_zmalloc("THASH_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL) {
+		RTE_LOG(ERR, HASH,
+			"Can not allocate tailq entry for thash context %s\n",
+			name);
+		rte_errno = ENOMEM;
+		goto exit;
+	}
+
+	ctx = rte_zmalloc(NULL, sizeof(struct rte_thash_ctx) + key_len, 0);
+	if (ctx == NULL) {
+		RTE_LOG(ERR, HASH, "thash ctx %s memory allocation failed\n",
+			name);
+		rte_errno = ENOMEM;
+		goto free_te;
+	}
+
+	rte_strlcpy(ctx->name, name, sizeof(ctx->name));
+	ctx->key_len = key_len;
+	ctx->reta_sz_log = reta_sz;
+	LIST_INIT(&ctx->head);
+	ctx->flags = flags;
+
+	if (key)
+		rte_memcpy(ctx->hash_key, key, key_len);
+	else {
+		for (i = 0; i < key_len; i++)
+			ctx->hash_key[i] = rte_rand();
+	}
+
+	te->data = (void *)ctx;
+	TAILQ_INSERT_TAIL(thash_list, te, next);
+
+	rte_mcfg_tailq_write_unlock();
+
+	return ctx;
+free_te:
+	rte_free(te);
+exit:
+	rte_mcfg_tailq_write_unlock();
 	return NULL;
 }
 
 struct rte_thash_ctx *
-rte_thash_find_existing(const char *name __rte_unused)
+rte_thash_find_existing(const char *name)
 {
-	return NULL;
+	struct rte_thash_ctx *ctx;
+	struct rte_tailq_entry *te;
+	struct rte_thash_list *thash_list;
+
+	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
+
+	rte_mcfg_tailq_read_lock();
+	TAILQ_FOREACH(te, thash_list, next) {
+		ctx = (struct rte_thash_ctx *)te->data;
+		if (strncmp(name, ctx->name, sizeof(ctx->name)) == 0)
+			break;
+	}
+
+	rte_mcfg_tailq_read_unlock();
+
+	if (te == NULL) {
+		rte_errno = ENOENT;
+		return NULL;
+	}
+
+	return ctx;
 }
 
 void
-rte_thash_free_ctx(struct rte_thash_ctx *ctx __rte_unused)
+rte_thash_free_ctx(struct rte_thash_ctx *ctx)
 {
+	struct rte_tailq_entry *te;
+	struct rte_thash_list *thash_list;
+	struct rte_thash_subtuple_helper *ent, *tmp;
+
+	if (ctx == NULL)
+		return;
+
+	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
+	rte_mcfg_tailq_write_lock();
+	TAILQ_FOREACH(te, thash_list, next) {
+		if (te->data == (void *)ctx)
+			break;
+	}
+
+	if (te != NULL)
+		TAILQ_REMOVE(thash_list, te, next);
+
+	rte_mcfg_tailq_write_unlock();
+	ent = LIST_FIRST(&(ctx->head));
+	while (ent) {
+		free_lfsr(ent->lfsr);
+		tmp = ent;
+		ent = LIST_NEXT(ent, next);
+		LIST_REMOVE(tmp, next);
+		rte_free(tmp);
+	}
+
+	rte_free(ctx);
+	rte_free(te);
+}
+
+static inline void
+set_bit(uint8_t *ptr, uint32_t bit, uint32_t pos)
+{
+	uint32_t byte_idx = pos >> 3;
+	uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1));
+	uint8_t tmp;
+
+	tmp = ptr[byte_idx];
+	tmp &= ~(1 << bit_idx);
+	tmp |= bit << bit_idx;
+	ptr[byte_idx] = tmp;
+}
+
+/**
+ * writes m-sequence to the hash_key for range [start, end]
+ * (i.e. including start and end positions)
+ */
+static int
+generate_subkey(struct rte_thash_ctx *ctx, struct thash_lfsr *lfsr,
+	uint32_t start, uint32_t end)
+{
+	uint32_t i;
+	uint32_t req_bits = (start < end) ? (end - start) : (start - end);
+	req_bits++; /* due to including end */
+
+	/* check if lfsr overflow period of the m-sequence */
+	if (((lfsr->bits_cnt + req_bits) > (1ULL << lfsr->deg) - 1) &&
+			((ctx->flags & RTE_THASH_IGNORE_PERIOD_OVERFLOW) !=
+			RTE_THASH_IGNORE_PERIOD_OVERFLOW))
+		return -ENOSPC;
+
+	if (start < end) {
+		/* original direction (from left to right)*/
+		for (i = start; i <= end; i++)
+			set_bit(ctx->hash_key, get_bit_lfsr(lfsr), i);
+
+	} else {
+		/* reverse direction (from right to left) */
+		for (i = end; i >= start; i--)
+			set_bit(ctx->hash_key, get_rev_bit_lfsr(lfsr), i);
+	}
+
+	return 0;
+}
+
+static inline uint32_t
+get_subvalue(struct rte_thash_ctx *ctx, uint32_t offset)
+{
+	uint32_t *tmp, val;
+
+	tmp = (uint32_t *)(&ctx->hash_key[offset >> 3]);
+	val = rte_be_to_cpu_32(*tmp);
+	val >>= (TOEPLITZ_HASH_LEN - ((offset & (CHAR_BIT - 1)) +
+		ctx->reta_sz_log));
+
+	return val & ((1 << ctx->reta_sz_log) - 1);
+}
+
+static inline void
+generate_compliment_table(struct rte_thash_ctx *ctx,
+	struct rte_thash_subtuple_helper *h)
+{
+	int i, j, k;
+	uint32_t val;
+	uint32_t start;
+
+	start = h->offset + h->len - (2 * ctx->reta_sz_log - 1);
+
+	for (i = 1; i < (1 << ctx->reta_sz_log); i++) {
+		val = 0;
+		for (j = i; j; j &= (j - 1)) {
+			k = rte_bsf32(j);
+			val ^= get_subvalue(ctx, start - k +
+				ctx->reta_sz_log - 1);
+		}
+		h->compl_table[val] = i;
+	}
+}
+
+static inline int
+insert_before(struct rte_thash_ctx *ctx,
+	struct rte_thash_subtuple_helper *ent,
+	struct rte_thash_subtuple_helper *cur_ent,
+	struct rte_thash_subtuple_helper *next_ent,
+	uint32_t start, uint32_t end, uint32_t range_end)
+{
+	int ret;
+
+	if (end < cur_ent->offset) {
+		ent->lfsr = alloc_lfsr(ctx);
+		if (ent->lfsr == NULL) {
+			rte_free(ent);
+			return -ENOMEM;
+		}
+		/* generate nonoverlapping range [start, end) */
+		ret = generate_subkey(ctx, ent->lfsr, start, end - 1);
+		if (ret != 0) {
+			free_lfsr(ent->lfsr);
+			rte_free(ent);
+			return ret;
+		}
+	} else if ((next_ent != NULL) && (end > next_ent->offset)) {
+		rte_free(ent);
+		return -ENOSPC;
+	}
+	attach_lfsr(ent, cur_ent->lfsr);
+
+	/**
+	 * generate partially overlapping range
+	 * [start, cur_ent->start) in reverse order
+	 */
+	ret = generate_subkey(ctx, ent->lfsr, cur_ent->offset - 1, start);
+	if (ret != 0) {
+		free_lfsr(ent->lfsr);
+		rte_free(ent);
+		return ret;
+	}
+
+	if (end > range_end) {
+		/**
+		 * generate partially overlapping range
+		 * (range_end, end)
+		 */
+		ret = generate_subkey(ctx, ent->lfsr, range_end, end - 1);
+		if (ret != 0) {
+			free_lfsr(ent->lfsr);
+			rte_free(ent);
+			return ret;
+		}
+	}
+
+	LIST_INSERT_BEFORE(cur_ent, ent, next);
+	generate_compliment_table(ctx, ent);
+	ctx->subtuples_nb++;
+	return 0;
+}
+
+static inline int
+insert_after(struct rte_thash_ctx *ctx,
+	struct rte_thash_subtuple_helper *ent,
+	struct rte_thash_subtuple_helper *cur_ent,
+	struct rte_thash_subtuple_helper *next_ent,
+	struct rte_thash_subtuple_helper *prev_ent,
+	uint32_t end, uint32_t range_end)
+{
+	int ret;
+
+	if ((next_ent != NULL) && (end > next_ent->offset)) {
+		rte_free(ent);
+		return -EEXIST;
+	}
+
+	attach_lfsr(ent, cur_ent->lfsr);
+	if (end > range_end) {
+		/**
+		 * generate partially overlapping range
+		 * (range_end, end)
+		 */
+		ret = generate_subkey(ctx, ent->lfsr, range_end, end - 1);
+		if (ret != 0) {
+			free_lfsr(ent->lfsr);
+			rte_free(ent);
+			return ret;
+		}
+	}
+
+	LIST_INSERT_AFTER(prev_ent, ent, next);
+	generate_compliment_table(ctx, ent);
+	ctx->subtuples_nb++;
+
+	return 0;
 }
 
 int
-rte_thash_add_helper(struct rte_thash_ctx *ctx __rte_unused,
-	const char *name __rte_unused, uint32_t len __rte_unused,
-	uint32_t offset __rte_unused)
+rte_thash_add_helper(struct rte_thash_ctx *ctx, const char *name, uint32_t len,
+	uint32_t offset)
 {
+	struct rte_thash_subtuple_helper *ent, *cur_ent, *prev_ent, *next_ent;
+	uint32_t start, end;
+	int ret;
+
+	if ((ctx == NULL) || (name == NULL) || (len < ctx->reta_sz_log) ||
+			((offset + len + TOEPLITZ_HASH_LEN - 1) >
+			ctx->key_len * CHAR_BIT))
+		return -EINVAL;
+
+	/* Check for existing name*/
+	LIST_FOREACH(cur_ent, &ctx->head, next) {
+		if (strncmp(name, cur_ent->name, sizeof(cur_ent->name)) == 0)
+			return -EEXIST;
+	}
+
+	end = offset + len + TOEPLITZ_HASH_LEN - 1;
+	start = ((ctx->flags & RTE_THASH_MINIMAL_SEQ) ==
+		RTE_THASH_MINIMAL_SEQ) ? (end - (2 * ctx->reta_sz_log - 1)) :
+		offset;
+
+	ent = rte_zmalloc(NULL, sizeof(struct rte_thash_subtuple_helper) +
+		sizeof(uint32_t) * (1 << ctx->reta_sz_log), 0);
+	if (ent == NULL)
+		return -ENOMEM;
+
+	rte_strlcpy(ent->name, name, sizeof(ent->name));
+	ent->offset = start;
+	ent->len = end - start;
+	ent->tuple_offset = offset;
+	ent->tuple_len = len;
+	ent->lsb_msk = (1 << ctx->reta_sz_log) - 1;
+
+	cur_ent = LIST_FIRST(&ctx->head);
+	while (cur_ent) {
+		uint32_t range_end = cur_ent->offset + cur_ent->len;
+		next_ent = LIST_NEXT(cur_ent, next);
+		prev_ent = cur_ent;
+		/* Iterate through overlapping ranges */
+		while ((next_ent != NULL) && (next_ent->offset < range_end)) {
+			range_end = RTE_MAX(next_ent->offset + next_ent->len,
+				range_end);
+			if (start > next_ent->offset)
+				prev_ent = next_ent;
+
+			next_ent = LIST_NEXT(next_ent, next);
+		}
+
+		if (start < cur_ent->offset)
+			return insert_before(ctx, ent, cur_ent, next_ent,
+				start, end, range_end);
+		else if (start < range_end)
+			return insert_after(ctx, ent, cur_ent, next_ent,
+				prev_ent, end, range_end);
+
+		cur_ent = next_ent;
+		continue;
+	}
+
+	ent->lfsr = alloc_lfsr(ctx);
+	if (ent->lfsr == NULL) {
+		rte_free(ent);
+		return -ENOMEM;
+	}
+
+	/* generate nonoverlapping range [start, end) */
+	ret = generate_subkey(ctx, ent->lfsr, start, end - 1);
+	if (ret != 0) {
+		free_lfsr(ent->lfsr);
+		rte_free(ent);
+		return ret;
+	}
+	if (LIST_EMPTY(&ctx->head)) {
+		LIST_INSERT_HEAD(&ctx->head, ent, next);
+	} else {
+		LIST_FOREACH(next_ent, &ctx->head, next)
+			prev_ent = next_ent;
+
+		LIST_INSERT_AFTER(prev_ent, ent, next);
+	}
+	generate_compliment_table(ctx, ent);
+	ctx->subtuples_nb++;
+
 	return 0;
 }
 
 struct rte_thash_subtuple_helper *
-rte_thash_get_helper(struct rte_thash_ctx *ctx __rte_unused,
-	const char *name __rte_unused)
+rte_thash_get_helper(struct rte_thash_ctx *ctx, const char *name)
 {
+	struct rte_thash_subtuple_helper *ent;
+
+	if ((ctx == NULL) || (name == NULL))
+		return NULL;
+
+	LIST_FOREACH(ent, &ctx->head, next) {
+		if (strncmp(name, ent->name, sizeof(ent->name)) == 0)
+			return ent;
+	}
+
 	return NULL;
 }
 
 uint32_t
-rte_thash_get_compliment(struct rte_thash_subtuple_helper *h __rte_unused,
-	uint32_t hash __rte_unused, uint32_t desired_hash __rte_unused)
+rte_thash_get_compliment(struct rte_thash_subtuple_helper *h,
+	uint32_t hash, uint32_t desired_hash)
 {
-	return 0;
+	return h->compl_table[(hash ^ desired_hash) & h->lsb_msk];
 }
 
 const uint8_t *
-rte_thash_get_key(struct rte_thash_ctx *ctx __rte_unused)
+rte_thash_get_key(struct rte_thash_ctx *ctx)
 {
-	return NULL;
+	return ctx->hash_key;
+}
+
+static inline void
+xor_bit(uint8_t *ptr, uint32_t bit, uint32_t pos)
+{
+	uint32_t byte_idx = pos >> 3;
+	uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1));
+	uint8_t tmp;
+
+	tmp = ptr[byte_idx];
+	tmp ^= bit << bit_idx;
+	ptr[byte_idx] = tmp;
+}
+
+int
+rte_thash_adjust_tuple(struct rte_thash_subtuple_helper *h,
+	uint8_t *orig_tuple, uint32_t adj_bits,
+	rte_thash_check_tuple_t fn, void *userdata)
+{
+	unsigned i;
+
+	if ((h == NULL) || (orig_tuple == NULL))
+		return -EINVAL;
+
+	adj_bits &= h->lsb_msk;
+	/* Hint: LSB of adj_bits corresponds to offset + len bit of tuple */
+	for (i = 0; i < sizeof(uint32_t) * CHAR_BIT; i++) {
+		uint8_t bit = (adj_bits >> i) & 0x1;
+		if (bit)
+			xor_bit(orig_tuple, bit,
+				h->tuple_offset + h->tuple_len - 1 - i);
+	}
+
+	if (fn != NULL)
+		return (fn(userdata, orig_tuple)) ? 0 : -EEXIST;
+
+	return 0;
 }
diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h
index 38a641b..fd67931 100644
--- a/lib/librte_hash/rte_thash.h
+++ b/lib/librte_hash/rte_thash.h
@@ -360,6 +360,48 @@ __rte_experimental
 const uint8_t *
 rte_thash_get_key(struct rte_thash_ctx *ctx);
 
+/**
+ * Function prototype for the rte_thash_adjust_tuple
+ * to check if adjusted tuple could be used.
+ * Generally it is some kind of lookup function to check
+ * if adjusted tuple is already in use.
+ *
+ * @param userdata
+ *  Pointer to the userdata. It could be a pointer to the
+ *  table with used tuples to search.
+ * @param tuple
+ *  Pointer to the tuple to check
+ *
+ * @return
+ *  1 on success
+ *  0 otherwise
+ */
+typedef int (*rte_thash_check_tuple_t)(void *userdata, uint8_t *tuple);
+
+/**
+ * Adjust tuple with complimentary bits.
+ *
+ * @param h
+ *  Pointer to the helper struct
+ * @param orig_tuple
+ *  Pointer to the tuple to be adjusted
+ * @param adj_bits
+ *  Value returned by rte_thash_get_compliment
+ * @param fn
+ *  Callback function to check adjusted tuple. Could be NULL
+ * @param userdata
+ *  Pointer to the userdata to be passed to fn(). Could be NULL
+ *
+ * @return
+ *  0 on success
+ *  negative otherwise
+ */
+__rte_experimental
+int
+rte_thash_adjust_tuple(struct rte_thash_subtuple_helper *h,
+	uint8_t *orig_tuple, uint32_t adj_bits,
+	rte_thash_check_tuple_t fn, void *userdata);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_hash/version.map b/lib/librte_hash/version.map
index 93cb230..a992a1e 100644
--- a/lib/librte_hash/version.map
+++ b/lib/librte_hash/version.map
@@ -32,6 +32,7 @@ DPDK_21 {
 EXPERIMENTAL {
 	global:
 
+	rte_thash_adjust_tuple;
 	rte_hash_free_key_with_position;
 	rte_hash_lookup_with_hash_bulk;
 	rte_hash_lookup_with_hash_bulk_data;
-- 
2.7.4


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [dpdk-dev] [PATCH v2 3/3] test/hash: add additional thash tests
  2021-03-16 18:24 [dpdk-dev] [PATCH v1 0/3] Predictable RSS feature Vladimir Medvedkin
                   ` (5 preceding siblings ...)
  2021-04-06 19:50 ` [dpdk-dev] [PATCH v2 2/3] hash: add predictable RSS implementation Vladimir Medvedkin
@ 2021-04-06 19:50 ` Vladimir Medvedkin
  6 siblings, 0 replies; 23+ messages in thread
From: Vladimir Medvedkin @ 2021-04-06 19:50 UTC (permalink / raw)
  To: dev
  Cc: konstantin.ananyev, andrey.chilikin, ray.kinsella, yipeng1.wang,
	sameh.gobriel, bruce.richardson

This patch adds tests for the predictable RSS feature.

Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
---
 app/test/test_thash.c | 468 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 462 insertions(+), 6 deletions(-)

diff --git a/app/test/test_thash.c b/app/test/test_thash.c
index a6aadd1..28ae767 100644
--- a/app/test/test_thash.c
+++ b/app/test/test_thash.c
@@ -5,11 +5,15 @@
 #include <rte_common.h>
 #include <rte_eal.h>
 #include <rte_ip.h>
+#include <rte_random.h>
 
 #include "test.h"
 
 #include <rte_thash.h>
 
+#define HASH_MSK(reta_sz)	((1 << reta_sz) - 1)
+#define TUPLE_SZ	(RTE_THASH_V4_L4_LEN * 4)
+
 struct test_thash_v4 {
 	uint32_t	dst_ip;
 	uint32_t	src_ip;
@@ -75,7 +79,7 @@ uint8_t default_rss_key[] = {
 };
 
 static int
-test_thash(void)
+test_toeplitz_hash_calc(void)
 {
 	uint32_t i, j;
 	union rte_thash_tuple tuple;
@@ -100,7 +104,7 @@ test_thash(void)
 				RTE_THASH_V4_L4_LEN, default_rss_key);
 		if ((rss_l3 != v4_tbl[i].hash_l3) ||
 				(rss_l3l4 != v4_tbl[i].hash_l3l4))
-			return -1;
+			return -TEST_FAILED;
 		/*Calculate hash with converted key*/
 		rss_l3 = rte_softrss_be((uint32_t *)&tuple,
 				RTE_THASH_V4_L3_LEN, rss_key_be);
@@ -108,7 +112,7 @@ test_thash(void)
 				RTE_THASH_V4_L4_LEN, rss_key_be);
 		if ((rss_l3 != v4_tbl[i].hash_l3) ||
 				(rss_l3l4 != v4_tbl[i].hash_l3l4))
-			return -1;
+			return -TEST_FAILED;
 	}
 	for (i = 0; i < RTE_DIM(v6_tbl); i++) {
 		/*Fill ipv6 hdr*/
@@ -127,7 +131,7 @@ test_thash(void)
 				RTE_THASH_V6_L4_LEN, default_rss_key);
 		if ((rss_l3 != v6_tbl[i].hash_l3) ||
 				(rss_l3l4 != v6_tbl[i].hash_l3l4))
-			return -1;
+			return -TEST_FAILED;
 		/*Calculate hash with converted key*/
 		rss_l3 = rte_softrss_be((uint32_t *)&tuple,
 				RTE_THASH_V6_L3_LEN, rss_key_be);
@@ -135,9 +139,461 @@ test_thash(void)
 				RTE_THASH_V6_L4_LEN, rss_key_be);
 		if ((rss_l3 != v6_tbl[i].hash_l3) ||
 				(rss_l3l4 != v6_tbl[i].hash_l3l4))
-			return -1;
+			return -TEST_FAILED;
 	}
-	return 0;
+	return TEST_SUCCESS;
+}
+
+static int
+test_create_invalid(void)
+{
+	struct rte_thash_ctx *ctx;
+	int key_len = 40;
+	int reta_sz = 7;
+
+	ctx = rte_thash_init_ctx(NULL, key_len, reta_sz, NULL, 0);
+	RTE_TEST_ASSERT(ctx == NULL,
+		"Call succeeded with invalid parameters\n");
+
+	ctx = rte_thash_init_ctx("test", 0, reta_sz, NULL, 0);
+	RTE_TEST_ASSERT(ctx == NULL,
+		"Call succeeded with invalid parameters\n");
+
+	ctx = rte_thash_init_ctx(NULL, key_len, 1, NULL, 0);
+	RTE_TEST_ASSERT(ctx == NULL,
+		"Call succeeded with invalid parameters\n");
+
+	ctx = rte_thash_init_ctx(NULL, key_len, 17, NULL, 0);
+	RTE_TEST_ASSERT(ctx == NULL,
+		"Call succeeded with invalid parameters\n");
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_multiple_create(void)
+{
+	struct rte_thash_ctx *ctx;
+	int key_len = 40;
+	int reta_sz = 7;
+	int i;
+
+	for (i = 0; i < 100; i++) {
+		ctx = rte_thash_init_ctx("test", key_len, reta_sz, NULL, 0);
+		RTE_TEST_ASSERT(ctx != NULL, "Can not create CTX\n");
+
+		rte_thash_free_ctx(ctx);
+	}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_free_null(void)
+{
+	struct rte_thash_ctx *ctx;
+
+	ctx = rte_thash_init_ctx("test", 40, 7, NULL, 0);
+	RTE_TEST_ASSERT(ctx != NULL, "Can not create CTX\n");
+
+	rte_thash_free_ctx(ctx);
+	rte_thash_free_ctx(NULL);
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_add_invalid_helper(void)
+{
+	struct rte_thash_ctx *ctx;
+	const int key_len = 40;
+	int reta_sz = 7;
+	int ret;
+
+	ctx = rte_thash_init_ctx("test", key_len, reta_sz, NULL, 0);
+	RTE_TEST_ASSERT(ctx != NULL, "can not create thash ctx\n");
+
+	ret = rte_thash_add_helper(NULL, "test", reta_sz, 0);
+	RTE_TEST_ASSERT(ret == -EINVAL,
+		"Call succeeded with invalid parameters\n");
+
+	ret = rte_thash_add_helper(ctx, NULL, reta_sz, 0);
+	RTE_TEST_ASSERT(ret == -EINVAL,
+		"Call succeeded with invalid parameters\n");
+
+	ret = rte_thash_add_helper(ctx, "test", reta_sz - 1, 0);
+	RTE_TEST_ASSERT(ret == -EINVAL,
+		"Call succeeded with invalid parameters\n");
+
+	ret = rte_thash_add_helper(ctx, "test", reta_sz, key_len * 8);
+	RTE_TEST_ASSERT(ret == -EINVAL,
+		"Call succeeded with invalid parameters\n");
+
+	ret = rte_thash_add_helper(ctx, "first_range", reta_sz, 0);
+	RTE_TEST_ASSERT(ret == 0, "Can not create helper\n");
+
+	ret = rte_thash_add_helper(ctx, "first_range", reta_sz, 0);
+	RTE_TEST_ASSERT(ret == -EEXIST,
+		"Call succeeded with duplicated name\n");
+
+	/*
+	 * Create second helper with offset 3 * reta_sz.
+	 * Note first_range helper created range in key:
+	 * [0, 32 + length{= reta_sz} - 1), i.e [0, 37).
+	 * second range is [44, 81)
+	 */
+	ret = rte_thash_add_helper(ctx, "second_range", reta_sz,
+		32 +  2 * reta_sz);
+	RTE_TEST_ASSERT(ret == 0, "Can not create helper\n");
+
+	/*
+	 * Try to create overlapping with first_ and second_ ranges,
+	 * i.e. [6, 49)
+	 */
+	ret = rte_thash_add_helper(ctx, "third_range", 2 * reta_sz, reta_sz);
+	RTE_TEST_ASSERT(ret == -EEXIST,
+		"Call succeeded with overlapping ranges\n");
+
+	rte_thash_free_ctx(ctx);
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_find_existing(void)
+{
+	struct rte_thash_ctx *ctx, *ret_ctx;
+
+	ctx = rte_thash_init_ctx("test", 40, 7, NULL, 0);
+	RTE_TEST_ASSERT(ctx != NULL, "can not create thash ctx\n");
+
+	ret_ctx = rte_thash_find_existing("test");
+	RTE_TEST_ASSERT(ret_ctx != NULL, "can not find existing ctx\n");
+
+	rte_thash_free_ctx(ctx);
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_get_helper(void)
+{
+	struct rte_thash_ctx *ctx;
+	struct rte_thash_subtuple_helper *h;
+	int ret;
+
+	ctx = rte_thash_init_ctx("test", 40, 7, NULL, 0);
+	RTE_TEST_ASSERT(ctx != NULL, "Can not create thash ctx\n");
+
+	h = rte_thash_get_helper(NULL, "first_range");
+	RTE_TEST_ASSERT(h == NULL, "Call succeeded with invalid parameters\n");
+
+	h = rte_thash_get_helper(ctx, NULL);
+	RTE_TEST_ASSERT(h == NULL, "Call succeeded with invalid parameters\n");
+
+	ret = rte_thash_add_helper(ctx, "first_range", 8, 0);
+	RTE_TEST_ASSERT(ret == 0, "Can not create helper\n");
+
+	h = rte_thash_get_helper(ctx, "first_range");
+	RTE_TEST_ASSERT(h != NULL, "Can not find helper\n");
+
+	rte_thash_free_ctx(ctx);
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_period_overflow(void)
+{
+	struct rte_thash_ctx *ctx;
+	int reta_sz = 7; /* reflects polynomial degree */
+	int ret;
+
+	/* first create without RTE_THASH_IGNORE_PERIOD_OVERFLOW flag */
+	ctx = rte_thash_init_ctx("test", 40, reta_sz, NULL, 0);
+	RTE_TEST_ASSERT(ctx != NULL, "Can not create thash ctx\n");
+
+	/* requested range > (2^reta_sz) - 1 */
+	ret = rte_thash_add_helper(ctx, "test", (1 << reta_sz), 0);
+	RTE_TEST_ASSERT(ret == -ENOSPC,
+		"Call succeeded with invalid parameters\n");
+
+	/* requested range == len + 32 - 1, smaller than (2^reta_sz) - 1 */
+	ret = rte_thash_add_helper(ctx, "test", (1 << reta_sz) - 32, 0);
+	RTE_TEST_ASSERT(ret == 0, "Can not create helper\n");
+
+	rte_thash_free_ctx(ctx);
+
+	/* create with RTE_THASH_IGNORE_PERIOD_OVERFLOW flag */
+	ctx = rte_thash_init_ctx("test", 40, reta_sz, NULL,
+		RTE_THASH_IGNORE_PERIOD_OVERFLOW);
+	RTE_TEST_ASSERT(ctx != NULL, "Can not create thash ctx\n");
+
+	/* requested range > (2^reta_sz - 1) */
+	ret = rte_thash_add_helper(ctx, "test", (1 << reta_sz) + 10, 0);
+	RTE_TEST_ASSERT(ret == 0, "Can not create helper\n");
+
+	rte_thash_free_ctx(ctx);
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_predictable_rss_min_seq(void)
+{
+	struct rte_thash_ctx *ctx;
+	struct rte_thash_subtuple_helper *h;
+	const int key_len = 40;
+	int reta_sz = 6;
+	uint8_t initial_key[key_len];
+	const uint8_t *new_key;
+	int ret;
+	union rte_thash_tuple tuple;
+	uint32_t orig_hash, adj_hash, adj;
+	unsigned int desired_value = 27 & HASH_MSK(reta_sz);
+	uint16_t port_value = 22;
+
+	memset(initial_key, 0, key_len);
+
+	ctx = rte_thash_init_ctx("test", key_len, reta_sz, initial_key,
+		RTE_THASH_MINIMAL_SEQ);
+	RTE_TEST_ASSERT(ctx != NULL, "can not create thash ctx\n");
+
+	ret = rte_thash_add_helper(ctx, "snat", sizeof(uint16_t) * 8,
+		offsetof(union rte_thash_tuple, v4.sport) * 8);
+	RTE_TEST_ASSERT(ret == 0, "can not add helper, ret %d\n", ret);
+
+	h = rte_thash_get_helper(ctx, "snat");
+	RTE_TEST_ASSERT(h != NULL, "can not find helper\n");
+
+	new_key = rte_thash_get_key(ctx);
+	tuple.v4.src_addr = RTE_IPV4(0, 0, 0, 0);
+	tuple.v4.dst_addr = RTE_IPV4(0, 0, 0, 0);
+	tuple.v4.sport = 0;
+	tuple.v4.sport = rte_cpu_to_be_16(port_value);
+	tuple.v4.dport = 0;
+	tuple.v4.sctp_tag = rte_be_to_cpu_32(tuple.v4.sctp_tag);
+
+	orig_hash = rte_softrss((uint32_t *)&tuple,
+		RTE_THASH_V4_L4_LEN, new_key);
+	adj = rte_thash_get_compliment(h, orig_hash, desired_value);
+
+	tuple.v4.sctp_tag = rte_cpu_to_be_32(tuple.v4.sctp_tag);
+	tuple.v4.sport ^= rte_cpu_to_be_16(adj);
+	tuple.v4.sctp_tag = rte_be_to_cpu_32(tuple.v4.sctp_tag);
+
+	adj_hash = rte_softrss((uint32_t *)&tuple,
+		RTE_THASH_V4_L4_LEN, new_key);
+	RTE_TEST_ASSERT((adj_hash & HASH_MSK(reta_sz)) ==
+		desired_value, "bad desired value\n");
+
+	rte_thash_free_ctx(ctx);
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * This test creates 7 subranges in the following order:
+ * range_one	= [56, 95),	len = 8, offset = 56
+ * range_two	= [64, 103),	len = 8, offset = 64
+ * range_three	= [120, 159),	len = 8, offset = 120
+ * range_four	= [48, 87),	len = 8, offset = 48
+ * range_five	= [57, 95),	len = 7, offset = 57
+ * range_six	= [40, 111),	len = 40, offset = 40
+ * range_seven	= [0, 39),	len = 8, offset = 0
+ */
+struct range {
+	const char *name;
+	int len;
+	int offset;
+	int byte_idx;
+};
+
+struct range rng_arr[] = {
+	{"one",   8,  56,  7},
+	{"two",   8,  64,  8},
+	{"three", 8,  120, 15},
+	{"four",  8,  48,  6},
+	{"six",   40, 40,  9},
+	{"five",  7,  57,  7},
+	{"seven", 8,  0,   0}
+};
+
+static int
+test_predictable_rss_multirange(void)
+{
+	struct rte_thash_ctx *ctx;
+	struct rte_thash_subtuple_helper *h[RTE_DIM(rng_arr)];
+	const uint8_t *new_key;
+	const int key_len = 40;
+	int reta_sz = 7;
+	unsigned int i, j, k;
+	int ret;
+	uint32_t desired_value = rte_rand() & HASH_MSK(reta_sz);
+	uint8_t tuples[RTE_DIM(rng_arr)][16] = { {0} };
+	uint32_t *ptr;
+	uint32_t hashes[RTE_DIM(rng_arr)];
+	uint32_t adj_hashes[RTE_DIM(rng_arr)];
+	uint32_t adj;
+
+	ctx = rte_thash_init_ctx("test", key_len, reta_sz, NULL, 0);
+	RTE_TEST_ASSERT(ctx != NULL, "can not create thash ctx\n");
+
+	for (i = 0; i < RTE_DIM(rng_arr); i++) {
+		ret = rte_thash_add_helper(ctx, rng_arr[i].name,
+			rng_arr[i].len, rng_arr[i].offset);
+		RTE_TEST_ASSERT(ret == 0, "can not add helper\n");
+
+		h[i] = rte_thash_get_helper(ctx, rng_arr[i].name);
+		RTE_TEST_ASSERT(h[i] != NULL, "can not find helper\n");
+	}
+	new_key = rte_thash_get_key(ctx);
+
+	/*
+	 * calculate hashes, compliments, then adjust keys with
+	 * compliments and recalculate hashes
+	 */
+	for (i = 0; i < RTE_DIM(rng_arr); i++) {
+		for (k = 0; k < 100; k++) {
+			/* init with random keys */
+			ptr = (uint32_t *)&tuples[i][0];
+			for (j = 0; j < 4; j++)
+				ptr[j] = rte_rand();
+			/* convert keys from BE to CPU byte order */
+			for (j = 0; j < 4; j++)
+				ptr[j] = rte_be_to_cpu_32(ptr[j]);
+
+			hashes[i] = rte_softrss(ptr, 4, new_key);
+			adj = rte_thash_get_compliment(h[i], hashes[i],
+				desired_value);
+			/* convert back to BE to adjust the value */
+			for (j = 0; j < 4; j++)
+				ptr[j] = rte_cpu_to_be_32(ptr[j]);
+
+			tuples[i][rng_arr[i].byte_idx] ^= adj;
+
+			for (j = 0; j < 4; j++)
+				ptr[j] = rte_be_to_cpu_32(ptr[j]);
+
+			adj_hashes[i] = rte_softrss(ptr, 4, new_key);
+			RTE_TEST_ASSERT((adj_hashes[i] & HASH_MSK(reta_sz)) ==
+				desired_value,
+				"bad desired value for %d tuple\n", i);
+		}
+	}
+
+	rte_thash_free_ctx(ctx);
+
+	return TEST_SUCCESS;
+}
+
+static int
+cmp_tuple_eq(void *userdata, uint8_t *tuple)
+{
+	return memcmp(userdata, tuple, TUPLE_SZ);
+}
+
+static int
+test_adjust_tuple(void)
+{
+	struct rte_thash_ctx *ctx;
+	struct rte_thash_subtuple_helper *h;
+	const int key_len = 40;
+	int reta_sz = CHAR_BIT;
+	const uint8_t *new_key;
+	int ret;
+	uint8_t be_tuple[TUPLE_SZ];
+	uint8_t be_tuple_tmp[TUPLE_SZ];
+	uint8_t le_tuple[TUPLE_SZ];
+	uint32_t orig_hash, adj_hash, adj;
+	unsigned int i;
+	unsigned int desired_value = 27 & HASH_MSK(reta_sz);
+
+	memset(be_tuple, 0xab, TUPLE_SZ);
+	memset(le_tuple, 0xab, TUPLE_SZ);
+
+	ctx = rte_thash_init_ctx("test", key_len, reta_sz, NULL, 0);
+	RTE_TEST_ASSERT(ctx != NULL, "can not create thash ctx\n");
+
+	/* set offset to be in the middle of a byte */
+	ret = rte_thash_add_helper(ctx, "test", CHAR_BIT, (5 * CHAR_BIT) + 4);
+	RTE_TEST_ASSERT(ret == 0, "can not add helper, ret %d\n", ret);
+
+	h = rte_thash_get_helper(ctx, "test");
+	RTE_TEST_ASSERT(h != NULL, "can not find helper\n");
+
+	new_key = rte_thash_get_key(ctx);
+
+	/* at the moment be_ and le_ tuples are the same */
+	orig_hash = rte_softrss((uint32_t *)le_tuple,
+		RTE_THASH_V4_L4_LEN, new_key);
+
+	adj = rte_thash_get_compliment(h, orig_hash, desired_value);
+
+	ret = rte_thash_adjust_tuple(h, be_tuple, adj, NULL, NULL);
+	RTE_TEST_ASSERT(ret == 0, "can not adjust tuple, ret %d\n", ret);
+
+	for (i = 0; i < (TUPLE_SZ / 4); i++)
+		*(uint32_t *)&le_tuple[i * 4] =
+			rte_be_to_cpu_32(*(uint32_t *)&be_tuple[i * 4]);
+
+	adj_hash = rte_softrss((uint32_t *)le_tuple,
+		RTE_THASH_V4_L4_LEN, new_key);
+	RTE_TEST_ASSERT((adj_hash & HASH_MSK(reta_sz)) ==
+		desired_value, "bad desired value\n");
+
+	/* Pass previously calculated tuple to callback function */
+	memcpy(be_tuple_tmp, be_tuple, TUPLE_SZ);
+
+	memset(be_tuple, 0xab, TUPLE_SZ);
+	ret = rte_thash_adjust_tuple(h, be_tuple, adj, cmp_tuple_eq,
+		&be_tuple_tmp);
+	RTE_TEST_ASSERT(ret == -EEXIST, "adjust tuple didn't indicate collision\n");
+
+	/* Pass another tuple to check with */
+	memset(be_tuple, 0xab, TUPLE_SZ);
+	ret = rte_thash_adjust_tuple(h, be_tuple, adj, cmp_tuple_eq,
+		&le_tuple);
+	RTE_TEST_ASSERT(ret == 0, "can not adjust tuple, ret %d\n", ret);
+
+	for (i = 0; i < (TUPLE_SZ / 4); i++)
+		*(uint32_t *)&le_tuple[i * 4] =
+			rte_be_to_cpu_32(*(uint32_t *)&be_tuple[i * 4]);
+
+	adj_hash = rte_softrss((uint32_t *)le_tuple,
+		RTE_THASH_V4_L4_LEN, new_key);
+	RTE_TEST_ASSERT((adj_hash & HASH_MSK(reta_sz)) ==
+		desired_value, "bad desired value\n");
+
+	rte_thash_free_ctx(ctx);
+
+	return TEST_SUCCESS;
+}
+
+static struct unit_test_suite thash_tests = {
+	.suite_name = "thash autotest",
+	.setup = NULL,
+	.teardown = NULL,
+	.unit_test_cases = {
+	TEST_CASE(test_toeplitz_hash_calc),
+	TEST_CASE(test_create_invalid),
+	TEST_CASE(test_multiple_create),
+	TEST_CASE(test_free_null),
+	TEST_CASE(test_add_invalid_helper),
+	TEST_CASE(test_find_existing),
+	TEST_CASE(test_get_helper),
+	TEST_CASE(test_period_overflow),
+	TEST_CASE(test_predictable_rss_min_seq),
+	TEST_CASE(test_predictable_rss_multirange),
+	TEST_CASE(test_adjust_tuple),
+	TEST_CASES_END()
+	}
+};
+
+static int
+test_thash(void)
+{
+	return unit_test_suite_runner(&thash_tests);
 }
 
 REGISTER_TEST_COMMAND(thash_autotest, test_thash);
-- 
2.7.4


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/3] hash: add predictable RSS implementation
  2021-04-06 19:50 ` [dpdk-dev] [PATCH v2 2/3] hash: add predictable RSS implementation Vladimir Medvedkin
@ 2021-04-07 12:53   ` Ananyev, Konstantin
  2021-04-11 18:51     ` Medvedkin, Vladimir
  2021-04-10  0:10   ` Wang, Yipeng1
  1 sibling, 1 reply; 23+ messages in thread
From: Ananyev, Konstantin @ 2021-04-07 12:53 UTC (permalink / raw)
  To: Medvedkin, Vladimir, dev
  Cc: Chilikin, Andrey, Kinsella, Ray, Wang, Yipeng1, Gobriel, Sameh,
	Richardson, Bruce

Hi Vladimir,

A few comments below, mostly minor.
One generic one - doc seems missing.
With that in place:
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>

> 
> This patch implements predictable RSS functionality.
> 
> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
> ---
>  lib/librte_hash/rte_thash.c | 577 ++++++++++++++++++++++++++++++++++++++++++--
>  lib/librte_hash/rte_thash.h |  42 ++++
>  lib/librte_hash/version.map |   1 +
>  3 files changed, 602 insertions(+), 18 deletions(-)
> 
> diff --git a/lib/librte_hash/rte_thash.c b/lib/librte_hash/rte_thash.c
> index 79e8724..cc60ada 100644
> --- a/lib/librte_hash/rte_thash.c
> +++ b/lib/librte_hash/rte_thash.c
> @@ -12,6 +12,45 @@
>  #include <rte_malloc.h>
> 
>  #define THASH_NAME_LEN		64
> +#define TOEPLITZ_HASH_LEN	32
> +
> +#define	RETA_SZ_MIN	2U
> +#define	RETA_SZ_MAX	16U

Should these RETA_SZ defines be in a public header?
So user can know what are allowed values?

> +#define RETA_SZ_IN_RANGE(reta_sz)	((reta_sz >= RETA_SZ_MIN) && \
> +					(reta_sz <= RETA_SZ_MAX))
> +
> +TAILQ_HEAD(rte_thash_list, rte_tailq_entry);
> +static struct rte_tailq_elem rte_thash_tailq = {
> +	.name = "RTE_THASH",
> +};
> +EAL_REGISTER_TAILQ(rte_thash_tailq)
> +
> +/**
> + * Table of some irreducible polinomials over GF(2).
> + * For lfsr they are reperesented in BE bit order, and
> + * x^0 is masked out.
> + * For example, poly x^5 + x^2 + 1 will be represented
> + * as (101001b & 11111b) = 01001b = 0x9
> + */
> +static const uint32_t irreducible_poly_table[][4] = {
> +	{0, 0, 0, 0},	/** < degree 0 */
> +	{1, 1, 1, 1},	/** < degree 1 */
> +	{0x3, 0x3, 0x3, 0x3},	/** < degree 2 and so on... */
> +	{0x5, 0x3, 0x5, 0x3},
> +	{0x9, 0x3, 0x9, 0x3},
> +	{0x9, 0x1b, 0xf, 0x5},
> +	{0x21, 0x33, 0x1b, 0x2d},
> +	{0x41, 0x11, 0x71, 0x9},
> +	{0x71, 0xa9, 0xf5, 0x8d},
> +	{0x21, 0xd1, 0x69, 0x1d9},
> +	{0x81, 0x2c1, 0x3b1, 0x185},
> +	{0x201, 0x541, 0x341, 0x461},
> +	{0x941, 0x609, 0xe19, 0x45d},
> +	{0x1601, 0x1f51, 0x1171, 0x359},
> +	{0x2141, 0x2111, 0x2db1, 0x2109},
> +	{0x4001, 0x801, 0x101, 0x7301},
> +	{0x7781, 0xa011, 0x4211, 0x86d9},
> +};
> 
>  struct thash_lfsr {
>  	uint32_t	ref_cnt;
> @@ -31,8 +70,10 @@ struct rte_thash_subtuple_helper {
>  	char	name[THASH_NAME_LEN];	/** < Name of subtuple configuration */
>  	LIST_ENTRY(rte_thash_subtuple_helper)	next;
>  	struct thash_lfsr	*lfsr;
> -	uint32_t	offset;		/** < Offset in bits of the subtuple */
> -	uint32_t	len;		/** < Length in bits of the subtuple */
> +	uint32_t	offset;		/** < Offset of the m-sequence */
> +	uint32_t	len;		/** < Length of the m-sequence */
> +	uint32_t	tuple_offset;	/** < Offset in bits of the subtuple */
> +	uint32_t	tuple_len;	/** < Length in bits of the subtuple */
>  	uint32_t	lsb_msk;	/** < (1 << reta_sz_log) - 1 */
>  	__extension__ uint32_t	compl_table[0] __rte_cache_aligned;
>  	/** < Complimentary table */
> @@ -48,49 +89,549 @@ struct rte_thash_ctx {
>  	uint8_t		hash_key[0];
>  };
> 
> +static inline uint32_t
> +get_bit_lfsr(struct thash_lfsr *lfsr)
> +{
> +	uint32_t bit, ret;
> +
> +	/*
> +	 * masking the TAP bits defined by the polynomial and
> +	 * calculating parity
> +	 */
> +	bit = __builtin_popcount(lfsr->state & lfsr->poly) & 0x1;
> +	ret = lfsr->state & 0x1;
> +	lfsr->state = ((lfsr->state >> 1) | (bit << (lfsr->deg - 1))) &
> +		((1 << lfsr->deg) - 1);
> +
> +	lfsr->bits_cnt++;
> +	return ret;
> +}
> +
> +static inline uint32_t
> +get_rev_bit_lfsr(struct thash_lfsr *lfsr)
> +{
> +	uint32_t bit, ret;
> +
> +	bit = __builtin_popcount(lfsr->rev_state & lfsr->rev_poly) & 0x1;
> +	ret = lfsr->rev_state & (1 << (lfsr->deg - 1));
> +	lfsr->rev_state = ((lfsr->rev_state << 1) | bit) &
> +		((1 << lfsr->deg) - 1);
> +
> +	lfsr->bits_cnt++;
> +	return ret;
> +}
> +
> +static inline uint32_t
> +thash_get_rand_poly(uint32_t poly_degree)
> +{
> +	return irreducible_poly_table[poly_degree][rte_rand() %
> +		RTE_DIM(irreducible_poly_table[poly_degree])];
> +}
> +
> +static struct thash_lfsr *
> +alloc_lfsr(struct rte_thash_ctx *ctx)
> +{
> +	struct thash_lfsr *lfsr;
> +	uint32_t i;
> +
> +	if (ctx == NULL)
> +		return NULL;
> +
> +	lfsr = rte_zmalloc(NULL, sizeof(struct thash_lfsr), 0);
> +	if (lfsr == NULL)
> +		return NULL;
> +
> +	lfsr->deg = ctx->reta_sz_log;
> +	lfsr->poly = thash_get_rand_poly(lfsr->deg);
> +	do {
> +		lfsr->state = rte_rand() & ((1 << lfsr->deg) - 1);
> +	} while (lfsr->state == 0);
> +	/* init reverse order polynomial */
> +	lfsr->rev_poly = (lfsr->poly >> 1) | (1 << (lfsr->deg - 1));
> +	/* init proper rev_state*/
> +	lfsr->rev_state = lfsr->state;
> +	for (i = 0; i <= lfsr->deg; i++)
> +		get_rev_bit_lfsr(lfsr);
> +
> +	/* clear bits_cnt after rev_state was inited */
> +	lfsr->bits_cnt = 0;
> +	lfsr->ref_cnt = 1;
> +
> +	return lfsr;
> +}
> +
> +static void
> +attach_lfsr(struct rte_thash_subtuple_helper *h, struct thash_lfsr *lfsr)
> +{
> +	lfsr->ref_cnt++;
> +	h->lfsr = lfsr;
> +}
> +
> +static void
> +free_lfsr(struct thash_lfsr *lfsr)
> +{
> +	lfsr->ref_cnt--;
> +	if (lfsr->ref_cnt == 0)
> +		rte_free(lfsr);
> +}
> +
>  struct rte_thash_ctx *
> -rte_thash_init_ctx(const char *name __rte_unused,
> -	uint32_t key_len __rte_unused, uint32_t reta_sz __rte_unused,
> -	uint8_t *key __rte_unused, uint32_t flags __rte_unused)
> +rte_thash_init_ctx(const char *name, uint32_t key_len, uint32_t reta_sz,
> +	uint8_t *key, uint32_t flags)
>  {
> +	struct rte_thash_ctx *ctx;
> +	struct rte_tailq_entry *te;
> +	struct rte_thash_list *thash_list;
> +	uint32_t i;

Empty line is missing.

> +	if ((name == NULL) || (key_len == 0) || !RETA_SZ_IN_RANGE(reta_sz)) {
> +		rte_errno = EINVAL;
> +		return NULL;
> +	}
> +
> +	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
> +
> +	rte_mcfg_tailq_write_lock();
> +
> +	/* guarantee there's no existing */
> +	TAILQ_FOREACH(te, thash_list, next) {
> +		ctx = (struct rte_thash_ctx *)te->data;
> +		if (strncmp(name, ctx->name, sizeof(ctx->name)) == 0)
> +			break;
> +	}
> +	ctx = NULL;
> +	if (te != NULL) {
> +		rte_errno = EEXIST;
> +		goto exit;
> +	}
> +
> +	/* allocate tailq entry */
> +	te = rte_zmalloc("THASH_TAILQ_ENTRY", sizeof(*te), 0);
> +	if (te == NULL) {
> +		RTE_LOG(ERR, HASH,
> +			"Can not allocate tailq entry for thash context %s\n",
> +			name);
> +		rte_errno = ENOMEM;
> +		goto exit;
> +	}
> +
> +	ctx = rte_zmalloc(NULL, sizeof(struct rte_thash_ctx) + key_len, 0);
> +	if (ctx == NULL) {
> +		RTE_LOG(ERR, HASH, "thash ctx %s memory allocation failed\n",
> +			name);
> +		rte_errno = ENOMEM;
> +		goto free_te;
> +	}
> +
> +	rte_strlcpy(ctx->name, name, sizeof(ctx->name));
> +	ctx->key_len = key_len;
> +	ctx->reta_sz_log = reta_sz;
> +	LIST_INIT(&ctx->head);
> +	ctx->flags = flags;
> +
> +	if (key)
> +		rte_memcpy(ctx->hash_key, key, key_len);
> +	else {
> +		for (i = 0; i < key_len; i++)
> +			ctx->hash_key[i] = rte_rand();
> +	}
> +
> +	te->data = (void *)ctx;
> +	TAILQ_INSERT_TAIL(thash_list, te, next);
> +
> +	rte_mcfg_tailq_write_unlock();
> +
> +	return ctx;
> +free_te:
> +	rte_free(te);
> +exit:
> +	rte_mcfg_tailq_write_unlock();
>  	return NULL;
>  }
> 
>  struct rte_thash_ctx *
> -rte_thash_find_existing(const char *name __rte_unused)
> +rte_thash_find_existing(const char *name)
>  {
> -	return NULL;
> +	struct rte_thash_ctx *ctx;
> +	struct rte_tailq_entry *te;
> +	struct rte_thash_list *thash_list;
> +
> +	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
> +
> +	rte_mcfg_tailq_read_lock();
> +	TAILQ_FOREACH(te, thash_list, next) {
> +		ctx = (struct rte_thash_ctx *)te->data;
> +		if (strncmp(name, ctx->name, sizeof(ctx->name)) == 0)
> +			break;
> +	}
> +
> +	rte_mcfg_tailq_read_unlock();
> +
> +	if (te == NULL) {
> +		rte_errno = ENOENT;
> +		return NULL;
> +	}
> +
> +	return ctx;
>  }
> 
>  void
> -rte_thash_free_ctx(struct rte_thash_ctx *ctx __rte_unused)
> +rte_thash_free_ctx(struct rte_thash_ctx *ctx)
>  {
> +	struct rte_tailq_entry *te;
> +	struct rte_thash_list *thash_list;
> +	struct rte_thash_subtuple_helper *ent, *tmp;
> +
> +	if (ctx == NULL)
> +		return;
> +
> +	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
> +	rte_mcfg_tailq_write_lock();
> +	TAILQ_FOREACH(te, thash_list, next) {
> +		if (te->data == (void *)ctx)
> +			break;
> +	}
> +
> +	if (te != NULL)
> +		TAILQ_REMOVE(thash_list, te, next);
> +
> +	rte_mcfg_tailq_write_unlock();
> +	ent = LIST_FIRST(&(ctx->head));
> +	while (ent) {
> +		free_lfsr(ent->lfsr);
> +		tmp = ent;
> +		ent = LIST_NEXT(ent, next);
> +		LIST_REMOVE(tmp, next);
> +		rte_free(tmp);
> +	}
> +
> +	rte_free(ctx);
> +	rte_free(te);
> +}
> +
> +static inline void
> +set_bit(uint8_t *ptr, uint32_t bit, uint32_t pos)
> +{
> +	uint32_t byte_idx = pos >> 3;

Just as a nit to be consistent with the line below:
pos / CHAR_BIT; 

> +	uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1));
> +	uint8_t tmp;
> +
> +	tmp = ptr[byte_idx];
> +	tmp &= ~(1 << bit_idx);
> +	tmp |= bit << bit_idx;
> +	ptr[byte_idx] = tmp;
> +}
> +
> +/**
> + * writes m-sequence to the hash_key for range [start, end]
> + * (i.e. including start and end positions)
> + */
> +static int
> +generate_subkey(struct rte_thash_ctx *ctx, struct thash_lfsr *lfsr,
> +	uint32_t start, uint32_t end)
> +{
> +	uint32_t i;
> +	uint32_t req_bits = (start < end) ? (end - start) : (start - end);
> +	req_bits++; /* due to incuding end */
> +
> +	/* check if lfsr overflow period of the m-sequence */
> +	if (((lfsr->bits_cnt + req_bits) > (1ULL << lfsr->deg) - 1) &&
> +			((ctx->flags & RTE_THASH_IGNORE_PERIOD_OVERFLOW) !=
> +			RTE_THASH_IGNORE_PERIOD_OVERFLOW))
> +		return -ENOSPC;
> +
> +	if (start < end) {
> +		/* original direction (from left to right)*/
> +		for (i = start; i <= end; i++)
> +			set_bit(ctx->hash_key, get_bit_lfsr(lfsr), i);
> +
> +	} else {
> +		/* reverse direction (from right to left) */
> +		for (i = end; i >= start; i--)
> +			set_bit(ctx->hash_key, get_rev_bit_lfsr(lfsr), i);
> +	}
> +
> +	return 0;
> +}
> +
> +static inline uint32_t
> +get_subvalue(struct rte_thash_ctx *ctx, uint32_t offset)
> +{
> +	uint32_t *tmp, val;
> +
> +	tmp = (uint32_t *)(&ctx->hash_key[offset >> 3]);
> +	val = rte_be_to_cpu_32(*tmp);
> +	val >>= (TOEPLITZ_HASH_LEN - ((offset & (CHAR_BIT - 1)) +
> +		ctx->reta_sz_log));
> +
> +	return val & ((1 << ctx->reta_sz_log) - 1);
> +}
> +
> +static inline void
> +generate_compliment_table(struct rte_thash_ctx *ctx,
> +	struct rte_thash_subtuple_helper *h)
> +{
> +	int i, j, k;
> +	uint32_t val;
> +	uint32_t start;
> +
> +	start = h->offset + h->len - (2 * ctx->reta_sz_log - 1);
> +
> +	for (i = 1; i < (1 << ctx->reta_sz_log); i++) {
> +		val = 0;
> +		for (j = i; j; j &= (j - 1)) {
> +			k = rte_bsf32(j);
> +			val ^= get_subvalue(ctx, start - k +
> +				ctx->reta_sz_log - 1);
> +		}
> +		h->compl_table[val] = i;
> +	}
> +}
> +
> +static inline int
> +insert_before(struct rte_thash_ctx *ctx,
> +	struct rte_thash_subtuple_helper *ent,
> +	struct rte_thash_subtuple_helper *cur_ent,
> +	struct rte_thash_subtuple_helper *next_ent,
> +	uint32_t start, uint32_t end, uint32_t range_end)
> +{
> +	int ret;
> +
> +	if (end < cur_ent->offset) {
> +		ent->lfsr = alloc_lfsr(ctx);
> +		if (ent->lfsr == NULL) {
> +			rte_free(ent);
> +			return -ENOMEM;
> +		}
> +		/* generate nonoverlapping range [start, end) */
> +		ret = generate_subkey(ctx, ent->lfsr, start, end - 1);
> +		if (ret != 0) {
> +			free_lfsr(ent->lfsr);
> +			rte_free(ent);
> +			return ret;
> +		}
> +	} else if ((next_ent != NULL) && (end > next_ent->offset)) {
> +		rte_free(ent);
> +		return -ENOSPC;
> +	}
> +	attach_lfsr(ent, cur_ent->lfsr);
> +
> +	/**
> +	 * generate partially overlapping range
> +	 * [start, cur_ent->start) in reverse order
> +	 */
> +	ret = generate_subkey(ctx, ent->lfsr, cur_ent->offset - 1, start);
> +	if (ret != 0) {
> +		free_lfsr(ent->lfsr);
> +		rte_free(ent);
> +		return ret;
> +	}
> +
> +	if (end > range_end) {
> +		/**
> +		 * generate partially overlapping range
> +		 * (range_end, end)
> +		 */
> +		ret = generate_subkey(ctx, ent->lfsr, range_end, end - 1);
> +		if (ret != 0) {
> +			free_lfsr(ent->lfsr);
> +			rte_free(ent);
> +			return ret;
> +		}
> +	}
> +
> +	LIST_INSERT_BEFORE(cur_ent, ent, next);
> +	generate_compliment_table(ctx, ent);
> +	ctx->subtuples_nb++;
> +	return 0;
> +}
> +
> +static inline int
> +insert_after(struct rte_thash_ctx *ctx,
> +	struct rte_thash_subtuple_helper *ent,
> +	struct rte_thash_subtuple_helper *cur_ent,
> +	struct rte_thash_subtuple_helper *next_ent,
> +	struct rte_thash_subtuple_helper *prev_ent,
> +	uint32_t end, uint32_t range_end)
> +{
> +	int ret;
> +
> +	if ((next_ent != NULL) && (end > next_ent->offset)) {
> +		rte_free(ent);
> +		return -EEXIST;
> +	}
> +
> +	attach_lfsr(ent, cur_ent->lfsr);
> +	if (end > range_end) {
> +		/**
> +		 * generate partially overlapping range
> +		 * (range_end, end)
> +		 */
> +		ret = generate_subkey(ctx, ent->lfsr, range_end, end - 1);
> +		if (ret != 0) {
> +			free_lfsr(ent->lfsr);
> +			rte_free(ent);
> +			return ret;
> +		}
> +	}
> +
> +	LIST_INSERT_AFTER(prev_ent, ent, next);
> +	generate_compliment_table(ctx, ent);
> +	ctx->subtuples_nb++;
> +
> +	return 0;
>  }
> 
>  int
> -rte_thash_add_helper(struct rte_thash_ctx *ctx __rte_unused,
> -	const char *name __rte_unused, uint32_t len __rte_unused,
> -	uint32_t offset __rte_unused)
> +rte_thash_add_helper(struct rte_thash_ctx *ctx, const char *name, uint32_t len,
> +	uint32_t offset)
>  {
> +	struct rte_thash_subtuple_helper *ent, *cur_ent, *prev_ent, *next_ent;
> +	uint32_t start, end;
> +	int ret;
> +
> +	if ((ctx == NULL) || (name == NULL) || (len < ctx->reta_sz_log) ||
> +			((offset + len + TOEPLITZ_HASH_LEN - 1) >
> +			ctx->key_len * CHAR_BIT))
> +		return -EINVAL;
> +
> +	/* Check for existing name*/
> +	LIST_FOREACH(cur_ent, &ctx->head, next) {
> +		if (strncmp(name, cur_ent->name, sizeof(cur_ent->name)) == 0)
> +			return -EEXIST;
> +	}
> +
> +	end = offset + len + TOEPLITZ_HASH_LEN - 1;
> +	start = ((ctx->flags & RTE_THASH_MINIMAL_SEQ) ==
> +		RTE_THASH_MINIMAL_SEQ) ? (end - (2 * ctx->reta_sz_log - 1)) :
> +		offset;
> +
> +	ent = rte_zmalloc(NULL, sizeof(struct rte_thash_subtuple_helper) +
> +		sizeof(uint32_t) * (1 << ctx->reta_sz_log), 0);

The helper can be used by data-path code (via rte_thash_get_compliment()), right?
If so, it might be better to align it to a cache line.

> +	if (ent == NULL)
> +		return -ENOMEM;
> +
> +	rte_strlcpy(ent->name, name, sizeof(ent->name));
> +	ent->offset = start;
> +	ent->len = end - start;
> +	ent->tuple_offset = offset;
> +	ent->tuple_len = len;
> +	ent->lsb_msk = (1 << ctx->reta_sz_log) - 1;
> +
> +	cur_ent = LIST_FIRST(&ctx->head);
> +	while (cur_ent) {
> +		uint32_t range_end = cur_ent->offset + cur_ent->len;
> +		next_ent = LIST_NEXT(cur_ent, next);
> +		prev_ent = cur_ent;
> +		/* Iterate through overlapping ranges */
> +		while ((next_ent != NULL) && (next_ent->offset < range_end)) {
> +			range_end = RTE_MAX(next_ent->offset + next_ent->len,
> +				range_end);
> +			if (start > next_ent->offset)
> +				prev_ent = next_ent;
> +
> +			next_ent = LIST_NEXT(next_ent, next);
> +		}
> +
> +		if (start < cur_ent->offset)
> +			return insert_before(ctx, ent, cur_ent, next_ent,
> +				start, end, range_end);
> +		else if (start < range_end)
> +			return insert_after(ctx, ent, cur_ent, next_ent,
> +				prev_ent, end, range_end);
> +
> +		cur_ent = next_ent;
> +		continue;
> +	}
> +
> +	ent->lfsr = alloc_lfsr(ctx);
> +	if (ent->lfsr == NULL) {
> +		rte_free(ent);
> +		return -ENOMEM;
> +	}
> +
> +	/* generate nonoverlapping range [start, end) */
> +	ret = generate_subkey(ctx, ent->lfsr, start, end - 1);
> +	if (ret != 0) {
> +		free_lfsr(ent->lfsr);
> +		rte_free(ent);
> +		return ret;
> +	}
> +	if (LIST_EMPTY(&ctx->head)) {
> +		LIST_INSERT_HEAD(&ctx->head, ent, next);
> +	} else {
> +		LIST_FOREACH(next_ent, &ctx->head, next)
> +			prev_ent = next_ent;
> +
> +		LIST_INSERT_AFTER(prev_ent, ent, next);
> +	}
> +	generate_compliment_table(ctx, ent);
> +	ctx->subtuples_nb++;
> +
>  	return 0;
>  }
> 
>  struct rte_thash_subtuple_helper *
> -rte_thash_get_helper(struct rte_thash_ctx *ctx __rte_unused,
> -	const char *name __rte_unused)
> +rte_thash_get_helper(struct rte_thash_ctx *ctx, const char *name)
>  {
> +	struct rte_thash_subtuple_helper *ent;
> +
> +	if ((ctx == NULL) || (name == NULL))
> +		return NULL;
> +
> +	LIST_FOREACH(ent, &ctx->head, next) {
> +		if (strncmp(name, ent->name, sizeof(ent->name)) == 0)
> +			return ent;
> +	}
> +
>  	return NULL;
>  }
> 
>  uint32_t
> -rte_thash_get_compliment(struct rte_thash_subtuple_helper *h __rte_unused,
> -	uint32_t hash __rte_unused, uint32_t desired_hash __rte_unused)
> +rte_thash_get_compliment(struct rte_thash_subtuple_helper *h,
> +	uint32_t hash, uint32_t desired_hash)
>  {
> -	return 0;
> +	return h->compl_table[(hash ^ desired_hash) & h->lsb_msk];
>  }

Would it make sense to add another one for multiple values:
rte_thash_get_compliment(uint32_t hash, const uint32_t desired_hashes[], uint32_t adj_hash[], uint32_t num);
so that the user can get adjustment values for multiple queues at once?

> 
>  const uint8_t *
> -rte_thash_get_key(struct rte_thash_ctx *ctx __rte_unused)
> +rte_thash_get_key(struct rte_thash_ctx *ctx)
>  {
> -	return NULL;
> +	return ctx->hash_key;
> +}
> +
> +static inline void
> +xor_bit(uint8_t *ptr, uint32_t bit, uint32_t pos)
> +{
> +	uint32_t byte_idx = pos >> 3;
> +	uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1));
> +	uint8_t tmp;
> +
> +	tmp = ptr[byte_idx];
> +	tmp ^= bit << bit_idx;
> +	ptr[byte_idx] = tmp;
> +}
> +
> +int
> +rte_thash_adjust_tuple(struct rte_thash_subtuple_helper *h,
> +	uint8_t *orig_tuple, uint32_t adj_bits,
> +	rte_thash_check_tuple_t fn, void *userdata)
> +{
> +	unsigned i;
> +
> +	if ((h == NULL) || (orig_tuple == NULL))
> +		return -EINVAL;
> +
> +	adj_bits &= h->lsb_msk;
> +	/* Hint: LSB of adj_bits corresponds to offset + len bit of tuple */
> +	for (i = 0; i < sizeof(uint32_t) * CHAR_BIT; i++) {
> +		uint8_t bit = (adj_bits >> i) & 0x1;
> +		if (bit)
> +			xor_bit(orig_tuple, bit,
> +				h->tuple_offset + h->tuple_len - 1 - i);
> +	}
> +
> +	if (fn != NULL)
> +		return (fn(userdata, orig_tuple)) ? 0 : -EEXIST;
> +
> +	return 0;
>  }

Not sure there is much point in having a callback that is called only once.
It might be better to rework the function so that the user provides 2 callbacks -
one to generate a new value, and a second to check it.
Something like that:

int
rte_thash_gen_tuple(struct rte_thash_subtuple_helper *h,
	uint8_t *tuple, uint32_t desired_hash,
	int (*cb_gen_tuple)(uint8_t *, void *),
	int (*cb_check_tuple)(const uint8_t *, void *),
	void *userdata) 
{
	do {
		rc = cb_gen_tuple(tuple, userdata);
		if (rc != 0)
			return rc;
		hash = rte_softrss(tuple, ...);
		adj = rte_thash_get_compliment(h, hash, desired_hash);
		update_tuple(tuple, adj, ...);
		rc = cb_check_tuple(tuple, userdata); 
	} while(rc != 0);

             return rc;
}

> diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h
> index 38a641b..fd67931 100644
> --- a/lib/librte_hash/rte_thash.h
> +++ b/lib/librte_hash/rte_thash.h
> @@ -360,6 +360,48 @@ __rte_experimental
>  const uint8_t *
>  rte_thash_get_key(struct rte_thash_ctx *ctx);
> 
> +/**
> + * Function prototype for the rte_thash_adjust_tuple
> + * to check if adjusted tuple could be used.
> + * Generally it is some kind of lookup function to check
> + * if adjusted tuple is already in use.
> + *
> + * @param userdata
> + *  Pointer to the userdata. It could be a pointer to the
> + *  table with used tuples to search.
> + * @param tuple
> + *  Pointer to the tuple to check
> + *
> + * @return
> + *  1 on success
> + *  0 otherwise
> + */
> +typedef int (*rte_thash_check_tuple_t)(void *userdata, uint8_t *tuple);
> +
> +/**
> + * Adjust tuple with complimentary bits.
> + *
> + * @param h
> + *  Pointer to the helper struct
> + * @param orig_tuple
> + *  Pointer to the tuple to be adjusted
> + * @param adj_bits
> + *  Valure returned by rte_thash_get_compliment
> + * @param fn
> + *  Callback function to check adjusted tuple. Could be NULL
> + * @param userdata
> + *  Pointer to the userdata to be passed to fn(). Could be NULL
> + *
> + * @return
> + *  0 on success
> + *  negative otherwise
> + */
> +__rte_experimental
> +int
> +rte_thash_adjust_tuple(struct rte_thash_subtuple_helper *h,
> +	uint8_t *orig_tuple, uint32_t adj_bits,
> +	rte_thash_check_tuple_t fn, void *userdata);
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/lib/librte_hash/version.map b/lib/librte_hash/version.map
> index 93cb230..a992a1e 100644
> --- a/lib/librte_hash/version.map
> +++ b/lib/librte_hash/version.map
> @@ -32,6 +32,7 @@ DPDK_21 {
>  EXPERIMENTAL {
>  	global:
> 
> +	rte_thash_adjust_tuple;
>  	rte_hash_free_key_with_position;
>  	rte_hash_lookup_with_hash_bulk;
>  	rte_hash_lookup_with_hash_bulk_data;
> --
> 2.7.4


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [dpdk-dev] [PATCH v2 0/3] Predictable RSS feature
  2021-04-06 19:50 ` [dpdk-dev] [PATCH v2 0/3] Predictable RSS feature Vladimir Medvedkin
@ 2021-04-08 15:56   ` Stephen Hemminger
  2021-04-11 18:51     ` Medvedkin, Vladimir
  2021-04-10  0:32   ` Wang, Yipeng1
                     ` (4 subsequent siblings)
  5 siblings, 1 reply; 23+ messages in thread
From: Stephen Hemminger @ 2021-04-08 15:56 UTC (permalink / raw)
  To: Vladimir Medvedkin
  Cc: dev, konstantin.ananyev, andrey.chilikin, ray.kinsella,
	yipeng1.wang, sameh.gobriel, bruce.richardson

On Tue,  6 Apr 2021 20:50:40 +0100
Vladimir Medvedkin <vladimir.medvedkin@intel.com> wrote:

> This patch series introduces predictable RSS feature.
> It is based on the idea of searching for partial hash collisions
> within Toeplitz hash.
> 
> The Toeplitz hash function is a homomorphism between (G, ^) and (H, ^),
> where (G, ^) - is a group of tuples and (H, ^) is a group of hashes
> with respect to XOR operation. So tuples and hashes could be treated as
> n-dimension and 32-dimension vector spaces over GF(2).
> So, f(x ^ y) == f(x) ^ f(y)
> where f - is the toeplitz hash function and x, y are tuples.
> 
> The ability to predict partial collisions allows user to compute
> input hash value with desired LSB values.
> Usually number of LSB's are defined by the size of RSS Redirection Table.
> 
> There could be number of use cases, for example:
> 1) NAT. Using this library it is possible to select a new port number
> on a translation in the way that rss hash for original tuple will have
> the same LSB's as rss hash for reverse tuple.
> 2) IPSec/MPLS/Vxlan. It is possible to choose tunnel id to be pinned to
> a desired queue.
> 3) TCP stack. It is possible to choose a source port number for outgoing
> connections in the way that received replies will be assigned to
> desired queue.
> 4) RSS hash key generation. Hash key initialization with random values
> does not guarantee an uniform distribution amongst queues. This library
> uses mathematically proved algorithm to complete the rss hash key to
> provide the best distribution.
> 
> v2:
> - added extra API rte_thash_adjust_tuple()
> - added extra tests for rte_thash_adjust_tuple()
> - added extra fields to rte_thash_subtuple_helper struct
> - fixed typos 
> 
> Vladimir Medvedkin (3):
>   hash: add predictable RSS API
>   hash: add predictable RSS implementation
>   test/hash: add additional thash tests
> 
>  app/test/test_thash.c       | 468 +++++++++++++++++++++++++++++++-
>  lib/librte_hash/meson.build |   3 +-
>  lib/librte_hash/rte_thash.c | 637 ++++++++++++++++++++++++++++++++++++++++++++
>  lib/librte_hash/rte_thash.h | 180 +++++++++++++
>  lib/librte_hash/version.map |   8 +
>  5 files changed, 1289 insertions(+), 7 deletions(-)
>  create mode 100644 lib/librte_hash/rte_thash.c
> 

It would be good to show how this could be used in an application.
Maybe add yet another variant/flag to the l3fwd example.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/3] hash: add predictable RSS API
  2021-04-06 19:50 ` [dpdk-dev] [PATCH v2 1/3] hash: add predictable RSS API Vladimir Medvedkin
@ 2021-04-10  0:05   ` Wang, Yipeng1
  2021-04-11 18:52     ` Medvedkin, Vladimir
  0 siblings, 1 reply; 23+ messages in thread
From: Wang, Yipeng1 @ 2021-04-10  0:05 UTC (permalink / raw)
  To: Medvedkin, Vladimir, dev
  Cc: Ananyev, Konstantin, Chilikin, Andrey, Kinsella, Ray, Gobriel,
	Sameh, Richardson, Bruce, Stephen Hemminger

> -----Original Message-----
> From: Medvedkin, Vladimir <vladimir.medvedkin@intel.com>
> Sent: Tuesday, April 6, 2021 12:51 PM
> To: dev@dpdk.org
> Cc: Ananyev, Konstantin <konstantin.ananyev@intel.com>; Chilikin, Andrey
> <andrey.chilikin@intel.com>; Kinsella, Ray <ray.kinsella@intel.com>; Wang,
> Yipeng1 <yipeng1.wang@intel.com>; Gobriel, Sameh
> <sameh.gobriel@intel.com>; Richardson, Bruce
> <bruce.richardson@intel.com>
> Subject: [PATCH v2 1/3] hash: add predictable RSS API
> 
> This patch adds predictable RSS API.
> It is based on the idea of searching partial Toeplitz hash collisions.
> 
> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
> ---
>  lib/librte_hash/meson.build |   3 +-
>  lib/librte_hash/rte_thash.c |  96 ++++++++++++++++++++++++++++++
> lib/librte_hash/rte_thash.h | 138
> ++++++++++++++++++++++++++++++++++++++++++++
>  lib/librte_hash/version.map |   7 +++
>  4 files changed, 243 insertions(+), 1 deletion(-)  create mode 100644
> lib/librte_hash/rte_thash.c
> 
> diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build index
> 242859f..3546014 100644
> --- a/lib/librte_hash/meson.build
> +++ b/lib/librte_hash/meson.build
> @@ -8,6 +8,7 @@ headers = files('rte_fbk_hash.h',
>  	'rte_thash.h')
>  indirect_headers += files('rte_crc_arm64.h')
> 
> -sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c')
> +sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c', 'rte_thash.c')
> +deps += ['net']
>  deps += ['ring']
>  deps += ['rcu']
> diff --git a/lib/librte_hash/rte_thash.c b/lib/librte_hash/rte_thash.c new file
> mode 100644 index 0000000..79e8724
> --- /dev/null
> +++ b/lib/librte_hash/rte_thash.c
> @@ -0,0 +1,96 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2021 Intel Corporation
> + */
> +
> +#include <rte_thash.h>
> +#include <rte_tailq.h>
> +#include <rte_random.h>
> +#include <rte_memcpy.h>
> +#include <rte_errno.h>
> +#include <rte_eal.h>
> +#include <rte_eal_memconfig.h>
> +#include <rte_malloc.h>
> +
> +#define THASH_NAME_LEN		64
> +
> +struct thash_lfsr {
> +	uint32_t	ref_cnt;
> +	uint32_t	poly;
> +	/**< polynomial associated with the lfsr */
> +	uint32_t	rev_poly;
> +	/**< polynomial to generate the sequence in reverse direction */
> +	uint32_t	state;
> +	/**< current state of the lfsr */
> +	uint32_t	rev_state;
> +	/**< current state of the lfsr for reverse direction */
> +	uint32_t	deg;	/**< polynomial degree*/
> +	uint32_t	bits_cnt;  /**< number of bits generated by lfsr*/
> +};
> +
> +struct rte_thash_subtuple_helper {
> +	char	name[THASH_NAME_LEN];	/** < Name of subtuple
> configuration */
> +	LIST_ENTRY(rte_thash_subtuple_helper)	next;
> +	struct thash_lfsr	*lfsr;
> +	uint32_t	offset;		/** < Offset in bits of the subtuple */
> +	uint32_t	len;		/** < Length in bits of the subtuple
> */
> +	uint32_t	lsb_msk;	/** < (1 << reta_sz_log) - 1 */
> +	__extension__ uint32_t	compl_table[0] __rte_cache_aligned;
> +	/** < Complimentary table */
> +};
> +
> +struct rte_thash_ctx {
> +	char		name[THASH_NAME_LEN];
> +	LIST_HEAD(, rte_thash_subtuple_helper) head;
> +	uint32_t	key_len;	/** < Length of the NIC RSS hash key
> */
> +	uint32_t	reta_sz_log;	/** < size of the RSS ReTa in bits */
> +	uint32_t	subtuples_nb;	/** < number of subtuples */
> +	uint32_t	flags;
> +	uint8_t		hash_key[0];
> +};
> +
> +struct rte_thash_ctx *
> +rte_thash_init_ctx(const char *name __rte_unused,
> +	uint32_t key_len __rte_unused, uint32_t reta_sz __rte_unused,
> +	uint8_t *key __rte_unused, uint32_t flags __rte_unused) {
> +	return NULL;
> +}
> +
> +struct rte_thash_ctx *
> +rte_thash_find_existing(const char *name __rte_unused) {
> +	return NULL;
> +}
> +
> +void
> +rte_thash_free_ctx(struct rte_thash_ctx *ctx __rte_unused) { }
> +
> +int
> +rte_thash_add_helper(struct rte_thash_ctx *ctx __rte_unused,
> +	const char *name __rte_unused, uint32_t len __rte_unused,
> +	uint32_t offset __rte_unused)
> +{
> +	return 0;
> +}
> +
> +struct rte_thash_subtuple_helper *
> +rte_thash_get_helper(struct rte_thash_ctx *ctx __rte_unused,
> +	const char *name __rte_unused)
> +{
> +	return NULL;
> +}
> +
> +uint32_t
> +rte_thash_get_compliment(struct rte_thash_subtuple_helper *h
> __rte_unused,
> +	uint32_t hash __rte_unused, uint32_t desired_hash __rte_unused) {
> +	return 0;
> +}
> +
> +const uint8_t *
> +rte_thash_get_key(struct rte_thash_ctx *ctx __rte_unused) {
> +	return NULL;
> +}
> diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h index
> 061efa2..38a641b 100644
> --- a/lib/librte_hash/rte_thash.h
> +++ b/lib/librte_hash/rte_thash.h
> @@ -1,5 +1,6 @@
>  /* SPDX-License-Identifier: BSD-3-Clause
>   * Copyright(c) 2015-2019 Vladimir Medvedkin <medvedkinv@gmail.com>
> + * Copyright(c) 2021 Intel Corporation
>   */
> 
>  #ifndef _RTE_THASH_H
> @@ -222,6 +223,143 @@ rte_softrss_be(uint32_t *input_tuple, uint32_t
> input_len,
>  	return ret;
>  }
> 
> +/**
> + * LFSR will ignore if generated m-sequence has more than 2^n -1 bits
> +*/
[Wang, Yipeng] 
I haven't fully understood the significance of/reasons behind the two flags.
For the comment above, 2^n is the reta_size, right?
If so, it would be clearer to say reta_size rather than 2^n.

For the first flag:
What would be the issue for overflow? I understand that multiple helpers may overlap
on the m-sequence, but since they are for different tuples, what would be the issue?

For the second flag: is it always good to keep it minimum for each helper?

The goal is to have the best default values for users who do not understand the algorithm details.
Fewer flags are usually better.

> +#define RTE_THASH_IGNORE_PERIOD_OVERFLOW	0x1
> +/**
> + * Generate minimal required bit (equal to ReTa LSB) sequence into
> + * the hash_key
> + */
> +#define RTE_THASH_MINIMAL_SEQ			0x2
> +
> +/** @internal thash context structure. */ struct rte_thash_ctx;
> +/** @internal thash helper structure. */ struct
> +rte_thash_subtuple_helper;
> +
> +/**
> + * Create a new thash context.
> + *
> + * @param name
> + *  context name
> + * @param key_len
> + *  length of the toeplitz hash key
> + * @param reta_sz
> + *  logarithm of the NIC's Redirection Table (ReTa) size,
> + *  i.e. number of the LSBs if the hash used to determine
> + *  the reta entry.
> + * @param key
[Wang, Yipeng] The key will be modified by the helper anyway. What is the reason for having
the users specify the key here?

> + *  pointer to the key used to init an internal key state.
> + *  Could be NULL, in this case internal key will be inited with random.
> + * @param flags
> + *  supported flags are:
> + *   RTE_THASH_IGNORE_PERIOD_OVERFLOW
> + *   RTE_THASH_MINIMAL_SEQ
> + * @return
> + *  A pointer to the created context on success
> + *  NULL otherwise
> + */
> +__rte_experimental
> +struct rte_thash_ctx *
> +rte_thash_init_ctx(const char *name, uint32_t key_len, uint32_t reta_sz,
> +	uint8_t *key, uint32_t flags);
> +
> +/**
> + * Find an existing thash context and return a pointer to it.
> + *
> + * @param name
> + *  Name of the thash context
> + * @return
> + *  Pointer to the thash context or NULL if it was not found with
> +rte_errno
> + *  set appropriately. Possible rte_errno values include:
> + *   - ENOENT - required entry not available to return.
> + */
> +__rte_experimental
> +struct rte_thash_ctx *
> +rte_thash_find_existing(const char *name);
> +
> +/**
> + * Free a thash context object
> + *
> + * @param ctx
> + *  thash context
> + * @return
> + *  None
> + */
> +__rte_experimental
> +void
> +rte_thash_free_ctx(struct rte_thash_ctx *ctx);
> +
> +/**
> + * Add a special properties to the toeplitz hash key inside a thash context.
> + * Creates an internal helper struct which has a complimentary table
> + * to calculate toeplitz hash collisions.
> + *
> + * @param ctx
> + *  thash context
> + * @param name
> + *  name of the helper
> + * @param len
[Wang, Yipeng] 
Add the requirement here so the user knows the expectation.
e.g. Len should be no shorter than log(reta_size).

> + *  length in bits of the target subtuple
> + * @param offset
> + *  offset in bits of the subtuple
> + * @return
> + *  0 on success
> + *  negative on error
> + */
[Wang, Yipeng] thread-safety for the APIs?
Better to add thread-safety info in the comments.

> +__rte_experimental
> +int
> +rte_thash_add_helper(struct rte_thash_ctx *ctx, const char *name,
> uint32_t len,
> +	uint32_t offset);
> +
> +/**
> + * Find a helper in the context by the given name
> + *
> + * @param ctx
> + *  thash context
> + * @param name
> + *  name of the helper
> + * @return
> + *  Pointer to the thash helper or NULL if it was not found.
> + */
> +__rte_experimental
> +struct rte_thash_subtuple_helper *
> +rte_thash_get_helper(struct rte_thash_ctx *ctx, const char *name);
> +
> +/**
> + * Get a complimentary value for the subtuple to produce a
[Wang, Yipeng] 
Should it be complimentary->complementary?  compliment -> complement?

> + * partial toeplitz hash collision. It muxt be XOR'ed with the
[Wang, Yipeng] typo *must be
> + * subtuple to produce the hash value with the desired hash LSB's
> + *
> + * @param h
> + *  Pointer to the helper struct
> + * @param hash
> + *  toeplitz hash value calculated for the given tuple
> + * @param desired_hash
> + *  desired hash value to find a collision for
> + * @return
> + *  A complimentary value which must be xored with the corresponding
> +subtuple  */ __rte_experimental uint32_t
> +rte_thash_get_compliment(struct rte_thash_subtuple_helper *h,
> +	uint32_t hash, uint32_t desired_hash);
> +
> +/**
> + * Get a pointer to the toeplitz hash contained in the context.
> + * It changes after each addition of a helper. It should be installed
> +to
> + * the NIC.
> + *
> + * @param ctx
> + *  thash context
> + * @return
> + *  A pointer to the toeplitz hash key
> + */
> +__rte_experimental
> +const uint8_t *
> +rte_thash_get_key(struct rte_thash_ctx *ctx);
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/lib/librte_hash/version.map b/lib/librte_hash/version.map index
> c6d7308..93cb230 100644
> --- a/lib/librte_hash/version.map
> +++ b/lib/librte_hash/version.map
> @@ -37,4 +37,11 @@ EXPERIMENTAL {
>  	rte_hash_lookup_with_hash_bulk_data;
>  	rte_hash_max_key_id;
>  	rte_hash_rcu_qsbr_add;
> +	rte_thash_add_helper;
> +	rte_thash_find_existing;
> +	rte_thash_free_ctx;
> +	rte_thash_get_compliment;
> +	rte_thash_get_helper;
> +	rte_thash_get_key;
> +	rte_thash_init_ctx;
>  };
> --
> 2.7.4


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/3] hash: add predictable RSS implementation
  2021-04-06 19:50 ` [dpdk-dev] [PATCH v2 2/3] hash: add predictable RSS implementation Vladimir Medvedkin
  2021-04-07 12:53   ` Ananyev, Konstantin
@ 2021-04-10  0:10   ` Wang, Yipeng1
  2021-04-11 18:52     ` Medvedkin, Vladimir
  1 sibling, 1 reply; 23+ messages in thread
From: Wang, Yipeng1 @ 2021-04-10  0:10 UTC (permalink / raw)
  To: Medvedkin, Vladimir, dev
  Cc: Ananyev, Konstantin, Chilikin, Andrey, Kinsella, Ray, Gobriel,
	Sameh, Richardson, Bruce, Stephen Hemminger

> -----Original Message-----
> From: Medvedkin, Vladimir <vladimir.medvedkin@intel.com>
> Sent: Tuesday, April 6, 2021 12:51 PM
> To: dev@dpdk.org
> Cc: Ananyev, Konstantin <konstantin.ananyev@intel.com>; Chilikin, Andrey
> <andrey.chilikin@intel.com>; Kinsella, Ray <ray.kinsella@intel.com>; Wang,
> Yipeng1 <yipeng1.wang@intel.com>; Gobriel, Sameh
> <sameh.gobriel@intel.com>; Richardson, Bruce
> <bruce.richardson@intel.com>
> Subject: [PATCH v2 2/3] hash: add predictable RSS implementation
> 
> This patch implements predictable RSS functionality.
> 
> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
> ---
>  lib/librte_hash/rte_thash.c | 577
> ++++++++++++++++++++++++++++++++++++++++++--
>  lib/librte_hash/rte_thash.h |  42 ++++
>  lib/librte_hash/version.map |   1 +
>  3 files changed, 602 insertions(+), 18 deletions(-)
> 
> diff --git a/lib/librte_hash/rte_thash.c b/lib/librte_hash/rte_thash.c index
> 79e8724..cc60ada 100644
> --- a/lib/librte_hash/rte_thash.c
> +++ b/lib/librte_hash/rte_thash.c
> @@ -12,6 +12,45 @@
>  #include <rte_malloc.h>
> 
>  #define THASH_NAME_LEN		64
> +#define TOEPLITZ_HASH_LEN	32
> +
> +#define	RETA_SZ_MIN	2U
> +#define	RETA_SZ_MAX	16U
> +#define RETA_SZ_IN_RANGE(reta_sz)	((reta_sz >= RETA_SZ_MIN)
> && \
> +					(reta_sz <= RETA_SZ_MAX))
> +
> +TAILQ_HEAD(rte_thash_list, rte_tailq_entry); static struct
> +rte_tailq_elem rte_thash_tailq = {
> +	.name = "RTE_THASH",
> +};
> +EAL_REGISTER_TAILQ(rte_thash_tailq)
> +
> +/**
> + * Table of some irreducible polinomials over GF(2).
> + * For lfsr they are reperesented in BE bit order, and
> + * x^0 is masked out.
> + * For example, poly x^5 + x^2 + 1 will be represented
> + * as (101001b & 11111b) = 01001b = 0x9  */ static const uint32_t
> +irreducible_poly_table[][4] = {
> +	{0, 0, 0, 0},	/** < degree 0 */
> +	{1, 1, 1, 1},	/** < degree 1 */
> +	{0x3, 0x3, 0x3, 0x3},	/** < degree 2 and so on... */
> +	{0x5, 0x3, 0x5, 0x3},
> +	{0x9, 0x3, 0x9, 0x3},
> +	{0x9, 0x1b, 0xf, 0x5},
> +	{0x21, 0x33, 0x1b, 0x2d},
> +	{0x41, 0x11, 0x71, 0x9},
> +	{0x71, 0xa9, 0xf5, 0x8d},
> +	{0x21, 0xd1, 0x69, 0x1d9},
> +	{0x81, 0x2c1, 0x3b1, 0x185},
> +	{0x201, 0x541, 0x341, 0x461},
> +	{0x941, 0x609, 0xe19, 0x45d},
> +	{0x1601, 0x1f51, 0x1171, 0x359},
> +	{0x2141, 0x2111, 0x2db1, 0x2109},
> +	{0x4001, 0x801, 0x101, 0x7301},
> +	{0x7781, 0xa011, 0x4211, 0x86d9},
> +};
> 
>  struct thash_lfsr {
>  	uint32_t	ref_cnt;
> @@ -31,8 +70,10 @@ struct rte_thash_subtuple_helper {
>  	char	name[THASH_NAME_LEN];	/** < Name of subtuple
> configuration */
>  	LIST_ENTRY(rte_thash_subtuple_helper)	next;
>  	struct thash_lfsr	*lfsr;
> -	uint32_t	offset;		/** < Offset in bits of the subtuple */
> -	uint32_t	len;		/** < Length in bits of the subtuple
> */
> +	uint32_t	offset;		/** < Offset of the m-sequence */
> +	uint32_t	len;		/** < Length of the m-sequence */
> +	uint32_t	tuple_offset;	/** < Offset in bits of the subtuple */
> +	uint32_t	tuple_len;	/** < Length in bits of the subtuple
> */
>  	uint32_t	lsb_msk;	/** < (1 << reta_sz_log) - 1 */
>  	__extension__ uint32_t	compl_table[0] __rte_cache_aligned;
>  	/** < Complimentary table */
> @@ -48,49 +89,549 @@ struct rte_thash_ctx {
>  	uint8_t		hash_key[0];
>  };
> 
> +static inline uint32_t
> +get_bit_lfsr(struct thash_lfsr *lfsr)
> +{
> +	uint32_t bit, ret;
> +
> +	/*
> +	 * masking the TAP bits defined by the polynomial and
> +	 * calculating parity
> +	 */
> +	bit = __builtin_popcount(lfsr->state & lfsr->poly) & 0x1;
> +	ret = lfsr->state & 0x1;
> +	lfsr->state = ((lfsr->state >> 1) | (bit << (lfsr->deg - 1))) &
> +		((1 << lfsr->deg) - 1);
> +
> +	lfsr->bits_cnt++;
> +	return ret;
> +}
> +
> +static inline uint32_t
> +get_rev_bit_lfsr(struct thash_lfsr *lfsr) {
> +	uint32_t bit, ret;
> +
> +	bit = __builtin_popcount(lfsr->rev_state & lfsr->rev_poly) & 0x1;
> +	ret = lfsr->rev_state & (1 << (lfsr->deg - 1));
> +	lfsr->rev_state = ((lfsr->rev_state << 1) | bit) &
> +		((1 << lfsr->deg) - 1);
> +
> +	lfsr->bits_cnt++;
> +	return ret;
> +}
> +
> +static inline uint32_t
> +thash_get_rand_poly(uint32_t poly_degree) {
> +	return irreducible_poly_table[poly_degree][rte_rand() %
> +		RTE_DIM(irreducible_poly_table[poly_degree])];
> +}
> +
> +static struct thash_lfsr *
> +alloc_lfsr(struct rte_thash_ctx *ctx)
> +{
> +	struct thash_lfsr *lfsr;
> +	uint32_t i;
> +
> +	if (ctx == NULL)
> +		return NULL;
> +
> +	lfsr = rte_zmalloc(NULL, sizeof(struct thash_lfsr), 0);
> +	if (lfsr == NULL)
> +		return NULL;
> +
> +	lfsr->deg = ctx->reta_sz_log;
> +	lfsr->poly = thash_get_rand_poly(lfsr->deg);
> +	do {
> +		lfsr->state = rte_rand() & ((1 << lfsr->deg) - 1);
> +	} while (lfsr->state == 0);
> +	/* init reverse order polynomial */
> +	lfsr->rev_poly = (lfsr->poly >> 1) | (1 << (lfsr->deg - 1));
> +	/* init proper rev_state*/
> +	lfsr->rev_state = lfsr->state;
> +	for (i = 0; i <= lfsr->deg; i++)
> +		get_rev_bit_lfsr(lfsr);
> +
> +	/* clear bits_cnt after rev_state was inited */
> +	lfsr->bits_cnt = 0;
> +	lfsr->ref_cnt = 1;
> +
> +	return lfsr;
> +}
> +
> +static void
> +attach_lfsr(struct rte_thash_subtuple_helper *h, struct thash_lfsr
> +*lfsr) {
> +	lfsr->ref_cnt++;
> +	h->lfsr = lfsr;
> +}
> +
> +static void
> +free_lfsr(struct thash_lfsr *lfsr)
> +{
> +	lfsr->ref_cnt--;
> +	if (lfsr->ref_cnt == 0)
> +		rte_free(lfsr);
> +}
> +
>  struct rte_thash_ctx *
> -rte_thash_init_ctx(const char *name __rte_unused,
> -	uint32_t key_len __rte_unused, uint32_t reta_sz __rte_unused,
> -	uint8_t *key __rte_unused, uint32_t flags __rte_unused)
> +rte_thash_init_ctx(const char *name, uint32_t key_len, uint32_t reta_sz,
> +	uint8_t *key, uint32_t flags)
>  {
> +	struct rte_thash_ctx *ctx;
> +	struct rte_tailq_entry *te;
> +	struct rte_thash_list *thash_list;
> +	uint32_t i;
> +	if ((name == NULL) || (key_len == 0)
> || !RETA_SZ_IN_RANGE(reta_sz)) {
> +		rte_errno = EINVAL;
> +		return NULL;
> +	}
> +
> +	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
> +
> +	rte_mcfg_tailq_write_lock();
> +
> +	/* guarantee there's no existing */
> +	TAILQ_FOREACH(te, thash_list, next) {
> +		ctx = (struct rte_thash_ctx *)te->data;
> +		if (strncmp(name, ctx->name, sizeof(ctx->name)) == 0)
> +			break;
> +	}
> +	ctx = NULL;
> +	if (te != NULL) {
> +		rte_errno = EEXIST;
> +		goto exit;
> +	}
> +
> +	/* allocate tailq entry */
> +	te = rte_zmalloc("THASH_TAILQ_ENTRY", sizeof(*te), 0);
> +	if (te == NULL) {
> +		RTE_LOG(ERR, HASH,
> +			"Can not allocate tailq entry for thash context %s\n",
> +			name);
> +		rte_errno = ENOMEM;
> +		goto exit;
> +	}
> +
> +	ctx = rte_zmalloc(NULL, sizeof(struct rte_thash_ctx) + key_len, 0);
> +	if (ctx == NULL) {
> +		RTE_LOG(ERR, HASH, "thash ctx %s memory allocation
> failed\n",
> +			name);
> +		rte_errno = ENOMEM;
> +		goto free_te;
> +	}
> +
> +	rte_strlcpy(ctx->name, name, sizeof(ctx->name));
> +	ctx->key_len = key_len;
> +	ctx->reta_sz_log = reta_sz;
> +	LIST_INIT(&ctx->head);
> +	ctx->flags = flags;
> +
> +	if (key)
> +		rte_memcpy(ctx->hash_key, key, key_len);
> +	else {
> +		for (i = 0; i < key_len; i++)
> +			ctx->hash_key[i] = rte_rand();
> +	}
> +
> +	te->data = (void *)ctx;
> +	TAILQ_INSERT_TAIL(thash_list, te, next);
> +
> +	rte_mcfg_tailq_write_unlock();
> +
> +	return ctx;
> +free_te:
> +	rte_free(te);
> +exit:
> +	rte_mcfg_tailq_write_unlock();
>  	return NULL;
>  }
> 
>  struct rte_thash_ctx *
> -rte_thash_find_existing(const char *name __rte_unused)
> +rte_thash_find_existing(const char *name)
>  {
> -	return NULL;
> +	struct rte_thash_ctx *ctx;
> +	struct rte_tailq_entry *te;
> +	struct rte_thash_list *thash_list;
> +
> +	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
> +
> +	rte_mcfg_tailq_read_lock();
> +	TAILQ_FOREACH(te, thash_list, next) {
> +		ctx = (struct rte_thash_ctx *)te->data;
> +		if (strncmp(name, ctx->name, sizeof(ctx->name)) == 0)
> +			break;
> +	}
> +
> +	rte_mcfg_tailq_read_unlock();
> +
> +	if (te == NULL) {
> +		rte_errno = ENOENT;
> +		return NULL;
> +	}
> +
> +	return ctx;
>  }
> 
>  void
> -rte_thash_free_ctx(struct rte_thash_ctx *ctx __rte_unused)
> +rte_thash_free_ctx(struct rte_thash_ctx *ctx)
>  {
> +	struct rte_tailq_entry *te;
> +	struct rte_thash_list *thash_list;
> +	struct rte_thash_subtuple_helper *ent, *tmp;
> +
> +	if (ctx == NULL)
> +		return;
> +
> +	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
> +	rte_mcfg_tailq_write_lock();
> +	TAILQ_FOREACH(te, thash_list, next) {
> +		if (te->data == (void *)ctx)
> +			break;
> +	}
> +
> +	if (te != NULL)
> +		TAILQ_REMOVE(thash_list, te, next);
> +
> +	rte_mcfg_tailq_write_unlock();
> +	ent = LIST_FIRST(&(ctx->head));
> +	while (ent) {
> +		free_lfsr(ent->lfsr);
> +		tmp = ent;
> +		ent = LIST_NEXT(ent, next);
> +		LIST_REMOVE(tmp, next);
> +		rte_free(tmp);
> +	}
> +
> +	rte_free(ctx);
> +	rte_free(te);
> +}
> +
> +static inline void
> +set_bit(uint8_t *ptr, uint32_t bit, uint32_t pos) {
> +	uint32_t byte_idx = pos >> 3;
> +	uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1));
> +	uint8_t tmp;
> +
> +	tmp = ptr[byte_idx];
> +	tmp &= ~(1 << bit_idx);
> +	tmp |= bit << bit_idx;
> +	ptr[byte_idx] = tmp;
> +}
> +
> +/**
> + * writes m-sequence to the hash_key for range [start, end]
> + * (i.e. including start and end positions)  */ static int
> +generate_subkey(struct rte_thash_ctx *ctx, struct thash_lfsr *lfsr,
> +	uint32_t start, uint32_t end)
> +{
> +	uint32_t i;
> +	uint32_t req_bits = (start < end) ? (end - start) : (start - end);
> +	req_bits++; /* due to incuding end */
> +
> +	/* check if lfsr overflow period of the m-sequence */
> +	if (((lfsr->bits_cnt + req_bits) > (1ULL << lfsr->deg) - 1) &&
> +			((ctx->flags &
> RTE_THASH_IGNORE_PERIOD_OVERFLOW) !=
> +			RTE_THASH_IGNORE_PERIOD_OVERFLOW))
> +		return -ENOSPC;
[Wang, Yipeng] 
If there is no space, should one increase lfsr->deg? Or if it is already the highest degree you predefined, then what should be done?
Maybe a log message could help the user with more information on possible solutions.
> +
> +	if (start < end) {
> +		/* original direction (from left to right)*/
> +		for (i = start; i <= end; i++)
> +			set_bit(ctx->hash_key, get_bit_lfsr(lfsr), i);
> +
> +	} else {
> +		/* reverse direction (from right to left) */
> +		for (i = end; i >= start; i--)
> +			set_bit(ctx->hash_key, get_rev_bit_lfsr(lfsr), i);
> +	}
> +
> +	return 0;
> +}
> +
> +static inline uint32_t
> +get_subvalue(struct rte_thash_ctx *ctx, uint32_t offset) {
> +	uint32_t *tmp, val;
> +
> +	tmp = (uint32_t *)(&ctx->hash_key[offset >> 3]);
> +	val = rte_be_to_cpu_32(*tmp);
> +	val >>= (TOEPLITZ_HASH_LEN - ((offset & (CHAR_BIT - 1)) +
> +		ctx->reta_sz_log));
> +
> +	return val & ((1 << ctx->reta_sz_log) - 1); }
> +
> +static inline void
> +generate_compliment_table(struct rte_thash_ctx *ctx,
> +	struct rte_thash_subtuple_helper *h)
> +{
> +	int i, j, k;
> +	uint32_t val;
> +	uint32_t start;
> +
> +	start = h->offset + h->len - (2 * ctx->reta_sz_log - 1);
> +
> +	for (i = 1; i < (1 << ctx->reta_sz_log); i++) {
> +		val = 0;
> +		for (j = i; j; j &= (j - 1)) {
> +			k = rte_bsf32(j);
> +			val ^= get_subvalue(ctx, start - k +
> +				ctx->reta_sz_log - 1);
> +		}
> +		h->compl_table[val] = i;
> +	}
> +}
> +
> +static inline int
> +insert_before(struct rte_thash_ctx *ctx,
> +	struct rte_thash_subtuple_helper *ent,
> +	struct rte_thash_subtuple_helper *cur_ent,
> +	struct rte_thash_subtuple_helper *next_ent,
> +	uint32_t start, uint32_t end, uint32_t range_end) {
> +	int ret;
> +
> +	if (end < cur_ent->offset) {
> +		ent->lfsr = alloc_lfsr(ctx);
> +		if (ent->lfsr == NULL) {
> +			rte_free(ent);
> +			return -ENOMEM;
> +		}
> +		/* generate nonoverlapping range [start, end) */
> +		ret = generate_subkey(ctx, ent->lfsr, start, end - 1);
> +		if (ret != 0) {
> +			free_lfsr(ent->lfsr);
> +			rte_free(ent);
> +			return ret;
> +		}
> +	} else if ((next_ent != NULL) && (end > next_ent->offset)) {
> +		rte_free(ent);
> +		return -ENOSPC;
> +	}
> +	attach_lfsr(ent, cur_ent->lfsr);
> +
> +	/**
> +	 * generate partially overlapping range
> +	 * [start, cur_ent->start) in reverse order
> +	 */
> +	ret = generate_subkey(ctx, ent->lfsr, cur_ent->offset - 1, start);
> +	if (ret != 0) {
> +		free_lfsr(ent->lfsr);
> +		rte_free(ent);
> +		return ret;
> +	}
> +
> +	if (end > range_end) {
> +		/**
> +		 * generate partially overlapping range
> +		 * (range_end, end)
> +		 */
> +		ret = generate_subkey(ctx, ent->lfsr, range_end, end - 1);
> +		if (ret != 0) {
> +			free_lfsr(ent->lfsr);
> +			rte_free(ent);
> +			return ret;
> +		}
> +	}
> +
> +	LIST_INSERT_BEFORE(cur_ent, ent, next);
> +	generate_compliment_table(ctx, ent);
> +	ctx->subtuples_nb++;
> +	return 0;
> +}
> +
> +static inline int
> +insert_after(struct rte_thash_ctx *ctx,
> +	struct rte_thash_subtuple_helper *ent,
> +	struct rte_thash_subtuple_helper *cur_ent,
> +	struct rte_thash_subtuple_helper *next_ent,
> +	struct rte_thash_subtuple_helper *prev_ent,
> +	uint32_t end, uint32_t range_end)
> +{
> +	int ret;
> +
> +	if ((next_ent != NULL) && (end > next_ent->offset)) {
> +		rte_free(ent);
> +		return -EEXIST;
> +	}
> +
> +	attach_lfsr(ent, cur_ent->lfsr);
> +	if (end > range_end) {
> +		/**
> +		 * generate partially overlapping range
> +		 * (range_end, end)
> +		 */
> +		ret = generate_subkey(ctx, ent->lfsr, range_end, end - 1);
> +		if (ret != 0) {
> +			free_lfsr(ent->lfsr);
> +			rte_free(ent);
> +			return ret;
> +		}
> +	}
> +
> +	LIST_INSERT_AFTER(prev_ent, ent, next);
> +	generate_compliment_table(ctx, ent);
> +	ctx->subtuples_nb++;
> +
> +	return 0;
>  }
> 
>  int
> -rte_thash_add_helper(struct rte_thash_ctx *ctx __rte_unused,
> -	const char *name __rte_unused, uint32_t len __rte_unused,
> -	uint32_t offset __rte_unused)
> +rte_thash_add_helper(struct rte_thash_ctx *ctx, const char *name,
> uint32_t len,
> +	uint32_t offset)
>  {
> +	struct rte_thash_subtuple_helper *ent, *cur_ent, *prev_ent,
> *next_ent;
> +	uint32_t start, end;
> +	int ret;
> +
> +	if ((ctx == NULL) || (name == NULL) || (len < ctx->reta_sz_log) ||
> +			((offset + len + TOEPLITZ_HASH_LEN - 1) >
> +			ctx->key_len * CHAR_BIT))
> +		return -EINVAL;
> +
> +	/* Check for existing name*/
> +	LIST_FOREACH(cur_ent, &ctx->head, next) {
> +		if (strncmp(name, cur_ent->name, sizeof(cur_ent->name))
> == 0)
> +			return -EEXIST;
> +	}
> +
> +	end = offset + len + TOEPLITZ_HASH_LEN - 1;
> +	start = ((ctx->flags & RTE_THASH_MINIMAL_SEQ) ==
> +		RTE_THASH_MINIMAL_SEQ) ? (end - (2 * ctx->reta_sz_log -
> 1)) :
> +		offset;
> +
> +	ent = rte_zmalloc(NULL, sizeof(struct rte_thash_subtuple_helper) +
> +		sizeof(uint32_t) * (1 << ctx->reta_sz_log), 0);
> +	if (ent == NULL)
> +		return -ENOMEM;
> +
> +	rte_strlcpy(ent->name, name, sizeof(ent->name));
> +	ent->offset = start;
> +	ent->len = end - start;
> +	ent->tuple_offset = offset;
> +	ent->tuple_len = len;
> +	ent->lsb_msk = (1 << ctx->reta_sz_log) - 1;
> +
> +	cur_ent = LIST_FIRST(&ctx->head);
> +	while (cur_ent) {
> +		uint32_t range_end = cur_ent->offset + cur_ent->len;
> +		next_ent = LIST_NEXT(cur_ent, next);
> +		prev_ent = cur_ent;
> +		/* Iterate through overlapping ranges */
> +		while ((next_ent != NULL) && (next_ent->offset <
> range_end)) {
> +			range_end = RTE_MAX(next_ent->offset +
> next_ent->len,
> +				range_end);
> +			if (start > next_ent->offset)
> +				prev_ent = next_ent;
> +
> +			next_ent = LIST_NEXT(next_ent, next);
> +		}
> +
> +		if (start < cur_ent->offset)
> +			return insert_before(ctx, ent, cur_ent, next_ent,
> +				start, end, range_end);
> +		else if (start < range_end)
> +			return insert_after(ctx, ent, cur_ent, next_ent,
> +				prev_ent, end, range_end);
> +
> +		cur_ent = next_ent;
> +		continue;
> +	}
> +
> +	ent->lfsr = alloc_lfsr(ctx);
> +	if (ent->lfsr == NULL) {
> +		rte_free(ent);
> +		return -ENOMEM;
> +	}
> +
> +	/* generate nonoverlapping range [start, end) */
> +	ret = generate_subkey(ctx, ent->lfsr, start, end - 1);
> +	if (ret != 0) {
> +		free_lfsr(ent->lfsr);
> +		rte_free(ent);
> +		return ret;
> +	}
> +	if (LIST_EMPTY(&ctx->head)) {
> +		LIST_INSERT_HEAD(&ctx->head, ent, next);
> +	} else {
> +		LIST_FOREACH(next_ent, &ctx->head, next)
> +			prev_ent = next_ent;
> +
> +		LIST_INSERT_AFTER(prev_ent, ent, next);
> +	}
> +	generate_compliment_table(ctx, ent);
> +	ctx->subtuples_nb++;
> +
>  	return 0;
>  }
> 
>  struct rte_thash_subtuple_helper *
> -rte_thash_get_helper(struct rte_thash_ctx *ctx __rte_unused,
> -	const char *name __rte_unused)
> +rte_thash_get_helper(struct rte_thash_ctx *ctx, const char *name)
>  {
> +	struct rte_thash_subtuple_helper *ent;
> +
> +	if ((ctx == NULL) || (name == NULL))
> +		return NULL;
> +
> +	LIST_FOREACH(ent, &ctx->head, next) {
> +		if (strncmp(name, ent->name, sizeof(ent->name)) == 0)
> +			return ent;
> +	}
> +
>  	return NULL;
>  }
> 
>  uint32_t
> -rte_thash_get_compliment(struct rte_thash_subtuple_helper *h
> __rte_unused,
> -	uint32_t hash __rte_unused, uint32_t desired_hash __rte_unused)
> +rte_thash_get_compliment(struct rte_thash_subtuple_helper *h,
> +	uint32_t hash, uint32_t desired_hash)
>  {
> -	return 0;
> +	return h->compl_table[(hash ^ desired_hash) & h->lsb_msk];
>  }
> 
>  const uint8_t *
> -rte_thash_get_key(struct rte_thash_ctx *ctx __rte_unused)
> +rte_thash_get_key(struct rte_thash_ctx *ctx)
>  {
> -	return NULL;
> +	return ctx->hash_key;
> +}
> +
> +static inline void
> +xor_bit(uint8_t *ptr, uint32_t bit, uint32_t pos) {
> +	uint32_t byte_idx = pos >> 3;
> +	uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1));
> +	uint8_t tmp;
> +
> +	tmp = ptr[byte_idx];
> +	tmp ^= bit << bit_idx;
> +	ptr[byte_idx] = tmp;
> +}
> +
> +int
> +rte_thash_adjust_tuple(struct rte_thash_subtuple_helper *h,
> +	uint8_t *orig_tuple, uint32_t adj_bits,
> +	rte_thash_check_tuple_t fn, void *userdata) {
> +	unsigned i;
> +
> +	if ((h == NULL) || (orig_tuple == NULL))
> +		return -EINVAL;
> +
> +	adj_bits &= h->lsb_msk;
> +	/* Hint: LSB of adj_bits corresponds to offset + len bit of tuple */
> +	for (i = 0; i < sizeof(uint32_t) * CHAR_BIT; i++) {
> +		uint8_t bit = (adj_bits >> i) & 0x1;
> +		if (bit)
> +			xor_bit(orig_tuple, bit,
> +				h->tuple_offset + h->tuple_len - 1 - i);
> +	}
> +
> +	if (fn != NULL)
> +		return (fn(userdata, orig_tuple)) ? 0 : -EEXIST;
> +
> +	return 0;
>  }
> diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h index
> 38a641b..fd67931 100644
> --- a/lib/librte_hash/rte_thash.h
> +++ b/lib/librte_hash/rte_thash.h
> @@ -360,6 +360,48 @@ __rte_experimental
>  const uint8_t *
>  rte_thash_get_key(struct rte_thash_ctx *ctx);
> 
> +/**
> + * Function prototype for the rte_thash_adjust_tuple
> + * to check if adjusted tuple could be used.
> + * Generally it is some kind of lookup function to check
> + * if adjusted tuple is already in use.
> + *
> + * @param userdata
> + *  Pointer to the userdata. It could be a pointer to the
> + *  table with used tuples to search.
> + * @param tuple
> + *  Pointer to the tuple to check
> + *
> + * @return
> + *  1 on success
> + *  0 otherwise
> + */
> +typedef int (*rte_thash_check_tuple_t)(void *userdata, uint8_t *tuple);
> +
> +/**
> + * Adjust tuple with complimentary bits.
> + *
[Wang, Yipeng] 
More explanation for this API is needed.
My understanding is that the user should call this function in a loop until
the above callback function returns success, at which point this function succeeds.
BTW, why not put this API in the first API commit?

> + * @param h
> + *  Pointer to the helper struct
> + * @param orig_tuple
> + *  Pointer to the tuple to be adjusted
> + * @param adj_bits
> + *  Valure returned by rte_thash_get_compliment
[Wang, Yipeng] typo. *value
> + * @param fn
> + *  Callback function to check adjusted tuple. Could be NULL
> + * @param userdata
> + *  Pointer to the userdata to be passed to fn(). Could be NULL
> + *
> + * @return
> + *  0 on success
> + *  negative otherwise
> + */
> +__rte_experimental
> +int
> +rte_thash_adjust_tuple(struct rte_thash_subtuple_helper *h,
> +	uint8_t *orig_tuple, uint32_t adj_bits,
> +	rte_thash_check_tuple_t fn, void *userdata);
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/lib/librte_hash/version.map b/lib/librte_hash/version.map index
> 93cb230..a992a1e 100644
> --- a/lib/librte_hash/version.map
> +++ b/lib/librte_hash/version.map
> @@ -32,6 +32,7 @@ DPDK_21 {
>  EXPERIMENTAL {
>  	global:
> 
> +	rte_thash_adjust_tuple;
>  	rte_hash_free_key_with_position;
>  	rte_hash_lookup_with_hash_bulk;
>  	rte_hash_lookup_with_hash_bulk_data;
> --
> 2.7.4


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [dpdk-dev] [PATCH v2 0/3] Predictable RSS feature
  2021-04-06 19:50 ` [dpdk-dev] [PATCH v2 0/3] Predictable RSS feature Vladimir Medvedkin
  2021-04-08 15:56   ` Stephen Hemminger
@ 2021-04-10  0:32   ` Wang, Yipeng1
  2021-04-11 18:51     ` Medvedkin, Vladimir
  2021-04-11 19:11   ` [dpdk-dev] [PATCH v3 " Vladimir Medvedkin
                     ` (3 subsequent siblings)
  5 siblings, 1 reply; 23+ messages in thread
From: Wang, Yipeng1 @ 2021-04-10  0:32 UTC (permalink / raw)
  To: Medvedkin, Vladimir, dev
  Cc: Ananyev, Konstantin, Chilikin, Andrey, Kinsella, Ray, Gobriel,
	Sameh, Richardson, Bruce, Stephen Hemminger

> -----Original Message-----
> From: Medvedkin, Vladimir <vladimir.medvedkin@intel.com>
> Sent: Tuesday, April 6, 2021 12:51 PM
> To: dev@dpdk.org
> Cc: Ananyev, Konstantin <konstantin.ananyev@intel.com>; Chilikin, Andrey
> <andrey.chilikin@intel.com>; Kinsella, Ray <ray.kinsella@intel.com>; Wang,
> Yipeng1 <yipeng1.wang@intel.com>; Gobriel, Sameh
> <sameh.gobriel@intel.com>; Richardson, Bruce
> <bruce.richardson@intel.com>
> Subject: [PATCH v2 0/3] Predictable RSS feature
> 
> This patch series introduces predictable RSS feature.
> It is based on the idea of searching for partial hash collisions within Toeplitz
> hash.
> 
> The Toeplitz hash function is a homomorphism between (G, ^) and (H, ^),
> where (G, ^) - is a group of tuples and (H, ^) is a group of hashes with respect
> to XOR operation. So tuples and hashes could be treated as n-dimension and
> 32-dimension vector spaces over GF(2).
> So, f(x ^ y) == f(x) ^ f(y)
> where f - is the toeplitz hash function and x, y are tuples.
> 
> The ability to predict partial collisions allows user to compute input hash value
> with desired LSB values.
> Usually number of LSB's are defined by the size of RSS Redirection Table.
> 
> There could be number of use cases, for example:
> 1) NAT. Using this library it is possible to select a new port number on a
> translation in the way that rss hash for original tuple will have the same LSB's
> as rss hash for reverse tuple.
> 2) IPSec/MPLS/Vxlan. It is possible to choose tunnel id to be pinned to a
> desired queue.
> 3) TCP stack. It is possible to choose a source port number for outgoing
> connections in the way that received replies will be assigned to desired
> queue.
> 4) RSS hash key generation. Hash key initialization with random values does
> not guarantee an uniform distribution amongst queues. This library uses
> mathematically proved algorithm to complete the rss hash key to provide the
> best distribution.
> 
> v2:
> - added extra API rte_thash_adjust_tuple()
> - added extra tests for rte_thash_adjust_tuple()
> - added extra fields to rte_thash_subtuple_helper struct
> - fixed typos
> 
> Vladimir Medvedkin (3):
>   hash: add predictable RSS API
>   hash: add predictable RSS implementation
>   test/hash: add additional thash tests
> 
>  app/test/test_thash.c       | 468 +++++++++++++++++++++++++++++++-
>  lib/librte_hash/meson.build |   3 +-
>  lib/librte_hash/rte_thash.c | 637
> ++++++++++++++++++++++++++++++++++++++++++++
>  lib/librte_hash/rte_thash.h | 180 +++++++++++++
>  lib/librte_hash/version.map |   8 +
>  5 files changed, 1289 insertions(+), 7 deletions(-)  create mode 100644
> lib/librte_hash/rte_thash.c
> 
> --
> 2.7.4

[Wang, Yipeng] 
Hi, Vladimir, thanks for the patch!
I haven't fully understood every bit of the algorithm yet, 
but I did see issues that this patch could potentially solve.
My understanding is that there are some restrictions for the current implementation,
for example, it only supports port(16-bit) manipulation, but not multiple fields or IP. 
Still, I think it should be good for the use cases you listed. I would love to hear
more feedback from people who are more familiar with doing NAT in production systems.

For me, besides the comments I sent earlier,
good documentation and references are needed with clear usage examples, as others pointed out already.

Also, the current API design seems a bit cumbersome.
To use the library, one needs:
Init_ctx
Add_helper.
Get_helper
Get_complement
Then in a loop:
Adjust_tuples
Then XOR with the current tuple

I wonder if an alternative all-in-one API could be designed for simpler use cases.

Thanks!




^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/3] hash: add predictable RSS implementation
  2021-04-07 12:53   ` Ananyev, Konstantin
@ 2021-04-11 18:51     ` Medvedkin, Vladimir
  2021-04-12  9:47       ` Ananyev, Konstantin
  0 siblings, 1 reply; 23+ messages in thread
From: Medvedkin, Vladimir @ 2021-04-11 18:51 UTC (permalink / raw)
  To: Ananyev, Konstantin, dev
  Cc: Chilikin, Andrey, Kinsella, Ray, Wang, Yipeng1, Gobriel, Sameh,
	Richardson, Bruce

Hi Konstantin,

Thanks for the review,

On 07/04/2021 15:53, Ananyev, Konstantin wrote:
> Hi Vladimir,
> 
> Few comments below, mostly minor.
> One generic one - doc seems missing.
> With that in place:
> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
> 
>>
>> This patch implements predictable RSS functionality.
>>
>> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>

<snip>

>> +#define	RETA_SZ_MIN	2U
>> +#define	RETA_SZ_MAX	16U
> 
> Should these RETA_SZ defines be in public header?
> So user can know what are allowed values?
> 

I don't think this is necessary, because the user does not choose it 
arbitrarily, but rather depending on the NIC.

>> +#define RETA_SZ_IN_RANGE(reta_sz)((reta_sz >= RETA_SZ_MIN) && \

<snip>

>> +uint32_t i;
> 
> Empty line is  missing.
> 

Thanks

>> +if ((name == NULL) || (key_len == 0) || !RETA_SZ_IN_RANGE(reta_sz)) {
>> +rte_errno = EINVAL;
>> +return NULL;
>> +}

<snip>

>> +static inline void
>> +set_bit(uint8_t *ptr, uint32_t bit, uint32_t pos)
>> +{
>> +uint32_t byte_idx = pos >> 3;
> 
> Just as a nit to be consistent with the line below:
> pos / CHAR_BIT;
> 

Fixed

>> +uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1));
>> +uint8_t tmp;

<snip>

>> +ent = rte_zmalloc(NULL, sizeof(struct rte_thash_subtuple_helper) +
>> +sizeof(uint32_t) * (1 << ctx->reta_sz_log), 0);
> 
> Helper can be used by data-path code (via rte_thash_get_compliment()) right?
> Then might be better to align it at cache-line.
> 

Agree, I'll fix it

>> +if (ent == NULL)
>> +return -ENOMEM;

<snip>

>>   uint32_t
>> -rte_thash_get_compliment(struct rte_thash_subtuple_helper *h __rte_unused,
>> -uint32_t hash __rte_unused, uint32_t desired_hash __rte_unused)
>> +rte_thash_get_compliment(struct rte_thash_subtuple_helper *h,
>> +uint32_t hash, uint32_t desired_hash)
>>   {
>> -return 0;
>> +return h->compl_table[(hash ^ desired_hash) & h->lsb_msk];
>>   }
> 
> Would it make sense to add another-one for multi values:
> rte_thash_get_compliment(uint32_t hash, const uint32_t desired_hashes[], uint32_t adj_hash[], uint32_t num);
> So user can get adjustment values for multiple queues at once?
> 

At the moment I can't find a scenario in which we would need a bulk 
version of this function.

>>
>>   const uint8_t *
>> -rte_thash_get_key(struct rte_thash_ctx *ctx __rte_unused)
>> +rte_thash_get_key(struct rte_thash_ctx *ctx)
>>   {
>> -return NULL;
>> +return ctx->hash_key;
>> +}
>> +
>> +static inline void
>> +xor_bit(uint8_t *ptr, uint32_t bit, uint32_t pos)
>> +{
>> +uint32_t byte_idx = pos >> 3;
>> +uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1));
>> +uint8_t tmp;
>> +
>> +tmp = ptr[byte_idx];
>> +tmp ^= bit << bit_idx;
>> +ptr[byte_idx] = tmp;
>> +}
>> +
>> +int
>> +rte_thash_adjust_tuple(struct rte_thash_subtuple_helper *h,
>> +uint8_t *orig_tuple, uint32_t adj_bits,
>> +rte_thash_check_tuple_t fn, void *userdata)
>> +{
>> +unsigned i;
>> +
>> +if ((h == NULL) || (orig_tuple == NULL))
>> +return -EINVAL;
>> +
>> +adj_bits &= h->lsb_msk;
>> +/* Hint: LSB of adj_bits corresponds to offset + len bit of tuple */
>> +for (i = 0; i < sizeof(uint32_t) * CHAR_BIT; i++) {
>> +uint8_t bit = (adj_bits >> i) & 0x1;
>> +if (bit)
>> +xor_bit(orig_tuple, bit,
>> +h->tuple_offset + h->tuple_len - 1 - i);
>> +}
>> +
>> +if (fn != NULL)
>> +return (fn(userdata, orig_tuple)) ? 0 : -EEXIST;
>> +
>> +return 0;
>>   }
> 
> Not sure is there much point to have a callback that is called only once.
> Might be better to rework the function in a way that user to provide 2 callbacks -
> one to generate new value, second to check.
> Something like that:
> 
> int
> rte_thash_gen_tuple(struct rte_thash_subtuple_helper *h,
> uint8_t *tuple, uint32_t desired_hash,
> int (*cb_gen_tuple)(uint8_t *, void *),
> int (*cb_check_tuple)(const uint8_t *, void *),
> void *userdata)
> {
> do {
> rc = cb_gen_tuple(tuple, userdata);
> if (rc != 0)
> return rc;
> hash = rte_softrss(tuple, ...);
> adj = rte_thash_get_compliment(h, hash, desired_hash);
> update_tuple(tuple, adj, ...);
> rc = cb_check_tuple(tuple, userdata);
> } while(rc != 0);
> 
>               return rc;
> }

Agree, there is no point in calling the callback for a single function 
call. I'll rewrite rte_thash_adjust_tuple() and send a new version in 
v3. As for gen_tuple, I think we don't need to have a separate callback,
new rte_thash_adjust_tuple implementation randomly changes corresponding 
bits (based on configured offset and length in the helper) in the tuple.

> 
>> diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h
>> index 38a641b..fd67931 100644
>> --- a/lib/librte_hash/rte_thash.h
>> +++ b/lib/librte_hash/rte_thash.h
>> @@ -360,6 +360,48 @@ __rte_experimental
>>   const uint8_t *
>>   rte_thash_get_key(struct rte_thash_ctx *ctx);
>>
>> +/**
>> + * Function prototype for the rte_thash_adjust_tuple
>> + * to check if adjusted tuple could be used.
>> + * Generally it is some kind of lookup function to check
>> + * if adjusted tuple is already in use.
>> + *
>> + * @param userdata
>> + *  Pointer to the userdata. It could be a pointer to the
>> + *  table with used tuples to search.
>> + * @param tuple
>> + *  Pointer to the tuple to check
>> + *
>> + * @return
>> + *  1 on success
>> + *  0 otherwise
>> + */
>> +typedef int (*rte_thash_check_tuple_t)(void *userdata, uint8_t *tuple);
>> +
>> +/**
>> + * Adjust tuple with complimentary bits.
>> + *
>> + * @param h
>> + *  Pointer to the helper struct
>> + * @param orig_tuple
>> + *  Pointer to the tuple to be adjusted
>> + * @param adj_bits
>> + *  Valure returned by rte_thash_get_compliment
>> + * @param fn
>> + *  Callback function to check adjusted tuple. Could be NULL
>> + * @param userdata
>> + *  Pointer to the userdata to be passed to fn(). Could be NULL
>> + *
>> + * @return
>> + *  0 on success
>> + *  negative otherwise
>> + */
>> +__rte_experimental
>> +int
>> +rte_thash_adjust_tuple(struct rte_thash_subtuple_helper *h,
>> +uint8_t *orig_tuple, uint32_t adj_bits,
>> +rte_thash_check_tuple_t fn, void *userdata);
>> +
>>   #ifdef __cplusplus
>>   }
>>   #endif
>> diff --git a/lib/librte_hash/version.map b/lib/librte_hash/version.map
>> index 93cb230..a992a1e 100644
>> --- a/lib/librte_hash/version.map
>> +++ b/lib/librte_hash/version.map
>> @@ -32,6 +32,7 @@ DPDK_21 {
>>   EXPERIMENTAL {
>>   global:
>>
>> +rte_thash_adjust_tuple;
>>   rte_hash_free_key_with_position;
>>   rte_hash_lookup_with_hash_bulk;
>>   rte_hash_lookup_with_hash_bulk_data;
>> --
>> 2.7.4
> 

-- 
Regards,
Vladimir

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [dpdk-dev] [PATCH v2 0/3] Predictable RSS feature
  2021-04-08 15:56   ` Stephen Hemminger
@ 2021-04-11 18:51     ` Medvedkin, Vladimir
  0 siblings, 0 replies; 23+ messages in thread
From: Medvedkin, Vladimir @ 2021-04-11 18:51 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: dev, konstantin.ananyev, andrey.chilikin, ray.kinsella,
	yipeng1.wang, sameh.gobriel, bruce.richardson

Hi Stephen,

Thanks for the feedback,

On 08/04/2021 18:56, Stephen Hemminger wrote:
> On Tue,  6 Apr 2021 20:50:40 +0100
> Vladimir Medvedkin <vladimir.medvedkin@intel.com> wrote:
> 
>> This patch series introduces predictable RSS feature.
>> It is based on the idea of searching for partial hash collisions
>> within Toeplitz hash.
>>
>> The Toeplitz hash function is a homomorphism between (G, ^) and (H, ^),
>> where (G, ^) - is a group of tuples and (H, ^) is a group of hashes
>> with respect to XOR operation. So tuples and hashes could be treated as
>> n-dimension and 32-dimension vector spaces over GF(2).
>> So, f(x ^ y) == f(x) ^ f(y)
>> where f - is the toeplitz hash function and x, y are tuples.
>>
>> The ability to predict partial collisions allows user to compute
>> input hash value with desired LSB values.
>> Usually number of LSB's are defined by the size of RSS Redirection Table.
>>
>> There could be number of use cases, for example:
>> 1) NAT. Using this library it is possible to select a new port number
>> on a translation in the way that rss hash for original tuple will have
>> the same LSB's as rss hash for reverse tuple.
>> 2) IPSec/MPLS/Vxlan. It is possible to choose tunnel id to be pinned to
>> a desired queue.
>> 3) TCP stack. It is possible to choose a source port number for outgoing
>> connections in the way that received replies will be assigned to
>> desired queue.
>> 4) RSS hash key generation. Hash key initialization with random values
>> does not guarantee an uniform distribution amongst queues. This library
>> uses mathematically proved algorithm to complete the rss hash key to
>> provide the best distribution.
>>
>> v2:
>> - added extra API rte_thash_adjust_tuple()
>> - added extra tests for rte_thash_adjust_tuple()
>> - added extra fields to rte_thash_subtuple_helper struct
>> - fixed typos
>>
>> Vladimir Medvedkin (3):
>>    hash: add predictable RSS API
>>    hash: add predictable RSS implementation
>>    test/hash: add additional thash tests
>>
>>   app/test/test_thash.c       | 468 +++++++++++++++++++++++++++++++-
>>   lib/librte_hash/meson.build |   3 +-
>>   lib/librte_hash/rte_thash.c | 637 ++++++++++++++++++++++++++++++++++++++++++++
>>   lib/librte_hash/rte_thash.h | 180 +++++++++++++
>>   lib/librte_hash/version.map |   8 +
>>   5 files changed, 1289 insertions(+), 7 deletions(-)
>>   create mode 100644 lib/librte_hash/rte_thash.c
>>
> 
> It would be good to show how this could be used in an application.
> Maybe yet another variant/flag to l3fwd example.

Agree, I think it would be great to have a simple NAT implementation in 
examples. We've discussed this and will probably add one in a future release.

> 

-- 
Regards,
Vladimir

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [dpdk-dev] [PATCH v2 0/3] Predictable RSS feature
  2021-04-10  0:32   ` Wang, Yipeng1
@ 2021-04-11 18:51     ` Medvedkin, Vladimir
  0 siblings, 0 replies; 23+ messages in thread
From: Medvedkin, Vladimir @ 2021-04-11 18:51 UTC (permalink / raw)
  To: Wang, Yipeng1, dev
  Cc: Ananyev, Konstantin, Chilikin, Andrey, Kinsella, Ray, Gobriel,
	Sameh, Richardson, Bruce, Stephen Hemminger

Hi Yipeng,

Thanks for the review,

On 10/04/2021 03:32, Wang, Yipeng1 wrote:
>> -----Original Message-----
>> From: Medvedkin, Vladimir <vladimir.medvedkin@intel.com>
>> Sent: Tuesday, April 6, 2021 12:51 PM
>> To: dev@dpdk.org
>> Cc: Ananyev, Konstantin <konstantin.ananyev@intel.com>; Chilikin, Andrey
>> <andrey.chilikin@intel.com>; Kinsella, Ray <ray.kinsella@intel.com>; Wang,
>> Yipeng1 <yipeng1.wang@intel.com>; Gobriel, Sameh
>> <sameh.gobriel@intel.com>; Richardson, Bruce
>> <bruce.richardson@intel.com>
>> Subject: [PATCH v2 0/3] Predictable RSS feature
>>
>> This patch series introduces predictable RSS feature.
>> It is based on the idea of searching for partial hash collisions within Toeplitz
>> hash.
>>
>> The Toeplitz hash function is a homomorphism between (G, ^) and (H, ^),
>> where (G, ^) - is a group of tuples and (H, ^) is a group of hashes with respect
>> to XOR operation. So tuples and hashes could be treated as n-dimension and
>> 32-dimension vector spaces over GF(2).
>> So, f(x ^ y) == f(x) ^ f(y)
>> where f - is the toeplitz hash function and x, y are tuples.
>>
>> The ability to predict partial collisions allows user to compute input hash value
>> with desired LSB values.
>> Usually number of LSB's are defined by the size of RSS Redirection Table.
>>
>> There could be number of use cases, for example:
>> 1) NAT. Using this library it is possible to select a new port number on a
>> translation in the way that rss hash for original tuple will have the same LSB's
>> as rss hash for reverse tuple.
>> 2) IPSec/MPLS/Vxlan. It is possible to choose tunnel id to be pinned to a
>> desired queue.
>> 3) TCP stack. It is possible to choose a source port number for outgoing
>> connections in the way that received replies will be assigned to desired
>> queue.
>> 4) RSS hash key generation. Hash key initialization with random values does
>> not guarantee an uniform distribution amongst queues. This library uses
>> mathematically proved algorithm to complete the rss hash key to provide the
>> best distribution.
>>
>> v2:
>> - added extra API rte_thash_adjust_tuple()
>> - added extra tests for rte_thash_adjust_tuple()
>> - added extra fields to rte_thash_subtuple_helper struct
>> - fixed typos
>>
>> Vladimir Medvedkin (3):
>>    hash: add predictable RSS API
>>    hash: add predictable RSS implementation
>>    test/hash: add additional thash tests
>>
>>   app/test/test_thash.c       | 468 +++++++++++++++++++++++++++++++-
>>   lib/librte_hash/meson.build |   3 +-
>>   lib/librte_hash/rte_thash.c | 637
>> ++++++++++++++++++++++++++++++++++++++++++++
>>   lib/librte_hash/rte_thash.h | 180 +++++++++++++
>>   lib/librte_hash/version.map |   8 +
>>   5 files changed, 1289 insertions(+), 7 deletions(-)  create mode 100644
>> lib/librte_hash/rte_thash.c
>>
>> --
>> 2.7.4
> 
> [Wang, Yipeng]
> Hi, Vladimir, thanks for the patch!
> I haven't fully understood every bit of the algorithm yet,
> but I did see issues that this patch could potentially solve.
> My understanding is that there are some restrictions for the current implementation,
> for example, it only supports port(16-bit) manipulation, but not multiple fields or IP.

It supports manipulation of any bit-range (with size >= reta_sz, configured 
for the CTX) within a tuple, as well as multiple such ranges.

> Still, I think it should be good for the use cases you listed. I would love to hear
> more feedbacks from people who are more familiar with doing NAT in production systems.
> 
> For me, besides the comments I sent earlier,
> good documentation and references are needed with clear usage examples, as others pointed out already.
> 
> Also, the current API design seems a bit cumbersome.
> To use the library, one needs:
> Init_ctx
> Add_helper.
> Get_helper
> Get_complement
> Then in a loop:
> Adjust_tuples
> Then XOR with the current tuple
> 
> I wonder if an alternative all-in-one API could be designed for simpler use cases.

Agree, v3 will have a new rte_thash_adjust_tuple() implementation that 
will make everything much easier.

> 
> Thanks!
> >
> 

-- 
Regards,
Vladimir

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/3] hash: add predictable RSS API
  2021-04-10  0:05   ` Wang, Yipeng1
@ 2021-04-11 18:52     ` Medvedkin, Vladimir
  0 siblings, 0 replies; 23+ messages in thread
From: Medvedkin, Vladimir @ 2021-04-11 18:52 UTC (permalink / raw)
  To: Wang, Yipeng1, dev
  Cc: Ananyev, Konstantin, Chilikin, Andrey, Kinsella, Ray, Gobriel,
	Sameh, Richardson, Bruce, Stephen Hemminger



On 10/04/2021 03:05, Wang, Yipeng1 wrote:
>> -----Original Message-----
>> From: Medvedkin, Vladimir <vladimir.medvedkin@intel.com>
>> Sent: Tuesday, April 6, 2021 12:51 PM
>> To: dev@dpdk.org
>> Cc: Ananyev, Konstantin <konstantin.ananyev@intel.com>; Chilikin, Andrey
>> <andrey.chilikin@intel.com>; Kinsella, Ray <ray.kinsella@intel.com>; Wang,
>> Yipeng1 <yipeng1.wang@intel.com>; Gobriel, Sameh
>> <sameh.gobriel@intel.com>; Richardson, Bruce
>> <bruce.richardson@intel.com>
>> Subject: [PATCH v2 1/3] hash: add predictable RSS API
>>
>> This patch adds predictable RSS API.
>> It is based on the idea of searching partial Toeplitz hash collisions.
>>
>> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
>> ---
>>   lib/librte_hash/meson.build |   3 +-
>>   lib/librte_hash/rte_thash.c |  96 ++++++++++++++++++++++++++++++
>> lib/librte_hash/rte_thash.h | 138
>> ++++++++++++++++++++++++++++++++++++++++++++
>>   lib/librte_hash/version.map |   7 +++
>>   4 files changed, 243 insertions(+), 1 deletion(-)  create mode 100644
>> lib/librte_hash/rte_thash.c
>>

<snip>

>> + * LFSR will ignore if generated m-sequence has more than 2^n -1 bits
>> +*/
> [Wang, Yipeng]
> I haven't fully got the significance/reasons behind the two flags.
> For the comment above, 2^n is the reta_size right?

Here "2^n - 1" is a length of m-sequence - a pseudorandom bit sequence 
which has a number of mathematical properties we need.

> If so, it is better than commenting 2^n.
> 
> For the first flag:
> What would be the issue for overflow? I understand that multiple helpers may overlap
> on the m-sequence, but since they are for different tuples, what would be the issue?
> 

M-sequence has a period, and after (2^n - 1) bits the sequence 
repeats. Eventually it is written to the rss hash key. In some 
circumstances an attack with spoofed packets can be made to overflow 
a particular NIC queue.
So generally this flag should be used for tests, for example to spread 
evenly traffic from the packet generator among the queues.

> For the second flag: is it always good to keep it minimum for each helper?
> 

Not always; without this flag an m-sequence will be generated for all 
variable bits, for example for the 16 bits of a port. And if we know that 
all values of this 16-bit port are equally probable, then the distribution 
of the hash LSB's will be even.

On the other hand, if the user has a number of helpers which share a 
single m-sequence, then there could be an overflow. And having this flag 
could split a single m-sequence into two independent ones.

> The goal is to have the best default values for user who do not understand the algorithm details.
> Less flags is usually better.
> 

I think the default value of 0 for flags is the best general use case.

>> +#define RTE_THASH_IGNORE_PERIOD_OVERFLOW0x1
>> +/**
>> + * Generate minimal required bit (equal to ReTa LSB) sequence into
>> + * the hash_key
>> + */
>> +#define RTE_THASH_MINIMAL_SEQ0x2
>> +
>> +/** @internal thash context structure. */ struct rte_thash_ctx;
>> +/** @internal thash helper structure. */ struct
>> +rte_thash_subtuple_helper;
>> +
>> +/**
>> + * Create a new thash context.
>> + *
>> + * @param name
>> + *  context name
>> + * @param key_len
>> + *  length of the toeplitz hash key
>> + * @param reta_sz
>> + *  logarithm of the NIC's Redirection Table (ReTa) size,
>> + *  i.e. number of the LSBs if the hash used to determine
>> + *  the reta entry.
>> + * @param key
> [Wang, Yipeng] Key will be modified by helper anyway. What is the reason of having
> the users to specify the key here?
> 

In some cases user will want to specify particular key. For example, if 
user wants to symmetrically load balance ipv4/tcp and do the NAT inside 
the tunnel without decapsulation, user will submit key with repeated 
2-byte values.

>> + *  pointer to the key used to init an internal key state.
>> + *  Could be NULL, in this case internal key will be inited with random.
>> + * @param flags
>> + *  supported flags are:
>> + *   RTE_THASH_IGNORE_PERIOD_OVERFLOW
>> + *   RTE_THASH_MINIMAL_SEQ
>> + * @return
>> + *  A pointer to the created context on success
>> + *  NULL otherwise
>> + */
>> +__rte_experimental
>> +struct rte_thash_ctx *
>> +rte_thash_init_ctx(const char *name, uint32_t key_len, uint32_t reta_sz,
>> +uint8_t *key, uint32_t flags);
>> +

<snip>

>> +/**
>> + * Add a special properties to the toeplitz hash key inside a thash context.
>> + * Creates an internal helper struct which has a complimentary table
>> + * to calculate toeplitz hash collisions.
>> + *
>> + * @param ctx
>> + *  thash context
>> + * @param name
>> + *  name of the helper
>> + * @param len
> [Wang, Yipeng]
> Add requirement here so user know the expectation.
> e.g. Len should be no shorter than log(reta_size).
> 

Agree, I'll add

>> + *  length in bits of the target subtuple
>> + * @param offset
>> + *  offset in bits of the subtuple
>> + * @return
>> + *  0 on success
>> + *  negative on error
>> + */
> [Wang, Yipeng] thread-safety for the APIs?
> Better to add thread-safety info in the comments.
> 

Agree, I'll add

>> +__rte_experimental
>> +int
>> +rte_thash_add_helper(struct rte_thash_ctx *ctx, const char *name,
>> uint32_t len,
>> +uint32_t offset);
>> +
>> +/**
>> + * Find a helper in the context by the given name
>> + *
>> + * @param ctx
>> + *  thash context
>> + * @param name
>> + *  name of the helper
>> + * @return
>> + *  Pointer to the thash helper or NULL if it was not found.
>> + */
>> +__rte_experimental
>> +struct rte_thash_subtuple_helper *
>> +rte_thash_get_helper(struct rte_thash_ctx *ctx, const char *name);
>> +
>> +/**
>> + * Get a complimentary value for the subtuple to produce a
> [Wang, Yipeng]
> Should it be complimentary->complementary?  compliment -> complement?
> 

Agree, will fix

>> + * partial toeplitz hash collision. It muxt be XOR'ed with the
> [Wang, Yipeng] typo *must be

will fix

>> + * subtuple to produce the hash value with the desired hash LSB's
>> + *
>> + * @param h
>> + *  Pointer to the helper struct
>> + * @param hash
>> + *  toeplitz hash value calculated for the given tuple
>> + * @param desired_hash
>> + *  desired hash value to find a collision for
>> + * @return
>> + *  A complimentary value which must be xored with the corresponding
>> +subtuple  */ __rte_experimental uint32_t
>> +rte_thash_get_compliment(struct rte_thash_subtuple_helper *h,
>> +uint32_t hash, uint32_t desired_hash);
>> +
>> +/**
>> + * Get a pointer to the toeplitz hash contained in the context.
>> + * It changes after each addition of a helper. It should be installed
>> +to
>> + * the NIC.
>> + *
>> + * @param ctx
>> + *  thash context
>> + * @return
>> + *  A pointer to the toeplitz hash key
>> + */
>> +__rte_experimental
>> +const uint8_t *
>> +rte_thash_get_key(struct rte_thash_ctx *ctx);
>> +
>>   #ifdef __cplusplus
>>   }
>>   #endif
>> diff --git a/lib/librte_hash/version.map b/lib/librte_hash/version.map index
>> c6d7308..93cb230 100644
>> --- a/lib/librte_hash/version.map
>> +++ b/lib/librte_hash/version.map
>> @@ -37,4 +37,11 @@ EXPERIMENTAL {
>>   rte_hash_lookup_with_hash_bulk_data;
>>   rte_hash_max_key_id;
>>   rte_hash_rcu_qsbr_add;
>> +rte_thash_add_helper;
>> +rte_thash_find_existing;
>> +rte_thash_free_ctx;
>> +rte_thash_get_compliment;
>> +rte_thash_get_helper;
>> +rte_thash_get_key;
>> +rte_thash_init_ctx;
>>   };
>> --
>> 2.7.4
> 

-- 
Regards,
Vladimir

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/3] hash: add predictable RSS implementation
  2021-04-10  0:10   ` Wang, Yipeng1
@ 2021-04-11 18:52     ` Medvedkin, Vladimir
  0 siblings, 0 replies; 23+ messages in thread
From: Medvedkin, Vladimir @ 2021-04-11 18:52 UTC (permalink / raw)
  To: Wang, Yipeng1, dev
  Cc: Ananyev, Konstantin, Chilikin, Andrey, Kinsella, Ray, Gobriel,
	Sameh, Richardson, Bruce, Stephen Hemminger



On 10/04/2021 03:10, Wang, Yipeng1 wrote:
>> -----Original Message-----
>> From: Medvedkin, Vladimir <vladimir.medvedkin@intel.com>
>> Sent: Tuesday, April 6, 2021 12:51 PM
>> To: dev@dpdk.org
>> Cc: Ananyev, Konstantin <konstantin.ananyev@intel.com>; Chilikin, Andrey
>> <andrey.chilikin@intel.com>; Kinsella, Ray <ray.kinsella@intel.com>; Wang,
>> Yipeng1 <yipeng1.wang@intel.com>; Gobriel, Sameh
>> <sameh.gobriel@intel.com>; Richardson, Bruce
>> <bruce.richardson@intel.com>
>> Subject: [PATCH v2 2/3] hash: add predictable RSS implementation
>>
>> This patch implements predictable RSS functionality.
>>
>> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
>> ---
>>   lib/librte_hash/rte_thash.c | 577
>> ++++++++++++++++++++++++++++++++++++++++++--
>>   lib/librte_hash/rte_thash.h |  42 ++++
>>   lib/librte_hash/version.map |   1 +
>>   3 files changed, 602 insertions(+), 18 deletions(-)
>>

<snip>

>> +/**
>> + * writes m-sequence to the hash_key for range [start, end]
>> + * (i.e. including start and end positions)  */ static int
>> +generate_subkey(struct rte_thash_ctx *ctx, struct thash_lfsr *lfsr,
>> +uint32_t start, uint32_t end)
>> +{
>> +uint32_t i;
>> +uint32_t req_bits = (start < end) ? (end - start) : (start - end);
>> +req_bits++; /* due to incuding end */
>> +
>> +/* check if lfsr overflow period of the m-sequence */
>> +if (((lfsr->bits_cnt + req_bits) > (1ULL << lfsr->deg) - 1) &&
>> +((ctx->flags &
>> RTE_THASH_IGNORE_PERIOD_OVERFLOW) !=
>> +RTE_THASH_IGNORE_PERIOD_OVERFLOW))
>> +return -ENOSPC;
> [Wang, Yipeng]
> If nospace, should one increase lfsr->deg? Or if it is already the highest deg you predefined then what to do?
> Maybe a log msg could help user with more information on the solutions.

It is not possible to increase the degree of lfsr due to mathematical 
restrictions. It must be exactly equal to the number of bits for which 
we want to have a collision.

>> +
>> +if (start < end) {
>> +/* original direction (from left to right)*/
>> +for (i = start; i <= end; i++)
>> +set_bit(ctx->hash_key, get_bit_lfsr(lfsr), i);

<snip>

>> +/**
>> + * Adjust tuple with complimentary bits.
>> + *
> [Wang, Yipeng]
> More explanation for this API is needed.
> My understanding is that user should call this function in a loop, until
> the above callback function returns success thus this function succeeds.

I'm going to rewrite this function in v3. The loop will be internal.

> BTW, why not put this API in the first API commit?
> 

My fault, will fix this

>> + * @param h
>> + *  Pointer to the helper struct
>> + * @param orig_tuple
>> + *  Pointer to the tuple to be adjusted
>> + * @param adj_bits
>> + *  Valure returned by rte_thash_get_compliment
> [Wang, Yipeng] typo. *value
>> + * @param fn
>> + *  Callback function to check adjusted tuple. Could be NULL
>> + * @param userdata
>> + *  Pointer to the userdata to be passed to fn(). Could be NULL
>> + *
>> + * @return
>> + *  0 on success
>> + *  negative otherwise
>> + */
>> +__rte_experimental
>> +int
>> +rte_thash_adjust_tuple(struct rte_thash_subtuple_helper *h,
>> +uint8_t *orig_tuple, uint32_t adj_bits,
>> +rte_thash_check_tuple_t fn, void *userdata);
>> +
>>   #ifdef __cplusplus
>>   }
>>   #endif
>> diff --git a/lib/librte_hash/version.map b/lib/librte_hash/version.map index
>> 93cb230..a992a1e 100644
>> --- a/lib/librte_hash/version.map
>> +++ b/lib/librte_hash/version.map
>> @@ -32,6 +32,7 @@ DPDK_21 {
>>   EXPERIMENTAL {
>>   global:
>>
>> +rte_thash_adjust_tuple;
>>   rte_hash_free_key_with_position;
>>   rte_hash_lookup_with_hash_bulk;
>>   rte_hash_lookup_with_hash_bulk_data;
>> --
>> 2.7.4
> 

-- 
Regards,
Vladimir

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [dpdk-dev] [PATCH v3 0/3] Predictable RSS feature
  2021-04-06 19:50 ` [dpdk-dev] [PATCH v2 0/3] Predictable RSS feature Vladimir Medvedkin
  2021-04-08 15:56   ` Stephen Hemminger
  2021-04-10  0:32   ` Wang, Yipeng1
@ 2021-04-11 19:11   ` Vladimir Medvedkin
  2021-04-11 19:11   ` [dpdk-dev] [PATCH v3 1/3] hash: add predictable RSS API Vladimir Medvedkin
                     ` (2 subsequent siblings)
  5 siblings, 0 replies; 23+ messages in thread
From: Vladimir Medvedkin @ 2021-04-11 19:11 UTC (permalink / raw)
  To: dev
  Cc: konstantin.ananyev, andrey.chilikin, ray.kinsella, yipeng1.wang,
	sameh.gobriel, bruce.richardson

This patch series introduces predictable RSS feature.
It is based on the idea of searching for partial hash collisions
within Toeplitz hash.

The Toeplitz hash function is a homomorphism between (G, ^) and (H, ^),
where (G, ^) - is a group of tuples and (H, ^) is a group of hashes
with respect to XOR operation. So tuples and hashes could be treated as
n-dimension and 32-dimension vector spaces over GF(2).
So, f(x ^ y) == f(x) ^ f(y)
where f - is the toeplitz hash function and x, y are tuples.

The ability to predict partial collisions allows user to compute
input hash value with desired LSB values.
Usually number of LSB's are defined by the size of RSS Redirection Table.

There could be number of use cases, for example:
1) NAT. Using this library it is possible to select a new port number
on a translation in the way that rss hash for original tuple will have
the same LSB's as rss hash for reverse tuple.
2) IPSec/MPLS/Vxlan. It is possible to choose tunnel id to be pinned to
a desired queue.
3) TCP stack. It is possible to choose a source port number for outgoing
connections in the way that received replies will be assigned to
desired queue.
4) RSS hash key generation. Hash key initialization with random values
does not guarantee an uniform distribution amongst queues. This library
uses mathematically proved algorithm to complete the rss hash key to
provide the best distribution.

v3:
- reworked rte_thash_adjust_tuple()
- added extra comments
- fixed typos
- rte_thash_adjust_tuple() API was putted into the first commit

v2:
- added extra API rte_thash_adjust_tuple()
- added extra tests for rte_thash_adjust_tuple()
- added extra fields to rte_thash_subtuple_helper struct
- fixed typos

Vladimir Medvedkin (3):
  hash: add predictable RSS API
  hash: add predictable RSS implementation
  test/hash: add additional thash tests

 app/test/test_thash.c       | 469 +++++++++++++++++++++++++++++-
 lib/librte_hash/meson.build |   3 +-
 lib/librte_hash/rte_thash.c | 673 ++++++++++++++++++++++++++++++++++++++++++++
 lib/librte_hash/rte_thash.h | 193 +++++++++++++
 lib/librte_hash/version.map |   8 +
 5 files changed, 1339 insertions(+), 7 deletions(-)
 create mode 100644 lib/librte_hash/rte_thash.c

-- 
2.7.4


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [dpdk-dev] [PATCH v3 1/3] hash: add predictable RSS API
  2021-04-06 19:50 ` [dpdk-dev] [PATCH v2 0/3] Predictable RSS feature Vladimir Medvedkin
                     ` (2 preceding siblings ...)
  2021-04-11 19:11   ` [dpdk-dev] [PATCH v3 " Vladimir Medvedkin
@ 2021-04-11 19:11   ` Vladimir Medvedkin
  2021-04-11 19:11   ` [dpdk-dev] [PATCH v3 2/3] hash: add predictable RSS implementation Vladimir Medvedkin
  2021-04-11 19:11   ` [dpdk-dev] [PATCH v3 3/3] test/hash: add additional thash tests Vladimir Medvedkin
  5 siblings, 0 replies; 23+ messages in thread
From: Vladimir Medvedkin @ 2021-04-11 19:11 UTC (permalink / raw)
  To: dev
  Cc: konstantin.ananyev, andrey.chilikin, ray.kinsella, yipeng1.wang,
	sameh.gobriel, bruce.richardson

This patch adds predictable RSS API.
It is based on the idea of searching partial Toeplitz hash collisions.

Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
---
 lib/librte_hash/meson.build |   3 +-
 lib/librte_hash/rte_thash.c | 109 +++++++++++++++++++++++++
 lib/librte_hash/rte_thash.h | 193 ++++++++++++++++++++++++++++++++++++++++++++
 lib/librte_hash/version.map |   8 ++
 4 files changed, 312 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_hash/rte_thash.c

diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build
index 242859f..3546014 100644
--- a/lib/librte_hash/meson.build
+++ b/lib/librte_hash/meson.build
@@ -8,6 +8,7 @@ headers = files('rte_fbk_hash.h',
 	'rte_thash.h')
 indirect_headers += files('rte_crc_arm64.h')
 
-sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c')
+sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c', 'rte_thash.c')
+deps += ['net']
 deps += ['ring']
 deps += ['rcu']
diff --git a/lib/librte_hash/rte_thash.c b/lib/librte_hash/rte_thash.c
new file mode 100644
index 0000000..1325678
--- /dev/null
+++ b/lib/librte_hash/rte_thash.c
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Intel Corporation
+ */
+
+#include <rte_thash.h>
+#include <rte_tailq.h>
+#include <rte_random.h>
+#include <rte_memcpy.h>
+#include <rte_errno.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_malloc.h>
+
+#define THASH_NAME_LEN		64
+
+struct thash_lfsr {
+	uint32_t	ref_cnt;
+	uint32_t	poly;
+	/**< polynomial associated with the lfsr */
+	uint32_t	rev_poly;
+	/**< polynomial to generate the sequence in reverse direction */
+	uint32_t	state;
+	/**< current state of the lfsr */
+	uint32_t	rev_state;
+	/**< current state of the lfsr for reverse direction */
+	uint32_t	deg;	/**< polynomial degree*/
+	uint32_t	bits_cnt;  /**< number of bits generated by lfsr*/
+};
+
+struct rte_thash_subtuple_helper {
+	char	name[THASH_NAME_LEN];	/** < Name of subtuple configuration */
+	LIST_ENTRY(rte_thash_subtuple_helper)	next;
+	struct thash_lfsr	*lfsr;
+	uint32_t	offset;		/** < Offset of the m-sequence */
+	uint32_t	len;		/** < Length of the m-sequence */
+	uint32_t	tuple_offset;	/** < Offset in bits of the subtuple */
+	uint32_t	tuple_len;	/** < Length in bits of the subtuple */
+	uint32_t	lsb_msk;	/** < (1 << reta_sz_log) - 1 */
+	__extension__ uint32_t	compl_table[0] __rte_cache_aligned;
+	/** < Complementary table */
+};
+
+struct rte_thash_ctx {
+	char		name[THASH_NAME_LEN];
+	LIST_HEAD(, rte_thash_subtuple_helper) head;
+	uint32_t	key_len;	/** < Length of the NIC RSS hash key */
+	uint32_t	reta_sz_log;	/** < size of the RSS ReTa in bits */
+	uint32_t	subtuples_nb;	/** < number of subtuples */
+	uint32_t	flags;
+	uint8_t		hash_key[0];
+};
+
+struct rte_thash_ctx *
+rte_thash_init_ctx(const char *name __rte_unused,
+	uint32_t key_len __rte_unused, uint32_t reta_sz __rte_unused,
+	uint8_t *key __rte_unused, uint32_t flags __rte_unused)
+{
+	return NULL;
+}
+
+struct rte_thash_ctx *
+rte_thash_find_existing(const char *name __rte_unused)
+{
+	return NULL;
+}
+
+void
+rte_thash_free_ctx(struct rte_thash_ctx *ctx __rte_unused)
+{
+}
+
+int
+rte_thash_add_helper(struct rte_thash_ctx *ctx __rte_unused,
+	const char *name __rte_unused, uint32_t len __rte_unused,
+	uint32_t offset __rte_unused)
+{
+	return 0;
+}
+
+struct rte_thash_subtuple_helper *
+rte_thash_get_helper(struct rte_thash_ctx *ctx __rte_unused,
+	const char *name __rte_unused)
+{
+	return NULL;
+}
+
+uint32_t
+rte_thash_get_complement(struct rte_thash_subtuple_helper *h __rte_unused,
+	uint32_t hash __rte_unused, uint32_t desired_hash __rte_unused)
+{
+	return 0;
+}
+
+const uint8_t *
+rte_thash_get_key(struct rte_thash_ctx *ctx __rte_unused)
+{
+	return NULL;
+}
+
+int
+rte_thash_adjust_tuple(struct rte_thash_ctx *ctx __rte_unused,
+	struct rte_thash_subtuple_helper *h __rte_unused,
+	uint8_t *tuple __rte_unused, unsigned int tuple_len __rte_unused,
+	uint32_t desired_value __rte_unused,
+	unsigned int attempts __rte_unused,
+	rte_thash_check_tuple_t fn __rte_unused, void *userdata __rte_unused)
+{
+	return 0;
+}
diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h
index 061efa2..f5602ba 100644
--- a/lib/librte_hash/rte_thash.h
+++ b/lib/librte_hash/rte_thash.h
@@ -1,5 +1,6 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright(c) 2015-2019 Vladimir Medvedkin <medvedkinv@gmail.com>
+ * Copyright(c) 2021 Intel Corporation
  */
 
 #ifndef _RTE_THASH_H
@@ -222,6 +223,198 @@ rte_softrss_be(uint32_t *input_tuple, uint32_t input_len,
 	return ret;
 }
 
+/**
+ * LFSR will ignore if generated m-sequence has more than 2^n -1 bits
+ */
+#define RTE_THASH_IGNORE_PERIOD_OVERFLOW	0x1
+/**
+ * Generate minimal required bit (equal to ReTa LSB) sequence into
+ * the hash_key
+ */
+#define RTE_THASH_MINIMAL_SEQ			0x2
+
+/** @internal thash context structure. */
+struct rte_thash_ctx;
+/** @internal thash helper structure. */
+struct rte_thash_subtuple_helper;
+
+/**
+ * Create a new thash context.
+ *
+ * @param name
+ *  context name
+ * @param key_len
+ *  length of the toeplitz hash key
+ * @param reta_sz
+ *  logarithm of the NIC's Redirection Table (ReTa) size,
+ *  i.e. number of the LSBs if the hash used to determine
+ *  the reta entry.
+ * @param key
+ *  pointer to the key used to init an internal key state.
+ *  Could be NULL, in this case internal key will be inited with random.
+ * @param flags
+ *  supported flags are:
+ *   RTE_THASH_IGNORE_PERIOD_OVERFLOW
+ *   RTE_THASH_MINIMAL_SEQ
+ * @return
+ *  A pointer to the created context on success
+ *  NULL otherwise
+ */
+__rte_experimental
+struct rte_thash_ctx *
+rte_thash_init_ctx(const char *name, uint32_t key_len, uint32_t reta_sz,
+	uint8_t *key, uint32_t flags);
+
+/**
+ * Find an existing thash context and return a pointer to it.
+ *
+ * @param name
+ *  Name of the thash context
+ * @return
+ *  Pointer to the thash context or NULL if it was not found with rte_errno
+ *  set appropriately. Possible rte_errno values include:
+ *   - ENOENT - required entry not available to return.
+ */
+__rte_experimental
+struct rte_thash_ctx *
+rte_thash_find_existing(const char *name);
+
+/**
+ * Free a thash context object
+ *
+ * @param ctx
+ *  thash context
+ * @return
+ *  None
+ */
+__rte_experimental
+void
+rte_thash_free_ctx(struct rte_thash_ctx *ctx);
+
+/**
+ * Add a special properties to the toeplitz hash key inside a thash context.
+ * Creates an internal helper struct which has a complementary table
+ * to calculate toeplitz hash collisions.
+ * This function is not multi-thread safe.
+ *
+ * @param ctx
+ *  thash context
+ * @param name
+ *  name of the helper
+ * @param len
+ *  length in bits of the target subtuple
+ *  Must be no shorter than reta_sz passed on rte_thash_init_ctx().
+ * @param offset
+ *  offset in bits of the subtuple
+ * @return
+ *  0 on success
+ *  negative on error
+ */
+__rte_experimental
+int
+rte_thash_add_helper(struct rte_thash_ctx *ctx, const char *name, uint32_t len,
+	uint32_t offset);
+
+/**
+ * Find a helper in the context by the given name
+ *
+ * @param ctx
+ *  thash context
+ * @param name
+ *  name of the helper
+ * @return
+ *  Pointer to the thash helper or NULL if it was not found.
+ */
+__rte_experimental
+struct rte_thash_subtuple_helper *
+rte_thash_get_helper(struct rte_thash_ctx *ctx, const char *name);
+
+/**
+ * Get a complementary value for the subtuple to produce a
+ * partial toeplitz hash collision. It must be XOR'ed with the
+ * subtuple to produce the hash value with the desired hash LSB's
+ * This function is multi-thread safe.
+ *
+ * @param h
+ *  Pointer to the helper struct
+ * @param hash
+ *  toeplitz hash value calculated for the given tuple
+ * @param desired_hash
+ *  desired hash value to find a collision for
+ * @return
+ *  A complementary value which must be xored with the corresponding subtuple
+ */
+__rte_experimental
+uint32_t
+rte_thash_get_complement(struct rte_thash_subtuple_helper *h,
+	uint32_t hash, uint32_t desired_hash);
+
+/**
+ * Get a pointer to the toeplitz hash contained in the context.
+ * It changes after each addition of a helper. It should be installed to
+ * the NIC.
+ *
+ * @param ctx
+ *  thash context
+ * @return
+ *  A pointer to the toeplitz hash key
+ */
+__rte_experimental
+const uint8_t *
+rte_thash_get_key(struct rte_thash_ctx *ctx);
+
+/**
+ * Function prototype for the rte_thash_adjust_tuple
+ * to check if adjusted tuple could be used.
+ * Generally it is some kind of lookup function to check
+ * if adjusted tuple is already in use.
+ *
+ * @param userdata
+ *  Pointer to the userdata. It could be a pointer to the
+ *  table with used tuples to search.
+ * @param tuple
+ *  Pointer to the tuple to check
+ *
+ * @return
+ *  1 on success
+ *  0 otherwise
+ */
+typedef int (*rte_thash_check_tuple_t)(void *userdata, uint8_t *tuple);
+
+/**
+ * Adjusts tuple in the way to make Toeplitz hash has
+ * desired least significant bits.
+ * This function is multi-thread safe.
+ *
+ * @param ctx
+ *  thash context
+ * @param h
+ *  Pointer to the helper struct
+ * @param tuple
+ *  Pointer to the tuple to be adjusted
+ * @param tuple_len
+ *  Length of the tuple. Must be multiple of 4.
+ * @param desired_value
+ *  Desired value of least significant bits of the hash
+ * @param attempts
+ *   Number of attempts to adjust tuple with fn() calling
+ * @param fn
+ *  Callback function to check adjusted tuple. Could be NULL
+ * @param userdata
+ *  Pointer to the userdata to be passed to fn(). Could be NULL
+ *
+ * @return
+ *  0 on success
+ *  negative otherwise
+ */
+__rte_experimental
+int
+rte_thash_adjust_tuple(struct rte_thash_ctx *ctx,
+	struct rte_thash_subtuple_helper *h,
+	uint8_t *tuple, unsigned int tuple_len,
+	uint32_t desired_value, unsigned int attempts,
+	rte_thash_check_tuple_t fn, void *userdata);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_hash/version.map b/lib/librte_hash/version.map
index c6d7308..17cb8aa 100644
--- a/lib/librte_hash/version.map
+++ b/lib/librte_hash/version.map
@@ -32,9 +32,17 @@ DPDK_21 {
 EXPERIMENTAL {
 	global:
 
 	rte_hash_free_key_with_position;
 	rte_hash_lookup_with_hash_bulk;
 	rte_hash_lookup_with_hash_bulk_data;
 	rte_hash_max_key_id;
 	rte_hash_rcu_qsbr_add;
+	rte_thash_add_helper;
+	rte_thash_adjust_tuple;
+	rte_thash_find_existing;
+	rte_thash_free_ctx;
+	rte_thash_get_complement;
+	rte_thash_get_helper;
+	rte_thash_get_key;
+	rte_thash_init_ctx;
 };
-- 
2.7.4


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [dpdk-dev] [PATCH v3 2/3] hash: add predictable RSS implementation
  2021-04-06 19:50 ` [dpdk-dev] [PATCH v2 0/3] Predictable RSS feature Vladimir Medvedkin
                     ` (3 preceding siblings ...)
  2021-04-11 19:11   ` [dpdk-dev] [PATCH v3 1/3] hash: add predictable RSS API Vladimir Medvedkin
@ 2021-04-11 19:11   ` Vladimir Medvedkin
  2021-04-11 19:11   ` [dpdk-dev] [PATCH v3 3/3] test/hash: add additional thash tests Vladimir Medvedkin
  5 siblings, 0 replies; 23+ messages in thread
From: Vladimir Medvedkin @ 2021-04-11 19:11 UTC (permalink / raw)
  To: dev
  Cc: konstantin.ananyev, andrey.chilikin, ray.kinsella, yipeng1.wang,
	sameh.gobriel, bruce.richardson

This patch implements predictable RSS functionality.

Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
---
 lib/librte_hash/rte_thash.c | 610 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 587 insertions(+), 23 deletions(-)

diff --git a/lib/librte_hash/rte_thash.c b/lib/librte_hash/rte_thash.c
index 1325678..c76ceee 100644
--- a/lib/librte_hash/rte_thash.c
+++ b/lib/librte_hash/rte_thash.c
@@ -12,6 +12,45 @@
 #include <rte_malloc.h>
 
 #define THASH_NAME_LEN		64
+#define TOEPLITZ_HASH_LEN	32
+
+#define	RETA_SZ_MIN	2U
+#define	RETA_SZ_MAX	16U
+#define RETA_SZ_IN_RANGE(reta_sz)	((reta_sz >= RETA_SZ_MIN) && \
+					(reta_sz <= RETA_SZ_MAX))
+
+TAILQ_HEAD(rte_thash_list, rte_tailq_entry);
+static struct rte_tailq_elem rte_thash_tailq = {
+	.name = "RTE_THASH",
+};
+EAL_REGISTER_TAILQ(rte_thash_tailq)
+
+/**
+ * Table of some irreducible polynomials over GF(2).
+ * For lfsr they are represented in BE bit order, and
+ * x^0 is masked out.
+ * For example, poly x^5 + x^2 + 1 will be represented
+ * as (101001b & 11111b) = 01001b = 0x9
+ */
+static const uint32_t irreducible_poly_table[][4] = {
+	{0, 0, 0, 0},	/**< degree 0 */
+	{1, 1, 1, 1},	/**< degree 1 */
+	{0x3, 0x3, 0x3, 0x3},	/**< degree 2 and so on... */
+	{0x5, 0x3, 0x5, 0x3},
+	{0x9, 0x3, 0x9, 0x3},
+	{0x9, 0x1b, 0xf, 0x5},
+	{0x21, 0x33, 0x1b, 0x2d},
+	{0x41, 0x11, 0x71, 0x9},
+	{0x71, 0xa9, 0xf5, 0x8d},
+	{0x21, 0xd1, 0x69, 0x1d9},
+	{0x81, 0x2c1, 0x3b1, 0x185},
+	{0x201, 0x541, 0x341, 0x461},
+	{0x941, 0x609, 0xe19, 0x45d},
+	{0x1601, 0x1f51, 0x1171, 0x359},
+	{0x2141, 0x2111, 0x2db1, 0x2109},
+	{0x4001, 0x801, 0x101, 0x7301},
+	{0x7781, 0xa011, 0x4211, 0x86d9},
+};
 
 struct thash_lfsr {
 	uint32_t	ref_cnt;
@@ -50,60 +89,585 @@ struct rte_thash_ctx {
 	uint8_t		hash_key[0];
 };
 
+static inline uint32_t
+get_bit_lfsr(struct thash_lfsr *lfsr)
+{
+	uint32_t bit, ret;
+
+	/*
+	 * masking the TAP bits defined by the polynomial and
+	 * calculating parity
+	 */
+	bit = __builtin_popcount(lfsr->state & lfsr->poly) & 0x1;
+	ret = lfsr->state & 0x1;
+	lfsr->state = ((lfsr->state >> 1) | (bit << (lfsr->deg - 1))) &
+		((1 << lfsr->deg) - 1);
+
+	lfsr->bits_cnt++;
+	return ret;
+}
+
+static inline uint32_t
+get_rev_bit_lfsr(struct thash_lfsr *lfsr)
+{
+	uint32_t bit, ret;
+
+	bit = __builtin_popcount(lfsr->rev_state & lfsr->rev_poly) & 0x1;
+	ret = lfsr->rev_state & (1 << (lfsr->deg - 1));
+	lfsr->rev_state = ((lfsr->rev_state << 1) | bit) &
+		((1 << lfsr->deg) - 1);
+
+	lfsr->bits_cnt++;
+	return ret;
+}
+
+static inline uint32_t
+thash_get_rand_poly(uint32_t poly_degree)
+{
+	return irreducible_poly_table[poly_degree][rte_rand() %
+		RTE_DIM(irreducible_poly_table[poly_degree])];
+}
+
+static struct thash_lfsr *
+alloc_lfsr(struct rte_thash_ctx *ctx)
+{
+	struct thash_lfsr *lfsr;
+	uint32_t i;
+
+	if (ctx == NULL)
+		return NULL;
+
+	lfsr = rte_zmalloc(NULL, sizeof(struct thash_lfsr), 0);
+	if (lfsr == NULL)
+		return NULL;
+
+	lfsr->deg = ctx->reta_sz_log;
+	lfsr->poly = thash_get_rand_poly(lfsr->deg);
+	do {
+		lfsr->state = rte_rand() & ((1 << lfsr->deg) - 1);
+	} while (lfsr->state == 0);
+	/* init reverse order polynomial */
+	lfsr->rev_poly = (lfsr->poly >> 1) | (1 << (lfsr->deg - 1));
+	/* init proper rev_state*/
+	lfsr->rev_state = lfsr->state;
+	for (i = 0; i <= lfsr->deg; i++)
+		get_rev_bit_lfsr(lfsr);
+
+	/* clear bits_cnt after rev_state was inited */
+	lfsr->bits_cnt = 0;
+	lfsr->ref_cnt = 1;
+
+	return lfsr;
+}
+
+static void
+attach_lfsr(struct rte_thash_subtuple_helper *h, struct thash_lfsr *lfsr)
+{
+	lfsr->ref_cnt++;
+	h->lfsr = lfsr;
+}
+
+static void
+free_lfsr(struct thash_lfsr *lfsr)
+{
+	lfsr->ref_cnt--;
+	if (lfsr->ref_cnt == 0)
+		rte_free(lfsr);
+}
+
 struct rte_thash_ctx *
-rte_thash_init_ctx(const char *name __rte_unused,
-	uint32_t key_len __rte_unused, uint32_t reta_sz __rte_unused,
-	uint8_t *key __rte_unused, uint32_t flags __rte_unused)
+rte_thash_init_ctx(const char *name, uint32_t key_len, uint32_t reta_sz,
+	uint8_t *key, uint32_t flags)
 {
+	struct rte_thash_ctx *ctx;
+	struct rte_tailq_entry *te;
+	struct rte_thash_list *thash_list;
+	uint32_t i;
+
+	if ((name == NULL) || (key_len == 0) || !RETA_SZ_IN_RANGE(reta_sz)) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
+
+	rte_mcfg_tailq_write_lock();
+
+	/* guarantee there's no existing */
+	TAILQ_FOREACH(te, thash_list, next) {
+		ctx = (struct rte_thash_ctx *)te->data;
+		if (strncmp(name, ctx->name, sizeof(ctx->name)) == 0)
+			break;
+	}
+	ctx = NULL;
+	if (te != NULL) {
+		rte_errno = EEXIST;
+		goto exit;
+	}
+
+	/* allocate tailq entry */
+	te = rte_zmalloc("THASH_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL) {
+		RTE_LOG(ERR, HASH,
+			"Can not allocate tailq entry for thash context %s\n",
+			name);
+		rte_errno = ENOMEM;
+		goto exit;
+	}
+
+	ctx = rte_zmalloc(NULL, sizeof(struct rte_thash_ctx) + key_len, 0);
+	if (ctx == NULL) {
+		RTE_LOG(ERR, HASH, "thash ctx %s memory allocation failed\n",
+			name);
+		rte_errno = ENOMEM;
+		goto free_te;
+	}
+
+	rte_strlcpy(ctx->name, name, sizeof(ctx->name));
+	ctx->key_len = key_len;
+	ctx->reta_sz_log = reta_sz;
+	LIST_INIT(&ctx->head);
+	ctx->flags = flags;
+
+	if (key)
+		rte_memcpy(ctx->hash_key, key, key_len);
+	else {
+		for (i = 0; i < key_len; i++)
+			ctx->hash_key[i] = rte_rand();
+	}
+
+	te->data = (void *)ctx;
+	TAILQ_INSERT_TAIL(thash_list, te, next);
+
+	rte_mcfg_tailq_write_unlock();
+
+	return ctx;
+free_te:
+	rte_free(te);
+exit:
+	rte_mcfg_tailq_write_unlock();
 	return NULL;
 }
 
 struct rte_thash_ctx *
-rte_thash_find_existing(const char *name __rte_unused)
+rte_thash_find_existing(const char *name)
 {
-	return NULL;
+	struct rte_thash_ctx *ctx;
+	struct rte_tailq_entry *te;
+	struct rte_thash_list *thash_list;
+
+	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
+
+	rte_mcfg_tailq_read_lock();
+	TAILQ_FOREACH(te, thash_list, next) {
+		ctx = (struct rte_thash_ctx *)te->data;
+		if (strncmp(name, ctx->name, sizeof(ctx->name)) == 0)
+			break;
+	}
+
+	rte_mcfg_tailq_read_unlock();
+
+	if (te == NULL) {
+		rte_errno = ENOENT;
+		return NULL;
+	}
+
+	return ctx;
 }
 
 void
-rte_thash_free_ctx(struct rte_thash_ctx *ctx __rte_unused)
+rte_thash_free_ctx(struct rte_thash_ctx *ctx)
 {
+	struct rte_tailq_entry *te;
+	struct rte_thash_list *thash_list;
+	struct rte_thash_subtuple_helper *ent, *tmp;
+
+	if (ctx == NULL)
+		return;
+
+	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
+	rte_mcfg_tailq_write_lock();
+	TAILQ_FOREACH(te, thash_list, next) {
+		if (te->data == (void *)ctx)
+			break;
+	}
+
+	if (te != NULL)
+		TAILQ_REMOVE(thash_list, te, next);
+
+	rte_mcfg_tailq_write_unlock();
+	ent = LIST_FIRST(&(ctx->head));
+	while (ent) {
+		free_lfsr(ent->lfsr);
+		tmp = ent;
+		ent = LIST_NEXT(ent, next);
+		LIST_REMOVE(tmp, next);
+		rte_free(tmp);
+	}
+
+	rte_free(ctx);
+	rte_free(te);
+}
+
+static inline void
+set_bit(uint8_t *ptr, uint32_t bit, uint32_t pos)
+{
+	uint32_t byte_idx = pos / CHAR_BIT;
+	uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1));
+	uint8_t tmp;
+
+	tmp = ptr[byte_idx];
+	tmp &= ~(1 << bit_idx);
+	tmp |= bit << bit_idx;
+	ptr[byte_idx] = tmp;
+}
+
+/**
+ * writes m-sequence to the hash_key for range [start, end]
+ * (i.e. including start and end positions)
+ */
+static int
+generate_subkey(struct rte_thash_ctx *ctx, struct thash_lfsr *lfsr,
+	uint32_t start, uint32_t end)
+{
+	uint32_t i;
+	uint32_t req_bits = (start < end) ? (end - start) : (start - end);
+	req_bits++; /* due to including end */
+
+	/* check if the lfsr would overflow the period of the m-sequence */
+	if (((lfsr->bits_cnt + req_bits) > (1ULL << lfsr->deg) - 1) &&
+			((ctx->flags & RTE_THASH_IGNORE_PERIOD_OVERFLOW) !=
+			RTE_THASH_IGNORE_PERIOD_OVERFLOW))
+		return -ENOSPC;
+
+	if (start < end) {
+		/* original direction (from left to right)*/
+		for (i = start; i <= end; i++)
+			set_bit(ctx->hash_key, get_bit_lfsr(lfsr), i);
+
+	} else {
+		/* reverse direction (from right to left) */
+		for (i = end; i >= start; i--)
+			set_bit(ctx->hash_key, get_rev_bit_lfsr(lfsr), i);
+	}
+
+	return 0;
+}
+
+static inline uint32_t
+get_subvalue(struct rte_thash_ctx *ctx, uint32_t offset)
+{
+	uint32_t *tmp, val;
+
+	tmp = (uint32_t *)(&ctx->hash_key[offset >> 3]);
+	val = rte_be_to_cpu_32(*tmp);
+	val >>= (TOEPLITZ_HASH_LEN - ((offset & (CHAR_BIT - 1)) +
+		ctx->reta_sz_log));
+
+	return val & ((1 << ctx->reta_sz_log) - 1);
+}
+
+static inline void
+generate_complement_table(struct rte_thash_ctx *ctx,
+	struct rte_thash_subtuple_helper *h)
+{
+	int i, j, k;
+	uint32_t val;
+	uint32_t start;
+
+	start = h->offset + h->len - (2 * ctx->reta_sz_log - 1);
+
+	for (i = 1; i < (1 << ctx->reta_sz_log); i++) {
+		val = 0;
+		for (j = i; j; j &= (j - 1)) {
+			k = rte_bsf32(j);
+			val ^= get_subvalue(ctx, start - k +
+				ctx->reta_sz_log - 1);
+		}
+		h->compl_table[val] = i;
+	}
+}
+
+static inline int
+insert_before(struct rte_thash_ctx *ctx,
+	struct rte_thash_subtuple_helper *ent,
+	struct rte_thash_subtuple_helper *cur_ent,
+	struct rte_thash_subtuple_helper *next_ent,
+	uint32_t start, uint32_t end, uint32_t range_end)
+{
+	int ret;
+
+	if (end < cur_ent->offset) {
+		ent->lfsr = alloc_lfsr(ctx);
+		if (ent->lfsr == NULL) {
+			rte_free(ent);
+			return -ENOMEM;
+		}
+		/* generate nonoverlapping range [start, end) */
+		ret = generate_subkey(ctx, ent->lfsr, start, end - 1);
+		if (ret != 0) {
+			free_lfsr(ent->lfsr);
+			rte_free(ent);
+			return ret;
+		}
+	} else if ((next_ent != NULL) && (end > next_ent->offset)) {
+		rte_free(ent);
+		return -ENOSPC;
+	}
+	attach_lfsr(ent, cur_ent->lfsr);
+
+	/**
+	 * generate partially overlapping range
+	 * [start, cur_ent->offset) in reverse order
+	 */
+	ret = generate_subkey(ctx, ent->lfsr, cur_ent->offset - 1, start);
+	if (ret != 0) {
+		free_lfsr(ent->lfsr);
+		rte_free(ent);
+		return ret;
+	}
+
+	if (end > range_end) {
+		/**
+		 * generate partially overlapping range
+		 * (range_end, end)
+		 */
+		ret = generate_subkey(ctx, ent->lfsr, range_end, end - 1);
+		if (ret != 0) {
+			free_lfsr(ent->lfsr);
+			rte_free(ent);
+			return ret;
+		}
+	}
+
+	LIST_INSERT_BEFORE(cur_ent, ent, next);
+	generate_complement_table(ctx, ent);
+	ctx->subtuples_nb++;
+	return 0;
+}
+
+static inline int
+insert_after(struct rte_thash_ctx *ctx,
+	struct rte_thash_subtuple_helper *ent,
+	struct rte_thash_subtuple_helper *cur_ent,
+	struct rte_thash_subtuple_helper *next_ent,
+	struct rte_thash_subtuple_helper *prev_ent,
+	uint32_t end, uint32_t range_end)
+{
+	int ret;
+
+	if ((next_ent != NULL) && (end > next_ent->offset)) {
+		rte_free(ent);
+		return -EEXIST;
+	}
+
+	attach_lfsr(ent, cur_ent->lfsr);
+	if (end > range_end) {
+		/**
+		 * generate partially overlapping range
+		 * (range_end, end)
+		 */
+		ret = generate_subkey(ctx, ent->lfsr, range_end, end - 1);
+		if (ret != 0) {
+			free_lfsr(ent->lfsr);
+			rte_free(ent);
+			return ret;
+		}
+	}
+
+	LIST_INSERT_AFTER(prev_ent, ent, next);
+	generate_complement_table(ctx, ent);
+	ctx->subtuples_nb++;
+
+	return 0;
 }
 
 int
-rte_thash_add_helper(struct rte_thash_ctx *ctx __rte_unused,
-	const char *name __rte_unused, uint32_t len __rte_unused,
-	uint32_t offset __rte_unused)
+rte_thash_add_helper(struct rte_thash_ctx *ctx, const char *name, uint32_t len,
+	uint32_t offset)
 {
+	struct rte_thash_subtuple_helper *ent, *cur_ent, *prev_ent, *next_ent;
+	uint32_t start, end;
+	int ret;
+
+	if ((ctx == NULL) || (name == NULL) || (len < ctx->reta_sz_log) ||
+			((offset + len + TOEPLITZ_HASH_LEN - 1) >
+			ctx->key_len * CHAR_BIT))
+		return -EINVAL;
+
+	/* Check for existing name*/
+	LIST_FOREACH(cur_ent, &ctx->head, next) {
+		if (strncmp(name, cur_ent->name, sizeof(cur_ent->name)) == 0)
+			return -EEXIST;
+	}
+
+	end = offset + len + TOEPLITZ_HASH_LEN - 1;
+	start = ((ctx->flags & RTE_THASH_MINIMAL_SEQ) ==
+		RTE_THASH_MINIMAL_SEQ) ? (end - (2 * ctx->reta_sz_log - 1)) :
+		offset;
+
+	ent = rte_zmalloc(NULL, sizeof(struct rte_thash_subtuple_helper) +
+		sizeof(uint32_t) * (1 << ctx->reta_sz_log),
+		RTE_CACHE_LINE_SIZE);
+	if (ent == NULL)
+		return -ENOMEM;
+
+	rte_strlcpy(ent->name, name, sizeof(ent->name));
+	ent->offset = start;
+	ent->len = end - start;
+	ent->tuple_offset = offset;
+	ent->tuple_len = len;
+	ent->lsb_msk = (1 << ctx->reta_sz_log) - 1;
+
+	cur_ent = LIST_FIRST(&ctx->head);
+	while (cur_ent) {
+		uint32_t range_end = cur_ent->offset + cur_ent->len;
+		next_ent = LIST_NEXT(cur_ent, next);
+		prev_ent = cur_ent;
+		/* Iterate through overlapping ranges */
+		while ((next_ent != NULL) && (next_ent->offset < range_end)) {
+			range_end = RTE_MAX(next_ent->offset + next_ent->len,
+				range_end);
+			if (start > next_ent->offset)
+				prev_ent = next_ent;
+
+			next_ent = LIST_NEXT(next_ent, next);
+		}
+
+		if (start < cur_ent->offset)
+			return insert_before(ctx, ent, cur_ent, next_ent,
+				start, end, range_end);
+		else if (start < range_end)
+			return insert_after(ctx, ent, cur_ent, next_ent,
+				prev_ent, end, range_end);
+
+		cur_ent = next_ent;
+		continue;
+	}
+
+	ent->lfsr = alloc_lfsr(ctx);
+	if (ent->lfsr == NULL) {
+		rte_free(ent);
+		return -ENOMEM;
+	}
+
+	/* generate nonoverlapping range [start, end) */
+	ret = generate_subkey(ctx, ent->lfsr, start, end - 1);
+	if (ret != 0) {
+		free_lfsr(ent->lfsr);
+		rte_free(ent);
+		return ret;
+	}
+	if (LIST_EMPTY(&ctx->head)) {
+		LIST_INSERT_HEAD(&ctx->head, ent, next);
+	} else {
+		LIST_FOREACH(next_ent, &ctx->head, next)
+			prev_ent = next_ent;
+
+		LIST_INSERT_AFTER(prev_ent, ent, next);
+	}
+	generate_complement_table(ctx, ent);
+	ctx->subtuples_nb++;
+
 	return 0;
 }
 
 struct rte_thash_subtuple_helper *
-rte_thash_get_helper(struct rte_thash_ctx *ctx __rte_unused,
-	const char *name __rte_unused)
+rte_thash_get_helper(struct rte_thash_ctx *ctx, const char *name)
 {
+	struct rte_thash_subtuple_helper *ent;
+
+	if ((ctx == NULL) || (name == NULL))
+		return NULL;
+
+	LIST_FOREACH(ent, &ctx->head, next) {
+		if (strncmp(name, ent->name, sizeof(ent->name)) == 0)
+			return ent;
+	}
+
 	return NULL;
 }
 
 uint32_t
-rte_thash_get_complement(struct rte_thash_subtuple_helper *h __rte_unused,
-	uint32_t hash __rte_unused, uint32_t desired_hash __rte_unused)
+rte_thash_get_complement(struct rte_thash_subtuple_helper *h,
+	uint32_t hash, uint32_t desired_hash)
 {
-	return 0;
+	return h->compl_table[(hash ^ desired_hash) & h->lsb_msk];
 }
 
 const uint8_t *
-rte_thash_get_key(struct rte_thash_ctx *ctx __rte_unused)
+rte_thash_get_key(struct rte_thash_ctx *ctx)
 {
-	return NULL;
+	return ctx->hash_key;
+}
+
+static inline void
+xor_bit(uint8_t *ptr, uint32_t bit, uint32_t pos)
+{
+	uint32_t byte_idx = pos >> 3;
+	uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1));
+	uint8_t tmp;
+
+	tmp = ptr[byte_idx];
+	tmp ^= bit << bit_idx;
+	ptr[byte_idx] = tmp;
 }
 
 int
-rte_thash_adjust_tuple(struct rte_thash_ctx *ctx __rte_unused,
-	struct rte_thash_subtuple_helper *h __rte_unused,
-	uint8_t *tuple __rte_unused, unsigned int tuple_len __rte_unused,
-	uint32_t desired_value __rte_unused,
-	unsigned int attempts __rte_unused,
-	rte_thash_check_tuple_t fn __rte_unused, void *userdata __rte_unused)
+rte_thash_adjust_tuple(struct rte_thash_ctx *ctx,
+	struct rte_thash_subtuple_helper *h,
+	uint8_t *tuple, unsigned int tuple_len,
+	uint32_t desired_value,	unsigned int attempts,
+	rte_thash_check_tuple_t fn, void *userdata)
 {
-	return 0;
+	uint32_t tmp_tuple[tuple_len / sizeof(uint32_t)];
+	unsigned int i, j, ret = 0;
+	uint32_t hash, adj_bits;
+	uint8_t bit;
+	const uint8_t *hash_key;
+
+	if ((ctx == NULL) || (h == NULL) || (tuple == NULL) ||
+			(tuple_len % sizeof(uint32_t) != 0) || (attempts <= 0))
+		return -EINVAL;
+
+	hash_key = rte_thash_get_key(ctx);
+
+	for (i = 0; i < attempts; i++) {
+		for (j = 0; j < (tuple_len / 4); j++)
+			tmp_tuple[j] =
+				rte_be_to_cpu_32(*(uint32_t *)&tuple[j * 4]);
+
+		hash = rte_softrss(tmp_tuple, tuple_len / 4, hash_key);
+		adj_bits = rte_thash_get_complement(h, hash, desired_value);
+
+		/*
+		 * Hint: LSB of adj_bits corresponds to
+		 * offset + len bit of tuple
+		 */
+		for (j = 0; j < sizeof(uint32_t) * CHAR_BIT; j++) {
+			bit = (adj_bits >> j) & 0x1;
+			if (bit)
+				xor_bit(tuple, bit, h->tuple_offset +
+					h->tuple_len - 1 - j);
+		}
+
+		if (fn != NULL) {
+			ret = (fn(userdata, tuple)) ? 0 : -EEXIST;
+			if (ret == 0)
+				return 0;
+			else if (i < (attempts - 1)) {
+				/* Update tuple with random bits */
+				for (j = 0; j < h->tuple_len; j++) {
+					bit = rte_rand() & 0x1;
+					if (bit)
+						xor_bit(tuple, bit,
+							h->tuple_offset +
+							h->tuple_len - 1 - j);
+				}
+			}
+		} else
+			return 0;
+	}
+
+	return ret;
 }
-- 
2.7.4


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [dpdk-dev] [PATCH v3 3/3] test/hash: add additional thash tests
  2021-04-06 19:50 ` [dpdk-dev] [PATCH v2 0/3] Predictable RSS feature Vladimir Medvedkin
                     ` (4 preceding siblings ...)
  2021-04-11 19:11   ` [dpdk-dev] [PATCH v3 2/3] hash: add predictable RSS implementation Vladimir Medvedkin
@ 2021-04-11 19:11   ` Vladimir Medvedkin
  5 siblings, 0 replies; 23+ messages in thread
From: Vladimir Medvedkin @ 2021-04-11 19:11 UTC (permalink / raw)
  To: dev
  Cc: konstantin.ananyev, andrey.chilikin, ray.kinsella, yipeng1.wang,
	sameh.gobriel, bruce.richardson

This patch adds tests for predictable RSS feature.

Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
---
 app/test/test_thash.c | 469 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 463 insertions(+), 6 deletions(-)

diff --git a/app/test/test_thash.c b/app/test/test_thash.c
index a6aadd1..d8981fb 100644
--- a/app/test/test_thash.c
+++ b/app/test/test_thash.c
@@ -5,11 +5,15 @@
 #include <rte_common.h>
 #include <rte_eal.h>
 #include <rte_ip.h>
+#include <rte_random.h>
 
 #include "test.h"
 
 #include <rte_thash.h>
 
+#define HASH_MSK(reta_sz)	((1 << reta_sz) - 1)
+#define TUPLE_SZ	(RTE_THASH_V4_L4_LEN * 4)
+
 struct test_thash_v4 {
 	uint32_t	dst_ip;
 	uint32_t	src_ip;
@@ -75,7 +79,7 @@ uint8_t default_rss_key[] = {
 };
 
 static int
-test_thash(void)
+test_toeplitz_hash_calc(void)
 {
 	uint32_t i, j;
 	union rte_thash_tuple tuple;
@@ -100,7 +104,7 @@ test_thash(void)
 				RTE_THASH_V4_L4_LEN, default_rss_key);
 		if ((rss_l3 != v4_tbl[i].hash_l3) ||
 				(rss_l3l4 != v4_tbl[i].hash_l3l4))
-			return -1;
+			return -TEST_FAILED;
 		/*Calculate hash with converted key*/
 		rss_l3 = rte_softrss_be((uint32_t *)&tuple,
 				RTE_THASH_V4_L3_LEN, rss_key_be);
@@ -108,7 +112,7 @@ test_thash(void)
 				RTE_THASH_V4_L4_LEN, rss_key_be);
 		if ((rss_l3 != v4_tbl[i].hash_l3) ||
 				(rss_l3l4 != v4_tbl[i].hash_l3l4))
-			return -1;
+			return -TEST_FAILED;
 	}
 	for (i = 0; i < RTE_DIM(v6_tbl); i++) {
 		/*Fill ipv6 hdr*/
@@ -127,7 +131,7 @@ test_thash(void)
 				RTE_THASH_V6_L4_LEN, default_rss_key);
 		if ((rss_l3 != v6_tbl[i].hash_l3) ||
 				(rss_l3l4 != v6_tbl[i].hash_l3l4))
-			return -1;
+			return -TEST_FAILED;
 		/*Calculate hash with converted key*/
 		rss_l3 = rte_softrss_be((uint32_t *)&tuple,
 				RTE_THASH_V6_L3_LEN, rss_key_be);
@@ -135,9 +139,462 @@ test_thash(void)
 				RTE_THASH_V6_L4_LEN, rss_key_be);
 		if ((rss_l3 != v6_tbl[i].hash_l3) ||
 				(rss_l3l4 != v6_tbl[i].hash_l3l4))
-			return -1;
+			return -TEST_FAILED;
 	}
-	return 0;
+	return TEST_SUCCESS;
+}
+
+static int
+test_create_invalid(void)
+{
+	struct rte_thash_ctx *ctx;
+	int key_len = 40;
+	int reta_sz = 7;
+
+	ctx = rte_thash_init_ctx(NULL, key_len, reta_sz, NULL, 0);
+	RTE_TEST_ASSERT(ctx == NULL,
+		"Call succeeded with invalid parameters\n");
+
+	ctx = rte_thash_init_ctx("test", 0, reta_sz, NULL, 0);
+	RTE_TEST_ASSERT(ctx == NULL,
+		"Call succeeded with invalid parameters\n");
+
+	ctx = rte_thash_init_ctx(NULL, key_len, 1, NULL, 0);
+	RTE_TEST_ASSERT(ctx == NULL,
+		"Call succeeded with invalid parameters\n");
+
+	ctx = rte_thash_init_ctx(NULL, key_len, 17, NULL, 0);
+	RTE_TEST_ASSERT(ctx == NULL,
+		"Call succeeded with invalid parameters\n");
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_multiple_create(void)
+{
+	struct rte_thash_ctx *ctx;
+	int key_len = 40;
+	int reta_sz = 7;
+	int i;
+
+	for (i = 0; i < 100; i++) {
+		ctx = rte_thash_init_ctx("test", key_len, reta_sz, NULL, 0);
+		RTE_TEST_ASSERT(ctx != NULL, "Can not create CTX\n");
+
+		rte_thash_free_ctx(ctx);
+	}
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_free_null(void)
+{
+	struct rte_thash_ctx *ctx;
+
+	ctx = rte_thash_init_ctx("test", 40, 7, NULL, 0);
+	RTE_TEST_ASSERT(ctx != NULL, "Can not create CTX\n");
+
+	rte_thash_free_ctx(ctx);
+	rte_thash_free_ctx(NULL);
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_add_invalid_helper(void)
+{
+	struct rte_thash_ctx *ctx;
+	const int key_len = 40;
+	int reta_sz = 7;
+	int ret;
+
+	ctx = rte_thash_init_ctx("test", key_len, reta_sz, NULL, 0);
+	RTE_TEST_ASSERT(ctx != NULL, "can not create thash ctx\n");
+
+	ret = rte_thash_add_helper(NULL, "test", reta_sz, 0);
+	RTE_TEST_ASSERT(ret == -EINVAL,
+		"Call succeeded with invalid parameters\n");
+
+	ret = rte_thash_add_helper(ctx, NULL, reta_sz, 0);
+	RTE_TEST_ASSERT(ret == -EINVAL,
+		"Call succeeded with invalid parameters\n");
+
+	ret = rte_thash_add_helper(ctx, "test", reta_sz - 1, 0);
+	RTE_TEST_ASSERT(ret == -EINVAL,
+		"Call succeeded with invalid parameters\n");
+
+	ret = rte_thash_add_helper(ctx, "test", reta_sz, key_len * 8);
+	RTE_TEST_ASSERT(ret == -EINVAL,
+		"Call succeeded with invalid parameters\n");
+
+	ret = rte_thash_add_helper(ctx, "first_range", reta_sz, 0);
+	RTE_TEST_ASSERT(ret == 0, "Can not create helper\n");
+
+	ret = rte_thash_add_helper(ctx, "first_range", reta_sz, 0);
+	RTE_TEST_ASSERT(ret == -EEXIST,
+		"Call succeeded with duplicated name\n");
+
+	/*
+	 * Create second helper with offset 32 + 2 * reta_sz.
+	 * Note first_range helper created range in key:
+	 * [0, 32 + length{= reta_sz} - 1], i.e. [0, 38].
+	 * second range is [46, 84]
+	 */
+	ret = rte_thash_add_helper(ctx, "second_range", reta_sz,
+		32 +  2 * reta_sz);
+	RTE_TEST_ASSERT(ret == 0, "Can not create helper\n");
+
+	/*
+	 * Try to create overlapping with first_ and second_ ranges,
+	 * i.e. [6, 49)
+	 */
+	ret = rte_thash_add_helper(ctx, "third_range", 2 * reta_sz, reta_sz);
+	RTE_TEST_ASSERT(ret == -EEXIST,
+		"Call succeeded with overlapping ranges\n");
+
+	rte_thash_free_ctx(ctx);
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_find_existing(void)
+{
+	struct rte_thash_ctx *ctx, *ret_ctx;
+
+	ctx = rte_thash_init_ctx("test", 40, 7, NULL, 0);
+	RTE_TEST_ASSERT(ctx != NULL, "can not create thash ctx\n");
+
+	ret_ctx = rte_thash_find_existing("test");
+	RTE_TEST_ASSERT(ret_ctx != NULL, "can not find existing ctx\n");
+
+	rte_thash_free_ctx(ctx);
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_get_helper(void)
+{
+	struct rte_thash_ctx *ctx;
+	struct rte_thash_subtuple_helper *h;
+	int ret;
+
+	ctx = rte_thash_init_ctx("test", 40, 7, NULL, 0);
+	RTE_TEST_ASSERT(ctx != NULL, "Can not create thash ctx\n");
+
+	h = rte_thash_get_helper(NULL, "first_range");
+	RTE_TEST_ASSERT(h == NULL, "Call succeeded with invalid parameters\n");
+
+	h = rte_thash_get_helper(ctx, NULL);
+	RTE_TEST_ASSERT(h == NULL, "Call succeeded with invalid parameters\n");
+
+	ret = rte_thash_add_helper(ctx, "first_range", 8, 0);
+	RTE_TEST_ASSERT(ret == 0, "Can not create helper\n");
+
+	h = rte_thash_get_helper(ctx, "first_range");
+	RTE_TEST_ASSERT(h != NULL, "Can not find helper\n");
+
+	rte_thash_free_ctx(ctx);
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_period_overflow(void)
+{
+	struct rte_thash_ctx *ctx;
+	int reta_sz = 7; /* reflects polynomial degree */
+	int ret;
+
+	/* first create without RTE_THASH_IGNORE_PERIOD_OVERFLOW flag */
+	ctx = rte_thash_init_ctx("test", 40, reta_sz, NULL, 0);
+	RTE_TEST_ASSERT(ctx != NULL, "Can not create thash ctx\n");
+
+	/* requested range > (2^reta_sz) - 1 */
+	ret = rte_thash_add_helper(ctx, "test", (1 << reta_sz), 0);
+	RTE_TEST_ASSERT(ret == -ENOSPC,
+		"Call succeeded with invalid parameters\n");
+
+	/* requested range == len + 32 - 1, smaller than (2^reta_sz) - 1 */
+	ret = rte_thash_add_helper(ctx, "test", (1 << reta_sz) - 32, 0);
+	RTE_TEST_ASSERT(ret == 0, "Can not create helper\n");
+
+	rte_thash_free_ctx(ctx);
+
+	/* create with RTE_THASH_IGNORE_PERIOD_OVERFLOW flag */
+	ctx = rte_thash_init_ctx("test", 40, reta_sz, NULL,
+		RTE_THASH_IGNORE_PERIOD_OVERFLOW);
+	RTE_TEST_ASSERT(ctx != NULL, "Can not create thash ctx\n");
+
+	/* requested range > (2^reta_sz - 1) */
+	ret = rte_thash_add_helper(ctx, "test", (1 << reta_sz) + 10, 0);
+	RTE_TEST_ASSERT(ret == 0, "Can not create helper\n");
+
+	rte_thash_free_ctx(ctx);
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_predictable_rss_min_seq(void)
+{
+	struct rte_thash_ctx *ctx;
+	struct rte_thash_subtuple_helper *h;
+	const int key_len = 40;
+	int reta_sz = 6;
+	uint8_t initial_key[key_len];
+	const uint8_t *new_key;
+	int ret;
+	union rte_thash_tuple tuple;
+	uint32_t orig_hash, adj_hash, adj;
+	unsigned int desired_value = 27 & HASH_MSK(reta_sz);
+	uint16_t port_value = 22;
+
+	memset(initial_key, 0, key_len);
+
+	ctx = rte_thash_init_ctx("test", key_len, reta_sz, initial_key,
+		RTE_THASH_MINIMAL_SEQ);
+	RTE_TEST_ASSERT(ctx != NULL, "can not create thash ctx\n");
+
+	ret = rte_thash_add_helper(ctx, "snat", sizeof(uint16_t) * 8,
+		offsetof(union rte_thash_tuple, v4.sport) * 8);
+	RTE_TEST_ASSERT(ret == 0, "can not add helper, ret %d\n", ret);
+
+	h = rte_thash_get_helper(ctx, "snat");
+	RTE_TEST_ASSERT(h != NULL, "can not find helper\n");
+
+	new_key = rte_thash_get_key(ctx);
+	tuple.v4.src_addr = RTE_IPV4(0, 0, 0, 0);
+	tuple.v4.dst_addr = RTE_IPV4(0, 0, 0, 0);
+	tuple.v4.sport = 0;
+	tuple.v4.sport = rte_cpu_to_be_16(port_value);
+	tuple.v4.dport = 0;
+	tuple.v4.sctp_tag = rte_be_to_cpu_32(tuple.v4.sctp_tag);
+
+	orig_hash = rte_softrss((uint32_t *)&tuple,
+		RTE_THASH_V4_L4_LEN, new_key);
+	adj = rte_thash_get_complement(h, orig_hash, desired_value);
+
+	tuple.v4.sctp_tag = rte_cpu_to_be_32(tuple.v4.sctp_tag);
+	tuple.v4.sport ^= rte_cpu_to_be_16(adj);
+	tuple.v4.sctp_tag = rte_be_to_cpu_32(tuple.v4.sctp_tag);
+
+	adj_hash = rte_softrss((uint32_t *)&tuple,
+		RTE_THASH_V4_L4_LEN, new_key);
+	RTE_TEST_ASSERT((adj_hash & HASH_MSK(reta_sz)) ==
+		desired_value, "bad desired value\n");
+
+	rte_thash_free_ctx(ctx);
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * This test creates 7 subranges in the following order:
+ * range_one	= [56, 95),	len = 8, offset = 56
+ * range_two	= [64, 103),	len = 8, offset = 64
+ * range_three	= [120, 159),	len = 8, offset = 120
+ * range_four	= [48, 87),	len = 8, offset = 48
+ * range_five	= [57, 95),	len = 7, offset = 57
+ * range_six	= [40, 111),	len = 40, offset = 40
+ * range_seven	= [0, 39),	len = 8, offset = 0
+ */
+struct range {
+	const char *name;
+	int len;
+	int offset;
+	int byte_idx;
+};
+
+struct range rng_arr[] = {
+	{"one",   8,  56,  7},
+	{"two",   8,  64,  8},
+	{"three", 8,  120, 15},
+	{"four",  8,  48,  6},
+	{"six",   40, 40,  9},
+	{"five",  7,  57,  7},
+	{"seven", 8,  0,   0}
+};
+
+static int
+test_predictable_rss_multirange(void)
+{
+	struct rte_thash_ctx *ctx;
+	struct rte_thash_subtuple_helper *h[RTE_DIM(rng_arr)];
+	const uint8_t *new_key;
+	const int key_len = 40;
+	int reta_sz = 7;
+	unsigned int i, j, k;
+	int ret;
+	uint32_t desired_value = rte_rand() & HASH_MSK(reta_sz);
+	uint8_t tuples[RTE_DIM(rng_arr)][16] = { {0} };
+	uint32_t *ptr;
+	uint32_t hashes[RTE_DIM(rng_arr)];
+	uint32_t adj_hashes[RTE_DIM(rng_arr)];
+	uint32_t adj;
+
+	ctx = rte_thash_init_ctx("test", key_len, reta_sz, NULL, 0);
+	RTE_TEST_ASSERT(ctx != NULL, "can not create thash ctx\n");
+
+	for (i = 0; i < RTE_DIM(rng_arr); i++) {
+		ret = rte_thash_add_helper(ctx, rng_arr[i].name,
+			rng_arr[i].len, rng_arr[i].offset);
+		RTE_TEST_ASSERT(ret == 0, "can not add helper\n");
+
+		h[i] = rte_thash_get_helper(ctx, rng_arr[i].name);
+		RTE_TEST_ASSERT(h[i] != NULL, "can not find helper\n");
+	}
+	new_key = rte_thash_get_key(ctx);
+
+	/*
+	 * calculate hashes and complements, then adjust tuples with
+	 * the complements and recalculate the hashes
+	 */
+	for (i = 0; i < RTE_DIM(rng_arr); i++) {
+		for (k = 0; k < 100; k++) {
+			/* init with random keys */
+			ptr = (uint32_t *)&tuples[i][0];
+			for (j = 0; j < 4; j++)
+				ptr[j] = rte_rand();
+			/* convert keys from BE to CPU byte order */
+			for (j = 0; j < 4; j++)
+				ptr[j] = rte_be_to_cpu_32(ptr[j]);
+
+			hashes[i] = rte_softrss(ptr, 4, new_key);
+			adj = rte_thash_get_complement(h[i], hashes[i],
+				desired_value);
+			/* convert back to BE to adjust the value */
+			for (j = 0; j < 4; j++)
+				ptr[j] = rte_cpu_to_be_32(ptr[j]);
+
+			tuples[i][rng_arr[i].byte_idx] ^= adj;
+
+			for (j = 0; j < 4; j++)
+				ptr[j] = rte_be_to_cpu_32(ptr[j]);
+
+			adj_hashes[i] = rte_softrss(ptr, 4, new_key);
+			RTE_TEST_ASSERT((adj_hashes[i] & HASH_MSK(reta_sz)) ==
+				desired_value,
+				"bad desired value for %d tuple\n", i);
+		}
+	}
+
+	rte_thash_free_ctx(ctx);
+
+	return TEST_SUCCESS;
+}
+
+static int
+cmp_tuple_eq(void *userdata, uint8_t *tuple)
+{
+	return memcmp(userdata, tuple, TUPLE_SZ);
+}
+
+static int
+test_adjust_tuple(void)
+{
+	struct rte_thash_ctx *ctx;
+	struct rte_thash_subtuple_helper *h;
+	const int key_len = 40;
+	const uint8_t *new_key;
+	uint8_t tuple[TUPLE_SZ];
+	uint32_t tmp_tuple[TUPLE_SZ / sizeof(uint32_t)];
+	uint32_t tuple_copy[TUPLE_SZ / sizeof(uint32_t)];
+	uint32_t hash;
+	int reta_sz = CHAR_BIT;
+	int ret;
+	unsigned int i, desired_value = rte_rand() & HASH_MSK(reta_sz);
+
+	memset(tuple, 0xab, TUPLE_SZ);
+
+	ctx = rte_thash_init_ctx("test", key_len, reta_sz, NULL, 0);
+	RTE_TEST_ASSERT(ctx != NULL, "can not create thash ctx\n");
+
+	/*
+	 * set offset to be in the middle of a byte
+	 * set size of the subtuple to be 2 * reta_sz
+	 * to have the room for random bits
+	 */
+	ret = rte_thash_add_helper(ctx, "test", reta_sz * 2,
+		(5 * CHAR_BIT) + 4);
+	RTE_TEST_ASSERT(ret == 0, "can not add helper, ret %d\n", ret);
+
+	new_key = rte_thash_get_key(ctx);
+
+	h = rte_thash_get_helper(ctx, "test");
+	RTE_TEST_ASSERT(h != NULL, "can not find helper\n");
+
+	ret = rte_thash_adjust_tuple(ctx, h, tuple, TUPLE_SZ, desired_value,
+		1, NULL, NULL);
+	RTE_TEST_ASSERT(ret == 0, "can not adjust tuple, ret %d\n", ret);
+
+	for (i = 0; i < (TUPLE_SZ / 4); i++)
+		tmp_tuple[i] =
+			rte_be_to_cpu_32(*(uint32_t *)&tuple[i * 4]);
+
+	hash = rte_softrss(tmp_tuple, TUPLE_SZ / 4, new_key);
+	RTE_TEST_ASSERT((hash & HASH_MSK(reta_sz)) ==
+		desired_value, "bad desired value\n");
+
+
+	/* Pass previously calculated tuple to callback function */
+	memcpy(tuple_copy, tuple, TUPLE_SZ);
+
+	memset(tuple, 0xab, TUPLE_SZ);
+	ret = rte_thash_adjust_tuple(ctx, h, tuple, TUPLE_SZ, desired_value,
+		1, cmp_tuple_eq, tuple_copy);
+	RTE_TEST_ASSERT(ret == -EEXIST,
+		"adjust tuple didn't indicate collision\n");
+
+	/*
+	 * Make the function to generate random bits into subtuple
+	 * after first adjustment attempt.
+	 */
+	memset(tuple, 0xab, TUPLE_SZ);
+	ret = rte_thash_adjust_tuple(ctx, h, tuple, TUPLE_SZ, desired_value,
+		2, cmp_tuple_eq, tuple_copy);
+	RTE_TEST_ASSERT(ret == 0, "can not adjust tuple, ret %d\n", ret);
+
+	for (i = 0; i < (TUPLE_SZ / 4); i++)
+		tmp_tuple[i] =
+			rte_be_to_cpu_32(*(uint32_t *)&tuple[i * 4]);
+
+	hash = rte_softrss(tmp_tuple, TUPLE_SZ / 4, new_key);
+	RTE_TEST_ASSERT((hash & HASH_MSK(reta_sz)) ==
+		desired_value, "bad desired value\n");
+
+	rte_thash_free_ctx(ctx);
+
+	return TEST_SUCCESS;
+}
+
+static struct unit_test_suite thash_tests = {
+	.suite_name = "thash autotest",
+	.setup = NULL,
+	.teardown = NULL,
+	.unit_test_cases = {
+	TEST_CASE(test_toeplitz_hash_calc),
+	TEST_CASE(test_create_invalid),
+	TEST_CASE(test_multiple_create),
+	TEST_CASE(test_free_null),
+	TEST_CASE(test_add_invalid_helper),
+	TEST_CASE(test_find_existing),
+	TEST_CASE(test_get_helper),
+	TEST_CASE(test_period_overflow),
+	TEST_CASE(test_predictable_rss_min_seq),
+	TEST_CASE(test_predictable_rss_multirange),
+	TEST_CASE(test_adjust_tuple),
+	TEST_CASES_END()
+	}
+};
+
+static int
+test_thash(void)
+{
+	return unit_test_suite_runner(&thash_tests);
 }
 
 REGISTER_TEST_COMMAND(thash_autotest, test_thash);
-- 
2.7.4


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/3] hash: add predictable RSS implementation
  2021-04-11 18:51     ` Medvedkin, Vladimir
@ 2021-04-12  9:47       ` Ananyev, Konstantin
  0 siblings, 0 replies; 23+ messages in thread
From: Ananyev, Konstantin @ 2021-04-12  9:47 UTC (permalink / raw)
  To: Medvedkin, Vladimir, dev
  Cc: Chilikin, Andrey, Kinsella, Ray, Wang, Yipeng1, Gobriel, Sameh,
	Richardson, Bruce

> 
> <snip>
> 
> >> +#define RETA_SZ_MIN	2U
> >> +#define RETA_SZ_MAX	16U
> >
> > Should these RETA_SZ defines be in public header?
> > So user can know what are allowed values?
> >
> 
> I don't think this is necessary, because the user chooses it not
> arbitrary, but depending on the NIC.

Sure thing, but it would be good for the user to know what the SW
limitations on it are without digging through the .c file.

> 
> >> +#define RETA_SZ_IN_RANGE(reta_sz)	((reta_sz >= RETA_SZ_MIN) && \
> 
> <snip>
> 
> >> +uint32_t i;
> >
> > Empty line is  missing.
> >
> 
> Thanks
> 
> >> +if ((name == NULL) || (key_len == 0) || !RETA_SZ_IN_RANGE(reta_sz)) {
> >> +rte_errno = EINVAL;
> >> +return NULL;
> >> +}
> 
> <snip>
> 
> >> +static inline void
> >> +set_bit(uint8_t *ptr, uint32_t bit, uint32_t pos)
> >> +{
> >> +uint32_t byte_idx = pos >> 3;
> >
> > Just as a nit to be consistent with the line below:
> > pos / CHAR_BIT;
> >
> 
> Fixed
> 
> >> +uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1));
> >> +uint8_t tmp;
> 
> <snip>
> 
> >> +ent = rte_zmalloc(NULL, sizeof(struct rte_thash_subtuple_helper) +
> >> +sizeof(uint32_t) * (1 << ctx->reta_sz_log), 0);
> >
> > Helper can be used by data-path code (via rte_thash_get_compliment()) right?
> > Then might be better to align it at cache-line.
> >
> 
> Agree, I'll fix it
> 
> >> +if (ent == NULL)
> >> +return -ENOMEM;
> 
> <snip>
> 
> >>   uint32_t
> >> -rte_thash_get_compliment(struct rte_thash_subtuple_helper *h __rte_unused,
> >> -uint32_t hash __rte_unused, uint32_t desired_hash __rte_unused)
> >> +rte_thash_get_compliment(struct rte_thash_subtuple_helper *h,
> >> +uint32_t hash, uint32_t desired_hash)
> >>   {
> >> -return 0;
> >> +return h->compl_table[(hash ^ desired_hash) & h->lsb_msk];
> >>   }
> >
> > Would it make sense to add another-one for multi values:
> > rte_thash_get_compliment(uint32_t hash, const uint32_t desired_hashes[], uint32_t adj_hash[], uint32_t num);
> > So user can get adjustment values for multiple queues at once?
> >
> 
> At the moment I can't find scenarios why do we need to have a bulk
> version for this function

My thought was about case when number of configured
HW queues is less than reta_size.
Let say reta_size==4, but user configured only 3 queues and reta={0,1,2,0}.
In that case for queue 0, both 0 and 3 values would suit. 


^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2021-04-12  9:47 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-16 18:24 [dpdk-dev] [PATCH v1 0/3] Predictable RSS feature Vladimir Medvedkin
2021-03-16 18:24 ` [dpdk-dev] [PATCH v1 1/3] hash: add predictable RSS API Vladimir Medvedkin
2021-03-16 18:24 ` [dpdk-dev] [PATCH v1 2/3] hash: add predictable RSS implementation Vladimir Medvedkin
2021-03-16 18:24 ` [dpdk-dev] [PATCH v1 3/3] test/hash: add additional thash tests Vladimir Medvedkin
2021-04-06 19:50 ` [dpdk-dev] [PATCH v2 0/3] Predictable RSS feature Vladimir Medvedkin
2021-04-08 15:56   ` Stephen Hemminger
2021-04-11 18:51     ` Medvedkin, Vladimir
2021-04-10  0:32   ` Wang, Yipeng1
2021-04-11 18:51     ` Medvedkin, Vladimir
2021-04-11 19:11   ` [dpdk-dev] [PATCH v3 " Vladimir Medvedkin
2021-04-11 19:11   ` [dpdk-dev] [PATCH v3 1/3] hash: add predictable RSS API Vladimir Medvedkin
2021-04-11 19:11   ` [dpdk-dev] [PATCH v3 2/3] hash: add predictable RSS implementation Vladimir Medvedkin
2021-04-11 19:11   ` [dpdk-dev] [PATCH v3 3/3] test/hash: add additional thash tests Vladimir Medvedkin
2021-04-06 19:50 ` [dpdk-dev] [PATCH v2 1/3] hash: add predictable RSS API Vladimir Medvedkin
2021-04-10  0:05   ` Wang, Yipeng1
2021-04-11 18:52     ` Medvedkin, Vladimir
2021-04-06 19:50 ` [dpdk-dev] [PATCH v2 2/3] hash: add predictable RSS implementation Vladimir Medvedkin
2021-04-07 12:53   ` Ananyev, Konstantin
2021-04-11 18:51     ` Medvedkin, Vladimir
2021-04-12  9:47       ` Ananyev, Konstantin
2021-04-10  0:10   ` Wang, Yipeng1
2021-04-11 18:52     ` Medvedkin, Vladimir
2021-04-06 19:50 ` [dpdk-dev] [PATCH v2 3/3] test/hash: add additional thash tests Vladimir Medvedkin

DPDK patches and discussions

This inbox may be cloned and mirrored by anyone:

	git clone --mirror http://inbox.dpdk.org/dev/0 dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dev dev/ http://inbox.dpdk.org/dev \
		dev@dpdk.org
	public-inbox-index dev

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git