From mboxrd@z Thu Jan 1 00:00:00 1970
From: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
To: dev@dpdk.org
Cc: konstantin.ananyev@intel.com, andrey.chilikin@intel.com, ray.kinsella@intel.com, yipeng1.wang@intel.com, sameh.gobriel@intel.com, bruce.richardson@intel.com
Date: Sun, 11 Apr 2021 20:11:05 +0100
Message-Id: <1618168266-338017-3-git-send-email-vladimir.medvedkin@intel.com>
X-Mailer: git-send-email 2.7.4
In-Reply-To: <1618168266-338017-1-git-send-email-vladimir.medvedkin@intel.com>
References: <1618168266-338017-1-git-send-email-vladimir.medvedkin@intel.com>
In-Reply-To: <1617738643-258635-1-git-send-email-vladimir.medvedkin@intel.com>
References: <1617738643-258635-1-git-send-email-vladimir.medvedkin@intel.com>
Subject: [dpdk-dev] [PATCH v3 2/3] hash: add predictable RSS implementation

This patch implements predictable RSS functionality: the Toeplitz
hash key is filled with LFSR-generated m-sequences, which makes it
possible to compute which bits of an arbitrary subtuple must be
changed so that the least significant bits of the hash value match a
desired value.

Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
---
 lib/librte_hash/rte_thash.c | 610 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 587 insertions(+), 23 deletions(-)

diff --git a/lib/librte_hash/rte_thash.c b/lib/librte_hash/rte_thash.c
index 1325678..c76ceee 100644
--- a/lib/librte_hash/rte_thash.c
+++ b/lib/librte_hash/rte_thash.c
@@ -12,6 +12,45 @@
 #include
 
 #define THASH_NAME_LEN 64
+#define TOEPLITZ_HASH_LEN 32
+
+#define RETA_SZ_MIN	2U
+#define RETA_SZ_MAX	16U
+#define RETA_SZ_IN_RANGE(reta_sz)	((reta_sz >= RETA_SZ_MIN) && \
+					(reta_sz <= RETA_SZ_MAX))
+
+TAILQ_HEAD(rte_thash_list, rte_tailq_entry);
+static struct rte_tailq_elem rte_thash_tailq = {
+	.name = "RTE_THASH",
+};
+EAL_REGISTER_TAILQ(rte_thash_tailq)
+
+/**
+ * Table of some irreducible polynomials over GF(2).
+ * For lfsr they are represented in BE bit order, and
+ * x^0 is masked out.
+ * For example, poly x^5 + x^2 + 1 will be represented
+ * as (101001b & 11111b) = 01001b = 0x9
+ */
+static const uint32_t irreducible_poly_table[][4] = {
+	{0, 0, 0, 0},	/**< degree 0 */
+	{1, 1, 1, 1},	/**< degree 1 */
+	{0x3, 0x3, 0x3, 0x3},	/**< degree 2 and so on... */
+	{0x5, 0x3, 0x5, 0x3},
+	{0x9, 0x3, 0x9, 0x3},
+	{0x9, 0x1b, 0xf, 0x5},
+	{0x21, 0x33, 0x1b, 0x2d},
+	{0x41, 0x11, 0x71, 0x9},
+	{0x71, 0xa9, 0xf5, 0x8d},
+	{0x21, 0xd1, 0x69, 0x1d9},
+	{0x81, 0x2c1, 0x3b1, 0x185},
+	{0x201, 0x541, 0x341, 0x461},
+	{0x941, 0x609, 0xe19, 0x45d},
+	{0x1601, 0x1f51, 0x1171, 0x359},
+	{0x2141, 0x2111, 0x2db1, 0x2109},
+	{0x4001, 0x801, 0x101, 0x7301},
+	{0x7781, 0xa011, 0x4211, 0x86d9},
+};
 
 struct thash_lfsr {
 	uint32_t	ref_cnt;
@@ -50,60 +89,585 @@ struct rte_thash_ctx {
 	uint8_t		hash_key[0];
 };
 
+static inline uint32_t
+get_bit_lfsr(struct thash_lfsr *lfsr)
+{
+	uint32_t bit, ret;
+
+	/*
+	 * masking the TAP bits defined by the polynomial and
+	 * calculating parity
+	 */
+	bit = __builtin_popcount(lfsr->state & lfsr->poly) & 0x1;
+	ret = lfsr->state & 0x1;
+	lfsr->state = ((lfsr->state >> 1) | (bit << (lfsr->deg - 1))) &
+		((1 << lfsr->deg) - 1);
+
+	lfsr->bits_cnt++;
+	return ret;
+}
+
+static inline uint32_t
+get_rev_bit_lfsr(struct thash_lfsr *lfsr)
+{
+	uint32_t bit, ret;
+
+	bit = __builtin_popcount(lfsr->rev_state & lfsr->rev_poly) & 0x1;
+	ret = lfsr->rev_state & (1 << (lfsr->deg - 1));
+	lfsr->rev_state = ((lfsr->rev_state << 1) | bit) &
+		((1 << lfsr->deg) - 1);
+
+	lfsr->bits_cnt++;
+	return ret;
+}
+
+static inline uint32_t
+thash_get_rand_poly(uint32_t poly_degree)
+{
+	return irreducible_poly_table[poly_degree][rte_rand() %
+		RTE_DIM(irreducible_poly_table[poly_degree])];
+}
+
+static struct thash_lfsr *
+alloc_lfsr(struct rte_thash_ctx *ctx)
+{
+	struct thash_lfsr *lfsr;
+	uint32_t i;
+
+	if (ctx == NULL)
+		return NULL;
+
+	lfsr = rte_zmalloc(NULL, sizeof(struct thash_lfsr), 0);
+	if (lfsr == NULL)
+		return NULL;
+
+	lfsr->deg = ctx->reta_sz_log;
+	lfsr->poly = thash_get_rand_poly(lfsr->deg);
+	do {
+		lfsr->state = rte_rand() & ((1 << lfsr->deg) - 1);
+	} while (lfsr->state == 0);
+	/* init reverse order polynomial */
+	lfsr->rev_poly = (lfsr->poly >> 1) | (1 << (lfsr->deg - 1));
+	/* init proper rev_state */
+	lfsr->rev_state = lfsr->state;
+	for (i = 0; i <= lfsr->deg; i++)
+		get_rev_bit_lfsr(lfsr);
+
+	/* clear bits_cnt after rev_state was inited */
+	lfsr->bits_cnt = 0;
+	lfsr->ref_cnt = 1;
+
+	return lfsr;
+}
+
+static void
+attach_lfsr(struct rte_thash_subtuple_helper *h, struct thash_lfsr *lfsr)
+{
+	lfsr->ref_cnt++;
+	h->lfsr = lfsr;
+}
+
+static void
+free_lfsr(struct thash_lfsr *lfsr)
+{
+	lfsr->ref_cnt--;
+	if (lfsr->ref_cnt == 0)
+		rte_free(lfsr);
+}
+
 struct rte_thash_ctx *
-rte_thash_init_ctx(const char *name __rte_unused,
-	uint32_t key_len __rte_unused, uint32_t reta_sz __rte_unused,
-	uint8_t *key __rte_unused, uint32_t flags __rte_unused)
+rte_thash_init_ctx(const char *name, uint32_t key_len, uint32_t reta_sz,
+	uint8_t *key, uint32_t flags)
 {
+	struct rte_thash_ctx *ctx;
+	struct rte_tailq_entry *te;
+	struct rte_thash_list *thash_list;
+	uint32_t i;
+
+	if ((name == NULL) || (key_len == 0) || !RETA_SZ_IN_RANGE(reta_sz)) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
+
+	rte_mcfg_tailq_write_lock();
+
+	/* guarantee there's no existing entry with this name */
+	TAILQ_FOREACH(te, thash_list, next) {
+		ctx = (struct rte_thash_ctx *)te->data;
+		if (strncmp(name, ctx->name, sizeof(ctx->name)) == 0)
+			break;
+	}
+	ctx = NULL;
+	if (te != NULL) {
+		rte_errno = EEXIST;
+		goto exit;
+	}
+
+	/* allocate tailq entry */
+	te = rte_zmalloc("THASH_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL) {
+		RTE_LOG(ERR, HASH,
+			"Cannot allocate tailq entry for thash context %s\n",
+			name);
+		rte_errno = ENOMEM;
+		goto exit;
+	}
+
+	ctx = rte_zmalloc(NULL, sizeof(struct rte_thash_ctx) + key_len, 0);
+	if (ctx == NULL) {
+		RTE_LOG(ERR, HASH, "thash ctx %s memory allocation failed\n",
+			name);
+		rte_errno = ENOMEM;
+		goto free_te;
+	}
+
+	rte_strlcpy(ctx->name, name, sizeof(ctx->name));
+	ctx->key_len = key_len;
+	ctx->reta_sz_log = reta_sz;
+	LIST_INIT(&ctx->head);
+	ctx->flags = flags;
+
+	if (key)
+		rte_memcpy(ctx->hash_key, key, key_len);
+	else {
+		for (i = 0; i < key_len; i++)
+			ctx->hash_key[i] = rte_rand();
+	}
+
+	te->data = (void *)ctx;
+	TAILQ_INSERT_TAIL(thash_list, te, next);
+
+	rte_mcfg_tailq_write_unlock();
+
+	return ctx;
+free_te:
+	rte_free(te);
+exit:
+	rte_mcfg_tailq_write_unlock();
 	return NULL;
 }
 
 struct rte_thash_ctx *
-rte_thash_find_existing(const char *name __rte_unused)
+rte_thash_find_existing(const char *name)
 {
-	return NULL;
+	struct rte_thash_ctx *ctx;
+	struct rte_tailq_entry *te;
+	struct rte_thash_list *thash_list;
+
+	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
+
+	rte_mcfg_tailq_read_lock();
+	TAILQ_FOREACH(te, thash_list, next) {
+		ctx = (struct rte_thash_ctx *)te->data;
+		if (strncmp(name, ctx->name, sizeof(ctx->name)) == 0)
+			break;
+	}
+
+	rte_mcfg_tailq_read_unlock();
+
+	if (te == NULL) {
+		rte_errno = ENOENT;
+		return NULL;
+	}
+
+	return ctx;
 }
 
 void
-rte_thash_free_ctx(struct rte_thash_ctx *ctx __rte_unused)
+rte_thash_free_ctx(struct rte_thash_ctx *ctx)
 {
+	struct rte_tailq_entry *te;
+	struct rte_thash_list *thash_list;
+	struct rte_thash_subtuple_helper *ent, *tmp;
+
+	if (ctx == NULL)
+		return;
+
+	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
+	rte_mcfg_tailq_write_lock();
+	TAILQ_FOREACH(te, thash_list, next) {
+		if (te->data == (void *)ctx)
+			break;
+	}
+
+	if (te != NULL)
+		TAILQ_REMOVE(thash_list, te, next);
+
+	rte_mcfg_tailq_write_unlock();
+	ent = LIST_FIRST(&(ctx->head));
+	while (ent) {
+		free_lfsr(ent->lfsr);
+		tmp = ent;
+		ent = LIST_NEXT(ent, next);
+		LIST_REMOVE(tmp, next);
+		rte_free(tmp);
+	}
+
+	rte_free(ctx);
+	rte_free(te);
+}
+
+static inline void
+set_bit(uint8_t *ptr, uint32_t bit, uint32_t pos)
+{
+	uint32_t byte_idx = pos / CHAR_BIT;
+	uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1));
+	uint8_t tmp;
+
+	tmp = ptr[byte_idx];
+	tmp &= ~(1 << bit_idx);
+	tmp |= bit << bit_idx;
+	ptr[byte_idx] = tmp;
+}
+
+/**
+ * writes m-sequence to the hash_key for range [start, end]
+ * (i.e. including start and end positions)
+ */
+static int
+generate_subkey(struct rte_thash_ctx *ctx, struct thash_lfsr *lfsr,
+	uint32_t start, uint32_t end)
+{
+	uint32_t i;
+	uint32_t req_bits = (start < end) ? (end - start) : (start - end);
+	req_bits++; /* due to including end */
+
+	/* check that the period of the m-sequence will not be exceeded */
+	if (((lfsr->bits_cnt + req_bits) > (1ULL << lfsr->deg) - 1) &&
+			((ctx->flags & RTE_THASH_IGNORE_PERIOD_OVERFLOW) !=
+			RTE_THASH_IGNORE_PERIOD_OVERFLOW))
+		return -ENOSPC;
+
+	if (start < end) {
+		/* original direction (from left to right) */
+		for (i = start; i <= end; i++)
+			set_bit(ctx->hash_key, get_bit_lfsr(lfsr), i);
+
+	} else {
+		/* reverse direction (from right to left) */
+		for (i = start; i >= end; i--)
+			set_bit(ctx->hash_key, get_rev_bit_lfsr(lfsr), i);
+	}
+
+	return 0;
+}
+
+static inline uint32_t
+get_subvalue(struct rte_thash_ctx *ctx, uint32_t offset)
+{
+	uint32_t *tmp, val;
+
+	tmp = (uint32_t *)(&ctx->hash_key[offset >> 3]);
+	val = rte_be_to_cpu_32(*tmp);
+	val >>= (TOEPLITZ_HASH_LEN - ((offset & (CHAR_BIT - 1)) +
+		ctx->reta_sz_log));
+
+	return val & ((1 << ctx->reta_sz_log) - 1);
+}
+
+static inline void
+generate_complement_table(struct rte_thash_ctx *ctx,
+	struct rte_thash_subtuple_helper *h)
+{
+	int i, j, k;
+	uint32_t val;
+	uint32_t start;
+
+	start = h->offset + h->len - (2 * ctx->reta_sz_log - 1);
+
+	for (i = 1; i < (1 << ctx->reta_sz_log); i++) {
+		val = 0;
+		for (j = i; j; j &= (j - 1)) {
+			k = rte_bsf32(j);
+			val ^= get_subvalue(ctx, start - k +
+				ctx->reta_sz_log - 1);
+		}
+		h->compl_table[val] = i;
+	}
+}
+
+static inline int
+insert_before(struct rte_thash_ctx *ctx,
+	struct rte_thash_subtuple_helper *ent,
+	struct rte_thash_subtuple_helper *cur_ent,
+	struct rte_thash_subtuple_helper *next_ent,
+	uint32_t start, uint32_t end, uint32_t range_end)
+{
+	int ret;
+
+	if (end < cur_ent->offset) {
+		ent->lfsr = alloc_lfsr(ctx);
+		if (ent->lfsr == NULL) {
+			rte_free(ent);
+			return -ENOMEM;
+		}
+		/* generate nonoverlapping range [start, end) */
+		ret = generate_subkey(ctx, ent->lfsr, start, end - 1);
+		if (ret != 0) {
+			free_lfsr(ent->lfsr);
+			rte_free(ent);
+			return ret;
+		}
+	} else if ((next_ent != NULL) && (end > next_ent->offset)) {
+		rte_free(ent);
+		return -ENOSPC;
+	}
+	attach_lfsr(ent, cur_ent->lfsr);
+
+	/**
+	 * generate partially overlapping range
+	 * [start, cur_ent->offset) in reverse order
+	 */
+	ret = generate_subkey(ctx, ent->lfsr, cur_ent->offset - 1, start);
+	if (ret != 0) {
+		free_lfsr(ent->lfsr);
+		rte_free(ent);
+		return ret;
+	}
+
+	if (end > range_end) {
+		/**
+		 * generate partially overlapping range
+		 * (range_end, end)
+		 */
+		ret = generate_subkey(ctx, ent->lfsr, range_end, end - 1);
+		if (ret != 0) {
+			free_lfsr(ent->lfsr);
+			rte_free(ent);
+			return ret;
+		}
+	}
+
+	LIST_INSERT_BEFORE(cur_ent, ent, next);
+	generate_complement_table(ctx, ent);
+	ctx->subtuples_nb++;
+	return 0;
+}
+
+static inline int
+insert_after(struct rte_thash_ctx *ctx,
+	struct rte_thash_subtuple_helper *ent,
+	struct rte_thash_subtuple_helper *cur_ent,
+	struct rte_thash_subtuple_helper *next_ent,
+	struct rte_thash_subtuple_helper *prev_ent,
+	uint32_t end, uint32_t range_end)
+{
+	int ret;
+
+	if ((next_ent != NULL) && (end > next_ent->offset)) {
+		rte_free(ent);
+		return -ENOSPC;
+	}
+
+	attach_lfsr(ent, cur_ent->lfsr);
+	if (end > range_end) {
+		/**
+		 * generate partially overlapping range
+		 * (range_end, end)
+		 */
+		ret = generate_subkey(ctx, ent->lfsr, range_end, end - 1);
+		if (ret != 0) {
+			free_lfsr(ent->lfsr);
+			rte_free(ent);
+			return ret;
+		}
+	}
+
+	LIST_INSERT_AFTER(prev_ent, ent, next);
+	generate_complement_table(ctx, ent);
+	ctx->subtuples_nb++;
+
+	return 0;
+}
+
 int
-rte_thash_add_helper(struct rte_thash_ctx *ctx __rte_unused,
-	const char *name __rte_unused, uint32_t len __rte_unused,
-	uint32_t offset __rte_unused)
+rte_thash_add_helper(struct rte_thash_ctx *ctx, const char *name, uint32_t len,
+	uint32_t offset)
 {
+	struct rte_thash_subtuple_helper *ent, *cur_ent, *prev_ent, *next_ent;
+	uint32_t start, end;
+	int ret;
+
+	if ((ctx == NULL) || (name == NULL) || (len < ctx->reta_sz_log) ||
+			((offset + len + TOEPLITZ_HASH_LEN - 1) >
+			ctx->key_len * CHAR_BIT))
+		return -EINVAL;
+
+	/* check for an existing name */
+	LIST_FOREACH(cur_ent, &ctx->head, next) {
+		if (strncmp(name, cur_ent->name, sizeof(cur_ent->name)) == 0)
+			return -EEXIST;
+	}
+
+	end = offset + len + TOEPLITZ_HASH_LEN - 1;
+	start = ((ctx->flags & RTE_THASH_MINIMAL_SEQ) ==
+		RTE_THASH_MINIMAL_SEQ) ? (end - (2 * ctx->reta_sz_log - 1)) :
+		offset;
+
+	ent = rte_zmalloc(NULL, sizeof(struct rte_thash_subtuple_helper) +
+		sizeof(uint32_t) * (1 << ctx->reta_sz_log),
+		RTE_CACHE_LINE_SIZE);
+	if (ent == NULL)
+		return -ENOMEM;
+
+	rte_strlcpy(ent->name, name, sizeof(ent->name));
+	ent->offset = start;
+	ent->len = end - start;
+	ent->tuple_offset = offset;
+	ent->tuple_len = len;
+	ent->lsb_msk = (1 << ctx->reta_sz_log) - 1;
+
+	cur_ent = LIST_FIRST(&ctx->head);
+	while (cur_ent) {
+		uint32_t range_end = cur_ent->offset + cur_ent->len;
+		next_ent = LIST_NEXT(cur_ent, next);
+		prev_ent = cur_ent;
+		/* iterate through overlapping ranges */
+		while ((next_ent != NULL) && (next_ent->offset < range_end)) {
+			range_end = RTE_MAX(next_ent->offset + next_ent->len,
+				range_end);
+			if (start > next_ent->offset)
+				prev_ent = next_ent;
+
+			next_ent = LIST_NEXT(next_ent, next);
+		}
+
+		if (start < cur_ent->offset)
+			return insert_before(ctx, ent, cur_ent, next_ent,
+				start, end, range_end);
+		else if (start < range_end)
+			return insert_after(ctx, ent, cur_ent, next_ent,
+				prev_ent, end, range_end);
+
+		cur_ent = next_ent;
+		continue;
+	}
+
+	ent->lfsr = alloc_lfsr(ctx);
+	if (ent->lfsr == NULL) {
+		rte_free(ent);
+		return -ENOMEM;
+	}
+
+	/* generate nonoverlapping range [start, end) */
+	ret = generate_subkey(ctx, ent->lfsr, start, end - 1);
+	if (ret != 0) {
+		free_lfsr(ent->lfsr);
+		rte_free(ent);
+		return ret;
+	}
+	if (LIST_EMPTY(&ctx->head)) {
+		LIST_INSERT_HEAD(&ctx->head, ent, next);
+	} else {
+		LIST_FOREACH(next_ent, &ctx->head, next)
+			prev_ent = next_ent;
+
+		LIST_INSERT_AFTER(prev_ent, ent, next);
+	}
+	generate_complement_table(ctx, ent);
+	ctx->subtuples_nb++;
+	return 0;
 }
 
 struct rte_thash_subtuple_helper *
-rte_thash_get_helper(struct rte_thash_ctx *ctx __rte_unused,
-	const char *name __rte_unused)
+rte_thash_get_helper(struct rte_thash_ctx *ctx, const char *name)
 {
+	struct rte_thash_subtuple_helper *ent;
+
+	if ((ctx == NULL) || (name == NULL))
+		return NULL;
+
+	LIST_FOREACH(ent, &ctx->head, next) {
+		if (strncmp(name, ent->name, sizeof(ent->name)) == 0)
+			return ent;
+	}
+
 	return NULL;
 }
 
 uint32_t
-rte_thash_get_complement(struct rte_thash_subtuple_helper *h __rte_unused,
-	uint32_t hash __rte_unused, uint32_t desired_hash __rte_unused)
+rte_thash_get_complement(struct rte_thash_subtuple_helper *h,
+	uint32_t hash, uint32_t desired_hash)
 {
-	return 0;
+	return h->compl_table[(hash ^ desired_hash) & h->lsb_msk];
 }
 
 const uint8_t *
-rte_thash_get_key(struct rte_thash_ctx *ctx __rte_unused)
+rte_thash_get_key(struct rte_thash_ctx *ctx)
 {
-	return NULL;
+	return ctx->hash_key;
+}
+
+static inline void
+xor_bit(uint8_t *ptr, uint32_t bit, uint32_t pos)
+{
+	uint32_t byte_idx = pos >> 3;
+	uint32_t bit_idx = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1));
+	uint8_t tmp;
+
+	tmp = ptr[byte_idx];
+	tmp ^= bit << bit_idx;
+	ptr[byte_idx] = tmp;
 }
 
 int
-rte_thash_adjust_tuple(struct rte_thash_ctx *ctx __rte_unused,
-	struct rte_thash_subtuple_helper *h __rte_unused,
-	uint8_t *tuple __rte_unused, unsigned int tuple_len __rte_unused,
-	uint32_t desired_value __rte_unused,
-	unsigned int attempts __rte_unused,
-	rte_thash_check_tuple_t fn __rte_unused, void *userdata __rte_unused)
+rte_thash_adjust_tuple(struct rte_thash_ctx *ctx,
+	struct rte_thash_subtuple_helper *h,
+	uint8_t *tuple, unsigned int tuple_len,
+	uint32_t desired_value, unsigned int attempts,
+	rte_thash_check_tuple_t fn, void *userdata)
 {
-	return 0;
+	uint32_t tmp_tuple[tuple_len / sizeof(uint32_t)];
+	unsigned int i, j;
+	int ret = 0;
+	uint32_t hash, adj_bits;
+	uint8_t bit;
+	const uint8_t *hash_key;
+
+	if ((ctx == NULL) || (h == NULL) || (tuple == NULL) ||
+			((tuple_len % sizeof(uint32_t)) != 0) ||
+			(attempts == 0))
+		return -EINVAL;
+
+	hash_key = rte_thash_get_key(ctx);
+
+	for (i = 0; i < attempts; i++) {
+		for (j = 0; j < (tuple_len / 4); j++)
+			tmp_tuple[j] =
+				rte_be_to_cpu_32(*(uint32_t *)&tuple[j * 4]);
+
+		hash = rte_softrss(tmp_tuple, tuple_len / 4, hash_key);
+		adj_bits = rte_thash_get_complement(h, hash, desired_value);
+
+		/*
+		 * Hint: LSB of adj_bits corresponds to
+		 * bit (offset + len - 1) of the tuple
+		 */
+		for (j = 0; j < sizeof(uint32_t) * CHAR_BIT; j++) {
+			bit = (adj_bits >> j) & 0x1;
+			if (bit)
+				xor_bit(tuple, bit, h->tuple_offset +
+					h->tuple_len - 1 - j);
+		}
+
+		if (fn != NULL) {
+			ret = (fn(userdata, tuple)) ? 0 : -EEXIST;
+			if (ret == 0)
+				return 0;
+			else if (i < (attempts - 1)) {
+				/* update tuple with random bits */
+				for (j = 0; j < h->tuple_len; j++) {
+					bit = rte_rand() & 0x1;
+					if (bit)
+						xor_bit(tuple, bit,
+							h->tuple_offset +
+							h->tuple_len - 1 - j);
+				}
+			}
+		} else
+			return 0;
+	}
+
+	return ret;
 }
-- 
2.7.4
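
[Editor's note] For readers evaluating the API, here is a minimal usage
sketch. It is an illustration, not part of the patch: it assumes the
public prototypes added to rte_thash.h by patch 1/3 of this series, and
the key length, reta_sz and field offsets are example values. Because
the Toeplitz hash is linear over GF(2), a single
rte_thash_get_complement() lookup (done internally by
rte_thash_adjust_tuple()) is enough to find which source port bits to
flip so that the low bits of the hash take the desired value:

#include <rte_errno.h>
#include <rte_thash.h>

/*
 * Sketch: make the 6 LSBs of the RSS hash of an IPv4/UDP tuple equal
 * 0x2a by letting the library rewrite the 16-bit source port, which
 * occupies bits 64..79 of the 12-byte tuple.
 */
static int
steer_flow_example(void)
{
	struct rte_thash_ctx *ctx;
	struct rte_thash_subtuple_helper *h;
	/* tuple in network byte order: src IP, dst IP, src port, dst port */
	uint8_t tuple[12] = {
		192, 168, 0, 1,		/* source IPv4 */
		10, 0, 0, 1,		/* destination IPv4 */
		0, 0,			/* source port, set by the library */
		0x1f, 0x90,		/* destination port 8080 */
	};
	int ret;

	/* 40B random hash key, 2^6 = 64 RETA entries */
	ctx = rte_thash_init_ctx("example", 40, 6, NULL, 0);
	if (ctx == NULL)
		return -rte_errno;

	/* helper spanning the source port: len = 16 bits, offset = 64 bits */
	ret = rte_thash_add_helper(ctx, "sport", 16, 64);
	if (ret != 0)
		goto out;

	h = rte_thash_get_helper(ctx, "sport");

	/* one attempt, no check callback */
	ret = rte_thash_adjust_tuple(ctx, h, tuple, sizeof(tuple),
		0x2a, 1, NULL, NULL);
out:
	rte_thash_free_ctx(ctx);
	return ret;
}

On success, bytes 8-9 of the tuple hold a new source port such that the
low 6 bits of the software Toeplitz hash equal 0x2a, i.e. the flow lands
in RETA bucket 0x2a.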
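
[Editor's note] As a cross-check of the polynomial encoding described in
the irreducible_poly_table comment, the small standalone program below
reproduces the worked example, x^5 + x^2 + 1 -> 0x9. encode_poly() is a
hypothetical helper written for this note only, not part of the patch:

#include <stdint.h>
#include <stdio.h>

/*
 * Encode a polynomial given as a coefficient mask (bit i is the
 * coefficient of x^i) into the representation used by
 * irreducible_poly_table[]: reverse the deg+1 coefficient bits
 * ("BE bit order") and mask out the x^0 term.
 */
static uint32_t
encode_poly(uint32_t coeff_mask, uint32_t deg)
{
	uint32_t i, rev = 0;

	for (i = 0; i <= deg; i++)
		rev |= ((coeff_mask >> i) & 0x1) << (deg - i);

	return rev & ((1u << deg) - 1);
}

int
main(void)
{
	/* x^5 + x^2 + 1 -> coefficient mask 100101b = 0x25 */
	printf("0x%x\n", encode_poly(0x25, 5));	/* prints 0x9 */
	return 0;
}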