DPDK patches and discussions
 help / color / mirror / Atom feed
From: Leyi Rong <leyi.rong@intel.com>
To: ferruh.yigit@xilinx.com, suanmingm@nvidia.com,
	yipeng1.wang@intel.com, zaoxingliu@gmail.com,
	sameh.gobriel@intel.com
Cc: dev@dpdk.org, Leyi Rong <leyi.rong@intel.com>
Subject: [PATCH v2 1/2] member: implement NitroSketch mode
Date: Wed, 31 Aug 2022 14:46:38 +0800	[thread overview]
Message-ID: <20220831064639.4163765-2-leyi.rong@intel.com> (raw)
In-Reply-To: <20220831064639.4163765-1-leyi.rong@intel.com>

Sketching algorithm provide high-fidelity approximate measurements and
appears as a promising alternative to traditional approaches such as
packet sampling.

NitroSketch [1] is a software sketching framework that optimizes
performance, provides accuracy guarantees, and supports a variety of
sketches.

This commit adds a new data structure called sketch into
membership library. This new data structure is an efficient
way to profile the traffic for heavy hitters. Also use min-heap
structure to maintain the top-k flow keys.

[1] Zaoxing Liu, Ran Ben-Basat, Gil Einziger, Yaron Kassner, Vladimir
Braverman, Roy Friedman, Vyas Sekar, "NitroSketch: Robust and General
Sketch-based Monitoring in Software Switches", in ACM SIGCOMM 2019.
https://dl.acm.org/doi/pdf/10.1145/3341302.3342076

Signed-off-by: Alan Liu <zaoxingliu@gmail.com>
Signed-off-by: Yipeng Wang <yipeng1.wang@intel.com>
Signed-off-by: Leyi Rong <leyi.rong@intel.com>
---
 lib/member/meson.build                |  38 +-
 lib/member/rte_member.c               |  75 ++++
 lib/member/rte_member.h               | 151 ++++++-
 lib/member/rte_member_heap.h          | 424 ++++++++++++++++++
 lib/member/rte_member_sketch.c        | 594 ++++++++++++++++++++++++++
 lib/member/rte_member_sketch.h        |  97 +++++
 lib/member/rte_member_sketch_avx512.c |  69 +++
 lib/member/rte_member_sketch_avx512.h |  36 ++
 lib/member/rte_xxh64_avx512.h         | 117 +++++
 lib/member/version.map                |   3 +
 10 files changed, 1600 insertions(+), 4 deletions(-)
 create mode 100644 lib/member/rte_member_heap.h
 create mode 100644 lib/member/rte_member_sketch.c
 create mode 100644 lib/member/rte_member_sketch.h
 create mode 100644 lib/member/rte_member_sketch_avx512.c
 create mode 100644 lib/member/rte_member_sketch_avx512.h
 create mode 100644 lib/member/rte_xxh64_avx512.h

diff --git a/lib/member/meson.build b/lib/member/meson.build
index e06fddc240..9b3418c25c 100644
--- a/lib/member/meson.build
+++ b/lib/member/meson.build
@@ -7,6 +7,42 @@ if is_windows
     subdir_done()
 endif
 
-sources = files('rte_member.c', 'rte_member_ht.c', 'rte_member_vbf.c')
+sources = files('rte_member.c', 'rte_member_ht.c', 'rte_member_vbf.c', 'rte_member_sketch.c')
 headers = files('rte_member.h')
 deps += ['hash']
+includes += include_directories('../hash', '../ring')
+
+# compile AVX512 version if:
+# we are building 64-bit binary AND binutils can generate proper code
+if dpdk_conf.has('RTE_ARCH_X86_64') and binutils_ok
+    # compile AVX512 version if either:
+    # a. we have AVX512 supported in minimum instruction set
+    #    baseline
+    # b. it's not minimum instruction set, but supported by
+    #    compiler
+    #
+    # in former case, just add avx512 C file to files list
+    # in latter case, compile c file to static lib, using correct
+    # compiler flags, and then have the .o file from static lib
+    # linked into main lib.
+    sketch_avx512_cpu_support = (
+        cc.get_define('__AVX512F__', args: machine_args) != ''
+    )
+
+    if sketch_avx512_cpu_support == true
+	cflags += ['-DCC_AVX512_SUPPORT']
+	if cc.has_multi_arguments('-mavx512f', '-mavx512dq', '-mavx512ifma')
+	    cflags += ['-mavx512f', '-mavx512dq', '-mavx512ifma']
+	endif
+	sources += files('rte_member_sketch_avx512.c')
+    elif cc.has_multi_arguments('-mavx512f', '-mavx512dq', '-mavx512ifma')
+	cflags += ['-DCC_AVX512_SUPPORT']
+	cflags += ['-mavx512f', '-mavx512dq', '-mavx512ifma']
+	sketch_avx512_tmp = static_library('sketch_avx512_tmp',
+	    'rte_member_sketch_avx512.c',
+	    include_directories: includes,
+	    dependencies: static_rte_eal,
+	    c_args: cflags)
+	objs += sketch_avx512_tmp.extract_objects('rte_member_sketch_avx512.c')
+    endif
+endif
diff --git a/lib/member/rte_member.c b/lib/member/rte_member.c
index 7e1632e6b5..8f859f7fbd 100644
--- a/lib/member/rte_member.c
+++ b/lib/member/rte_member.c
@@ -9,10 +9,12 @@
 #include <rte_malloc.h>
 #include <rte_errno.h>
 #include <rte_tailq.h>
+#include <rte_ring_elem.h>
 
 #include "rte_member.h"
 #include "rte_member_ht.h"
 #include "rte_member_vbf.h"
+#include "rte_member_sketch.h"
 
 TAILQ_HEAD(rte_member_list, rte_tailq_entry);
 static struct rte_tailq_elem rte_member_tailq = {
@@ -72,6 +74,9 @@ rte_member_free(struct rte_member_setsum *setsum)
 	case RTE_MEMBER_TYPE_VBF:
 		rte_member_free_vbf(setsum);
 		break;
+	case RTE_MEMBER_TYPE_SKETCH:
+		rte_member_free_sketch(setsum);
+		break;
 	default:
 		break;
 	}
@@ -86,6 +91,8 @@ rte_member_create(const struct rte_member_parameters *params)
 	struct rte_member_list *member_list;
 	struct rte_member_setsum *setsum;
 	int ret;
+	char ring_name[RTE_RING_NAMESIZE];
+	struct rte_ring *sketch_key_ring = NULL;
 
 	if (params == NULL) {
 		rte_errno = EINVAL;
@@ -100,6 +107,16 @@ rte_member_create(const struct rte_member_parameters *params)
 		return NULL;
 	}
 
+	if (params->type == RTE_MEMBER_TYPE_SKETCH) {
+		snprintf(ring_name, sizeof(ring_name), "SK_%s", params->name);
+		sketch_key_ring = rte_ring_create_elem(ring_name, sizeof(uint32_t),
+				rte_align32pow2(params->top_k), params->socket_id, 0);
+		if (sketch_key_ring == NULL) {
+			RTE_MEMBER_LOG(ERR, "Sketch Ring Memory allocation failed\n");
+			return NULL;
+		}
+	}
+
 	member_list = RTE_TAILQ_CAST(rte_member_tailq.head, rte_member_list);
 
 	rte_mcfg_tailq_write_lock();
@@ -145,6 +162,9 @@ rte_member_create(const struct rte_member_parameters *params)
 	case RTE_MEMBER_TYPE_VBF:
 		ret = rte_member_create_vbf(setsum, params);
 		break;
+	case RTE_MEMBER_TYPE_SKETCH:
+		ret = rte_member_create_sketch(setsum, params, sketch_key_ring);
+		break;
 	default:
 		goto error_unlock_exit;
 	}
@@ -162,6 +182,7 @@ rte_member_create(const struct rte_member_parameters *params)
 error_unlock_exit:
 	rte_free(te);
 	rte_free(setsum);
+	rte_ring_free(sketch_key_ring);
 	rte_mcfg_tailq_write_unlock();
 	return NULL;
 }
@@ -178,6 +199,23 @@ rte_member_add(const struct rte_member_setsum *setsum, const void *key,
 		return rte_member_add_ht(setsum, key, set_id);
 	case RTE_MEMBER_TYPE_VBF:
 		return rte_member_add_vbf(setsum, key, set_id);
+	case RTE_MEMBER_TYPE_SKETCH:
+		return rte_member_add_sketch(setsum, key, set_id);
+	default:
+		return -EINVAL;
+	}
+}
+
+int
+rte_member_add_byte_count(const struct rte_member_setsum *setsum,
+			  const void *key, uint32_t byte_count)
+{
+	if (setsum == NULL || key == NULL || byte_count == 0)
+		return -EINVAL;
+
+	switch (setsum->type) {
+	case RTE_MEMBER_TYPE_SKETCH:
+		return rte_member_add_sketch_byte_count(setsum, key, byte_count);
 	default:
 		return -EINVAL;
 	}
@@ -195,6 +233,8 @@ rte_member_lookup(const struct rte_member_setsum *setsum, const void *key,
 		return rte_member_lookup_ht(setsum, key, set_id);
 	case RTE_MEMBER_TYPE_VBF:
 		return rte_member_lookup_vbf(setsum, key, set_id);
+	case RTE_MEMBER_TYPE_SKETCH:
+		return rte_member_lookup_sketch(setsum, key, set_id);
 	default:
 		return -EINVAL;
 	}
@@ -261,6 +301,36 @@ rte_member_lookup_multi_bulk(const struct rte_member_setsum *setsum,
 	}
 }
 
+int
+rte_member_query_count(const struct rte_member_setsum *setsum,
+		       const void *key, uint64_t *output)
+{
+	if (setsum == NULL || key == NULL || output == NULL)
+		return -EINVAL;
+
+	switch (setsum->type) {
+	case RTE_MEMBER_TYPE_SKETCH:
+		return rte_member_query_sketch(setsum, key, output);
+	default:
+		return -EINVAL;
+	}
+}
+
+int
+rte_member_report_heavyhitter(const struct rte_member_setsum *setsum,
+				void **key, uint64_t *count)
+{
+	if (setsum == NULL || key == NULL || count == NULL)
+		return -EINVAL;
+
+	switch (setsum->type) {
+	case RTE_MEMBER_TYPE_SKETCH:
+		return rte_member_report_heavyhitter_sketch(setsum, key, count);
+	default:
+		return -EINVAL;
+	}
+}
+
 int
 rte_member_delete(const struct rte_member_setsum *setsum, const void *key,
 			member_set_t set_id)
@@ -272,6 +342,8 @@ rte_member_delete(const struct rte_member_setsum *setsum, const void *key,
 	case RTE_MEMBER_TYPE_HT:
 		return rte_member_delete_ht(setsum, key, set_id);
 	/* current vBF implementation does not support delete function */
+	case RTE_MEMBER_TYPE_SKETCH:
+		return rte_member_delete_sketch(setsum, key);
 	case RTE_MEMBER_TYPE_VBF:
 	default:
 		return -EINVAL;
@@ -290,6 +362,9 @@ rte_member_reset(const struct rte_member_setsum *setsum)
 	case RTE_MEMBER_TYPE_VBF:
 		rte_member_reset_vbf(setsum);
 		return;
+	case RTE_MEMBER_TYPE_SKETCH:
+		rte_member_reset_sketch(setsum);
+		return;
 	default:
 		return;
 	}
diff --git a/lib/member/rte_member.h b/lib/member/rte_member.h
index 2611015771..c133fa3ed7 100644
--- a/lib/member/rte_member.h
+++ b/lib/member/rte_member.h
@@ -39,6 +39,18 @@
  * |          |                     | not overwrite  |                         |
  * |          |                     | existing key.  |                         |
  * +----------+---------------------+----------------+-------------------------+
+ * +==========+=============================+
+ * |   type   |      sketch                 |
+ * +==========+=============================+
+ * |structure | counting bloom filter array |
+ * +----------+-----------------------------+
+ * |set id    | 1: heavy set, 0: light set  |
+ * |          |                             |
+ * +----------+-----------------------------+
+ * |usages &  | count size of a flow,       |
+ * |properties| used for heavy hitter       |
+ * |          | detection.                  |
+ * +----------+-----------------------------+
  * -->
  */
 
@@ -50,6 +62,8 @@ extern "C" {
 #endif
 
 #include <stdint.h>
+#include <stdbool.h>
+#include <inttypes.h>
 
 #include <rte_common.h>
 
@@ -65,6 +79,20 @@ typedef uint16_t member_set_t;
 #define RTE_MEMBER_BUCKET_ENTRIES 16
 /** Maximum number of characters in setsum name. */
 #define RTE_MEMBER_NAMESIZE 32
+/** Max value of the random number */
+#define RTE_RAND_MAX      ~0LLU
+/**
+ * As packets skipped in the sampling-based algorithm, the accounting
+ * results accuracy is not guaranteed in the start stage. There should
+ * be a "convergence time" to achieve the accuracy after receiving enough
+ * packets.
+ * For sketch, use the flag if prefer always bounded mode, which only
+ * starts sampling after receiving enough packets to keep the results
+ * accuracy always bounded.
+ */
+#define RTE_MEMBER_SKETCH_ALWAYS_BOUNDED 0x01
+/** For sketch, use the flag if to count packet size instead of packet count */
+#define RTE_MEMBER_SKETCH_COUNT_BYTE 0x02
 
 /** @internal Hash function used by membership library. */
 #if defined(RTE_ARCH_X86) || defined(__ARM_FEATURE_CRC32)
@@ -104,6 +132,7 @@ struct rte_member_parameters;
 enum rte_member_setsum_type {
 	RTE_MEMBER_TYPE_HT = 0,  /**< Hash table based set summary. */
 	RTE_MEMBER_TYPE_VBF,     /**< Vector of bloom filters. */
+	RTE_MEMBER_TYPE_SKETCH,
 	RTE_MEMBER_NUM_TYPE
 };
 
@@ -114,6 +143,19 @@ enum rte_member_sig_compare_function {
 	RTE_MEMBER_COMPARE_NUM
 };
 
+/* sketch update function with different implementations. */
+typedef void (*sketch_update_fn_t)(const struct rte_member_setsum *ss,
+				   const void *key,
+				   uint32_t count);
+
+/* sketch lookup function with different implementations. */
+typedef uint64_t (*sketch_lookup_fn_t)(const struct rte_member_setsum *ss,
+				       const void *key);
+
+/* sketch delete function with different implementations. */
+typedef void (*sketch_delete_fn_t)(const struct rte_member_setsum *ss,
+				   const void *key);
+
 /** @internal setsummary structure. */
 struct rte_member_setsum {
 	enum rte_member_setsum_type type; /* Type of the set summary. */
@@ -134,6 +176,21 @@ struct rte_member_setsum {
 	uint32_t bit_mask;	/* Bit mask to get bit location in bf. */
 	uint32_t num_hashes;	/* Number of hash values to index bf. */
 
+	/* Parameters for sketch */
+	float error_rate;
+	float sample_rate;
+	uint32_t num_col;
+	uint32_t num_row;
+	int always_bounded;
+	double converge_thresh;
+	uint32_t topk;
+	uint32_t count_byte;
+	uint64_t *hash_seeds;
+	sketch_update_fn_t sketch_update; /* Pointer to the sketch update function */
+	sketch_lookup_fn_t sketch_lookup; /* Pointer to the sketch lookup function */
+	sketch_delete_fn_t sketch_delete; /* Pointer to the sketch delete function */
+
+	void *runtime_var;
 	uint32_t mul_shift;  /* vbf internal variable used during bit test. */
 	uint32_t div_shift;  /* vbf internal variable used during bit test. */
 
@@ -143,6 +200,9 @@ struct rte_member_setsum {
 	/* Second cache line should start here. */
 	uint32_t socket_id;          /* NUMA Socket ID for memory. */
 	char name[RTE_MEMBER_NAMESIZE]; /* Name of this set summary. */
+#ifdef RTE_ARCH_X86
+	bool use_avx512;
+#endif
 } __rte_cache_aligned;
 
 /**
@@ -261,8 +321,33 @@ struct rte_member_parameters {
 	 */
 	uint32_t sec_hash_seed;
 
+	/**
+	 * For count(min) sketch data structure, error rate defines the accuracy
+	 * required by the user. Higher accuracy leads to more memory usage, but
+	 * the flow size is estimated more accurately.
+	 */
+	float error_rate;
+
+	/**
+	 * Sampling rate means the internal sample rate of the rows of the count
+	 * min sketches. Lower sampling rate can reduce CPU overhead, but the
+	 * data structure will require more time to converge statistically.
+	 */
+	float sample_rate;
+
+	/**
+	 * How many top heavy hitter to be reported. The library will internally
+	 * keep the keys of heavy hitters for final report.
+	 */
+	uint32_t top_k;
+
+	/**
+	 * Extra flags that may passed in by user
+	 */
+	uint32_t extra_flag;
+
 	int socket_id;			/**< NUMA Socket ID for memory. */
-};
+} __rte_cache_aligned;
 
 /**
  * @warning
@@ -418,7 +503,7 @@ rte_member_lookup_multi_bulk(const struct rte_member_setsum *setsum,
  *   RTE_MEMBER_NO_MATCH by default is set as 0.
  *   For HT mode, the set_id has range as [1, 0x7FFF], MSB is reserved.
  *   For vBF mode the set id is limited by the num_set parameter when create
- *   the set-summary.
+ *   the set-summary. For sketch mode, this id is ignored.
  * @return
  *   HT (cache mode) and vBF should never fail unless the set_id is not in the
  *   valid range. In such case -EINVAL is returned.
@@ -429,12 +514,72 @@ rte_member_lookup_multi_bulk(const struct rte_member_setsum *setsum,
  *   Return 0 for HT (cache mode) if the add does not cause
  *   eviction, return 1 otherwise. Return 0 for non-cache mode if success,
  *   -ENOSPC for full, and 1 if cuckoo eviction happens.
- *   Always returns 0 for vBF mode.
+ *   Always returns 0 for vBF mode and sketch.
  */
 int
 rte_member_add(const struct rte_member_setsum *setsum, const void *key,
 			member_set_t set_id);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Add the packet byte size into the sketch.
+ *
+ * @param setsum
+ *   Pointer of a set-summary.
+ * @param key
+ *   Pointer of the key to be added.
+ * @param byte_count
+ *   Add the byte count of the packet into the sketch.
+ * @return
+ * Return -EINVAL for invalid parameters, otherwise return 0.
+ */
+int
+rte_member_add_byte_count(const struct rte_member_setsum *setsum,
+			  const void *key, uint32_t byte_count);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Query packet count for a certain flow-key.
+ *
+ * @param setsum
+ *   Pointer of a set-summary.
+ * @param key
+ *   Pointer of the key to be added.
+ * @param count
+ *   The output packet count or byte count.
+ * @return
+ *   Return -EINVAL for invalid parameters.
+ */
+int
+rte_member_query_count(const struct rte_member_setsum *setsum,
+		       const void *key, uint64_t *count);
+
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Report heavyhitter flow-keys into set-summary (SS).
+ *
+ * @param setsum
+ *   Pointer of a set-summary.
+ * @param keys
+ *   Pointer of the output top-k key array.
+ * @param counts
+ *   Pointer of the output packet count or byte count array of the top-k keys.
+ * @return
+ *   Return -EINVAL for invalid parameters. Return a positive integer indicate
+ *   how many heavy hitters are reported.
+ */
+int
+rte_member_report_heavyhitter(const struct rte_member_setsum *setsum,
+			      void **keys, uint64_t *counts);
+
+
 /**
  * @warning
  * @b EXPERIMENTAL: this API may change without prior notice
diff --git a/lib/member/rte_member_heap.h b/lib/member/rte_member_heap.h
new file mode 100644
index 0000000000..3ced34160a
--- /dev/null
+++ b/lib/member/rte_member_heap.h
@@ -0,0 +1,424 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ * Copyright(c) 2020, Alan Liu <zaoxingliu@gmail.com>
+ */
+
+#ifndef _RTE_MEMBER_HEAP_H_
+#define _RTE_MEMBER_HEAP_H_
+
+#include <rte_ring_elem.h>
+#include "rte_member.h"
+
+#define LCHILD(x) (2 * x + 1)
+#define RCHILD(x) (2 * x + 2)
+#define PARENT(x) ((x - 1) / 2)
+
+#define HASH_BKT_SIZE 16
+#define HASH_HP_MULTI 4
+#define HASH_RESIZE_MULTI 2
+
+struct hash_bkt {
+	uint16_t sig[HASH_BKT_SIZE];
+	uint16_t idx[HASH_BKT_SIZE];
+};
+
+struct hash {
+	uint16_t bkt_cnt;
+	uint16_t num_item;
+	uint32_t seed;
+	struct hash_bkt buckets[0];
+};
+
+struct node {
+	void *key;
+	uint64_t count;
+};
+
+struct minheap {
+	uint32_t key_len;
+	uint32_t size;
+	uint32_t socket;
+	struct hash *hashtable;
+	struct node *elem;
+};
+
+static int
+hash_table_insert(const void *key, int value, int key_len, struct hash *table)
+{
+	uint32_t hash = MEMBER_HASH_FUNC(key, key_len, table->seed);
+	uint16_t idx = hash % table->bkt_cnt;
+	uint16_t sig = hash >> 16;
+	int i;
+
+	for (i = 0; i < HASH_BKT_SIZE; i++) {
+		if (table->buckets[idx].idx[i] == 0) {
+			table->buckets[idx].idx[i] = value;
+			table->buckets[idx].sig[i] = sig;
+			table->num_item++;
+			return 0;
+		}
+	}
+
+	return -ENOMEM;
+}
+
+static int
+hash_table_update(const void *key, int old_value, int value, int key_len, struct hash *table)
+{
+	uint32_t hash = MEMBER_HASH_FUNC(key, key_len, table->seed);
+	uint16_t idx = hash % table->bkt_cnt;
+	uint16_t sig = hash >> 16;
+	int i;
+
+	for (i = 0; i < HASH_BKT_SIZE; i++) {
+		if (table->buckets[idx].sig[i] == sig && table->buckets[idx].idx[i] == old_value) {
+			table->buckets[idx].idx[i] = value;
+			return 0;
+		}
+	}
+
+	return -1;
+}
+
+static int
+hash_table_del(const void *key, uint16_t value, int key_len, struct hash *table)
+{
+	uint32_t hash = MEMBER_HASH_FUNC(key, key_len, table->seed);
+	uint16_t idx = hash % table->bkt_cnt;
+	uint16_t sig = hash >> 16;
+	int i;
+
+	for (i = 0; i < HASH_BKT_SIZE; i++) {
+		if (table->buckets[idx].sig[i] == sig && table->buckets[idx].idx[i] == value) {
+			table->buckets[idx].idx[i] = 0;
+			table->num_item--;
+			return 0;
+		}
+	}
+
+	return -1;
+}
+
+static int
+hash_table_lookup(const void *key, int key_len, struct minheap *hp)
+{
+	struct hash *table = hp->hashtable;
+	uint32_t hash = MEMBER_HASH_FUNC(key, key_len, table->seed);
+	uint16_t idx = hash % table->bkt_cnt;
+	uint16_t sig = hash >> 16;
+	int i;
+
+	for (i = 0; i < HASH_BKT_SIZE; i++) {
+		if (table->buckets[idx].sig[i] == sig && table->buckets[idx].idx[i] != 0) {
+			uint32_t hp_idx = table->buckets[idx].idx[i] - 1;
+
+			if (memcmp(hp->elem[hp_idx].key, key, hp->key_len) == 0)
+				return hp_idx;
+		}
+	}
+
+	return -ENOENT; /* key doesn't exist */
+}
+
+static int
+resize_hash_table(struct minheap *hp)
+{
+	uint32_t i;
+	uint32_t new_bkt_cnt;
+
+	while (1) {
+		new_bkt_cnt = hp->hashtable->bkt_cnt * HASH_RESIZE_MULTI;
+
+		RTE_MEMBER_LOG(ERR, "Sketch Minheap HT load factor is [%f]\n",
+			hp->hashtable->num_item / ((float)hp->hashtable->bkt_cnt * HASH_BKT_SIZE));
+		RTE_MEMBER_LOG(ERR, "Sketch Minheap HT resize happen!\n");
+		rte_free(hp->hashtable);
+		hp->hashtable = rte_zmalloc_socket(NULL, sizeof(struct hash) +
+						new_bkt_cnt * sizeof(struct hash_bkt),
+						RTE_CACHE_LINE_SIZE, hp->socket);
+
+		if (hp->hashtable == NULL) {
+			RTE_MEMBER_LOG(ERR, "Sketch Minheap HT allocation failed\n");
+			return -ENOMEM;
+		}
+
+		hp->hashtable->bkt_cnt = new_bkt_cnt;
+
+		for (i = 0; i < hp->size; ++i) {
+			if (hash_table_insert(hp->elem[i].key,
+				i + 1, hp->key_len, hp->hashtable) < 0) {
+				RTE_MEMBER_LOG(ERR,
+					"Sketch Minheap HT resize insert fail!\n");
+				break;
+			}
+		}
+		if (i == hp->size)
+			break;
+	}
+
+	return 0;
+}
+
+/* find the item in the given minheap */
+static int
+rte_member_minheap_find(struct minheap *hp, const void *key)
+{
+	int idx = hash_table_lookup(key, hp->key_len, hp);
+	return idx;
+}
+
+static int
+rte_member_minheap_init(struct minheap *heap, int size,
+			uint32_t socket, uint32_t seed)
+{
+	heap->elem = rte_zmalloc_socket(NULL, sizeof(struct node) * size,
+				RTE_CACHE_LINE_SIZE, socket);
+	if (heap->elem == NULL) {
+		RTE_MEMBER_LOG(ERR, "Sketch Minheap elem allocation failed\n");
+		return -ENOMEM;
+	}
+
+	uint32_t hash_bkt_cnt = rte_align32pow2(size * HASH_HP_MULTI) / HASH_BKT_SIZE;
+
+	if (hash_bkt_cnt == 0)
+		hash_bkt_cnt = 1;
+
+	heap->hashtable = rte_zmalloc_socket(NULL, sizeof(struct hash) +
+					hash_bkt_cnt * sizeof(struct hash_bkt),
+					RTE_CACHE_LINE_SIZE, socket);
+
+	if (heap->hashtable == NULL) {
+		RTE_MEMBER_LOG(ERR, "Sketch Minheap HT allocation failed\n");
+		rte_free(heap->elem);
+		return -ENOMEM;
+	}
+
+	heap->hashtable->seed = seed;
+	heap->hashtable->bkt_cnt = hash_bkt_cnt;
+	heap->socket = socket;
+
+	return 0;
+}
+
+/* swap the minheap nodes */
+static __rte_always_inline void
+rte_member_heap_swap(struct node *n1, struct node *n2)
+{
+	struct node temp = *n1;
+	*n1 = *n2;
+	*n2 = temp;
+}
+
+/* heapify function */
+static void
+rte_member_heapify(struct minheap *hp, uint32_t idx, bool update_hash)
+{
+	uint32_t smallest;
+
+	if (LCHILD(idx) < hp->size &&
+			hp->elem[LCHILD(idx)].count < hp->elem[idx].count)
+		smallest = LCHILD(idx);
+	else
+		smallest = idx;
+
+	if (RCHILD(idx) < hp->size &&
+			hp->elem[RCHILD(idx)].count < hp->elem[smallest].count)
+		smallest = RCHILD(idx);
+
+	if (smallest != idx) {
+		rte_member_heap_swap(&(hp->elem[idx]), &(hp->elem[smallest]));
+
+		if (update_hash) {
+			if (hash_table_update(hp->elem[smallest].key, idx + 1, smallest + 1,
+					hp->key_len, hp->hashtable) < 0) {
+				RTE_MEMBER_LOG(ERR, "Minheap Hash Table update failed\n");
+				return;
+			}
+
+			if (hash_table_update(hp->elem[idx].key, smallest + 1, idx + 1,
+					hp->key_len, hp->hashtable) < 0) {
+				RTE_MEMBER_LOG(ERR, "Minheap Hash Table update failed\n");
+				return;
+			}
+		}
+		rte_member_heapify(hp, smallest, update_hash);
+	}
+}
+
+/* insert a node into the minheap */
+static int
+rte_member_minheap_insert_node(struct minheap *hp, const void *key,
+			       int counter, void *key_slot,
+			       struct rte_ring *free_key_slot)
+{
+	struct node nd;
+	uint32_t slot_id;
+
+	if (rte_ring_sc_dequeue_elem(free_key_slot, &slot_id, sizeof(uint32_t)) != 0) {
+		RTE_MEMBER_LOG(ERR, "Minheap get empty keyslot failed\n");
+		return -1;
+	}
+
+	nd.count = counter;
+	nd.key = RTE_PTR_ADD(key_slot, slot_id * hp->key_len);
+
+	memcpy(nd.key, key, hp->key_len);
+
+	uint32_t i = (hp->size)++;
+
+	while (i && nd.count < hp->elem[PARENT(i)].count) {
+		hp->elem[i] = hp->elem[PARENT(i)];
+		if (hash_table_update(hp->elem[i].key, PARENT(i) + 1, i + 1,
+				hp->key_len, hp->hashtable) < 0) {
+			RTE_MEMBER_LOG(ERR, "Minheap Hash Table update failed\n");
+			return -1;
+		}
+		i = PARENT(i);
+	}
+	hp->elem[i] = nd;
+
+	if (hash_table_insert(key, i + 1, hp->key_len, hp->hashtable) < 0) {
+		if (resize_hash_table(hp) < 0) {
+			RTE_MEMBER_LOG(ERR, "Minheap Hash Table resize failed\n");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/* delete a key from the minheap */
+static int
+rte_member_minheap_delete_node(struct minheap *hp, const void *key,
+			       void *key_slot, struct rte_ring *free_key_slot)
+{
+	int idx = rte_member_minheap_find(hp, key);
+	uint32_t offset = RTE_PTR_DIFF(hp->elem[idx].key, key_slot) / hp->key_len;
+
+	if (hash_table_del(key, idx + 1, hp->key_len, hp->hashtable) < 0) {
+		RTE_MEMBER_LOG(ERR, "Minheap Hash Table delete failed\n");
+		return -1;
+	}
+
+	rte_ring_sp_enqueue_elem(free_key_slot, &offset, sizeof(uint32_t));
+
+	if (idx == (int)(hp->size - 1)) {
+		hp->size--;
+		return 0;
+	}
+
+	hp->elem[idx] = hp->elem[hp->size - 1];
+
+	if (hash_table_update(hp->elem[idx].key, hp->size, idx + 1,
+				hp->key_len, hp->hashtable) < 0) {
+		RTE_MEMBER_LOG(ERR, "Minheap Hash Table update failed\n");
+		return -1;
+	}
+	hp->size--;
+	rte_member_heapify(hp, idx, true);
+
+	return 0;
+}
+
+/* replace a min node with a new key. */
+static int
+rte_member_minheap_replace_node(struct minheap *hp,
+				const void *new_key,
+				int new_counter)
+{
+	struct node nd;
+	void *recycle_key = NULL;
+
+	recycle_key = hp->elem[0].key;
+
+	if (hash_table_del(recycle_key, 1, hp->key_len, hp->hashtable) < 0) {
+		RTE_MEMBER_LOG(ERR, "Minheap Hash Table delete failed\n");
+		return -1;
+	}
+
+	hp->elem[0] = hp->elem[hp->size - 1];
+
+	if (hash_table_update(hp->elem[0].key, hp->size, 1,
+				hp->key_len, hp->hashtable) < 0) {
+		RTE_MEMBER_LOG(ERR, "Minheap Hash Table update failed\n");
+		return -1;
+	}
+	hp->size--;
+
+	rte_member_heapify(hp, 0, true);
+
+	nd.count = new_counter;
+	nd.key = recycle_key;
+
+	memcpy(nd.key, new_key, hp->key_len);
+
+	uint32_t i = (hp->size)++;
+
+	while (i && nd.count < hp->elem[PARENT(i)].count) {
+		hp->elem[i] = hp->elem[PARENT(i)];
+		if (hash_table_update(hp->elem[i].key, PARENT(i) + 1, i + 1,
+				hp->key_len, hp->hashtable) < 0) {
+			RTE_MEMBER_LOG(ERR, "Minheap Hash Table update failed\n");
+			return -1;
+		}
+		i = PARENT(i);
+	}
+
+	hp->elem[i] = nd;
+
+	if (hash_table_insert(new_key, i + 1, hp->key_len, hp->hashtable) < 0) {
+		RTE_MEMBER_LOG(ERR, "Minheap Hash Table replace insert failed\n");
+		if (resize_hash_table(hp) < 0) {
+			RTE_MEMBER_LOG(ERR, "Minheap Hash Table replace resize failed\n");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/* sort the heap into a descending array */
+static void
+rte_member_heapsort(struct minheap *hp, struct node *result_array)
+{
+	struct minheap new_hp;
+
+	/* build a new heap for using the given array */
+	new_hp.size = hp->size;
+	new_hp.key_len = hp->key_len;
+	new_hp.elem = result_array;
+	memcpy(result_array, hp->elem, hp->size * sizeof(struct node));
+
+	/* sort the new heap */
+	while (new_hp.size > 1) {
+		rte_member_heap_swap(&(new_hp.elem[0]), &(new_hp.elem[new_hp.size - 1]));
+		new_hp.size--;
+		rte_member_heapify(&new_hp, 0, false);
+	}
+}
+
+static void
+rte_member_minheap_free(struct minheap *hp)
+{
+	if (hp == NULL)
+		return;
+
+	rte_free(hp->elem);
+	rte_free(hp->hashtable);
+}
+
+static void
+rte_member_minheap_reset(struct minheap *hp)
+{
+	if (hp == NULL)
+		return;
+
+	memset(hp->elem, 0, sizeof(struct node) * hp->size);
+	hp->size = 0;
+
+	memset((char *)hp->hashtable + sizeof(struct hash), 0,
+			hp->hashtable->bkt_cnt * sizeof(struct hash_bkt));
+	hp->hashtable->num_item = 0;
+}
+
+#endif /* _RTE_MEMBER_HEAP_H_ */
diff --git a/lib/member/rte_member_sketch.c b/lib/member/rte_member_sketch.c
new file mode 100644
index 0000000000..524ba77620
--- /dev/null
+++ b/lib/member/rte_member_sketch.c
@@ -0,0 +1,594 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ * Copyright(c) 2020, Alan Liu <zaoxingliu@gmail.com>
+ */
+
+#include <math.h>
+#include <string.h>
+
+#include <rte_malloc.h>
+#include <rte_memory.h>
+#include <rte_errno.h>
+#include <rte_log.h>
+#include <rte_random.h>
+#include <rte_prefetch.h>
+#include <rte_ring_elem.h>
+
+#include "rte_member.h"
+#include "rte_member_sketch.h"
+#include "rte_member_heap.h"
+
+#ifdef CC_AVX512_SUPPORT
+#include "rte_member_sketch_avx512.h"
+#endif /* CC_AVX512_SUPPORT */
+
+struct sketch_runtime {
+	uint64_t pkt_cnt;
+	uint32_t until_next;
+	int converged;
+	struct minheap heap;
+	struct node *report_array;
+	void *key_slots;
+	struct rte_ring *free_key_slots;
+} __rte_cache_aligned;
+
+/*
+ * Geometric sampling to calculate how many packets needs to be
+ * skipped until next update. This method can mitigate the CPU
+ * overheads compared with coin-toss sampling.
+ */
+static uint32_t
+draw_geometric(const struct rte_member_setsum *ss)
+{
+	double rand = 1;
+
+	if (ss->sample_rate == 1)
+		return 1;
+
+	while (rand == 1 || rand == 0)
+		rand = (double) rte_rand() / (double)(RTE_RAND_MAX);
+
+	return (uint32_t)ceil(log(1 - rand) / log(1 - ss->sample_rate));
+}
+
+static void
+isort(uint64_t *array, int n)
+{
+	int i;
+
+	for (i = 1; i < n; i++) {
+		uint64_t t = array[i];
+		int j;
+
+		for (j = i - 1; j >= 0; j--) {
+			if (t < array[j])
+				array[j + 1] = array[j];
+			else
+				break;
+		}
+		array[j + 1] = t;
+	}
+}
+
+static __rte_always_inline void
+swap(uint64_t *a, uint64_t *b)
+{
+	uint64_t tmp = *a;
+	*a = *b;
+	*b = tmp;
+}
+
+static uint64_t
+medianof5(uint64_t a, uint64_t b, uint64_t c, uint64_t d, uint64_t e)
+{
+	if (a > b)
+		swap(&a, &b);
+	if (c > d)
+		swap(&c, &d);
+	if (a > c) {
+		if (d > e)
+			swap(&c, &e);
+		else {
+			swap(&c, &d);
+			swap(&d, &e);
+		}
+	} else {
+		if (b > e)
+			swap(&a, &e);
+		else {
+			swap(&a, &b);
+			swap(&b, &e);
+		}
+	}
+
+	if (a > c)
+		return a > d ? d : a;
+	else
+		return b > c ? c : b;
+}
+
+int
+rte_member_create_sketch(struct rte_member_setsum *ss,
+			 const struct rte_member_parameters *params,
+			 struct rte_ring *ring)
+{
+	struct sketch_runtime *runtime;
+	uint32_t num_col;
+	uint32_t i;
+
+	if (params->sample_rate == 0 || params->sample_rate > 1) {
+		rte_errno = EINVAL;
+		RTE_MEMBER_LOG(ERR,
+			"Membership Sketch created with invalid parameters\n");
+		return -EINVAL;
+	}
+
+	if (params->extra_flag & RTE_MEMBER_SKETCH_COUNT_BYTE)
+		ss->count_byte = 1;
+
+#ifdef RTE_ARCH_X86
+	if (ss->count_byte == 1 &&
+		rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512 &&
+		rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 &&
+		rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512IFMA) == 1) {
+#ifdef CC_AVX512_SUPPORT
+		ss->use_avx512 = true;
+#else
+		ss->use_avx512 = false;
+#endif
+	}
+
+	if (ss->use_avx512 == true) {
+#ifdef CC_AVX512_SUPPORT
+		ss->num_row = NUM_ROW_VEC;
+		RTE_MEMBER_LOG(NOTICE,
+			"Membership Sketch AVX512 update/lookup/delete ops is selected\n");
+		ss->sketch_update = sketch_update_avx512;
+		ss->sketch_lookup = sketch_lookup_avx512;
+		ss->sketch_delete = sketch_delete_avx512;
+#endif
+	} else
+#endif
+	{
+		ss->num_row = NUM_ROW_SCALAR;
+		RTE_MEMBER_LOG(NOTICE,
+			"Membership Sketch SCALAR update/lookup/delete ops is selected\n");
+		ss->sketch_update = sketch_update_scalar;
+		ss->sketch_lookup = sketch_lookup_scalar;
+		ss->sketch_delete = sketch_delete_scalar;
+	}
+
+	ss->socket_id = params->socket_id;
+
+	if (ss->count_byte == 0)
+		num_col = 4.0 / params->error_rate / params->sample_rate;
+#ifdef RTE_ARCH_X86
+	else if (ss->use_avx512 == true)
+		num_col = rte_align32pow2(4.0 / params->error_rate);
+#endif
+	else
+		num_col = 4.0 / params->error_rate;
+
+	ss->table = rte_zmalloc_socket(NULL,
+			sizeof(uint64_t) * num_col * ss->num_row,
+			RTE_CACHE_LINE_SIZE, ss->socket_id);
+	if (ss->table == NULL) {
+		RTE_MEMBER_LOG(ERR, "Sketch Table memory allocation failed\n");
+		return -ENOMEM;
+	}
+
+	ss->hash_seeds = rte_zmalloc_socket(NULL, sizeof(uint64_t) * ss->num_row,
+			RTE_CACHE_LINE_SIZE, ss->socket_id);
+	if (ss->hash_seeds == NULL) {
+		RTE_MEMBER_LOG(ERR, "Sketch Hashseeds memory allocation failed\n");
+		return -ENOMEM;
+	}
+
+	ss->runtime_var = rte_zmalloc_socket(NULL, sizeof(struct sketch_runtime),
+					RTE_CACHE_LINE_SIZE, ss->socket_id);
+	if (ss->runtime_var == NULL) {
+		RTE_MEMBER_LOG(ERR, "Sketch Runtime memory allocation failed\n");
+		rte_free(ss);
+		return -ENOMEM;
+	}
+	runtime = ss->runtime_var;
+
+	ss->num_col = num_col;
+	ss->sample_rate = params->sample_rate;
+	ss->prim_hash_seed = params->prim_hash_seed;
+	ss->sec_hash_seed = params->sec_hash_seed;
+	ss->error_rate = params->error_rate;
+	ss->topk = params->top_k;
+	ss->key_len = params->key_len;
+	runtime->heap.key_len = ss->key_len;
+
+	runtime->key_slots = rte_zmalloc_socket(NULL, ss->key_len * ss->topk,
+					RTE_CACHE_LINE_SIZE, ss->socket_id);
+	if (runtime->key_slots == NULL) {
+		RTE_MEMBER_LOG(ERR, "Sketch Key Slots allocation failed\n");
+		goto error;
+	}
+
+	runtime->free_key_slots = ring;
+	for (i = 0; i < ss->topk; i++)
+		rte_ring_sp_enqueue_elem(runtime->free_key_slots,
+					&i, sizeof(uint32_t));
+
+	if (rte_member_minheap_init(&(runtime->heap), params->top_k,
+			ss->socket_id, params->prim_hash_seed) < 0) {
+		RTE_MEMBER_LOG(ERR, "Sketch Minheap allocation failed\n");
+		goto error_runtime;
+	}
+
+	runtime->report_array = rte_zmalloc_socket(NULL, sizeof(struct node) * ss->topk,
+					RTE_CACHE_LINE_SIZE, ss->socket_id);
+	if (runtime->report_array == NULL) {
+		RTE_MEMBER_LOG(ERR, "Sketch Runtime Report Array allocation failed\n");
+		goto error_runtime;
+	}
+
+	rte_srand(ss->prim_hash_seed);
+	for (i = 0; i < ss->num_row; i++)
+		ss->hash_seeds[i] = rte_rand();
+
+	if (params->extra_flag & RTE_MEMBER_SKETCH_ALWAYS_BOUNDED)
+		ss->always_bounded = 1;
+
+	if (ss->always_bounded) {
+		double delta = 1.0 / (pow(2, ss->num_row));
+
+		ss->converge_thresh = 10 * pow(ss->error_rate, -2.0) * sqrt(log(1 / delta));
+	}
+
+	RTE_MEMBER_LOG(DEBUG, "Sketch created, "
+		"the total memory required is %u Bytes\n",  ss->num_col * ss->num_row * 8);
+
+	return 0;
+
+error_runtime:
+	rte_member_minheap_free(&runtime->heap);
+	rte_ring_free(runtime->free_key_slots);
+	rte_free(runtime->key_slots);
+error:
+	rte_free(runtime);
+	rte_free(ss);
+
+	return -ENOMEM;
+}
+
+uint64_t
+sketch_lookup_scalar(const struct rte_member_setsum *ss, const void *key)
+{
+	uint64_t *count_array = ss->table;
+	uint32_t col[ss->num_row];
+	uint64_t count_row[ss->num_row];
+	uint32_t cur_row;
+	uint64_t count;
+
+	for (cur_row = 0; cur_row < ss->num_row; cur_row++) {
+		col[cur_row] = MEMBER_HASH_FUNC(key, ss->key_len,
+			ss->hash_seeds[cur_row]) % ss->num_col;
+
+		rte_prefetch0(&count_array[cur_row * ss->num_col + col[cur_row]]);
+	}
+
+	/* if sample rate is 1, it is a regular count-min, we report the min */
+	if (ss->sample_rate == 1 || ss->count_byte == 1)
+		return count_min(ss, col);
+
+	memset(count_row, 0, sizeof(uint64_t) * ss->num_row);
+
+	/* otherwise we report the median number */
+	for (cur_row = 0; cur_row < ss->num_row; cur_row++)
+		count_row[cur_row] = count_array[cur_row * ss->num_col + col[cur_row]];
+
+	if (ss->num_row == 5)
+		return medianof5(count_row[0], count_row[1],
+				count_row[2], count_row[3], count_row[4]);
+
+	isort(count_row, ss->num_row);
+
+	if (ss->num_row % 2 == 0) {
+		count = (count_row[ss->num_row / 2] + count_row[ss->num_row / 2 - 1]) / 2;
+		return count;
+	}
+	/* ss->num_row % 2 != 0 */
+	count = count_row[ss->num_row / 2];
+
+	return count;
+}
+
+void
+sketch_delete_scalar(const struct rte_member_setsum *ss, const void *key)
+{
+	uint32_t col[ss->num_row];
+	uint64_t *count_array = ss->table;
+	uint32_t cur_row;
+
+	for (cur_row = 0; cur_row < ss->num_row; cur_row++) {
+		col[cur_row] = MEMBER_HASH_FUNC(key, ss->key_len,
+			ss->hash_seeds[cur_row]) % ss->num_col;
+
+		/* set corresponding counter to 0 */
+		count_array[cur_row * ss->num_col + col[cur_row]] = 0;
+	}
+}
+
+int
+rte_member_query_sketch(const struct rte_member_setsum *ss,
+			const void *key,
+			uint64_t *output)
+{
+	uint64_t count = ss->sketch_lookup(ss, key);
+	*output = count;
+
+	return 0;
+}
+
+void
+rte_member_update_heap(const struct rte_member_setsum *ss)
+{
+	uint32_t i;
+	struct sketch_runtime *runtime_var = ss->runtime_var;
+
+	for (i = 0; i < runtime_var->heap.size; i++) {
+		uint64_t count = ss->sketch_lookup(ss, runtime_var->heap.elem[i].key);
+
+		runtime_var->heap.elem[i].count = count;
+	}
+}
+
+int
+rte_member_report_heavyhitter_sketch(const struct rte_member_setsum *setsum,
+				     void **key,
+				     uint64_t *count)
+{
+	uint32_t i;
+	struct sketch_runtime *runtime_var = setsum->runtime_var;
+
+	rte_member_update_heap(setsum);
+	rte_member_heapsort(&(runtime_var->heap), runtime_var->report_array);
+
+	for (i = 0; i < runtime_var->heap.size; i++) {
+		key[i] = runtime_var->report_array[i].key;
+		count[i] = runtime_var->report_array[i].count;
+	}
+
+	return runtime_var->heap.size;
+}
+
+int
+rte_member_lookup_sketch(const struct rte_member_setsum *ss,
+			 const void *key, member_set_t *set_id)
+{
+	uint64_t count = ss->sketch_lookup(ss, key);
+	struct sketch_runtime *runtime_var = ss->runtime_var;
+
+	if (runtime_var->heap.size > 0 && count >= runtime_var->heap.elem[0].count)
+		*set_id = 1;
+	else
+		*set_id = 0;
+
+	if (count == 0)
+		return 0;
+	else
+		return 1;
+}
+
+static void
+should_converge(const struct rte_member_setsum *ss)
+{
+	struct sketch_runtime *runtime_var = ss->runtime_var;
+
+	/* For count min sketch - L1 norm */
+	if (runtime_var->pkt_cnt > ss->converge_thresh) {
+		runtime_var->converged = 1;
+		RTE_MEMBER_LOG(DEBUG, "Sketch converged, begin sampling "
+					"from key count %"PRIu64"\n",
+					runtime_var->pkt_cnt);
+	}
+}
+
+static void
+sketch_update_row(const struct rte_member_setsum *ss, const void *key,
+		  uint32_t count, uint32_t cur_row)
+{
+	uint64_t *count_array = ss->table;
+	uint32_t col = MEMBER_HASH_FUNC(key, ss->key_len,
+			ss->hash_seeds[cur_row]) % ss->num_col;
+
+	/* sketch counter update */
+	count_array[cur_row * ss->num_col + col] +=
+			ceil(count / (ss->sample_rate));
+}
+
+void
+sketch_update_scalar(const struct rte_member_setsum *ss,
+		     const void *key,
+		     uint32_t count)
+{
+	uint64_t *count_array = ss->table;
+	uint32_t col;
+	uint32_t cur_row;
+
+	for (cur_row = 0; cur_row < ss->num_row; cur_row++) {
+		col = MEMBER_HASH_FUNC(key, ss->key_len,
+				ss->hash_seeds[cur_row]) % ss->num_col;
+		count_array[cur_row * ss->num_col + col] += count;
+	}
+}
+
+static void
+heap_update(const struct rte_member_setsum *ss, const void *key)
+{
+	struct sketch_runtime *runtime_var = ss->runtime_var;
+	uint64_t key_cnt = 0;
+	int found;
+
+	/* We also update the heap for this key */
+	key_cnt = ss->sketch_lookup(ss, key);
+	if (key_cnt > runtime_var->heap.elem[0].count) {
+		found = rte_member_minheap_find(&runtime_var->heap, key);
+		/* the key is found in the top-k heap */
+		if (found >= 0) {
+			if (runtime_var->heap.elem[found].count < key_cnt)
+				rte_member_heapify(&runtime_var->heap, found, true);
+
+			runtime_var->heap.elem[found].count = key_cnt;
+		} else if (runtime_var->heap.size < ss->topk) {
+			rte_member_minheap_insert_node(&runtime_var->heap, key,
+				key_cnt, runtime_var->key_slots, runtime_var->free_key_slots);
+		} else {
+			rte_member_minheap_replace_node(&runtime_var->heap, key, key_cnt);
+		}
+	} else if (runtime_var->heap.size < ss->topk) {
+		found = rte_member_minheap_find(&runtime_var->heap, key);
+		if (found >= 0) {
+			if (runtime_var->heap.elem[found].count < key_cnt)
+				rte_member_heapify(&runtime_var->heap, found, true);
+
+			runtime_var->heap.elem[found].count = key_cnt;
+		} else
+			rte_member_minheap_insert_node(&runtime_var->heap, key,
+				key_cnt, runtime_var->key_slots, runtime_var->free_key_slots);
+	}
+}
+
+/*
+ * Add a single packet into the sketch.
+ * Sketch value is meatured by packet numbers in this mode.
+ */
+int
+rte_member_add_sketch(const struct rte_member_setsum *ss,
+		      const void *key,
+		      __rte_unused member_set_t set_id)
+{
+	uint32_t cur_row;
+	struct sketch_runtime *runtime_var = ss->runtime_var;
+	uint32_t *until_next = &(runtime_var->until_next);
+
+	/*
+	 * If sketch is measured by byte count,
+	 * the rte_member_add_sketch_byte_count routine should be used.
+	 */
+	if (ss->count_byte == 1) {
+		RTE_MEMBER_LOG(ERR, "Sketch is Byte Mode, "
+			"should use rte_member_add_byte_count()!\n");
+		return -EINVAL;
+	}
+
+	if (ss->sample_rate == 1) {
+		ss->sketch_update(ss, key, 1);
+		heap_update(ss, key);
+		return 0;
+	}
+
+	/* convergence stage if it's needed */
+	if (ss->always_bounded && !runtime_var->converged) {
+		ss->sketch_update(ss, key, 1);
+
+		if (!((++runtime_var->pkt_cnt) & (INTERVAL - 1)))
+			should_converge(ss);
+
+		heap_update(ss, key);
+		return 0;
+	}
+
+	/* should we skip this packet */
+	if (*until_next >= ss->num_row) {
+		*until_next -= ss->num_row;
+		return 0;
+	}
+	cur_row = *until_next;
+	do {
+		sketch_update_row(ss, key, 1, cur_row);
+		*until_next = draw_geometric(ss);
+		if (cur_row + *until_next >= ss->num_row)
+			break;
+		cur_row += *until_next;
+	} while (1);
+
+	*until_next -= (ss->num_row - cur_row);
+
+	heap_update(ss, key);
+
+	return 0;
+}
+
+/*
+ * Add the byte count of the packet into the sketch.
+ * Sketch value is meatured by byte count numbers in this mode.
+ */
+int
+rte_member_add_sketch_byte_count(const struct rte_member_setsum *ss,
+				 const void *key,
+				 uint32_t byte_count)
+{
+	struct sketch_runtime *runtime_var = ss->runtime_var;
+	uint32_t *until_next = &(runtime_var->until_next);
+
+	/* should not call this API if not in count byte mode */
+	if (ss->count_byte == 0) {
+		RTE_MEMBER_LOG(ERR, "Sketch is Pkt Mode, "
+			"should use rte_member_add()!\n");
+		return -EINVAL;
+	}
+
+	/* there's specific optimization for the sketch update */
+	ss->sketch_update(ss, key, byte_count);
+
+	if (*until_next != 0) {
+		*until_next = *until_next - 1;
+		return 0;
+	}
+
+	*until_next = draw_geometric(ss) - 1;
+
+	heap_update(ss, key);
+
+	return 0;
+}
+
+int
+rte_member_delete_sketch(const struct rte_member_setsum *ss,
+			 const void *key)
+{
+	struct sketch_runtime *runtime_var = ss->runtime_var;
+	int found;
+
+	found = rte_member_minheap_find(&runtime_var->heap, key);
+	if (found < 0)
+		return -1;
+
+	ss->sketch_delete(ss, key);
+
+	return rte_member_minheap_delete_node
+		(&runtime_var->heap, key, runtime_var->key_slots, runtime_var->free_key_slots);
+}
+
+void
+rte_member_free_sketch(struct rte_member_setsum *ss)
+{
+	struct sketch_runtime *runtime_var = ss->runtime_var;
+
+	rte_free(ss->table);
+	rte_member_minheap_free(&runtime_var->heap);
+	rte_free(runtime_var->key_slots);
+	rte_ring_free(runtime_var->free_key_slots);
+	rte_free(runtime_var);
+}
+
+void
+rte_member_reset_sketch(const struct rte_member_setsum *ss)
+{
+	struct sketch_runtime *runtime_var = ss->runtime_var;
+	uint64_t *sketch = ss->table;
+	uint32_t i;
+
+	memset(sketch, 0, sizeof(uint64_t) * ss->num_col * ss->num_row);
+	rte_member_minheap_reset(&runtime_var->heap);
+	rte_ring_reset(runtime_var->free_key_slots);
+
+	for (i = 0; i < ss->topk; i++)
+		rte_ring_sp_enqueue_elem(runtime_var->free_key_slots, &i, sizeof(uint32_t));
+}
diff --git a/lib/member/rte_member_sketch.h b/lib/member/rte_member_sketch.h
new file mode 100644
index 0000000000..219323008b
--- /dev/null
+++ b/lib/member/rte_member_sketch.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Intel Corporation
+ */
+
+#ifndef _RTE_MEMBER_SKETCH_H_
+#define _RTE_MEMBER_SKETCH_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_vect.h>
+#include <rte_ring_elem.h>
+
+#define NUM_ROW_SCALAR 5
+#define INTERVAL (1 << 15)
+
+#if !RTE_IS_POWER_OF_2(INTERVAL)
+#error sketch INTERVAL macro must be a power of 2
+#endif
+
+int
+rte_member_create_sketch(struct rte_member_setsum *ss,
+			 const struct rte_member_parameters *params,
+			 struct rte_ring *r);
+
+int
+rte_member_lookup_sketch(const struct rte_member_setsum *setsum,
+			 const void *key, member_set_t *set_id);
+
+int
+rte_member_add_sketch(const struct rte_member_setsum *setsum,
+		      const void *key,
+		      member_set_t set_id);
+
+int
+rte_member_add_sketch_byte_count(const struct rte_member_setsum *ss,
+				 const void *key, uint32_t byte_count);
+
+void
+sketch_update_scalar(const struct rte_member_setsum *ss,
+		     const void *key,
+		     uint32_t count);
+
+uint64_t
+sketch_lookup_scalar(const struct rte_member_setsum *ss,
+		     const void *key);
+
+void
+sketch_delete_scalar(const struct rte_member_setsum *ss,
+		     const void *key);
+
+int
+rte_member_delete_sketch(const struct rte_member_setsum *setsum,
+			 const void *key);
+
+int
+rte_member_query_sketch(const struct rte_member_setsum *setsum,
+			const void *key, uint64_t *output);
+
+void
+rte_member_free_sketch(struct rte_member_setsum *ss);
+
+void
+rte_member_reset_sketch(const struct rte_member_setsum *setsum);
+
+int
+rte_member_report_heavyhitter_sketch(const struct rte_member_setsum *setsum,
+				     void **key, uint64_t *count);
+
+void
+rte_member_update_heap(const struct rte_member_setsum *ss);
+
+static __rte_always_inline uint64_t
+count_min(const struct rte_member_setsum *ss, const uint32_t *hash_results)
+{
+	uint64_t *count_array = ss->table;
+	uint64_t count;
+	uint32_t cur_row;
+	uint64_t min = UINT64_MAX;
+
+	for (cur_row = 0; cur_row < ss->num_row; cur_row++) {
+		uint64_t cnt = count_array[cur_row * ss->num_col + hash_results[cur_row]];
+
+		if (cnt < min)
+			min = cnt;
+	}
+	count = min;
+
+	return count;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMBER_SKETCH_H_ */
diff --git a/lib/member/rte_member_sketch_avx512.c b/lib/member/rte_member_sketch_avx512.c
new file mode 100644
index 0000000000..c83f4b6fd1
--- /dev/null
+++ b/lib/member/rte_member_sketch_avx512.c
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include "rte_member_sketch_avx512.h"
+
+__rte_always_inline void
+sketch_update_avx512(const struct rte_member_setsum *ss,
+		     const void *key,
+		     uint32_t count)
+{
+	uint64_t *count_array = ss->table;
+	uint32_t num_col = ss->num_col;
+	uint32_t key_len = ss->key_len;
+	__m256i v_row_base;
+	__m256i v_hash_result;
+	__m512i current_sketch;
+	__m512i updated_sketch;
+	__m512i v_count;
+
+	const __m256i v_idx = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
+	const __m256i v_col = _mm256_set1_epi32(num_col);
+
+	/* compute the hash result parallelly */
+	v_hash_result = rte_xxh64_sketch_avx512
+		(key, key_len, *(__m512i *)ss->hash_seeds, num_col);
+	v_row_base = _mm256_mullo_epi32(v_idx, v_col);
+	v_hash_result = _mm256_add_epi32(v_row_base, v_hash_result);
+
+	current_sketch =
+		_mm512_i32gather_epi64(v_hash_result, count_array, 8);
+	v_count = _mm512_set1_epi64(count);
+	updated_sketch = _mm512_add_epi64(current_sketch, v_count);
+	_mm512_i32scatter_epi64
+		((void *)count_array, v_hash_result, updated_sketch, 8);
+}
+
+uint64_t
+sketch_lookup_avx512(const struct rte_member_setsum *ss, const void *key)
+{
+	uint32_t col[ss->num_row];
+
+	/* currently only for sketch byte count mode */
+	__m256i v_hash_result = rte_xxh64_sketch_avx512
+		(key, ss->key_len, *(__m512i *)ss->hash_seeds, ss->num_col);
+	_mm256_storeu_si256((__m256i *)col, v_hash_result);
+
+	return count_min(ss, col);
+}
+
+void
+sketch_delete_avx512(const struct rte_member_setsum *ss, const void *key)
+{
+	uint32_t col[ss->num_row];
+	uint64_t *count_array = ss->table;
+	uint64_t min = UINT64_MAX;
+	uint32_t cur_row;
+
+	__m256i v_hash_result = rte_xxh64_sketch_avx512
+		(key, ss->key_len, *(__m512i *)ss->hash_seeds,
+		 RTE_ALIGN_FLOOR(ss->num_col, 32));
+	_mm256_storeu_si256((__m256i *)col, v_hash_result);
+
+	min = count_min(ss, col);
+
+	/* subtract the min value from all the counters */
+	for (cur_row = 0; cur_row < ss->num_row; cur_row++)
+		count_array[cur_row * ss->num_col + col[cur_row]] -= min;
+}
diff --git a/lib/member/rte_member_sketch_avx512.h b/lib/member/rte_member_sketch_avx512.h
new file mode 100644
index 0000000000..e7c25da643
--- /dev/null
+++ b/lib/member/rte_member_sketch_avx512.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef _RTE_MEMBER_SKETCH_AVX512_H_
+#define _RTE_MEMBER_SKETCH_AVX512_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_vect.h>
+#include "rte_member.h"
+#include "rte_member_sketch.h"
+#include "rte_xxh64_avx512.h"
+
+#define NUM_ROW_VEC 8
+
+void
+sketch_update_avx512(const struct rte_member_setsum *ss,
+		     const void *key,
+		     uint32_t count);
+
+uint64_t
+sketch_lookup_avx512(const struct rte_member_setsum *ss,
+		     const void *key);
+
+void
+sketch_delete_avx512(const struct rte_member_setsum *ss,
+		     const void *key);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMBER_SKETCH_AVX512_H_ */
diff --git a/lib/member/rte_xxh64_avx512.h b/lib/member/rte_xxh64_avx512.h
new file mode 100644
index 0000000000..50ca1b52c7
--- /dev/null
+++ b/lib/member/rte_xxh64_avx512.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef _RTE_XXH64_AVX512_H_
+#define _RTE_XXH64_AVX512_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include <immintrin.h>
+
+/* 0b1001111000110111011110011011000110000101111010111100101010000111 */
+static const uint64_t PRIME64_1 = 0x9E3779B185EBCA87ULL;
+/* 0b1100001010110010101011100011110100100111110101001110101101001111 */
+static const uint64_t PRIME64_2 = 0xC2B2AE3D27D4EB4FULL;
+/* 0b0001011001010110011001111011000110011110001101110111100111111001 */
+static const uint64_t PRIME64_3 = 0x165667B19E3779F9ULL;
+/* 0b1000010111101011110010100111011111000010101100101010111001100011 */
+static const uint64_t PRIME64_4 = 0x85EBCA77C2B2AE63ULL;
+/* 0b0010011111010100111010110010111100010110010101100110011111000101 */
+static const uint64_t PRIME64_5 = 0x27D4EB2F165667C5ULL;
+
+static __rte_always_inline  __m512i
+xxh64_round_avx512(__m512i hash, __m512i input)
+{
+	hash = _mm512_madd52lo_epu64(hash,
+			input,
+			_mm512_set1_epi64(PRIME64_2));
+
+	hash = _mm512_rol_epi64(hash, 31);
+
+	return hash;
+}
+
+static __rte_always_inline  __m512i
+xxh64_fmix_avx512(__m512i hash)
+{
+	hash = _mm512_xor_si512(hash, _mm512_srli_epi64(hash, 33));
+
+	return hash;
+}
+
+static __rte_always_inline __m256i
+rte_xxh64_sketch_avx512(const void *key, uint32_t key_len,
+			__m512i v_seed, uint32_t modulo)
+{
+	__m512i v_prime64_5, v_hash;
+	size_t remaining = key_len;
+	size_t offset = 0;
+	__m512i input;
+
+	v_prime64_5 = _mm512_set1_epi64(PRIME64_5);
+	v_hash = _mm512_add_epi64
+			(_mm512_add_epi64(v_seed, v_prime64_5),
+			 _mm512_set1_epi64(key_len));
+
+	while (remaining >= 8) {
+		input = _mm512_set1_epi64(*(uint64_t *)RTE_PTR_ADD(key, offset));
+		v_hash = _mm512_xor_epi64(v_hash,
+				xxh64_round_avx512(_mm512_setzero_si512(), input));
+		v_hash = _mm512_madd52lo_epu64(_mm512_set1_epi64(PRIME64_4),
+				v_hash,
+				_mm512_set1_epi64(PRIME64_1));
+
+		remaining -= 8;
+		offset += 8;
+	}
+
+	if (remaining >= 4) {
+		input = _mm512_set1_epi64
+			(*(uint32_t *)RTE_PTR_ADD(key, offset));
+		v_hash = _mm512_xor_epi64(v_hash,
+			_mm512_mullo_epi64(input,
+				_mm512_set1_epi64(PRIME64_1)));
+		v_hash = _mm512_madd52lo_epu64
+				(_mm512_set1_epi64(PRIME64_3),
+				_mm512_rol_epi64(v_hash, 23),
+				_mm512_set1_epi64(PRIME64_2));
+
+		offset += 4;
+		remaining -= 4;
+	}
+
+	while (remaining != 0) {
+		input = _mm512_set1_epi64
+			(*(uint8_t *)RTE_PTR_ADD(key, offset));
+		v_hash = _mm512_xor_epi64(v_hash,
+			_mm512_mullo_epi64(input,
+				_mm512_set1_epi64(PRIME64_5)));
+		v_hash = _mm512_mullo_epi64
+			(_mm512_rol_epi64(v_hash, 11),
+			_mm512_set1_epi64(PRIME64_1));
+		offset++;
+		remaining--;
+	}
+
+	v_hash = xxh64_fmix_avx512(v_hash);
+
+	/*
+	 * theoritically, such modular operations can be replaced by
+	 * _mm512_rem_epi64(), but seems it depends on the compiler's
+	 * implementation. so here is the limitation that the modulo
+	 * value should be power of 2.
+	 */
+	__m512i v_hash_remainder = _mm512_set1_epi64((modulo - 1));
+
+	return _mm512_cvtepi64_epi32(_mm512_and_si512(v_hash, v_hash_remainder));
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_XXH64_AVX512_H_ */
diff --git a/lib/member/version.map b/lib/member/version.map
index 19469c6aba..35a811d0ac 100644
--- a/lib/member/version.map
+++ b/lib/member/version.map
@@ -2,6 +2,9 @@ DPDK_23 {
 	global:
 
 	rte_member_add;
+	rte_member_add_byte_count;
+	rte_member_query_count;
+	rte_member_report_heavyhitter;
 	rte_member_create;
 	rte_member_delete;
 	rte_member_find_existing;
-- 
2.25.1


  reply	other threads:[~2022-08-31  6:47 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-08-10  7:45 [PATCH 0/2] introduce NitroSketch Mode into membership library Leyi Rong
2022-08-10  7:45 ` [PATCH 1/2] member: implement NitroSketch mode Leyi Rong
2022-08-16  8:59   ` Ferruh Yigit
2022-08-26  2:32     ` Rong, Leyi
2022-08-10  7:45 ` [PATCH 2/2] test/member: add functional and perf tests for sketch Leyi Rong
2022-08-11  2:33   ` Suanming Mou
2022-08-26  2:26     ` Rong, Leyi
2022-08-31  6:46 ` [PATCH v2 0/2] introduce NitroSketch Mode into membership library Leyi Rong
2022-08-31  6:46   ` Leyi Rong [this message]
2022-09-13 14:56     ` [PATCH v2 1/2] member: implement NitroSketch mode David Marchand
2022-09-14  9:42       ` Rong, Leyi
2022-08-31  6:46   ` [PATCH v2 2/2] test/member: add functional and perf tests for sketch Leyi Rong
2022-09-15  2:14 ` [PATCH v3 0/2] introduce NitroSketch Mode into membership library Leyi Rong
2022-09-15  2:14   ` [PATCH v3 1/2] member: implement NitroSketch mode Leyi Rong
2022-09-15  7:47     ` David Marchand
2022-09-15 13:02       ` Rong, Leyi
2022-09-15  2:14   ` [PATCH v3 2/2] test/member: add functional and perf tests for sketch Leyi Rong
2022-09-15  7:30     ` Jiang, YuX
2022-09-15  7:41       ` David Marchand
2022-09-15 13:03 ` [PATCH v4 0/2] introduce NitroSketch Mode into membership library Leyi Rong
2022-09-15 13:03   ` [PATCH v4 1/2] member: implement NitroSketch mode Leyi Rong
2022-09-15 14:12     ` Bruce Richardson
2022-09-16  2:30       ` Rong, Leyi
2022-09-15 13:03   ` [PATCH v4 2/2] test/member: add functional and perf tests for sketch Leyi Rong
2022-09-16  3:03 ` [PATCH v5 0/2] introduce NitroSketch Mode into membership library Leyi Rong
2022-09-16  3:03   ` [PATCH v5 1/2] member: implement NitroSketch mode Leyi Rong
2022-09-22  3:00     ` Jiang, YuX
2022-10-03 12:37     ` Thomas Monjalon
2022-10-08  3:36       ` Rong, Leyi
2022-09-16  3:03   ` [PATCH v5 2/2] test/member: add functional and perf tests for sketch Leyi Rong
2022-10-09 23:13   ` [PATCH v5 0/2] introduce NitroSketch Mode into membership library Thomas Monjalon
2022-10-10  2:36     ` Rong, Leyi
2022-10-10  7:30       ` Thomas Monjalon
2022-10-10  8:28         ` Rong, Leyi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220831064639.4163765-2-leyi.rong@intel.com \
    --to=leyi.rong@intel.com \
    --cc=dev@dpdk.org \
    --cc=ferruh.yigit@xilinx.com \
    --cc=sameh.gobriel@intel.com \
    --cc=suanmingm@nvidia.com \
    --cc=yipeng1.wang@intel.com \
    --cc=zaoxingliu@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).