From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path:
Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 0B7DEA04A5; Thu, 18 Jun 2020 09:25:01 +0200 (CEST)
Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id F33B41BE88; Thu, 18 Jun 2020 09:24:54 +0200 (CEST)
Received: from git-send-mailer.rdmz.labs.mlnx (unknown [37.142.13.130]) by dpdk.org (Postfix) with ESMTP id 3DF37F12 for ; Thu, 18 Jun 2020 09:24:53 +0200 (CEST)
From: Suanming Mou
To: viacheslavo@mellanox.com, matan@mellanox.com
Cc: rasland@mellanox.com, dev@dpdk.org
Date: Thu, 18 Jun 2020 15:24:42 +0800
Message-Id: <1592465084-140601-2-git-send-email-suanmingm@mellanox.com>
X-Mailer: git-send-email 1.8.3.1
In-Reply-To: <1592465084-140601-1-git-send-email-suanmingm@mellanox.com>
References: <1592465084-140601-1-git-send-email-suanmingm@mellanox.com>
Subject: [dpdk-dev] [PATCH 1/3] net/mlx5: add Three-Level table utility
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: DPDK patches and discussions
List-Unsubscribe: ,
List-Archive:
List-Post:
List-Help:
List-Subscribe: ,
Errors-To: dev-bounces@dpdk.org
Sender: "dev"

When data is linked with a sequentially increasing index, an array table is more efficient than a hash table for looking up a single entry among a large number of entries. A traditional hash table has a fixed table size, so saving a huge number of entries to it also causes many hash conflicts.

A plain array table has a fixed size as well: allocating all the needed memory at once wastes a lot of memory, and when the exact number of entries is unknown it is impossible to size the array at all.

A multi-level table balances these two disadvantages. A global high-level table holding sub-table entries is allocated first, and each sub-table is allocated only when an index belonging to it needs to be saved. For example, with a 32-bit index split 10-10-12 over three levels and a sequentially increasing index, memory grows by one entry table per 4K entries.

The current implementation introduces a Three-Level table with a 10-10-12 split of the 32-bit index to serve cases which need to save millions of entries. An entry is addressed directly by its index, so no search is needed.
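As an illustration only (not part of the patch), a minimal, self-contained sketch of how a 32-bit index decomposes under the 10-10-12 split; the shortened L3T_* macro values mirror the MLX5_L3T_* defines added to mlx5_utils.h below:

/* Illustrative sketch: decompose a 32-bit index into the three table levels. */
#include <stdint.h>
#include <stdio.h>

#define L3T_GT_OFFSET 22               /* top 10 bits -> global table slot */
#define L3T_GT_MASK   ((1u << 10) - 1)
#define L3T_MT_OFFSET 12               /* next 10 bits -> middle table slot */
#define L3T_MT_MASK   ((1u << 10) - 1)
#define L3T_ET_MASK   ((1u << 12) - 1) /* low 12 bits -> entry table slot */

int
main(void)
{
	uint32_t idx = 0x12345; /* arbitrary example index */
	uint32_t gt = (idx >> L3T_GT_OFFSET) & L3T_GT_MASK;
	uint32_t mt = (idx >> L3T_MT_OFFSET) & L3T_MT_MASK;
	uint32_t et = idx & L3T_ET_MASK;

	/*
	 * Each entry table covers 4K (1 << 12) consecutive indices, so with
	 * a sequentially increasing index a new entry table is allocated
	 * only once per 4K entries.
	 */
	printf("global slot %u, middle slot %u, entry slot %u\n", gt, mt, et);
	return 0;
}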
Signed-off-by: Suanming Mou Acked-by: Matan Azrad Acked-by: Viacheslav Ovsiienko --- drivers/net/mlx5/mlx5_utils.c | 276 ++++++++++++++++++++++++++++++++++++++++++ drivers/net/mlx5/mlx5_utils.h | 165 +++++++++++++++++++++++++ 2 files changed, 441 insertions(+) diff --git a/drivers/net/mlx5/mlx5_utils.c b/drivers/net/mlx5/mlx5_utils.c index d29fbcb..5c76b29 100644 --- a/drivers/net/mlx5/mlx5_utils.c +++ b/drivers/net/mlx5/mlx5_utils.c @@ -482,3 +482,279 @@ struct mlx5_indexed_pool * pool->trunk_empty, pool->trunk_avail, pool->trunk_free); #endif } + +struct mlx5_l3t_tbl * +mlx5_l3t_create(enum mlx5_l3t_type type) +{ + struct mlx5_l3t_tbl *tbl; + struct mlx5_indexed_pool_config l3t_ip_cfg = { + .trunk_size = 16, + .grow_trunk = 6, + .grow_shift = 1, + .need_lock = 0, + .release_mem_en = 1, + .malloc = rte_malloc_socket, + .free = rte_free, + }; + + if (type >= MLX5_L3T_TYPE_MAX) { + rte_errno = EINVAL; + return NULL; + } + tbl = rte_zmalloc(NULL, sizeof(struct mlx5_l3t_tbl), 1); + if (!tbl) { + rte_errno = ENOMEM; + return NULL; + } + tbl->type = type; + switch (type) { + case MLX5_L3T_TYPE_WORD: + l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_word) + + sizeof(uint16_t) * MLX5_L3T_ET_SIZE; + l3t_ip_cfg.type = "mlx5_l3t_e_tbl_w"; + break; + case MLX5_L3T_TYPE_DWORD: + l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_dword) + + sizeof(uint32_t) * MLX5_L3T_ET_SIZE; + l3t_ip_cfg.type = "mlx5_l3t_e_tbl_dw"; + break; + case MLX5_L3T_TYPE_QWORD: + l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_qword) + + sizeof(uint64_t) * MLX5_L3T_ET_SIZE; + l3t_ip_cfg.type = "mlx5_l3t_e_tbl_qw"; + break; + default: + l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_ptr) + + sizeof(void *) * MLX5_L3T_ET_SIZE; + l3t_ip_cfg.type = "mlx5_l3t_e_tbl_tpr"; + break; + } + tbl->eip = mlx5_ipool_create(&l3t_ip_cfg); + if (!tbl->eip) { + rte_errno = ENOMEM; + rte_free(tbl); + tbl = NULL; + } + return tbl; +} + +void +mlx5_l3t_destroy(struct mlx5_l3t_tbl *tbl) +{ + struct mlx5_l3t_level_tbl *g_tbl, *m_tbl; + uint32_t i, j; + + if (!tbl) + return; + g_tbl = tbl->tbl; + if (g_tbl) { + for (i = 0; i < MLX5_L3T_GT_SIZE; i++) { + m_tbl = g_tbl->tbl[i]; + if (!m_tbl) + continue; + for (j = 0; j < MLX5_L3T_MT_SIZE; j++) { + if (!m_tbl->tbl[j]) + continue; + MLX5_ASSERT(!((struct mlx5_l3t_entry_word *) + m_tbl->tbl[j])->ref_cnt); + mlx5_ipool_free(tbl->eip, + ((struct mlx5_l3t_entry_word *) + m_tbl->tbl[j])->idx); + m_tbl->tbl[j] = 0; + if (!(--m_tbl->ref_cnt)) + break; + } + MLX5_ASSERT(!m_tbl->ref_cnt); + rte_free(g_tbl->tbl[i]); + g_tbl->tbl[i] = 0; + if (!(--g_tbl->ref_cnt)) + break; + } + MLX5_ASSERT(!g_tbl->ref_cnt); + rte_free(tbl->tbl); + tbl->tbl = 0; + } + mlx5_ipool_destroy(tbl->eip); + rte_free(tbl); +} + +uint32_t +mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx, + union mlx5_l3t_data *data) +{ + struct mlx5_l3t_level_tbl *g_tbl, *m_tbl; + void *e_tbl; + uint32_t entry_idx; + + g_tbl = tbl->tbl; + if (!g_tbl) + return -1; + m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK]; + if (!m_tbl) + return -1; + e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK]; + if (!e_tbl) + return -1; + entry_idx = idx & MLX5_L3T_ET_MASK; + switch (tbl->type) { + case MLX5_L3T_TYPE_WORD: + data->word = ((struct mlx5_l3t_entry_word *)e_tbl)->entry + [entry_idx]; + break; + case MLX5_L3T_TYPE_DWORD: + data->dword = ((struct mlx5_l3t_entry_dword *)e_tbl)->entry + [entry_idx]; + break; + case MLX5_L3T_TYPE_QWORD: + data->qword = ((struct mlx5_l3t_entry_qword *)e_tbl)->entry + [entry_idx]; + break; + 
default: + data->ptr = ((struct mlx5_l3t_entry_ptr *)e_tbl)->entry + [entry_idx]; + break; + } + return 0; +} + +void +mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx) +{ + struct mlx5_l3t_level_tbl *g_tbl, *m_tbl; + struct mlx5_l3t_entry_word *w_e_tbl; + struct mlx5_l3t_entry_dword *dw_e_tbl; + struct mlx5_l3t_entry_qword *qw_e_tbl; + struct mlx5_l3t_entry_ptr *ptr_e_tbl; + void *e_tbl; + uint32_t entry_idx; + uint64_t ref_cnt; + + g_tbl = tbl->tbl; + if (!g_tbl) + return; + m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK]; + if (!m_tbl) + return; + e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK]; + if (!e_tbl) + return; + entry_idx = idx & MLX5_L3T_ET_MASK; + switch (tbl->type) { + case MLX5_L3T_TYPE_WORD: + w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl; + w_e_tbl->entry[entry_idx] = 0; + ref_cnt = --w_e_tbl->ref_cnt; + break; + case MLX5_L3T_TYPE_DWORD: + dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl; + dw_e_tbl->entry[entry_idx] = 0; + ref_cnt = --dw_e_tbl->ref_cnt; + break; + case MLX5_L3T_TYPE_QWORD: + qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl; + qw_e_tbl->entry[entry_idx] = 0; + ref_cnt = --qw_e_tbl->ref_cnt; + break; + default: + ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl; + ptr_e_tbl->entry[entry_idx] = NULL; + ref_cnt = --ptr_e_tbl->ref_cnt; + break; + } + if (!ref_cnt) { + mlx5_ipool_free(tbl->eip, + ((struct mlx5_l3t_entry_word *)e_tbl)->idx); + m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK] = + NULL; + if (!(--m_tbl->ref_cnt)) { + rte_free(m_tbl); + g_tbl->tbl + [(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK] = NULL; + if (!(--g_tbl->ref_cnt)) { + rte_free(g_tbl); + tbl->tbl = 0; + } + } + } +} + +uint32_t +mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx, + union mlx5_l3t_data *data) +{ + struct mlx5_l3t_level_tbl *g_tbl, *m_tbl; + struct mlx5_l3t_entry_word *w_e_tbl; + struct mlx5_l3t_entry_dword *dw_e_tbl; + struct mlx5_l3t_entry_qword *qw_e_tbl; + struct mlx5_l3t_entry_ptr *ptr_e_tbl; + void *e_tbl; + uint32_t entry_idx, tbl_idx = 0; + + /* Check the global table, create it if empty. */ + g_tbl = tbl->tbl; + if (!g_tbl) { + g_tbl = rte_zmalloc(NULL, sizeof(struct mlx5_l3t_level_tbl) + + sizeof(void *) * MLX5_L3T_GT_SIZE, 1); + if (!g_tbl) { + rte_errno = ENOMEM; + return -1; + } + tbl->tbl = g_tbl; + } + /* + * Check the middle table, create it if empty. Ref_cnt will be + * increased if new sub table created. + */ + m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK]; + if (!m_tbl) { + m_tbl = rte_zmalloc(NULL, sizeof(struct mlx5_l3t_level_tbl) + + sizeof(void *) * MLX5_L3T_MT_SIZE, 1); + if (!m_tbl) { + rte_errno = ENOMEM; + return -1; + } + g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK] = + m_tbl; + g_tbl->ref_cnt++; + } + /* + * Check the entry table, create it if empty. Ref_cnt will be + * increased if new sub entry table created. 
+ */ + e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK]; + if (!e_tbl) { + e_tbl = mlx5_ipool_zmalloc(tbl->eip, &tbl_idx); + if (!e_tbl) { + rte_errno = ENOMEM; + return -1; + } + ((struct mlx5_l3t_entry_word *)e_tbl)->idx = tbl_idx; + m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK] = + e_tbl; + m_tbl->ref_cnt++; + } + entry_idx = idx & MLX5_L3T_ET_MASK; + switch (tbl->type) { + case MLX5_L3T_TYPE_WORD: + w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl; + w_e_tbl->entry[entry_idx] = data->word; + w_e_tbl->ref_cnt++; + break; + case MLX5_L3T_TYPE_DWORD: + dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl; + dw_e_tbl->entry[entry_idx] = data->dword; + dw_e_tbl->ref_cnt++; + break; + case MLX5_L3T_TYPE_QWORD: + qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl; + qw_e_tbl->entry[entry_idx] = data->qword; + qw_e_tbl->ref_cnt++; + break; + default: + ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl; + ptr_e_tbl->entry[entry_idx] = data->ptr; + ptr_e_tbl->ref_cnt++; + break; + } + return 0; +} diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h index f4ec151..18cfc2c 100644 --- a/drivers/net/mlx5/mlx5_utils.h +++ b/drivers/net/mlx5/mlx5_utils.h @@ -55,6 +55,106 @@ (((val) & (from)) * ((to) / (from)))) /* + * When data is linked with a sequentially increasing index, an array + * table is more efficient than a hash table for looking up a single entry + * among a large number of entries. A traditional hash table has a fixed + * table size, so saving a huge number of entries to it also causes many + * hash conflicts. + * + * A plain array table has a fixed size as well: allocating all the needed + * memory at once wastes a lot of memory, and when the exact number of + * entries is unknown it is impossible to size the array at all. + * + * A multi-level table balances these two disadvantages. A global high + * level table holding sub table entries is allocated first, and each sub + * table is allocated only when an index belonging to it needs to be saved. + * E.g. with a 32-bit index split 10-10-12 over three levels and a + * sequentially increasing index, memory grows by one entry table per 4K + * entries. + * + * The current implementation introduces a Three-Level table with a + * 10-10-12 split of the 32-bit index to serve cases which need to save + * millions of entries. An entry is addressed directly by its index, so no + * search is needed. + */ + +/* L3 table global table define. */ +#define MLX5_L3T_GT_OFFSET 22 +#define MLX5_L3T_GT_SIZE (1 << 10) +#define MLX5_L3T_GT_MASK (MLX5_L3T_GT_SIZE - 1) + +/* L3 table middle table define. */ +#define MLX5_L3T_MT_OFFSET 12 +#define MLX5_L3T_MT_SIZE (1 << 10) +#define MLX5_L3T_MT_MASK (MLX5_L3T_MT_SIZE - 1) + +/* L3 table entry table define. */ +#define MLX5_L3T_ET_OFFSET 0 +#define MLX5_L3T_ET_SIZE (1 << 12) +#define MLX5_L3T_ET_MASK (MLX5_L3T_ET_SIZE - 1) + +/* L3 table type. */ +enum mlx5_l3t_type { + MLX5_L3T_TYPE_WORD = 0, + MLX5_L3T_TYPE_DWORD, + MLX5_L3T_TYPE_QWORD, + MLX5_L3T_TYPE_PTR, + MLX5_L3T_TYPE_MAX, +}; + +struct mlx5_indexed_pool; + +/* Generic data struct. */ +union mlx5_l3t_data { + uint16_t word; + uint32_t dword; + uint64_t qword; + void *ptr; +}; + +/* L3 level table data structure. */ +struct mlx5_l3t_level_tbl { + uint64_t ref_cnt; /* Table ref_cnt. */ + void *tbl[]; /* Table array. */ +}; + +/* L3 word entry table data structure.
*/ +struct mlx5_l3t_entry_word { + uint32_t idx; /* Table index. */ + uint64_t ref_cnt; /* Table ref_cnt. */ + uint16_t entry[]; /* Entry array. */ +}; + +/* L3 double word entry table data structure. */ +struct mlx5_l3t_entry_dword { + uint32_t idx; /* Table index. */ + uint64_t ref_cnt; /* Table ref_cnt. */ + uint32_t entry[]; /* Entry array. */ +}; + +/* L3 quad word entry table data structure. */ +struct mlx5_l3t_entry_qword { + uint32_t idx; /* Table index. */ + uint64_t ref_cnt; /* Table ref_cnt. */ + uint64_t entry[]; /* Entry array. */ +}; + +/* L3 pointer entry table data structure. */ +struct mlx5_l3t_entry_ptr { + uint32_t idx; /* Table index. */ + uint64_t ref_cnt; /* Table ref_cnt. */ + void *entry[]; /* Entry array. */ +}; + +/* L3 table data structure. */ +struct mlx5_l3t_tbl { + enum mlx5_l3t_type type; /* Table type. */ + struct mlx5_indexed_pool *eip; + /* Table index pool handles. */ + struct mlx5_l3t_level_tbl *tbl; /* Global table index. */ +}; + +/* * The indexed memory entry index is made up of trunk index and offset of * the entry in the trunk. Since the entry index is 32 bits, in case user * prefers to have small trunks, user can change the macro below to a big @@ -345,6 +445,71 @@ struct mlx5_indexed_pool * */ void mlx5_ipool_dump(struct mlx5_indexed_pool *pool); +/** + * This function allocates a new empty Three-Level table. + * + * @param type + * The l3t entry type: word, double word, quad word or pointer. + * + * @return + * - Pointer to the allocated l3t. + * - NULL on error. Not enough memory, or invalid arguments. + */ +struct mlx5_l3t_tbl *mlx5_l3t_create(enum mlx5_l3t_type type); + +/** + * This function destroys the Three-Level table. + * + * @param tbl + * Pointer to the l3t. + */ +void mlx5_l3t_destroy(struct mlx5_l3t_tbl *tbl); + +/** + * This function gets the index entry from the Three-Level table. + * + * @param tbl + * Pointer to the l3t. + * @param idx + * Index to the entry. + * @param data + * Pointer to the memory which saves the entry data. + * When the function returns 0, data contains the entry data retrieved + * from the l3t. + * When the function returns -1, data is not modified. + * + * @return + * 0 on success, -1 on error. + */ +uint32_t mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx, + union mlx5_l3t_data *data); + +/** + * This function clears the index entry from the Three-Level table. + * + * @param tbl + * Pointer to the l3t. + * @param idx + * Index to the entry. + */ +void mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx); + +/** + * This function sets the index entry to the Three-Level table. + * + * @param tbl + * Pointer to the l3t. + * @param idx + * Index to the entry. + * @param data + * Pointer to the memory which contains the entry data to be saved to + * the l3t. + * + * @return + * 0 on success, -1 on error. + */ +uint32_t mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx, + union mlx5_l3t_data *data); + /* * Macros for linked list based on indexed memory. * Example data structure: -- 1.8.3.1
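Not part of the patch, for illustration: a minimal usage sketch of the new API, assuming a caller inside the mlx5 PMD that can include mlx5_utils.h; the index value and the stored dword are made-up examples:

#include <string.h>
#include "mlx5_utils.h"

/* Illustrative only: save, read back and release one 32-bit value. */
static int
l3t_usage_example(void)
{
	struct mlx5_l3t_tbl *tbl;
	union mlx5_l3t_data data;
	uint32_t idx = 12345; /* hypothetical sequentially allocated index */

	tbl = mlx5_l3t_create(MLX5_L3T_TYPE_DWORD);
	if (!tbl)
		return -1;
	data.dword = 0xcafe; /* hypothetical value to save at idx */
	if (mlx5_l3t_set_entry(tbl, idx, &data)) {
		mlx5_l3t_destroy(tbl);
		return -1;
	}
	memset(&data, 0, sizeof(data));
	if (mlx5_l3t_get_entry(tbl, idx, &data)) {
		mlx5_l3t_clear_entry(tbl, idx);
		mlx5_l3t_destroy(tbl);
		return -1;
	}
	/* data.dword is 0xcafe here, addressed directly by idx. */
	mlx5_l3t_clear_entry(tbl, idx); /* Empty sub-tables are released. */
	mlx5_l3t_destroy(tbl);
	return 0;
}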