DPDK patches and discussions
 help / color / mirror / Atom feed
From: Suanming Mou <suanmingm@nvidia.com>
To: Matan Azrad <matan@nvidia.com>,
	Shahaf Shuler <shahafs@nvidia.com>,
	Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Cc: dev@dpdk.org
Subject: [dpdk-dev] [PATCH v2 5/8] net/mlx5: make three level table thread safe
Date: Tue, 20 Oct 2020 11:02:25 +0800
Message-ID: <1603162949-150001-6-git-send-email-suanmingm@nvidia.com> (raw)
In-Reply-To: <1603162949-150001-1-git-send-email-suanmingm@nvidia.com>

This commit adds thread safety support in three level table using
spinlock and reference counter for each table entry.

A new mlx5_l3t_prepare_entry() function is added in order to support
multiple-thread operation.

Signed-off-by: Suanming Mou <suanmingm@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
---
 drivers/net/mlx5/mlx5_utils.c | 191 ++++++++++++++++++++++++++++++++++--------
 drivers/net/mlx5/mlx5_utils.h |  81 ++++++++++++++----
 2 files changed, 224 insertions(+), 48 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_utils.c b/drivers/net/mlx5/mlx5_utils.c
index fefe833..9a54fda 100644
--- a/drivers/net/mlx5/mlx5_utils.c
+++ b/drivers/net/mlx5/mlx5_utils.c
@@ -551,26 +551,23 @@ struct mlx5_l3t_tbl *
 	tbl->type = type;
 	switch (type) {
 	case MLX5_L3T_TYPE_WORD:
-		l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_word) +
-				  sizeof(uint16_t) * MLX5_L3T_ET_SIZE;
+		l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_word);
 		l3t_ip_cfg.type = "mlx5_l3t_e_tbl_w";
 		break;
 	case MLX5_L3T_TYPE_DWORD:
-		l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_dword) +
-				  sizeof(uint32_t) * MLX5_L3T_ET_SIZE;
+		l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_dword);
 		l3t_ip_cfg.type = "mlx5_l3t_e_tbl_dw";
 		break;
 	case MLX5_L3T_TYPE_QWORD:
-		l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_qword) +
-				  sizeof(uint64_t) * MLX5_L3T_ET_SIZE;
+		l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_qword);
 		l3t_ip_cfg.type = "mlx5_l3t_e_tbl_qw";
 		break;
 	default:
-		l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_ptr) +
-				  sizeof(void *) * MLX5_L3T_ET_SIZE;
+		l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_ptr);
 		l3t_ip_cfg.type = "mlx5_l3t_e_tbl_tpr";
 		break;
 	}
+	rte_spinlock_init(&tbl->sl);
 	tbl->eip = mlx5_ipool_create(&l3t_ip_cfg);
 	if (!tbl->eip) {
 		rte_errno = ENOMEM;
@@ -620,11 +617,15 @@ struct mlx5_l3t_tbl *
 	mlx5_free(tbl);
 }
 
-uint32_t
-mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
-		   union mlx5_l3t_data *data)
+static int32_t
+__l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
+		union mlx5_l3t_data *data)
 {
 	struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
+	struct mlx5_l3t_entry_word *w_e_tbl;
+	struct mlx5_l3t_entry_dword *dw_e_tbl;
+	struct mlx5_l3t_entry_qword *qw_e_tbl;
+	struct mlx5_l3t_entry_ptr *ptr_e_tbl;
 	void *e_tbl;
 	uint32_t entry_idx;
 
@@ -640,26 +641,46 @@ struct mlx5_l3t_tbl *
 	entry_idx = idx & MLX5_L3T_ET_MASK;
 	switch (tbl->type) {
 	case MLX5_L3T_TYPE_WORD:
-		data->word = ((struct mlx5_l3t_entry_word *)e_tbl)->entry
-			     [entry_idx];
+		w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
+		data->word = w_e_tbl->entry[entry_idx].data;
+		if (w_e_tbl->entry[entry_idx].data)
+			w_e_tbl->entry[entry_idx].ref_cnt++;
 		break;
 	case MLX5_L3T_TYPE_DWORD:
-		data->dword = ((struct mlx5_l3t_entry_dword *)e_tbl)->entry
-			     [entry_idx];
+		dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
+		data->dword = dw_e_tbl->entry[entry_idx].data;
+		if (dw_e_tbl->entry[entry_idx].data)
+			dw_e_tbl->entry[entry_idx].ref_cnt++;
 		break;
 	case MLX5_L3T_TYPE_QWORD:
-		data->qword = ((struct mlx5_l3t_entry_qword *)e_tbl)->entry
-			      [entry_idx];
+		qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
+		data->qword = qw_e_tbl->entry[entry_idx].data;
+		if (qw_e_tbl->entry[entry_idx].data)
+			qw_e_tbl->entry[entry_idx].ref_cnt++;
 		break;
 	default:
-		data->ptr = ((struct mlx5_l3t_entry_ptr *)e_tbl)->entry
-			    [entry_idx];
+		ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
+		data->ptr = ptr_e_tbl->entry[entry_idx].data;
+		if (ptr_e_tbl->entry[entry_idx].data)
+			ptr_e_tbl->entry[entry_idx].ref_cnt++;
 		break;
 	}
 	return 0;
 }
 
-void
+int32_t
+mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
+		   union mlx5_l3t_data *data)
+{
+	int ret;
+
+	rte_spinlock_lock(&tbl->sl);
+	ret = __l3t_get_entry(tbl, idx, data);
+	rte_spinlock_unlock(&tbl->sl);
+	return ret;
+}
+
+int32_t
 mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx)
 {
 	struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
@@ -670,36 +691,54 @@ struct mlx5_l3t_tbl *
 	void *e_tbl;
 	uint32_t entry_idx;
 	uint64_t ref_cnt;
+	int32_t ret = -1;
 
+	rte_spinlock_lock(&tbl->sl);
 	g_tbl = tbl->tbl;
 	if (!g_tbl)
-		return;
+		goto out;
 	m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
 	if (!m_tbl)
-		return;
+		goto out;
 	e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
 	if (!e_tbl)
-		return;
+		goto out;
 	entry_idx = idx & MLX5_L3T_ET_MASK;
 	switch (tbl->type) {
 	case MLX5_L3T_TYPE_WORD:
 		w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
-		w_e_tbl->entry[entry_idx] = 0;
+		MLX5_ASSERT(w_e_tbl->entry[entry_idx].ref_cnt);
+		ret = --w_e_tbl->entry[entry_idx].ref_cnt;
+		if (ret)
+			goto out;
+		w_e_tbl->entry[entry_idx].data = 0;
 		ref_cnt = --w_e_tbl->ref_cnt;
 		break;
 	case MLX5_L3T_TYPE_DWORD:
 		dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
-		dw_e_tbl->entry[entry_idx] = 0;
+		MLX5_ASSERT(dw_e_tbl->entry[entry_idx].ref_cnt);
+		ret = --dw_e_tbl->entry[entry_idx].ref_cnt;
+		if (ret)
+			goto out;
+		dw_e_tbl->entry[entry_idx].data = 0;
 		ref_cnt = --dw_e_tbl->ref_cnt;
 		break;
 	case MLX5_L3T_TYPE_QWORD:
 		qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
-		qw_e_tbl->entry[entry_idx] = 0;
+		MLX5_ASSERT(qw_e_tbl->entry[entry_idx].ref_cnt);
+		ret = --qw_e_tbl->entry[entry_idx].ref_cnt;
+		if (ret)
+			goto out;
+		qw_e_tbl->entry[entry_idx].data = 0;
 		ref_cnt = --qw_e_tbl->ref_cnt;
 		break;
 	default:
 		ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
-		ptr_e_tbl->entry[entry_idx] = NULL;
+		MLX5_ASSERT(ptr_e_tbl->entry[entry_idx].ref_cnt);
+		ret = --ptr_e_tbl->entry[entry_idx].ref_cnt;
+		if (ret)
+			goto out;
+		ptr_e_tbl->entry[entry_idx].data = NULL;
 		ref_cnt = --ptr_e_tbl->ref_cnt;
 		break;
 	}
@@ -718,11 +757,14 @@ struct mlx5_l3t_tbl *
 			}
 		}
 	}
+out:
+	rte_spinlock_unlock(&tbl->sl);
+	return ret;
 }
 
-uint32_t
-mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
-		   union mlx5_l3t_data *data)
+static int32_t
+__l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
+		union mlx5_l3t_data *data)
 {
 	struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
 	struct mlx5_l3t_entry_word *w_e_tbl;
@@ -783,24 +825,105 @@ struct mlx5_l3t_tbl *
 	switch (tbl->type) {
 	case MLX5_L3T_TYPE_WORD:
 		w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
-		w_e_tbl->entry[entry_idx] = data->word;
+		if (w_e_tbl->entry[entry_idx].data) {
+			data->word = w_e_tbl->entry[entry_idx].data;
+			w_e_tbl->entry[entry_idx].ref_cnt++;
+			rte_errno = EEXIST;
+			return -1;
+		}
+		w_e_tbl->entry[entry_idx].data = data->word;
+		w_e_tbl->entry[entry_idx].ref_cnt = 1;
 		w_e_tbl->ref_cnt++;
 		break;
 	case MLX5_L3T_TYPE_DWORD:
 		dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
-		dw_e_tbl->entry[entry_idx] = data->dword;
+		if (dw_e_tbl->entry[entry_idx].data) {
+			data->dword = dw_e_tbl->entry[entry_idx].data;
+			dw_e_tbl->entry[entry_idx].ref_cnt++;
+			rte_errno = EEXIST;
+			return -1;
+		}
+		dw_e_tbl->entry[entry_idx].data = data->dword;
+		dw_e_tbl->entry[entry_idx].ref_cnt = 1;
 		dw_e_tbl->ref_cnt++;
 		break;
 	case MLX5_L3T_TYPE_QWORD:
 		qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
-		qw_e_tbl->entry[entry_idx] = data->qword;
+		if (qw_e_tbl->entry[entry_idx].data) {
+			data->qword = qw_e_tbl->entry[entry_idx].data;
+			qw_e_tbl->entry[entry_idx].ref_cnt++;
+			rte_errno = EEXIST;
+			return -1;
+		}
+		qw_e_tbl->entry[entry_idx].data = data->qword;
+		qw_e_tbl->entry[entry_idx].ref_cnt = 1;
 		qw_e_tbl->ref_cnt++;
 		break;
 	default:
 		ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
-		ptr_e_tbl->entry[entry_idx] = data->ptr;
+		if (ptr_e_tbl->entry[entry_idx].data) {
+			data->ptr = ptr_e_tbl->entry[entry_idx].data;
+			ptr_e_tbl->entry[entry_idx].ref_cnt++;
+			rte_errno = EEXIST;
+			return -1;
+		}
+		ptr_e_tbl->entry[entry_idx].data = data->ptr;
+		ptr_e_tbl->entry[entry_idx].ref_cnt = 1;
 		ptr_e_tbl->ref_cnt++;
 		break;
 	}
 	return 0;
 }
+
+int32_t
+mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
+		   union mlx5_l3t_data *data)
+{
+	int ret;
+
+	rte_spinlock_lock(&tbl->sl);
+	ret = __l3t_set_entry(tbl, idx, data);
+	rte_spinlock_unlock(&tbl->sl);
+	return ret;
+}
+
+int32_t
+mlx5_l3t_prepare_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
+		       union mlx5_l3t_data *data,
+		       mlx5_l3t_alloc_callback_fn cb, void *ctx)
+{
+	int32_t ret;
+
+	rte_spinlock_lock(&tbl->sl);
+	/* Check if entry data is ready. */
+	ret = __l3t_get_entry(tbl, idx, data);
+	if (!ret) {
+		switch (tbl->type) {
+		case MLX5_L3T_TYPE_WORD:
+			if (data->word)
+				goto out;
+			break;
+		case MLX5_L3T_TYPE_DWORD:
+			if (data->dword)
+				goto out;
+			break;
+		case MLX5_L3T_TYPE_QWORD:
+			if (data->qword)
+				goto out;
+			break;
+		default:
+			if (data->ptr)
+				goto out;
+			break;
+		}
+	}
+	/* Entry data is not ready, use user callback to create it. */
+	ret = cb(ctx, data);
+	if (ret)
+		goto out;
+	/* Save the new allocated data to entry. */
+	ret = __l3t_set_entry(tbl, idx, data);
+out:
+	rte_spinlock_unlock(&tbl->sl);
+	return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h
index f078bdc..ca9bb76 100644
--- a/drivers/net/mlx5/mlx5_utils.h
+++ b/drivers/net/mlx5/mlx5_utils.h
@@ -118,29 +118,41 @@ struct mlx5_l3t_level_tbl {
 struct mlx5_l3t_entry_word {
 	uint32_t idx; /* Table index. */
 	uint64_t ref_cnt; /* Table ref_cnt. */
-	uint16_t entry[]; /* Entry array. */
-};
+	struct {
+		uint16_t data;
+		uint32_t ref_cnt;
+	} entry[MLX5_L3T_ET_SIZE]; /* Entry array */
+} __rte_packed;
 
 /* L3 double word entry table data structure. */
 struct mlx5_l3t_entry_dword {
 	uint32_t idx; /* Table index. */
 	uint64_t ref_cnt; /* Table ref_cnt. */
-	uint32_t entry[]; /* Entry array. */
-};
+	struct {
+		uint32_t data;
+		int32_t ref_cnt;
+	} entry[MLX5_L3T_ET_SIZE]; /* Entry array */
+} __rte_packed;
 
 /* L3 quad word entry table data structure. */
 struct mlx5_l3t_entry_qword {
 	uint32_t idx; /* Table index. */
 	uint64_t ref_cnt; /* Table ref_cnt. */
-	uint64_t entry[]; /* Entry array. */
-};
+	struct {
+		uint64_t data;
+		uint32_t ref_cnt;
+	} entry[MLX5_L3T_ET_SIZE]; /* Entry array */
+} __rte_packed;
 
 /* L3 pointer entry table data structure. */
 struct mlx5_l3t_entry_ptr {
 	uint32_t idx; /* Table index. */
 	uint64_t ref_cnt; /* Table ref_cnt. */
-	void *entry[]; /* Entry array. */
-};
+	struct {
+		void *data;
+		uint32_t ref_cnt;
+	} entry[MLX5_L3T_ET_SIZE]; /* Entry array */
+} __rte_packed;
 
 /* L3 table data structure. */
 struct mlx5_l3t_tbl {
@@ -148,8 +160,13 @@ struct mlx5_l3t_tbl {
 	struct mlx5_indexed_pool *eip;
 	/* Table index pool handles. */
 	struct mlx5_l3t_level_tbl *tbl; /* Global table index. */
+	rte_spinlock_t sl; /* The table lock. */
 };
 
+/** Type of function that is used to handle the data before freeing. */
+typedef int32_t (*mlx5_l3t_alloc_callback_fn)(void *ctx,
+					   union mlx5_l3t_data *data);
+
 /*
  * The indexed memory entry index is made up of trunk index and offset of
  * the entry in the trunk. Since the entry index is 32 bits, in case user
@@ -535,32 +552,68 @@ struct mlx5_indexed_pool *
  *   0 if success, -1 on error.
  */
 
-uint32_t mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
+int32_t mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
 			    union mlx5_l3t_data *data);
+
 /**
- * This function clears the index entry from Three-level table.
+ * This function gets the index entry from Three-level table.
+ *
+ * If the index entry is not available, allocate new one by callback
+ * function and fill in the entry.
  *
  * @param tbl
  *   Pointer to the l3t.
  * @param idx
  *   Index to the entry.
+ * @param data
+ *   Pointer to the memory which saves the entry data.
+ *   When function call returns 0, data contains the entry data get from
+ *   l3t.
+ *   When function call returns -1, data is not modified.
+ * @param cb
+ *   Callback function to allocate new data.
+ * @param ctx
+ *   Context for callback function.
+ *
+ * @return
+ *   0 if success, -1 on error.
  */
-void mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx);
+
+int32_t mlx5_l3t_prepare_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
+			       union mlx5_l3t_data *data,
+			       mlx5_l3t_alloc_callback_fn cb, void *ctx);
 
 /**
- * This function gets the index entry from Three-level table.
+ * This function decreases and clear index entry if reference
+ * counter is 0 from Three-level table.
  *
  * @param tbl
  *   Pointer to the l3t.
  * @param idx
  *   Index to the entry.
- * @param data
+ *
+ * @return
+ *   The remaining reference count, 0 means entry be cleared, -1 on error.
+ */
+int32_t mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx);
+
+/**
+ * This function sets the index entry to Three-level table.
+ * If the entry is already set, the EEXIST errno will be given, and
+ * the set data will be filled to the data.
+ *
+ * @param tbl[in]
+ *   Pointer to the l3t.
+ * @param idx[in]
+ *   Index to the entry.
+ * @param data[in/out]
  *   Pointer to the memory which contains the entry data save to l3t.
+ *   If the entry is already set, the set data will be filled.
  *
  * @return
  *   0 if success, -1 on error.
  */
-uint32_t mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
+int32_t mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
 			    union mlx5_l3t_data *data);
 
 /*
-- 
1.8.3.1


  parent reply	other threads:[~2020-10-20  3:03 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-06 11:38 [dpdk-dev] [PATCH 0/6] net/mlx5: make counter " Suanming Mou
2020-10-06 11:38 ` [dpdk-dev] [PATCH 1/6] net/mlx5: locate aging pools in the general container Suanming Mou
2020-10-06 11:38 ` [dpdk-dev] [PATCH 2/6] net/mlx5: optimize shared counter memory Suanming Mou
2020-10-06 11:38 ` [dpdk-dev] [PATCH 3/6] net/mlx5: remove single counter container Suanming Mou
2020-10-06 11:38 ` [dpdk-dev] [PATCH 4/6] net/mlx5: synchronize flow counter pool creation Suanming Mou
2020-10-06 11:38 ` [dpdk-dev] [PATCH 5/6] net/mlx5: make three level table thread safe Suanming Mou
2020-10-06 11:38 ` [dpdk-dev] [PATCH 6/6] net/mlx5: make shared counters " Suanming Mou
2020-10-20  3:02 ` [dpdk-dev] [PATCH v2 0/8] net/mlx5: make counter " Suanming Mou
2020-10-20  3:02   ` [dpdk-dev] [PATCH v2 1/8] net/mlx5: locate aging pools in the general container Suanming Mou
2020-10-20  3:02   ` [dpdk-dev] [PATCH v2 2/8] net/mlx5: optimize shared counter memory Suanming Mou
2020-10-20  3:02   ` [dpdk-dev] [PATCH v2 3/8] net/mlx5: remove single counter container Suanming Mou
2020-10-20  3:02   ` [dpdk-dev] [PATCH v2 4/8] net/mlx5: synchronize flow counter pool creation Suanming Mou
2020-10-20  3:02   ` Suanming Mou [this message]
2020-10-20  3:02   ` [dpdk-dev] [PATCH v2 6/8] net/mlx5: make shared counters thread safe Suanming Mou
2020-10-20  3:02   ` [dpdk-dev] [PATCH v2 7/8] net/mlx5: rename flow counter macro Suanming Mou
2020-10-20  3:02   ` [dpdk-dev] [PATCH v2 8/8] net/mlx5: optimize counter extend memory Suanming Mou
2020-10-20 22:59   ` [dpdk-dev] [PATCH v2 0/8] net/mlx5: make counter thread safe Raslan Darawsheh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1603162949-150001-6-git-send-email-suanmingm@nvidia.com \
    --to=suanmingm@nvidia.com \
    --cc=dev@dpdk.org \
    --cc=matan@nvidia.com \
    --cc=shahafs@nvidia.com \
    --cc=viacheslavo@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

DPDK patches and discussions

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://inbox.dpdk.org/dev/0 dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dev dev/ https://inbox.dpdk.org/dev \
		dev@dpdk.org
	public-inbox-index dev

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git