DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH 0/3] net/mlx: remove device register remap
@ 2019-03-25 19:36 Yongseok Koh
  2019-03-25 19:36 ` Yongseok Koh
                   ` (7 more replies)
  0 siblings, 8 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-03-25 19:36 UTC (permalink / raw)
  To: shahafs; +Cc: dev

This patchset lifts the requirement of reserving a huge virtual address space
and remapping device UAR registers onto it in order to use the same address
between primary and secondary processes.

Yongseok Koh (3):
  net/mlx5: fix recursive inclusion of header file
  net/mlx5: remove device register remap
  net/mlx4: remove device register remap

 drivers/net/mlx4/mlx4.c            | 274 ++++++++++++++++++++++++-------------
 drivers/net/mlx4/mlx4.h            |  22 ++-
 drivers/net/mlx4/mlx4_prm.h        |   2 -
 drivers/net/mlx4/mlx4_rxtx.c       |   2 +-
 drivers/net/mlx4/mlx4_rxtx.h       |   3 +-
 drivers/net/mlx4/mlx4_txq.c        | 102 +++++---------
 drivers/net/mlx5/mlx5.c            | 262 ++++++++++++++++++++++++-----------
 drivers/net/mlx5/mlx5.h            |  20 ++-
 drivers/net/mlx5/mlx5_flow.c       |   5 +-
 drivers/net/mlx5/mlx5_flow_dv.c    |   3 +-
 drivers/net/mlx5/mlx5_flow_verbs.c |   5 +-
 drivers/net/mlx5/mlx5_rxtx.h       |   7 +-
 drivers/net/mlx5/mlx5_trigger.c    |   2 +-
 drivers/net/mlx5/mlx5_txq.c        |  91 +++++-------
 drivers/net/mlx5/mlx5_vlan.c       |   3 +-
 15 files changed, 481 insertions(+), 322 deletions(-)

-- 
2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH 0/3] net/mlx: remove device register remap
  2019-03-25 19:36 [dpdk-dev] [PATCH 0/3] net/mlx: remove device register remap Yongseok Koh
@ 2019-03-25 19:36 ` Yongseok Koh
  2019-03-25 19:36 ` [dpdk-dev] [PATCH 1/3] net/mlx5: fix recursive inclusion of header file Yongseok Koh
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-03-25 19:36 UTC (permalink / raw)
  To: shahafs; +Cc: dev

This patchset lifts the requirement of reserving a huge virtual address space
and remapping device UAR registers onto it in order to use the same address
between primary and secondary processes.

Yongseok Koh (3):
  net/mlx5: fix recursive inclusion of header file
  net/mlx5: remove device register remap
  net/mlx4: remove device register remap

 drivers/net/mlx4/mlx4.c            | 274 ++++++++++++++++++++++++-------------
 drivers/net/mlx4/mlx4.h            |  22 ++-
 drivers/net/mlx4/mlx4_prm.h        |   2 -
 drivers/net/mlx4/mlx4_rxtx.c       |   2 +-
 drivers/net/mlx4/mlx4_rxtx.h       |   3 +-
 drivers/net/mlx4/mlx4_txq.c        | 102 +++++---------
 drivers/net/mlx5/mlx5.c            | 262 ++++++++++++++++++++++++-----------
 drivers/net/mlx5/mlx5.h            |  20 ++-
 drivers/net/mlx5/mlx5_flow.c       |   5 +-
 drivers/net/mlx5/mlx5_flow_dv.c    |   3 +-
 drivers/net/mlx5/mlx5_flow_verbs.c |   5 +-
 drivers/net/mlx5/mlx5_rxtx.h       |   7 +-
 drivers/net/mlx5/mlx5_trigger.c    |   2 +-
 drivers/net/mlx5/mlx5_txq.c        |  91 +++++-------
 drivers/net/mlx5/mlx5_vlan.c       |   3 +-
 15 files changed, 481 insertions(+), 322 deletions(-)

-- 
2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH 1/3] net/mlx5: fix recursive inclusion of header file
  2019-03-25 19:36 [dpdk-dev] [PATCH 0/3] net/mlx: remove device register remap Yongseok Koh
  2019-03-25 19:36 ` Yongseok Koh
@ 2019-03-25 19:36 ` Yongseok Koh
  2019-03-25 19:36   ` Yongseok Koh
  2019-03-25 19:36 ` [dpdk-dev] [PATCH 2/3] net/mlx5: remove device register remap Yongseok Koh
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 66+ messages in thread
From: Yongseok Koh @ 2019-03-25 19:36 UTC (permalink / raw)
  To: shahafs; +Cc: dev

mlx5.h includes mlx5_rxtx.h and mlx5_rxtx.h includes mlx5.h recursively.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.h            | 1 -
 drivers/net/mlx5/mlx5_flow.c       | 5 +++--
 drivers/net/mlx5/mlx5_flow_dv.c    | 3 ++-
 drivers/net/mlx5/mlx5_flow_verbs.c | 5 +++--
 drivers/net/mlx5/mlx5_vlan.c       | 3 ++-
 5 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 410f17ab53..a82972d166 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -33,7 +33,6 @@
 
 #include "mlx5_utils.h"
 #include "mlx5_mr.h"
-#include "mlx5_rxtx.h"
 #include "mlx5_autoconf.h"
 #include "mlx5_defs.h"
 
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index dea38e2604..69d77cf8a7 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -30,9 +30,10 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
-#include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_glue.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
 
 /* Dev ops structure defined in mlx5.c */
 extern const struct eth_dev_ops mlx5_dev_ops;
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index ebcdd154fc..c068b3ee42 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -29,9 +29,10 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
 #include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
 
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index 6c4f52f1ec..05f11ab94e 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -29,9 +29,10 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
-#include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_glue.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
 
 #define VERBS_SPEC_INNER(item_flags) \
 	(!!((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ? IBV_FLOW_SPEC_INNER : 0)
diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c
index 6568a3a475..4004930942 100644
--- a/drivers/net/mlx5/mlx5_vlan.c
+++ b/drivers/net/mlx5/mlx5_vlan.c
@@ -27,10 +27,11 @@
 #include <rte_ethdev_driver.h>
 #include <rte_common.h>
 
-#include "mlx5_utils.h"
 #include "mlx5.h"
 #include "mlx5_autoconf.h"
 #include "mlx5_glue.h"
+#include "mlx5_rxtx.h"
+#include "mlx5_utils.h"
 
 /**
  * DPDK callback to configure a VLAN filter.
-- 
2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH 1/3] net/mlx5: fix recursive inclusion of header file
  2019-03-25 19:36 ` [dpdk-dev] [PATCH 1/3] net/mlx5: fix recursive inclusion of header file Yongseok Koh
@ 2019-03-25 19:36   ` Yongseok Koh
  0 siblings, 0 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-03-25 19:36 UTC (permalink / raw)
  To: shahafs; +Cc: dev

mlx5.h includes mlx5_rxtx.h and mlx5_rxtx.h includes mlx5.h recursively.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.h            | 1 -
 drivers/net/mlx5/mlx5_flow.c       | 5 +++--
 drivers/net/mlx5/mlx5_flow_dv.c    | 3 ++-
 drivers/net/mlx5/mlx5_flow_verbs.c | 5 +++--
 drivers/net/mlx5/mlx5_vlan.c       | 3 ++-
 5 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 410f17ab53..a82972d166 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -33,7 +33,6 @@
 
 #include "mlx5_utils.h"
 #include "mlx5_mr.h"
-#include "mlx5_rxtx.h"
 #include "mlx5_autoconf.h"
 #include "mlx5_defs.h"
 
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index dea38e2604..69d77cf8a7 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -30,9 +30,10 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
-#include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_glue.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
 
 /* Dev ops structure defined in mlx5.c */
 extern const struct eth_dev_ops mlx5_dev_ops;
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index ebcdd154fc..c068b3ee42 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -29,9 +29,10 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
 #include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
 
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index 6c4f52f1ec..05f11ab94e 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -29,9 +29,10 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
-#include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_glue.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
 
 #define VERBS_SPEC_INNER(item_flags) \
 	(!!((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ? IBV_FLOW_SPEC_INNER : 0)
diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c
index 6568a3a475..4004930942 100644
--- a/drivers/net/mlx5/mlx5_vlan.c
+++ b/drivers/net/mlx5/mlx5_vlan.c
@@ -27,10 +27,11 @@
 #include <rte_ethdev_driver.h>
 #include <rte_common.h>
 
-#include "mlx5_utils.h"
 #include "mlx5.h"
 #include "mlx5_autoconf.h"
 #include "mlx5_glue.h"
+#include "mlx5_rxtx.h"
+#include "mlx5_utils.h"
 
 /**
  * DPDK callback to configure a VLAN filter.
-- 
2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH 2/3] net/mlx5: remove device register remap
  2019-03-25 19:36 [dpdk-dev] [PATCH 0/3] net/mlx: remove device register remap Yongseok Koh
  2019-03-25 19:36 ` Yongseok Koh
  2019-03-25 19:36 ` [dpdk-dev] [PATCH 1/3] net/mlx5: fix recursive inclusion of header file Yongseok Koh
@ 2019-03-25 19:36 ` Yongseok Koh
  2019-03-25 19:36   ` Yongseok Koh
  2019-03-25 19:36 ` [dpdk-dev] [PATCH 3/3] net/mlx4: " Yongseok Koh
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 66+ messages in thread
From: Yongseok Koh @ 2019-03-25 19:36 UTC (permalink / raw)
  To: shahafs; +Cc: dev

UAR (User Access Region) registers will be stored in a process-local table
and a process accesses a register in a table entry with index. Alloc/free
of table entry is managed by a global bitmap.

When there's a need to store a UAR register such as Tx BlueFlame register
for doorbell, an index should be allocated by mlx5_uar_alloc_index() and
address of the allocated table entry must be acquired by
mlx5_uar_get_addr_ptr() so that the table can be expanded if overflowed.
The local UAR register table doesn't cover all the indexes in the bitmap.
This will be expanded if more indexes are allocated than the current size
of the table.

For example, the BlueFlame register for Tx doorbell has to be remapped on
each secondary process. On initialization, primary process allocates an
index for the UAR register table and stores the register address in the
indexed entry of its own table when configuring a Tx queue. The index is
stored in the shared memory (txq->bfreg_idx) and visible to secondary
processes. As secondary processes know the index, each process stores
remapped register in the same indexed entry of its local UAR register
table.

On the datapath of each process, the register can be referenced simply by
MLX5_UAR_REG(idx) which accesses its local UAR register table by the index.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.c         | 262 +++++++++++++++++++++++++++-------------
 drivers/net/mlx5/mlx5.h         |  19 ++-
 drivers/net/mlx5/mlx5_rxtx.h    |   7 +-
 drivers/net/mlx5/mlx5_trigger.c |   2 +-
 drivers/net/mlx5/mlx5_txq.c     |  91 ++++++--------
 5 files changed, 236 insertions(+), 145 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 93c0fc8c20..1860273194 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -26,6 +26,7 @@
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
 
+#include <rte_bitmap.h>
 #include <rte_malloc.h>
 #include <rte_ethdev_driver.h>
 #include <rte_ethdev_pci.h>
@@ -132,7 +133,7 @@ struct mlx5_shared_data *mlx5_shared_data;
 static rte_spinlock_t mlx5_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
 
 /* Process local data for secondary processes. */
-static struct mlx5_local_data mlx5_local_data;
+struct mlx5_local_data mlx5_local_data;
 
 /** Driver-specific log messages type. */
 int mlx5_logtype;
@@ -647,130 +648,225 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
 
 static struct rte_pci_driver mlx5_driver;
 
+
+/**
+ * Expand the local UAR register table.
+ *
+ * @param size
+ *   Size of the table to be expanded
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
 static int
-find_lower_va_bound(const struct rte_memseg_list *msl,
-		const struct rte_memseg *ms, void *arg)
+uar_expand_table(uint32_t size)
 {
-	void **addr = arg;
+	struct mlx5_local_data *ld = &mlx5_local_data;
+	void *mem;
+	size_t tbl_sz = ld->uar_table_sz;
 
-	if (msl->external)
+	if (size <= tbl_sz)
 		return 0;
-	if (*addr == NULL)
-		*addr = ms->addr;
-	else
-		*addr = RTE_MIN(*addr, ms->addr);
-
+	tbl_sz = RTE_ALIGN_CEIL(size, RTE_BITMAP_SLAB_BIT_SIZE);
+	mem = rte_realloc(ld->uar_table, tbl_sz * sizeof(void *),
+			  RTE_CACHE_LINE_SIZE);
+	if (!mem) {
+		rte_errno = ENOMEM;
+		DRV_LOG(ERR, "failed to expand uar table");
+		return -rte_errno;
+	}
+	DRV_LOG(DEBUG, "UAR reg. table is expanded to %zu", tbl_sz);
+	ld->uar_table = mem;
+	ld->uar_table_sz = tbl_sz;
 	return 0;
 }
 
 /**
- * Reserve UAR address space for primary process.
+ * Return the pointer of the indexed slot in the local UAR register table.
  *
- * Process local resource is used by both primary and secondary to avoid
- * duplicate reservation. The space has to be available on both primary and
- * secondary process, TXQ UAR maps to this area using fixed mmap w/o double
- * check.
+ * The indexed slot must be allocated by mlx5_uar_alloc_index() in advance. And
+ * the table will be expanded if overflowed.
+ *
+ * @param idx
+ *   Index of the table.
  *
  * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
+ *   Pointer of table entry on success, NULL otherwise and rte_errno is set.
  */
-static int
-mlx5_uar_init_primary(void)
+void **
+mlx5_uar_get_addr_ptr(uint32_t idx)
+{
+	struct mlx5_local_data *ld = &mlx5_local_data;
+	int ret;
+
+	assert(idx < MLX5_UAR_TABLE_SIZE_MAX);
+	if (idx >= ld->uar_table_sz) {
+		ret = uar_expand_table(idx + 1);
+		if (ret)
+			return NULL;
+	}
+	return &(*ld->uar_table)[idx];
+}
+
+/**
+ * Allocate a slot of UAR register table.
+ *
+ * Allocation is done by scanning the global bitmap. The global spinlock should
+ * be held.
+ *
+ * @return
+ *   Index of a free slot on success, a negative errno value otherwise and
+ *   rte_errno is set.
+ */
+uint32_t
+mlx5_uar_alloc_index(void)
 {
 	struct mlx5_shared_data *sd = mlx5_shared_data;
-	void *addr = (void *)0;
+	uint32_t idx = 0;
+	uint64_t slab = 0;
+	int ret;
 
-	if (sd->uar_base)
-		return 0;
-	/* find out lower bound of hugepage segments */
-	rte_memseg_walk(find_lower_va_bound, &addr);
-	/* keep distance to hugepages to minimize potential conflicts. */
-	addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX5_UAR_OFFSET + MLX5_UAR_SIZE));
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(addr, MLX5_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		DRV_LOG(ERR,
-			"Failed to reserve UAR address space, please"
-			" adjust MLX5_UAR_SIZE or try --base-virtaddr");
-		rte_errno = ENOMEM;
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	rte_spinlock_lock(&sd->lock);
+	__rte_bitmap_scan_init(sd->uar_bmp);
+	ret = rte_bitmap_scan(sd->uar_bmp, &idx, &slab);
+	if (unlikely(!ret)) {
+		/*
+		 * This cannot happen unless there are unreasonably large number
+		 * of queues and ports.
+		 */
+		rte_errno = ENOSPC;
+		rte_spinlock_unlock(&sd->lock);
 		return -rte_errno;
 	}
-	/* Accept either same addr or a new addr returned from mmap if target
-	 * range occupied.
-	 */
-	DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
-	sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
-	return 0;
+	idx += __builtin_ctzll(slab);
+	/* Mark the slot is occupied. */
+	rte_bitmap_clear(sd->uar_bmp, idx);
+	rte_spinlock_unlock(&sd->lock);
+	DRV_LOG(DEBUG, "index %d is allocated in UAR reg. table", idx);
+	return idx;
 }
 
 /**
- * Unmap UAR address space reserved for primary process.
+ * Free a slot of UAR register table.
  */
-static void
-mlx5_uar_uninit_primary(void)
+void
+mlx5_uar_free_index(uint32_t idx)
 {
 	struct mlx5_shared_data *sd = mlx5_shared_data;
 
-	if (!sd->uar_base)
-		return;
-	munmap(sd->uar_base, MLX5_UAR_SIZE);
-	sd->uar_base = NULL;
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	assert(idx < MLX5_UAR_TABLE_SIZE_MAX);
+	rte_spinlock_lock(&sd->lock);
+	/* Mark the slot is empty. */
+	rte_bitmap_set(sd->uar_bmp, idx);
+	rte_spinlock_unlock(&sd->lock);
+	DRV_LOG(DEBUG, "index %d is freed in UAR reg. table", idx);
 }
 
 /**
- * Reserve UAR address space for secondary process, align with primary process.
+ * Initialize UAR register table bitmap.
+ *
+ * UAR registers will be stored in a process-local table and the table is
+ * managed by a global bitmap. When there's a need to store a UAR register, an
+ * index should be allocated by mlx5_uar_alloc_index() and address of the
+ * allocated table entry must be acquired by mlx5_uar_get_addr_ptr() so that the
+ * table can be expanded if overflowed.
+ *
+ * The local UAR register table doesn't cover all the indexes in the bitmap.
+ * This will be expanded if more indexes are allocated than the current size of
+ * the table.
+ *
+ * Secondary process should have reference of the index and store remapped
+ * register at the same index in its local UAR register table.
+ *
+ * On the datapath of each process, the register can be referenced simply by
+ * MLX5_UAR_REG(idx).
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_uar_init_secondary(void)
+uar_init_primary(void)
 {
 	struct mlx5_shared_data *sd = mlx5_shared_data;
-	struct mlx5_local_data *ld = &mlx5_local_data;
-	void *addr;
+	struct rte_bitmap *bmp;
+	void *bmp_mem;
+	uint32_t bmp_size;
+	unsigned int i;
 
-	if (ld->uar_base) { /* Already reserved. */
-		assert(sd->uar_base == ld->uar_base);
-		return 0;
-	}
-	assert(sd->uar_base);
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(sd->uar_base, MLX5_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		DRV_LOG(ERR, "UAR mmap failed: %p size: %llu",
-			sd->uar_base, MLX5_UAR_SIZE);
-		rte_errno = ENXIO;
+	bmp_size = rte_bitmap_get_memory_footprint(MLX5_UAR_TABLE_SIZE_MAX);
+	bmp_mem = rte_zmalloc("uar_table", bmp_size, RTE_CACHE_LINE_SIZE);
+	if (!bmp_mem) {
+		rte_errno = ENOMEM;
+		DRV_LOG(ERR, "failed to allocate memory for uar table");
 		return -rte_errno;
 	}
-	if (sd->uar_base != addr) {
-		DRV_LOG(ERR,
-			"UAR address %p size %llu occupied, please"
-			" adjust MLX5_UAR_OFFSET or try EAL parameter"
-			" --base-virtaddr",
-			sd->uar_base, MLX5_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
+	bmp = rte_bitmap_init(MLX5_UAR_TABLE_SIZE_MAX, bmp_mem, bmp_size);
+	/* Set all bits: 1 means vacant and 0 means occupied. */
+	for (i = 0; i < bmp->array2_size; ++i)
+		rte_bitmap_set_slab(bmp, i * RTE_BITMAP_SLAB_BIT_SIZE, -1);
+	sd->uar_bmp = bmp;
+	return 0;
+}
+
+/**
+ * Un-initialize UAR register resources.
+ *
+ * The global bitmap and the register table of primary process are freed.
+ */
+static void
+uar_uninit_primary(void)
+{
+	struct mlx5_shared_data *sd = mlx5_shared_data;
+	struct mlx5_local_data *ld = &mlx5_local_data;
+
+	if (sd->uar_bmp) {
+		rte_bitmap_free(sd->uar_bmp);
+		rte_free(sd->uar_bmp);
+		sd->uar_bmp = NULL;
+	}
+	/* Free primary's table. */
+	if (ld->uar_table) {
+		rte_free(ld->uar_table);
+		ld->uar_table = NULL;
+		ld->uar_table_sz = 0;
 	}
-	ld->uar_base = addr;
-	DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
+}
+
+/**
+ * Initialize UAR register resources for secondary process.
+ *
+ * Allocate the local UAR register table. Initially, the number of entries is
+ * same as the size of a bitmap slab.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+uar_init_secondary(void)
+{
+	/* Prepare at least a bitmap slab. */
+	uar_expand_table(RTE_BITMAP_SLAB_BIT_SIZE);
 	return 0;
 }
 
 /**
- * Unmap UAR address space reserved for secondary process.
+ * Un-initialize UAR register resources for secondary process.
+ *
+ * The local UAR register table is freed.
  */
 static void
-mlx5_uar_uninit_secondary(void)
+uar_uninit_secondary(void)
 {
 	struct mlx5_local_data *ld = &mlx5_local_data;
 
-	if (!ld->uar_base)
-		return;
-	munmap(ld->uar_base, MLX5_UAR_SIZE);
-	ld->uar_base = NULL;
+	/* Free process-local table. */
+	if (ld->uar_table) {
+		rte_free(ld->uar_table);
+		ld->uar_table = NULL;
+		ld->uar_table_sz = 0;
+	}
 }
 
 /**
@@ -804,7 +900,7 @@ mlx5_init_once(void)
 		rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
 						mlx5_mr_mem_event_cb, NULL);
 		mlx5_mp_init_primary();
-		ret = mlx5_uar_init_primary();
+		ret = uar_init_primary();
 		if (ret)
 			goto error;
 		sd->init_done = true;
@@ -813,7 +909,7 @@ mlx5_init_once(void)
 		if (ld->init_done)
 			break;
 		mlx5_mp_init_secondary();
-		ret = mlx5_uar_init_secondary();
+		ret = uar_init_secondary();
 		if (ret)
 			goto error;
 		++sd->secondary_cnt;
@@ -827,12 +923,12 @@ mlx5_init_once(void)
 error:
 	switch (rte_eal_process_type()) {
 	case RTE_PROC_PRIMARY:
-		mlx5_uar_uninit_primary();
+		uar_uninit_primary();
 		mlx5_mp_uninit_primary();
 		rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB", NULL);
 		break;
 	case RTE_PROC_SECONDARY:
-		mlx5_uar_uninit_secondary();
+		uar_uninit_secondary();
 		mlx5_mp_uninit_secondary();
 		break;
 	default:
@@ -1058,7 +1154,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 			goto error;
 		}
 		/* Remap UAR for Tx queues. */
-		err = mlx5_tx_uar_remap(eth_dev, err);
+		err = mlx5_txq_uar_init(eth_dev, err);
 		if (err) {
 			err = rte_errno;
 			goto error;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index a82972d166..5fe8f6ed0c 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -97,8 +97,8 @@ struct mlx5_shared_data {
 	/* Global spinlock for primary and secondary processes. */
 	int init_done; /* Whether primary has done initialization. */
 	unsigned int secondary_cnt; /* Number of secondary processes init'd. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
+	struct rte_bitmap *uar_bmp;
+	/* Bitmap to keep track of BlueFlame register table. */
 	struct mlx5_dev_list mem_event_cb_list;
 	rte_rwlock_t mem_event_rwlock;
 };
@@ -106,11 +106,19 @@ struct mlx5_shared_data {
 /* Per-process data structure, not visible to other processes. */
 struct mlx5_local_data {
 	int init_done; /* Whether a secondary has done initialization. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
+	void *(*uar_table)[];
+	/* Table of BlueFlame registers for each process. */
+	size_t uar_table_sz;
+	/* Size of BlueFlame register table. */
 };
 
 extern struct mlx5_shared_data *mlx5_shared_data;
+extern struct mlx5_local_data mlx5_local_data;
+
+/* The maximum size of BlueFlame register table. */
+#define MLX5_UAR_TABLE_SIZE_MAX (RTE_MAX_ETHPORTS * RTE_MAX_QUEUES_PER_PORT)
+
+#define MLX5_UAR_REG(idx) ((*mlx5_local_data.uar_table)[(idx)])
 
 struct mlx5_counter_ctrl {
 	/* Name of the counter. */
@@ -301,6 +309,9 @@ struct mlx5_priv {
 /* mlx5.c */
 
 int mlx5_getenv_int(const char *);
+void **mlx5_uar_get_addr_ptr(uint32_t idx);
+uint32_t mlx5_uar_alloc_index(void);
+void mlx5_uar_free_index(uint32_t idx);
 
 /* mlx5_ethdev.c */
 
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 53115dde3d..1d28a9c5f2 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -202,7 +202,7 @@ struct mlx5_txq_data {
 	volatile void *wqes; /* Work queue (use volatile to write into). */
 	volatile uint32_t *qp_db; /* Work queue doorbell. */
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
-	volatile void *bf_reg; /* Blueflame register remapped. */
+	uint32_t bfreg_idx; /* Blueflame register index. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
 #ifndef RTE_ARCH_64
@@ -231,7 +231,6 @@ struct mlx5_txq_ctrl {
 	struct mlx5_priv *priv; /* Back pointer to private data. */
 	struct mlx5_txq_data txq; /* Data path structure. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
-	volatile void *bf_reg_orig; /* Blueflame register from verbs. */
 	uint16_t idx; /* Queue index. */
 };
 
@@ -302,7 +301,7 @@ uint64_t mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev);
 int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 			unsigned int socket, const struct rte_eth_txconf *conf);
 void mlx5_tx_queue_release(void *dpdk_txq);
-int mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd);
+int mlx5_txq_uar_init(struct rte_eth_dev *dev, int fd);
 struct mlx5_txq_ibv *mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx);
 struct mlx5_txq_ibv *mlx5_txq_ibv_get(struct rte_eth_dev *dev, uint16_t idx);
 int mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv);
@@ -701,7 +700,7 @@ static __rte_always_inline void
 mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe,
 		       int cond)
 {
-	uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
+	uint64_t *dst = MLX5_UAR_REG(txq->bfreg_idx);
 	volatile uint64_t *src = ((volatile uint64_t *)wqe);
 
 	rte_cio_wmb();
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 7c9ff921ab..d98ef87ef5 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -58,7 +58,7 @@ mlx5_txq_start(struct rte_eth_dev *dev)
 			goto error;
 		}
 	}
-	ret = mlx5_tx_uar_remap(dev, priv->ctx->cmd_fd);
+	ret = mlx5_txq_uar_init(dev, priv->ctx->cmd_fd);
 	if (ret) {
 		/* Adjust index for rollback. */
 		i = priv->txqs_n - 1;
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 5640fe1b91..0cea4ad45f 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -231,9 +231,13 @@ mlx5_tx_queue_release(void *dpdk_txq)
 
 
 /**
- * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
- * Both primary and secondary process do mmap to make UAR address
- * aligned.
+ * Initialize UAR register access for Tx.
+ *
+ * For both primary and secondary, initialize UAR locks for atomic access.
+ *
+ * For secondary, remap BlueFlame registers for secondary process. Remapped
+ * address is stored at the same indexed entry of the local UAR register table
+ * as primary process.
  *
  * @param[in] dev
  *   Pointer to Ethernet device.
@@ -244,75 +248,48 @@ mlx5_tx_queue_release(void *dpdk_txq)
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
+mlx5_txq_uar_init(struct rte_eth_dev *dev, int fd)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	unsigned int i, j;
-	uintptr_t pages[priv->txqs_n];
-	unsigned int pages_n = 0;
-	uintptr_t uar_va;
-	uintptr_t off;
-	void *addr;
-	void *ret;
 	struct mlx5_txq_data *txq;
 	struct mlx5_txq_ctrl *txq_ctrl;
-	int already_mapped;
+	void *addr;
+	void **addr_ptr;
 	size_t page_size = sysconf(_SC_PAGESIZE);
+	unsigned int i;
 #ifndef RTE_ARCH_64
 	unsigned int lock_idx;
 #endif
 
-	memset(pages, 0, priv->txqs_n * sizeof(uintptr_t));
-	/*
-	 * As rdma-core, UARs are mapped in size of OS page size.
-	 * Use aligned address to avoid duplicate mmap.
-	 * Ref to libmlx5 function: mlx5_init_context()
-	 */
 	for (i = 0; i != priv->txqs_n; ++i) {
 		if (!(*priv->txqs)[i])
 			continue;
 		txq = (*priv->txqs)[i];
 		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
 		assert(txq_ctrl->idx == (uint16_t)i);
-		/* UAR addr form verbs used to find dup and offset in page. */
-		uar_va = (uintptr_t)txq_ctrl->bf_reg_orig;
-		off = uar_va & (page_size - 1); /* offset in page. */
-		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
-		already_mapped = 0;
-		for (j = 0; j != pages_n; ++j) {
-			if (pages[j] == uar_va) {
-				already_mapped = 1;
-				break;
-			}
-		}
-		/* new address in reserved UAR address space. */
-		addr = RTE_PTR_ADD(mlx5_shared_data->uar_base,
-				   uar_va & (uintptr_t)(MLX5_UAR_SIZE - 1));
-		if (!already_mapped) {
-			pages[pages_n++] = uar_va;
-			/* fixed mmap to specified address in reserved
-			 * address space.
+		if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+			/*
+			 * As rdma-core, UARs are mapped in size of OS page
+			 * size. Ref to libmlx5 function: mlx5_init_context()
 			 */
-			ret = mmap(addr, page_size,
-				   PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
-				   txq_ctrl->uar_mmap_offset);
-			if (ret != addr) {
-				/* fixed mmap have to return same address */
+			addr = mmap(NULL, page_size, PROT_WRITE,
+				    MAP_FIXED | MAP_SHARED, fd,
+				    txq_ctrl->uar_mmap_offset);
+			if (addr == MAP_FAILED) {
 				DRV_LOG(ERR,
-					"port %u call to mmap failed on UAR"
-					" for txq %u",
+					"port %u mmap failed for BF reg."
+					" of txq %u",
 					dev->data->port_id, txq_ctrl->idx);
 				rte_errno = ENXIO;
 				return -rte_errno;
 			}
+			addr_ptr = mlx5_uar_get_addr_ptr(txq->bfreg_idx);
+			if (!addr_ptr)
+				return -rte_errno;
+			*addr_ptr = addr;
 		}
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once */
-			txq_ctrl->txq.bf_reg = RTE_PTR_ADD((void *)addr, off);
-		else
-			assert(txq_ctrl->txq.bf_reg ==
-			       RTE_PTR_ADD((void *)addr, off));
 #ifndef RTE_ARCH_64
-		/* Assign a UAR lock according to UAR page number */
+		/* Assign an UAR lock according to UAR page number */
 		lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
 			   MLX5_UAR_PAGE_NUM_MASK;
 		txq->uar_lock = &priv->uar_lock[lock_idx];
@@ -372,6 +349,7 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	struct mlx5dv_obj obj;
 	const int desc = 1 << txq_data->elts_n;
 	eth_tx_burst_t tx_pkt_burst = mlx5_select_tx_function(dev);
+	void **addr_ptr;
 	int ret = 0;
 
 	assert(txq_data);
@@ -507,7 +485,16 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	txq_data->wqes = qp.sq.buf;
 	txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
 	txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
-	txq_ctrl->bf_reg_orig = qp.bf.reg;
+	/* Allocate a new index in UAR table. */
+	ret = mlx5_uar_alloc_index();
+	if (ret < 0)
+		goto error;
+	txq_data->bfreg_idx = ret;
+	/* Store the BlueFlame register address in the local table. */
+	addr_ptr = mlx5_uar_get_addr_ptr(txq_data->bfreg_idx);
+	if (!addr_ptr)
+		goto error;
+	*addr_ptr = qp.bf.reg;
 	txq_data->cq_db = cq_info.dbrec;
 	txq_data->cqes =
 		(volatile struct mlx5_cqe (*)[])
@@ -589,6 +576,7 @@ mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv)
 {
 	assert(txq_ibv);
 	if (rte_atomic32_dec_and_test(&txq_ibv->refcnt)) {
+		mlx5_uar_free_index(txq_ibv->txq_ctrl->txq.bfreg_idx);
 		claim_zero(mlx5_glue->destroy_qp(txq_ibv->qp));
 		claim_zero(mlx5_glue->destroy_cq(txq_ibv->cq));
 		LIST_REMOVE(txq_ibv, next);
@@ -837,15 +825,12 @@ mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_txq_ctrl *txq;
-	size_t page_size = sysconf(_SC_PAGESIZE);
 
 	if (!(*priv->txqs)[idx])
 		return 0;
 	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
 	if (txq->ibv && !mlx5_txq_ibv_release(txq->ibv))
 		txq->ibv = NULL;
-	munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->txq.bf_reg, page_size),
-	       page_size);
 	if (rte_atomic32_dec_and_test(&txq->refcnt)) {
 		txq_free_elts(txq);
 		mlx5_mr_btree_free(&txq->txq.mr_ctrl.cache_bh);
-- 
2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH 2/3] net/mlx5: remove device register remap
  2019-03-25 19:36 ` [dpdk-dev] [PATCH 2/3] net/mlx5: remove device register remap Yongseok Koh
@ 2019-03-25 19:36   ` Yongseok Koh
  0 siblings, 0 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-03-25 19:36 UTC (permalink / raw)
  To: shahafs; +Cc: dev

UAR (User Access Region) registers will be stored in a process-local table
and a process accesses a register in a table entry with index. Alloc/free
of table entry is managed by a global bitmap.

When there's a need to store a UAR register such as Tx BlueFlame register
for doorbell, an index should be allocated by mlx5_uar_alloc_index() and
address of the allocated table entry must be acquired by
mlx5_uar_get_addr_ptr() so that the table can be expanded if overflowed.
The local UAR register table doesn't cover all the indexes in the bitmap.
This will be expanded if more indexes are allocated than the current size
of the table.

For example, the BlueFlame register for Tx doorbell has to be remapped on
each secondary process. On initialization, primary process allocates an
index for the UAR register table and stores the register address in the
indexed entry of its own table when configuring a Tx queue. The index is
stored in the shared memory (txq->bfreg_idx) and visible to secondary
processes. As secondary processes know the index, each process stores
remapped register in the same indexed entry of its local UAR register
table.

On the datapath of each process, the register can be referenced simply by
MLX5_UAR_REG(idx) which accesses its local UAR register table by the index.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.c         | 262 +++++++++++++++++++++++++++-------------
 drivers/net/mlx5/mlx5.h         |  19 ++-
 drivers/net/mlx5/mlx5_rxtx.h    |   7 +-
 drivers/net/mlx5/mlx5_trigger.c |   2 +-
 drivers/net/mlx5/mlx5_txq.c     |  91 ++++++--------
 5 files changed, 236 insertions(+), 145 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 93c0fc8c20..1860273194 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -26,6 +26,7 @@
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
 
+#include <rte_bitmap.h>
 #include <rte_malloc.h>
 #include <rte_ethdev_driver.h>
 #include <rte_ethdev_pci.h>
@@ -132,7 +133,7 @@ struct mlx5_shared_data *mlx5_shared_data;
 static rte_spinlock_t mlx5_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
 
 /* Process local data for secondary processes. */
-static struct mlx5_local_data mlx5_local_data;
+struct mlx5_local_data mlx5_local_data;
 
 /** Driver-specific log messages type. */
 int mlx5_logtype;
@@ -647,130 +648,225 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
 
 static struct rte_pci_driver mlx5_driver;
 
+
+/**
+ * Expand the local UAR register table.
+ *
+ * @param size
+ *   Size of the table to be expanded
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
 static int
-find_lower_va_bound(const struct rte_memseg_list *msl,
-		const struct rte_memseg *ms, void *arg)
+uar_expand_table(uint32_t size)
 {
-	void **addr = arg;
+	struct mlx5_local_data *ld = &mlx5_local_data;
+	void *mem;
+	size_t tbl_sz = ld->uar_table_sz;
 
-	if (msl->external)
+	if (size <= tbl_sz)
 		return 0;
-	if (*addr == NULL)
-		*addr = ms->addr;
-	else
-		*addr = RTE_MIN(*addr, ms->addr);
-
+	tbl_sz = RTE_ALIGN_CEIL(size, RTE_BITMAP_SLAB_BIT_SIZE);
+	mem = rte_realloc(ld->uar_table, tbl_sz * sizeof(void *),
+			  RTE_CACHE_LINE_SIZE);
+	if (!mem) {
+		rte_errno = ENOMEM;
+		DRV_LOG(ERR, "failed to expand uar table");
+		return -rte_errno;
+	}
+	DRV_LOG(DEBUG, "UAR reg. table is expanded to %zu", tbl_sz);
+	ld->uar_table = mem;
+	ld->uar_table_sz = tbl_sz;
 	return 0;
 }
 
 /**
- * Reserve UAR address space for primary process.
+ * Return the pointer of the indexed slot in the local UAR register table.
  *
- * Process local resource is used by both primary and secondary to avoid
- * duplicate reservation. The space has to be available on both primary and
- * secondary process, TXQ UAR maps to this area using fixed mmap w/o double
- * check.
+ * The indexed slot must be allocated by mlx5_uar_alloc_index() in advance. And
+ * the table will be expanded if overflowed.
+ *
+ * @param idx
+ *   Index of the table.
  *
  * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
+ *   Pointer of table entry on success, NULL otherwise and rte_errno is set.
  */
-static int
-mlx5_uar_init_primary(void)
+void **
+mlx5_uar_get_addr_ptr(uint32_t idx)
+{
+	struct mlx5_local_data *ld = &mlx5_local_data;
+	int ret;
+
+	assert(idx < MLX5_UAR_TABLE_SIZE_MAX);
+	if (idx >= ld->uar_table_sz) {
+		ret = uar_expand_table(idx + 1);
+		if (ret)
+			return NULL;
+	}
+	return &(*ld->uar_table)[idx];
+}
+
+/**
+ * Allocate a slot of UAR register table.
+ *
+ * Allocation is done by scanning the global bitmap. The global spinlock is
+ * acquired and released internally.
+ *
+ * @return
+ *   Index of a free slot on success, a negative errno value otherwise and
+ *   rte_errno is set.
+ */
+uint32_t
+mlx5_uar_alloc_index(void)
 {
 	struct mlx5_shared_data *sd = mlx5_shared_data;
-	void *addr = (void *)0;
+	uint32_t idx = 0;
+	uint64_t slab = 0;
+	int ret;
 
-	if (sd->uar_base)
-		return 0;
-	/* find out lower bound of hugepage segments */
-	rte_memseg_walk(find_lower_va_bound, &addr);
-	/* keep distance to hugepages to minimize potential conflicts. */
-	addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX5_UAR_OFFSET + MLX5_UAR_SIZE));
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(addr, MLX5_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		DRV_LOG(ERR,
-			"Failed to reserve UAR address space, please"
-			" adjust MLX5_UAR_SIZE or try --base-virtaddr");
-		rte_errno = ENOMEM;
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	rte_spinlock_lock(&sd->lock);
+	__rte_bitmap_scan_init(sd->uar_bmp);
+	ret = rte_bitmap_scan(sd->uar_bmp, &idx, &slab);
+	if (unlikely(!ret)) {
+		/*
+		 * This cannot happen unless there are unreasonably large number
+		 * of queues and ports.
+		 */
+		rte_errno = ENOSPC;
+		rte_spinlock_unlock(&sd->lock);
 		return -rte_errno;
 	}
-	/* Accept either same addr or a new addr returned from mmap if target
-	 * range occupied.
-	 */
-	DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
-	sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
-	return 0;
+	idx += __builtin_ctzll(slab);
+	/* Mark the slot is occupied. */
+	rte_bitmap_clear(sd->uar_bmp, idx);
+	rte_spinlock_unlock(&sd->lock);
+	DRV_LOG(DEBUG, "index %d is allocated in UAR reg. table", idx);
+	return idx;
 }
 
 /**
- * Unmap UAR address space reserved for primary process.
+ * Free a slot of UAR register table.
  */
-static void
-mlx5_uar_uninit_primary(void)
+void
+mlx5_uar_free_index(uint32_t idx)
 {
 	struct mlx5_shared_data *sd = mlx5_shared_data;
 
-	if (!sd->uar_base)
-		return;
-	munmap(sd->uar_base, MLX5_UAR_SIZE);
-	sd->uar_base = NULL;
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	assert(idx < MLX5_UAR_TABLE_SIZE_MAX);
+	rte_spinlock_lock(&sd->lock);
+	/* Mark the slot is empty. */
+	rte_bitmap_set(sd->uar_bmp, idx);
+	rte_spinlock_unlock(&sd->lock);
+	DRV_LOG(DEBUG, "index %d is freed in UAR reg. table", idx);
 }
 
 /**
- * Reserve UAR address space for secondary process, align with primary process.
+ * Initialize UAR register table bitmap.
+ *
+ * UAR registers will be stored in a process-local table and the table is
+ * managed by a global bitmap. When there's a need to store a UAR register, an
+ * index should be allocated by mlx5_uar_alloc_index() and address of the
+ * allocated table entry must be acquired by mlx5_uar_get_addr_ptr() so that the
+ * table can be expanded if overflowed.
+ *
+ * The local UAR register table doesn't cover all the indexes in the bitmap.
+ * This will be expanded if more indexes are allocated than the current size of
+ * the table.
+ *
+ * Secondary process should have reference of the index and store remapped
+ * register at the same index in its local UAR register table.
+ *
+ * On the datapath of each process, the register can be referenced simply by
+ * MLX5_UAR_REG(idx).
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_uar_init_secondary(void)
+uar_init_primary(void)
 {
 	struct mlx5_shared_data *sd = mlx5_shared_data;
-	struct mlx5_local_data *ld = &mlx5_local_data;
-	void *addr;
+	struct rte_bitmap *bmp;
+	void *bmp_mem;
+	uint32_t bmp_size;
+	unsigned int i;
 
-	if (ld->uar_base) { /* Already reserved. */
-		assert(sd->uar_base == ld->uar_base);
-		return 0;
-	}
-	assert(sd->uar_base);
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(sd->uar_base, MLX5_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		DRV_LOG(ERR, "UAR mmap failed: %p size: %llu",
-			sd->uar_base, MLX5_UAR_SIZE);
-		rte_errno = ENXIO;
+	bmp_size = rte_bitmap_get_memory_footprint(MLX5_UAR_TABLE_SIZE_MAX);
+	bmp_mem = rte_zmalloc("uar_table", bmp_size, RTE_CACHE_LINE_SIZE);
+	if (!bmp_mem) {
+		rte_errno = ENOMEM;
+		DRV_LOG(ERR, "failed to allocate memory for uar table");
 		return -rte_errno;
 	}
-	if (sd->uar_base != addr) {
-		DRV_LOG(ERR,
-			"UAR address %p size %llu occupied, please"
-			" adjust MLX5_UAR_OFFSET or try EAL parameter"
-			" --base-virtaddr",
-			sd->uar_base, MLX5_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
+	bmp = rte_bitmap_init(MLX5_UAR_TABLE_SIZE_MAX, bmp_mem, bmp_size);
+	/* Set every bit: 1 means the slot is vacant, 0 means occupied. */
+	for (i = 0; i < bmp->array2_size; ++i)
+		rte_bitmap_set_slab(bmp, i * RTE_BITMAP_SLAB_BIT_SIZE, -1);
+	sd->uar_bmp = bmp;
+	return 0;
+}
+
+/**
+ * Un-initialize UAR register resources.
+ *
+ * The global bitmap and the register table of primary process are freed.
+ */
+static void
+uar_uninit_primary(void)
+{
+	struct mlx5_shared_data *sd = mlx5_shared_data;
+	struct mlx5_local_data *ld = &mlx5_local_data;
+
+	if (sd->uar_bmp) {
+		rte_bitmap_free(sd->uar_bmp);
+		rte_free(sd->uar_bmp);
+		sd->uar_bmp = NULL;
+	}
+	/* Free primary's table. */
+	if (ld->uar_table) {
+		rte_free(ld->uar_table);
+		ld->uar_table = NULL;
+		ld->uar_table_sz = 0;
 	}
-	ld->uar_base = addr;
-	DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
+}
+
+/**
+ * Initialize UAR register resources for secondary process.
+ *
+ * Allocate the local UAR register table. Initially, the number of entries is
+ * same as the size of a bitmap slab.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+uar_init_secondary(void)
+{
+	/* Prepare at least a bitmap slab. */
+	uar_expand_table(RTE_BITMAP_SLAB_BIT_SIZE);
 	return 0;
 }
 
 /**
- * Unmap UAR address space reserved for secondary process.
+ * Un-initialize UAR register resources for secondary process.
+ *
+ * The local UAR register table is freed.
  */
 static void
-mlx5_uar_uninit_secondary(void)
+uar_uninit_secondary(void)
 {
 	struct mlx5_local_data *ld = &mlx5_local_data;
 
-	if (!ld->uar_base)
-		return;
-	munmap(ld->uar_base, MLX5_UAR_SIZE);
-	ld->uar_base = NULL;
+	/* Free process-local table. */
+	if (ld->uar_table) {
+		rte_free(ld->uar_table);
+		ld->uar_table = NULL;
+		ld->uar_table_sz = 0;
+	}
 }
 
 /**
@@ -804,7 +900,7 @@ mlx5_init_once(void)
 		rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
 						mlx5_mr_mem_event_cb, NULL);
 		mlx5_mp_init_primary();
-		ret = mlx5_uar_init_primary();
+		ret = uar_init_primary();
 		if (ret)
 			goto error;
 		sd->init_done = true;
@@ -813,7 +909,7 @@ mlx5_init_once(void)
 		if (ld->init_done)
 			break;
 		mlx5_mp_init_secondary();
-		ret = mlx5_uar_init_secondary();
+		ret = uar_init_secondary();
 		if (ret)
 			goto error;
 		++sd->secondary_cnt;
@@ -827,12 +923,12 @@ mlx5_init_once(void)
 error:
 	switch (rte_eal_process_type()) {
 	case RTE_PROC_PRIMARY:
-		mlx5_uar_uninit_primary();
+		uar_uninit_primary();
 		mlx5_mp_uninit_primary();
 		rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB", NULL);
 		break;
 	case RTE_PROC_SECONDARY:
-		mlx5_uar_uninit_secondary();
+		uar_uninit_secondary();
 		mlx5_mp_uninit_secondary();
 		break;
 	default:
@@ -1058,7 +1154,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 			goto error;
 		}
 		/* Remap UAR for Tx queues. */
-		err = mlx5_tx_uar_remap(eth_dev, err);
+		err = mlx5_txq_uar_init(eth_dev, err);
 		if (err) {
 			err = rte_errno;
 			goto error;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index a82972d166..5fe8f6ed0c 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -97,8 +97,8 @@ struct mlx5_shared_data {
 	/* Global spinlock for primary and secondary processes. */
 	int init_done; /* Whether primary has done initialization. */
 	unsigned int secondary_cnt; /* Number of secondary processes init'd. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
+	struct rte_bitmap *uar_bmp;
+	/* Bitmap to keep track of BlueFlame register table. */
 	struct mlx5_dev_list mem_event_cb_list;
 	rte_rwlock_t mem_event_rwlock;
 };
@@ -106,11 +106,19 @@ struct mlx5_shared_data {
 /* Per-process data structure, not visible to other processes. */
 struct mlx5_local_data {
 	int init_done; /* Whether a secondary has done initialization. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
+	void *(*uar_table)[];
+	/* Table of BlueFlame registers for each process. */
+	size_t uar_table_sz;
+	/* Size of BlueFlame register table. */
 };
 
 extern struct mlx5_shared_data *mlx5_shared_data;
+extern struct mlx5_local_data mlx5_local_data;
+
+/* The maximum size of BlueFlame register table. */
+#define MLX5_UAR_TABLE_SIZE_MAX (RTE_MAX_ETHPORTS * RTE_MAX_QUEUES_PER_PORT)
+
+#define MLX5_UAR_REG(idx) ((*mlx5_local_data.uar_table)[(idx)])
 
 struct mlx5_counter_ctrl {
 	/* Name of the counter. */
@@ -301,6 +309,9 @@ struct mlx5_priv {
 /* mlx5.c */
 
 int mlx5_getenv_int(const char *);
+void **mlx5_uar_get_addr_ptr(uint32_t idx);
+uint32_t mlx5_uar_alloc_index(void);
+void mlx5_uar_free_index(uint32_t idx);
 
 /* mlx5_ethdev.c */
 
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 53115dde3d..1d28a9c5f2 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -202,7 +202,7 @@ struct mlx5_txq_data {
 	volatile void *wqes; /* Work queue (use volatile to write into). */
 	volatile uint32_t *qp_db; /* Work queue doorbell. */
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
-	volatile void *bf_reg; /* Blueflame register remapped. */
+	uint32_t bfreg_idx; /* Blueflame register index. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
 #ifndef RTE_ARCH_64
@@ -231,7 +231,6 @@ struct mlx5_txq_ctrl {
 	struct mlx5_priv *priv; /* Back pointer to private data. */
 	struct mlx5_txq_data txq; /* Data path structure. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
-	volatile void *bf_reg_orig; /* Blueflame register from verbs. */
 	uint16_t idx; /* Queue index. */
 };
 
@@ -302,7 +301,7 @@ uint64_t mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev);
 int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 			unsigned int socket, const struct rte_eth_txconf *conf);
 void mlx5_tx_queue_release(void *dpdk_txq);
-int mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd);
+int mlx5_txq_uar_init(struct rte_eth_dev *dev, int fd);
 struct mlx5_txq_ibv *mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx);
 struct mlx5_txq_ibv *mlx5_txq_ibv_get(struct rte_eth_dev *dev, uint16_t idx);
 int mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv);
@@ -701,7 +700,7 @@ static __rte_always_inline void
 mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe,
 		       int cond)
 {
-	uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
+	uint64_t *dst = MLX5_UAR_REG(txq->bfreg_idx);
 	volatile uint64_t *src = ((volatile uint64_t *)wqe);
 
 	rte_cio_wmb();
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 7c9ff921ab..d98ef87ef5 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -58,7 +58,7 @@ mlx5_txq_start(struct rte_eth_dev *dev)
 			goto error;
 		}
 	}
-	ret = mlx5_tx_uar_remap(dev, priv->ctx->cmd_fd);
+	ret = mlx5_txq_uar_init(dev, priv->ctx->cmd_fd);
 	if (ret) {
 		/* Adjust index for rollback. */
 		i = priv->txqs_n - 1;
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 5640fe1b91..0cea4ad45f 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -231,9 +231,13 @@ mlx5_tx_queue_release(void *dpdk_txq)
 
 
 /**
- * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
- * Both primary and secondary process do mmap to make UAR address
- * aligned.
+ * Initialize UAR register access for Tx.
+ *
+ * For both primary and secondary, initialize UAR locks for atomic access.
+ *
+ * For secondary, remap BlueFlame registers for secondary process. Remapped
+ * address is stored at the same indexed entry of the local UAR register table
+ * as primary process.
  *
  * @param[in] dev
  *   Pointer to Ethernet device.
@@ -244,75 +248,48 @@ mlx5_tx_queue_release(void *dpdk_txq)
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
+mlx5_txq_uar_init(struct rte_eth_dev *dev, int fd)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	unsigned int i, j;
-	uintptr_t pages[priv->txqs_n];
-	unsigned int pages_n = 0;
-	uintptr_t uar_va;
-	uintptr_t off;
-	void *addr;
-	void *ret;
 	struct mlx5_txq_data *txq;
 	struct mlx5_txq_ctrl *txq_ctrl;
-	int already_mapped;
+	void *addr;
+	void **addr_ptr;
 	size_t page_size = sysconf(_SC_PAGESIZE);
+	unsigned int i;
 #ifndef RTE_ARCH_64
 	unsigned int lock_idx;
 #endif
 
-	memset(pages, 0, priv->txqs_n * sizeof(uintptr_t));
-	/*
-	 * As rdma-core, UARs are mapped in size of OS page size.
-	 * Use aligned address to avoid duplicate mmap.
-	 * Ref to libmlx5 function: mlx5_init_context()
-	 */
 	for (i = 0; i != priv->txqs_n; ++i) {
 		if (!(*priv->txqs)[i])
 			continue;
 		txq = (*priv->txqs)[i];
 		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
 		assert(txq_ctrl->idx == (uint16_t)i);
-		/* UAR addr form verbs used to find dup and offset in page. */
-		uar_va = (uintptr_t)txq_ctrl->bf_reg_orig;
-		off = uar_va & (page_size - 1); /* offset in page. */
-		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
-		already_mapped = 0;
-		for (j = 0; j != pages_n; ++j) {
-			if (pages[j] == uar_va) {
-				already_mapped = 1;
-				break;
-			}
-		}
-		/* new address in reserved UAR address space. */
-		addr = RTE_PTR_ADD(mlx5_shared_data->uar_base,
-				   uar_va & (uintptr_t)(MLX5_UAR_SIZE - 1));
-		if (!already_mapped) {
-			pages[pages_n++] = uar_va;
-			/* fixed mmap to specified address in reserved
-			 * address space.
+		if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+			/*
+			 * As rdma-core, UARs are mapped in size of OS page
+			 * size. Ref to libmlx5 function: mlx5_init_context()
 			 */
-			ret = mmap(addr, page_size,
-				   PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
-				   txq_ctrl->uar_mmap_offset);
-			if (ret != addr) {
-				/* fixed mmap have to return same address */
+			addr = mmap(NULL, page_size, PROT_WRITE,
+				    MAP_FIXED | MAP_SHARED, fd,
+				    txq_ctrl->uar_mmap_offset);
+			if (addr == MAP_FAILED) {
 				DRV_LOG(ERR,
-					"port %u call to mmap failed on UAR"
-					" for txq %u",
+					"port %u mmap failed for BF reg."
+					" of txq %u",
 					dev->data->port_id, txq_ctrl->idx);
 				rte_errno = ENXIO;
 				return -rte_errno;
 			}
+			addr_ptr = mlx5_uar_get_addr_ptr(txq->bfreg_idx);
+			if (!addr_ptr)
+				return -rte_errno;
+			*addr_ptr = addr;
 		}
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once */
-			txq_ctrl->txq.bf_reg = RTE_PTR_ADD((void *)addr, off);
-		else
-			assert(txq_ctrl->txq.bf_reg ==
-			       RTE_PTR_ADD((void *)addr, off));
 #ifndef RTE_ARCH_64
-		/* Assign a UAR lock according to UAR page number */
+		/* Assign an UAR lock according to UAR page number */
 		lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
 			   MLX5_UAR_PAGE_NUM_MASK;
 		txq->uar_lock = &priv->uar_lock[lock_idx];
@@ -372,6 +349,7 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	struct mlx5dv_obj obj;
 	const int desc = 1 << txq_data->elts_n;
 	eth_tx_burst_t tx_pkt_burst = mlx5_select_tx_function(dev);
+	void **addr_ptr;
 	int ret = 0;
 
 	assert(txq_data);
@@ -507,7 +485,16 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	txq_data->wqes = qp.sq.buf;
 	txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
 	txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
-	txq_ctrl->bf_reg_orig = qp.bf.reg;
+	/* Allocate a new index in UAR table. */
+	ret = mlx5_uar_alloc_index();
+	if (ret < 0)
+		goto error;
+	txq_data->bfreg_idx = ret;
+	/* Store the BlueFlame register address in the local table. */
+	addr_ptr = mlx5_uar_get_addr_ptr(txq_data->bfreg_idx);
+	if (!addr_ptr)
+		goto error;
+	*addr_ptr = qp.bf.reg;
 	txq_data->cq_db = cq_info.dbrec;
 	txq_data->cqes =
 		(volatile struct mlx5_cqe (*)[])
@@ -589,6 +576,7 @@ mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv)
 {
 	assert(txq_ibv);
 	if (rte_atomic32_dec_and_test(&txq_ibv->refcnt)) {
+		mlx5_uar_free_index(txq_ibv->txq_ctrl->txq.bfreg_idx);
 		claim_zero(mlx5_glue->destroy_qp(txq_ibv->qp));
 		claim_zero(mlx5_glue->destroy_cq(txq_ibv->cq));
 		LIST_REMOVE(txq_ibv, next);
@@ -837,15 +825,12 @@ mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_txq_ctrl *txq;
-	size_t page_size = sysconf(_SC_PAGESIZE);
 
 	if (!(*priv->txqs)[idx])
 		return 0;
 	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
 	if (txq->ibv && !mlx5_txq_ibv_release(txq->ibv))
 		txq->ibv = NULL;
-	munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->txq.bf_reg, page_size),
-	       page_size);
 	if (rte_atomic32_dec_and_test(&txq->refcnt)) {
 		txq_free_elts(txq);
 		mlx5_mr_btree_free(&txq->txq.mr_ctrl.cache_bh);
-- 
2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH 3/3] net/mlx4: remove device register remap
  2019-03-25 19:36 [dpdk-dev] [PATCH 0/3] net/mlx: remove device register remap Yongseok Koh
                   ` (2 preceding siblings ...)
  2019-03-25 19:36 ` [dpdk-dev] [PATCH 2/3] net/mlx5: remove device register remap Yongseok Koh
@ 2019-03-25 19:36 ` Yongseok Koh
  2019-03-25 19:36   ` Yongseok Koh
  2019-04-01 21:22 ` [dpdk-dev] [PATCH v2 0/3] net/mlx: " Yongseok Koh
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 66+ messages in thread
From: Yongseok Koh @ 2019-03-25 19:36 UTC (permalink / raw)
  To: shahafs; +Cc: dev

UAR (User Access Region) registers will be stored in a process-local table
and a process accesses a register in a table entry with index. Alloc/free
of table entry is managed by a global bitmap.

When there's a need to store a UAR register such as Tx BlueFlame register
for doorbell, an index should be allocated by mlx4_uar_alloc_index() and
address of the allocated table entry must be acquired by
mlx4_uar_get_addr_ptr() so that the table can be expanded if overflowed.
The local UAR register table doesn't cover all the indexes in the bitmap.
This will be expanded if more indexes are allocated than the current size
of the table.

For example, the BlueFlame register for Tx doorbell has to be remapped on
each secondary process. On initialization, primary process allocates an
index for the UAR register table and stores the register address in the
indexed entry of its own table when configuring a Tx queue. The index is
stored in the shared memory (txq->bfreg_idx) and visible to secondary
processes. As secondary processes know the index, each process stores
remapped register in the same indexed entry of its local UAR register
table.

On the datapath of each process, the register can be referenced simply by
MLX4_UAR_REG(idx) which accesses its local UAR register table by the index.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx4/mlx4.c      | 274 ++++++++++++++++++++++++++++---------------
 drivers/net/mlx4/mlx4.h      |  22 +++-
 drivers/net/mlx4/mlx4_prm.h  |   2 -
 drivers/net/mlx4/mlx4_rxtx.c |   2 +-
 drivers/net/mlx4/mlx4_rxtx.h |   3 +-
 drivers/net/mlx4/mlx4_txq.c  | 102 ++++++----------
 6 files changed, 235 insertions(+), 170 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index d913c2a47e..7749e0f9e4 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -63,7 +63,7 @@ struct mlx4_shared_data *mlx4_shared_data;
 static rte_spinlock_t mlx4_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
 
 /* Process local data for secondary processes. */
-static struct mlx4_local_data mlx4_local_data;
+struct mlx4_local_data mlx4_local_data;
 
 /** Configuration structure for device arguments. */
 struct mlx4_conf {
@@ -267,11 +267,6 @@ mlx4_dev_start(struct rte_eth_dev *dev)
 		return 0;
 	DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
 	priv->started = 1;
-	ret = mlx4_tx_uar_remap(dev, priv->ctx->cmd_fd);
-	if (ret) {
-		ERROR("%p: cannot remap UAR", (void *)dev);
-		goto err;
-	}
 	ret = mlx4_rss_init(priv);
 	if (ret) {
 		ERROR("%p: cannot initialize RSS resources: %s",
@@ -319,8 +314,6 @@ static void
 mlx4_dev_stop(struct rte_eth_dev *dev)
 {
 	struct mlx4_priv *priv = dev->data->dev_private;
-	const size_t page_size = sysconf(_SC_PAGESIZE);
-	int i;
 
 	if (!priv->started)
 		return;
@@ -334,15 +327,6 @@ mlx4_dev_stop(struct rte_eth_dev *dev)
 	mlx4_flow_sync(priv, NULL);
 	mlx4_rxq_intr_disable(priv);
 	mlx4_rss_deinit(priv);
-	for (i = 0; i != dev->data->nb_tx_queues; ++i) {
-		struct txq *txq;
-
-		txq = dev->data->tx_queues[i];
-		if (!txq)
-			continue;
-		munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->msq.db,
-					       page_size), page_size);
-	}
 }
 
 /**
@@ -669,128 +653,224 @@ mlx4_hw_rss_sup(struct ibv_context *ctx, struct ibv_pd *pd,
 
 static struct rte_pci_driver mlx4_driver;
 
+/**
+ * Expand the local UAR register table.
+ *
+ * @param size
+ *   Size of the table to be expanded
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
 static int
-find_lower_va_bound(const struct rte_memseg_list *msl,
-		const struct rte_memseg *ms, void *arg)
+uar_expand_table(uint32_t size)
 {
-	void **addr = arg;
+	struct mlx4_local_data *ld = &mlx4_local_data;
+	void *mem;
+	size_t tbl_sz = ld->uar_table_sz;
 
-	if (msl->external)
+	if (size <= tbl_sz)
 		return 0;
-	if (*addr == NULL)
-		*addr = ms->addr;
-	else
-		*addr = RTE_MIN(*addr, ms->addr);
-
+	tbl_sz = RTE_ALIGN_CEIL(size, RTE_BITMAP_SLAB_BIT_SIZE);
+	mem = rte_realloc(ld->uar_table, tbl_sz * sizeof(void *),
+			  RTE_CACHE_LINE_SIZE);
+	if (!mem) {
+		rte_errno = ENOMEM;
+		ERROR("failed to expand uar table");
+		return -rte_errno;
+	}
+	DEBUG("UAR reg. table is expanded to %zu", tbl_sz);
+	ld->uar_table = mem;
+	ld->uar_table_sz = tbl_sz;
 	return 0;
 }
 
 /**
- * Reserve UAR address space for primary process.
+ * Return the pointer of the indexed slot in the local UAR register table.
  *
- * Process local resource is used by both primary and secondary to avoid
- * duplicate reservation. The space has to be available on both primary and
- * secondary process, TXQ UAR maps to this area using fixed mmap w/o double
- * check.
+ * The indexed slot must be allocated by mlx4_uar_alloc_index() in advance. And
+ * the table will be expanded if overflowed.
+ *
+ * @param idx
+ *   Index of the table.
  *
  * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
+ *   Pointer of table entry on success, NULL otherwise and rte_errno is set.
  */
-static int
-mlx4_uar_init_primary(void)
+void **
+mlx4_uar_get_addr_ptr(uint32_t idx)
+{
+	struct mlx4_local_data *ld = &mlx4_local_data;
+	int ret;
+
+	assert(idx < MLX4_UAR_TABLE_SIZE_MAX);
+	if (idx >= ld->uar_table_sz) {
+		ret = uar_expand_table(idx + 1);
+		if (ret)
+			return NULL;
+	}
+	return &(*ld->uar_table)[idx];
+}
+
+/**
+ * Allocate a slot of UAR register table.
+ *
+ * Allocation is done by scanning the global bitmap. The global spinlock is
+ * acquired and released internally.
+ *
+ * @return
+ *   Index of a free slot on success, a negative errno value otherwise and
+ *   rte_errno is set.
+ */
+uint32_t
+mlx4_uar_alloc_index(void)
 {
 	struct mlx4_shared_data *sd = mlx4_shared_data;
-	void *addr = (void *)0;
+	uint32_t idx = 0;
+	uint64_t slab = 0;
+	int ret;
 
-	if (sd->uar_base)
-		return 0;
-	/* find out lower bound of hugepage segments */
-	rte_memseg_walk(find_lower_va_bound, &addr);
-	/* keep distance to hugepages to minimize potential conflicts. */
-	addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX4_UAR_OFFSET + MLX4_UAR_SIZE));
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(addr, MLX4_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		ERROR("failed to reserve UAR address space, please"
-		      " adjust MLX4_UAR_SIZE or try --base-virtaddr");
-		rte_errno = ENOMEM;
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	rte_spinlock_lock(&sd->lock);
+	__rte_bitmap_scan_init(sd->uar_bmp);
+	ret = rte_bitmap_scan(sd->uar_bmp, &idx, &slab);
+	if (unlikely(!ret)) {
+		/*
+		 * This cannot happen unless there are unreasonably large number
+		 * of queues and ports.
+		 */
+		rte_errno = ENOSPC;
+		rte_spinlock_unlock(&sd->lock);
 		return -rte_errno;
 	}
-	/* Accept either same addr or a new addr returned from mmap if target
-	 * range occupied.
-	 */
-	INFO("reserved UAR address space: %p", addr);
-	sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
-	return 0;
+	idx += __builtin_ctzll(slab);
+	/* Mark the slot is occupied. */
+	rte_bitmap_clear(sd->uar_bmp, idx);
+	rte_spinlock_unlock(&sd->lock);
+	DEBUG("index %d is allocated in UAR reg. table", idx);
+	return idx;
 }
 
 /**
- * Unmap UAR address space reserved for primary process.
+ * Free a slot of UAR register table.
  */
-static void
-mlx4_uar_uninit_primary(void)
+void
+mlx4_uar_free_index(uint32_t idx)
 {
 	struct mlx4_shared_data *sd = mlx4_shared_data;
 
-	if (!sd->uar_base)
-		return;
-	munmap(sd->uar_base, MLX4_UAR_SIZE);
-	sd->uar_base = NULL;
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	assert(idx < MLX4_UAR_TABLE_SIZE_MAX);
+	rte_spinlock_lock(&sd->lock);
+	/* Mark the slot is empty. */
+	rte_bitmap_set(sd->uar_bmp, idx);
+	rte_spinlock_unlock(&sd->lock);
+	DEBUG("index %d is freed in UAR reg. table", idx);
 }
 
 /**
- * Reserve UAR address space for secondary process, align with primary process.
+ * Initialize UAR register table bitmap.
+ *
+ * UAR registers will be stored in a process-local table and the table is
+ * managed by a global bitmap. When there's a need to store a UAR register, an
+ * index should be allocated by mlx4_uar_alloc_index() and address of the
+ * allocated table entry must be acquired by mlx4_uar_get_addr_ptr() so that the
+ * table can be expanded if overflowed.
+ *
+ * The local UAR register table doesn't cover all the indexes in the bitmap.
+ * This will be expanded if more indexes are allocated than the current size of
+ * the table.
+ *
+ * Secondary process should have reference of the index and store remapped
+ * register at the same index in its local UAR register table.
+ *
+ * On the datapath of each process, the register can be referenced simply by
+ * MLX4_UAR_REG(idx).
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx4_uar_init_secondary(void)
+uar_init_primary(void)
 {
 	struct mlx4_shared_data *sd = mlx4_shared_data;
-	struct mlx4_local_data *ld = &mlx4_local_data;
-	void *addr;
+	struct rte_bitmap *bmp;
+	void *bmp_mem;
+	uint32_t bmp_size;
+	unsigned int i;
 
-	if (ld->uar_base) { /* Already reserved. */
-		assert(sd->uar_base == ld->uar_base);
-		return 0;
-	}
-	assert(sd->uar_base);
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(sd->uar_base, MLX4_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		ERROR("UAR mmap failed: %p size: %llu",
-		      sd->uar_base, MLX4_UAR_SIZE);
-		rte_errno = ENXIO;
+	bmp_size = rte_bitmap_get_memory_footprint(MLX4_UAR_TABLE_SIZE_MAX);
+	bmp_mem = rte_zmalloc("uar_table", bmp_size, RTE_CACHE_LINE_SIZE);
+	if (!bmp_mem) {
+		rte_errno = ENOMEM;
+		ERROR("failed to allocate memory for uar table");
 		return -rte_errno;
 	}
-	if (sd->uar_base != addr) {
-		ERROR("UAR address %p size %llu occupied, please"
-		      " adjust MLX4_UAR_OFFSET or try EAL parameter"
-		      " --base-virtaddr",
-		      sd->uar_base, MLX4_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
+	bmp = rte_bitmap_init(MLX4_UAR_TABLE_SIZE_MAX, bmp_mem, bmp_size);
+	/* Set every bit: 1 means the slot is vacant, 0 means occupied. */
+	for (i = 0; i < bmp->array2_size; ++i)
+		rte_bitmap_set_slab(bmp, i * RTE_BITMAP_SLAB_BIT_SIZE, -1);
+	sd->uar_bmp = bmp;
+	return 0;
+}
+
+/**
+ * Un-initialize UAR register resources.
+ *
+ * The global bitmap and the register table of primary process are freed.
+ */
+static void
+uar_uninit_primary(void)
+{
+	struct mlx4_shared_data *sd = mlx4_shared_data;
+	struct mlx4_local_data *ld = &mlx4_local_data;
+
+	if (sd->uar_bmp) {
+		rte_bitmap_free(sd->uar_bmp);
+		rte_free(sd->uar_bmp);
+		sd->uar_bmp = NULL;
+	}
+	/* Free primary's table. */
+	if (ld->uar_table) {
+		rte_free(ld->uar_table);
+		ld->uar_table = NULL;
+		ld->uar_table_sz = 0;
 	}
-	ld->uar_base = addr;
-	INFO("reserved UAR address space: %p", addr);
+}
+
+/**
+ * Initialize UAR register resources for secondary process.
+ *
+ * Allocate the local UAR register table. Initially, the number of entries is
+ * same as the size of a bitmap slab.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+uar_init_secondary(void)
+{
+	/* Prepare at least a bitmap slab. */
+	uar_expand_table(RTE_BITMAP_SLAB_BIT_SIZE);
 	return 0;
 }
 
 /**
- * Unmap UAR address space reserved for secondary process.
+ * Un-initialize UAR register resources for secondary process.
+ *
+ * The local UAR register table is freed.
  */
 static void
-mlx4_uar_uninit_secondary(void)
+uar_uninit_secondary(void)
 {
 	struct mlx4_local_data *ld = &mlx4_local_data;
 
-	if (!ld->uar_base)
-		return;
-	munmap(ld->uar_base, MLX4_UAR_SIZE);
-	ld->uar_base = NULL;
+	/* Free process-local table. */
+	if (ld->uar_table) {
+		rte_free(ld->uar_table);
+		ld->uar_table = NULL;
+		ld->uar_table_sz = 0;
+	}
 }
 
 /**
@@ -824,7 +904,7 @@ mlx4_init_once(void)
 		rte_mem_event_callback_register("MLX4_MEM_EVENT_CB",
 						mlx4_mr_mem_event_cb, NULL);
 		mlx4_mp_init_primary();
-		ret = mlx4_uar_init_primary();
+		ret = uar_init_primary();
 		if (ret)
 			goto error;
 		sd->init_done = true;
@@ -833,7 +913,7 @@ mlx4_init_once(void)
 		if (ld->init_done)
 			break;
 		mlx4_mp_init_secondary();
-		ret = mlx4_uar_init_secondary();
+		ret = uar_init_secondary();
 		if (ret)
 			goto error;
 		++sd->secondary_cnt;
@@ -847,12 +927,12 @@ mlx4_init_once(void)
 error:
 	switch (rte_eal_process_type()) {
 	case RTE_PROC_PRIMARY:
-		mlx4_uar_uninit_primary();
+		uar_uninit_primary();
 		mlx4_mp_uninit_primary();
 		rte_mem_event_callback_unregister("MLX4_MEM_EVENT_CB", NULL);
 		break;
 	case RTE_PROC_SECONDARY:
-		mlx4_uar_uninit_secondary();
+		uar_uninit_secondary();
 		mlx4_mp_uninit_secondary();
 		break;
 	default:
@@ -1011,7 +1091,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 				goto error;
 			}
 			/* Remap UAR for Tx queues. */
-			err = mlx4_tx_uar_remap(eth_dev, err);
+			err = mlx4_txq_uar_init_secondary(eth_dev, err);
 			if (err) {
 				err = rte_errno;
 				goto error;
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 3881943ef0..977866e017 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -137,8 +137,8 @@ struct mlx4_shared_data {
 	/* Global spinlock for primary and secondary processes. */
 	int init_done; /* Whether primary has done initialization. */
 	unsigned int secondary_cnt; /* Number of secondary processes init'd. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
+	struct rte_bitmap *uar_bmp;
+	/* Bitmap to keep track of BlueFlame register table. */
 	struct mlx4_dev_list mem_event_cb_list;
 	rte_rwlock_t mem_event_rwlock;
 };
@@ -146,11 +146,19 @@ struct mlx4_shared_data {
 /* Per-process data structure, not visible to other processes. */
 struct mlx4_local_data {
 	int init_done; /* Whether a secondary has done initialization. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
+	void *(*uar_table)[];
+	/* Table of BlueFlame registers for each process. */
+	size_t uar_table_sz;
+	/* Size of BlueFlame register table. */
 };
 
 extern struct mlx4_shared_data *mlx4_shared_data;
+extern struct mlx4_local_data mlx4_local_data;
+
+/* The maximum size of BlueFlame register table. */
+#define MLX4_UAR_TABLE_SIZE_MAX (RTE_MAX_ETHPORTS * RTE_MAX_QUEUES_PER_PORT)
+
+#define MLX4_UAR_REG(idx) ((*mlx4_local_data.uar_table)[(idx)])
 
 /** Private data structure. */
 struct mlx4_priv {
@@ -197,6 +205,12 @@ struct mlx4_priv {
 #define PORT_ID(priv) ((priv)->dev_data->port_id)
 #define ETH_DEV(priv) (&rte_eth_devices[PORT_ID(priv)])
 
+/* mlx4.c */
+
+void **mlx4_uar_get_addr_ptr(uint32_t idx);
+uint32_t mlx4_uar_alloc_index(void);
+void mlx4_uar_free_index(uint32_t idx);
+
 /* mlx4_ethdev.c */
 
 int mlx4_get_ifname(const struct mlx4_priv *priv, char (*ifname)[IF_NAMESIZE]);
diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h
index b3e11dde25..06ad92d391 100644
--- a/drivers/net/mlx4/mlx4_prm.h
+++ b/drivers/net/mlx4/mlx4_prm.h
@@ -77,8 +77,6 @@ struct mlx4_sq {
 	uint32_t owner_opcode;
 	/**< Default owner opcode with HW valid owner bit. */
 	uint32_t stamp; /**< Stamp value with an invalid HW owner bit. */
-	volatile uint32_t *qp_sdb; /**< Pointer to the doorbell. */
-	volatile uint32_t *db; /**< Pointer to the doorbell remapped. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
 	uint32_t doorbell_qpn; /**< qp number to write to the doorbell. */
 };
diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index f22f1ba559..513c8a61bf 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -1048,7 +1048,7 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	/* Make sure that descriptors are written before doorbell record. */
 	rte_wmb();
 	/* Ring QP doorbell. */
-	rte_write32(txq->msq.doorbell_qpn, txq->msq.db);
+	rte_write32(txq->msq.doorbell_qpn, MLX4_UAR_REG(txq->bfreg_idx));
 	txq->elts_head += i;
 	return i;
 }
diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
index 7d7a8988ed..d9df98715e 100644
--- a/drivers/net/mlx4/mlx4_rxtx.h
+++ b/drivers/net/mlx4/mlx4_rxtx.h
@@ -97,6 +97,7 @@ struct mlx4_txq_stats {
 struct txq {
 	struct mlx4_sq msq; /**< Info for directly manipulating the SQ. */
 	struct mlx4_cq mcq; /**< Info for directly manipulating the CQ. */
+	uint32_t bfreg_idx; /**< Blueflame register index. */
 	unsigned int elts_head; /**< Current index in (*elts)[]. */
 	unsigned int elts_tail; /**< First element awaiting completion. */
 	int elts_comp_cd; /**< Countdown for next completion. */
@@ -152,7 +153,7 @@ uint16_t mlx4_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts,
 
 /* mlx4_txq.c */
 
-int mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd);
+int mlx4_txq_uar_init_secondary(struct rte_eth_dev *dev, int fd);
 uint64_t mlx4_get_tx_port_offloads(struct mlx4_priv *priv);
 int mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
 			uint16_t desc, unsigned int socket,
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index ed00843425..51d74d6c80 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -39,10 +39,15 @@
 #include "mlx4_rxtx.h"
 #include "mlx4_utils.h"
 
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
 /**
- * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
- * Both primary and secondary process do mmap to make UAR address
- * aligned.
+ * Initialize UAR register access for Tx.
+ *
+ * Primary process shouldn't call this function.
+ *
+ * For secondary, remap BlueFlame registers for secondary process. Remapped
+ * address is stored at the same indexed entry of the local UAR register table
+ * as primary process.
  *
  * @param[in] dev
  *   Pointer to Ethernet device.
@@ -52,83 +57,41 @@
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
 int
-mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd)
+mlx4_txq_uar_init_secondary(struct rte_eth_dev *dev, int fd)
 {
-	unsigned int i, j;
 	const unsigned int txqs_n = dev->data->nb_tx_queues;
-	uintptr_t pages[txqs_n];
-	unsigned int pages_n = 0;
-	uintptr_t uar_va;
-	uintptr_t off;
-	void *addr;
-	void *ret;
 	struct txq *txq;
-	int already_mapped;
+	void *addr;
+	void **addr_ptr;
 	size_t page_size = sysconf(_SC_PAGESIZE);
+	unsigned int i;
 
-	memset(pages, 0, txqs_n * sizeof(uintptr_t));
+	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
 	/*
 	 * As rdma-core, UARs are mapped in size of OS page size.
-	 * Use aligned address to avoid duplicate mmap.
 	 * Ref to libmlx4 function: mlx4_init_context()
 	 */
 	for (i = 0; i != txqs_n; ++i) {
 		txq = dev->data->tx_queues[i];
 		if (!txq)
 			continue;
-		/* UAR addr form verbs used to find dup and offset in page. */
-		uar_va = (uintptr_t)txq->msq.qp_sdb;
-		off = uar_va & (page_size - 1); /* offset in page. */
-		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
-		already_mapped = 0;
-		for (j = 0; j != pages_n; ++j) {
-			if (pages[j] == uar_va) {
-				already_mapped = 1;
-				break;
-			}
-		}
-		/* new address in reserved UAR address space. */
-		addr = RTE_PTR_ADD(mlx4_shared_data->uar_base,
-				   uar_va & (uintptr_t)(MLX4_UAR_SIZE - 1));
-		if (!already_mapped) {
-			pages[pages_n++] = uar_va;
-			/* fixed mmap to specified address in reserved
-			 * address space.
-			 */
-			ret = mmap(addr, page_size,
-				   PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
-				   txq->msq.uar_mmap_offset);
-			if (ret != addr) {
-				/* fixed mmap has to return same address. */
-				ERROR("port %u call to mmap failed on UAR"
-				      " for txq %u",
-				      dev->data->port_id, i);
-				rte_errno = ENXIO;
-				return -rte_errno;
-			}
+		addr = mmap(NULL, page_size, PROT_WRITE,
+			    MAP_FIXED | MAP_SHARED, fd,
+			    txq->msq.uar_mmap_offset);
+		if (addr == MAP_FAILED) {
+			ERROR("port %u mmap failed for BF reg. of txq %u",
+			      dev->data->port_id, i);
+			rte_errno = ENXIO;
+			return -rte_errno;
 		}
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once. */
-			txq->msq.db = RTE_PTR_ADD((void *)addr, off);
-		else
-			assert(txq->msq.db ==
-			       RTE_PTR_ADD((void *)addr, off));
+		addr_ptr = mlx4_uar_get_addr_ptr(txq->bfreg_idx);
+		if (!addr_ptr)
+			return -rte_errno;
+		*addr_ptr = addr;
 	}
 	return 0;
 }
-#else
-int
-mlx4_tx_uar_remap(struct rte_eth_dev *dev __rte_unused, int fd __rte_unused)
-{
-	/*
-	 * If rdma-core doesn't support UAR remap, secondary process is not
-	 * supported, thus secondary cannot call this function but only primary
-	 * makes a call. Return success to not interrupt initialization.
-	 */
-	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
-	return 0;
-}
 #endif
 
 /**
@@ -185,10 +148,8 @@ mlx4_txq_fill_dv_obj_info(struct txq *txq, struct mlx4dv_obj *mlxdv)
 				     (0u << MLX4_SQ_OWNER_BIT));
 #ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
 	sq->uar_mmap_offset = dqp->uar_mmap_offset;
-	sq->qp_sdb = dqp->sdb;
-#else
-	sq->db = dqp->sdb;
 #endif
+	*mlx4_uar_get_addr_ptr(txq->bfreg_idx) = dqp->sdb;
 	sq->doorbell_qpn = dqp->doorbell_qpn;
 	cq->buf = dcq->buf.buf;
 	cq->cqe_cnt = dcq->cqe_cnt;
@@ -255,6 +216,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	struct ibv_qp_init_attr qp_init_attr;
 	struct txq *txq;
 	uint8_t *bounce_buf;
+	void **addr_ptr;
 	struct mlx4_malloc_vec vec[] = {
 		{
 			.align = RTE_CACHE_LINE_SIZE,
@@ -429,6 +391,15 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		goto error;
 	}
 #endif
+	/* Allocate a new index in UAR table. */
+	ret = mlx4_uar_alloc_index();
+	if (ret < 0)
+		goto error;
+	txq->bfreg_idx = ret;
+	/* Make sure the local UAR register table is properly expanded. */
+	addr_ptr = mlx4_uar_get_addr_ptr(txq->bfreg_idx);
+	if (!addr_ptr)
+		goto error;
 	mlx4_txq_fill_dv_obj_info(txq, &mlxdv);
 	/* Save first wqe pointer in the first element. */
 	(&(*txq->elts)[0])->wqe =
@@ -478,6 +449,7 @@ mlx4_tx_queue_release(void *dpdk_txq)
 			break;
 		}
 	mlx4_txq_free_elts(txq);
+	mlx4_uar_free_index(txq->bfreg_idx);
 	if (txq->qp)
 		claim_zero(mlx4_glue->destroy_qp(txq->qp));
 	if (txq->cq)
-- 
2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH 3/3] net/mlx4: remove device register remap
  2019-03-25 19:36 ` [dpdk-dev] [PATCH 3/3] net/mlx4: " Yongseok Koh
@ 2019-03-25 19:36   ` Yongseok Koh
  0 siblings, 0 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-03-25 19:36 UTC (permalink / raw)
  To: shahafs; +Cc: dev

UAR (User Access Region) registers will be stored in a process-local table,
and a process accesses a register through its index in that table.
Allocation and freeing of table entries is managed by a global bitmap.

When there's a need to store a UAR register such as Tx BlueFlame register
for doorbell, an index should be allocated by mlx4_uar_alloc_index() and
address of the allocated table entry must be acquired by
mlx4_uar_get_addr_ptr() so that the table can be expanded if overflowed.
The local UAR register table doesn't cover all the indexes in the bitmap.
The table will be expanded if more indexes are allocated than its current
size.

For example, the BlueFlame register for Tx doorbell has to be remapped on
each secondary process. On initialization, the primary process allocates an
index for the UAR register table and stores the register address in the
indexed entry of its own table when configuring a Tx queue. The index is
stored in the shared memory (txq->bfreg_idx) and is visible to secondary
processes. As secondary processes know the index, each process stores its
remapped register in the same indexed entry of its local UAR register
table.

On the datapath of each process, the register can be referenced simply by
MLX4_UAR_REG(idx) which accesses its local UAR register table by the index.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx4/mlx4.c      | 274 ++++++++++++++++++++++++++++---------------
 drivers/net/mlx4/mlx4.h      |  22 +++-
 drivers/net/mlx4/mlx4_prm.h  |   2 -
 drivers/net/mlx4/mlx4_rxtx.c |   2 +-
 drivers/net/mlx4/mlx4_rxtx.h |   3 +-
 drivers/net/mlx4/mlx4_txq.c  | 102 ++++++----------
 6 files changed, 235 insertions(+), 170 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index d913c2a47e..7749e0f9e4 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -63,7 +63,7 @@ struct mlx4_shared_data *mlx4_shared_data;
 static rte_spinlock_t mlx4_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
 
 /* Process local data for secondary processes. */
-static struct mlx4_local_data mlx4_local_data;
+struct mlx4_local_data mlx4_local_data;
 
 /** Configuration structure for device arguments. */
 struct mlx4_conf {
@@ -267,11 +267,6 @@ mlx4_dev_start(struct rte_eth_dev *dev)
 		return 0;
 	DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
 	priv->started = 1;
-	ret = mlx4_tx_uar_remap(dev, priv->ctx->cmd_fd);
-	if (ret) {
-		ERROR("%p: cannot remap UAR", (void *)dev);
-		goto err;
-	}
 	ret = mlx4_rss_init(priv);
 	if (ret) {
 		ERROR("%p: cannot initialize RSS resources: %s",
@@ -319,8 +314,6 @@ static void
 mlx4_dev_stop(struct rte_eth_dev *dev)
 {
 	struct mlx4_priv *priv = dev->data->dev_private;
-	const size_t page_size = sysconf(_SC_PAGESIZE);
-	int i;
 
 	if (!priv->started)
 		return;
@@ -334,15 +327,6 @@ mlx4_dev_stop(struct rte_eth_dev *dev)
 	mlx4_flow_sync(priv, NULL);
 	mlx4_rxq_intr_disable(priv);
 	mlx4_rss_deinit(priv);
-	for (i = 0; i != dev->data->nb_tx_queues; ++i) {
-		struct txq *txq;
-
-		txq = dev->data->tx_queues[i];
-		if (!txq)
-			continue;
-		munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->msq.db,
-					       page_size), page_size);
-	}
 }
 
 /**
@@ -669,128 +653,224 @@ mlx4_hw_rss_sup(struct ibv_context *ctx, struct ibv_pd *pd,
 
 static struct rte_pci_driver mlx4_driver;
 
+/**
+ * Expand the local UAR register table.
+ *
+ * @param size
+ *   Size of the table to be expanded
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
 static int
-find_lower_va_bound(const struct rte_memseg_list *msl,
-		const struct rte_memseg *ms, void *arg)
+uar_expand_table(uint32_t size)
 {
-	void **addr = arg;
+	struct mlx4_local_data *ld = &mlx4_local_data;
+	void *mem;
+	size_t tbl_sz = ld->uar_table_sz;
 
-	if (msl->external)
+	if (size <= tbl_sz)
 		return 0;
-	if (*addr == NULL)
-		*addr = ms->addr;
-	else
-		*addr = RTE_MIN(*addr, ms->addr);
-
+	tbl_sz = RTE_ALIGN_CEIL(size, RTE_BITMAP_SLAB_BIT_SIZE);
+	mem = rte_realloc(ld->uar_table, tbl_sz * sizeof(void *),
+			  RTE_CACHE_LINE_SIZE);
+	if (!mem) {
+		rte_errno = ENOMEM;
+		ERROR("failed to expand uar table");
+		return -rte_errno;
+	}
+	DEBUG("UAR reg. table is expanded to %zu", tbl_sz);
+	ld->uar_table = mem;
+	ld->uar_table_sz = tbl_sz;
 	return 0;
 }
 
 /**
- * Reserve UAR address space for primary process.
+ * Return the pointer of the indexed slot in the local UAR register table.
  *
- * Process local resource is used by both primary and secondary to avoid
- * duplicate reservation. The space has to be available on both primary and
- * secondary process, TXQ UAR maps to this area using fixed mmap w/o double
- * check.
+ * The indexed slot must be allocated by mlx4_uar_alloc_index() in advance. And
+ * the table will be expanded if overflowed.
+ *
+ * @param idx
+ *   Index of the table.
  *
  * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
+ *   Pointer of table entry on success, NULL otherwise and rte_errno is set.
  */
-static int
-mlx4_uar_init_primary(void)
+void **
+mlx4_uar_get_addr_ptr(uint32_t idx)
+{
+	struct mlx4_local_data *ld = &mlx4_local_data;
+	int ret;
+
+	assert(idx < MLX4_UAR_TABLE_SIZE_MAX);
+	if (idx >= ld->uar_table_sz) {
+		ret = uar_expand_table(idx + 1);
+		if (ret)
+			return NULL;
+	}
+	return &(*ld->uar_table)[idx];
+}
+
+/**
+ * Allocate a slot of UAR register table.
+ *
+ * Allocation is done by scanning the global bitmap. The global spinlock should
+ * be held.
+ *
+ * @return
+ *   Index of a free slot on success, a negative errno value otherwise and
+ *   rte_errno is set.
+ */
+uint32_t
+mlx4_uar_alloc_index(void)
 {
 	struct mlx4_shared_data *sd = mlx4_shared_data;
-	void *addr = (void *)0;
+	uint32_t idx = 0;
+	uint64_t slab = 0;
+	int ret;
 
-	if (sd->uar_base)
-		return 0;
-	/* find out lower bound of hugepage segments */
-	rte_memseg_walk(find_lower_va_bound, &addr);
-	/* keep distance to hugepages to minimize potential conflicts. */
-	addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX4_UAR_OFFSET + MLX4_UAR_SIZE));
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(addr, MLX4_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		ERROR("failed to reserve UAR address space, please"
-		      " adjust MLX4_UAR_SIZE or try --base-virtaddr");
-		rte_errno = ENOMEM;
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	rte_spinlock_lock(&sd->lock);
+	__rte_bitmap_scan_init(sd->uar_bmp);
+	ret = rte_bitmap_scan(sd->uar_bmp, &idx, &slab);
+	if (unlikely(!ret)) {
+		/*
+		 * This cannot happen unless there are unreasonably large number
+		 * of queues and ports.
+		 */
+		rte_errno = ENOSPC;
+		rte_spinlock_unlock(&sd->lock);
 		return -rte_errno;
 	}
-	/* Accept either same addr or a new addr returned from mmap if target
-	 * range occupied.
-	 */
-	INFO("reserved UAR address space: %p", addr);
-	sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
-	return 0;
+	idx += __builtin_ctzll(slab);
+	/* Mark the slot is occupied. */
+	rte_bitmap_clear(sd->uar_bmp, idx);
+	rte_spinlock_unlock(&sd->lock);
+	DEBUG("index %d is allocated in UAR reg. table", idx);
+	return idx;
 }
 
 /**
- * Unmap UAR address space reserved for primary process.
+ * Free a slot of UAR register table.
  */
-static void
-mlx4_uar_uninit_primary(void)
+void
+mlx4_uar_free_index(uint32_t idx)
 {
 	struct mlx4_shared_data *sd = mlx4_shared_data;
 
-	if (!sd->uar_base)
-		return;
-	munmap(sd->uar_base, MLX4_UAR_SIZE);
-	sd->uar_base = NULL;
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	assert(idx < MLX4_UAR_TABLE_SIZE_MAX);
+	rte_spinlock_lock(&sd->lock);
+	/* Mark the slot is empty. */
+	rte_bitmap_set(sd->uar_bmp, idx);
+	rte_spinlock_unlock(&sd->lock);
+	DEBUG("index %d is freed in UAR reg. table", idx);
 }
 
 /**
- * Reserve UAR address space for secondary process, align with primary process.
+ * Initialize UAR register table bitmap.
+ *
+ * UAR registers will be stored in a process-local table and the table is
+ * managed by a global bitmap. When there's a need to store a UAR register, an
+ * index should be allocated by mlx4_uar_alloc_index() and address of the
+ * allocated table entry must be acquired by mlx4_uar_get_addr_ptr() so that the
+ * table can be expanded if overflowed.
+ *
+ * The local UAR register table doesn't cover all the indexes in the bitmap.
+ * This will be expanded if more indexes are allocated than the current size of
+ * the table.
+ *
+ * Secondary process should have reference of the index and store remapped
+ * register at the same index in its local UAR register table.
+ *
+ * On the datapath of each process, the register can be referenced simply by
+ * MLX4_UAR_REG(idx).
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx4_uar_init_secondary(void)
+uar_init_primary(void)
 {
 	struct mlx4_shared_data *sd = mlx4_shared_data;
-	struct mlx4_local_data *ld = &mlx4_local_data;
-	void *addr;
+	struct rte_bitmap *bmp;
+	void *bmp_mem;
+	uint32_t bmp_size;
+	unsigned int i;
 
-	if (ld->uar_base) { /* Already reserved. */
-		assert(sd->uar_base == ld->uar_base);
-		return 0;
-	}
-	assert(sd->uar_base);
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(sd->uar_base, MLX4_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		ERROR("UAR mmap failed: %p size: %llu",
-		      sd->uar_base, MLX4_UAR_SIZE);
-		rte_errno = ENXIO;
+	bmp_size = rte_bitmap_get_memory_footprint(MLX4_UAR_TABLE_SIZE_MAX);
+	bmp_mem = rte_zmalloc("uar_table", bmp_size, RTE_CACHE_LINE_SIZE);
+	if (!bmp_mem) {
+		rte_errno = ENOMEM;
+		ERROR("failed to allocate memory for uar table");
 		return -rte_errno;
 	}
-	if (sd->uar_base != addr) {
-		ERROR("UAR address %p size %llu occupied, please"
-		      " adjust MLX4_UAR_OFFSET or try EAL parameter"
-		      " --base-virtaddr",
-		      sd->uar_base, MLX4_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
+	bmp = rte_bitmap_init(MLX4_UAR_TABLE_SIZE_MAX, bmp_mem, bmp_size);
+	/* Set the entire bitmap as 1 means vacant and 0 means empty. */
+	for (i = 0; i < bmp->array2_size; ++i)
+		rte_bitmap_set_slab(bmp, i * RTE_BITMAP_SLAB_BIT_SIZE, -1);
+	sd->uar_bmp = bmp;
+	return 0;
+}
+
+/**
+ * Un-initialize UAR register resources.
+ *
+ * The global bitmap and the register table of primary process are freed.
+ */
+static void
+uar_uninit_primary(void)
+{
+	struct mlx4_shared_data *sd = mlx4_shared_data;
+	struct mlx4_local_data *ld = &mlx4_local_data;
+
+	if (sd->uar_bmp) {
+		rte_bitmap_free(sd->uar_bmp);
+		rte_free(sd->uar_bmp);
+		sd->uar_bmp = NULL;
+	}
+	/* Free primary's table. */
+	if (ld->uar_table) {
+		rte_free(ld->uar_table);
+		ld->uar_table = NULL;
+		ld->uar_table_sz = 0;
 	}
-	ld->uar_base = addr;
-	INFO("reserved UAR address space: %p", addr);
+}
+
+/**
+ * Initialize UAR register resources for secondary process.
+ *
+ * Allocate the local UAR register table. Initially, the number of entries is
+ * same as the size of a bitmap slab.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+uar_init_secondary(void)
+{
+	/* Prepare at least a bitmap slab. */
+	uar_expand_table(RTE_BITMAP_SLAB_BIT_SIZE);
 	return 0;
 }
 
 /**
- * Unmap UAR address space reserved for secondary process.
+ * Un-initialize UAR register resources for secondary process.
+ *
+ * The local UAR register table is freed.
  */
 static void
-mlx4_uar_uninit_secondary(void)
+uar_uninit_secondary(void)
 {
 	struct mlx4_local_data *ld = &mlx4_local_data;
 
-	if (!ld->uar_base)
-		return;
-	munmap(ld->uar_base, MLX4_UAR_SIZE);
-	ld->uar_base = NULL;
+	/* Free process-local table. */
+	if (ld->uar_table) {
+		rte_free(ld->uar_table);
+		ld->uar_table = NULL;
+		ld->uar_table_sz = 0;
+	}
 }
 
 /**
@@ -824,7 +904,7 @@ mlx4_init_once(void)
 		rte_mem_event_callback_register("MLX4_MEM_EVENT_CB",
 						mlx4_mr_mem_event_cb, NULL);
 		mlx4_mp_init_primary();
-		ret = mlx4_uar_init_primary();
+		ret = uar_init_primary();
 		if (ret)
 			goto error;
 		sd->init_done = true;
@@ -833,7 +913,7 @@ mlx4_init_once(void)
 		if (ld->init_done)
 			break;
 		mlx4_mp_init_secondary();
-		ret = mlx4_uar_init_secondary();
+		ret = uar_init_secondary();
 		if (ret)
 			goto error;
 		++sd->secondary_cnt;
@@ -847,12 +927,12 @@ mlx4_init_once(void)
 error:
 	switch (rte_eal_process_type()) {
 	case RTE_PROC_PRIMARY:
-		mlx4_uar_uninit_primary();
+		uar_uninit_primary();
 		mlx4_mp_uninit_primary();
 		rte_mem_event_callback_unregister("MLX4_MEM_EVENT_CB", NULL);
 		break;
 	case RTE_PROC_SECONDARY:
-		mlx4_uar_uninit_secondary();
+		uar_uninit_secondary();
 		mlx4_mp_uninit_secondary();
 		break;
 	default:
@@ -1011,7 +1091,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 				goto error;
 			}
 			/* Remap UAR for Tx queues. */
-			err = mlx4_tx_uar_remap(eth_dev, err);
+			err = mlx4_txq_uar_init_secondary(eth_dev, err);
 			if (err) {
 				err = rte_errno;
 				goto error;
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 3881943ef0..977866e017 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -137,8 +137,8 @@ struct mlx4_shared_data {
 	/* Global spinlock for primary and secondary processes. */
 	int init_done; /* Whether primary has done initialization. */
 	unsigned int secondary_cnt; /* Number of secondary processes init'd. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
+	struct rte_bitmap *uar_bmp;
+	/* Bitmap to keep track of BlueFlame register table. */
 	struct mlx4_dev_list mem_event_cb_list;
 	rte_rwlock_t mem_event_rwlock;
 };
@@ -146,11 +146,19 @@ struct mlx4_shared_data {
 /* Per-process data structure, not visible to other processes. */
 struct mlx4_local_data {
 	int init_done; /* Whether a secondary has done initialization. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
+	void *(*uar_table)[];
+	/* Table of BlueFlame registers for each process. */
+	size_t uar_table_sz;
+	/* Size of BlueFlame register table. */
 };
 
 extern struct mlx4_shared_data *mlx4_shared_data;
+extern struct mlx4_local_data mlx4_local_data;
+
+/* The maximum size of BlueFlame register table. */
+#define MLX4_UAR_TABLE_SIZE_MAX (RTE_MAX_ETHPORTS * RTE_MAX_QUEUES_PER_PORT)
+
+#define MLX4_UAR_REG(idx) ((*mlx4_local_data.uar_table)[(idx)])
 
 /** Private data structure. */
 struct mlx4_priv {
@@ -197,6 +205,12 @@ struct mlx4_priv {
 #define PORT_ID(priv) ((priv)->dev_data->port_id)
 #define ETH_DEV(priv) (&rte_eth_devices[PORT_ID(priv)])
 
+/* mlx4.c */
+
+void **mlx4_uar_get_addr_ptr(uint32_t idx);
+uint32_t mlx4_uar_alloc_index(void);
+void mlx4_uar_free_index(uint32_t idx);
+
 /* mlx4_ethdev.c */
 
 int mlx4_get_ifname(const struct mlx4_priv *priv, char (*ifname)[IF_NAMESIZE]);
diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h
index b3e11dde25..06ad92d391 100644
--- a/drivers/net/mlx4/mlx4_prm.h
+++ b/drivers/net/mlx4/mlx4_prm.h
@@ -77,8 +77,6 @@ struct mlx4_sq {
 	uint32_t owner_opcode;
 	/**< Default owner opcode with HW valid owner bit. */
 	uint32_t stamp; /**< Stamp value with an invalid HW owner bit. */
-	volatile uint32_t *qp_sdb; /**< Pointer to the doorbell. */
-	volatile uint32_t *db; /**< Pointer to the doorbell remapped. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
 	uint32_t doorbell_qpn; /**< qp number to write to the doorbell. */
 };
diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index f22f1ba559..513c8a61bf 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -1048,7 +1048,7 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	/* Make sure that descriptors are written before doorbell record. */
 	rte_wmb();
 	/* Ring QP doorbell. */
-	rte_write32(txq->msq.doorbell_qpn, txq->msq.db);
+	rte_write32(txq->msq.doorbell_qpn, MLX4_UAR_REG(txq->bfreg_idx));
 	txq->elts_head += i;
 	return i;
 }
diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
index 7d7a8988ed..d9df98715e 100644
--- a/drivers/net/mlx4/mlx4_rxtx.h
+++ b/drivers/net/mlx4/mlx4_rxtx.h
@@ -97,6 +97,7 @@ struct mlx4_txq_stats {
 struct txq {
 	struct mlx4_sq msq; /**< Info for directly manipulating the SQ. */
 	struct mlx4_cq mcq; /**< Info for directly manipulating the CQ. */
+	uint32_t bfreg_idx; /**< Blueflame register index. */
 	unsigned int elts_head; /**< Current index in (*elts)[]. */
 	unsigned int elts_tail; /**< First element awaiting completion. */
 	int elts_comp_cd; /**< Countdown for next completion. */
@@ -152,7 +153,7 @@ uint16_t mlx4_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts,
 
 /* mlx4_txq.c */
 
-int mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd);
+int mlx4_txq_uar_init_secondary(struct rte_eth_dev *dev, int fd);
 uint64_t mlx4_get_tx_port_offloads(struct mlx4_priv *priv);
 int mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
 			uint16_t desc, unsigned int socket,
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index ed00843425..51d74d6c80 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -39,10 +39,15 @@
 #include "mlx4_rxtx.h"
 #include "mlx4_utils.h"
 
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
 /**
- * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
- * Both primary and secondary process do mmap to make UAR address
- * aligned.
+ * Initialize UAR register access for Tx.
+ *
+ * Primary process shouldn't call this function.
+ *
+ * For secondary, remap BlueFlame registers for secondary process. Remapped
+ * address is stored at the same indexed entry of the local UAR register table
+ * as primary process.
  *
  * @param[in] dev
  *   Pointer to Ethernet device.
@@ -52,83 +57,41 @@
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
 int
-mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd)
+mlx4_txq_uar_init_secondary(struct rte_eth_dev *dev, int fd)
 {
-	unsigned int i, j;
 	const unsigned int txqs_n = dev->data->nb_tx_queues;
-	uintptr_t pages[txqs_n];
-	unsigned int pages_n = 0;
-	uintptr_t uar_va;
-	uintptr_t off;
-	void *addr;
-	void *ret;
 	struct txq *txq;
-	int already_mapped;
+	void *addr;
+	void **addr_ptr;
 	size_t page_size = sysconf(_SC_PAGESIZE);
+	unsigned int i;
 
-	memset(pages, 0, txqs_n * sizeof(uintptr_t));
+	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
 	/*
 	 * As rdma-core, UARs are mapped in size of OS page size.
-	 * Use aligned address to avoid duplicate mmap.
 	 * Ref to libmlx4 function: mlx4_init_context()
 	 */
 	for (i = 0; i != txqs_n; ++i) {
 		txq = dev->data->tx_queues[i];
 		if (!txq)
 			continue;
-		/* UAR addr form verbs used to find dup and offset in page. */
-		uar_va = (uintptr_t)txq->msq.qp_sdb;
-		off = uar_va & (page_size - 1); /* offset in page. */
-		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
-		already_mapped = 0;
-		for (j = 0; j != pages_n; ++j) {
-			if (pages[j] == uar_va) {
-				already_mapped = 1;
-				break;
-			}
-		}
-		/* new address in reserved UAR address space. */
-		addr = RTE_PTR_ADD(mlx4_shared_data->uar_base,
-				   uar_va & (uintptr_t)(MLX4_UAR_SIZE - 1));
-		if (!already_mapped) {
-			pages[pages_n++] = uar_va;
-			/* fixed mmap to specified address in reserved
-			 * address space.
-			 */
-			ret = mmap(addr, page_size,
-				   PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
-				   txq->msq.uar_mmap_offset);
-			if (ret != addr) {
-				/* fixed mmap has to return same address. */
-				ERROR("port %u call to mmap failed on UAR"
-				      " for txq %u",
-				      dev->data->port_id, i);
-				rte_errno = ENXIO;
-				return -rte_errno;
-			}
+		addr = mmap(NULL, page_size, PROT_WRITE,
+			    MAP_FIXED | MAP_SHARED, fd,
+			    txq->msq.uar_mmap_offset);
+		if (addr == MAP_FAILED) {
+			ERROR("port %u mmap failed for BF reg. of txq %u",
+			      dev->data->port_id, i);
+			rte_errno = ENXIO;
+			return -rte_errno;
 		}
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once. */
-			txq->msq.db = RTE_PTR_ADD((void *)addr, off);
-		else
-			assert(txq->msq.db ==
-			       RTE_PTR_ADD((void *)addr, off));
+		addr_ptr = mlx4_uar_get_addr_ptr(txq->bfreg_idx);
+		if (!addr_ptr)
+			return -rte_errno;
+		*addr_ptr = addr;
 	}
 	return 0;
 }
-#else
-int
-mlx4_tx_uar_remap(struct rte_eth_dev *dev __rte_unused, int fd __rte_unused)
-{
-	/*
-	 * If rdma-core doesn't support UAR remap, secondary process is not
-	 * supported, thus secondary cannot call this function but only primary
-	 * makes a call. Return success to not interrupt initialization.
-	 */
-	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
-	return 0;
-}
 #endif
 
 /**
@@ -185,10 +148,8 @@ mlx4_txq_fill_dv_obj_info(struct txq *txq, struct mlx4dv_obj *mlxdv)
 				     (0u << MLX4_SQ_OWNER_BIT));
 #ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
 	sq->uar_mmap_offset = dqp->uar_mmap_offset;
-	sq->qp_sdb = dqp->sdb;
-#else
-	sq->db = dqp->sdb;
 #endif
+	*mlx4_uar_get_addr_ptr(txq->bfreg_idx) = dqp->sdb;
 	sq->doorbell_qpn = dqp->doorbell_qpn;
 	cq->buf = dcq->buf.buf;
 	cq->cqe_cnt = dcq->cqe_cnt;
@@ -255,6 +216,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	struct ibv_qp_init_attr qp_init_attr;
 	struct txq *txq;
 	uint8_t *bounce_buf;
+	void **addr_ptr;
 	struct mlx4_malloc_vec vec[] = {
 		{
 			.align = RTE_CACHE_LINE_SIZE,
@@ -429,6 +391,15 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		goto error;
 	}
 #endif
+	/* Allocate a new index in UAR table. */
+	ret = mlx4_uar_alloc_index();
+	if (ret < 0)
+		goto error;
+	txq->bfreg_idx = ret;
+	/* Make sure the local UAR register table is properly expanded. */
+	addr_ptr = mlx4_uar_get_addr_ptr(txq->bfreg_idx);
+	if (!addr_ptr)
+		goto error;
 	mlx4_txq_fill_dv_obj_info(txq, &mlxdv);
 	/* Save first wqe pointer in the first element. */
 	(&(*txq->elts)[0])->wqe =
@@ -478,6 +449,7 @@ mlx4_tx_queue_release(void *dpdk_txq)
 			break;
 		}
 	mlx4_txq_free_elts(txq);
+	mlx4_uar_free_index(txq->bfreg_idx);
 	if (txq->qp)
 		claim_zero(mlx4_glue->destroy_qp(txq->qp));
 	if (txq->cq)
-- 
2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v2 0/3] net/mlx: remove device register remap
  2019-03-25 19:36 [dpdk-dev] [PATCH 0/3] net/mlx: remove device register remap Yongseok Koh
                   ` (3 preceding siblings ...)
  2019-03-25 19:36 ` [dpdk-dev] [PATCH 3/3] net/mlx4: " Yongseok Koh
@ 2019-04-01 21:22 ` Yongseok Koh
  2019-04-01 21:22   ` Yongseok Koh
                     ` (3 more replies)
  2019-04-05  1:33 ` [dpdk-dev] [PATCH v3 0/4] net/mlx: " Yongseok Koh
                   ` (2 subsequent siblings)
  7 siblings, 4 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-01 21:22 UTC (permalink / raw)
  To: shahafs; +Cc: dev

This patchset lifts the requirement of reserving huge virtual address space
and remapping device UAR register on to it in order to use the same address
between primary and secondary process.

v2:
* rebase on the latest branch tip
* fix a bug

Yongseok Koh (3):
  net/mlx5: fix recursive inclusion of header file
  net/mlx5: remove device register remap
  net/mlx4: remove device register remap

 drivers/net/mlx4/mlx4.c            | 274 ++++++++++++++++++++++++-------------
 drivers/net/mlx4/mlx4.h            |  22 ++-
 drivers/net/mlx4/mlx4_prm.h        |   3 +-
 drivers/net/mlx4/mlx4_rxtx.c       |   2 +-
 drivers/net/mlx4/mlx4_rxtx.h       |   3 +-
 drivers/net/mlx4/mlx4_txq.c        | 113 +++++++--------
 drivers/net/mlx5/mlx5.c            | 262 ++++++++++++++++++++++++-----------
 drivers/net/mlx5/mlx5.h            |  20 ++-
 drivers/net/mlx5/mlx5_flow.c       |   5 +-
 drivers/net/mlx5/mlx5_flow_dv.c    |   3 +-
 drivers/net/mlx5/mlx5_flow_verbs.c |   5 +-
 drivers/net/mlx5/mlx5_rxtx.h       |   8 +-
 drivers/net/mlx5/mlx5_trigger.c    |   2 +-
 drivers/net/mlx5/mlx5_txq.c        |  96 ++++++-------
 drivers/net/mlx5/mlx5_vlan.c       |   3 +-
 15 files changed, 501 insertions(+), 320 deletions(-)

-- 
2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v2 0/3] net/mlx: remove device register remap
  2019-04-01 21:22 ` [dpdk-dev] [PATCH v2 0/3] net/mlx: " Yongseok Koh
@ 2019-04-01 21:22   ` Yongseok Koh
  2019-04-01 21:22   ` [dpdk-dev] [PATCH v2 1/3] net/mlx5: fix recursive inclusion of header file Yongseok Koh
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-01 21:22 UTC (permalink / raw)
  To: shahafs; +Cc: dev

This patchset lifts the requirement of reserving huge virtual address space
and remapping device UAR register on to it in order to use the same address
between primary and secondary process.

v2:
* rebase on the latest branch tip
* fix a bug

Yongseok Koh (3):
  net/mlx5: fix recursive inclusion of header file
  net/mlx5: remove device register remap
  net/mlx4: remove device register remap

 drivers/net/mlx4/mlx4.c            | 274 ++++++++++++++++++++++++-------------
 drivers/net/mlx4/mlx4.h            |  22 ++-
 drivers/net/mlx4/mlx4_prm.h        |   3 +-
 drivers/net/mlx4/mlx4_rxtx.c       |   2 +-
 drivers/net/mlx4/mlx4_rxtx.h       |   3 +-
 drivers/net/mlx4/mlx4_txq.c        | 113 +++++++--------
 drivers/net/mlx5/mlx5.c            | 262 ++++++++++++++++++++++++-----------
 drivers/net/mlx5/mlx5.h            |  20 ++-
 drivers/net/mlx5/mlx5_flow.c       |   5 +-
 drivers/net/mlx5/mlx5_flow_dv.c    |   3 +-
 drivers/net/mlx5/mlx5_flow_verbs.c |   5 +-
 drivers/net/mlx5/mlx5_rxtx.h       |   8 +-
 drivers/net/mlx5/mlx5_trigger.c    |   2 +-
 drivers/net/mlx5/mlx5_txq.c        |  96 ++++++-------
 drivers/net/mlx5/mlx5_vlan.c       |   3 +-
 15 files changed, 501 insertions(+), 320 deletions(-)

-- 
2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v2 1/3] net/mlx5: fix recursive inclusion of header file
  2019-04-01 21:22 ` [dpdk-dev] [PATCH v2 0/3] net/mlx: " Yongseok Koh
  2019-04-01 21:22   ` Yongseok Koh
@ 2019-04-01 21:22   ` Yongseok Koh
  2019-04-01 21:22     ` Yongseok Koh
  2019-04-02  5:39     ` Shahaf Shuler
  2019-04-01 21:22   ` [dpdk-dev] [PATCH v2 2/3] net/mlx5: remove device register remap Yongseok Koh
  2019-04-01 21:22   ` [dpdk-dev] [PATCH v2 3/3] net/mlx4: " Yongseok Koh
  3 siblings, 2 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-01 21:22 UTC (permalink / raw)
  To: shahafs; +Cc: dev

mlx5.h includes mlx5_rxtx.h and mlx5_rxtx.h includes mlx5.h recursively.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.h            | 1 -
 drivers/net/mlx5/mlx5_flow.c       | 5 +++--
 drivers/net/mlx5/mlx5_flow_dv.c    | 3 ++-
 drivers/net/mlx5/mlx5_flow_verbs.c | 5 +++--
 drivers/net/mlx5/mlx5_vlan.c       | 3 ++-
 5 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 47a7d75f7a..8ce8361a85 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -33,7 +33,6 @@
 
 #include "mlx5_utils.h"
 #include "mlx5_mr.h"
-#include "mlx5_rxtx.h"
 #include "mlx5_autoconf.h"
 #include "mlx5_defs.h"
 
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index bc6a7c1eba..3eb8bf0c83 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -30,9 +30,10 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
-#include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_glue.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
 
 /* Dev ops structure defined in mlx5.c */
 extern const struct eth_dev_ops mlx5_dev_ops;
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 966dad9838..3e0a64ec4b 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -29,9 +29,10 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
 #include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
 
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index 49dd13e6d2..3956df1a7e 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -29,9 +29,10 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
-#include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_glue.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
 
 #define VERBS_SPEC_INNER(item_flags) \
 	(!!((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ? IBV_FLOW_SPEC_INNER : 0)
diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c
index 6568a3a475..4004930942 100644
--- a/drivers/net/mlx5/mlx5_vlan.c
+++ b/drivers/net/mlx5/mlx5_vlan.c
@@ -27,10 +27,11 @@
 #include <rte_ethdev_driver.h>
 #include <rte_common.h>
 
-#include "mlx5_utils.h"
 #include "mlx5.h"
 #include "mlx5_autoconf.h"
 #include "mlx5_glue.h"
+#include "mlx5_rxtx.h"
+#include "mlx5_utils.h"
 
 /**
  * DPDK callback to configure a VLAN filter.
-- 
2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v2 1/3] net/mlx5: fix recursive inclusion of header file
  2019-04-01 21:22   ` [dpdk-dev] [PATCH v2 1/3] net/mlx5: fix recursive inclusion of header file Yongseok Koh
@ 2019-04-01 21:22     ` Yongseok Koh
  2019-04-02  5:39     ` Shahaf Shuler
  1 sibling, 0 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-01 21:22 UTC (permalink / raw)
  To: shahafs; +Cc: dev

mlx5.h includes mlx5_rxtx.h and mlx5_rxtx.h includes mlx5.h recursively.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.h            | 1 -
 drivers/net/mlx5/mlx5_flow.c       | 5 +++--
 drivers/net/mlx5/mlx5_flow_dv.c    | 3 ++-
 drivers/net/mlx5/mlx5_flow_verbs.c | 5 +++--
 drivers/net/mlx5/mlx5_vlan.c       | 3 ++-
 5 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 47a7d75f7a..8ce8361a85 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -33,7 +33,6 @@
 
 #include "mlx5_utils.h"
 #include "mlx5_mr.h"
-#include "mlx5_rxtx.h"
 #include "mlx5_autoconf.h"
 #include "mlx5_defs.h"
 
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index bc6a7c1eba..3eb8bf0c83 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -30,9 +30,10 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
-#include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_glue.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
 
 /* Dev ops structure defined in mlx5.c */
 extern const struct eth_dev_ops mlx5_dev_ops;
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 966dad9838..3e0a64ec4b 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -29,9 +29,10 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
 #include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
 
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index 49dd13e6d2..3956df1a7e 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -29,9 +29,10 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
-#include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_glue.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
 
 #define VERBS_SPEC_INNER(item_flags) \
 	(!!((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ? IBV_FLOW_SPEC_INNER : 0)
diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c
index 6568a3a475..4004930942 100644
--- a/drivers/net/mlx5/mlx5_vlan.c
+++ b/drivers/net/mlx5/mlx5_vlan.c
@@ -27,10 +27,11 @@
 #include <rte_ethdev_driver.h>
 #include <rte_common.h>
 
-#include "mlx5_utils.h"
 #include "mlx5.h"
 #include "mlx5_autoconf.h"
 #include "mlx5_glue.h"
+#include "mlx5_rxtx.h"
+#include "mlx5_utils.h"
 
 /**
  * DPDK callback to configure a VLAN filter.
-- 
2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v2 2/3] net/mlx5: remove device register remap
  2019-04-01 21:22 ` [dpdk-dev] [PATCH v2 0/3] net/mlx: " Yongseok Koh
  2019-04-01 21:22   ` Yongseok Koh
  2019-04-01 21:22   ` [dpdk-dev] [PATCH v2 1/3] net/mlx5: fix recursive inclusion of header file Yongseok Koh
@ 2019-04-01 21:22   ` Yongseok Koh
  2019-04-01 21:22     ` Yongseok Koh
  2019-04-02  6:50     ` Shahaf Shuler
  2019-04-01 21:22   ` [dpdk-dev] [PATCH v2 3/3] net/mlx4: " Yongseok Koh
  3 siblings, 2 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-01 21:22 UTC (permalink / raw)
  To: shahafs; +Cc: dev

UAR (User Access Region) registers will be stored in a process-local table
and a process accesses a register in a table entry with index. Alloc/free
of table entry is managed by a global bitmap.

When there's a need to store a UAR register such as Tx BlueFlame register
for doorbell, an index should be allocated by mlx5_uar_alloc_index() and
address of the allocated table entry must be acquired by
mlx5_uar_get_addr_ptr() so that the table can be expanded if overflowed.
The local UAR register table doesn't cover all the indexes in the bitmap.
This will be expanded if more indexes are allocated than the current size
of the table.

For example, the BlueFlame register for Tx doorbell has to be remapped on
each secondary process. On initialization, primary process allocates an
index for the UAR register table and stores the register address in the
indexed entry of its own table when configuring a Tx queue. The index is
stored in the shared memory (txq->bfreg_idx) and visible to secondary
processes. As secondary processes know the index, each process stores
remapped register in the same indexed entry of its local UAR register
table.

On the datapath of each process, the register can be referenced simply by
MLX5_UAR_REG(idx) which accesses its local UAR register table by the index.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.c         | 262 +++++++++++++++++++++++++++-------------
 drivers/net/mlx5/mlx5.h         |  19 ++-
 drivers/net/mlx5/mlx5_rxtx.h    |   8 +-
 drivers/net/mlx5/mlx5_trigger.c |   2 +-
 drivers/net/mlx5/mlx5_txq.c     |  96 +++++++--------
 5 files changed, 242 insertions(+), 145 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 40445056f5..103841b2bc 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -25,6 +25,7 @@
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
 
+#include <rte_bitmap.h>
 #include <rte_malloc.h>
 #include <rte_ethdev_driver.h>
 #include <rte_ethdev_pci.h>
@@ -131,7 +132,7 @@ struct mlx5_shared_data *mlx5_shared_data;
 static rte_spinlock_t mlx5_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
 
 /* Process local data for secondary processes. */
-static struct mlx5_local_data mlx5_local_data;
+struct mlx5_local_data mlx5_local_data;
 
 /** Driver-specific log messages type. */
 int mlx5_logtype;
@@ -810,130 +811,225 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
 
 static struct rte_pci_driver mlx5_driver;
 
+
+/**
+ * Expand the local UAR register table.
+ *
+ * @param size
+ *   Size of the table to be expanded
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
 static int
-find_lower_va_bound(const struct rte_memseg_list *msl,
-		const struct rte_memseg *ms, void *arg)
+uar_expand_table(uint32_t size)
 {
-	void **addr = arg;
+	struct mlx5_local_data *ld = &mlx5_local_data;
+	void *mem;
+	size_t tbl_sz = ld->uar_table_sz;
 
-	if (msl->external)
+	if (size <= tbl_sz)
 		return 0;
-	if (*addr == NULL)
-		*addr = ms->addr;
-	else
-		*addr = RTE_MIN(*addr, ms->addr);
-
+	tbl_sz = RTE_ALIGN_CEIL(size, RTE_BITMAP_SLAB_BIT_SIZE);
+	mem = rte_realloc(ld->uar_table, tbl_sz * sizeof(void *),
+			  RTE_CACHE_LINE_SIZE);
+	if (!mem) {
+		rte_errno = ENOMEM;
+		DRV_LOG(ERR, "failed to expand uar table");
+		return -rte_errno;
+	}
+	DRV_LOG(DEBUG, "UAR reg. table is expanded to %zu", tbl_sz);
+	ld->uar_table = mem;
+	ld->uar_table_sz = tbl_sz;
 	return 0;
 }
 
 /**
- * Reserve UAR address space for primary process.
+ * Return the pointer of the indexed slot in the local UAR register table.
  *
- * Process local resource is used by both primary and secondary to avoid
- * duplicate reservation. The space has to be available on both primary and
- * secondary process, TXQ UAR maps to this area using fixed mmap w/o double
- * check.
+ * The indexed slot must be allocated by mlx5_uar_alloc_index() in advance. And
+ * the table will be expanded if overflowed.
+ *
+ * @param idx
+ *   Index of the table.
  *
  * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
+ *   Pointer of table entry on success, NULL otherwise and rte_errno is set.
  */
-static int
-mlx5_uar_init_primary(void)
+void **
+mlx5_uar_get_addr_ptr(uint32_t idx)
+{
+	struct mlx5_local_data *ld = &mlx5_local_data;
+	int ret;
+
+	assert(idx < MLX5_UAR_TABLE_SIZE_MAX);
+	if (idx >= ld->uar_table_sz) {
+		ret = uar_expand_table(idx + 1);
+		if (ret)
+			return NULL;
+	}
+	return &(*ld->uar_table)[idx];
+}
+
+/**
+ * Allocate a slot of UAR register table.
+ *
+ * Allocation is done by scanning the global bitmap while holding the global
+ * spinlock, which this function takes and releases itself.
+ *
+ * @return
+ *   Index of a free slot on success, a negative errno value otherwise and
+ *   rte_errno is set.
+ */
+uint32_t
+mlx5_uar_alloc_index(void)
 {
 	struct mlx5_shared_data *sd = mlx5_shared_data;
-	void *addr = (void *)0;
+	uint32_t idx = 0;
+	uint64_t slab = 0;
+	int ret;
 
-	if (sd->uar_base)
-		return 0;
-	/* find out lower bound of hugepage segments */
-	rte_memseg_walk(find_lower_va_bound, &addr);
-	/* keep distance to hugepages to minimize potential conflicts. */
-	addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX5_UAR_OFFSET + MLX5_UAR_SIZE));
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(addr, MLX5_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		DRV_LOG(ERR,
-			"Failed to reserve UAR address space, please"
-			" adjust MLX5_UAR_SIZE or try --base-virtaddr");
-		rte_errno = ENOMEM;
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	rte_spinlock_lock(&sd->lock);
+	__rte_bitmap_scan_init(sd->uar_bmp);
+	ret = rte_bitmap_scan(sd->uar_bmp, &idx, &slab);
+	if (unlikely(!ret)) {
+		/*
+		 * This cannot happen unless there are unreasonably large number
+		 * of queues and ports.
+		 */
+		rte_errno = ENOSPC;
+		rte_spinlock_unlock(&sd->lock);
 		return -rte_errno;
 	}
-	/* Accept either same addr or a new addr returned from mmap if target
-	 * range occupied.
-	 */
-	DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
-	sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
-	return 0;
+	idx += __builtin_ctzll(slab);
+	/* Mark the slot as occupied. */
+	rte_bitmap_clear(sd->uar_bmp, idx);
+	rte_spinlock_unlock(&sd->lock);
+	DRV_LOG(DEBUG, "index %d is allocated in UAR reg. table", idx);
+	return idx;
 }
 
 /**
- * Unmap UAR address space reserved for primary process.
+ * Free a slot of UAR register table.
  */
-static void
-mlx5_uar_uninit_primary(void)
+void
+mlx5_uar_free_index(uint32_t idx)
 {
 	struct mlx5_shared_data *sd = mlx5_shared_data;
 
-	if (!sd->uar_base)
-		return;
-	munmap(sd->uar_base, MLX5_UAR_SIZE);
-	sd->uar_base = NULL;
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	assert(idx < MLX5_UAR_TABLE_SIZE_MAX);
+	rte_spinlock_lock(&sd->lock);
+	/* Mark the slot as empty. */
+	rte_bitmap_set(sd->uar_bmp, idx);
+	rte_spinlock_unlock(&sd->lock);
+	DRV_LOG(DEBUG, "index %d is freed in UAR reg. table", idx);
 }
 
 /**
- * Reserve UAR address space for secondary process, align with primary process.
+ * Initialize UAR register table bitmap.
+ *
+ * UAR registers will be stored in a process-local table and the table is
+ * managed by a global bitmap. When there's a need to store a UAR register, an
+ * index should be allocated by mlx5_uar_alloc_index() and address of the
+ * allocated table entry must be acquired by mlx5_uar_get_addr_ptr() so that the
+ * table can be expanded if overflowed.
+ *
+ * The local UAR register table doesn't cover all the indexes in the bitmap.
+ * This will be expanded if more indexes are allocated than the current size of
+ * the table.
+ *
+ * Secondary process should have reference of the index and store remapped
+ * register at the same index in its local UAR register table.
+ *
+ * On the datapath of each process, the register can be referenced simply by
+ * MLX5_UAR_REG(idx).
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_uar_init_secondary(void)
+uar_init_primary(void)
 {
 	struct mlx5_shared_data *sd = mlx5_shared_data;
-	struct mlx5_local_data *ld = &mlx5_local_data;
-	void *addr;
+	struct rte_bitmap *bmp;
+	void *bmp_mem;
+	uint32_t bmp_size;
+	unsigned int i;
 
-	if (ld->uar_base) { /* Already reserved. */
-		assert(sd->uar_base == ld->uar_base);
-		return 0;
-	}
-	assert(sd->uar_base);
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(sd->uar_base, MLX5_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		DRV_LOG(ERR, "UAR mmap failed: %p size: %llu",
-			sd->uar_base, MLX5_UAR_SIZE);
-		rte_errno = ENXIO;
+	bmp_size = rte_bitmap_get_memory_footprint(MLX5_UAR_TABLE_SIZE_MAX);
+	bmp_mem = rte_zmalloc("uar_table", bmp_size, RTE_CACHE_LINE_SIZE);
+	if (!bmp_mem) {
+		rte_errno = ENOMEM;
+		DRV_LOG(ERR, "failed to allocate memory for uar table");
 		return -rte_errno;
 	}
-	if (sd->uar_base != addr) {
-		DRV_LOG(ERR,
-			"UAR address %p size %llu occupied, please"
-			" adjust MLX5_UAR_OFFSET or try EAL parameter"
-			" --base-virtaddr",
-			sd->uar_base, MLX5_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
+	bmp = rte_bitmap_init(MLX5_UAR_TABLE_SIZE_MAX, bmp_mem, bmp_size);
+	/* Set every bit: 1 means vacant and 0 means occupied. */
+	for (i = 0; i < bmp->array2_size; ++i)
+		rte_bitmap_set_slab(bmp, i * RTE_BITMAP_SLAB_BIT_SIZE, -1);
+	sd->uar_bmp = bmp;
+	return 0;
+}
+
+/**
+ * Un-initialize UAR register resources.
+ *
+ * The global bitmap and the register table of primary process are freed.
+ */
+static void
+uar_uninit_primary(void)
+{
+	struct mlx5_shared_data *sd = mlx5_shared_data;
+	struct mlx5_local_data *ld = &mlx5_local_data;
+
+	if (sd->uar_bmp) {
+		rte_bitmap_free(sd->uar_bmp);
+		rte_free(sd->uar_bmp);
+		sd->uar_bmp = NULL;
+	}
+	/* Free primary's table. */
+	if (ld->uar_table) {
+		rte_free(ld->uar_table);
+		ld->uar_table = NULL;
+		ld->uar_table_sz = 0;
 	}
-	ld->uar_base = addr;
-	DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
+}
+
+/**
+ * Initialize UAR register resources for secondary process.
+ *
+ * Allocate the local UAR register table. Initially, the number of entries is
+ * same as the size of a bitmap slab.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+uar_init_secondary(void)
+{
+	/* Prepare at least a bitmap slab. */
+	uar_expand_table(RTE_BITMAP_SLAB_BIT_SIZE);
 	return 0;
 }
 
 /**
- * Unmap UAR address space reserved for secondary process.
+ * Un-initialize UAR register resources for secondary process.
+ *
+ * The local UAR register table is freed.
  */
 static void
-mlx5_uar_uninit_secondary(void)
+uar_uninit_secondary(void)
 {
 	struct mlx5_local_data *ld = &mlx5_local_data;
 
-	if (!ld->uar_base)
-		return;
-	munmap(ld->uar_base, MLX5_UAR_SIZE);
-	ld->uar_base = NULL;
+	/* Free process-local table. */
+	if (ld->uar_table) {
+		rte_free(ld->uar_table);
+		ld->uar_table = NULL;
+		ld->uar_table_sz = 0;
+	}
 }
 
 /**
@@ -967,7 +1063,7 @@ mlx5_init_once(void)
 		rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
 						mlx5_mr_mem_event_cb, NULL);
 		mlx5_mp_init_primary();
-		ret = mlx5_uar_init_primary();
+		ret = uar_init_primary();
 		if (ret)
 			goto error;
 		sd->init_done = true;
@@ -976,7 +1072,7 @@ mlx5_init_once(void)
 		if (ld->init_done)
 			break;
 		mlx5_mp_init_secondary();
-		ret = mlx5_uar_init_secondary();
+		ret = uar_init_secondary();
 		if (ret)
 			goto error;
 		++sd->secondary_cnt;
@@ -990,12 +1086,12 @@ mlx5_init_once(void)
 error:
 	switch (rte_eal_process_type()) {
 	case RTE_PROC_PRIMARY:
-		mlx5_uar_uninit_primary();
+		uar_uninit_primary();
 		mlx5_mp_uninit_primary();
 		rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB", NULL);
 		break;
 	case RTE_PROC_SECONDARY:
-		mlx5_uar_uninit_secondary();
+		uar_uninit_secondary();
 		mlx5_mp_uninit_secondary();
 		break;
 	default:
@@ -1099,7 +1195,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 		if (err < 0)
 			return NULL;
 		/* Remap UAR for Tx queues. */
-		err = mlx5_tx_uar_remap(eth_dev, err);
+		err = mlx5_txq_uar_init(eth_dev, err);
 		if (err)
 			return NULL;
 		/*
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 8ce8361a85..f77517bee0 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -97,8 +97,8 @@ struct mlx5_shared_data {
 	/* Global spinlock for primary and secondary processes. */
 	int init_done; /* Whether primary has done initialization. */
 	unsigned int secondary_cnt; /* Number of secondary processes init'd. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
+	struct rte_bitmap *uar_bmp;
+	/* Bitmap to keep track of BlueFlame register table. */
 	struct mlx5_dev_list mem_event_cb_list;
 	rte_rwlock_t mem_event_rwlock;
 };
@@ -106,11 +106,19 @@ struct mlx5_shared_data {
 /* Per-process data structure, not visible to other processes. */
 struct mlx5_local_data {
 	int init_done; /* Whether a secondary has done initialization. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
+	void *(*uar_table)[];
+	/* Table of BlueFlame registers for each process. */
+	size_t uar_table_sz;
+	/* Size of BlueFlame register table. */
 };
 
 extern struct mlx5_shared_data *mlx5_shared_data;
+extern struct mlx5_local_data mlx5_local_data;
+
+/* The maximum size of BlueFlame register table. */
+#define MLX5_UAR_TABLE_SIZE_MAX (RTE_MAX_ETHPORTS * RTE_MAX_QUEUES_PER_PORT)
+
+#define MLX5_UAR_REG(idx) ((*mlx5_local_data.uar_table)[(idx)])
 
 struct mlx5_counter_ctrl {
 	/* Name of the counter. */
@@ -331,6 +339,9 @@ struct mlx5_priv {
 /* mlx5.c */
 
 int mlx5_getenv_int(const char *);
+void **mlx5_uar_get_addr_ptr(uint32_t idx);
+uint32_t mlx5_uar_alloc_index(void);
+void mlx5_uar_free_index(uint32_t idx);
 
 /* mlx5_ethdev.c */
 
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index ced9945888..b32c1d6e0f 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -203,7 +203,7 @@ struct mlx5_txq_data {
 	volatile void *wqes; /* Work queue (use volatile to write into). */
 	volatile uint32_t *qp_db; /* Work queue doorbell. */
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
-	volatile void *bf_reg; /* Blueflame register remapped. */
+	uint32_t bfreg_idx; /* Blueflame register index. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
 #ifndef RTE_ARCH_64
@@ -232,7 +232,7 @@ struct mlx5_txq_ctrl {
 	struct mlx5_priv *priv; /* Back pointer to private data. */
 	struct mlx5_txq_data txq; /* Data path structure. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
-	volatile void *bf_reg_orig; /* Blueflame register from verbs. */
+	void *bf_reg; /* BlueFlame register from Verbs. */
 	uint16_t idx; /* Queue index. */
 };
 
@@ -303,7 +303,7 @@ uint64_t mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev);
 int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 			unsigned int socket, const struct rte_eth_txconf *conf);
 void mlx5_tx_queue_release(void *dpdk_txq);
-int mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd);
+int mlx5_txq_uar_init(struct rte_eth_dev *dev, int fd);
 struct mlx5_txq_ibv *mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx);
 struct mlx5_txq_ibv *mlx5_txq_ibv_get(struct rte_eth_dev *dev, uint16_t idx);
 int mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv);
@@ -706,7 +706,7 @@ static __rte_always_inline void
 mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe,
 		       int cond)
 {
-	uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
+	uint64_t *dst = MLX5_UAR_REG(txq->bfreg_idx);
 	volatile uint64_t *src = ((volatile uint64_t *)wqe);
 
 	rte_cio_wmb();
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 5b73f0ff03..d7f27702e8 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -58,7 +58,7 @@ mlx5_txq_start(struct rte_eth_dev *dev)
 			goto error;
 		}
 	}
-	ret = mlx5_tx_uar_remap(dev, priv->sh->ctx->cmd_fd);
+	ret = mlx5_txq_uar_init(dev, priv->sh->ctx->cmd_fd);
 	if (ret) {
 		/* Adjust index for rollback. */
 		i = priv->txqs_n - 1;
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 1b3d89f2f6..d8e0bda371 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -231,9 +231,13 @@ mlx5_tx_queue_release(void *dpdk_txq)
 
 
 /**
- * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
- * Both primary and secondary process do mmap to make UAR address
- * aligned.
+ * Initialize UAR register access for Tx.
+ *
+ * For both primary and secondary, initialize UAR locks for atomic access.
+ *
+ * For secondary, remap BlueFlame registers for secondary process. Remapped
+ * address is stored at the same indexed entry of the local UAR register table
+ * as primary process.
  *
  * @param[in] dev
  *   Pointer to Ethernet device.
@@ -244,75 +248,52 @@ mlx5_tx_queue_release(void *dpdk_txq)
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
+mlx5_txq_uar_init(struct rte_eth_dev *dev, int fd)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	unsigned int i, j;
-	uintptr_t pages[priv->txqs_n];
-	unsigned int pages_n = 0;
-	uintptr_t uar_va;
-	uintptr_t off;
-	void *addr;
-	void *ret;
 	struct mlx5_txq_data *txq;
 	struct mlx5_txq_ctrl *txq_ctrl;
-	int already_mapped;
+	void *addr;
+	void **addr_ptr;
+	uintptr_t uar_va;
+	uintptr_t offset;
 	size_t page_size = sysconf(_SC_PAGESIZE);
+	unsigned int i;
 #ifndef RTE_ARCH_64
 	unsigned int lock_idx;
 #endif
 
-	memset(pages, 0, priv->txqs_n * sizeof(uintptr_t));
-	/*
-	 * As rdma-core, UARs are mapped in size of OS page size.
-	 * Use aligned address to avoid duplicate mmap.
-	 * Ref to libmlx5 function: mlx5_init_context()
-	 */
 	for (i = 0; i != priv->txqs_n; ++i) {
 		if (!(*priv->txqs)[i])
 			continue;
 		txq = (*priv->txqs)[i];
 		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
 		assert(txq_ctrl->idx == (uint16_t)i);
-		/* UAR addr form verbs used to find dup and offset in page. */
-		uar_va = (uintptr_t)txq_ctrl->bf_reg_orig;
-		off = uar_va & (page_size - 1); /* offset in page. */
-		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
-		already_mapped = 0;
-		for (j = 0; j != pages_n; ++j) {
-			if (pages[j] == uar_va) {
-				already_mapped = 1;
-				break;
-			}
-		}
-		/* new address in reserved UAR address space. */
-		addr = RTE_PTR_ADD(mlx5_shared_data->uar_base,
-				   uar_va & (uintptr_t)(MLX5_UAR_SIZE - 1));
-		if (!already_mapped) {
-			pages[pages_n++] = uar_va;
-			/* fixed mmap to specified address in reserved
-			 * address space.
+		if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+			/*
+			 * As rdma-core, UARs are mapped in size of OS page
+			 * size. Ref to libmlx5 function: mlx5_init_context()
 			 */
-			ret = mmap(addr, page_size,
-				   PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
-				   txq_ctrl->uar_mmap_offset);
-			if (ret != addr) {
-				/* fixed mmap have to return same address */
+			uar_va = (uintptr_t)txq_ctrl->bf_reg;
+			offset = uar_va & (page_size - 1); /* Offset in page. */
+			addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
+				    txq_ctrl->uar_mmap_offset);
+			if (addr == MAP_FAILED) {
 				DRV_LOG(ERR,
-					"port %u call to mmap failed on UAR"
-					" for txq %u",
+					"port %u mmap failed for BF reg."
+					" of txq %u",
 					dev->data->port_id, txq_ctrl->idx);
 				rte_errno = ENXIO;
 				return -rte_errno;
 			}
+			addr = RTE_PTR_ADD(addr, offset);
+			addr_ptr = mlx5_uar_get_addr_ptr(txq->bfreg_idx);
+			if (!addr_ptr)
+				return -rte_errno;
+			*addr_ptr = addr;
 		}
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once */
-			txq_ctrl->txq.bf_reg = RTE_PTR_ADD((void *)addr, off);
-		else
-			assert(txq_ctrl->txq.bf_reg ==
-			       RTE_PTR_ADD((void *)addr, off));
 #ifndef RTE_ARCH_64
-		/* Assign a UAR lock according to UAR page number */
+		/* Assign an UAR lock according to UAR page number */
 		lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
 			   MLX5_UAR_PAGE_NUM_MASK;
 		txq->uar_lock = &priv->uar_lock[lock_idx];
@@ -372,6 +353,7 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	struct mlx5dv_obj obj;
 	const int desc = 1 << txq_data->elts_n;
 	eth_tx_burst_t tx_pkt_burst = mlx5_select_tx_function(dev);
+	void **addr_ptr;
 	int ret = 0;
 
 	assert(txq_data);
@@ -507,7 +489,17 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	txq_data->wqes = qp.sq.buf;
 	txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
 	txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
-	txq_ctrl->bf_reg_orig = qp.bf.reg;
+	/* Allocate a new index in UAR table. */
+	ret = mlx5_uar_alloc_index();
+	if (ret < 0)
+		goto error;
+	txq_data->bfreg_idx = ret;
+	txq_ctrl->bf_reg = qp.bf.reg;
+	/* Store the BlueFlame register address in the local table. */
+	addr_ptr = mlx5_uar_get_addr_ptr(txq_data->bfreg_idx);
+	if (!addr_ptr)
+		goto error;
+	*addr_ptr = txq_ctrl->bf_reg;
 	txq_data->cq_db = cq_info.dbrec;
 	txq_data->cqes =
 		(volatile struct mlx5_cqe (*)[])
@@ -589,6 +581,7 @@ mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv)
 {
 	assert(txq_ibv);
 	if (rte_atomic32_dec_and_test(&txq_ibv->refcnt)) {
+		mlx5_uar_free_index(txq_ibv->txq_ctrl->txq.bfreg_idx);
 		claim_zero(mlx5_glue->destroy_qp(txq_ibv->qp));
 		claim_zero(mlx5_glue->destroy_cq(txq_ibv->cq));
 		LIST_REMOVE(txq_ibv, next);
@@ -837,15 +830,12 @@ mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_txq_ctrl *txq;
-	size_t page_size = sysconf(_SC_PAGESIZE);
 
 	if (!(*priv->txqs)[idx])
 		return 0;
 	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
 	if (txq->ibv && !mlx5_txq_ibv_release(txq->ibv))
 		txq->ibv = NULL;
-	munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->txq.bf_reg, page_size),
-	       page_size);
 	if (rte_atomic32_dec_and_test(&txq->refcnt)) {
 		txq_free_elts(txq);
 		mlx5_mr_btree_free(&txq->txq.mr_ctrl.cache_bh);
-- 
2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v2 2/3] net/mlx5: remove device register remap
  2019-04-01 21:22   ` [dpdk-dev] [PATCH v2 2/3] net/mlx5: remove device register remap Yongseok Koh
@ 2019-04-01 21:22     ` Yongseok Koh
  2019-04-02  6:50     ` Shahaf Shuler
  1 sibling, 0 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-01 21:22 UTC (permalink / raw)
  To: shahafs; +Cc: dev

UAR (User Access Region) registers will be stored in a process-local table
and a process accesses a register in a table entry with index. Alloc/free
of table entry is managed by a global bitmap.

When there's a need to store a UAR register such as Tx BlueFlame register
for doorbell, an index should be allocated by mlx5_uar_alloc_index() and
address of the allocated table entry must be acquired by
mlx5_uar_get_addr_ptr() so that the table can be expanded if overflowed.
The local UAR register table doesn't cover all the indexes in the bitmap.
This will be expanded if more indexes are allocated than the current size
of the table.

For example, the BlueFlame register for Tx doorbell has to be remapped on
each secondary process. On initialization, primary process allocates an
index for the UAR register table and stores the register address in the
indexed entry of its own table when configuring a Tx queue. The index is
stored in the shared memory (txq->bfreg_idx) and visible to secondary
processes. As secondary processes know the index, each process stores
remapped register in the same indexed entry of its local UAR register
table.

On the datapath of each process, the register can be referenced simply by
MLX5_UAR_REG(idx) which accesses its local UAR register table by the index.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.c         | 262 +++++++++++++++++++++++++++-------------
 drivers/net/mlx5/mlx5.h         |  19 ++-
 drivers/net/mlx5/mlx5_rxtx.h    |   8 +-
 drivers/net/mlx5/mlx5_trigger.c |   2 +-
 drivers/net/mlx5/mlx5_txq.c     |  96 +++++++--------
 5 files changed, 242 insertions(+), 145 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 40445056f5..103841b2bc 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -25,6 +25,7 @@
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
 
+#include <rte_bitmap.h>
 #include <rte_malloc.h>
 #include <rte_ethdev_driver.h>
 #include <rte_ethdev_pci.h>
@@ -131,7 +132,7 @@ struct mlx5_shared_data *mlx5_shared_data;
 static rte_spinlock_t mlx5_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
 
 /* Process local data for secondary processes. */
-static struct mlx5_local_data mlx5_local_data;
+struct mlx5_local_data mlx5_local_data;
 
 /** Driver-specific log messages type. */
 int mlx5_logtype;
@@ -810,130 +811,225 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
 
 static struct rte_pci_driver mlx5_driver;
 
+
+/**
+ * Expand the local UAR register table.
+ *
+ * @param size
+ *   Size of the table to be expanded
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
 static int
-find_lower_va_bound(const struct rte_memseg_list *msl,
-		const struct rte_memseg *ms, void *arg)
+uar_expand_table(uint32_t size)
 {
-	void **addr = arg;
+	struct mlx5_local_data *ld = &mlx5_local_data;
+	void *mem;
+	size_t tbl_sz = ld->uar_table_sz;
 
-	if (msl->external)
+	if (size <= tbl_sz)
 		return 0;
-	if (*addr == NULL)
-		*addr = ms->addr;
-	else
-		*addr = RTE_MIN(*addr, ms->addr);
-
+	tbl_sz = RTE_ALIGN_CEIL(size, RTE_BITMAP_SLAB_BIT_SIZE);
+	mem = rte_realloc(ld->uar_table, tbl_sz * sizeof(void *),
+			  RTE_CACHE_LINE_SIZE);
+	if (!mem) {
+		rte_errno = ENOMEM;
+		DRV_LOG(ERR, "failed to expand uar table");
+		return -rte_errno;
+	}
+	DRV_LOG(DEBUG, "UAR reg. table is expanded to %zu", tbl_sz);
+	ld->uar_table = mem;
+	ld->uar_table_sz = tbl_sz;
 	return 0;
 }
 
 /**
- * Reserve UAR address space for primary process.
+ * Return the pointer of the indexed slot in the local UAR register table.
  *
- * Process local resource is used by both primary and secondary to avoid
- * duplicate reservation. The space has to be available on both primary and
- * secondary process, TXQ UAR maps to this area using fixed mmap w/o double
- * check.
+ * The indexed slot must be allocated by mlx5_uar_alloc_index() in advance. And
+ * the table will be expanded if overflowed.
+ *
+ * @param idx
+ *   Index of the table.
  *
  * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
+ *   Pointer of table entry on success, NULL otherwise and rte_errno is set.
  */
-static int
-mlx5_uar_init_primary(void)
+void **
+mlx5_uar_get_addr_ptr(uint32_t idx)
+{
+	struct mlx5_local_data *ld = &mlx5_local_data;
+	int ret;
+
+	assert(idx < MLX5_UAR_TABLE_SIZE_MAX);
+	if (idx >= ld->uar_table_sz) {
+		ret = uar_expand_table(idx + 1);
+		if (ret)
+			return NULL;
+	}
+	return &(*ld->uar_table)[idx];
+}
+
+/**
+ * Allocate a slot of UAR register table.
+ *
+ * Allocation is done by scanning the global bitmap. The global spinlock is
+ * acquired and released internally.
+ *
+ * @return
+ *   Index of a free slot on success, a negative errno value otherwise and
+ *   rte_errno is set.
+ */
+uint32_t
+mlx5_uar_alloc_index(void)
 {
 	struct mlx5_shared_data *sd = mlx5_shared_data;
-	void *addr = (void *)0;
+	uint32_t idx = 0;
+	uint64_t slab = 0;
+	int ret;
 
-	if (sd->uar_base)
-		return 0;
-	/* find out lower bound of hugepage segments */
-	rte_memseg_walk(find_lower_va_bound, &addr);
-	/* keep distance to hugepages to minimize potential conflicts. */
-	addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX5_UAR_OFFSET + MLX5_UAR_SIZE));
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(addr, MLX5_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		DRV_LOG(ERR,
-			"Failed to reserve UAR address space, please"
-			" adjust MLX5_UAR_SIZE or try --base-virtaddr");
-		rte_errno = ENOMEM;
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	rte_spinlock_lock(&sd->lock);
+	__rte_bitmap_scan_init(sd->uar_bmp);
+	ret = rte_bitmap_scan(sd->uar_bmp, &idx, &slab);
+	if (unlikely(!ret)) {
+		/*
+		 * This cannot happen unless there are unreasonably large number
+		 * of queues and ports.
+		 */
+		rte_errno = ENOSPC;
+		rte_spinlock_unlock(&sd->lock);
 		return -rte_errno;
 	}
-	/* Accept either same addr or a new addr returned from mmap if target
-	 * range occupied.
-	 */
-	DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
-	sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
-	return 0;
+	idx += __builtin_ctzll(slab);
+	/* Mark the slot is occupied. */
+	rte_bitmap_clear(sd->uar_bmp, idx);
+	rte_spinlock_unlock(&sd->lock);
+	DRV_LOG(DEBUG, "index %d is allocated in UAR reg. table", idx);
+	return idx;
 }
 
 /**
- * Unmap UAR address space reserved for primary process.
+ * Free a slot of UAR register table.
  */
-static void
-mlx5_uar_uninit_primary(void)
+void
+mlx5_uar_free_index(uint32_t idx)
 {
 	struct mlx5_shared_data *sd = mlx5_shared_data;
 
-	if (!sd->uar_base)
-		return;
-	munmap(sd->uar_base, MLX5_UAR_SIZE);
-	sd->uar_base = NULL;
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	assert(idx < MLX5_UAR_TABLE_SIZE_MAX);
+	rte_spinlock_lock(&sd->lock);
+	/* Mark the slot is empty. */
+	rte_bitmap_set(sd->uar_bmp, idx);
+	rte_spinlock_unlock(&sd->lock);
+	DRV_LOG(DEBUG, "index %d is freed in UAR reg. table", idx);
 }
 
 /**
- * Reserve UAR address space for secondary process, align with primary process.
+ * Initialize UAR register table bitmap.
+ *
+ * UAR registers will be stored in a process-local table and the table is
+ * managed by a global bitmap. When there's a need to store a UAR register, an
+ * index should be allocated by mlx5_uar_alloc_index() and address of the
+ * allocated table entry must be acquired by mlx5_uar_get_addr_ptr() so that the
+ * table can be expanded if overflowed.
+ *
+ * The local UAR register table doesn't cover all the indexes in the bitmap.
+ * This will be expanded if more indexes are allocated than the current size of
+ * the table.
+ *
+ * Secondary process should have reference of the index and store remapped
+ * register at the same index in its local UAR register table.
+ *
+ * On the datapath of each process, the register can be referenced simply by
+ * MLX5_UAR_REG(idx).
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_uar_init_secondary(void)
+uar_init_primary(void)
 {
 	struct mlx5_shared_data *sd = mlx5_shared_data;
-	struct mlx5_local_data *ld = &mlx5_local_data;
-	void *addr;
+	struct rte_bitmap *bmp;
+	void *bmp_mem;
+	uint32_t bmp_size;
+	unsigned int i;
 
-	if (ld->uar_base) { /* Already reserved. */
-		assert(sd->uar_base == ld->uar_base);
-		return 0;
-	}
-	assert(sd->uar_base);
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(sd->uar_base, MLX5_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		DRV_LOG(ERR, "UAR mmap failed: %p size: %llu",
-			sd->uar_base, MLX5_UAR_SIZE);
-		rte_errno = ENXIO;
+	bmp_size = rte_bitmap_get_memory_footprint(MLX5_UAR_TABLE_SIZE_MAX);
+	bmp_mem = rte_zmalloc("uar_table", bmp_size, RTE_CACHE_LINE_SIZE);
+	if (!bmp_mem) {
+		rte_errno = ENOMEM;
+		DRV_LOG(ERR, "failed to allocate memory for uar table");
 		return -rte_errno;
 	}
-	if (sd->uar_base != addr) {
-		DRV_LOG(ERR,
-			"UAR address %p size %llu occupied, please"
-			" adjust MLX5_UAR_OFFSET or try EAL parameter"
-			" --base-virtaddr",
-			sd->uar_base, MLX5_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
+	bmp = rte_bitmap_init(MLX5_UAR_TABLE_SIZE_MAX, bmp_mem, bmp_size);
+	/* Set every bit: 1 means vacant and 0 means occupied. */
+	for (i = 0; i < bmp->array2_size; ++i)
+		rte_bitmap_set_slab(bmp, i * RTE_BITMAP_SLAB_BIT_SIZE, -1);
+	sd->uar_bmp = bmp;
+	return 0;
+}
+
+/**
+ * Un-initialize UAR register resources.
+ *
+ * The global bitmap and the register table of primary process are freed.
+ */
+static void
+uar_uninit_primary(void)
+{
+	struct mlx5_shared_data *sd = mlx5_shared_data;
+	struct mlx5_local_data *ld = &mlx5_local_data;
+
+	if (sd->uar_bmp) {
+		rte_bitmap_free(sd->uar_bmp);
+		rte_free(sd->uar_bmp);
+		sd->uar_bmp = NULL;
+	}
+	/* Free primary's table. */
+	if (ld->uar_table) {
+		rte_free(ld->uar_table);
+		ld->uar_table = NULL;
+		ld->uar_table_sz = 0;
 	}
-	ld->uar_base = addr;
-	DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
+}
+
+/**
+ * Initialize UAR register resources for secondary process.
+ *
+ * Allocate the local UAR register table. Initially, the number of entries is
+ * same as the size of a bitmap slab.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+uar_init_secondary(void)
+{
+	/* Prepare at least a bitmap slab. */
+	uar_expand_table(RTE_BITMAP_SLAB_BIT_SIZE);
 	return 0;
 }
 
 /**
- * Unmap UAR address space reserved for secondary process.
+ * Un-initialize UAR register resources for secondary process.
+ *
+ * The local UAR register table is freed.
  */
 static void
-mlx5_uar_uninit_secondary(void)
+uar_uninit_secondary(void)
 {
 	struct mlx5_local_data *ld = &mlx5_local_data;
 
-	if (!ld->uar_base)
-		return;
-	munmap(ld->uar_base, MLX5_UAR_SIZE);
-	ld->uar_base = NULL;
+	/* Free process-local table. */
+	if (ld->uar_table) {
+		rte_free(ld->uar_table);
+		ld->uar_table = NULL;
+		ld->uar_table_sz = 0;
+	}
 }
 
 /**
@@ -967,7 +1063,7 @@ mlx5_init_once(void)
 		rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
 						mlx5_mr_mem_event_cb, NULL);
 		mlx5_mp_init_primary();
-		ret = mlx5_uar_init_primary();
+		ret = uar_init_primary();
 		if (ret)
 			goto error;
 		sd->init_done = true;
@@ -976,7 +1072,7 @@ mlx5_init_once(void)
 		if (ld->init_done)
 			break;
 		mlx5_mp_init_secondary();
-		ret = mlx5_uar_init_secondary();
+		ret = uar_init_secondary();
 		if (ret)
 			goto error;
 		++sd->secondary_cnt;
@@ -990,12 +1086,12 @@ mlx5_init_once(void)
 error:
 	switch (rte_eal_process_type()) {
 	case RTE_PROC_PRIMARY:
-		mlx5_uar_uninit_primary();
+		uar_uninit_primary();
 		mlx5_mp_uninit_primary();
 		rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB", NULL);
 		break;
 	case RTE_PROC_SECONDARY:
-		mlx5_uar_uninit_secondary();
+		uar_uninit_secondary();
 		mlx5_mp_uninit_secondary();
 		break;
 	default:
@@ -1099,7 +1195,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 		if (err < 0)
 			return NULL;
 		/* Remap UAR for Tx queues. */
-		err = mlx5_tx_uar_remap(eth_dev, err);
+		err = mlx5_txq_uar_init(eth_dev, err);
 		if (err)
 			return NULL;
 		/*
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 8ce8361a85..f77517bee0 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -97,8 +97,8 @@ struct mlx5_shared_data {
 	/* Global spinlock for primary and secondary processes. */
 	int init_done; /* Whether primary has done initialization. */
 	unsigned int secondary_cnt; /* Number of secondary processes init'd. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
+	struct rte_bitmap *uar_bmp;
+	/* Bitmap to keep track of BlueFlame register table. */
 	struct mlx5_dev_list mem_event_cb_list;
 	rte_rwlock_t mem_event_rwlock;
 };
@@ -106,11 +106,19 @@ struct mlx5_shared_data {
 /* Per-process data structure, not visible to other processes. */
 struct mlx5_local_data {
 	int init_done; /* Whether a secondary has done initialization. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
+	void *(*uar_table)[];
+	/* Table of BlueFlame registers for each process. */
+	size_t uar_table_sz;
+	/* Size of BlueFlame register table. */
 };
 
 extern struct mlx5_shared_data *mlx5_shared_data;
+extern struct mlx5_local_data mlx5_local_data;
+
+/* The maximum size of BlueFlame register table. */
+#define MLX5_UAR_TABLE_SIZE_MAX (RTE_MAX_ETHPORTS * RTE_MAX_QUEUES_PER_PORT)
+
+#define MLX5_UAR_REG(idx) ((*mlx5_local_data.uar_table)[(idx)])
 
 struct mlx5_counter_ctrl {
 	/* Name of the counter. */
@@ -331,6 +339,9 @@ struct mlx5_priv {
 /* mlx5.c */
 
 int mlx5_getenv_int(const char *);
+void **mlx5_uar_get_addr_ptr(uint32_t idx);
+uint32_t mlx5_uar_alloc_index(void);
+void mlx5_uar_free_index(uint32_t idx);
 
 /* mlx5_ethdev.c */
 
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index ced9945888..b32c1d6e0f 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -203,7 +203,7 @@ struct mlx5_txq_data {
 	volatile void *wqes; /* Work queue (use volatile to write into). */
 	volatile uint32_t *qp_db; /* Work queue doorbell. */
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
-	volatile void *bf_reg; /* Blueflame register remapped. */
+	uint32_t bfreg_idx; /* Blueflame register index. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
 #ifndef RTE_ARCH_64
@@ -232,7 +232,7 @@ struct mlx5_txq_ctrl {
 	struct mlx5_priv *priv; /* Back pointer to private data. */
 	struct mlx5_txq_data txq; /* Data path structure. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
-	volatile void *bf_reg_orig; /* Blueflame register from verbs. */
+	void *bf_reg; /* BlueFlame register from Verbs. */
 	uint16_t idx; /* Queue index. */
 };
 
@@ -303,7 +303,7 @@ uint64_t mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev);
 int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 			unsigned int socket, const struct rte_eth_txconf *conf);
 void mlx5_tx_queue_release(void *dpdk_txq);
-int mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd);
+int mlx5_txq_uar_init(struct rte_eth_dev *dev, int fd);
 struct mlx5_txq_ibv *mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx);
 struct mlx5_txq_ibv *mlx5_txq_ibv_get(struct rte_eth_dev *dev, uint16_t idx);
 int mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv);
@@ -706,7 +706,7 @@ static __rte_always_inline void
 mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe,
 		       int cond)
 {
-	uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
+	uint64_t *dst = MLX5_UAR_REG(txq->bfreg_idx);
 	volatile uint64_t *src = ((volatile uint64_t *)wqe);
 
 	rte_cio_wmb();
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 5b73f0ff03..d7f27702e8 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -58,7 +58,7 @@ mlx5_txq_start(struct rte_eth_dev *dev)
 			goto error;
 		}
 	}
-	ret = mlx5_tx_uar_remap(dev, priv->sh->ctx->cmd_fd);
+	ret = mlx5_txq_uar_init(dev, priv->sh->ctx->cmd_fd);
 	if (ret) {
 		/* Adjust index for rollback. */
 		i = priv->txqs_n - 1;
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 1b3d89f2f6..d8e0bda371 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -231,9 +231,13 @@ mlx5_tx_queue_release(void *dpdk_txq)
 
 
 /**
- * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
- * Both primary and secondary process do mmap to make UAR address
- * aligned.
+ * Initialize UAR register access for Tx.
+ *
+ * For both primary and secondary, initialize UAR locks for atomic access.
+ *
+ * For secondary, remap BlueFlame registers for secondary process. Remapped
+ * address is stored at the same indexed entry of the local UAR register table
+ * as primary process.
  *
  * @param[in] dev
  *   Pointer to Ethernet device.
@@ -244,75 +248,52 @@ mlx5_tx_queue_release(void *dpdk_txq)
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
+mlx5_txq_uar_init(struct rte_eth_dev *dev, int fd)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	unsigned int i, j;
-	uintptr_t pages[priv->txqs_n];
-	unsigned int pages_n = 0;
-	uintptr_t uar_va;
-	uintptr_t off;
-	void *addr;
-	void *ret;
 	struct mlx5_txq_data *txq;
 	struct mlx5_txq_ctrl *txq_ctrl;
-	int already_mapped;
+	void *addr;
+	void **addr_ptr;
+	uintptr_t uar_va;
+	uintptr_t offset;
 	size_t page_size = sysconf(_SC_PAGESIZE);
+	unsigned int i;
 #ifndef RTE_ARCH_64
 	unsigned int lock_idx;
 #endif
 
-	memset(pages, 0, priv->txqs_n * sizeof(uintptr_t));
-	/*
-	 * As rdma-core, UARs are mapped in size of OS page size.
-	 * Use aligned address to avoid duplicate mmap.
-	 * Ref to libmlx5 function: mlx5_init_context()
-	 */
 	for (i = 0; i != priv->txqs_n; ++i) {
 		if (!(*priv->txqs)[i])
 			continue;
 		txq = (*priv->txqs)[i];
 		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
 		assert(txq_ctrl->idx == (uint16_t)i);
-		/* UAR addr form verbs used to find dup and offset in page. */
-		uar_va = (uintptr_t)txq_ctrl->bf_reg_orig;
-		off = uar_va & (page_size - 1); /* offset in page. */
-		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
-		already_mapped = 0;
-		for (j = 0; j != pages_n; ++j) {
-			if (pages[j] == uar_va) {
-				already_mapped = 1;
-				break;
-			}
-		}
-		/* new address in reserved UAR address space. */
-		addr = RTE_PTR_ADD(mlx5_shared_data->uar_base,
-				   uar_va & (uintptr_t)(MLX5_UAR_SIZE - 1));
-		if (!already_mapped) {
-			pages[pages_n++] = uar_va;
-			/* fixed mmap to specified address in reserved
-			 * address space.
+		if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+			/*
+			 * As rdma-core, UARs are mapped in size of OS page
+			 * size. Ref to libmlx5 function: mlx5_init_context()
 			 */
-			ret = mmap(addr, page_size,
-				   PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
-				   txq_ctrl->uar_mmap_offset);
-			if (ret != addr) {
-				/* fixed mmap have to return same address */
+			uar_va = (uintptr_t)txq_ctrl->bf_reg;
+			offset = uar_va & (page_size - 1); /* Offset in page. */
+			addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
+				    txq_ctrl->uar_mmap_offset);
+			if (addr == MAP_FAILED) {
 				DRV_LOG(ERR,
-					"port %u call to mmap failed on UAR"
-					" for txq %u",
+					"port %u mmap failed for BF reg."
+					" of txq %u",
 					dev->data->port_id, txq_ctrl->idx);
 				rte_errno = ENXIO;
 				return -rte_errno;
 			}
+			addr = RTE_PTR_ADD(addr, offset);
+			addr_ptr = mlx5_uar_get_addr_ptr(txq->bfreg_idx);
+			if (!addr_ptr)
+				return -rte_errno;
+			*addr_ptr = addr;
 		}
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once */
-			txq_ctrl->txq.bf_reg = RTE_PTR_ADD((void *)addr, off);
-		else
-			assert(txq_ctrl->txq.bf_reg ==
-			       RTE_PTR_ADD((void *)addr, off));
 #ifndef RTE_ARCH_64
-		/* Assign a UAR lock according to UAR page number */
+		/* Assign an UAR lock according to UAR page number */
 		lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
 			   MLX5_UAR_PAGE_NUM_MASK;
 		txq->uar_lock = &priv->uar_lock[lock_idx];
@@ -372,6 +353,7 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	struct mlx5dv_obj obj;
 	const int desc = 1 << txq_data->elts_n;
 	eth_tx_burst_t tx_pkt_burst = mlx5_select_tx_function(dev);
+	void **addr_ptr;
 	int ret = 0;
 
 	assert(txq_data);
@@ -507,7 +489,17 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	txq_data->wqes = qp.sq.buf;
 	txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
 	txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
-	txq_ctrl->bf_reg_orig = qp.bf.reg;
+	/* Allocate a new index in UAR table. */
+	ret = mlx5_uar_alloc_index();
+	if (ret < 0)
+		goto error;
+	txq_data->bfreg_idx = ret;
+	txq_ctrl->bf_reg = qp.bf.reg;
+	/* Store the BlueFlame register address in the local table. */
+	addr_ptr = mlx5_uar_get_addr_ptr(txq_data->bfreg_idx);
+	if (!addr_ptr)
+		goto error;
+	*addr_ptr = txq_ctrl->bf_reg;
 	txq_data->cq_db = cq_info.dbrec;
 	txq_data->cqes =
 		(volatile struct mlx5_cqe (*)[])
@@ -589,6 +581,7 @@ mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv)
 {
 	assert(txq_ibv);
 	if (rte_atomic32_dec_and_test(&txq_ibv->refcnt)) {
+		mlx5_uar_free_index(txq_ibv->txq_ctrl->txq.bfreg_idx);
 		claim_zero(mlx5_glue->destroy_qp(txq_ibv->qp));
 		claim_zero(mlx5_glue->destroy_cq(txq_ibv->cq));
 		LIST_REMOVE(txq_ibv, next);
@@ -837,15 +830,12 @@ mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_txq_ctrl *txq;
-	size_t page_size = sysconf(_SC_PAGESIZE);
 
 	if (!(*priv->txqs)[idx])
 		return 0;
 	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
 	if (txq->ibv && !mlx5_txq_ibv_release(txq->ibv))
 		txq->ibv = NULL;
-	munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->txq.bf_reg, page_size),
-	       page_size);
 	if (rte_atomic32_dec_and_test(&txq->refcnt)) {
 		txq_free_elts(txq);
 		mlx5_mr_btree_free(&txq->txq.mr_ctrl.cache_bh);
-- 
2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v2 3/3] net/mlx4: remove device register remap
  2019-04-01 21:22 ` [dpdk-dev] [PATCH v2 0/3] net/mlx: " Yongseok Koh
                     ` (2 preceding siblings ...)
  2019-04-01 21:22   ` [dpdk-dev] [PATCH v2 2/3] net/mlx5: remove device register remap Yongseok Koh
@ 2019-04-01 21:22   ` Yongseok Koh
  2019-04-01 21:22     ` Yongseok Koh
  3 siblings, 1 reply; 66+ messages in thread
From: Yongseok Koh @ 2019-04-01 21:22 UTC (permalink / raw)
  To: shahafs; +Cc: dev

UAR (User Access Region) registers will be stored in a process-local table
and a process accesses a register in a table entry with index. Alloc/free
of table entry is managed by a global bitmap.

When there's a need to store a UAR register such as Tx BlueFlame register
for doorbell, an index should be allocated by mlx4_uar_alloc_index() and
address of the allocated table entry must be acquired by
mlx4_uar_get_addr_ptr() so that the table can be expanded if overflowed.
The local UAR register table doesn't cover all the indexes in the bitmap.
This will be expanded if more indexes are allocated than the current size
of the table.

For example, the BlueFlame register for Tx doorbell has to be remapped on
each secondary process. On initialization, primary process allocates an
index for the UAR register table and stores the register address in the
indexed entry of its own table when configuring a Tx queue. The index is
stored in the shared memory (txq->bfreg_idx) and visible to secondary
processes. As secondary processes know the index, each process stores
remapped register in the same indexed entry of its local UAR register
table.

On the datapath of each process, the register can be referenced simply by
MLX4_UAR_REG(idx) which accesses its local UAR register table by the index.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx4/mlx4.c      | 274 ++++++++++++++++++++++++++++---------------
 drivers/net/mlx4/mlx4.h      |  22 +++-
 drivers/net/mlx4/mlx4_prm.h  |   3 +-
 drivers/net/mlx4/mlx4_rxtx.c |   2 +-
 drivers/net/mlx4/mlx4_rxtx.h |   3 +-
 drivers/net/mlx4/mlx4_txq.c  | 113 ++++++++----------
 6 files changed, 249 insertions(+), 168 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 252658fc6a..7afe4db2e1 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -58,7 +58,7 @@ struct mlx4_shared_data *mlx4_shared_data;
 static rte_spinlock_t mlx4_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
 
 /* Process local data for secondary processes. */
-static struct mlx4_local_data mlx4_local_data;
+struct mlx4_local_data mlx4_local_data;
 
 /** Configuration structure for device arguments. */
 struct mlx4_conf {
@@ -262,11 +262,6 @@ mlx4_dev_start(struct rte_eth_dev *dev)
 		return 0;
 	DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
 	priv->started = 1;
-	ret = mlx4_tx_uar_remap(dev, priv->ctx->cmd_fd);
-	if (ret) {
-		ERROR("%p: cannot remap UAR", (void *)dev);
-		goto err;
-	}
 	ret = mlx4_rss_init(priv);
 	if (ret) {
 		ERROR("%p: cannot initialize RSS resources: %s",
@@ -314,8 +309,6 @@ static void
 mlx4_dev_stop(struct rte_eth_dev *dev)
 {
 	struct mlx4_priv *priv = dev->data->dev_private;
-	const size_t page_size = sysconf(_SC_PAGESIZE);
-	int i;
 
 	if (!priv->started)
 		return;
@@ -329,15 +322,6 @@ mlx4_dev_stop(struct rte_eth_dev *dev)
 	mlx4_flow_sync(priv, NULL);
 	mlx4_rxq_intr_disable(priv);
 	mlx4_rss_deinit(priv);
-	for (i = 0; i != dev->data->nb_tx_queues; ++i) {
-		struct txq *txq;
-
-		txq = dev->data->tx_queues[i];
-		if (!txq)
-			continue;
-		munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->msq.db,
-					       page_size), page_size);
-	}
 }
 
 /**
@@ -662,128 +646,224 @@ mlx4_hw_rss_sup(struct ibv_context *ctx, struct ibv_pd *pd,
 
 static struct rte_pci_driver mlx4_driver;
 
+/**
+ * Expand the local UAR register table.
+ *
+ * @param size
+ *   Size of the table to be expanded
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
 static int
-find_lower_va_bound(const struct rte_memseg_list *msl,
-		const struct rte_memseg *ms, void *arg)
+uar_expand_table(uint32_t size)
 {
-	void **addr = arg;
+	struct mlx4_local_data *ld = &mlx4_local_data;
+	void *mem;
+	size_t tbl_sz = ld->uar_table_sz;
 
-	if (msl->external)
+	if (size <= tbl_sz)
 		return 0;
-	if (*addr == NULL)
-		*addr = ms->addr;
-	else
-		*addr = RTE_MIN(*addr, ms->addr);
-
+	tbl_sz = RTE_ALIGN_CEIL(size, RTE_BITMAP_SLAB_BIT_SIZE);
+	mem = rte_realloc(ld->uar_table, tbl_sz * sizeof(void *),
+			  RTE_CACHE_LINE_SIZE);
+	if (!mem) {
+		rte_errno = ENOMEM;
+		ERROR("failed to expand uar table");
+		return -rte_errno;
+	}
+	DEBUG("UAR reg. table is expanded to %zu", tbl_sz);
+	ld->uar_table = mem;
+	ld->uar_table_sz = tbl_sz;
 	return 0;
 }
 
 /**
- * Reserve UAR address space for primary process.
+ * Return the pointer of the indexed slot in the local UAR register table.
  *
- * Process local resource is used by both primary and secondary to avoid
- * duplicate reservation. The space has to be available on both primary and
- * secondary process, TXQ UAR maps to this area using fixed mmap w/o double
- * check.
+ * The indexed slot must be allocated by mlx4_uar_alloc_index() in advance. And
+ * the table will be expanded if overflowed.
+ *
+ * @param idx
+ *   Index of the table.
  *
  * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
+ *   Pointer of table entry on success, NULL otherwise and rte_errno is set.
  */
-static int
-mlx4_uar_init_primary(void)
+void **
+mlx4_uar_get_addr_ptr(uint32_t idx)
+{
+	struct mlx4_local_data *ld = &mlx4_local_data;
+	int ret;
+
+	assert(idx < MLX4_UAR_TABLE_SIZE_MAX);
+	if (idx >= ld->uar_table_sz) {
+		ret = uar_expand_table(idx + 1);
+		if (ret)
+			return NULL;
+	}
+	return &(*ld->uar_table)[idx];
+}
+
+/**
+ * Allocate a slot of UAR register table.
+ *
+ * Allocation is done by scanning the global bitmap. The global spinlock is
+ * acquired internally.
+ *
+ * @return
+ *   Index of a free slot on success, a negative errno value otherwise and
+ *   rte_errno is set.
+ */
+uint32_t
+mlx4_uar_alloc_index(void)
 {
 	struct mlx4_shared_data *sd = mlx4_shared_data;
-	void *addr = (void *)0;
+	uint32_t idx = 0;
+	uint64_t slab = 0;
+	int ret;
 
-	if (sd->uar_base)
-		return 0;
-	/* find out lower bound of hugepage segments */
-	rte_memseg_walk(find_lower_va_bound, &addr);
-	/* keep distance to hugepages to minimize potential conflicts. */
-	addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX4_UAR_OFFSET + MLX4_UAR_SIZE));
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(addr, MLX4_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		ERROR("failed to reserve UAR address space, please"
-		      " adjust MLX4_UAR_SIZE or try --base-virtaddr");
-		rte_errno = ENOMEM;
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	rte_spinlock_lock(&sd->lock);
+	__rte_bitmap_scan_init(sd->uar_bmp);
+	ret = rte_bitmap_scan(sd->uar_bmp, &idx, &slab);
+	if (unlikely(!ret)) {
+		/*
+		 * This cannot happen unless there are unreasonably large number
+		 * of queues and ports.
+		 */
+		rte_errno = ENOSPC;
+		rte_spinlock_unlock(&sd->lock);
 		return -rte_errno;
 	}
-	/* Accept either same addr or a new addr returned from mmap if target
-	 * range occupied.
-	 */
-	INFO("reserved UAR address space: %p", addr);
-	sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
-	return 0;
+	idx += __builtin_ctzll(slab);
+	/* Mark the slot is occupied. */
+	rte_bitmap_clear(sd->uar_bmp, idx);
+	rte_spinlock_unlock(&sd->lock);
+	DEBUG("index %d is allocated in UAR reg. table", idx);
+	return idx;
 }
 
 /**
- * Unmap UAR address space reserved for primary process.
+ * Free a slot of UAR register table.
  */
-static void
-mlx4_uar_uninit_primary(void)
+void
+mlx4_uar_free_index(uint32_t idx)
 {
 	struct mlx4_shared_data *sd = mlx4_shared_data;
 
-	if (!sd->uar_base)
-		return;
-	munmap(sd->uar_base, MLX4_UAR_SIZE);
-	sd->uar_base = NULL;
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	assert(idx < MLX4_UAR_TABLE_SIZE_MAX);
+	rte_spinlock_lock(&sd->lock);
+	/* Mark the slot is empty. */
+	rte_bitmap_set(sd->uar_bmp, idx);
+	rte_spinlock_unlock(&sd->lock);
+	DEBUG("index %d is freed in UAR reg. table", idx);
 }
 
 /**
- * Reserve UAR address space for secondary process, align with primary process.
+ * Initialize UAR register table bitmap.
+ *
+ * UAR registers will be stored in a process-local table and the table is
+ * managed by a global bitmap. When there's a need to store a UAR register, an
+ * index should be allocated by mlx4_uar_alloc_index() and address of the
+ * allocated table entry must be acquired by mlx4_uar_get_addr_ptr() so that the
+ * table can be expanded if overflowed.
+ *
+ * The local UAR register table doesn't cover all the indexes in the bitmap.
+ * This will be expanded if more indexes are allocated than the current size of
+ * the table.
+ *
+ * Secondary process should have reference of the index and store remapped
+ * register at the same index in its local UAR register table.
+ *
+ * On the datapath of each process, the register can be referenced simply by
+ * MLX4_UAR_REG(idx).
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx4_uar_init_secondary(void)
+uar_init_primary(void)
 {
 	struct mlx4_shared_data *sd = mlx4_shared_data;
-	struct mlx4_local_data *ld = &mlx4_local_data;
-	void *addr;
+	struct rte_bitmap *bmp;
+	void *bmp_mem;
+	uint32_t bmp_size;
+	unsigned int i;
 
-	if (ld->uar_base) { /* Already reserved. */
-		assert(sd->uar_base == ld->uar_base);
-		return 0;
-	}
-	assert(sd->uar_base);
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(sd->uar_base, MLX4_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		ERROR("UAR mmap failed: %p size: %llu",
-		      sd->uar_base, MLX4_UAR_SIZE);
-		rte_errno = ENXIO;
+	bmp_size = rte_bitmap_get_memory_footprint(MLX4_UAR_TABLE_SIZE_MAX);
+	bmp_mem = rte_zmalloc("uar_table", bmp_size, RTE_CACHE_LINE_SIZE);
+	if (!bmp_mem) {
+		rte_errno = ENOMEM;
+		ERROR("failed to allocate memory for uar table");
 		return -rte_errno;
 	}
-	if (sd->uar_base != addr) {
-		ERROR("UAR address %p size %llu occupied, please"
-		      " adjust MLX4_UAR_OFFSET or try EAL parameter"
-		      " --base-virtaddr",
-		      sd->uar_base, MLX4_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
+	bmp = rte_bitmap_init(MLX4_UAR_TABLE_SIZE_MAX, bmp_mem, bmp_size);
+	/* Set the entire bitmap: 1 means vacant and 0 means occupied. */
+	for (i = 0; i < bmp->array2_size; ++i)
+		rte_bitmap_set_slab(bmp, i * RTE_BITMAP_SLAB_BIT_SIZE, -1);
+	sd->uar_bmp = bmp;
+	return 0;
+}
+
+/**
+ * Un-initialize UAR register resources.
+ *
+ * The global bitmap and the register table of primary process are freed.
+ */
+static void
+uar_uninit_primary(void)
+{
+	struct mlx4_shared_data *sd = mlx4_shared_data;
+	struct mlx4_local_data *ld = &mlx4_local_data;
+
+	if (sd->uar_bmp) {
+		rte_bitmap_free(sd->uar_bmp);
+		rte_free(sd->uar_bmp);
+		sd->uar_bmp = NULL;
+	}
+	/* Free primary's table. */
+	if (ld->uar_table) {
+		rte_free(ld->uar_table);
+		ld->uar_table = NULL;
+		ld->uar_table_sz = 0;
 	}
-	ld->uar_base = addr;
-	INFO("reserved UAR address space: %p", addr);
+}
+
+/**
+ * Initialize UAR register resources for secondary process.
+ *
+ * Allocate the local UAR register table. Initially, the number of entries is
+ * same as the size of a bitmap slab.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+uar_init_secondary(void)
+{
+	/* Prepare at least a bitmap slab. */
+	uar_expand_table(RTE_BITMAP_SLAB_BIT_SIZE);
 	return 0;
 }
 
 /**
- * Unmap UAR address space reserved for secondary process.
+ * Un-initialize UAR register resources for secondary process.
+ *
+ * The local UAR register table is freed.
  */
 static void
-mlx4_uar_uninit_secondary(void)
+uar_uninit_secondary(void)
 {
 	struct mlx4_local_data *ld = &mlx4_local_data;
 
-	if (!ld->uar_base)
-		return;
-	munmap(ld->uar_base, MLX4_UAR_SIZE);
-	ld->uar_base = NULL;
+	/* Free process-local table. */
+	if (ld->uar_table) {
+		rte_free(ld->uar_table);
+		ld->uar_table = NULL;
+		ld->uar_table_sz = 0;
+	}
 }
 
 /**
@@ -817,7 +897,7 @@ mlx4_init_once(void)
 		rte_mem_event_callback_register("MLX4_MEM_EVENT_CB",
 						mlx4_mr_mem_event_cb, NULL);
 		mlx4_mp_init_primary();
-		ret = mlx4_uar_init_primary();
+		ret = uar_init_primary();
 		if (ret)
 			goto error;
 		sd->init_done = true;
@@ -826,7 +906,7 @@ mlx4_init_once(void)
 		if (ld->init_done)
 			break;
 		mlx4_mp_init_secondary();
-		ret = mlx4_uar_init_secondary();
+		ret = uar_init_secondary();
 		if (ret)
 			goto error;
 		++sd->secondary_cnt;
@@ -840,12 +920,12 @@ mlx4_init_once(void)
 error:
 	switch (rte_eal_process_type()) {
 	case RTE_PROC_PRIMARY:
-		mlx4_uar_uninit_primary();
+		uar_uninit_primary();
 		mlx4_mp_uninit_primary();
 		rte_mem_event_callback_unregister("MLX4_MEM_EVENT_CB", NULL);
 		break;
 	case RTE_PROC_SECONDARY:
-		mlx4_uar_uninit_secondary();
+		uar_uninit_secondary();
 		mlx4_mp_uninit_secondary();
 		break;
 	default:
@@ -1012,7 +1092,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 				goto error;
 			}
 			/* Remap UAR for Tx queues. */
-			err = mlx4_tx_uar_remap(eth_dev, err);
+			err = mlx4_txq_uar_init_secondary(eth_dev, err);
 			if (err) {
 				err = rte_errno;
 				goto error;
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 1db23d6cc9..6ad9c34856 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -138,8 +138,8 @@ struct mlx4_shared_data {
 	/* Global spinlock for primary and secondary processes. */
 	int init_done; /* Whether primary has done initialization. */
 	unsigned int secondary_cnt; /* Number of secondary processes init'd. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
+	struct rte_bitmap *uar_bmp;
+	/* Bitmap to keep track of BlueFlame register table. */
 	struct mlx4_dev_list mem_event_cb_list;
 	rte_rwlock_t mem_event_rwlock;
 };
@@ -147,11 +147,19 @@ struct mlx4_shared_data {
 /* Per-process data structure, not visible to other processes. */
 struct mlx4_local_data {
 	int init_done; /* Whether a secondary has done initialization. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
+	void *(*uar_table)[];
+	/* Table of BlueFlame registers for each process. */
+	size_t uar_table_sz;
+	/* Size of BlueFlame register table. */
 };
 
 extern struct mlx4_shared_data *mlx4_shared_data;
+extern struct mlx4_local_data mlx4_local_data;
+
+/* The maximum size of BlueFlame register table. */
+#define MLX4_UAR_TABLE_SIZE_MAX (RTE_MAX_ETHPORTS * RTE_MAX_QUEUES_PER_PORT)
+
+#define MLX4_UAR_REG(idx) ((*mlx4_local_data.uar_table)[(idx)])
 
 /** Private data structure. */
 struct mlx4_priv {
@@ -198,6 +206,12 @@ struct mlx4_priv {
 #define PORT_ID(priv) ((priv)->dev_data->port_id)
 #define ETH_DEV(priv) (&rte_eth_devices[PORT_ID(priv)])
 
+/* mlx4.c */
+
+void **mlx4_uar_get_addr_ptr(uint32_t idx);
+uint32_t mlx4_uar_alloc_index(void);
+void mlx4_uar_free_index(uint32_t idx);
+
 /* mlx4_ethdev.c */
 
 int mlx4_get_ifname(const struct mlx4_priv *priv, char (*ifname)[IF_NAMESIZE]);
diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h
index b3e11dde25..616e4ea174 100644
--- a/drivers/net/mlx4/mlx4_prm.h
+++ b/drivers/net/mlx4/mlx4_prm.h
@@ -77,8 +77,7 @@ struct mlx4_sq {
 	uint32_t owner_opcode;
 	/**< Default owner opcode with HW valid owner bit. */
 	uint32_t stamp; /**< Stamp value with an invalid HW owner bit. */
-	volatile uint32_t *qp_sdb; /**< Pointer to the doorbell. */
-	volatile uint32_t *db; /**< Pointer to the doorbell remapped. */
+	uint32_t *db; /**< Pointer to the Doorbell. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
 	uint32_t doorbell_qpn; /**< qp number to write to the doorbell. */
 };
diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index f22f1ba559..513c8a61bf 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -1048,7 +1048,7 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	/* Make sure that descriptors are written before doorbell record. */
 	rte_wmb();
 	/* Ring QP doorbell. */
-	rte_write32(txq->msq.doorbell_qpn, txq->msq.db);
+	rte_write32(txq->msq.doorbell_qpn, MLX4_UAR_REG(txq->bfreg_idx));
 	txq->elts_head += i;
 	return i;
 }
diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
index 7d7a8988ed..d9df98715e 100644
--- a/drivers/net/mlx4/mlx4_rxtx.h
+++ b/drivers/net/mlx4/mlx4_rxtx.h
@@ -97,6 +97,7 @@ struct mlx4_txq_stats {
 struct txq {
 	struct mlx4_sq msq; /**< Info for directly manipulating the SQ. */
 	struct mlx4_cq mcq; /**< Info for directly manipulating the CQ. */
+	uint32_t bfreg_idx; /**< Blueflame register index. */
 	unsigned int elts_head; /**< Current index in (*elts)[]. */
 	unsigned int elts_tail; /**< First element awaiting completion. */
 	int elts_comp_cd; /**< Countdown for next completion. */
@@ -152,7 +153,7 @@ uint16_t mlx4_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts,
 
 /* mlx4_txq.c */
 
-int mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd);
+int mlx4_txq_uar_init_secondary(struct rte_eth_dev *dev, int fd);
 uint64_t mlx4_get_tx_port_offloads(struct mlx4_priv *priv);
 int mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
 			uint16_t desc, unsigned int socket,
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index 698a648c8d..67093e5d2f 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -39,10 +39,15 @@
 #include "mlx4_rxtx.h"
 #include "mlx4_utils.h"
 
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
 /**
- * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
- * Both primary and secondary process do mmap to make UAR address
- * aligned.
+ * Initialize UAR register access for Tx.
+ *
+ * Primary process shouldn't call this function.
+ *
+ * For secondary, remap BlueFlame registers for secondary process. Remapped
+ * address is stored at the same indexed entry of the local UAR register table
+ * as primary process.
  *
  * @param[in] dev
  *   Pointer to Ethernet device.
@@ -52,81 +57,52 @@
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
 int
-mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd)
+mlx4_txq_uar_init_secondary(struct rte_eth_dev *dev, int fd)
 {
-	unsigned int i, j;
 	const unsigned int txqs_n = dev->data->nb_tx_queues;
-	uintptr_t pages[txqs_n];
-	unsigned int pages_n = 0;
-	uintptr_t uar_va;
-	uintptr_t off;
-	void *addr;
-	void *ret;
 	struct txq *txq;
-	int already_mapped;
+	void *addr;
+	void **addr_ptr;
+	uintptr_t uar_va;
+	uintptr_t offset;
 	size_t page_size = sysconf(_SC_PAGESIZE);
+	unsigned int i;
 
-	memset(pages, 0, txqs_n * sizeof(uintptr_t));
-	/*
-	 * As rdma-core, UARs are mapped in size of OS page size.
-	 * Use aligned address to avoid duplicate mmap.
-	 * Ref to libmlx4 function: mlx4_init_context()
-	 */
+	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
 	for (i = 0; i != txqs_n; ++i) {
 		txq = dev->data->tx_queues[i];
 		if (!txq)
 			continue;
-		/* UAR addr form verbs used to find dup and offset in page. */
-		uar_va = (uintptr_t)txq->msq.qp_sdb;
-		off = uar_va & (page_size - 1); /* offset in page. */
-		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
-		already_mapped = 0;
-		for (j = 0; j != pages_n; ++j) {
-			if (pages[j] == uar_va) {
-				already_mapped = 1;
-				break;
-			}
-		}
-		/* new address in reserved UAR address space. */
-		addr = RTE_PTR_ADD(mlx4_shared_data->uar_base,
-				   uar_va & (uintptr_t)(MLX4_UAR_SIZE - 1));
-		if (!already_mapped) {
-			pages[pages_n++] = uar_va;
-			/* fixed mmap to specified address in reserved
-			 * address space.
-			 */
-			ret = mmap(addr, page_size,
-				   PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
-				   txq->msq.uar_mmap_offset);
-			if (ret != addr) {
-				/* fixed mmap has to return same address. */
-				ERROR("port %u call to mmap failed on UAR"
-				      " for txq %u",
-				      dev->data->port_id, i);
-				rte_errno = ENXIO;
-				return -rte_errno;
-			}
+		/*
+		 * As rdma-core, UARs are mapped in size of OS page size. Ref
+		 * to libmlx4 function: mlx4_init_context()
+		 */
+		uar_va = (uintptr_t)txq->msq.db;
+		offset = uar_va & (page_size - 1); /* Offset in page. */
+		addr = mmap(NULL, page_size, PROT_WRITE,
+			    MAP_FIXED | MAP_SHARED, fd,
+			    txq->msq.uar_mmap_offset);
+		if (addr == MAP_FAILED) {
+			ERROR("port %u mmap failed for BF reg. of txq %u",
+			      dev->data->port_id, i);
+			rte_errno = ENXIO;
+			return -rte_errno;
 		}
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once. */
-			txq->msq.db = RTE_PTR_ADD((void *)addr, off);
-		else
-			assert(txq->msq.db ==
-			       RTE_PTR_ADD((void *)addr, off));
+		addr = RTE_PTR_ADD(addr, offset);
+		addr_ptr = mlx4_uar_get_addr_ptr(txq->bfreg_idx);
+		if (!addr_ptr)
+			return -rte_errno;
+		*addr_ptr = addr;
 	}
 	return 0;
 }
 #else
 int
-mlx4_tx_uar_remap(struct rte_eth_dev *dev __rte_unused, int fd __rte_unused)
+mlx4_txq_uar_init_secondary(struct rte_eth_dev *dev __rte_unused,
+			    int fd __rte_unused)
 {
-	/*
-	 * Even if rdma-core doesn't support UAR remap, primary process
-	 * shouldn't be interrupted.
-	 */
-	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-		return 0;
+	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
 	ERROR("UAR remap is not supported");
 	rte_errno = ENOTSUP;
 	return -rte_errno;
@@ -187,11 +163,11 @@ mlx4_txq_fill_dv_obj_info(struct txq *txq, struct mlx4dv_obj *mlxdv)
 				     (0u << MLX4_SQ_OWNER_BIT));
 #ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
 	sq->uar_mmap_offset = dqp->uar_mmap_offset;
-	sq->qp_sdb = dqp->sdb;
 #else
 	sq->uar_mmap_offset = -1; /* Make mmap() fail. */
-	sq->db = dqp->sdb;
 #endif
+	sq->db = dqp->sdb;
+	*mlx4_uar_get_addr_ptr(txq->bfreg_idx) = sq->db;
 	sq->doorbell_qpn = dqp->doorbell_qpn;
 	cq->buf = dcq->buf.buf;
 	cq->cqe_cnt = dcq->cqe_cnt;
@@ -258,6 +234,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	struct ibv_qp_init_attr qp_init_attr;
 	struct txq *txq;
 	uint8_t *bounce_buf;
+	void **addr_ptr;
 	struct mlx4_malloc_vec vec[] = {
 		{
 			.align = RTE_CACHE_LINE_SIZE,
@@ -431,6 +408,15 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		dv_qp.uar_mmap_offset = -1; /* Make mmap() fail. */
 	}
 #endif
+	/* Allocate a new index in UAR table. */
+	ret = mlx4_uar_alloc_index();
+	if (ret < 0)
+		goto error;
+	txq->bfreg_idx = ret;
+	/* Make sure the local UAR register table is properly expanded. */
+	addr_ptr = mlx4_uar_get_addr_ptr(txq->bfreg_idx);
+	if (!addr_ptr)
+		goto error;
 	mlx4_txq_fill_dv_obj_info(txq, &mlxdv);
 	/* Save first wqe pointer in the first element. */
 	(&(*txq->elts)[0])->wqe =
@@ -480,6 +466,7 @@ mlx4_tx_queue_release(void *dpdk_txq)
 			break;
 		}
 	mlx4_txq_free_elts(txq);
+	mlx4_uar_free_index(txq->bfreg_idx);
 	if (txq->qp)
 		claim_zero(mlx4_glue->destroy_qp(txq->qp));
 	if (txq->cq)
-- 
2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v2 3/3] net/mlx4: remove device register remap
  2019-04-01 21:22   ` [dpdk-dev] [PATCH v2 3/3] net/mlx4: " Yongseok Koh
@ 2019-04-01 21:22     ` Yongseok Koh
  0 siblings, 0 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-01 21:22 UTC (permalink / raw)
  To: shahafs; +Cc: dev

UAR (User Access Region) registers will be stored in a process-local table
and a process accesses a register in a table entry with index. Alloc/free
of table entry is managed by a global bitmap.

When there's a need to store a UAR register such as Tx BlueFlame register
for doorbell, an index should be allocated by mlx4_uar_alloc_index() and
address of the allocated table entry must be acquired by
mlx4_uar_get_addr_ptr() so that the table can be expanded if overflowed.
The local UAR register table doesn't cover all the indexes in the bitmap.
This will be expanded if more indexes are allocated than the current size
of the table.

For example, the BlueFlame register for Tx doorbell has to be remapped on
each secondary process. On initialization, primary process allocates an
index for the UAR register table and stores the register address in the
indexed entry of its own table when configuring a Tx queue. The index is
stored in the shared memory (txq->bfreg_idx) and visible to secondary
processes. As secondary processes know the index, each process stores
remapped register in the same indexed entry of its local UAR register
table.

On the datapath of each process, the register can be referenced simply by
MLX4_UAR_REG(idx) which accesses its local UAR register table by the index.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx4/mlx4.c      | 274 ++++++++++++++++++++++++++++---------------
 drivers/net/mlx4/mlx4.h      |  22 +++-
 drivers/net/mlx4/mlx4_prm.h  |   3 +-
 drivers/net/mlx4/mlx4_rxtx.c |   2 +-
 drivers/net/mlx4/mlx4_rxtx.h |   3 +-
 drivers/net/mlx4/mlx4_txq.c  | 113 ++++++++----------
 6 files changed, 249 insertions(+), 168 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 252658fc6a..7afe4db2e1 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -58,7 +58,7 @@ struct mlx4_shared_data *mlx4_shared_data;
 static rte_spinlock_t mlx4_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
 
 /* Process local data for secondary processes. */
-static struct mlx4_local_data mlx4_local_data;
+struct mlx4_local_data mlx4_local_data;
 
 /** Configuration structure for device arguments. */
 struct mlx4_conf {
@@ -262,11 +262,6 @@ mlx4_dev_start(struct rte_eth_dev *dev)
 		return 0;
 	DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
 	priv->started = 1;
-	ret = mlx4_tx_uar_remap(dev, priv->ctx->cmd_fd);
-	if (ret) {
-		ERROR("%p: cannot remap UAR", (void *)dev);
-		goto err;
-	}
 	ret = mlx4_rss_init(priv);
 	if (ret) {
 		ERROR("%p: cannot initialize RSS resources: %s",
@@ -314,8 +309,6 @@ static void
 mlx4_dev_stop(struct rte_eth_dev *dev)
 {
 	struct mlx4_priv *priv = dev->data->dev_private;
-	const size_t page_size = sysconf(_SC_PAGESIZE);
-	int i;
 
 	if (!priv->started)
 		return;
@@ -329,15 +322,6 @@ mlx4_dev_stop(struct rte_eth_dev *dev)
 	mlx4_flow_sync(priv, NULL);
 	mlx4_rxq_intr_disable(priv);
 	mlx4_rss_deinit(priv);
-	for (i = 0; i != dev->data->nb_tx_queues; ++i) {
-		struct txq *txq;
-
-		txq = dev->data->tx_queues[i];
-		if (!txq)
-			continue;
-		munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->msq.db,
-					       page_size), page_size);
-	}
 }
 
 /**
@@ -662,128 +646,224 @@ mlx4_hw_rss_sup(struct ibv_context *ctx, struct ibv_pd *pd,
 
 static struct rte_pci_driver mlx4_driver;
 
+/**
+ * Expand the local UAR register table.
+ *
+ * @param size
+ *   Size of the table to be expanded
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
 static int
-find_lower_va_bound(const struct rte_memseg_list *msl,
-		const struct rte_memseg *ms, void *arg)
+uar_expand_table(uint32_t size)
 {
-	void **addr = arg;
+	struct mlx4_local_data *ld = &mlx4_local_data;
+	void *mem;
+	size_t tbl_sz = ld->uar_table_sz;
 
-	if (msl->external)
+	if (size <= tbl_sz)
 		return 0;
-	if (*addr == NULL)
-		*addr = ms->addr;
-	else
-		*addr = RTE_MIN(*addr, ms->addr);
-
+	tbl_sz = RTE_ALIGN_CEIL(size, RTE_BITMAP_SLAB_BIT_SIZE);
+	mem = rte_realloc(ld->uar_table, tbl_sz * sizeof(void *),
+			  RTE_CACHE_LINE_SIZE);
+	if (!mem) {
+		rte_errno = ENOMEM;
+		ERROR("failed to expand uar table");
+		return -rte_errno;
+	}
+	DEBUG("UAR reg. table is expanded to %zu", tbl_sz);
+	ld->uar_table = mem;
+	ld->uar_table_sz = tbl_sz;
 	return 0;
 }
 
 /**
- * Reserve UAR address space for primary process.
+ * Return the pointer of the indexed slot in the local UAR register table.
  *
- * Process local resource is used by both primary and secondary to avoid
- * duplicate reservation. The space has to be available on both primary and
- * secondary process, TXQ UAR maps to this area using fixed mmap w/o double
- * check.
+ * The indexed slot must be allocated by mlx4_uar_alloc_index() in advance. And
+ * the table will be expanded if overflowed.
+ *
+ * @param idx
+ *   Index of the table.
  *
  * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
+ *   Pointer of table entry on success, NULL otherwise and rte_errno is set.
  */
-static int
-mlx4_uar_init_primary(void)
+void **
+mlx4_uar_get_addr_ptr(uint32_t idx)
+{
+	struct mlx4_local_data *ld = &mlx4_local_data;
+	int ret;
+
+	assert(idx < MLX4_UAR_TABLE_SIZE_MAX);
+	if (idx >= ld->uar_table_sz) {
+		ret = uar_expand_table(idx + 1);
+		if (ret)
+			return NULL;
+	}
+	return &(*ld->uar_table)[idx];
+}
+
+/**
+ * Allocate a slot of UAR register table.
+ *
+ * Allocation is done by scanning the global bitmap. The global spinlock is
+ * acquired internally.
+ *
+ * @return
+ *   Index of a free slot on success, a negative errno value otherwise and
+ *   rte_errno is set.
+ */
+uint32_t
+mlx4_uar_alloc_index(void)
 {
 	struct mlx4_shared_data *sd = mlx4_shared_data;
-	void *addr = (void *)0;
+	uint32_t idx = 0;
+	uint64_t slab = 0;
+	int ret;
 
-	if (sd->uar_base)
-		return 0;
-	/* find out lower bound of hugepage segments */
-	rte_memseg_walk(find_lower_va_bound, &addr);
-	/* keep distance to hugepages to minimize potential conflicts. */
-	addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX4_UAR_OFFSET + MLX4_UAR_SIZE));
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(addr, MLX4_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		ERROR("failed to reserve UAR address space, please"
-		      " adjust MLX4_UAR_SIZE or try --base-virtaddr");
-		rte_errno = ENOMEM;
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	rte_spinlock_lock(&sd->lock);
+	__rte_bitmap_scan_init(sd->uar_bmp);
+	ret = rte_bitmap_scan(sd->uar_bmp, &idx, &slab);
+	if (unlikely(!ret)) {
+		/*
+		 * This cannot happen unless there are unreasonably large number
+		 * of queues and ports.
+		 */
+		rte_errno = ENOSPC;
+		rte_spinlock_unlock(&sd->lock);
 		return -rte_errno;
 	}
-	/* Accept either same addr or a new addr returned from mmap if target
-	 * range occupied.
-	 */
-	INFO("reserved UAR address space: %p", addr);
-	sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
-	return 0;
+	idx += __builtin_ctzll(slab);
+	/* Mark the slot is occupied. */
+	rte_bitmap_clear(sd->uar_bmp, idx);
+	rte_spinlock_unlock(&sd->lock);
+	DEBUG("index %d is allocated in UAR reg. table", idx);
+	return idx;
 }
 
 /**
- * Unmap UAR address space reserved for primary process.
+ * Free a slot of UAR register table.
  */
-static void
-mlx4_uar_uninit_primary(void)
+void
+mlx4_uar_free_index(uint32_t idx)
 {
 	struct mlx4_shared_data *sd = mlx4_shared_data;
 
-	if (!sd->uar_base)
-		return;
-	munmap(sd->uar_base, MLX4_UAR_SIZE);
-	sd->uar_base = NULL;
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	assert(idx < MLX4_UAR_TABLE_SIZE_MAX);
+	rte_spinlock_lock(&sd->lock);
+	/* Mark the slot is empty. */
+	rte_bitmap_set(sd->uar_bmp, idx);
+	rte_spinlock_unlock(&sd->lock);
+	DEBUG("index %d is freed in UAR reg. table", idx);
 }
 
 /**
- * Reserve UAR address space for secondary process, align with primary process.
+ * Initialize UAR register table bitmap.
+ *
+ * UAR registers will be stored in a process-local table and the table is
+ * managed by a global bitmap. When there's a need to store a UAR register, an
+ * index should be allocated by mlx4_uar_alloc_index() and address of the
+ * allocated table entry must be acquired by mlx4_uar_get_addr_ptr() so that the
+ * table can be expanded if overflowed.
+ *
+ * The local UAR register table doesn't cover all the indexes in the bitmap.
+ * This will be expanded if more indexes are allocated than the current size of
+ * the table.
+ *
+ * Secondary process should have reference of the index and store remapped
+ * register at the same index in its local UAR register table.
+ *
+ * On the datapath of each process, the register can be referenced simply by
+ * MLX4_UAR_REG(idx).
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx4_uar_init_secondary(void)
+uar_init_primary(void)
 {
 	struct mlx4_shared_data *sd = mlx4_shared_data;
-	struct mlx4_local_data *ld = &mlx4_local_data;
-	void *addr;
+	struct rte_bitmap *bmp;
+	void *bmp_mem;
+	uint32_t bmp_size;
+	unsigned int i;
 
-	if (ld->uar_base) { /* Already reserved. */
-		assert(sd->uar_base == ld->uar_base);
-		return 0;
-	}
-	assert(sd->uar_base);
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(sd->uar_base, MLX4_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		ERROR("UAR mmap failed: %p size: %llu",
-		      sd->uar_base, MLX4_UAR_SIZE);
-		rte_errno = ENXIO;
+	bmp_size = rte_bitmap_get_memory_footprint(MLX4_UAR_TABLE_SIZE_MAX);
+	bmp_mem = rte_zmalloc("uar_table", bmp_size, RTE_CACHE_LINE_SIZE);
+	if (!bmp_mem) {
+		rte_errno = ENOMEM;
+		ERROR("failed to allocate memory for uar table");
 		return -rte_errno;
 	}
-	if (sd->uar_base != addr) {
-		ERROR("UAR address %p size %llu occupied, please"
-		      " adjust MLX4_UAR_OFFSET or try EAL parameter"
-		      " --base-virtaddr",
-		      sd->uar_base, MLX4_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
+	bmp = rte_bitmap_init(MLX4_UAR_TABLE_SIZE_MAX, bmp_mem, bmp_size);
+	/* Set the entire bitmap: 1 means vacant and 0 means occupied. */
+	for (i = 0; i < bmp->array2_size; ++i)
+		rte_bitmap_set_slab(bmp, i * RTE_BITMAP_SLAB_BIT_SIZE, -1);
+	sd->uar_bmp = bmp;
+	return 0;
+}
+
+/**
+ * Un-initialize UAR register resources.
+ *
+ * The global bitmap and the register table of primary process are freed.
+ */
+static void
+uar_uninit_primary(void)
+{
+	struct mlx4_shared_data *sd = mlx4_shared_data;
+	struct mlx4_local_data *ld = &mlx4_local_data;
+
+	if (sd->uar_bmp) {
+		rte_bitmap_free(sd->uar_bmp);
+		rte_free(sd->uar_bmp);
+		sd->uar_bmp = NULL;
+	}
+	/* Free primary's table. */
+	if (ld->uar_table) {
+		rte_free(ld->uar_table);
+		ld->uar_table = NULL;
+		ld->uar_table_sz = 0;
 	}
-	ld->uar_base = addr;
-	INFO("reserved UAR address space: %p", addr);
+}
+
+/**
+ * Initialize UAR register resources for secondary process.
+ *
+ * Allocate the local UAR register table. Initially, the number of entries is
+ * same as the size of a bitmap slab.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+uar_init_secondary(void)
+{
+	/* Prepare at least a bitmap slab. */
+	uar_expand_table(RTE_BITMAP_SLAB_BIT_SIZE);
 	return 0;
 }
 
 /**
- * Unmap UAR address space reserved for secondary process.
+ * Un-initialize UAR register resources for secondary process.
+ *
+ * The local UAR register table is freed.
  */
 static void
-mlx4_uar_uninit_secondary(void)
+uar_uninit_secondary(void)
 {
 	struct mlx4_local_data *ld = &mlx4_local_data;
 
-	if (!ld->uar_base)
-		return;
-	munmap(ld->uar_base, MLX4_UAR_SIZE);
-	ld->uar_base = NULL;
+	/* Free process-local table. */
+	if (ld->uar_table) {
+		rte_free(ld->uar_table);
+		ld->uar_table = NULL;
+		ld->uar_table_sz = 0;
+	}
 }
 
 /**
@@ -817,7 +897,7 @@ mlx4_init_once(void)
 		rte_mem_event_callback_register("MLX4_MEM_EVENT_CB",
 						mlx4_mr_mem_event_cb, NULL);
 		mlx4_mp_init_primary();
-		ret = mlx4_uar_init_primary();
+		ret = uar_init_primary();
 		if (ret)
 			goto error;
 		sd->init_done = true;
@@ -826,7 +906,7 @@ mlx4_init_once(void)
 		if (ld->init_done)
 			break;
 		mlx4_mp_init_secondary();
-		ret = mlx4_uar_init_secondary();
+		ret = uar_init_secondary();
 		if (ret)
 			goto error;
 		++sd->secondary_cnt;
@@ -840,12 +920,12 @@ mlx4_init_once(void)
 error:
 	switch (rte_eal_process_type()) {
 	case RTE_PROC_PRIMARY:
-		mlx4_uar_uninit_primary();
+		uar_uninit_primary();
 		mlx4_mp_uninit_primary();
 		rte_mem_event_callback_unregister("MLX4_MEM_EVENT_CB", NULL);
 		break;
 	case RTE_PROC_SECONDARY:
-		mlx4_uar_uninit_secondary();
+		uar_uninit_secondary();
 		mlx4_mp_uninit_secondary();
 		break;
 	default:
@@ -1012,7 +1092,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 				goto error;
 			}
 			/* Remap UAR for Tx queues. */
-			err = mlx4_tx_uar_remap(eth_dev, err);
+			err = mlx4_txq_uar_init_secondary(eth_dev, err);
 			if (err) {
 				err = rte_errno;
 				goto error;
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 1db23d6cc9..6ad9c34856 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -138,8 +138,8 @@ struct mlx4_shared_data {
 	/* Global spinlock for primary and secondary processes. */
 	int init_done; /* Whether primary has done initialization. */
 	unsigned int secondary_cnt; /* Number of secondary processes init'd. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
+	struct rte_bitmap *uar_bmp;
+	/* Bitmap to keep track of BlueFlame register table. */
 	struct mlx4_dev_list mem_event_cb_list;
 	rte_rwlock_t mem_event_rwlock;
 };
@@ -147,11 +147,19 @@ struct mlx4_shared_data {
 /* Per-process data structure, not visible to other processes. */
 struct mlx4_local_data {
 	int init_done; /* Whether a secondary has done initialization. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
+	void *(*uar_table)[];
+	/* Table of BlueFlame registers for each process. */
+	size_t uar_table_sz;
+	/* Size of BlueFlame register table. */
 };
 
 extern struct mlx4_shared_data *mlx4_shared_data;
+extern struct mlx4_local_data mlx4_local_data;
+
+/* The maximum size of BlueFlame register table. */
+#define MLX4_UAR_TABLE_SIZE_MAX (RTE_MAX_ETHPORTS * RTE_MAX_QUEUES_PER_PORT)
+
+#define MLX4_UAR_REG(idx) ((*mlx4_local_data.uar_table)[(idx)])
 
 /** Private data structure. */
 struct mlx4_priv {
@@ -198,6 +206,12 @@ struct mlx4_priv {
 #define PORT_ID(priv) ((priv)->dev_data->port_id)
 #define ETH_DEV(priv) (&rte_eth_devices[PORT_ID(priv)])
 
+/* mlx4.c */
+
+void **mlx4_uar_get_addr_ptr(uint32_t idx);
+uint32_t mlx4_uar_alloc_index(void);
+void mlx4_uar_free_index(uint32_t idx);
+
 /* mlx4_ethdev.c */
 
 int mlx4_get_ifname(const struct mlx4_priv *priv, char (*ifname)[IF_NAMESIZE]);
diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h
index b3e11dde25..616e4ea174 100644
--- a/drivers/net/mlx4/mlx4_prm.h
+++ b/drivers/net/mlx4/mlx4_prm.h
@@ -77,8 +77,7 @@ struct mlx4_sq {
 	uint32_t owner_opcode;
 	/**< Default owner opcode with HW valid owner bit. */
 	uint32_t stamp; /**< Stamp value with an invalid HW owner bit. */
-	volatile uint32_t *qp_sdb; /**< Pointer to the doorbell. */
-	volatile uint32_t *db; /**< Pointer to the doorbell remapped. */
+	uint32_t *db; /**< Pointer to the Doorbell. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
 	uint32_t doorbell_qpn; /**< qp number to write to the doorbell. */
 };
diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index f22f1ba559..513c8a61bf 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -1048,7 +1048,7 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	/* Make sure that descriptors are written before doorbell record. */
 	rte_wmb();
 	/* Ring QP doorbell. */
-	rte_write32(txq->msq.doorbell_qpn, txq->msq.db);
+	rte_write32(txq->msq.doorbell_qpn, MLX4_UAR_REG(txq->bfreg_idx));
 	txq->elts_head += i;
 	return i;
 }
diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
index 7d7a8988ed..d9df98715e 100644
--- a/drivers/net/mlx4/mlx4_rxtx.h
+++ b/drivers/net/mlx4/mlx4_rxtx.h
@@ -97,6 +97,7 @@ struct mlx4_txq_stats {
 struct txq {
 	struct mlx4_sq msq; /**< Info for directly manipulating the SQ. */
 	struct mlx4_cq mcq; /**< Info for directly manipulating the CQ. */
+	uint32_t bfreg_idx; /**< Blueflame register index. */
 	unsigned int elts_head; /**< Current index in (*elts)[]. */
 	unsigned int elts_tail; /**< First element awaiting completion. */
 	int elts_comp_cd; /**< Countdown for next completion. */
@@ -152,7 +153,7 @@ uint16_t mlx4_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts,
 
 /* mlx4_txq.c */
 
-int mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd);
+int mlx4_txq_uar_init_secondary(struct rte_eth_dev *dev, int fd);
 uint64_t mlx4_get_tx_port_offloads(struct mlx4_priv *priv);
 int mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
 			uint16_t desc, unsigned int socket,
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index 698a648c8d..67093e5d2f 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -39,10 +39,15 @@
 #include "mlx4_rxtx.h"
 #include "mlx4_utils.h"
 
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
 /**
- * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
- * Both primary and secondary process do mmap to make UAR address
- * aligned.
+ * Initialize UAR register access for Tx.
+ *
+ * Primary process shouldn't call this function.
+ *
+ * Remap the BlueFlame registers for the secondary process. Each remapped
+ * address is stored in the local UAR register table at the same index as
+ * the one used by the primary process.
  *
  * @param[in] dev
  *   Pointer to Ethernet device.
@@ -52,81 +57,52 @@
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
 int
-mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd)
+mlx4_txq_uar_init_secondary(struct rte_eth_dev *dev, int fd)
 {
-	unsigned int i, j;
 	const unsigned int txqs_n = dev->data->nb_tx_queues;
-	uintptr_t pages[txqs_n];
-	unsigned int pages_n = 0;
-	uintptr_t uar_va;
-	uintptr_t off;
-	void *addr;
-	void *ret;
 	struct txq *txq;
-	int already_mapped;
+	void *addr;
+	void **addr_ptr;
+	uintptr_t uar_va;
+	uintptr_t offset;
 	size_t page_size = sysconf(_SC_PAGESIZE);
+	unsigned int i;
 
-	memset(pages, 0, txqs_n * sizeof(uintptr_t));
-	/*
-	 * As rdma-core, UARs are mapped in size of OS page size.
-	 * Use aligned address to avoid duplicate mmap.
-	 * Ref to libmlx4 function: mlx4_init_context()
-	 */
+	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
 	for (i = 0; i != txqs_n; ++i) {
 		txq = dev->data->tx_queues[i];
 		if (!txq)
 			continue;
-		/* UAR addr form verbs used to find dup and offset in page. */
-		uar_va = (uintptr_t)txq->msq.qp_sdb;
-		off = uar_va & (page_size - 1); /* offset in page. */
-		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
-		already_mapped = 0;
-		for (j = 0; j != pages_n; ++j) {
-			if (pages[j] == uar_va) {
-				already_mapped = 1;
-				break;
-			}
-		}
-		/* new address in reserved UAR address space. */
-		addr = RTE_PTR_ADD(mlx4_shared_data->uar_base,
-				   uar_va & (uintptr_t)(MLX4_UAR_SIZE - 1));
-		if (!already_mapped) {
-			pages[pages_n++] = uar_va;
-			/* fixed mmap to specified address in reserved
-			 * address space.
-			 */
-			ret = mmap(addr, page_size,
-				   PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
-				   txq->msq.uar_mmap_offset);
-			if (ret != addr) {
-				/* fixed mmap has to return same address. */
-				ERROR("port %u call to mmap failed on UAR"
-				      " for txq %u",
-				      dev->data->port_id, i);
-				rte_errno = ENXIO;
-				return -rte_errno;
-			}
+		/*
+		 * As rdma-core, UARs are mapped in size of OS page size. Ref
+		 * to libmlx4 function: mlx4_init_context()
+		 */
+		uar_va = (uintptr_t)txq->msq.db;
+		offset = uar_va & (page_size - 1); /* Offset in page. */
+		addr = mmap(NULL, page_size, PROT_WRITE,
+			    MAP_FIXED | MAP_SHARED, fd,
+			    txq->msq.uar_mmap_offset);
+		if (addr == MAP_FAILED) {
+			ERROR("port %u mmap failed for BF reg. of txq %u",
+			      dev->data->port_id, i);
+			rte_errno = ENXIO;
+			return -rte_errno;
 		}
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once. */
-			txq->msq.db = RTE_PTR_ADD((void *)addr, off);
-		else
-			assert(txq->msq.db ==
-			       RTE_PTR_ADD((void *)addr, off));
+		addr = RTE_PTR_ADD(addr, offset);
+		addr_ptr = mlx4_uar_get_addr_ptr(txq->bfreg_idx);
+		if (!addr_ptr)
+			return -rte_errno;
+		*addr_ptr = addr;
 	}
 	return 0;
 }
 #else
 int
-mlx4_tx_uar_remap(struct rte_eth_dev *dev __rte_unused, int fd __rte_unused)
+mlx4_txq_uar_init_secondary(struct rte_eth_dev *dev __rte_unused,
+			    int fd __rte_unused)
 {
-	/*
-	 * Even if rdma-core doesn't support UAR remap, primary process
-	 * shouldn't be interrupted.
-	 */
-	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-		return 0;
+	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
 	ERROR("UAR remap is not supported");
 	rte_errno = ENOTSUP;
 	return -rte_errno;
@@ -187,11 +163,11 @@ mlx4_txq_fill_dv_obj_info(struct txq *txq, struct mlx4dv_obj *mlxdv)
 				     (0u << MLX4_SQ_OWNER_BIT));
 #ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
 	sq->uar_mmap_offset = dqp->uar_mmap_offset;
-	sq->qp_sdb = dqp->sdb;
 #else
 	sq->uar_mmap_offset = -1; /* Make mmap() fail. */
-	sq->db = dqp->sdb;
 #endif
+	sq->db = dqp->sdb;
+	*mlx4_uar_get_addr_ptr(txq->bfreg_idx) = sq->db;
 	sq->doorbell_qpn = dqp->doorbell_qpn;
 	cq->buf = dcq->buf.buf;
 	cq->cqe_cnt = dcq->cqe_cnt;
@@ -258,6 +234,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	struct ibv_qp_init_attr qp_init_attr;
 	struct txq *txq;
 	uint8_t *bounce_buf;
+	void **addr_ptr;
 	struct mlx4_malloc_vec vec[] = {
 		{
 			.align = RTE_CACHE_LINE_SIZE,
@@ -431,6 +408,15 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		dv_qp.uar_mmap_offset = -1; /* Make mmap() fail. */
 	}
 #endif
+	/* Allocate a new index in UAR table. */
+	ret = mlx4_uar_alloc_index();
+	if (ret < 0)
+		goto error;
+	txq->bfreg_idx = ret;
+	/* Make sure the local UAR register table is properly expanded. */
+	addr_ptr = mlx4_uar_get_addr_ptr(txq->bfreg_idx);
+	if (!addr_ptr)
+		goto error;
 	mlx4_txq_fill_dv_obj_info(txq, &mlxdv);
 	/* Save first wqe pointer in the first element. */
 	(&(*txq->elts)[0])->wqe =
@@ -480,6 +466,7 @@ mlx4_tx_queue_release(void *dpdk_txq)
 			break;
 		}
 	mlx4_txq_free_elts(txq);
+	mlx4_uar_free_index(txq->bfreg_idx);
 	if (txq->qp)
 		claim_zero(mlx4_glue->destroy_qp(txq->qp));
 	if (txq->cq)
-- 
2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/3] net/mlx5: fix recursive inclusion of header file
  2019-04-01 21:22   ` [dpdk-dev] [PATCH v2 1/3] net/mlx5: fix recursive inclusion of header file Yongseok Koh
  2019-04-01 21:22     ` Yongseok Koh
@ 2019-04-02  5:39     ` Shahaf Shuler
  2019-04-02  5:39       ` Shahaf Shuler
  1 sibling, 1 reply; 66+ messages in thread
From: Shahaf Shuler @ 2019-04-02  5:39 UTC (permalink / raw)
  To: Yongseok Koh; +Cc: dev

Tuesday, April 2, 2019 12:22 AM, Yongseok Koh:
> Subject: [PATCH v2 1/3] net/mlx5: fix recursive inclusion of header file
> 
> mlx5.h includes mlx5_rxtx.h and mlx5_rxtx.h includes mlx5.h recursively.
> 
> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>

Acked-by: Shahaf Shuler <shahafs@mellanox.com>

> ---
>  drivers/net/mlx5/mlx5.h            | 1 -
>  drivers/net/mlx5/mlx5_flow.c       | 5 +++--
>  drivers/net/mlx5/mlx5_flow_dv.c    | 3 ++-
>  drivers/net/mlx5/mlx5_flow_verbs.c | 5 +++--
>  drivers/net/mlx5/mlx5_vlan.c       | 3 ++-
>  5 files changed, 10 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> 47a7d75f7a..8ce8361a85 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -33,7 +33,6 @@
> 
>  #include "mlx5_utils.h"
>  #include "mlx5_mr.h"
> -#include "mlx5_rxtx.h"
>  #include "mlx5_autoconf.h"
>  #include "mlx5_defs.h"
> 
> diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
> index bc6a7c1eba..3eb8bf0c83 100644
> --- a/drivers/net/mlx5/mlx5_flow.c
> +++ b/drivers/net/mlx5/mlx5_flow.c
> @@ -30,9 +30,10 @@
> 
>  #include "mlx5.h"
>  #include "mlx5_defs.h"
> -#include "mlx5_prm.h"
> -#include "mlx5_glue.h"
>  #include "mlx5_flow.h"
> +#include "mlx5_glue.h"
> +#include "mlx5_prm.h"
> +#include "mlx5_rxtx.h"
> 
>  /* Dev ops structure defined in mlx5.c */  extern const struct eth_dev_ops
> mlx5_dev_ops; diff --git a/drivers/net/mlx5/mlx5_flow_dv.c
> b/drivers/net/mlx5/mlx5_flow_dv.c index 966dad9838..3e0a64ec4b 100644
> --- a/drivers/net/mlx5/mlx5_flow_dv.c
> +++ b/drivers/net/mlx5/mlx5_flow_dv.c
> @@ -29,9 +29,10 @@
> 
>  #include "mlx5.h"
>  #include "mlx5_defs.h"
> -#include "mlx5_prm.h"
>  #include "mlx5_glue.h"
>  #include "mlx5_flow.h"
> +#include "mlx5_prm.h"
> +#include "mlx5_rxtx.h"
> 
>  #ifdef HAVE_IBV_FLOW_DV_SUPPORT
> 
> diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c
> b/drivers/net/mlx5/mlx5_flow_verbs.c
> index 49dd13e6d2..3956df1a7e 100644
> --- a/drivers/net/mlx5/mlx5_flow_verbs.c
> +++ b/drivers/net/mlx5/mlx5_flow_verbs.c
> @@ -29,9 +29,10 @@
> 
>  #include "mlx5.h"
>  #include "mlx5_defs.h"
> -#include "mlx5_prm.h"
> -#include "mlx5_glue.h"
>  #include "mlx5_flow.h"
> +#include "mlx5_glue.h"
> +#include "mlx5_prm.h"
> +#include "mlx5_rxtx.h"
> 
>  #define VERBS_SPEC_INNER(item_flags) \
>  	(!!((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ?
> IBV_FLOW_SPEC_INNER : 0) diff --git a/drivers/net/mlx5/mlx5_vlan.c
> b/drivers/net/mlx5/mlx5_vlan.c index 6568a3a475..4004930942 100644
> --- a/drivers/net/mlx5/mlx5_vlan.c
> +++ b/drivers/net/mlx5/mlx5_vlan.c
> @@ -27,10 +27,11 @@
>  #include <rte_ethdev_driver.h>
>  #include <rte_common.h>
> 
> -#include "mlx5_utils.h"
>  #include "mlx5.h"
>  #include "mlx5_autoconf.h"
>  #include "mlx5_glue.h"
> +#include "mlx5_rxtx.h"
> +#include "mlx5_utils.h"
> 
>  /**
>   * DPDK callback to configure a VLAN filter.
> --
> 2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/3] net/mlx5: fix recursive inclusion of header file
  2019-04-02  5:39     ` Shahaf Shuler
@ 2019-04-02  5:39       ` Shahaf Shuler
  0 siblings, 0 replies; 66+ messages in thread
From: Shahaf Shuler @ 2019-04-02  5:39 UTC (permalink / raw)
  To: Yongseok Koh; +Cc: dev

Tuesday, April 2, 2019 12:22 AM, Yongseok Koh:
> Subject: [PATCH v2 1/3] net/mlx5: fix recursive inclusion of header file
> 
> mlx5.h includes mlx5_rxtx.h and mlx5_rxtx.h includes mlx5.h recursively.
> 
> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>

Acked-by: Shahaf Shuler <shahafs@mellanox.com>

> ---
>  drivers/net/mlx5/mlx5.h            | 1 -
>  drivers/net/mlx5/mlx5_flow.c       | 5 +++--
>  drivers/net/mlx5/mlx5_flow_dv.c    | 3 ++-
>  drivers/net/mlx5/mlx5_flow_verbs.c | 5 +++--
>  drivers/net/mlx5/mlx5_vlan.c       | 3 ++-
>  5 files changed, 10 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> 47a7d75f7a..8ce8361a85 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -33,7 +33,6 @@
> 
>  #include "mlx5_utils.h"
>  #include "mlx5_mr.h"
> -#include "mlx5_rxtx.h"
>  #include "mlx5_autoconf.h"
>  #include "mlx5_defs.h"
> 
> diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
> index bc6a7c1eba..3eb8bf0c83 100644
> --- a/drivers/net/mlx5/mlx5_flow.c
> +++ b/drivers/net/mlx5/mlx5_flow.c
> @@ -30,9 +30,10 @@
> 
>  #include "mlx5.h"
>  #include "mlx5_defs.h"
> -#include "mlx5_prm.h"
> -#include "mlx5_glue.h"
>  #include "mlx5_flow.h"
> +#include "mlx5_glue.h"
> +#include "mlx5_prm.h"
> +#include "mlx5_rxtx.h"
> 
>  /* Dev ops structure defined in mlx5.c */  extern const struct eth_dev_ops
> mlx5_dev_ops; diff --git a/drivers/net/mlx5/mlx5_flow_dv.c
> b/drivers/net/mlx5/mlx5_flow_dv.c index 966dad9838..3e0a64ec4b 100644
> --- a/drivers/net/mlx5/mlx5_flow_dv.c
> +++ b/drivers/net/mlx5/mlx5_flow_dv.c
> @@ -29,9 +29,10 @@
> 
>  #include "mlx5.h"
>  #include "mlx5_defs.h"
> -#include "mlx5_prm.h"
>  #include "mlx5_glue.h"
>  #include "mlx5_flow.h"
> +#include "mlx5_prm.h"
> +#include "mlx5_rxtx.h"
> 
>  #ifdef HAVE_IBV_FLOW_DV_SUPPORT
> 
> diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c
> b/drivers/net/mlx5/mlx5_flow_verbs.c
> index 49dd13e6d2..3956df1a7e 100644
> --- a/drivers/net/mlx5/mlx5_flow_verbs.c
> +++ b/drivers/net/mlx5/mlx5_flow_verbs.c
> @@ -29,9 +29,10 @@
> 
>  #include "mlx5.h"
>  #include "mlx5_defs.h"
> -#include "mlx5_prm.h"
> -#include "mlx5_glue.h"
>  #include "mlx5_flow.h"
> +#include "mlx5_glue.h"
> +#include "mlx5_prm.h"
> +#include "mlx5_rxtx.h"
> 
>  #define VERBS_SPEC_INNER(item_flags) \
>  	(!!((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ?
> IBV_FLOW_SPEC_INNER : 0) diff --git a/drivers/net/mlx5/mlx5_vlan.c
> b/drivers/net/mlx5/mlx5_vlan.c index 6568a3a475..4004930942 100644
> --- a/drivers/net/mlx5/mlx5_vlan.c
> +++ b/drivers/net/mlx5/mlx5_vlan.c
> @@ -27,10 +27,11 @@
>  #include <rte_ethdev_driver.h>
>  #include <rte_common.h>
> 
> -#include "mlx5_utils.h"
>  #include "mlx5.h"
>  #include "mlx5_autoconf.h"
>  #include "mlx5_glue.h"
> +#include "mlx5_rxtx.h"
> +#include "mlx5_utils.h"
> 
>  /**
>   * DPDK callback to configure a VLAN filter.
> --
> 2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/3] net/mlx5: remove device register remap
  2019-04-01 21:22   ` [dpdk-dev] [PATCH v2 2/3] net/mlx5: remove device register remap Yongseok Koh
  2019-04-01 21:22     ` Yongseok Koh
@ 2019-04-02  6:50     ` Shahaf Shuler
  2019-04-02  6:50       ` Shahaf Shuler
  1 sibling, 1 reply; 66+ messages in thread
From: Shahaf Shuler @ 2019-04-02  6:50 UTC (permalink / raw)
  To: Yongseok Koh; +Cc: dev

Hi Koh,

See my comments below; the same comments apply to the mlx4 patch.

Tuesday, April 2, 2019 12:22 AM, Yongseok Koh:
> Subject: [dpdk-dev] [PATCH v2 2/3] net/mlx5: remove device register remap
> 
> UAR (User Access Region) registers will be stored in a process-local table and
> a process accesses a register in a table entry with index. Alloc/free of table
> entry is managed by a global bitmap.
> 
> When there's a need to store a UAR register such as Tx BlueFlame register
> for doorbell, an index should be allocated by mlx5_uar_alloc_index() and
> address of the allocated table entry must be acquired by
> mlx5_uar_get_addr_ptr() so that the table can be expanded if overflowed.
> The local UAR register table doesn't cover all the indexes in the bitmap.
> This will be expanded if more indexes are allocated than the current size of
> the table.
> 
> For example, the BlueFlame register for Tx doorbell has to be remapped on
> each secondary process. On initialization, primary process allocates an index
> for the UAR register table and stores the register address in the indexed
> entry of its own table when configuring a Tx queue. The index is stored in the
> shared memory(txq->bfreg_idx) and visible to secondary processes. As
> secondary processes know the index, each process stores remapped register
> in the same indexed entry of its local UAR register table.
> 
> On the datapath of each process, the register can be referenced simply by
> MLX5_UAR_REG(idx) which accesses its local UAR register table by the index.
> 
> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5.c         | 262 +++++++++++++++++++++++++++--------
> -----
>  drivers/net/mlx5/mlx5.h         |  19 ++-
>  drivers/net/mlx5/mlx5_rxtx.h    |   8 +-
>  drivers/net/mlx5/mlx5_trigger.c |   2 +-
>  drivers/net/mlx5/mlx5_txq.c     |  96 +++++++--------
>  5 files changed, 242 insertions(+), 145 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index
> 40445056f5..103841b2bc 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -25,6 +25,7 @@
>  #pragma GCC diagnostic error "-Wpedantic"
>  #endif
> 
> +#include <rte_bitmap.h>
>  #include <rte_malloc.h>
>  #include <rte_ethdev_driver.h>
>  #include <rte_ethdev_pci.h>
> @@ -131,7 +132,7 @@ struct mlx5_shared_data *mlx5_shared_data;  static
> rte_spinlock_t mlx5_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
> 
>  /* Process local data for secondary processes. */ -static struct
> mlx5_local_data mlx5_local_data;
> +struct mlx5_local_data mlx5_local_data;
> 
>  /** Driver-specific log messages type. */  int mlx5_logtype; @@ -810,130
> +811,225 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs
> *devargs)
> 
>  static struct rte_pci_driver mlx5_driver;
> 
> +
> +/**
> + * Expand the local UAR register table.
> + *
> + * @param size
> + *   Size of the table to be expanded
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
>  static int
> -find_lower_va_bound(const struct rte_memseg_list *msl,
> -		const struct rte_memseg *ms, void *arg)
> +uar_expand_table(uint32_t size)

This function needs to be protected with a mutex, since it can be called by multiple control threads on different eth devices.

>  {
> -	void **addr = arg;
> +	struct mlx5_local_data *ld = &mlx5_local_data;
> +	void *mem;
> +	size_t tbl_sz = ld->uar_table_sz;
> 
> -	if (msl->external)
> +	if (size <= tbl_sz)
>  		return 0;
> -	if (*addr == NULL)
> -		*addr = ms->addr;
> -	else
> -		*addr = RTE_MIN(*addr, ms->addr);
> -
> +	tbl_sz = RTE_ALIGN_CEIL(size, RTE_BITMAP_SLAB_BIT_SIZE);
> +	mem = rte_realloc(ld->uar_table, tbl_sz * sizeof(void *),
> +			  RTE_CACHE_LINE_SIZE);
> +	if (!mem) {
> +		rte_errno = ENOMEM;
> +		DRV_LOG(ERR, "failed to expand uar table");
> +		return -rte_errno;
> +	}
> +	DRV_LOG(DEBUG, "UAR reg. table is expanded to %zu", tbl_sz);
> +	ld->uar_table = mem;
> +	ld->uar_table_sz = tbl_sz;
>  	return 0;
>  }
> 
>  /**
> - * Reserve UAR address space for primary process.
> + * Return the pointer of the indexed slot in the local UAR register table.
>   *
> - * Process local resource is used by both primary and secondary to avoid
> - * duplicate reservation. The space has to be available on both primary and
> - * secondary process, TXQ UAR maps to this area using fixed mmap w/o
> double
> - * check.
> + * The indexed slot must be allocated by mlx5_uar_alloc_index() in
> + advance. And
> + * the table will be expanded if overflowed.
> + *
> + * @param idx
> + *   Index of the table.
>   *
>   * @return
> - *   0 on success, a negative errno value otherwise and rte_errno is set.
> + *   Pointer of table entry on success, NULL otherwise and rte_errno is set.
>   */
> -static int
> -mlx5_uar_init_primary(void)
> +void **
> +mlx5_uar_get_addr_ptr(uint32_t idx)

I wonder whether we can have a coherency issue here.
Suppose we have 2 eth devices: one is running the datapath and the other is at the configuration stage.
The one at the configuration stage may trigger the table expansion
while the one on the datapath is reading from the UAR table.

> +{
> +	struct mlx5_local_data *ld = &mlx5_local_data;
> +	int ret;
> +
> +	assert(idx < MLX5_UAR_TABLE_SIZE_MAX);
> +	if (idx >= ld->uar_table_sz) {
> +		ret = uar_expand_table(idx + 1);
> +		if (ret)
> +			return NULL;
> +	}
> +	return &(*ld->uar_table)[idx];
> +}
> +
> +/**
> + * Allocate a slot of UAR register table.
> + *
> + * Allocation is done by scanning the global bitmap. The global spinlock
> + * is acquired inside this function.
> + *
> + * @return
> + *   Index of a free slot on success, a negative errno value otherwise and
> + *   rte_errno is set.
> + */
> +uint32_t
> +mlx5_uar_alloc_index(void)
>  {
>  	struct mlx5_shared_data *sd = mlx5_shared_data;
> -	void *addr = (void *)0;
> +	uint32_t idx = 0;
> +	uint64_t slab = 0;
> +	int ret;
> 
> -	if (sd->uar_base)
> -		return 0;
> -	/* find out lower bound of hugepage segments */
> -	rte_memseg_walk(find_lower_va_bound, &addr);
> -	/* keep distance to hugepages to minimize potential conflicts. */
> -	addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX5_UAR_OFFSET +
> MLX5_UAR_SIZE));
> -	/* anonymous mmap, no real memory consumption. */
> -	addr = mmap(addr, MLX5_UAR_SIZE,
> -		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> -	if (addr == MAP_FAILED) {
> -		DRV_LOG(ERR,
> -			"Failed to reserve UAR address space, please"
> -			" adjust MLX5_UAR_SIZE or try --base-virtaddr");
> -		rte_errno = ENOMEM;
> +	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
> +	rte_spinlock_lock(&sd->lock);
> +	__rte_bitmap_scan_init(sd->uar_bmp);
> +	ret = rte_bitmap_scan(sd->uar_bmp, &idx, &slab);
> +	if (unlikely(!ret)) {
> +		/*
> +		 * This cannot happen unless there are unreasonably large
> number
> +		 * of queues and ports.
> +		 */
> +		rte_errno = ENOSPC;
> +		rte_spinlock_unlock(&sd->lock);
>  		return -rte_errno;
>  	}
> -	/* Accept either same addr or a new addr returned from mmap if
> target
> -	 * range occupied.
> -	 */
> -	DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
> -	sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
> -	return 0;
> +	idx += __builtin_ctzll(slab);
> +	/* Mark the slot is occupied. */
> +	rte_bitmap_clear(sd->uar_bmp, idx);
> +	rte_spinlock_unlock(&sd->lock);
> +	DRV_LOG(DEBUG, "index %d is allocated in UAR reg. table", idx);
> +	return idx;
>  }
> 
>  /**
> - * Unmap UAR address space reserved for primary process.
> + * Free a slot of UAR register table.
>   */
> -static void
> -mlx5_uar_uninit_primary(void)
> +void
> +mlx5_uar_free_index(uint32_t idx)
>  {
>  	struct mlx5_shared_data *sd = mlx5_shared_data;
> 
> -	if (!sd->uar_base)
> -		return;
> -	munmap(sd->uar_base, MLX5_UAR_SIZE);
> -	sd->uar_base = NULL;
> +	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
> +	assert(idx < MLX5_UAR_TABLE_SIZE_MAX);
> +	rte_spinlock_lock(&sd->lock);
> +	/* Mark the slot is empty. */
> +	rte_bitmap_set(sd->uar_bmp, idx);
> +	rte_spinlock_unlock(&sd->lock);
> +	DRV_LOG(DEBUG, "index %d is freed in UAR reg. table", idx);
>  }
> 
>  /**
> - * Reserve UAR address space for secondary process, align with primary
> process.
> + * Initialize UAR register table bitmap.
> + *
> + * UAR registers will be stored in a process-local table and the table
> + is
> + * managed by a global bitmap. When there's a need to store a UAR
> + register, an
> + * index should be allocated by mlx5_uar_alloc_index() and address of
> + the
> + * allocated table entry must be acquired by mlx5_uar_get_addr_ptr() so
> + that the
> + * table can be expanded if overflowed.
> + *
> + * The local UAR register table doesn't cover all the indexes in the bitmap.
> + * This will be expanded if more indexes are allocated than the current
> + size of
> + * the table.
> + *
> + * Secondary process should have reference of the index and store
> + remapped
> + * register at the same index in its local UAR register table.
> + *
> + * On the datapath of each process, the register can be referenced
> + simply by
> + * MLX5_UAR_REG(idx).
>   *
>   * @return
>   *   0 on success, a negative errno value otherwise and rte_errno is set.
>   */
>  static int
> -mlx5_uar_init_secondary(void)
> +uar_init_primary(void)
>  {
>  	struct mlx5_shared_data *sd = mlx5_shared_data;
> -	struct mlx5_local_data *ld = &mlx5_local_data;
> -	void *addr;
> +	struct rte_bitmap *bmp;
> +	void *bmp_mem;
> +	uint32_t bmp_size;
> +	unsigned int i;
> 
> -	if (ld->uar_base) { /* Already reserved. */
> -		assert(sd->uar_base == ld->uar_base);
> -		return 0;
> -	}
> -	assert(sd->uar_base);
> -	/* anonymous mmap, no real memory consumption. */
> -	addr = mmap(sd->uar_base, MLX5_UAR_SIZE,
> -		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> -	if (addr == MAP_FAILED) {
> -		DRV_LOG(ERR, "UAR mmap failed: %p size: %llu",
> -			sd->uar_base, MLX5_UAR_SIZE);
> -		rte_errno = ENXIO;
> +	bmp_size =
> rte_bitmap_get_memory_footprint(MLX5_UAR_TABLE_SIZE_MAX);
> +	bmp_mem = rte_zmalloc("uar_table", bmp_size,
> RTE_CACHE_LINE_SIZE);
> +	if (!bmp_mem) {
> +		rte_errno = ENOMEM;
> +		DRV_LOG(ERR, "failed to allocate memory for uar table");
>  		return -rte_errno;
>  	}
> -	if (sd->uar_base != addr) {
> -		DRV_LOG(ERR,
> -			"UAR address %p size %llu occupied, please"
> -			" adjust MLX5_UAR_OFFSET or try EAL parameter"
> -			" --base-virtaddr",
> -			sd->uar_base, MLX5_UAR_SIZE);
> -		rte_errno = ENXIO;
> -		return -rte_errno;
> +	bmp = rte_bitmap_init(MLX5_UAR_TABLE_SIZE_MAX, bmp_mem,
> bmp_size);
> +	/* Set the entire bitmap: 1 means vacant and 0 means occupied. */
> +	for (i = 0; i < bmp->array2_size; ++i)
> +		rte_bitmap_set_slab(bmp, i * RTE_BITMAP_SLAB_BIT_SIZE, -
> 1);
> +	sd->uar_bmp = bmp;
> +	return 0;
> +}
> +
> +/**
> + * Un-initialize UAR register resources.
> + *
> + * The global bitmap and the register table of primary process are freed.
> + */
> +static void
> +uar_uninit_primary(void)
> +{
> +	struct mlx5_shared_data *sd = mlx5_shared_data;
> +	struct mlx5_local_data *ld = &mlx5_local_data;
> +
> +	if (sd->uar_bmp) {
> +		rte_bitmap_free(sd->uar_bmp);
> +		rte_free(sd->uar_bmp);
> +		sd->uar_bmp = NULL;
> +	}
> +	/* Free primary's table. */
> +	if (ld->uar_table) {
> +		rte_free(ld->uar_table);
> +		ld->uar_table = NULL;
> +		ld->uar_table_sz = 0;
>  	}
> -	ld->uar_base = addr;
> -	DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
> +}
> +
> +/**
> + * Initialize UAR register resources for secondary process.
> + *
> + * Allocate the local UAR register table. Initially, the number of
> +entries is
> + * same as the size of a bitmap slab.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +static int
> +uar_init_secondary(void)
> +{
> +	/* Prepare at least a bitmap slab. */
> +	uar_expand_table(RTE_BITMAP_SLAB_BIT_SIZE);
>  	return 0;
>  }
> 
>  /**
> - * Unmap UAR address space reserved for secondary process.
> + * Un-initialize UAR register resources for secondary process.
> + *
> + * The local UAR register table is freed.
>   */
>  static void
> -mlx5_uar_uninit_secondary(void)
> +uar_uninit_secondary(void)
>  {
>  	struct mlx5_local_data *ld = &mlx5_local_data;
> 
> -	if (!ld->uar_base)
> -		return;
> -	munmap(ld->uar_base, MLX5_UAR_SIZE);
> -	ld->uar_base = NULL;
> +	/* Free process-local table. */
> +	if (ld->uar_table) {
> +		rte_free(ld->uar_table);
> +		ld->uar_table = NULL;
> +		ld->uar_table_sz = 0;
> +	}
>  }
> 
>  /**
> @@ -967,7 +1063,7 @@ mlx5_init_once(void)
> 
> 	rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
>  						mlx5_mr_mem_event_cb,
> NULL);
>  		mlx5_mp_init_primary();
> -		ret = mlx5_uar_init_primary();
> +		ret = uar_init_primary();
>  		if (ret)
>  			goto error;
>  		sd->init_done = true;
> @@ -976,7 +1072,7 @@ mlx5_init_once(void)
>  		if (ld->init_done)
>  			break;
>  		mlx5_mp_init_secondary();
> -		ret = mlx5_uar_init_secondary();
> +		ret = uar_init_secondary();
>  		if (ret)
>  			goto error;
>  		++sd->secondary_cnt;
> @@ -990,12 +1086,12 @@ mlx5_init_once(void)
>  error:
>  	switch (rte_eal_process_type()) {
>  	case RTE_PROC_PRIMARY:
> -		mlx5_uar_uninit_primary();
> +		uar_uninit_primary();
>  		mlx5_mp_uninit_primary();
> 
> 	rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB",
> NULL);
>  		break;
>  	case RTE_PROC_SECONDARY:
> -		mlx5_uar_uninit_secondary();
> +		uar_uninit_secondary();
>  		mlx5_mp_uninit_secondary();
>  		break;
>  	default:
> @@ -1099,7 +1195,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
>  		if (err < 0)
>  			return NULL;
>  		/* Remap UAR for Tx queues. */
> -		err = mlx5_tx_uar_remap(eth_dev, err);
> +		err = mlx5_txq_uar_init(eth_dev, err);
>  		if (err)
>  			return NULL;
>  		/*
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> 8ce8361a85..f77517bee0 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -97,8 +97,8 @@ struct mlx5_shared_data {
>  	/* Global spinlock for primary and secondary processes. */
>  	int init_done; /* Whether primary has done initialization. */
>  	unsigned int secondary_cnt; /* Number of secondary processes
> init'd. */
> -	void *uar_base;
> -	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping.
> */
> +	struct rte_bitmap *uar_bmp;
> +	/* Bitmap to keep track of BlueFlame register table. */
>  	struct mlx5_dev_list mem_event_cb_list;
>  	rte_rwlock_t mem_event_rwlock;
>  };
> @@ -106,11 +106,19 @@ struct mlx5_shared_data {
>  /* Per-process data structure, not visible to other processes. */  struct
> mlx5_local_data {
>  	int init_done; /* Whether a secondary has done initialization. */
> -	void *uar_base;
> -	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping.
> */
> +	void *(*uar_table)[];
> +	/* Table of BlueFlame registers for each process. */
> +	size_t uar_table_sz;
> +	/* Size of BlueFlame register table. */
>  };
> 
>  extern struct mlx5_shared_data *mlx5_shared_data;
> +extern struct mlx5_local_data mlx5_local_data;
> +
> +/* The maximum size of BlueFlame register table. */ #define
> +MLX5_UAR_TABLE_SIZE_MAX (RTE_MAX_ETHPORTS *
> RTE_MAX_QUEUES_PER_PORT)
> +
> +#define MLX5_UAR_REG(idx) ((*mlx5_local_data.uar_table)[(idx)])

Same concern - what if the UAR table is expanded by another device that is still in its configuration stage?

> 
>  struct mlx5_counter_ctrl {
>  	/* Name of the counter. */
> @@ -331,6 +339,9 @@ struct mlx5_priv {
>  /* mlx5.c */
> 
>  int mlx5_getenv_int(const char *);
> +void **mlx5_uar_get_addr_ptr(uint32_t idx); uint32_t
> +mlx5_uar_alloc_index(void); void mlx5_uar_free_index(uint32_t idx);
> 
>  /* mlx5_ethdev.c */
> 
> diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
> index ced9945888..b32c1d6e0f 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.h
> +++ b/drivers/net/mlx5/mlx5_rxtx.h
> @@ -203,7 +203,7 @@ struct mlx5_txq_data {
>  	volatile void *wqes; /* Work queue (use volatile to write into). */
>  	volatile uint32_t *qp_db; /* Work queue doorbell. */
>  	volatile uint32_t *cq_db; /* Completion queue doorbell. */
> -	volatile void *bf_reg; /* Blueflame register remapped. */
> +	uint32_t bfreg_idx; /* Blueflame register index. */
>  	struct rte_mbuf *(*elts)[]; /* TX elements. */
>  	struct mlx5_txq_stats stats; /* TX queue counters. */  #ifndef
> RTE_ARCH_64 @@ -232,7 +232,7 @@ struct mlx5_txq_ctrl {
>  	struct mlx5_priv *priv; /* Back pointer to private data. */
>  	struct mlx5_txq_data txq; /* Data path structure. */
>  	off_t uar_mmap_offset; /* UAR mmap offset for non-primary
> process. */
> -	volatile void *bf_reg_orig; /* Blueflame register from verbs. */
> +	void *bf_reg; /* BlueFlame register from Verbs. */
>  	uint16_t idx; /* Queue index. */
>  };
> 
> @@ -303,7 +303,7 @@ uint64_t mlx5_get_rx_queue_offloads(struct
> rte_eth_dev *dev);  int mlx5_tx_queue_setup(struct rte_eth_dev *dev,
> uint16_t idx, uint16_t desc,
>  			unsigned int socket, const struct rte_eth_txconf
> *conf);  void mlx5_tx_queue_release(void *dpdk_txq); -int
> mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd);
> +int mlx5_txq_uar_init(struct rte_eth_dev *dev, int fd);
>  struct mlx5_txq_ibv *mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t
> idx);  struct mlx5_txq_ibv *mlx5_txq_ibv_get(struct rte_eth_dev *dev,
> uint16_t idx);  int mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv); @@
> -706,7 +706,7 @@ static __rte_always_inline void
> mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct
> mlx5_wqe *wqe,
>  		       int cond)
>  {
> -	uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
> +	uint64_t *dst = MLX5_UAR_REG(txq->bfreg_idx);
>  	volatile uint64_t *src = ((volatile uint64_t *)wqe);
> 
>  	rte_cio_wmb();
> diff --git a/drivers/net/mlx5/mlx5_trigger.c
> b/drivers/net/mlx5/mlx5_trigger.c index 5b73f0ff03..d7f27702e8 100644
> --- a/drivers/net/mlx5/mlx5_trigger.c
> +++ b/drivers/net/mlx5/mlx5_trigger.c
> @@ -58,7 +58,7 @@ mlx5_txq_start(struct rte_eth_dev *dev)
>  			goto error;
>  		}
>  	}
> -	ret = mlx5_tx_uar_remap(dev, priv->sh->ctx->cmd_fd);
> +	ret = mlx5_txq_uar_init(dev, priv->sh->ctx->cmd_fd);
>  	if (ret) {
>  		/* Adjust index for rollback. */
>  		i = priv->txqs_n - 1;
> diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
> index 1b3d89f2f6..d8e0bda371 100644
> --- a/drivers/net/mlx5/mlx5_txq.c
> +++ b/drivers/net/mlx5/mlx5_txq.c
> @@ -231,9 +231,13 @@ mlx5_tx_queue_release(void *dpdk_txq)
> 
> 
>  /**
> - * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
> - * Both primary and secondary process do mmap to make UAR address
> - * aligned.
> + * Initialize UAR register access for Tx.
> + *
> + * For both primary and secondary, initialize UAR locks for atomic access.
> + *
> + * For secondary, remap BlueFlame registers for secondary process.
> + Remapped
> + * address is stored at the same indexed entry of the local UAR
> + register table
> + * as primary process.
>   *
>   * @param[in] dev
>   *   Pointer to Ethernet device.
> @@ -244,75 +248,52 @@ mlx5_tx_queue_release(void *dpdk_txq)
>   *   0 on success, a negative errno value otherwise and rte_errno is set.
>   */
>  int
> -mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
> +mlx5_txq_uar_init(struct rte_eth_dev *dev, int fd)
>  {
>  	struct mlx5_priv *priv = dev->data->dev_private;
> -	unsigned int i, j;
> -	uintptr_t pages[priv->txqs_n];
> -	unsigned int pages_n = 0;
> -	uintptr_t uar_va;
> -	uintptr_t off;
> -	void *addr;
> -	void *ret;
>  	struct mlx5_txq_data *txq;
>  	struct mlx5_txq_ctrl *txq_ctrl;
> -	int already_mapped;
> +	void *addr;
> +	void **addr_ptr;
> +	uintptr_t uar_va;
> +	uintptr_t offset;
>  	size_t page_size = sysconf(_SC_PAGESIZE);
> +	unsigned int i;
>  #ifndef RTE_ARCH_64
>  	unsigned int lock_idx;
>  #endif
> 
> -	memset(pages, 0, priv->txqs_n * sizeof(uintptr_t));
> -	/*
> -	 * As rdma-core, UARs are mapped in size of OS page size.
> -	 * Use aligned address to avoid duplicate mmap.
> -	 * Ref to libmlx5 function: mlx5_init_context()
> -	 */
>  	for (i = 0; i != priv->txqs_n; ++i) {
>  		if (!(*priv->txqs)[i])
>  			continue;
>  		txq = (*priv->txqs)[i];
>  		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
>  		assert(txq_ctrl->idx == (uint16_t)i);
> -		/* UAR addr form verbs used to find dup and offset in page.
> */
> -		uar_va = (uintptr_t)txq_ctrl->bf_reg_orig;
> -		off = uar_va & (page_size - 1); /* offset in page. */
> -		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page
> addr. */
> -		already_mapped = 0;
> -		for (j = 0; j != pages_n; ++j) {
> -			if (pages[j] == uar_va) {
> -				already_mapped = 1;
> -				break;
> -			}
> -		}
> -		/* new address in reserved UAR address space. */
> -		addr = RTE_PTR_ADD(mlx5_shared_data->uar_base,
> -				   uar_va & (uintptr_t)(MLX5_UAR_SIZE - 1));
> -		if (!already_mapped) {
> -			pages[pages_n++] = uar_va;
> -			/* fixed mmap to specified address in reserved
> -			 * address space.
> +		if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
> +			/*
> +			 * As rdma-core, UARs are mapped in size of OS page
> +			 * size. Ref to libmlx5 function: mlx5_init_context()
>  			 */
> -			ret = mmap(addr, page_size,
> -				   PROT_WRITE, MAP_FIXED | MAP_SHARED,
> fd,
> -				   txq_ctrl->uar_mmap_offset);
> -			if (ret != addr) {
> -				/* fixed mmap have to return same address
> */
> +			uar_va = (uintptr_t)txq_ctrl->bf_reg;
> +			offset = uar_va & (page_size - 1); /* Offset in page.
> */
> +			addr = mmap(NULL, page_size, PROT_WRITE,
> MAP_SHARED, fd,
> +				    txq_ctrl->uar_mmap_offset);
> +			if (addr == MAP_FAILED) {
>  				DRV_LOG(ERR,
> -					"port %u call to mmap failed on UAR"
> -					" for txq %u",
> +					"port %u mmap failed for BF reg."
> +					" of txq %u",
>  					dev->data->port_id, txq_ctrl->idx);
>  				rte_errno = ENXIO;
>  				return -rte_errno;
>  			}
> +			addr = RTE_PTR_ADD(addr, offset);
> +			addr_ptr = mlx5_uar_get_addr_ptr(txq->bfreg_idx);
> +			if (!addr_ptr)
> +				return -rte_errno;
> +			*addr_ptr = addr;
>  		}
> -		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save
> once */
> -			txq_ctrl->txq.bf_reg = RTE_PTR_ADD((void *)addr,
> off);
> -		else
> -			assert(txq_ctrl->txq.bf_reg ==
> -			       RTE_PTR_ADD((void *)addr, off));
>  #ifndef RTE_ARCH_64
> -		/* Assign a UAR lock according to UAR page number */
> +		/* Assign an UAR lock according to UAR page number */
>  		lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
>  			   MLX5_UAR_PAGE_NUM_MASK;
>  		txq->uar_lock = &priv->uar_lock[lock_idx]; @@ -372,6 +353,7
> @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
>  	struct mlx5dv_obj obj;
>  	const int desc = 1 << txq_data->elts_n;
>  	eth_tx_burst_t tx_pkt_burst = mlx5_select_tx_function(dev);
> +	void **addr_ptr;
>  	int ret = 0;
> 
>  	assert(txq_data);
> @@ -507,7 +489,17 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev,
> uint16_t idx)
>  	txq_data->wqes = qp.sq.buf;
>  	txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
>  	txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
> -	txq_ctrl->bf_reg_orig = qp.bf.reg;
> +	/* Allocate a new index in UAR table. */
> +	ret = mlx5_uar_alloc_index();
> +	if (ret < 0)
> +		goto error;
> +	txq_data->bfreg_idx = ret;
> +	txq_ctrl->bf_reg = qp.bf.reg;
> +	/* Store the BlueFlame register address in the local table. */
> +	addr_ptr = mlx5_uar_get_addr_ptr(txq_data->bfreg_idx);
> +	if (!addr_ptr)
> +		goto error;
> +	*addr_ptr = txq_ctrl->bf_reg;
>  	txq_data->cq_db = cq_info.dbrec;
>  	txq_data->cqes =
>  		(volatile struct mlx5_cqe (*)[])
> @@ -589,6 +581,7 @@ mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv)
> {
>  	assert(txq_ibv);
>  	if (rte_atomic32_dec_and_test(&txq_ibv->refcnt)) {
> +		mlx5_uar_free_index(txq_ibv->txq_ctrl->txq.bfreg_idx);
>  		claim_zero(mlx5_glue->destroy_qp(txq_ibv->qp));
>  		claim_zero(mlx5_glue->destroy_cq(txq_ibv->cq));
>  		LIST_REMOVE(txq_ibv, next);
> @@ -837,15 +830,12 @@ mlx5_txq_release(struct rte_eth_dev *dev,
> uint16_t idx)  {
>  	struct mlx5_priv *priv = dev->data->dev_private;
>  	struct mlx5_txq_ctrl *txq;
> -	size_t page_size = sysconf(_SC_PAGESIZE);
> 
>  	if (!(*priv->txqs)[idx])
>  		return 0;
>  	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
>  	if (txq->ibv && !mlx5_txq_ibv_release(txq->ibv))
>  		txq->ibv = NULL;
> -	munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->txq.bf_reg,
> page_size),
> -	       page_size);
>  	if (rte_atomic32_dec_and_test(&txq->refcnt)) {
>  		txq_free_elts(txq);
>  		mlx5_mr_btree_free(&txq->txq.mr_ctrl.cache_bh);
> --
> 2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [dpdk-dev] [PATCH v2 2/3] net/mlx5: remove device register remap
  2019-04-02  6:50     ` Shahaf Shuler
@ 2019-04-02  6:50       ` Shahaf Shuler
  0 siblings, 0 replies; 66+ messages in thread
From: Shahaf Shuler @ 2019-04-02  6:50 UTC (permalink / raw)
  To: Yongseok Koh; +Cc: dev

Hi Koh,

See my comments below; the same comments apply to the mlx4 patch.

Tuesday, April 2, 2019 12:22 AM, Yongseok Koh:
> Subject: [dpdk-dev] [PATCH v2 2/3] net/mlx5: remove device register remap
> 
> UAR (User Access Region) registers will be stored in a process-local table and
> a process accesses a register in a table entry with index. Alloc/free of table
> entry is managed by a global bitmap.
> 
> When there's a need to store a UAR register such as Tx BlueFlame register
> for doorbell, an index should be allocated by mlx5_uar_alloc_index() and
> address of the allocated table entry must be acquired by
> mlx5_uar_get_addr_ptr() so that the table can be expanded if overflowed.
> The local UAR register table doesn't cover all the indexes in the bitmap.
> This will be expanded if more indexes are allocated than the current size of
> the table.
> 
> For example, the BlueFlame register for Tx doorbell has to be remapped on
> each secondary process. On initialization, primary process allocates an index
> for the UAR register table and stores the register address in the indexed
> entry of its own table when configuring a Tx queue. The index is stored in the
> shared memory(txq->bfreg_idx) and visible to secondary processes. As
> secondary processes know the index, each process stores remapped register
> in the same indexed entry of its local UAR register table.
> 
> On the datapath of each process, the register can be referenced simply by
> MLX5_UAR_REG(idx) which accesses its local UAR register table by the index.
> 
> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5.c         | 262 +++++++++++++++++++++++++++--------
> -----
>  drivers/net/mlx5/mlx5.h         |  19 ++-
>  drivers/net/mlx5/mlx5_rxtx.h    |   8 +-
>  drivers/net/mlx5/mlx5_trigger.c |   2 +-
>  drivers/net/mlx5/mlx5_txq.c     |  96 +++++++--------
>  5 files changed, 242 insertions(+), 145 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index
> 40445056f5..103841b2bc 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -25,6 +25,7 @@
>  #pragma GCC diagnostic error "-Wpedantic"
>  #endif
> 
> +#include <rte_bitmap.h>
>  #include <rte_malloc.h>
>  #include <rte_ethdev_driver.h>
>  #include <rte_ethdev_pci.h>
> @@ -131,7 +132,7 @@ struct mlx5_shared_data *mlx5_shared_data;  static
> rte_spinlock_t mlx5_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
> 
>  /* Process local data for secondary processes. */ -static struct
> mlx5_local_data mlx5_local_data;
> +struct mlx5_local_data mlx5_local_data;
> 
>  /** Driver-specific log messages type. */  int mlx5_logtype; @@ -810,130
> +811,225 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs
> *devargs)
> 
>  static struct rte_pci_driver mlx5_driver;
> 
> +
> +/**
> + * Expand the local UAR register table.
> + *
> + * @param size
> + *   Size of the table to be expanded
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
>  static int
> -find_lower_va_bound(const struct rte_memseg_list *msl,
> -		const struct rte_memseg *ms, void *arg)
> +uar_expand_table(uint32_t size)

This function needs to be protected with a mutex, since it can be called by multiple control threads on different eth devices.

>  {
> -	void **addr = arg;
> +	struct mlx5_local_data *ld = &mlx5_local_data;
> +	void *mem;
> +	size_t tbl_sz = ld->uar_table_sz;
> 
> -	if (msl->external)
> +	if (size <= tbl_sz)
>  		return 0;
> -	if (*addr == NULL)
> -		*addr = ms->addr;
> -	else
> -		*addr = RTE_MIN(*addr, ms->addr);
> -
> +	tbl_sz = RTE_ALIGN_CEIL(size, RTE_BITMAP_SLAB_BIT_SIZE);
> +	mem = rte_realloc(ld->uar_table, tbl_sz * sizeof(void *),
> +			  RTE_CACHE_LINE_SIZE);
> +	if (!mem) {
> +		rte_errno = ENOMEM;
> +		DRV_LOG(ERR, "failed to expand uar table");
> +		return -rte_errno;
> +	}
> +	DRV_LOG(DEBUG, "UAR reg. table is expanded to %zu", tbl_sz);
> +	ld->uar_table = mem;
> +	ld->uar_table_sz = tbl_sz;
>  	return 0;
>  }
> 
>  /**
> - * Reserve UAR address space for primary process.
> + * Return the pointer of the indexed slot in the local UAR register table.
>   *
> - * Process local resource is used by both primary and secondary to avoid
> - * duplicate reservation. The space has to be available on both primary and
> - * secondary process, TXQ UAR maps to this area using fixed mmap w/o
> double
> - * check.
> + * The indexed slot must be allocated by mlx5_uar_alloc_index() in
> + advance. And
> + * the table will be expanded if overflowed.
> + *
> + * @param idx
> + *   Index of the table.
>   *
>   * @return
> - *   0 on success, a negative errno value otherwise and rte_errno is set.
> + *   Pointer of table entry on success, NULL otherwise and rte_errno is set.
>   */
> -static int
> -mlx5_uar_init_primary(void)
> +void **
> +mlx5_uar_get_addr_ptr(uint32_t idx)

I wonder whether we could have a coherency issue here.
Suppose we have 2 eth devices: one is doing datapath, the other is at the configuration stage.
The one at the configuration stage may trigger the expansion (rte_realloc() may move the table),
while the one on the datapath may concurrently read from the UAR table.

> +{
> +	struct mlx5_local_data *ld = &mlx5_local_data;
> +	int ret;
> +
> +	assert(idx < MLX5_UAR_TABLE_SIZE_MAX);
> +	if (idx >= ld->uar_table_sz) {
> +		ret = uar_expand_table(idx + 1);
> +		if (ret)
> +			return NULL;
> +	}
> +	return &(*ld->uar_table)[idx];
> +}
> +
> +/**
> + * Allocate a slot of UAR register table.
> + *
> + * Allocation is done by scanning the global bitmap. The global
> +spinlock should
> + * be held.
> + *
> + * @return
> + *   Index of a free slot on success, a negative errno value otherwise and
> + *   rte_errno is set.
> + */
> +uint32_t
> +mlx5_uar_alloc_index(void)
>  {
>  	struct mlx5_shared_data *sd = mlx5_shared_data;
> -	void *addr = (void *)0;
> +	uint32_t idx = 0;
> +	uint64_t slab = 0;
> +	int ret;
> 
> -	if (sd->uar_base)
> -		return 0;
> -	/* find out lower bound of hugepage segments */
> -	rte_memseg_walk(find_lower_va_bound, &addr);
> -	/* keep distance to hugepages to minimize potential conflicts. */
> -	addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX5_UAR_OFFSET +
> MLX5_UAR_SIZE));
> -	/* anonymous mmap, no real memory consumption. */
> -	addr = mmap(addr, MLX5_UAR_SIZE,
> -		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> -	if (addr == MAP_FAILED) {
> -		DRV_LOG(ERR,
> -			"Failed to reserve UAR address space, please"
> -			" adjust MLX5_UAR_SIZE or try --base-virtaddr");
> -		rte_errno = ENOMEM;
> +	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
> +	rte_spinlock_lock(&sd->lock);
> +	__rte_bitmap_scan_init(sd->uar_bmp);
> +	ret = rte_bitmap_scan(sd->uar_bmp, &idx, &slab);
> +	if (unlikely(!ret)) {
> +		/*
> +		 * This cannot happen unless there are unreasonably large
> number
> +		 * of queues and ports.
> +		 */
> +		rte_errno = ENOSPC;
> +		rte_spinlock_unlock(&sd->lock);
>  		return -rte_errno;
>  	}
> -	/* Accept either same addr or a new addr returned from mmap if
> target
> -	 * range occupied.
> -	 */
> -	DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
> -	sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
> -	return 0;
> +	idx += __builtin_ctzll(slab);
> +	/* Mark the slot is occupied. */
> +	rte_bitmap_clear(sd->uar_bmp, idx);
> +	rte_spinlock_unlock(&sd->lock);
> +	DRV_LOG(DEBUG, "index %d is allocated in UAR reg. table", idx);
> +	return idx;
>  }
> 
>  /**
> - * Unmap UAR address space reserved for primary process.
> + * Free a slot of UAR register table.
>   */
> -static void
> -mlx5_uar_uninit_primary(void)
> +void
> +mlx5_uar_free_index(uint32_t idx)
>  {
>  	struct mlx5_shared_data *sd = mlx5_shared_data;
> 
> -	if (!sd->uar_base)
> -		return;
> -	munmap(sd->uar_base, MLX5_UAR_SIZE);
> -	sd->uar_base = NULL;
> +	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
> +	assert(idx < MLX5_UAR_TABLE_SIZE_MAX);
> +	rte_spinlock_lock(&sd->lock);
> +	/* Mark the slot is empty. */
> +	rte_bitmap_set(sd->uar_bmp, idx);
> +	rte_spinlock_unlock(&sd->lock);
> +	DRV_LOG(DEBUG, "index %d is freed in UAR reg. table", idx);
>  }
> 
>  /**
> - * Reserve UAR address space for secondary process, align with primary
> process.
> + * Initialize UAR register table bitmap.
> + *
> + * UAR registers will be stored in a process-local table and the table
> + is
> + * managed by a global bitmap. When there's a need to store a UAR
> + register, an
> + * index should be allocated by mlx5_uar_alloc_index() and address of
> + the
> + * allocated table entry must be acquired by mlx5_uar_get_addr_ptr() so
> + that the
> + * table can be expanded if overflowed.
> + *
> + * The local UAR register table doesn't cover all the indexes in the bitmap.
> + * This will be expanded if more indexes are allocated than the current
> + size of
> + * the table.
> + *
> + * Secondary process should have reference of the index and store
> + remapped
> + * register at the same index in its local UAR register table.
> + *
> + * On the datapath of each process, the register can be referenced
> + simply by
> + * MLX5_UAR_REG(idx).
>   *
>   * @return
>   *   0 on success, a negative errno value otherwise and rte_errno is set.
>   */
>  static int
> -mlx5_uar_init_secondary(void)
> +uar_init_primary(void)
>  {
>  	struct mlx5_shared_data *sd = mlx5_shared_data;
> -	struct mlx5_local_data *ld = &mlx5_local_data;
> -	void *addr;
> +	struct rte_bitmap *bmp;
> +	void *bmp_mem;
> +	uint32_t bmp_size;
> +	unsigned int i;
> 
> -	if (ld->uar_base) { /* Already reserved. */
> -		assert(sd->uar_base == ld->uar_base);
> -		return 0;
> -	}
> -	assert(sd->uar_base);
> -	/* anonymous mmap, no real memory consumption. */
> -	addr = mmap(sd->uar_base, MLX5_UAR_SIZE,
> -		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> -	if (addr == MAP_FAILED) {
> -		DRV_LOG(ERR, "UAR mmap failed: %p size: %llu",
> -			sd->uar_base, MLX5_UAR_SIZE);
> -		rte_errno = ENXIO;
> +	bmp_size =
> rte_bitmap_get_memory_footprint(MLX5_UAR_TABLE_SIZE_MAX);
> +	bmp_mem = rte_zmalloc("uar_table", bmp_size,
> RTE_CACHE_LINE_SIZE);
> +	if (!bmp_mem) {
> +		rte_errno = ENOMEM;
> +		DRV_LOG(ERR, "failed to allocate memory for uar table");
>  		return -rte_errno;
>  	}
> -	if (sd->uar_base != addr) {
> -		DRV_LOG(ERR,
> -			"UAR address %p size %llu occupied, please"
> -			" adjust MLX5_UAR_OFFSET or try EAL parameter"
> -			" --base-virtaddr",
> -			sd->uar_base, MLX5_UAR_SIZE);
> -		rte_errno = ENXIO;
> -		return -rte_errno;
> +	bmp = rte_bitmap_init(MLX5_UAR_TABLE_SIZE_MAX, bmp_mem,
> bmp_size);
> +	/* Set the entire bitmap as 1 means vacant and 0 means empty. */
> +	for (i = 0; i < bmp->array2_size; ++i)
> +		rte_bitmap_set_slab(bmp, i * RTE_BITMAP_SLAB_BIT_SIZE, -
> 1);
> +	sd->uar_bmp = bmp;
> +	return 0;
> +}
> +
> +/**
> + * Un-initialize UAR register resources.
> + *
> + * The global bitmap and the register table of primary process are freed.
> + */
> +static void
> +uar_uninit_primary(void)
> +{
> +	struct mlx5_shared_data *sd = mlx5_shared_data;
> +	struct mlx5_local_data *ld = &mlx5_local_data;
> +
> +	if (sd->uar_bmp) {
> +		rte_bitmap_free(sd->uar_bmp);
> +		rte_free(sd->uar_bmp);
> +		sd->uar_bmp = NULL;
> +	}
> +	/* Free primary's table. */
> +	if (ld->uar_table) {
> +		rte_free(ld->uar_table);
> +		ld->uar_table = NULL;
> +		ld->uar_table_sz = 0;
>  	}
> -	ld->uar_base = addr;
> -	DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
> +}
> +
> +/**
> + * Initialize UAR register resources for secondary process.
> + *
> + * Allocate the local UAR register table. Initially, the number of
> +entries is
> + * same as the size of a bitmap slab.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +static int
> +uar_init_secondary(void)
> +{
> +	/* Prepare at least a bitmap slab. */
> +	uar_expand_table(RTE_BITMAP_SLAB_BIT_SIZE);
>  	return 0;
>  }
> 
>  /**
> - * Unmap UAR address space reserved for secondary process.
> + * Un-initialize UAR register resources for secondary process.
> + *
> + * The local UAR register table is freed.
>   */
>  static void
> -mlx5_uar_uninit_secondary(void)
> +uar_uninit_secondary(void)
>  {
>  	struct mlx5_local_data *ld = &mlx5_local_data;
> 
> -	if (!ld->uar_base)
> -		return;
> -	munmap(ld->uar_base, MLX5_UAR_SIZE);
> -	ld->uar_base = NULL;
> +	/* Free process-local table. */
> +	if (ld->uar_table) {
> +		rte_free(ld->uar_table);
> +		ld->uar_table = NULL;
> +		ld->uar_table_sz = 0;
> +	}
>  }
> 
>  /**
> @@ -967,7 +1063,7 @@ mlx5_init_once(void)
> 
> 	rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
>  						mlx5_mr_mem_event_cb,
> NULL);
>  		mlx5_mp_init_primary();
> -		ret = mlx5_uar_init_primary();
> +		ret = uar_init_primary();
>  		if (ret)
>  			goto error;
>  		sd->init_done = true;
> @@ -976,7 +1072,7 @@ mlx5_init_once(void)
>  		if (ld->init_done)
>  			break;
>  		mlx5_mp_init_secondary();
> -		ret = mlx5_uar_init_secondary();
> +		ret = uar_init_secondary();
>  		if (ret)
>  			goto error;
>  		++sd->secondary_cnt;
> @@ -990,12 +1086,12 @@ mlx5_init_once(void)
>  error:
>  	switch (rte_eal_process_type()) {
>  	case RTE_PROC_PRIMARY:
> -		mlx5_uar_uninit_primary();
> +		uar_uninit_primary();
>  		mlx5_mp_uninit_primary();
> 
> 	rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB",
> NULL);
>  		break;
>  	case RTE_PROC_SECONDARY:
> -		mlx5_uar_uninit_secondary();
> +		uar_uninit_secondary();
>  		mlx5_mp_uninit_secondary();
>  		break;
>  	default:
> @@ -1099,7 +1195,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
>  		if (err < 0)
>  			return NULL;
>  		/* Remap UAR for Tx queues. */
> -		err = mlx5_tx_uar_remap(eth_dev, err);
> +		err = mlx5_txq_uar_init(eth_dev, err);
>  		if (err)
>  			return NULL;
>  		/*
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> 8ce8361a85..f77517bee0 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -97,8 +97,8 @@ struct mlx5_shared_data {
>  	/* Global spinlock for primary and secondary processes. */
>  	int init_done; /* Whether primary has done initialization. */
>  	unsigned int secondary_cnt; /* Number of secondary processes
> init'd. */
> -	void *uar_base;
> -	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping.
> */
> +	struct rte_bitmap *uar_bmp;
> +	/* Bitmap to keep track of BlueFlame register table. */
>  	struct mlx5_dev_list mem_event_cb_list;
>  	rte_rwlock_t mem_event_rwlock;
>  };
> @@ -106,11 +106,19 @@ struct mlx5_shared_data {
>  /* Per-process data structure, not visible to other processes. */  struct
> mlx5_local_data {
>  	int init_done; /* Whether a secondary has done initialization. */
> -	void *uar_base;
> -	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping.
> */
> +	void *(*uar_table)[];
> +	/* Table of BlueFlame registers for each process. */
> +	size_t uar_table_sz;
> +	/* Size of BlueFlame register table. */
>  };
> 
>  extern struct mlx5_shared_data *mlx5_shared_data;
> +extern struct mlx5_local_data mlx5_local_data;
> +
> +/* The maximum size of BlueFlame register table. */ #define
> +MLX5_UAR_TABLE_SIZE_MAX (RTE_MAX_ETHPORTS *
> RTE_MAX_QUEUES_PER_PORT)
> +
> +#define MLX5_UAR_REG(idx) ((*mlx5_local_data.uar_table)[(idx)])

Same concern - what if the UAR table is expanded by another device that is still in its configuration stage?

> 
>  struct mlx5_counter_ctrl {
>  	/* Name of the counter. */
> @@ -331,6 +339,9 @@ struct mlx5_priv {
>  /* mlx5.c */
> 
>  int mlx5_getenv_int(const char *);
> +void **mlx5_uar_get_addr_ptr(uint32_t idx); uint32_t
> +mlx5_uar_alloc_index(void); void mlx5_uar_free_index(uint32_t idx);
> 
>  /* mlx5_ethdev.c */
> 
> diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
> index ced9945888..b32c1d6e0f 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.h
> +++ b/drivers/net/mlx5/mlx5_rxtx.h
> @@ -203,7 +203,7 @@ struct mlx5_txq_data {
>  	volatile void *wqes; /* Work queue (use volatile to write into). */
>  	volatile uint32_t *qp_db; /* Work queue doorbell. */
>  	volatile uint32_t *cq_db; /* Completion queue doorbell. */
> -	volatile void *bf_reg; /* Blueflame register remapped. */
> +	uint32_t bfreg_idx; /* Blueflame register index. */
>  	struct rte_mbuf *(*elts)[]; /* TX elements. */
>  	struct mlx5_txq_stats stats; /* TX queue counters. */  #ifndef
> RTE_ARCH_64 @@ -232,7 +232,7 @@ struct mlx5_txq_ctrl {
>  	struct mlx5_priv *priv; /* Back pointer to private data. */
>  	struct mlx5_txq_data txq; /* Data path structure. */
>  	off_t uar_mmap_offset; /* UAR mmap offset for non-primary
> process. */
> -	volatile void *bf_reg_orig; /* Blueflame register from verbs. */
> +	void *bf_reg; /* BlueFlame register from Verbs. */
>  	uint16_t idx; /* Queue index. */
>  };
> 
> @@ -303,7 +303,7 @@ uint64_t mlx5_get_rx_queue_offloads(struct
> rte_eth_dev *dev);  int mlx5_tx_queue_setup(struct rte_eth_dev *dev,
> uint16_t idx, uint16_t desc,
>  			unsigned int socket, const struct rte_eth_txconf
> *conf);  void mlx5_tx_queue_release(void *dpdk_txq); -int
> mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd);
> +int mlx5_txq_uar_init(struct rte_eth_dev *dev, int fd);
>  struct mlx5_txq_ibv *mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t
> idx);  struct mlx5_txq_ibv *mlx5_txq_ibv_get(struct rte_eth_dev *dev,
> uint16_t idx);  int mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv); @@
> -706,7 +706,7 @@ static __rte_always_inline void
> mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct
> mlx5_wqe *wqe,
>  		       int cond)
>  {
> -	uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
> +	uint64_t *dst = MLX5_UAR_REG(txq->bfreg_idx);
>  	volatile uint64_t *src = ((volatile uint64_t *)wqe);
> 
>  	rte_cio_wmb();
> diff --git a/drivers/net/mlx5/mlx5_trigger.c
> b/drivers/net/mlx5/mlx5_trigger.c index 5b73f0ff03..d7f27702e8 100644
> --- a/drivers/net/mlx5/mlx5_trigger.c
> +++ b/drivers/net/mlx5/mlx5_trigger.c
> @@ -58,7 +58,7 @@ mlx5_txq_start(struct rte_eth_dev *dev)
>  			goto error;
>  		}
>  	}
> -	ret = mlx5_tx_uar_remap(dev, priv->sh->ctx->cmd_fd);
> +	ret = mlx5_txq_uar_init(dev, priv->sh->ctx->cmd_fd);
>  	if (ret) {
>  		/* Adjust index for rollback. */
>  		i = priv->txqs_n - 1;
> diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
> index 1b3d89f2f6..d8e0bda371 100644
> --- a/drivers/net/mlx5/mlx5_txq.c
> +++ b/drivers/net/mlx5/mlx5_txq.c
> @@ -231,9 +231,13 @@ mlx5_tx_queue_release(void *dpdk_txq)
> 
> 
>  /**
> - * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
> - * Both primary and secondary process do mmap to make UAR address
> - * aligned.
> + * Initialize UAR register access for Tx.
> + *
> + * For both primary and secondary, initialize UAR locks for atomic access.
> + *
> + * For secondary, remap BlueFlame registers for secondary process.
> + Remapped
> + * address is stored at the same indexed entry of the local UAR
> + register table
> + * as primary process.
>   *
>   * @param[in] dev
>   *   Pointer to Ethernet device.
> @@ -244,75 +248,52 @@ mlx5_tx_queue_release(void *dpdk_txq)
>   *   0 on success, a negative errno value otherwise and rte_errno is set.
>   */
>  int
> -mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
> +mlx5_txq_uar_init(struct rte_eth_dev *dev, int fd)
>  {
>  	struct mlx5_priv *priv = dev->data->dev_private;
> -	unsigned int i, j;
> -	uintptr_t pages[priv->txqs_n];
> -	unsigned int pages_n = 0;
> -	uintptr_t uar_va;
> -	uintptr_t off;
> -	void *addr;
> -	void *ret;
>  	struct mlx5_txq_data *txq;
>  	struct mlx5_txq_ctrl *txq_ctrl;
> -	int already_mapped;
> +	void *addr;
> +	void **addr_ptr;
> +	uintptr_t uar_va;
> +	uintptr_t offset;
>  	size_t page_size = sysconf(_SC_PAGESIZE);
> +	unsigned int i;
>  #ifndef RTE_ARCH_64
>  	unsigned int lock_idx;
>  #endif
> 
> -	memset(pages, 0, priv->txqs_n * sizeof(uintptr_t));
> -	/*
> -	 * As rdma-core, UARs are mapped in size of OS page size.
> -	 * Use aligned address to avoid duplicate mmap.
> -	 * Ref to libmlx5 function: mlx5_init_context()
> -	 */
>  	for (i = 0; i != priv->txqs_n; ++i) {
>  		if (!(*priv->txqs)[i])
>  			continue;
>  		txq = (*priv->txqs)[i];
>  		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
>  		assert(txq_ctrl->idx == (uint16_t)i);
> -		/* UAR addr form verbs used to find dup and offset in page.
> */
> -		uar_va = (uintptr_t)txq_ctrl->bf_reg_orig;
> -		off = uar_va & (page_size - 1); /* offset in page. */
> -		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page
> addr. */
> -		already_mapped = 0;
> -		for (j = 0; j != pages_n; ++j) {
> -			if (pages[j] == uar_va) {
> -				already_mapped = 1;
> -				break;
> -			}
> -		}
> -		/* new address in reserved UAR address space. */
> -		addr = RTE_PTR_ADD(mlx5_shared_data->uar_base,
> -				   uar_va & (uintptr_t)(MLX5_UAR_SIZE - 1));
> -		if (!already_mapped) {
> -			pages[pages_n++] = uar_va;
> -			/* fixed mmap to specified address in reserved
> -			 * address space.
> +		if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
> +			/*
> +			 * As rdma-core, UARs are mapped in size of OS page
> +			 * size. Ref to libmlx5 function: mlx5_init_context()
>  			 */
> -			ret = mmap(addr, page_size,
> -				   PROT_WRITE, MAP_FIXED | MAP_SHARED,
> fd,
> -				   txq_ctrl->uar_mmap_offset);
> -			if (ret != addr) {
> -				/* fixed mmap have to return same address
> */
> +			uar_va = (uintptr_t)txq_ctrl->bf_reg;
> +			offset = uar_va & (page_size - 1); /* Offset in page.
> */
> +			addr = mmap(NULL, page_size, PROT_WRITE,
> MAP_SHARED, fd,
> +				    txq_ctrl->uar_mmap_offset);
> +			if (addr == MAP_FAILED) {
>  				DRV_LOG(ERR,
> -					"port %u call to mmap failed on UAR"
> -					" for txq %u",
> +					"port %u mmap failed for BF reg."
> +					" of txq %u",
>  					dev->data->port_id, txq_ctrl->idx);
>  				rte_errno = ENXIO;
>  				return -rte_errno;
>  			}
> +			addr = RTE_PTR_ADD(addr, offset);
> +			addr_ptr = mlx5_uar_get_addr_ptr(txq->bfreg_idx);
> +			if (!addr_ptr)
> +				return -rte_errno;
> +			*addr_ptr = addr;
>  		}
> -		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save
> once */
> -			txq_ctrl->txq.bf_reg = RTE_PTR_ADD((void *)addr,
> off);
> -		else
> -			assert(txq_ctrl->txq.bf_reg ==
> -			       RTE_PTR_ADD((void *)addr, off));
>  #ifndef RTE_ARCH_64
> -		/* Assign a UAR lock according to UAR page number */
> +		/* Assign an UAR lock according to UAR page number */
>  		lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
>  			   MLX5_UAR_PAGE_NUM_MASK;
>  		txq->uar_lock = &priv->uar_lock[lock_idx]; @@ -372,6 +353,7
> @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
>  	struct mlx5dv_obj obj;
>  	const int desc = 1 << txq_data->elts_n;
>  	eth_tx_burst_t tx_pkt_burst = mlx5_select_tx_function(dev);
> +	void **addr_ptr;
>  	int ret = 0;
> 
>  	assert(txq_data);
> @@ -507,7 +489,17 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev,
> uint16_t idx)
>  	txq_data->wqes = qp.sq.buf;
>  	txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
>  	txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
> -	txq_ctrl->bf_reg_orig = qp.bf.reg;
> +	/* Allocate a new index in UAR table. */
> +	ret = mlx5_uar_alloc_index();
> +	if (ret < 0)
> +		goto error;
> +	txq_data->bfreg_idx = ret;
> +	txq_ctrl->bf_reg = qp.bf.reg;
> +	/* Store the BlueFlame register address in the local table. */
> +	addr_ptr = mlx5_uar_get_addr_ptr(txq_data->bfreg_idx);
> +	if (!addr_ptr)
> +		goto error;
> +	*addr_ptr = txq_ctrl->bf_reg;
>  	txq_data->cq_db = cq_info.dbrec;
>  	txq_data->cqes =
>  		(volatile struct mlx5_cqe (*)[])
> @@ -589,6 +581,7 @@ mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv)
> {
>  	assert(txq_ibv);
>  	if (rte_atomic32_dec_and_test(&txq_ibv->refcnt)) {
> +		mlx5_uar_free_index(txq_ibv->txq_ctrl->txq.bfreg_idx);
>  		claim_zero(mlx5_glue->destroy_qp(txq_ibv->qp));
>  		claim_zero(mlx5_glue->destroy_cq(txq_ibv->cq));
>  		LIST_REMOVE(txq_ibv, next);
> @@ -837,15 +830,12 @@ mlx5_txq_release(struct rte_eth_dev *dev,
> uint16_t idx)  {
>  	struct mlx5_priv *priv = dev->data->dev_private;
>  	struct mlx5_txq_ctrl *txq;
> -	size_t page_size = sysconf(_SC_PAGESIZE);
> 
>  	if (!(*priv->txqs)[idx])
>  		return 0;
>  	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
>  	if (txq->ibv && !mlx5_txq_ibv_release(txq->ibv))
>  		txq->ibv = NULL;
> -	munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->txq.bf_reg,
> page_size),
> -	       page_size);
>  	if (rte_atomic32_dec_and_test(&txq->refcnt)) {
>  		txq_free_elts(txq);
>  		mlx5_mr_btree_free(&txq->txq.mr_ctrl.cache_bh);
> --
> 2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v3 0/4] net/mlx: remove device register remap
  2019-03-25 19:36 [dpdk-dev] [PATCH 0/3] net/mlx: remove device register remap Yongseok Koh
                   ` (4 preceding siblings ...)
  2019-04-01 21:22 ` [dpdk-dev] [PATCH v2 0/3] net/mlx: " Yongseok Koh
@ 2019-04-05  1:33 ` Yongseok Koh
  2019-04-05  1:33   ` Yongseok Koh
                     ` (4 more replies)
  2019-04-09 23:13 ` [dpdk-dev] [PATCH v4 0/4] net/mlx: " Yongseok Koh
  2019-04-10 18:41 ` [dpdk-dev] [PATCH v5 " Yongseok Koh
  7 siblings, 5 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-05  1:33 UTC (permalink / raw)
  To: shahafs; +Cc: dev

This patchset lifts the requirement of reserving a huge virtual address space
and remapping the device UAR register onto it in order to use the same address
in both the primary and secondary processes.

v3:
* move UAR table to per-process storage

v2:
* rebase on the latest branch tip
* fix a bug

Yongseok Koh (4):
  net/mlx5: fix recursive inclusion of header file
  net/mlx5: remove redundant queue index
  net/mlx5: remove device register remap
  net/mlx4: remove device register remap

 drivers/net/mlx4/mlx4.c            | 232 +++++++------------------------------
 drivers/net/mlx4/mlx4.h            |  15 ++-
 drivers/net/mlx4/mlx4_prm.h        |   3 +-
 drivers/net/mlx4/mlx4_rxtx.c       |   2 +-
 drivers/net/mlx4/mlx4_rxtx.h       |   6 +-
 drivers/net/mlx4/mlx4_txq.c        | 170 ++++++++++++++++-----------
 drivers/net/mlx5/mlx5.c            | 198 ++++---------------------------
 drivers/net/mlx5/mlx5.h            |  16 ++-
 drivers/net/mlx5/mlx5_ethdev.c     |  17 +++
 drivers/net/mlx5/mlx5_flow.c       |   5 +-
 drivers/net/mlx5/mlx5_flow_dv.c    |   4 +-
 drivers/net/mlx5/mlx5_flow_verbs.c |   5 +-
 drivers/net/mlx5/mlx5_rxq.c        |  29 +++--
 drivers/net/mlx5/mlx5_rxtx.h       |  21 ++--
 drivers/net/mlx5/mlx5_stats.c      |  15 +--
 drivers/net/mlx5/mlx5_trigger.c    |   8 +-
 drivers/net/mlx5/mlx5_txq.c        | 199 ++++++++++++++++++-------------
 drivers/net/mlx5/mlx5_vlan.c       |   3 +-
 18 files changed, 378 insertions(+), 570 deletions(-)

-- 
2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v3 0/4] net/mlx: remove device register remap
  2019-04-05  1:33 ` [dpdk-dev] [PATCH v3 0/4] net/mlx: " Yongseok Koh
@ 2019-04-05  1:33   ` Yongseok Koh
  2019-04-05  1:33   ` [dpdk-dev] [PATCH v3 1/4] net/mlx5: fix recursive inclusion of header file Yongseok Koh
                     ` (3 subsequent siblings)
  4 siblings, 0 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-05  1:33 UTC (permalink / raw)
  To: shahafs; +Cc: dev

This patchset lifts the requirement of reserving a huge virtual address space
and remapping the device UAR register onto it in order to use the same address
in both the primary and secondary processes.

v3:
* move UAR table to per-process storage

v2:
* rebase on the latest branch tip
* fix a bug

Yongseok Koh (4):
  net/mlx5: fix recursive inclusion of header file
  net/mlx5: remove redundant queue index
  net/mlx5: remove device register remap
  net/mlx4: remove device register remap

 drivers/net/mlx4/mlx4.c            | 232 +++++++------------------------------
 drivers/net/mlx4/mlx4.h            |  15 ++-
 drivers/net/mlx4/mlx4_prm.h        |   3 +-
 drivers/net/mlx4/mlx4_rxtx.c       |   2 +-
 drivers/net/mlx4/mlx4_rxtx.h       |   6 +-
 drivers/net/mlx4/mlx4_txq.c        | 170 ++++++++++++++++-----------
 drivers/net/mlx5/mlx5.c            | 198 ++++---------------------------
 drivers/net/mlx5/mlx5.h            |  16 ++-
 drivers/net/mlx5/mlx5_ethdev.c     |  17 +++
 drivers/net/mlx5/mlx5_flow.c       |   5 +-
 drivers/net/mlx5/mlx5_flow_dv.c    |   4 +-
 drivers/net/mlx5/mlx5_flow_verbs.c |   5 +-
 drivers/net/mlx5/mlx5_rxq.c        |  29 +++--
 drivers/net/mlx5/mlx5_rxtx.h       |  21 ++--
 drivers/net/mlx5/mlx5_stats.c      |  15 +--
 drivers/net/mlx5/mlx5_trigger.c    |   8 +-
 drivers/net/mlx5/mlx5_txq.c        | 199 ++++++++++++++++++-------------
 drivers/net/mlx5/mlx5_vlan.c       |   3 +-
 18 files changed, 378 insertions(+), 570 deletions(-)

-- 
2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v3 1/4] net/mlx5: fix recursive inclusion of header file
  2019-04-05  1:33 ` [dpdk-dev] [PATCH v3 0/4] net/mlx: " Yongseok Koh
  2019-04-05  1:33   ` Yongseok Koh
@ 2019-04-05  1:33   ` Yongseok Koh
  2019-04-05  1:33     ` Yongseok Koh
  2019-04-05  1:33   ` [dpdk-dev] [PATCH v3 2/4] net/mlx5: remove redundant queue index Yongseok Koh
                     ` (2 subsequent siblings)
  4 siblings, 1 reply; 66+ messages in thread
From: Yongseok Koh @ 2019-04-05  1:33 UTC (permalink / raw)
  To: shahafs; +Cc: dev

mlx5.h includes mlx5_rxtx.h and mlx5_rxtx.h includes mlx5.h recursively.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Shahaf Shuler <shahafs@mellanox.com>
---
 drivers/net/mlx5/mlx5.h            | 1 -
 drivers/net/mlx5/mlx5_flow.c       | 5 +++--
 drivers/net/mlx5/mlx5_flow_dv.c    | 4 +++-
 drivers/net/mlx5/mlx5_flow_verbs.c | 5 +++--
 drivers/net/mlx5/mlx5_vlan.c       | 3 ++-
 5 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index ef05d9f975..699c8fcf6d 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -33,7 +33,6 @@
 
 #include "mlx5_utils.h"
 #include "mlx5_mr.h"
-#include "mlx5_rxtx.h"
 #include "mlx5_autoconf.h"
 #include "mlx5_defs.h"
 
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 9dc492ad2d..1c78a5f8ea 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -30,9 +30,10 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
-#include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_glue.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
 
 /* Dev ops structure defined in mlx5.c */
 extern const struct eth_dev_ops mlx5_dev_ops;
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 52be8b32c1..ccb2f7593f 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -29,9 +29,11 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
 #include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
+
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 
 #ifndef HAVE_IBV_FLOW_DEVX_COUNTERS
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index 49dd13e6d2..3956df1a7e 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -29,9 +29,10 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
-#include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_glue.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
 
 #define VERBS_SPEC_INNER(item_flags) \
 	(!!((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ? IBV_FLOW_SPEC_INNER : 0)
diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c
index 6568a3a475..4004930942 100644
--- a/drivers/net/mlx5/mlx5_vlan.c
+++ b/drivers/net/mlx5/mlx5_vlan.c
@@ -27,10 +27,11 @@
 #include <rte_ethdev_driver.h>
 #include <rte_common.h>
 
-#include "mlx5_utils.h"
 #include "mlx5.h"
 #include "mlx5_autoconf.h"
 #include "mlx5_glue.h"
+#include "mlx5_rxtx.h"
+#include "mlx5_utils.h"
 
 /**
  * DPDK callback to configure a VLAN filter.
-- 
2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v3 1/4] net/mlx5: fix recursive inclusion of header file
  2019-04-05  1:33   ` [dpdk-dev] [PATCH v3 1/4] net/mlx5: fix recursive inclusion of header file Yongseok Koh
@ 2019-04-05  1:33     ` Yongseok Koh
  0 siblings, 0 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-05  1:33 UTC (permalink / raw)
  To: shahafs; +Cc: dev

mlx5.h includes mlx5_rxtx.h and mlx5_rxtx.h includes mlx5.h recursively.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Shahaf Shuler <shahafs@mellanox.com>
---
 drivers/net/mlx5/mlx5.h            | 1 -
 drivers/net/mlx5/mlx5_flow.c       | 5 +++--
 drivers/net/mlx5/mlx5_flow_dv.c    | 4 +++-
 drivers/net/mlx5/mlx5_flow_verbs.c | 5 +++--
 drivers/net/mlx5/mlx5_vlan.c       | 3 ++-
 5 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index ef05d9f975..699c8fcf6d 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -33,7 +33,6 @@
 
 #include "mlx5_utils.h"
 #include "mlx5_mr.h"
-#include "mlx5_rxtx.h"
 #include "mlx5_autoconf.h"
 #include "mlx5_defs.h"
 
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 9dc492ad2d..1c78a5f8ea 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -30,9 +30,10 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
-#include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_glue.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
 
 /* Dev ops structure defined in mlx5.c */
 extern const struct eth_dev_ops mlx5_dev_ops;
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 52be8b32c1..ccb2f7593f 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -29,9 +29,11 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
 #include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
+
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 
 #ifndef HAVE_IBV_FLOW_DEVX_COUNTERS
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index 49dd13e6d2..3956df1a7e 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -29,9 +29,10 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
-#include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_glue.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
 
 #define VERBS_SPEC_INNER(item_flags) \
 	(!!((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ? IBV_FLOW_SPEC_INNER : 0)
diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c
index 6568a3a475..4004930942 100644
--- a/drivers/net/mlx5/mlx5_vlan.c
+++ b/drivers/net/mlx5/mlx5_vlan.c
@@ -27,10 +27,11 @@
 #include <rte_ethdev_driver.h>
 #include <rte_common.h>
 
-#include "mlx5_utils.h"
 #include "mlx5.h"
 #include "mlx5_autoconf.h"
 #include "mlx5_glue.h"
+#include "mlx5_rxtx.h"
+#include "mlx5_utils.h"
 
 /**
  * DPDK callback to configure a VLAN filter.
-- 
2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v3 2/4] net/mlx5: remove redundant queue index
  2019-04-05  1:33 ` [dpdk-dev] [PATCH v3 0/4] net/mlx: " Yongseok Koh
  2019-04-05  1:33   ` Yongseok Koh
  2019-04-05  1:33   ` [dpdk-dev] [PATCH v3 1/4] net/mlx5: fix recursive inclusion of header file Yongseok Koh
@ 2019-04-05  1:33   ` Yongseok Koh
  2019-04-05  1:33     ` Yongseok Koh
  2019-04-08  5:24     ` Shahaf Shuler
  2019-04-05  1:33   ` [dpdk-dev] [PATCH v3 3/4] net/mlx5: remove device register remap Yongseok Koh
  2019-04-05  1:33   ` [dpdk-dev] [PATCH v3 4/4] net/mlx4: " Yongseok Koh
  4 siblings, 2 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-05  1:33 UTC (permalink / raw)
  To: shahafs; +Cc: dev

The queue index is stored redundantly in both the Rx and Tx structures,
e.g. txq_ctrl->idx and txq->stats.idx. Both copies are consolidated into a
single field - rxq->idx and txq->idx.

Also, rxq and txq are moved to the beginning of their respective control
structures (rxq_ctrl and txq_ctrl) for cacheline alignment.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxq.c     | 29 ++++++++++++++---------------
 drivers/net/mlx5/mlx5_rxtx.h    | 10 ++++------
 drivers/net/mlx5/mlx5_stats.c   | 15 ++++++---------
 drivers/net/mlx5/mlx5_trigger.c |  2 +-
 drivers/net/mlx5/mlx5_txq.c     | 21 ++++++++++-----------
 5 files changed, 35 insertions(+), 42 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index dcb97c2100..8a84b0a1b5 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -156,7 +156,7 @@ rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 	}
 	DRV_LOG(DEBUG,
 		"port %u Rx queue %u allocated and configured %u segments",
-		rxq->port_id, rxq_ctrl->idx, wqe_n);
+		rxq->port_id, rxq->idx, wqe_n);
 	return 0;
 error:
 	err = rte_errno; /* Save rte_errno before cleanup. */
@@ -168,7 +168,7 @@ rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 		(*rxq->mprq_bufs)[i] = NULL;
 	}
 	DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
-		rxq->port_id, rxq_ctrl->idx);
+		rxq->port_id, rxq->idx);
 	rte_errno = err; /* Restore rte_errno. */
 	return -rte_errno;
 }
@@ -241,7 +241,7 @@ rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 	DRV_LOG(DEBUG,
 		"port %u Rx queue %u allocated and configured %u segments"
 		" (max %u packets)",
-		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx, elts_n,
+		PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx, elts_n,
 		elts_n / (1 << rxq_ctrl->rxq.sges_n));
 	return 0;
 error:
@@ -253,7 +253,7 @@ rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 		(*rxq_ctrl->rxq.elts)[i] = NULL;
 	}
 	DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
-		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx);
+		PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx);
 	rte_errno = err; /* Restore rte_errno. */
 	return -rte_errno;
 }
@@ -287,7 +287,7 @@ rxq_free_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 	uint16_t i;
 
 	DRV_LOG(DEBUG, "port %u Multi-Packet Rx queue %u freeing WRs",
-		rxq->port_id, rxq_ctrl->idx);
+		rxq->port_id, rxq->idx);
 	if (rxq->mprq_bufs == NULL)
 		return;
 	assert(mlx5_rxq_check_vec_support(rxq) < 0);
@@ -318,7 +318,7 @@ rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 	uint16_t i;
 
 	DRV_LOG(DEBUG, "port %u Rx queue %u freeing WRs",
-		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx);
+		PORT_ID(rxq_ctrl->priv), rxq->idx);
 	if (rxq->elts == NULL)
 		return;
 	/**
@@ -364,7 +364,7 @@ void
 mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
 {
 	DRV_LOG(DEBUG, "port %u cleaning up Rx queue %u",
-		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx);
+		PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx);
 	if (rxq_ctrl->ibv)
 		mlx5_rxq_ibv_release(rxq_ctrl->ibv);
 	memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
@@ -495,11 +495,11 @@ mlx5_rx_queue_release(void *dpdk_rxq)
 		return;
 	rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	priv = rxq_ctrl->priv;
-	if (!mlx5_rxq_releasable(ETH_DEV(priv), rxq_ctrl->rxq.stats.idx))
+	if (!mlx5_rxq_releasable(ETH_DEV(priv), rxq_ctrl->rxq.idx))
 		rte_panic("port %u Rx queue %u is still used by a flow and"
 			  " cannot be removed\n",
-			  PORT_ID(priv), rxq_ctrl->idx);
-	mlx5_rxq_release(ETH_DEV(priv), rxq_ctrl->rxq.stats.idx);
+			  PORT_ID(priv), rxq->idx);
+	mlx5_rxq_release(ETH_DEV(priv), rxq_ctrl->rxq.idx);
 }
 
 /**
@@ -793,7 +793,7 @@ mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	if (!tmpl) {
 		DRV_LOG(ERR,
 			"port %u Rx queue %u cannot allocate verbs resources",
-			dev->data->port_id, rxq_ctrl->idx);
+			dev->data->port_id, rxq_data->idx);
 		rte_errno = ENOMEM;
 		goto error;
 	}
@@ -1104,7 +1104,7 @@ mlx5_rxq_ibv_verify(struct rte_eth_dev *dev)
 
 	LIST_FOREACH(rxq_ibv, &priv->rxqsibv, next) {
 		DRV_LOG(DEBUG, "port %u Verbs Rx queue %u still referenced",
-			dev->data->port_id, rxq_ibv->rxq_ctrl->idx);
+			dev->data->port_id, rxq_ibv->rxq_ctrl->rxq.idx);
 		++ret;
 	}
 	return ret;
@@ -1470,7 +1470,6 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	tmpl->rxq.port_id = dev->data->port_id;
 	tmpl->priv = priv;
 	tmpl->rxq.mp = mp;
-	tmpl->rxq.stats.idx = idx;
 	tmpl->rxq.elts_n = log2above(desc);
 	tmpl->rxq.rq_repl_thresh =
 		MLX5_VPMD_RXQ_RPLNSH_THRESH(1 << tmpl->rxq.elts_n);
@@ -1479,7 +1478,7 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 #ifndef RTE_ARCH_64
 	tmpl->rxq.uar_lock_cq = &priv->uar_lock_cq;
 #endif
-	tmpl->idx = idx;
+	tmpl->rxq.idx = idx;
 	rte_atomic32_inc(&tmpl->refcnt);
 	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
 	return tmpl;
@@ -1592,7 +1591,7 @@ mlx5_rxq_verify(struct rte_eth_dev *dev)
 
 	LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
 		DRV_LOG(DEBUG, "port %u Rx Queue %u still referenced",
-			dev->data->port_id, rxq_ctrl->idx);
+			dev->data->port_id, rxq_ctrl->rxq.idx);
 		++ret;
 	}
 	return ret;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index ced9945888..7b58063ceb 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -41,7 +41,6 @@
 #define MLX5_FLOW_TUNNEL 5
 
 struct mlx5_rxq_stats {
-	unsigned int idx; /**< Mapping index. */
 #ifdef MLX5_PMD_SOFT_COUNTERS
 	uint64_t ipackets; /**< Total of successfully received packets. */
 	uint64_t ibytes; /**< Total of successfully received bytes. */
@@ -51,7 +50,6 @@ struct mlx5_rxq_stats {
 };
 
 struct mlx5_txq_stats {
-	unsigned int idx; /**< Mapping index. */
 #ifdef MLX5_PMD_SOFT_COUNTERS
 	uint64_t opackets; /**< Total of successfully sent packets. */
 	uint64_t obytes; /**< Total of successfully sent bytes. */
@@ -116,6 +114,7 @@ struct mlx5_rxq_data {
 	struct rte_mempool *mp;
 	struct rte_mempool *mprq_mp; /* Mempool for Multi-Packet RQ. */
 	struct mlx5_mprq_buf *mprq_repl; /* Stashed mbuf for replenish. */
+	uint16_t idx; /* Queue index. */
 	struct mlx5_rxq_stats stats;
 	uint64_t mbuf_initializer; /* Default rearm_data for vectorized Rx. */
 	struct rte_mbuf fake_mbuf; /* elts padding for vectorized Rx. */
@@ -141,14 +140,13 @@ struct mlx5_rxq_ibv {
 
 /* RX queue control descriptor. */
 struct mlx5_rxq_ctrl {
+	struct mlx5_rxq_data rxq; /* Data path structure. */
 	LIST_ENTRY(mlx5_rxq_ctrl) next; /* Pointer to the next element. */
 	rte_atomic32_t refcnt; /* Reference counter. */
 	struct mlx5_rxq_ibv *ibv; /* Verbs elements. */
 	struct mlx5_priv *priv; /* Back pointer to private data. */
-	struct mlx5_rxq_data rxq; /* Data path structure. */
 	unsigned int socket; /* CPU socket ID for allocations. */
 	unsigned int irq:1; /* Whether IRQ is enabled. */
-	uint16_t idx; /* Queue index. */
 	uint32_t flow_mark_n; /* Number of Mark/Flag flows using this Queue. */
 	uint32_t flow_tunnels_n[MLX5_FLOW_TUNNEL]; /* Tunnels counters. */
 };
@@ -205,6 +203,7 @@ struct mlx5_txq_data {
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
 	volatile void *bf_reg; /* Blueflame register remapped. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
+	uint16_t idx; /* Queue index. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
 #ifndef RTE_ARCH_64
 	rte_spinlock_t *uar_lock;
@@ -223,6 +222,7 @@ struct mlx5_txq_ibv {
 
 /* TX queue control descriptor. */
 struct mlx5_txq_ctrl {
+	struct mlx5_txq_data txq; /* Data path structure. */
 	LIST_ENTRY(mlx5_txq_ctrl) next; /* Pointer to the next element. */
 	rte_atomic32_t refcnt; /* Reference counter. */
 	unsigned int socket; /* CPU socket ID for allocations. */
@@ -230,10 +230,8 @@ struct mlx5_txq_ctrl {
 	unsigned int max_tso_header; /* Max TSO header size. */
 	struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
 	struct mlx5_priv *priv; /* Back pointer to private data. */
-	struct mlx5_txq_data txq; /* Data path structure. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
 	volatile void *bf_reg_orig; /* Blueflame register from verbs. */
-	uint16_t idx; /* Queue index. */
 };
 
 /* mlx5_rxq.c */
diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
index 5af199d0d5..ed50667f45 100644
--- a/drivers/net/mlx5/mlx5_stats.c
+++ b/drivers/net/mlx5/mlx5_stats.c
@@ -386,7 +386,7 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 
 		if (rxq == NULL)
 			continue;
-		idx = rxq->stats.idx;
+		idx = rxq->idx;
 		if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
 #ifdef MLX5_PMD_SOFT_COUNTERS
 			tmp.q_ipackets[idx] += rxq->stats.ipackets;
@@ -407,7 +407,7 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 
 		if (txq == NULL)
 			continue;
-		idx = txq->stats.idx;
+		idx = txq->idx;
 		if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
 #ifdef MLX5_PMD_SOFT_COUNTERS
 			tmp.q_opackets[idx] += txq->stats.opackets;
@@ -442,21 +442,18 @@ mlx5_stats_reset(struct rte_eth_dev *dev)
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_stats_ctrl *stats_ctrl = &priv->stats_ctrl;
 	unsigned int i;
-	unsigned int idx;
 
 	for (i = 0; (i != priv->rxqs_n); ++i) {
 		if ((*priv->rxqs)[i] == NULL)
 			continue;
-		idx = (*priv->rxqs)[i]->stats.idx;
-		(*priv->rxqs)[i]->stats =
-			(struct mlx5_rxq_stats){ .idx = idx };
+		memset(&(*priv->rxqs)[i]->stats, 0,
+		       sizeof(struct mlx5_rxq_stats));
 	}
 	for (i = 0; (i != priv->txqs_n); ++i) {
 		if ((*priv->txqs)[i] == NULL)
 			continue;
-		idx = (*priv->txqs)[i]->stats.idx;
-		(*priv->txqs)[i]->stats =
-			(struct mlx5_txq_stats){ .idx = idx };
+		memset(&(*priv->txqs)[i]->stats, 0,
+		       sizeof(struct mlx5_txq_stats));
 	}
 	mlx5_read_ib_stat(priv, "out_of_buffer", &stats_ctrl->imissed_base);
 #ifndef MLX5_PMD_SOFT_COUNTERS
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 5b73f0ff03..7c1e5594d6 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -123,7 +123,7 @@ mlx5_rxq_start(struct rte_eth_dev *dev)
 		DRV_LOG(DEBUG,
 			"port %u Rx queue %u registering"
 			" mp %s having %u chunks",
-			dev->data->port_id, rxq_ctrl->idx,
+			dev->data->port_id, rxq_ctrl->rxq.idx,
 			mp->name, mp->nb_mem_chunks);
 		mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl, mp);
 		ret = rxq_alloc_elts(rxq_ctrl);
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 1b3d89f2f6..4bd08cb035 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -48,7 +48,7 @@ txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl)
 	for (i = 0; (i != elts_n); ++i)
 		(*txq_ctrl->txq.elts)[i] = NULL;
 	DRV_LOG(DEBUG, "port %u Tx queue %u allocated and configured %u WRs",
-		PORT_ID(txq_ctrl->priv), txq_ctrl->idx, elts_n);
+		PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx, elts_n);
 	txq_ctrl->txq.elts_head = 0;
 	txq_ctrl->txq.elts_tail = 0;
 	txq_ctrl->txq.elts_comp = 0;
@@ -70,7 +70,7 @@ txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl)
 	struct rte_mbuf *(*elts)[elts_n] = txq_ctrl->txq.elts;
 
 	DRV_LOG(DEBUG, "port %u Tx queue %u freeing WRs",
-		PORT_ID(txq_ctrl->priv), txq_ctrl->idx);
+		PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx);
 	txq_ctrl->txq.elts_head = 0;
 	txq_ctrl->txq.elts_tail = 0;
 	txq_ctrl->txq.elts_comp = 0;
@@ -224,7 +224,7 @@ mlx5_tx_queue_release(void *dpdk_txq)
 		if ((*priv->txqs)[i] == txq) {
 			mlx5_txq_release(ETH_DEV(priv), i);
 			DRV_LOG(DEBUG, "port %u removing Tx queue %u from list",
-				PORT_ID(priv), txq_ctrl->idx);
+				PORT_ID(priv), txq->idx);
 			break;
 		}
 }
@@ -273,7 +273,7 @@ mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
 			continue;
 		txq = (*priv->txqs)[i];
 		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
-		assert(txq_ctrl->idx == (uint16_t)i);
+		assert(txq->idx == (uint16_t)i);
 		/* UAR addr form verbs used to find dup and offset in page. */
 		uar_va = (uintptr_t)txq_ctrl->bf_reg_orig;
 		off = uar_va & (page_size - 1); /* offset in page. */
@@ -301,7 +301,7 @@ mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
 				DRV_LOG(ERR,
 					"port %u call to mmap failed on UAR"
 					" for txq %u",
-					dev->data->port_id, txq_ctrl->idx);
+					dev->data->port_id, txq->idx);
 				rte_errno = ENXIO;
 				return -rte_errno;
 			}
@@ -629,7 +629,7 @@ mlx5_txq_ibv_verify(struct rte_eth_dev *dev)
 
 	LIST_FOREACH(txq_ibv, &priv->txqsibv, next) {
 		DRV_LOG(DEBUG, "port %u Verbs Tx queue %u still referenced",
-			dev->data->port_id, txq_ibv->txq_ctrl->idx);
+			dev->data->port_id, txq_ibv->txq_ctrl->txq.idx);
 		++ret;
 	}
 	return ret;
@@ -778,7 +778,7 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	tmpl->priv = priv;
 	tmpl->socket = socket;
 	tmpl->txq.elts_n = log2above(desc);
-	tmpl->idx = idx;
+	tmpl->txq.idx = idx;
 	txq_set_params(tmpl);
 	DRV_LOG(DEBUG, "port %u device_attr.max_qp_wr is %d",
 		dev->data->port_id, priv->sh->device_attr.orig_attr.max_qp_wr);
@@ -786,7 +786,6 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		dev->data->port_id, priv->sh->device_attr.orig_attr.max_sge);
 	tmpl->txq.elts =
 		(struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])(tmpl + 1);
-	tmpl->txq.stats.idx = idx;
 	rte_atomic32_inc(&tmpl->refcnt);
 	LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
 	return tmpl;
@@ -893,12 +892,12 @@ int
 mlx5_txq_verify(struct rte_eth_dev *dev)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_txq_ctrl *txq;
+	struct mlx5_txq_ctrl *txq_ctrl;
 	int ret = 0;
 
-	LIST_FOREACH(txq, &priv->txqsctrl, next) {
+	LIST_FOREACH(txq_ctrl, &priv->txqsctrl, next) {
 		DRV_LOG(DEBUG, "port %u Tx queue %u still referenced",
-			dev->data->port_id, txq->idx);
+			dev->data->port_id, txq_ctrl->txq.idx);
 		++ret;
 	}
 	return ret;
-- 
2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v3 2/4] net/mlx5: remove redundant queue index
  2019-04-05  1:33   ` [dpdk-dev] [PATCH v3 2/4] net/mlx5: remove redundant queue index Yongseok Koh
@ 2019-04-05  1:33     ` Yongseok Koh
  2019-04-08  5:24     ` Shahaf Shuler
  1 sibling, 0 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-05  1:33 UTC (permalink / raw)
  To: shahafs; +Cc: dev

The queue index is stored redundantly in both the Rx and Tx structures,
e.g. txq_ctrl->idx and txq->stats.idx. Both copies are consolidated into a
single field - rxq->idx and txq->idx.

Also, rxq and txq are moved to the beginning of their respective control
structures (rxq_ctrl and txq_ctrl) for cacheline alignment.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxq.c     | 29 ++++++++++++++---------------
 drivers/net/mlx5/mlx5_rxtx.h    | 10 ++++------
 drivers/net/mlx5/mlx5_stats.c   | 15 ++++++---------
 drivers/net/mlx5/mlx5_trigger.c |  2 +-
 drivers/net/mlx5/mlx5_txq.c     | 21 ++++++++++-----------
 5 files changed, 35 insertions(+), 42 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index dcb97c2100..8a84b0a1b5 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -156,7 +156,7 @@ rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 	}
 	DRV_LOG(DEBUG,
 		"port %u Rx queue %u allocated and configured %u segments",
-		rxq->port_id, rxq_ctrl->idx, wqe_n);
+		rxq->port_id, rxq->idx, wqe_n);
 	return 0;
 error:
 	err = rte_errno; /* Save rte_errno before cleanup. */
@@ -168,7 +168,7 @@ rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 		(*rxq->mprq_bufs)[i] = NULL;
 	}
 	DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
-		rxq->port_id, rxq_ctrl->idx);
+		rxq->port_id, rxq->idx);
 	rte_errno = err; /* Restore rte_errno. */
 	return -rte_errno;
 }
@@ -241,7 +241,7 @@ rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 	DRV_LOG(DEBUG,
 		"port %u Rx queue %u allocated and configured %u segments"
 		" (max %u packets)",
-		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx, elts_n,
+		PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx, elts_n,
 		elts_n / (1 << rxq_ctrl->rxq.sges_n));
 	return 0;
 error:
@@ -253,7 +253,7 @@ rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 		(*rxq_ctrl->rxq.elts)[i] = NULL;
 	}
 	DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
-		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx);
+		PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx);
 	rte_errno = err; /* Restore rte_errno. */
 	return -rte_errno;
 }
@@ -287,7 +287,7 @@ rxq_free_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 	uint16_t i;
 
 	DRV_LOG(DEBUG, "port %u Multi-Packet Rx queue %u freeing WRs",
-		rxq->port_id, rxq_ctrl->idx);
+		rxq->port_id, rxq->idx);
 	if (rxq->mprq_bufs == NULL)
 		return;
 	assert(mlx5_rxq_check_vec_support(rxq) < 0);
@@ -318,7 +318,7 @@ rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 	uint16_t i;
 
 	DRV_LOG(DEBUG, "port %u Rx queue %u freeing WRs",
-		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx);
+		PORT_ID(rxq_ctrl->priv), rxq->idx);
 	if (rxq->elts == NULL)
 		return;
 	/**
@@ -364,7 +364,7 @@ void
 mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
 {
 	DRV_LOG(DEBUG, "port %u cleaning up Rx queue %u",
-		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx);
+		PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx);
 	if (rxq_ctrl->ibv)
 		mlx5_rxq_ibv_release(rxq_ctrl->ibv);
 	memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
@@ -495,11 +495,11 @@ mlx5_rx_queue_release(void *dpdk_rxq)
 		return;
 	rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	priv = rxq_ctrl->priv;
-	if (!mlx5_rxq_releasable(ETH_DEV(priv), rxq_ctrl->rxq.stats.idx))
+	if (!mlx5_rxq_releasable(ETH_DEV(priv), rxq_ctrl->rxq.idx))
 		rte_panic("port %u Rx queue %u is still used by a flow and"
 			  " cannot be removed\n",
-			  PORT_ID(priv), rxq_ctrl->idx);
-	mlx5_rxq_release(ETH_DEV(priv), rxq_ctrl->rxq.stats.idx);
+			  PORT_ID(priv), rxq->idx);
+	mlx5_rxq_release(ETH_DEV(priv), rxq_ctrl->rxq.idx);
 }
 
 /**
@@ -793,7 +793,7 @@ mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	if (!tmpl) {
 		DRV_LOG(ERR,
 			"port %u Rx queue %u cannot allocate verbs resources",
-			dev->data->port_id, rxq_ctrl->idx);
+			dev->data->port_id, rxq_data->idx);
 		rte_errno = ENOMEM;
 		goto error;
 	}
@@ -1104,7 +1104,7 @@ mlx5_rxq_ibv_verify(struct rte_eth_dev *dev)
 
 	LIST_FOREACH(rxq_ibv, &priv->rxqsibv, next) {
 		DRV_LOG(DEBUG, "port %u Verbs Rx queue %u still referenced",
-			dev->data->port_id, rxq_ibv->rxq_ctrl->idx);
+			dev->data->port_id, rxq_ibv->rxq_ctrl->rxq.idx);
 		++ret;
 	}
 	return ret;
@@ -1470,7 +1470,6 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	tmpl->rxq.port_id = dev->data->port_id;
 	tmpl->priv = priv;
 	tmpl->rxq.mp = mp;
-	tmpl->rxq.stats.idx = idx;
 	tmpl->rxq.elts_n = log2above(desc);
 	tmpl->rxq.rq_repl_thresh =
 		MLX5_VPMD_RXQ_RPLNSH_THRESH(1 << tmpl->rxq.elts_n);
@@ -1479,7 +1478,7 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 #ifndef RTE_ARCH_64
 	tmpl->rxq.uar_lock_cq = &priv->uar_lock_cq;
 #endif
-	tmpl->idx = idx;
+	tmpl->rxq.idx = idx;
 	rte_atomic32_inc(&tmpl->refcnt);
 	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
 	return tmpl;
@@ -1592,7 +1591,7 @@ mlx5_rxq_verify(struct rte_eth_dev *dev)
 
 	LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
 		DRV_LOG(DEBUG, "port %u Rx Queue %u still referenced",
-			dev->data->port_id, rxq_ctrl->idx);
+			dev->data->port_id, rxq_ctrl->rxq.idx);
 		++ret;
 	}
 	return ret;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index ced9945888..7b58063ceb 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -41,7 +41,6 @@
 #define MLX5_FLOW_TUNNEL 5
 
 struct mlx5_rxq_stats {
-	unsigned int idx; /**< Mapping index. */
 #ifdef MLX5_PMD_SOFT_COUNTERS
 	uint64_t ipackets; /**< Total of successfully received packets. */
 	uint64_t ibytes; /**< Total of successfully received bytes. */
@@ -51,7 +50,6 @@ struct mlx5_rxq_stats {
 };
 
 struct mlx5_txq_stats {
-	unsigned int idx; /**< Mapping index. */
 #ifdef MLX5_PMD_SOFT_COUNTERS
 	uint64_t opackets; /**< Total of successfully sent packets. */
 	uint64_t obytes; /**< Total of successfully sent bytes. */
@@ -116,6 +114,7 @@ struct mlx5_rxq_data {
 	struct rte_mempool *mp;
 	struct rte_mempool *mprq_mp; /* Mempool for Multi-Packet RQ. */
 	struct mlx5_mprq_buf *mprq_repl; /* Stashed mbuf for replenish. */
+	uint16_t idx; /* Queue index. */
 	struct mlx5_rxq_stats stats;
 	uint64_t mbuf_initializer; /* Default rearm_data for vectorized Rx. */
 	struct rte_mbuf fake_mbuf; /* elts padding for vectorized Rx. */
@@ -141,14 +140,13 @@ struct mlx5_rxq_ibv {
 
 /* RX queue control descriptor. */
 struct mlx5_rxq_ctrl {
+	struct mlx5_rxq_data rxq; /* Data path structure. */
 	LIST_ENTRY(mlx5_rxq_ctrl) next; /* Pointer to the next element. */
 	rte_atomic32_t refcnt; /* Reference counter. */
 	struct mlx5_rxq_ibv *ibv; /* Verbs elements. */
 	struct mlx5_priv *priv; /* Back pointer to private data. */
-	struct mlx5_rxq_data rxq; /* Data path structure. */
 	unsigned int socket; /* CPU socket ID for allocations. */
 	unsigned int irq:1; /* Whether IRQ is enabled. */
-	uint16_t idx; /* Queue index. */
 	uint32_t flow_mark_n; /* Number of Mark/Flag flows using this Queue. */
 	uint32_t flow_tunnels_n[MLX5_FLOW_TUNNEL]; /* Tunnels counters. */
 };
@@ -205,6 +203,7 @@ struct mlx5_txq_data {
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
 	volatile void *bf_reg; /* Blueflame register remapped. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
+	uint16_t idx; /* Queue index. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
 #ifndef RTE_ARCH_64
 	rte_spinlock_t *uar_lock;
@@ -223,6 +222,7 @@ struct mlx5_txq_ibv {
 
 /* TX queue control descriptor. */
 struct mlx5_txq_ctrl {
+	struct mlx5_txq_data txq; /* Data path structure. */
 	LIST_ENTRY(mlx5_txq_ctrl) next; /* Pointer to the next element. */
 	rte_atomic32_t refcnt; /* Reference counter. */
 	unsigned int socket; /* CPU socket ID for allocations. */
@@ -230,10 +230,8 @@ struct mlx5_txq_ctrl {
 	unsigned int max_tso_header; /* Max TSO header size. */
 	struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
 	struct mlx5_priv *priv; /* Back pointer to private data. */
-	struct mlx5_txq_data txq; /* Data path structure. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
 	volatile void *bf_reg_orig; /* Blueflame register from verbs. */
-	uint16_t idx; /* Queue index. */
 };
 
 /* mlx5_rxq.c */
diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
index 5af199d0d5..ed50667f45 100644
--- a/drivers/net/mlx5/mlx5_stats.c
+++ b/drivers/net/mlx5/mlx5_stats.c
@@ -386,7 +386,7 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 
 		if (rxq == NULL)
 			continue;
-		idx = rxq->stats.idx;
+		idx = rxq->idx;
 		if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
 #ifdef MLX5_PMD_SOFT_COUNTERS
 			tmp.q_ipackets[idx] += rxq->stats.ipackets;
@@ -407,7 +407,7 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 
 		if (txq == NULL)
 			continue;
-		idx = txq->stats.idx;
+		idx = txq->idx;
 		if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
 #ifdef MLX5_PMD_SOFT_COUNTERS
 			tmp.q_opackets[idx] += txq->stats.opackets;
@@ -442,21 +442,18 @@ mlx5_stats_reset(struct rte_eth_dev *dev)
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_stats_ctrl *stats_ctrl = &priv->stats_ctrl;
 	unsigned int i;
-	unsigned int idx;
 
 	for (i = 0; (i != priv->rxqs_n); ++i) {
 		if ((*priv->rxqs)[i] == NULL)
 			continue;
-		idx = (*priv->rxqs)[i]->stats.idx;
-		(*priv->rxqs)[i]->stats =
-			(struct mlx5_rxq_stats){ .idx = idx };
+		memset(&(*priv->rxqs)[i]->stats, 0,
+		       sizeof(struct mlx5_rxq_stats));
 	}
 	for (i = 0; (i != priv->txqs_n); ++i) {
 		if ((*priv->txqs)[i] == NULL)
 			continue;
-		idx = (*priv->txqs)[i]->stats.idx;
-		(*priv->txqs)[i]->stats =
-			(struct mlx5_txq_stats){ .idx = idx };
+		memset(&(*priv->txqs)[i]->stats, 0,
+		       sizeof(struct mlx5_txq_stats));
 	}
 	mlx5_read_ib_stat(priv, "out_of_buffer", &stats_ctrl->imissed_base);
 #ifndef MLX5_PMD_SOFT_COUNTERS
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 5b73f0ff03..7c1e5594d6 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -123,7 +123,7 @@ mlx5_rxq_start(struct rte_eth_dev *dev)
 		DRV_LOG(DEBUG,
 			"port %u Rx queue %u registering"
 			" mp %s having %u chunks",
-			dev->data->port_id, rxq_ctrl->idx,
+			dev->data->port_id, rxq_ctrl->rxq.idx,
 			mp->name, mp->nb_mem_chunks);
 		mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl, mp);
 		ret = rxq_alloc_elts(rxq_ctrl);
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 1b3d89f2f6..4bd08cb035 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -48,7 +48,7 @@ txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl)
 	for (i = 0; (i != elts_n); ++i)
 		(*txq_ctrl->txq.elts)[i] = NULL;
 	DRV_LOG(DEBUG, "port %u Tx queue %u allocated and configured %u WRs",
-		PORT_ID(txq_ctrl->priv), txq_ctrl->idx, elts_n);
+		PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx, elts_n);
 	txq_ctrl->txq.elts_head = 0;
 	txq_ctrl->txq.elts_tail = 0;
 	txq_ctrl->txq.elts_comp = 0;
@@ -70,7 +70,7 @@ txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl)
 	struct rte_mbuf *(*elts)[elts_n] = txq_ctrl->txq.elts;
 
 	DRV_LOG(DEBUG, "port %u Tx queue %u freeing WRs",
-		PORT_ID(txq_ctrl->priv), txq_ctrl->idx);
+		PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx);
 	txq_ctrl->txq.elts_head = 0;
 	txq_ctrl->txq.elts_tail = 0;
 	txq_ctrl->txq.elts_comp = 0;
@@ -224,7 +224,7 @@ mlx5_tx_queue_release(void *dpdk_txq)
 		if ((*priv->txqs)[i] == txq) {
 			mlx5_txq_release(ETH_DEV(priv), i);
 			DRV_LOG(DEBUG, "port %u removing Tx queue %u from list",
-				PORT_ID(priv), txq_ctrl->idx);
+				PORT_ID(priv), txq->idx);
 			break;
 		}
 }
@@ -273,7 +273,7 @@ mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
 			continue;
 		txq = (*priv->txqs)[i];
 		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
-		assert(txq_ctrl->idx == (uint16_t)i);
+		assert(txq->idx == (uint16_t)i);
 		/* UAR addr form verbs used to find dup and offset in page. */
 		uar_va = (uintptr_t)txq_ctrl->bf_reg_orig;
 		off = uar_va & (page_size - 1); /* offset in page. */
@@ -301,7 +301,7 @@ mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
 				DRV_LOG(ERR,
 					"port %u call to mmap failed on UAR"
 					" for txq %u",
-					dev->data->port_id, txq_ctrl->idx);
+					dev->data->port_id, txq->idx);
 				rte_errno = ENXIO;
 				return -rte_errno;
 			}
@@ -629,7 +629,7 @@ mlx5_txq_ibv_verify(struct rte_eth_dev *dev)
 
 	LIST_FOREACH(txq_ibv, &priv->txqsibv, next) {
 		DRV_LOG(DEBUG, "port %u Verbs Tx queue %u still referenced",
-			dev->data->port_id, txq_ibv->txq_ctrl->idx);
+			dev->data->port_id, txq_ibv->txq_ctrl->txq.idx);
 		++ret;
 	}
 	return ret;
@@ -778,7 +778,7 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	tmpl->priv = priv;
 	tmpl->socket = socket;
 	tmpl->txq.elts_n = log2above(desc);
-	tmpl->idx = idx;
+	tmpl->txq.idx = idx;
 	txq_set_params(tmpl);
 	DRV_LOG(DEBUG, "port %u device_attr.max_qp_wr is %d",
 		dev->data->port_id, priv->sh->device_attr.orig_attr.max_qp_wr);
@@ -786,7 +786,6 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		dev->data->port_id, priv->sh->device_attr.orig_attr.max_sge);
 	tmpl->txq.elts =
 		(struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])(tmpl + 1);
-	tmpl->txq.stats.idx = idx;
 	rte_atomic32_inc(&tmpl->refcnt);
 	LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
 	return tmpl;
@@ -893,12 +892,12 @@ int
 mlx5_txq_verify(struct rte_eth_dev *dev)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_txq_ctrl *txq;
+	struct mlx5_txq_ctrl *txq_ctrl;
 	int ret = 0;
 
-	LIST_FOREACH(txq, &priv->txqsctrl, next) {
+	LIST_FOREACH(txq_ctrl, &priv->txqsctrl, next) {
 		DRV_LOG(DEBUG, "port %u Tx queue %u still referenced",
-			dev->data->port_id, txq->idx);
+			dev->data->port_id, txq_ctrl->txq.idx);
 		++ret;
 	}
 	return ret;
-- 
2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v3 3/4] net/mlx5: remove device register remap
  2019-04-05  1:33 ` [dpdk-dev] [PATCH v3 0/4] net/mlx: " Yongseok Koh
                     ` (2 preceding siblings ...)
  2019-04-05  1:33   ` [dpdk-dev] [PATCH v3 2/4] net/mlx5: remove redundant queue index Yongseok Koh
@ 2019-04-05  1:33   ` Yongseok Koh
  2019-04-05  1:33     ` Yongseok Koh
  2019-04-08  5:48     ` Shahaf Shuler
  2019-04-05  1:33   ` [dpdk-dev] [PATCH v3 4/4] net/mlx4: " Yongseok Koh
  4 siblings, 2 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-05  1:33 UTC (permalink / raw)
  To: shahafs; +Cc: dev

The UAR (User Access Region) register does not need to be remapped for the
primary process; it has to be remapped only for secondary processes. The UAR
register table is kept in the process private structure in rte_eth_devices[],
	(struct mlx5_proc_priv *)rte_eth_devices[port_id].process_private

The actual UAR table follows the data structure and the table is used for
both Tx and Rx.

For Tx, BlueFlame in UAR is used to ring the doorbell. MLX5_TX_BFREG(txq)
is defined to get a register for the txq. Each process accesses its own
private data to acquire the register from the UAR table.

For Rx, the doorbell in UAR is required for arming CQ events. However, it is
a known issue that the register isn't remapped for secondary processes.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.c         | 198 +++++-----------------------------------
 drivers/net/mlx5/mlx5.h         |  15 ++-
 drivers/net/mlx5/mlx5_ethdev.c  |  17 ++++
 drivers/net/mlx5/mlx5_rxtx.h    |  11 ++-
 drivers/net/mlx5/mlx5_trigger.c |   6 --
 drivers/net/mlx5/mlx5_txq.c     | 180 ++++++++++++++++++++++--------------
 6 files changed, 168 insertions(+), 259 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index f571ba2e97..c28a66fa07 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -449,30 +449,6 @@ mlx5_init_shared_data(void)
 }
 
 /**
- * Uninitialize shared data between primary and secondary process.
- *
- * The pointer of secondary process is dereferenced and primary process frees
- * the memzone.
- */
-static void
-mlx5_uninit_shared_data(void)
-{
-	const struct rte_memzone *mz;
-
-	rte_spinlock_lock(&mlx5_shared_data_lock);
-	if (mlx5_shared_data) {
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-			mz = rte_memzone_lookup(MZ_MLX5_PMD_SHARED_DATA);
-			rte_memzone_free(mz);
-		} else {
-			memset(&mlx5_local_data, 0, sizeof(mlx5_local_data));
-		}
-		mlx5_shared_data = NULL;
-	}
-	rte_spinlock_unlock(&mlx5_shared_data_lock);
-}
-
-/**
  * Retrieve integer value from environment variable.
  *
  * @param[in] name
@@ -589,6 +565,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 		priv->txqs_n = 0;
 		priv->txqs = NULL;
 	}
+	rte_free(dev->process_private);
+	dev->process_private = NULL;
 	mlx5_mprq_free_mp(dev);
 	mlx5_mr_release(dev);
 	assert(priv->sh);
@@ -906,132 +884,6 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
 
 static struct rte_pci_driver mlx5_driver;
 
-static int
-find_lower_va_bound(const struct rte_memseg_list *msl,
-		const struct rte_memseg *ms, void *arg)
-{
-	void **addr = arg;
-
-	if (msl->external)
-		return 0;
-	if (*addr == NULL)
-		*addr = ms->addr;
-	else
-		*addr = RTE_MIN(*addr, ms->addr);
-
-	return 0;
-}
-
-/**
- * Reserve UAR address space for primary process.
- *
- * Process local resource is used by both primary and secondary to avoid
- * duplicate reservation. The space has to be available on both primary and
- * secondary process, TXQ UAR maps to this area using fixed mmap w/o double
- * check.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_uar_init_primary(void)
-{
-	struct mlx5_shared_data *sd = mlx5_shared_data;
-	void *addr = (void *)0;
-
-	if (sd->uar_base)
-		return 0;
-	/* find out lower bound of hugepage segments */
-	rte_memseg_walk(find_lower_va_bound, &addr);
-	/* keep distance to hugepages to minimize potential conflicts. */
-	addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX5_UAR_OFFSET + MLX5_UAR_SIZE));
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(addr, MLX5_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		DRV_LOG(ERR,
-			"Failed to reserve UAR address space, please"
-			" adjust MLX5_UAR_SIZE or try --base-virtaddr");
-		rte_errno = ENOMEM;
-		return -rte_errno;
-	}
-	/* Accept either same addr or a new addr returned from mmap if target
-	 * range occupied.
-	 */
-	DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
-	sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
-	return 0;
-}
-
-/**
- * Unmap UAR address space reserved for primary process.
- */
-static void
-mlx5_uar_uninit_primary(void)
-{
-	struct mlx5_shared_data *sd = mlx5_shared_data;
-
-	if (!sd->uar_base)
-		return;
-	munmap(sd->uar_base, MLX5_UAR_SIZE);
-	sd->uar_base = NULL;
-}
-
-/**
- * Reserve UAR address space for secondary process, align with primary process.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_uar_init_secondary(void)
-{
-	struct mlx5_shared_data *sd = mlx5_shared_data;
-	struct mlx5_local_data *ld = &mlx5_local_data;
-	void *addr;
-
-	if (ld->uar_base) { /* Already reserved. */
-		assert(sd->uar_base == ld->uar_base);
-		return 0;
-	}
-	assert(sd->uar_base);
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(sd->uar_base, MLX5_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		DRV_LOG(ERR, "UAR mmap failed: %p size: %llu",
-			sd->uar_base, MLX5_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
-	}
-	if (sd->uar_base != addr) {
-		DRV_LOG(ERR,
-			"UAR address %p size %llu occupied, please"
-			" adjust MLX5_UAR_OFFSET or try EAL parameter"
-			" --base-virtaddr",
-			sd->uar_base, MLX5_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
-	}
-	ld->uar_base = addr;
-	DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
-	return 0;
-}
-
-/**
- * Unmap UAR address space reserved for secondary process.
- */
-static void
-mlx5_uar_uninit_secondary(void)
-{
-	struct mlx5_local_data *ld = &mlx5_local_data;
-
-	if (!ld->uar_base)
-		return;
-	munmap(ld->uar_base, MLX5_UAR_SIZE);
-	ld->uar_base = NULL;
-}
-
 /**
  * PMD global initialization.
  *
@@ -1047,7 +899,6 @@ mlx5_init_once(void)
 {
 	struct mlx5_shared_data *sd;
 	struct mlx5_local_data *ld = &mlx5_local_data;
-	int ret;
 
 	if (mlx5_init_shared_data())
 		return -rte_errno;
@@ -1063,18 +914,12 @@ mlx5_init_once(void)
 		rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
 						mlx5_mr_mem_event_cb, NULL);
 		mlx5_mp_init_primary();
-		ret = mlx5_uar_init_primary();
-		if (ret)
-			goto error;
 		sd->init_done = true;
 		break;
 	case RTE_PROC_SECONDARY:
 		if (ld->init_done)
 			break;
 		mlx5_mp_init_secondary();
-		ret = mlx5_uar_init_secondary();
-		if (ret)
-			goto error;
 		++sd->secondary_cnt;
 		ld->init_done = true;
 		break;
@@ -1083,23 +928,6 @@ mlx5_init_once(void)
 	}
 	rte_spinlock_unlock(&sd->lock);
 	return 0;
-error:
-	switch (rte_eal_process_type()) {
-	case RTE_PROC_PRIMARY:
-		mlx5_uar_uninit_primary();
-		mlx5_mp_uninit_primary();
-		rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB", NULL);
-		break;
-	case RTE_PROC_SECONDARY:
-		mlx5_uar_uninit_secondary();
-		mlx5_mp_uninit_secondary();
-		break;
-	default:
-		break;
-	}
-	rte_spinlock_unlock(&sd->lock);
-	mlx5_uninit_shared_data();
-	return -rte_errno;
 }
 
 /**
@@ -1182,12 +1010,32 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	}
 	DRV_LOG(DEBUG, "naming Ethernet device \"%s\"", name);
 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+		struct mlx5_proc_priv *ppriv;
+		size_t ppriv_size;
+
 		eth_dev = rte_eth_dev_attach_secondary(name);
 		if (eth_dev == NULL) {
 			DRV_LOG(ERR, "can not attach rte ethdev");
 			rte_errno = ENOMEM;
 			return NULL;
 		}
+		priv = eth_dev->data->dev_private;
+		/*
+		 * UAR register table follows the process private structure.
+		 * BlueFlame registers for Tx queues come first and registers
+		 * for Rx queues follows.
+		 */
+		ppriv_size = sizeof(struct mlx5_proc_priv) +
+			     (priv->rxqs_n + priv->txqs_n) * sizeof(void *);
+		ppriv = rte_malloc_socket("mlx5_proc_priv", ppriv_size,
+					  RTE_CACHE_LINE_SIZE,
+					  dpdk_dev->numa_node);
+		if (!ppriv) {
+			rte_errno = ENOMEM;
+			return NULL;
+		}
+		ppriv->uar_table_sz = ppriv_size;
+		eth_dev->process_private = ppriv;
 		eth_dev->device = dpdk_dev;
 		eth_dev->dev_ops = &mlx5_dev_sec_ops;
 		/* Receive command fd from primary process */
@@ -1195,7 +1043,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 		if (err < 0)
 			return NULL;
 		/* Remap UAR for Tx queues. */
-		err = mlx5_tx_uar_remap(eth_dev, err);
+		err = mlx5_tx_uar_init_secondary(eth_dev, err);
 		if (err)
 			return NULL;
 		/*
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 699c8fcf6d..1ac4ad71b1 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -97,8 +97,6 @@ struct mlx5_shared_data {
 	/* Global spinlock for primary and secondary processes. */
 	int init_done; /* Whether primary has done initialization. */
 	unsigned int secondary_cnt; /* Number of secondary processes init'd. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
 	struct mlx5_dev_list mem_event_cb_list;
 	rte_rwlock_t mem_event_rwlock;
 };
@@ -106,8 +104,6 @@ struct mlx5_shared_data {
 /* Per-process data structure, not visible to other processes. */
 struct mlx5_local_data {
 	int init_done; /* Whether a secondary has done initialization. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
 };
 
 extern struct mlx5_shared_data *mlx5_shared_data;
@@ -282,6 +278,17 @@ struct mlx5_ibv_shared {
 	struct mlx5_ibv_shared_port port[]; /* per device port data array. */
 };
 
+/* Per-process private structure. */
+struct mlx5_proc_priv {
+	size_t uar_table_sz;
+	/* Size of UAR register table. */
+	void *uar_table[];
+	/* Table of UAR registers for each process. */
+};
+
+#define MLX5_PROC_PRIV(port_id) \
+	((struct mlx5_proc_priv *)rte_eth_devices[port_id].process_private)
+
 struct mlx5_priv {
 	LIST_ENTRY(mlx5_priv) mem_event_cb;
 	/**< Called by memory event callback. */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 9ae9dddd3c..42297f11c9 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -382,6 +382,8 @@ int
 mlx5_dev_configure(struct rte_eth_dev *dev)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_proc_priv *ppriv;
+	size_t ppriv_size;
 	unsigned int rxqs_n = dev->data->nb_rx_queues;
 	unsigned int txqs_n = dev->data->nb_tx_queues;
 	unsigned int i;
@@ -450,6 +452,21 @@ mlx5_dev_configure(struct rte_eth_dev *dev)
 		if (++j == rxqs_n)
 			j = 0;
 	}
+	/*
+	 * UAR register table follows the process private structure. BlueFlame
+	 * registers for Tx queues come first and registers for Rx queues
+	 * follows.
+	 */
+	ppriv_size = sizeof(struct mlx5_proc_priv) +
+		     (priv->rxqs_n + priv->txqs_n) * sizeof(void *);
+	ppriv = rte_malloc_socket("mlx5_proc_priv", ppriv_size,
+				  RTE_CACHE_LINE_SIZE, dev->device->numa_node);
+	if (!ppriv) {
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	ppriv->uar_table_sz = ppriv_size;
+	dev->process_private = ppriv;
 	return 0;
 }
 
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 7b58063ceb..5d49892429 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -201,8 +201,8 @@ struct mlx5_txq_data {
 	volatile void *wqes; /* Work queue (use volatile to write into). */
 	volatile uint32_t *qp_db; /* Work queue doorbell. */
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
-	volatile void *bf_reg; /* Blueflame register remapped. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
+	uint16_t port_id; /* Port ID of device. */
 	uint16_t idx; /* Queue index. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
 #ifndef RTE_ARCH_64
@@ -231,9 +231,12 @@ struct mlx5_txq_ctrl {
 	struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
 	struct mlx5_priv *priv; /* Back pointer to private data. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
-	volatile void *bf_reg_orig; /* Blueflame register from verbs. */
+	void *bf_reg; /* BlueFlame register from Verbs. */
 };
 
+#define MLX5_TX_BFREG(txq) \
+		(MLX5_PROC_PRIV((txq)->port_id)->uar_table[(txq)->idx])
+
 /* mlx5_rxq.c */
 
 extern uint8_t rss_hash_default_key[];
@@ -301,7 +304,7 @@ uint64_t mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev);
 int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 			unsigned int socket, const struct rte_eth_txconf *conf);
 void mlx5_tx_queue_release(void *dpdk_txq);
-int mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd);
+int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
 struct mlx5_txq_ibv *mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx);
 struct mlx5_txq_ibv *mlx5_txq_ibv_get(struct rte_eth_dev *dev, uint16_t idx);
 int mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv);
@@ -704,7 +707,7 @@ static __rte_always_inline void
 mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe,
 		       int cond)
 {
-	uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
+	uint64_t *dst = MLX5_TX_BFREG(txq);
 	volatile uint64_t *src = ((volatile uint64_t *)wqe);
 
 	rte_cio_wmb();
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 7c1e5594d6..b7fde35758 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -58,12 +58,6 @@ mlx5_txq_start(struct rte_eth_dev *dev)
 			goto error;
 		}
 	}
-	ret = mlx5_tx_uar_remap(dev, priv->sh->ctx->cmd_fd);
-	if (ret) {
-		/* Adjust index for rollback. */
-		i = priv->txqs_n - 1;
-		goto error;
-	}
 	return 0;
 error:
 	ret = rte_errno; /* Save rte_errno before cleanup. */
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 4bd08cb035..5fb1761955 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -229,13 +229,99 @@ mlx5_tx_queue_release(void *dpdk_txq)
 		}
 }
 
+/**
+ * Initialize Tx UAR registers for primary process.
+ *
+ * @param txq_ctrl
+ *   Pointer to Tx queue control structure.
+ */
+static void
+txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl)
+{
+	struct mlx5_priv *priv = txq_ctrl->priv;
+	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
+
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	assert(ppriv);
+	ppriv->uar_table[txq_ctrl->txq.idx] = txq_ctrl->bf_reg;
+#ifndef RTE_ARCH_64
+	struct mlx5_priv *priv = txq_ctrl->priv;
+	struct mlx5_txq_data *txq = &txq_ctrl->txq;
+	unsigned int lock_idx;
+	/* Assign an UAR lock according to UAR page number */
+	lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
+		   MLX5_UAR_PAGE_NUM_MASK;
+	txq->uar_lock = &priv->uar_lock[lock_idx];
+#endif
+}
 
 /**
- * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
- * Both primary and secondary process do mmap to make UAR address
- * aligned.
+ * Remap UAR register of a Tx queue for secondary process.
  *
- * @param[in] dev
+ * Remapped address is stored at the table in the process private structure of
+ * the device, indexed by queue index.
+ *
+ * @param txq_ctrl
+ *   Pointer to Tx queue control structure.
+ * @param fd
+ *   Verbs file descriptor to map UAR pages.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+txq_uar_init_secondary(struct mlx5_txq_ctrl *txq_ctrl, int fd)
+{
+	struct mlx5_priv *priv = txq_ctrl->priv;
+	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
+	struct mlx5_txq_data *txq = &txq_ctrl->txq;
+	void *addr;
+	uintptr_t uar_va;
+	uintptr_t offset;
+	const size_t page_size = sysconf(_SC_PAGESIZE);
+
+	assert(ppriv);
+	/*
+	 * As rdma-core, UARs are mapped in size of OS page
+	 * size. Ref to libmlx5 function: mlx5_init_context()
+	 */
+	uar_va = (uintptr_t)txq_ctrl->bf_reg;
+	offset = uar_va & (page_size - 1); /* Offset in page. */
+	addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
+			txq_ctrl->uar_mmap_offset);
+	if (addr == MAP_FAILED) {
+		DRV_LOG(ERR,
+			"port %u mmap failed for BF reg of txq %u",
+			txq->port_id, txq->idx);
+		rte_errno = ENXIO;
+		return -rte_errno;
+	}
+	addr = RTE_PTR_ADD(addr, offset);
+	ppriv->uar_table[txq->idx] = addr;
+	return 0;
+}
+
+/**
+ * Unmap UAR register of a Tx queue for secondary process.
+ *
+ * @param txq_ctrl
+ *   Pointer to Tx queue control structure.
+ */
+static void
+txq_uar_uninit_secondary(struct mlx5_txq_ctrl *txq_ctrl)
+{
+	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(txq_ctrl->priv));
+	const size_t page_size = sysconf(_SC_PAGESIZE);
+	void *addr;
+
+	addr = ppriv->uar_table[txq_ctrl->txq.idx];
+	munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
+}
+
+/**
+ * Initialize Tx UAR registers for secondary process.
+ *
+ * @param dev
  *   Pointer to Ethernet device.
  * @param fd
  *   Verbs file descriptor to map UAR pages.
@@ -244,81 +330,36 @@ mlx5_tx_queue_release(void *dpdk_txq)
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
+mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	unsigned int i, j;
-	uintptr_t pages[priv->txqs_n];
-	unsigned int pages_n = 0;
-	uintptr_t uar_va;
-	uintptr_t off;
-	void *addr;
-	void *ret;
 	struct mlx5_txq_data *txq;
 	struct mlx5_txq_ctrl *txq_ctrl;
-	int already_mapped;
-	size_t page_size = sysconf(_SC_PAGESIZE);
-#ifndef RTE_ARCH_64
-	unsigned int lock_idx;
-#endif
+	unsigned int i;
+	int ret;
 
-	memset(pages, 0, priv->txqs_n * sizeof(uintptr_t));
-	/*
-	 * As rdma-core, UARs are mapped in size of OS page size.
-	 * Use aligned address to avoid duplicate mmap.
-	 * Ref to libmlx5 function: mlx5_init_context()
-	 */
+	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
 	for (i = 0; i != priv->txqs_n; ++i) {
 		if (!(*priv->txqs)[i])
 			continue;
 		txq = (*priv->txqs)[i];
 		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
 		assert(txq->idx == (uint16_t)i);
-		/* UAR addr form verbs used to find dup and offset in page. */
-		uar_va = (uintptr_t)txq_ctrl->bf_reg_orig;
-		off = uar_va & (page_size - 1); /* offset in page. */
-		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
-		already_mapped = 0;
-		for (j = 0; j != pages_n; ++j) {
-			if (pages[j] == uar_va) {
-				already_mapped = 1;
-				break;
-			}
-		}
-		/* new address in reserved UAR address space. */
-		addr = RTE_PTR_ADD(mlx5_shared_data->uar_base,
-				   uar_va & (uintptr_t)(MLX5_UAR_SIZE - 1));
-		if (!already_mapped) {
-			pages[pages_n++] = uar_va;
-			/* fixed mmap to specified address in reserved
-			 * address space.
-			 */
-			ret = mmap(addr, page_size,
-				   PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
-				   txq_ctrl->uar_mmap_offset);
-			if (ret != addr) {
-				/* fixed mmap have to return same address */
-				DRV_LOG(ERR,
-					"port %u call to mmap failed on UAR"
-					" for txq %u",
-					dev->data->port_id, txq->idx);
-				rte_errno = ENXIO;
-				return -rte_errno;
-			}
-		}
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once */
-			txq_ctrl->txq.bf_reg = RTE_PTR_ADD((void *)addr, off);
-		else
-			assert(txq_ctrl->txq.bf_reg ==
-			       RTE_PTR_ADD((void *)addr, off));
-#ifndef RTE_ARCH_64
-		/* Assign a UAR lock according to UAR page number */
-		lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
-			   MLX5_UAR_PAGE_NUM_MASK;
-		txq->uar_lock = &priv->uar_lock[lock_idx];
-#endif
+		ret = txq_uar_init_secondary(txq_ctrl, fd);
+		if (ret)
+			goto error;
 	}
 	return 0;
+error:
+	/* Rollback. */
+	do {
+		if (!(*priv->txqs)[i])
+			continue;
+		txq = (*priv->txqs)[i];
+		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
+		txq_uar_uninit_secondary(txq_ctrl);
+	} while (i--);
+	return -rte_errno;
 }
 
 /**
@@ -507,7 +548,6 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	txq_data->wqes = qp.sq.buf;
 	txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
 	txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
-	txq_ctrl->bf_reg_orig = qp.bf.reg;
 	txq_data->cq_db = cq_info.dbrec;
 	txq_data->cqes =
 		(volatile struct mlx5_cqe (*)[])
@@ -521,6 +561,8 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	txq_ibv->qp = tmpl.qp;
 	txq_ibv->cq = tmpl.cq;
 	rte_atomic32_inc(&txq_ibv->refcnt);
+	txq_ctrl->bf_reg = qp.bf.reg;
+	txq_uar_init(txq_ctrl);
 	if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
 		txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
 		DRV_LOG(DEBUG, "port %u: uar_mmap_offset 0x%lx",
@@ -778,6 +820,7 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	tmpl->priv = priv;
 	tmpl->socket = socket;
 	tmpl->txq.elts_n = log2above(desc);
+	tmpl->txq.port_id = dev->data->port_id;
 	tmpl->txq.idx = idx;
 	txq_set_params(tmpl);
 	DRV_LOG(DEBUG, "port %u device_attr.max_qp_wr is %d",
@@ -836,15 +879,12 @@ mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_txq_ctrl *txq;
-	size_t page_size = sysconf(_SC_PAGESIZE);
 
 	if (!(*priv->txqs)[idx])
 		return 0;
 	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
 	if (txq->ibv && !mlx5_txq_ibv_release(txq->ibv))
 		txq->ibv = NULL;
-	munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->txq.bf_reg, page_size),
-	       page_size);
 	if (rte_atomic32_dec_and_test(&txq->refcnt)) {
 		txq_free_elts(txq);
 		mlx5_mr_btree_free(&txq->txq.mr_ctrl.cache_bh);
-- 
2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v3 3/4] net/mlx5: remove device register remap
  2019-04-05  1:33   ` [dpdk-dev] [PATCH v3 3/4] net/mlx5: remove device register remap Yongseok Koh
@ 2019-04-05  1:33     ` Yongseok Koh
  2019-04-08  5:48     ` Shahaf Shuler
  1 sibling, 0 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-05  1:33 UTC (permalink / raw)
  To: shahafs; +Cc: dev

The UAR (User Access Region) register does not need to be remapped for the
primary process; it has to be remapped only for secondary processes. The UAR
register table is kept in the process private structure in rte_eth_devices[],
	(struct mlx5_proc_priv *)rte_eth_devices[port_id].process_private

The actual UAR table follows the data structure and the table is used for
both Tx and Rx.

For Tx, BlueFlame in UAR is used to ring the doorbell. MLX5_TX_BFREG(txq)
is defined to get a register for the txq. Each process accesses its own
private data to acquire the register from the UAR table.

For Rx, the doorbell in UAR is required for arming CQ events. However, it is
a known issue that the register isn't remapped for secondary processes.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.c         | 198 +++++-----------------------------------
 drivers/net/mlx5/mlx5.h         |  15 ++-
 drivers/net/mlx5/mlx5_ethdev.c  |  17 ++++
 drivers/net/mlx5/mlx5_rxtx.h    |  11 ++-
 drivers/net/mlx5/mlx5_trigger.c |   6 --
 drivers/net/mlx5/mlx5_txq.c     | 180 ++++++++++++++++++++++--------------
 6 files changed, 168 insertions(+), 259 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index f571ba2e97..c28a66fa07 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -449,30 +449,6 @@ mlx5_init_shared_data(void)
 }
 
 /**
- * Uninitialize shared data between primary and secondary process.
- *
- * The pointer of secondary process is dereferenced and primary process frees
- * the memzone.
- */
-static void
-mlx5_uninit_shared_data(void)
-{
-	const struct rte_memzone *mz;
-
-	rte_spinlock_lock(&mlx5_shared_data_lock);
-	if (mlx5_shared_data) {
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-			mz = rte_memzone_lookup(MZ_MLX5_PMD_SHARED_DATA);
-			rte_memzone_free(mz);
-		} else {
-			memset(&mlx5_local_data, 0, sizeof(mlx5_local_data));
-		}
-		mlx5_shared_data = NULL;
-	}
-	rte_spinlock_unlock(&mlx5_shared_data_lock);
-}
-
-/**
  * Retrieve integer value from environment variable.
  *
  * @param[in] name
@@ -589,6 +565,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 		priv->txqs_n = 0;
 		priv->txqs = NULL;
 	}
+	rte_free(dev->process_private);
+	dev->process_private = NULL;
 	mlx5_mprq_free_mp(dev);
 	mlx5_mr_release(dev);
 	assert(priv->sh);
@@ -906,132 +884,6 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
 
 static struct rte_pci_driver mlx5_driver;
 
-static int
-find_lower_va_bound(const struct rte_memseg_list *msl,
-		const struct rte_memseg *ms, void *arg)
-{
-	void **addr = arg;
-
-	if (msl->external)
-		return 0;
-	if (*addr == NULL)
-		*addr = ms->addr;
-	else
-		*addr = RTE_MIN(*addr, ms->addr);
-
-	return 0;
-}
-
-/**
- * Reserve UAR address space for primary process.
- *
- * Process local resource is used by both primary and secondary to avoid
- * duplicate reservation. The space has to be available on both primary and
- * secondary process, TXQ UAR maps to this area using fixed mmap w/o double
- * check.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_uar_init_primary(void)
-{
-	struct mlx5_shared_data *sd = mlx5_shared_data;
-	void *addr = (void *)0;
-
-	if (sd->uar_base)
-		return 0;
-	/* find out lower bound of hugepage segments */
-	rte_memseg_walk(find_lower_va_bound, &addr);
-	/* keep distance to hugepages to minimize potential conflicts. */
-	addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX5_UAR_OFFSET + MLX5_UAR_SIZE));
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(addr, MLX5_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		DRV_LOG(ERR,
-			"Failed to reserve UAR address space, please"
-			" adjust MLX5_UAR_SIZE or try --base-virtaddr");
-		rte_errno = ENOMEM;
-		return -rte_errno;
-	}
-	/* Accept either same addr or a new addr returned from mmap if target
-	 * range occupied.
-	 */
-	DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
-	sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
-	return 0;
-}
-
-/**
- * Unmap UAR address space reserved for primary process.
- */
-static void
-mlx5_uar_uninit_primary(void)
-{
-	struct mlx5_shared_data *sd = mlx5_shared_data;
-
-	if (!sd->uar_base)
-		return;
-	munmap(sd->uar_base, MLX5_UAR_SIZE);
-	sd->uar_base = NULL;
-}
-
-/**
- * Reserve UAR address space for secondary process, align with primary process.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_uar_init_secondary(void)
-{
-	struct mlx5_shared_data *sd = mlx5_shared_data;
-	struct mlx5_local_data *ld = &mlx5_local_data;
-	void *addr;
-
-	if (ld->uar_base) { /* Already reserved. */
-		assert(sd->uar_base == ld->uar_base);
-		return 0;
-	}
-	assert(sd->uar_base);
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(sd->uar_base, MLX5_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		DRV_LOG(ERR, "UAR mmap failed: %p size: %llu",
-			sd->uar_base, MLX5_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
-	}
-	if (sd->uar_base != addr) {
-		DRV_LOG(ERR,
-			"UAR address %p size %llu occupied, please"
-			" adjust MLX5_UAR_OFFSET or try EAL parameter"
-			" --base-virtaddr",
-			sd->uar_base, MLX5_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
-	}
-	ld->uar_base = addr;
-	DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
-	return 0;
-}
-
-/**
- * Unmap UAR address space reserved for secondary process.
- */
-static void
-mlx5_uar_uninit_secondary(void)
-{
-	struct mlx5_local_data *ld = &mlx5_local_data;
-
-	if (!ld->uar_base)
-		return;
-	munmap(ld->uar_base, MLX5_UAR_SIZE);
-	ld->uar_base = NULL;
-}
-
 /**
  * PMD global initialization.
  *
@@ -1047,7 +899,6 @@ mlx5_init_once(void)
 {
 	struct mlx5_shared_data *sd;
 	struct mlx5_local_data *ld = &mlx5_local_data;
-	int ret;
 
 	if (mlx5_init_shared_data())
 		return -rte_errno;
@@ -1063,18 +914,12 @@ mlx5_init_once(void)
 		rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
 						mlx5_mr_mem_event_cb, NULL);
 		mlx5_mp_init_primary();
-		ret = mlx5_uar_init_primary();
-		if (ret)
-			goto error;
 		sd->init_done = true;
 		break;
 	case RTE_PROC_SECONDARY:
 		if (ld->init_done)
 			break;
 		mlx5_mp_init_secondary();
-		ret = mlx5_uar_init_secondary();
-		if (ret)
-			goto error;
 		++sd->secondary_cnt;
 		ld->init_done = true;
 		break;
@@ -1083,23 +928,6 @@ mlx5_init_once(void)
 	}
 	rte_spinlock_unlock(&sd->lock);
 	return 0;
-error:
-	switch (rte_eal_process_type()) {
-	case RTE_PROC_PRIMARY:
-		mlx5_uar_uninit_primary();
-		mlx5_mp_uninit_primary();
-		rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB", NULL);
-		break;
-	case RTE_PROC_SECONDARY:
-		mlx5_uar_uninit_secondary();
-		mlx5_mp_uninit_secondary();
-		break;
-	default:
-		break;
-	}
-	rte_spinlock_unlock(&sd->lock);
-	mlx5_uninit_shared_data();
-	return -rte_errno;
 }
 
 /**
@@ -1182,12 +1010,32 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	}
 	DRV_LOG(DEBUG, "naming Ethernet device \"%s\"", name);
 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+		struct mlx5_proc_priv *ppriv;
+		size_t ppriv_size;
+
 		eth_dev = rte_eth_dev_attach_secondary(name);
 		if (eth_dev == NULL) {
 			DRV_LOG(ERR, "can not attach rte ethdev");
 			rte_errno = ENOMEM;
 			return NULL;
 		}
+		priv = eth_dev->data->dev_private;
+		/*
+		 * UAR register table follows the process private structure.
+		 * BlueFlame registers for Tx queues come first and registers
+		 * for Rx queues follows.
+		 */
+		ppriv_size = sizeof(struct mlx5_proc_priv) +
+			     (priv->rxqs_n + priv->txqs_n) * sizeof(void *);
+		ppriv = rte_malloc_socket("mlx5_proc_priv", ppriv_size,
+					  RTE_CACHE_LINE_SIZE,
+					  dpdk_dev->numa_node);
+		if (!ppriv) {
+			rte_errno = ENOMEM;
+			return NULL;
+		}
+		ppriv->uar_table_sz = ppriv_size;
+		eth_dev->process_private = ppriv;
 		eth_dev->device = dpdk_dev;
 		eth_dev->dev_ops = &mlx5_dev_sec_ops;
 		/* Receive command fd from primary process */
@@ -1195,7 +1043,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 		if (err < 0)
 			return NULL;
 		/* Remap UAR for Tx queues. */
-		err = mlx5_tx_uar_remap(eth_dev, err);
+		err = mlx5_tx_uar_init_secondary(eth_dev, err);
 		if (err)
 			return NULL;
 		/*
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 699c8fcf6d..1ac4ad71b1 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -97,8 +97,6 @@ struct mlx5_shared_data {
 	/* Global spinlock for primary and secondary processes. */
 	int init_done; /* Whether primary has done initialization. */
 	unsigned int secondary_cnt; /* Number of secondary processes init'd. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
 	struct mlx5_dev_list mem_event_cb_list;
 	rte_rwlock_t mem_event_rwlock;
 };
@@ -106,8 +104,6 @@ struct mlx5_shared_data {
 /* Per-process data structure, not visible to other processes. */
 struct mlx5_local_data {
 	int init_done; /* Whether a secondary has done initialization. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
 };
 
 extern struct mlx5_shared_data *mlx5_shared_data;
@@ -282,6 +278,17 @@ struct mlx5_ibv_shared {
 	struct mlx5_ibv_shared_port port[]; /* per device port data array. */
 };
 
+/* Per-process private structure. */
+struct mlx5_proc_priv {
+	size_t uar_table_sz;
+	/* Size of UAR register table. */
+	void *uar_table[];
+	/* Table of UAR registers for each process. */
+};
+
+#define MLX5_PROC_PRIV(port_id) \
+	((struct mlx5_proc_priv *)rte_eth_devices[port_id].process_private)
+
 struct mlx5_priv {
 	LIST_ENTRY(mlx5_priv) mem_event_cb;
 	/**< Called by memory event callback. */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 9ae9dddd3c..42297f11c9 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -382,6 +382,8 @@ int
 mlx5_dev_configure(struct rte_eth_dev *dev)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_proc_priv *ppriv;
+	size_t ppriv_size;
 	unsigned int rxqs_n = dev->data->nb_rx_queues;
 	unsigned int txqs_n = dev->data->nb_tx_queues;
 	unsigned int i;
@@ -450,6 +452,21 @@ mlx5_dev_configure(struct rte_eth_dev *dev)
 		if (++j == rxqs_n)
 			j = 0;
 	}
+	/*
+	 * UAR register table follows the process private structure. BlueFlame
+	 * registers for Tx queues come first and registers for Rx queues
+	 * follows.
+	 */
+	ppriv_size = sizeof(struct mlx5_proc_priv) +
+		     (priv->rxqs_n + priv->txqs_n) * sizeof(void *);
+	ppriv = rte_malloc_socket("mlx5_proc_priv", ppriv_size,
+				  RTE_CACHE_LINE_SIZE, dev->device->numa_node);
+	if (!ppriv) {
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	ppriv->uar_table_sz = ppriv_size;
+	dev->process_private = ppriv;
 	return 0;
 }
 
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 7b58063ceb..5d49892429 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -201,8 +201,8 @@ struct mlx5_txq_data {
 	volatile void *wqes; /* Work queue (use volatile to write into). */
 	volatile uint32_t *qp_db; /* Work queue doorbell. */
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
-	volatile void *bf_reg; /* Blueflame register remapped. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
+	uint16_t port_id; /* Port ID of device. */
 	uint16_t idx; /* Queue index. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
 #ifndef RTE_ARCH_64
@@ -231,9 +231,12 @@ struct mlx5_txq_ctrl {
 	struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
 	struct mlx5_priv *priv; /* Back pointer to private data. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
-	volatile void *bf_reg_orig; /* Blueflame register from verbs. */
+	void *bf_reg; /* BlueFlame register from Verbs. */
 };
 
+#define MLX5_TX_BFREG(txq) \
+		(MLX5_PROC_PRIV((txq)->port_id)->uar_table[(txq)->idx])
+
 /* mlx5_rxq.c */
 
 extern uint8_t rss_hash_default_key[];
@@ -301,7 +304,7 @@ uint64_t mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev);
 int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 			unsigned int socket, const struct rte_eth_txconf *conf);
 void mlx5_tx_queue_release(void *dpdk_txq);
-int mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd);
+int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
 struct mlx5_txq_ibv *mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx);
 struct mlx5_txq_ibv *mlx5_txq_ibv_get(struct rte_eth_dev *dev, uint16_t idx);
 int mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv);
@@ -704,7 +707,7 @@ static __rte_always_inline void
 mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe,
 		       int cond)
 {
-	uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
+	uint64_t *dst = MLX5_TX_BFREG(txq);
 	volatile uint64_t *src = ((volatile uint64_t *)wqe);
 
 	rte_cio_wmb();
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 7c1e5594d6..b7fde35758 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -58,12 +58,6 @@ mlx5_txq_start(struct rte_eth_dev *dev)
 			goto error;
 		}
 	}
-	ret = mlx5_tx_uar_remap(dev, priv->sh->ctx->cmd_fd);
-	if (ret) {
-		/* Adjust index for rollback. */
-		i = priv->txqs_n - 1;
-		goto error;
-	}
 	return 0;
 error:
 	ret = rte_errno; /* Save rte_errno before cleanup. */
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 4bd08cb035..5fb1761955 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -229,13 +229,99 @@ mlx5_tx_queue_release(void *dpdk_txq)
 		}
 }
 
+/**
+ * Initialize Tx UAR registers for primary process.
+ *
+ * @param txq_ctrl
+ *   Pointer to Tx queue control structure.
+ */
+static void
+txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl)
+{
+	struct mlx5_priv *priv = txq_ctrl->priv;
+	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
+
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	assert(ppriv);
+	ppriv->uar_table[txq_ctrl->txq.idx] = txq_ctrl->bf_reg;
+#ifndef RTE_ARCH_64
+	struct mlx5_priv *priv = txq_ctrl->priv;
+	struct mlx5_txq_data *txq = &txq_ctrl->txq;
+	unsigned int lock_idx;
+	/* Assign an UAR lock according to UAR page number */
+	lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
+		   MLX5_UAR_PAGE_NUM_MASK;
+	txq->uar_lock = &priv->uar_lock[lock_idx];
+#endif
+}
 
 /**
- * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
- * Both primary and secondary process do mmap to make UAR address
- * aligned.
+ * Remap UAR register of a Tx queue for secondary process.
  *
- * @param[in] dev
+ * Remapped address is stored at the table in the process private structure of
+ * the device, indexed by queue index.
+ *
+ * @param txq_ctrl
+ *   Pointer to Tx queue control structure.
+ * @param fd
+ *   Verbs file descriptor to map UAR pages.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+txq_uar_init_secondary(struct mlx5_txq_ctrl *txq_ctrl, int fd)
+{
+	struct mlx5_priv *priv = txq_ctrl->priv;
+	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
+	struct mlx5_txq_data *txq = &txq_ctrl->txq;
+	void *addr;
+	uintptr_t uar_va;
+	uintptr_t offset;
+	const size_t page_size = sysconf(_SC_PAGESIZE);
+
+	assert(ppriv);
+	/*
+	 * As rdma-core, UARs are mapped in size of OS page
+	 * size. Ref to libmlx5 function: mlx5_init_context()
+	 */
+	uar_va = (uintptr_t)txq_ctrl->bf_reg;
+	offset = uar_va & (page_size - 1); /* Offset in page. */
+	addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
+			txq_ctrl->uar_mmap_offset);
+	if (addr == MAP_FAILED) {
+		DRV_LOG(ERR,
+			"port %u mmap failed for BF reg of txq %u",
+			txq->port_id, txq->idx);
+		rte_errno = ENXIO;
+		return -rte_errno;
+	}
+	addr = RTE_PTR_ADD(addr, offset);
+	ppriv->uar_table[txq->idx] = addr;
+	return 0;
+}
+
+/**
+ * Unmap UAR register of a Tx queue for secondary process.
+ *
+ * @param txq_ctrl
+ *   Pointer to Tx queue control structure.
+ */
+static void
+txq_uar_uninit_secondary(struct mlx5_txq_ctrl *txq_ctrl)
+{
+	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(txq_ctrl->priv));
+	const size_t page_size = sysconf(_SC_PAGESIZE);
+	void *addr;
+
+	addr = ppriv->uar_table[txq_ctrl->txq.idx];
+	munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
+}
+
+/**
+ * Initialize Tx UAR registers for secondary process.
+ *
+ * @param dev
  *   Pointer to Ethernet device.
  * @param fd
  *   Verbs file descriptor to map UAR pages.
@@ -244,81 +330,36 @@ mlx5_tx_queue_release(void *dpdk_txq)
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
+mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	unsigned int i, j;
-	uintptr_t pages[priv->txqs_n];
-	unsigned int pages_n = 0;
-	uintptr_t uar_va;
-	uintptr_t off;
-	void *addr;
-	void *ret;
 	struct mlx5_txq_data *txq;
 	struct mlx5_txq_ctrl *txq_ctrl;
-	int already_mapped;
-	size_t page_size = sysconf(_SC_PAGESIZE);
-#ifndef RTE_ARCH_64
-	unsigned int lock_idx;
-#endif
+	unsigned int i;
+	int ret;
 
-	memset(pages, 0, priv->txqs_n * sizeof(uintptr_t));
-	/*
-	 * As rdma-core, UARs are mapped in size of OS page size.
-	 * Use aligned address to avoid duplicate mmap.
-	 * Ref to libmlx5 function: mlx5_init_context()
-	 */
+	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
 	for (i = 0; i != priv->txqs_n; ++i) {
 		if (!(*priv->txqs)[i])
 			continue;
 		txq = (*priv->txqs)[i];
 		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
 		assert(txq->idx == (uint16_t)i);
-		/* UAR addr form verbs used to find dup and offset in page. */
-		uar_va = (uintptr_t)txq_ctrl->bf_reg_orig;
-		off = uar_va & (page_size - 1); /* offset in page. */
-		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
-		already_mapped = 0;
-		for (j = 0; j != pages_n; ++j) {
-			if (pages[j] == uar_va) {
-				already_mapped = 1;
-				break;
-			}
-		}
-		/* new address in reserved UAR address space. */
-		addr = RTE_PTR_ADD(mlx5_shared_data->uar_base,
-				   uar_va & (uintptr_t)(MLX5_UAR_SIZE - 1));
-		if (!already_mapped) {
-			pages[pages_n++] = uar_va;
-			/* fixed mmap to specified address in reserved
-			 * address space.
-			 */
-			ret = mmap(addr, page_size,
-				   PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
-				   txq_ctrl->uar_mmap_offset);
-			if (ret != addr) {
-				/* fixed mmap have to return same address */
-				DRV_LOG(ERR,
-					"port %u call to mmap failed on UAR"
-					" for txq %u",
-					dev->data->port_id, txq->idx);
-				rte_errno = ENXIO;
-				return -rte_errno;
-			}
-		}
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once */
-			txq_ctrl->txq.bf_reg = RTE_PTR_ADD((void *)addr, off);
-		else
-			assert(txq_ctrl->txq.bf_reg ==
-			       RTE_PTR_ADD((void *)addr, off));
-#ifndef RTE_ARCH_64
-		/* Assign a UAR lock according to UAR page number */
-		lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
-			   MLX5_UAR_PAGE_NUM_MASK;
-		txq->uar_lock = &priv->uar_lock[lock_idx];
-#endif
+		ret = txq_uar_init_secondary(txq_ctrl, fd);
+		if (ret)
+			goto error;
 	}
 	return 0;
+error:
+	/* Rollback. */
+	do {
+		if (!(*priv->txqs)[i])
+			continue;
+		txq = (*priv->txqs)[i];
+		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
+		txq_uar_uninit_secondary(txq_ctrl);
+	} while (i--);
+	return -rte_errno;
 }
 
 /**
@@ -507,7 +548,6 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	txq_data->wqes = qp.sq.buf;
 	txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
 	txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
-	txq_ctrl->bf_reg_orig = qp.bf.reg;
 	txq_data->cq_db = cq_info.dbrec;
 	txq_data->cqes =
 		(volatile struct mlx5_cqe (*)[])
@@ -521,6 +561,8 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	txq_ibv->qp = tmpl.qp;
 	txq_ibv->cq = tmpl.cq;
 	rte_atomic32_inc(&txq_ibv->refcnt);
+	txq_ctrl->bf_reg = qp.bf.reg;
+	txq_uar_init(txq_ctrl);
 	if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
 		txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
 		DRV_LOG(DEBUG, "port %u: uar_mmap_offset 0x%lx",
@@ -778,6 +820,7 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	tmpl->priv = priv;
 	tmpl->socket = socket;
 	tmpl->txq.elts_n = log2above(desc);
+	tmpl->txq.port_id = dev->data->port_id;
 	tmpl->txq.idx = idx;
 	txq_set_params(tmpl);
 	DRV_LOG(DEBUG, "port %u device_attr.max_qp_wr is %d",
@@ -836,15 +879,12 @@ mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_txq_ctrl *txq;
-	size_t page_size = sysconf(_SC_PAGESIZE);
 
 	if (!(*priv->txqs)[idx])
 		return 0;
 	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
 	if (txq->ibv && !mlx5_txq_ibv_release(txq->ibv))
 		txq->ibv = NULL;
-	munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->txq.bf_reg, page_size),
-	       page_size);
 	if (rte_atomic32_dec_and_test(&txq->refcnt)) {
 		txq_free_elts(txq);
 		mlx5_mr_btree_free(&txq->txq.mr_ctrl.cache_bh);
-- 
2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v3 4/4] net/mlx4: remove device register remap
  2019-04-05  1:33 ` [dpdk-dev] [PATCH v3 0/4] net/mlx: " Yongseok Koh
                     ` (3 preceding siblings ...)
  2019-04-05  1:33   ` [dpdk-dev] [PATCH v3 3/4] net/mlx5: remove device register remap Yongseok Koh
@ 2019-04-05  1:33   ` Yongseok Koh
  2019-04-05  1:33     ` Yongseok Koh
  4 siblings, 1 reply; 66+ messages in thread
From: Yongseok Koh @ 2019-04-05  1:33 UTC (permalink / raw)
  To: shahafs; +Cc: dev

The UAR (User Access Region) register does not need to be remapped for the
primary process; it has to be remapped only for secondary processes. The UAR
register table is in the process private structure in rte_eth_devices[]:
	(struct mlx4_proc_priv *)rte_eth_devices[port_id].process_private

The actual UAR table follows the data structure and the table is used for
both Tx and Rx.

For Tx, the BlueFlame register in the UAR is used to ring the doorbell.
MLX4_TX_BFREG(txq) is defined to get the register for a given txq. Each
process accesses its own private data to acquire the register from the UAR
table.

For Rx, the doorbell in the UAR is required for arming CQ events. However,
it is a known issue that the register isn't remapped for secondary processes.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx4/mlx4.c      | 232 ++++++++-----------------------------------
 drivers/net/mlx4/mlx4.h      |  15 ++-
 drivers/net/mlx4/mlx4_prm.h  |   3 +-
 drivers/net/mlx4/mlx4_rxtx.c |   2 +-
 drivers/net/mlx4/mlx4_rxtx.h |   6 +-
 drivers/net/mlx4/mlx4_txq.c  | 170 +++++++++++++++++++------------
 6 files changed, 165 insertions(+), 263 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 252658fc6a..b22fe11e6c 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -126,30 +126,6 @@ mlx4_init_shared_data(void)
 	return ret;
 }
 
-/**
- * Uninitialize shared data between primary and secondary process.
- *
- * The pointer of secondary process is dereferenced and primary process frees
- * the memzone.
- */
-static void
-mlx4_uninit_shared_data(void)
-{
-	const struct rte_memzone *mz;
-
-	rte_spinlock_lock(&mlx4_shared_data_lock);
-	if (mlx4_shared_data) {
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-			mz = rte_memzone_lookup(MZ_MLX4_PMD_SHARED_DATA);
-			rte_memzone_free(mz);
-		} else {
-			memset(&mlx4_local_data, 0, sizeof(mlx4_local_data));
-		}
-		mlx4_shared_data = NULL;
-	}
-	rte_spinlock_unlock(&mlx4_shared_data_lock);
-}
-
 #ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS
 /**
  * Verbs callback to allocate a memory. This function should allocate the space
@@ -219,6 +195,8 @@ static int
 mlx4_dev_configure(struct rte_eth_dev *dev)
 {
 	struct mlx4_priv *priv = dev->data->dev_private;
+	struct mlx4_proc_priv *ppriv;
+	size_t ppriv_size;
 	struct rte_flow_error error;
 	int ret;
 
@@ -235,6 +213,22 @@ mlx4_dev_configure(struct rte_eth_dev *dev)
 	if (ret)
 		ERROR("%p: interrupt handler installation failed",
 		      (void *)dev);
+	/*
+	 * UAR register table follows the process private structure. BlueFlame
+	 * registers for Tx queues come first and registers for Rx queues
+	 * follows.
+	 */
+	ppriv_size = sizeof(struct mlx4_proc_priv) +
+		     (dev->data->nb_rx_queues + dev->data->nb_tx_queues) *
+		     sizeof(void *);
+	ppriv = rte_malloc_socket("mlx4_proc_priv", ppriv_size,
+				  RTE_CACHE_LINE_SIZE, dev->device->numa_node);
+	if (!ppriv) {
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	ppriv->uar_table_sz = ppriv_size;
+	dev->process_private = ppriv;
 exit:
 	return ret;
 }
@@ -262,11 +256,6 @@ mlx4_dev_start(struct rte_eth_dev *dev)
 		return 0;
 	DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
 	priv->started = 1;
-	ret = mlx4_tx_uar_remap(dev, priv->ctx->cmd_fd);
-	if (ret) {
-		ERROR("%p: cannot remap UAR", (void *)dev);
-		goto err;
-	}
 	ret = mlx4_rss_init(priv);
 	if (ret) {
 		ERROR("%p: cannot initialize RSS resources: %s",
@@ -314,8 +303,6 @@ static void
 mlx4_dev_stop(struct rte_eth_dev *dev)
 {
 	struct mlx4_priv *priv = dev->data->dev_private;
-	const size_t page_size = sysconf(_SC_PAGESIZE);
-	int i;
 
 	if (!priv->started)
 		return;
@@ -326,18 +313,11 @@ mlx4_dev_stop(struct rte_eth_dev *dev)
 	rte_wmb();
 	/* Disable datapath on secondary process. */
 	mlx4_mp_req_stop_rxtx(dev);
+	rte_free(dev->process_private);
+	dev->process_private = NULL;
 	mlx4_flow_sync(priv, NULL);
 	mlx4_rxq_intr_disable(priv);
 	mlx4_rss_deinit(priv);
-	for (i = 0; i != dev->data->nb_tx_queues; ++i) {
-		struct txq *txq;
-
-		txq = dev->data->tx_queues[i];
-		if (!txq)
-			continue;
-		munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->msq.db,
-					       page_size), page_size);
-	}
 }
 
 /**
@@ -662,130 +642,6 @@ mlx4_hw_rss_sup(struct ibv_context *ctx, struct ibv_pd *pd,
 
 static struct rte_pci_driver mlx4_driver;
 
-static int
-find_lower_va_bound(const struct rte_memseg_list *msl,
-		const struct rte_memseg *ms, void *arg)
-{
-	void **addr = arg;
-
-	if (msl->external)
-		return 0;
-	if (*addr == NULL)
-		*addr = ms->addr;
-	else
-		*addr = RTE_MIN(*addr, ms->addr);
-
-	return 0;
-}
-
-/**
- * Reserve UAR address space for primary process.
- *
- * Process local resource is used by both primary and secondary to avoid
- * duplicate reservation. The space has to be available on both primary and
- * secondary process, TXQ UAR maps to this area using fixed mmap w/o double
- * check.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx4_uar_init_primary(void)
-{
-	struct mlx4_shared_data *sd = mlx4_shared_data;
-	void *addr = (void *)0;
-
-	if (sd->uar_base)
-		return 0;
-	/* find out lower bound of hugepage segments */
-	rte_memseg_walk(find_lower_va_bound, &addr);
-	/* keep distance to hugepages to minimize potential conflicts. */
-	addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX4_UAR_OFFSET + MLX4_UAR_SIZE));
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(addr, MLX4_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		ERROR("failed to reserve UAR address space, please"
-		      " adjust MLX4_UAR_SIZE or try --base-virtaddr");
-		rte_errno = ENOMEM;
-		return -rte_errno;
-	}
-	/* Accept either same addr or a new addr returned from mmap if target
-	 * range occupied.
-	 */
-	INFO("reserved UAR address space: %p", addr);
-	sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
-	return 0;
-}
-
-/**
- * Unmap UAR address space reserved for primary process.
- */
-static void
-mlx4_uar_uninit_primary(void)
-{
-	struct mlx4_shared_data *sd = mlx4_shared_data;
-
-	if (!sd->uar_base)
-		return;
-	munmap(sd->uar_base, MLX4_UAR_SIZE);
-	sd->uar_base = NULL;
-}
-
-/**
- * Reserve UAR address space for secondary process, align with primary process.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx4_uar_init_secondary(void)
-{
-	struct mlx4_shared_data *sd = mlx4_shared_data;
-	struct mlx4_local_data *ld = &mlx4_local_data;
-	void *addr;
-
-	if (ld->uar_base) { /* Already reserved. */
-		assert(sd->uar_base == ld->uar_base);
-		return 0;
-	}
-	assert(sd->uar_base);
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(sd->uar_base, MLX4_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		ERROR("UAR mmap failed: %p size: %llu",
-		      sd->uar_base, MLX4_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
-	}
-	if (sd->uar_base != addr) {
-		ERROR("UAR address %p size %llu occupied, please"
-		      " adjust MLX4_UAR_OFFSET or try EAL parameter"
-		      " --base-virtaddr",
-		      sd->uar_base, MLX4_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
-	}
-	ld->uar_base = addr;
-	INFO("reserved UAR address space: %p", addr);
-	return 0;
-}
-
-/**
- * Unmap UAR address space reserved for secondary process.
- */
-static void
-mlx4_uar_uninit_secondary(void)
-{
-	struct mlx4_local_data *ld = &mlx4_local_data;
-
-	if (!ld->uar_base)
-		return;
-	munmap(ld->uar_base, MLX4_UAR_SIZE);
-	ld->uar_base = NULL;
-}
-
 /**
  * PMD global initialization.
  *
@@ -801,7 +657,6 @@ mlx4_init_once(void)
 {
 	struct mlx4_shared_data *sd;
 	struct mlx4_local_data *ld = &mlx4_local_data;
-	int ret;
 
 	if (mlx4_init_shared_data())
 		return -rte_errno;
@@ -817,18 +672,12 @@ mlx4_init_once(void)
 		rte_mem_event_callback_register("MLX4_MEM_EVENT_CB",
 						mlx4_mr_mem_event_cb, NULL);
 		mlx4_mp_init_primary();
-		ret = mlx4_uar_init_primary();
-		if (ret)
-			goto error;
 		sd->init_done = true;
 		break;
 	case RTE_PROC_SECONDARY:
 		if (ld->init_done)
 			break;
 		mlx4_mp_init_secondary();
-		ret = mlx4_uar_init_secondary();
-		if (ret)
-			goto error;
 		++sd->secondary_cnt;
 		ld->init_done = true;
 		break;
@@ -837,23 +686,6 @@ mlx4_init_once(void)
 	}
 	rte_spinlock_unlock(&sd->lock);
 	return 0;
-error:
-	switch (rte_eal_process_type()) {
-	case RTE_PROC_PRIMARY:
-		mlx4_uar_uninit_primary();
-		mlx4_mp_uninit_primary();
-		rte_mem_event_callback_unregister("MLX4_MEM_EVENT_CB", NULL);
-		break;
-	case RTE_PROC_SECONDARY:
-		mlx4_uar_uninit_secondary();
-		mlx4_mp_uninit_secondary();
-		break;
-	default:
-		break;
-	}
-	rte_spinlock_unlock(&sd->lock);
-	mlx4_uninit_shared_data();
-	return -rte_errno;
 }
 
 /**
@@ -987,6 +819,9 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 		snprintf(name, sizeof(name), "%s port %u",
 			 mlx4_glue->get_device_name(ibv_dev), port);
 		if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+			struct mlx4_proc_priv *ppriv;
+			size_t ppriv_size;
+
 			eth_dev = rte_eth_dev_attach_secondary(name);
 			if (eth_dev == NULL) {
 				ERROR("can not attach rte ethdev");
@@ -1003,6 +838,25 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 				err = rte_errno;
 				goto error;
 			}
+			/*
+			 * UAR register table follows the process private
+			 * structure.  BlueFlame registers for Tx queues come
+			 * first and registers for Rx queues follows.
+			 */
+			ppriv_size =
+				sizeof(struct mlx4_proc_priv) +
+				(eth_dev->data->nb_rx_queues +
+				 eth_dev->data->nb_tx_queues) * sizeof(void *);
+			ppriv = rte_malloc_socket("mlx4_proc_priv", ppriv_size,
+						  RTE_CACHE_LINE_SIZE,
+						  pci_dev->device.numa_node);
+			if (!ppriv) {
+				rte_errno = ENOMEM;
+				err = rte_errno;
+				goto error;
+			}
+			ppriv->uar_table_sz = ppriv_size;
+			eth_dev->process_private = ppriv;
 			eth_dev->device = &pci_dev->device;
 			eth_dev->dev_ops = &mlx4_dev_sec_ops;
 			/* Receive command fd from primary process. */
@@ -1012,7 +866,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 				goto error;
 			}
 			/* Remap UAR for Tx queues. */
-			err = mlx4_tx_uar_remap(eth_dev, err);
+			err = mlx4_tx_uar_init_secondary(eth_dev, err);
 			if (err) {
 				err = rte_errno;
 				goto error;
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 1db23d6cc9..904c4f5c03 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -138,8 +138,6 @@ struct mlx4_shared_data {
 	/* Global spinlock for primary and secondary processes. */
 	int init_done; /* Whether primary has done initialization. */
 	unsigned int secondary_cnt; /* Number of secondary processes init'd. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
 	struct mlx4_dev_list mem_event_cb_list;
 	rte_rwlock_t mem_event_rwlock;
 };
@@ -147,12 +145,21 @@ struct mlx4_shared_data {
 /* Per-process data structure, not visible to other processes. */
 struct mlx4_local_data {
 	int init_done; /* Whether a secondary has done initialization. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
 };
 
 extern struct mlx4_shared_data *mlx4_shared_data;
 
+/* Per-process private structure. */
+struct mlx4_proc_priv {
+	size_t uar_table_sz;
+	/* Size of UAR register table. */
+	void *uar_table[];
+	/* Table of UAR registers for each process. */
+};
+
+#define MLX4_PROC_PRIV(port_id) \
+	((struct mlx4_proc_priv *)rte_eth_devices[port_id].process_private)
+
 /** Private data structure. */
 struct mlx4_priv {
 	LIST_ENTRY(mlx4_priv) mem_event_cb;
diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h
index b3e11dde25..16ae6db82d 100644
--- a/drivers/net/mlx4/mlx4_prm.h
+++ b/drivers/net/mlx4/mlx4_prm.h
@@ -77,8 +77,7 @@ struct mlx4_sq {
 	uint32_t owner_opcode;
 	/**< Default owner opcode with HW valid owner bit. */
 	uint32_t stamp; /**< Stamp value with an invalid HW owner bit. */
-	volatile uint32_t *qp_sdb; /**< Pointer to the doorbell. */
-	volatile uint32_t *db; /**< Pointer to the doorbell remapped. */
+	uint32_t *db; /**< Pointer to the doorbell. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
 	uint32_t doorbell_qpn; /**< qp number to write to the doorbell. */
 };
diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index f22f1ba559..391271a616 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -1048,7 +1048,7 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	/* Make sure that descriptors are written before doorbell record. */
 	rte_wmb();
 	/* Ring QP doorbell. */
-	rte_write32(txq->msq.doorbell_qpn, txq->msq.db);
+	rte_write32(txq->msq.doorbell_qpn, MLX4_TX_BFREG(txq));
 	txq->elts_head += i;
 	return i;
 }
diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
index 7d7a8988ed..8baf33fa94 100644
--- a/drivers/net/mlx4/mlx4_rxtx.h
+++ b/drivers/net/mlx4/mlx4_rxtx.h
@@ -97,6 +97,7 @@ struct mlx4_txq_stats {
 struct txq {
 	struct mlx4_sq msq; /**< Info for directly manipulating the SQ. */
 	struct mlx4_cq mcq; /**< Info for directly manipulating the CQ. */
+	uint16_t port_id; /**< Port ID of device. */
 	unsigned int elts_head; /**< Current index in (*elts)[]. */
 	unsigned int elts_tail; /**< First element awaiting completion. */
 	int elts_comp_cd; /**< Countdown for next completion. */
@@ -118,6 +119,9 @@ struct txq {
 	uint8_t data[]; /**< Remaining queue resources. */
 };
 
+#define MLX4_TX_BFREG(txq) \
+		(MLX4_PROC_PRIV((txq)->port_id)->uar_table[(txq)->stats.idx])
+
 /* mlx4_rxq.c */
 
 uint8_t mlx4_rss_hash_key_default[MLX4_RSS_HASH_KEY_SIZE];
@@ -152,7 +156,7 @@ uint16_t mlx4_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts,
 
 /* mlx4_txq.c */
 
-int mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd);
+int mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
 uint64_t mlx4_get_tx_port_offloads(struct mlx4_priv *priv);
 int mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
 			uint16_t desc, unsigned int socket,
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index 698a648c8d..01a5efd80d 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -40,11 +40,88 @@
 #include "mlx4_utils.h"
 
 /**
- * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
- * Both primary and secondary process do mmap to make UAR address
- * aligned.
+ * Initialize Tx UAR registers for primary process.
  *
- * @param[in] dev
+ * @param txq
+ *   Pointer to Tx queue structure.
+ */
+static void
+txq_uar_init(struct txq *txq)
+{
+	struct mlx4_priv *priv = txq->priv;
+	struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(priv));
+
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	assert(ppriv);
+	ppriv->uar_table[txq->stats.idx] = txq->msq.db;
+}
+
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
+/**
+ * Remap UAR register of a Tx queue for secondary process.
+ *
+ * Remapped address is stored at the table in the process private structure of
+ * the device, indexed by queue index.
+ *
+ * @param txq
+ *   Pointer to Tx queue structure.
+ * @param fd
+ *   Verbs file descriptor to map UAR pages.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+txq_uar_init_secondary(struct txq *txq, int fd)
+{
+	struct mlx4_priv *priv = txq->priv;
+	struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(priv));
+	void *addr;
+	uintptr_t uar_va;
+	uintptr_t offset;
+	const size_t page_size = sysconf(_SC_PAGESIZE);
+
+	assert(ppriv);
+	/*
+	 * As rdma-core, UARs are mapped in size of OS page
+	 * size. Ref to libmlx4 function: mlx4_init_context()
+	 */
+	uar_va = (uintptr_t)txq->msq.db;
+	offset = uar_va & (page_size - 1); /* Offset in page. */
+	addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
+			txq->msq.uar_mmap_offset);
+	if (addr == MAP_FAILED) {
+		ERROR("port %u mmap failed for BF reg of txq %u",
+		      txq->port_id, txq->stats.idx);
+		rte_errno = ENXIO;
+		return -rte_errno;
+	}
+	addr = RTE_PTR_ADD(addr, offset);
+	ppriv->uar_table[txq->stats.idx] = addr;
+	return 0;
+}
+
+/**
+ * Unmap UAR register of a Tx queue for secondary process.
+ *
+ * @param txq
+ *   Pointer to Tx queue structure.
+ */
+static void
+txq_uar_uninit_secondary(struct txq *txq)
+{
+	struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(txq->priv));
+	const size_t page_size = sysconf(_SC_PAGESIZE);
+	void *addr;
+
+	addr = ppriv->uar_table[txq->stats.idx];
+	munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
+}
+
+/**
+ * Initialize Tx UAR registers for secondary process.
+ *
+ * @param dev
  *   Pointer to Ethernet device.
  * @param fd
  *   Verbs file descriptor to map UAR pages.
@@ -52,81 +129,41 @@
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
 int
-mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd)
+mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd)
 {
-	unsigned int i, j;
 	const unsigned int txqs_n = dev->data->nb_tx_queues;
-	uintptr_t pages[txqs_n];
-	unsigned int pages_n = 0;
-	uintptr_t uar_va;
-	uintptr_t off;
-	void *addr;
-	void *ret;
 	struct txq *txq;
-	int already_mapped;
-	size_t page_size = sysconf(_SC_PAGESIZE);
+	unsigned int i;
+	int ret;
 
-	memset(pages, 0, txqs_n * sizeof(uintptr_t));
-	/*
-	 * As rdma-core, UARs are mapped in size of OS page size.
-	 * Use aligned address to avoid duplicate mmap.
-	 * Ref to libmlx4 function: mlx4_init_context()
-	 */
+	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
 	for (i = 0; i != txqs_n; ++i) {
 		txq = dev->data->tx_queues[i];
 		if (!txq)
 			continue;
-		/* UAR addr form verbs used to find dup and offset in page. */
-		uar_va = (uintptr_t)txq->msq.qp_sdb;
-		off = uar_va & (page_size - 1); /* offset in page. */
-		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
-		already_mapped = 0;
-		for (j = 0; j != pages_n; ++j) {
-			if (pages[j] == uar_va) {
-				already_mapped = 1;
-				break;
-			}
-		}
-		/* new address in reserved UAR address space. */
-		addr = RTE_PTR_ADD(mlx4_shared_data->uar_base,
-				   uar_va & (uintptr_t)(MLX4_UAR_SIZE - 1));
-		if (!already_mapped) {
-			pages[pages_n++] = uar_va;
-			/* fixed mmap to specified address in reserved
-			 * address space.
-			 */
-			ret = mmap(addr, page_size,
-				   PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
-				   txq->msq.uar_mmap_offset);
-			if (ret != addr) {
-				/* fixed mmap has to return same address. */
-				ERROR("port %u call to mmap failed on UAR"
-				      " for txq %u",
-				      dev->data->port_id, i);
-				rte_errno = ENXIO;
-				return -rte_errno;
-			}
-		}
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once. */
-			txq->msq.db = RTE_PTR_ADD((void *)addr, off);
-		else
-			assert(txq->msq.db ==
-			       RTE_PTR_ADD((void *)addr, off));
+		assert(txq->stats.idx == (uint16_t)i);
+		ret = txq_uar_init_secondary(txq, fd);
+		if (ret)
+			goto error;
 	}
 	return 0;
+error:
+	/* Rollback. */
+	do {
+		txq = dev->data->tx_queues[i];
+		if (!txq)
+			continue;
+		txq_uar_uninit_secondary(txq);
+	} while (i--);
+	return -rte_errno;
 }
 #else
 int
-mlx4_tx_uar_remap(struct rte_eth_dev *dev __rte_unused, int fd __rte_unused)
+mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev __rte_unused,
+			   int fd __rte_unused)
 {
-	/*
-	 * Even if rdma-core doesn't support UAR remap, primary process
-	 * shouldn't be interrupted.
-	 */
-	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-		return 0;
+	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
 	ERROR("UAR remap is not supported");
 	rte_errno = ENOTSUP;
 	return -rte_errno;
@@ -187,11 +224,10 @@ mlx4_txq_fill_dv_obj_info(struct txq *txq, struct mlx4dv_obj *mlxdv)
 				     (0u << MLX4_SQ_OWNER_BIT));
 #ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
 	sq->uar_mmap_offset = dqp->uar_mmap_offset;
-	sq->qp_sdb = dqp->sdb;
 #else
 	sq->uar_mmap_offset = -1; /* Make mmap() fail. */
-	sq->db = dqp->sdb;
 #endif
+	sq->db = dqp->sdb;
 	sq->doorbell_qpn = dqp->doorbell_qpn;
 	cq->buf = dcq->buf.buf;
 	cq->cqe_cnt = dcq->cqe_cnt;
@@ -314,6 +350,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	}
 	*txq = (struct txq){
 		.priv = priv,
+		.port_id = dev->data->port_id,
 		.stats = {
 			.idx = idx,
 		},
@@ -432,6 +469,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	}
 #endif
 	mlx4_txq_fill_dv_obj_info(txq, &mlxdv);
+	txq_uar_init(txq);
 	/* Save first wqe pointer in the first element. */
 	(&(*txq->elts)[0])->wqe =
 		(volatile struct mlx4_wqe_ctrl_seg *)txq->msq.buf;
-- 
2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v3 4/4] net/mlx4: remove device register remap
  2019-04-05  1:33   ` [dpdk-dev] [PATCH v3 4/4] net/mlx4: " Yongseok Koh
@ 2019-04-05  1:33     ` Yongseok Koh
  0 siblings, 0 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-05  1:33 UTC (permalink / raw)
  To: shahafs; +Cc: dev

The UAR (User Access Region) register does not need to be remapped for the
primary process; it has to be remapped only for secondary processes. The UAR
register table is kept in the process private structure in rte_eth_devices[],
	(struct mlx4_proc_priv *)rte_eth_devices[port_id].process_private

The actual UAR table follows the data structure and the table is used for
both Tx and Rx.

For Tx, the BlueFlame register in the UAR is used to ring the doorbell.
MLX4_TX_BFREG(txq) is defined to get the register for the txq. Each process
accesses its own private data to acquire the register from the UAR table.

For Rx, the doorbell in the UAR is required for arming CQ events. However, it
is a known issue that the register isn't remapped for secondary processes.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx4/mlx4.c      | 232 ++++++++-----------------------------------
 drivers/net/mlx4/mlx4.h      |  15 ++-
 drivers/net/mlx4/mlx4_prm.h  |   3 +-
 drivers/net/mlx4/mlx4_rxtx.c |   2 +-
 drivers/net/mlx4/mlx4_rxtx.h |   6 +-
 drivers/net/mlx4/mlx4_txq.c  | 170 +++++++++++++++++++------------
 6 files changed, 165 insertions(+), 263 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 252658fc6a..b22fe11e6c 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -126,30 +126,6 @@ mlx4_init_shared_data(void)
 	return ret;
 }
 
-/**
- * Uninitialize shared data between primary and secondary process.
- *
- * The pointer of secondary process is dereferenced and primary process frees
- * the memzone.
- */
-static void
-mlx4_uninit_shared_data(void)
-{
-	const struct rte_memzone *mz;
-
-	rte_spinlock_lock(&mlx4_shared_data_lock);
-	if (mlx4_shared_data) {
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-			mz = rte_memzone_lookup(MZ_MLX4_PMD_SHARED_DATA);
-			rte_memzone_free(mz);
-		} else {
-			memset(&mlx4_local_data, 0, sizeof(mlx4_local_data));
-		}
-		mlx4_shared_data = NULL;
-	}
-	rte_spinlock_unlock(&mlx4_shared_data_lock);
-}
-
 #ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS
 /**
  * Verbs callback to allocate a memory. This function should allocate the space
@@ -219,6 +195,8 @@ static int
 mlx4_dev_configure(struct rte_eth_dev *dev)
 {
 	struct mlx4_priv *priv = dev->data->dev_private;
+	struct mlx4_proc_priv *ppriv;
+	size_t ppriv_size;
 	struct rte_flow_error error;
 	int ret;
 
@@ -235,6 +213,22 @@ mlx4_dev_configure(struct rte_eth_dev *dev)
 	if (ret)
 		ERROR("%p: interrupt handler installation failed",
 		      (void *)dev);
+	/*
+	 * UAR register table follows the process private structure. BlueFlame
+	 * registers for Tx queues come first and registers for Rx queues
+	 * follows.
+	 */
+	ppriv_size = sizeof(struct mlx4_proc_priv) +
+		     (dev->data->nb_rx_queues + dev->data->nb_tx_queues) *
+		     sizeof(void *);
+	ppriv = rte_malloc_socket("mlx4_proc_priv", ppriv_size,
+				  RTE_CACHE_LINE_SIZE, dev->device->numa_node);
+	if (!ppriv) {
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	ppriv->uar_table_sz = ppriv_size;
+	dev->process_private = ppriv;
 exit:
 	return ret;
 }
@@ -262,11 +256,6 @@ mlx4_dev_start(struct rte_eth_dev *dev)
 		return 0;
 	DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
 	priv->started = 1;
-	ret = mlx4_tx_uar_remap(dev, priv->ctx->cmd_fd);
-	if (ret) {
-		ERROR("%p: cannot remap UAR", (void *)dev);
-		goto err;
-	}
 	ret = mlx4_rss_init(priv);
 	if (ret) {
 		ERROR("%p: cannot initialize RSS resources: %s",
@@ -314,8 +303,6 @@ static void
 mlx4_dev_stop(struct rte_eth_dev *dev)
 {
 	struct mlx4_priv *priv = dev->data->dev_private;
-	const size_t page_size = sysconf(_SC_PAGESIZE);
-	int i;
 
 	if (!priv->started)
 		return;
@@ -326,18 +313,11 @@ mlx4_dev_stop(struct rte_eth_dev *dev)
 	rte_wmb();
 	/* Disable datapath on secondary process. */
 	mlx4_mp_req_stop_rxtx(dev);
+	rte_free(dev->process_private);
+	dev->process_private = NULL;
 	mlx4_flow_sync(priv, NULL);
 	mlx4_rxq_intr_disable(priv);
 	mlx4_rss_deinit(priv);
-	for (i = 0; i != dev->data->nb_tx_queues; ++i) {
-		struct txq *txq;
-
-		txq = dev->data->tx_queues[i];
-		if (!txq)
-			continue;
-		munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->msq.db,
-					       page_size), page_size);
-	}
 }
 
 /**
@@ -662,130 +642,6 @@ mlx4_hw_rss_sup(struct ibv_context *ctx, struct ibv_pd *pd,
 
 static struct rte_pci_driver mlx4_driver;
 
-static int
-find_lower_va_bound(const struct rte_memseg_list *msl,
-		const struct rte_memseg *ms, void *arg)
-{
-	void **addr = arg;
-
-	if (msl->external)
-		return 0;
-	if (*addr == NULL)
-		*addr = ms->addr;
-	else
-		*addr = RTE_MIN(*addr, ms->addr);
-
-	return 0;
-}
-
-/**
- * Reserve UAR address space for primary process.
- *
- * Process local resource is used by both primary and secondary to avoid
- * duplicate reservation. The space has to be available on both primary and
- * secondary process, TXQ UAR maps to this area using fixed mmap w/o double
- * check.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx4_uar_init_primary(void)
-{
-	struct mlx4_shared_data *sd = mlx4_shared_data;
-	void *addr = (void *)0;
-
-	if (sd->uar_base)
-		return 0;
-	/* find out lower bound of hugepage segments */
-	rte_memseg_walk(find_lower_va_bound, &addr);
-	/* keep distance to hugepages to minimize potential conflicts. */
-	addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX4_UAR_OFFSET + MLX4_UAR_SIZE));
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(addr, MLX4_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		ERROR("failed to reserve UAR address space, please"
-		      " adjust MLX4_UAR_SIZE or try --base-virtaddr");
-		rte_errno = ENOMEM;
-		return -rte_errno;
-	}
-	/* Accept either same addr or a new addr returned from mmap if target
-	 * range occupied.
-	 */
-	INFO("reserved UAR address space: %p", addr);
-	sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
-	return 0;
-}
-
-/**
- * Unmap UAR address space reserved for primary process.
- */
-static void
-mlx4_uar_uninit_primary(void)
-{
-	struct mlx4_shared_data *sd = mlx4_shared_data;
-
-	if (!sd->uar_base)
-		return;
-	munmap(sd->uar_base, MLX4_UAR_SIZE);
-	sd->uar_base = NULL;
-}
-
-/**
- * Reserve UAR address space for secondary process, align with primary process.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx4_uar_init_secondary(void)
-{
-	struct mlx4_shared_data *sd = mlx4_shared_data;
-	struct mlx4_local_data *ld = &mlx4_local_data;
-	void *addr;
-
-	if (ld->uar_base) { /* Already reserved. */
-		assert(sd->uar_base == ld->uar_base);
-		return 0;
-	}
-	assert(sd->uar_base);
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(sd->uar_base, MLX4_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		ERROR("UAR mmap failed: %p size: %llu",
-		      sd->uar_base, MLX4_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
-	}
-	if (sd->uar_base != addr) {
-		ERROR("UAR address %p size %llu occupied, please"
-		      " adjust MLX4_UAR_OFFSET or try EAL parameter"
-		      " --base-virtaddr",
-		      sd->uar_base, MLX4_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
-	}
-	ld->uar_base = addr;
-	INFO("reserved UAR address space: %p", addr);
-	return 0;
-}
-
-/**
- * Unmap UAR address space reserved for secondary process.
- */
-static void
-mlx4_uar_uninit_secondary(void)
-{
-	struct mlx4_local_data *ld = &mlx4_local_data;
-
-	if (!ld->uar_base)
-		return;
-	munmap(ld->uar_base, MLX4_UAR_SIZE);
-	ld->uar_base = NULL;
-}
-
 /**
  * PMD global initialization.
  *
@@ -801,7 +657,6 @@ mlx4_init_once(void)
 {
 	struct mlx4_shared_data *sd;
 	struct mlx4_local_data *ld = &mlx4_local_data;
-	int ret;
 
 	if (mlx4_init_shared_data())
 		return -rte_errno;
@@ -817,18 +672,12 @@ mlx4_init_once(void)
 		rte_mem_event_callback_register("MLX4_MEM_EVENT_CB",
 						mlx4_mr_mem_event_cb, NULL);
 		mlx4_mp_init_primary();
-		ret = mlx4_uar_init_primary();
-		if (ret)
-			goto error;
 		sd->init_done = true;
 		break;
 	case RTE_PROC_SECONDARY:
 		if (ld->init_done)
 			break;
 		mlx4_mp_init_secondary();
-		ret = mlx4_uar_init_secondary();
-		if (ret)
-			goto error;
 		++sd->secondary_cnt;
 		ld->init_done = true;
 		break;
@@ -837,23 +686,6 @@ mlx4_init_once(void)
 	}
 	rte_spinlock_unlock(&sd->lock);
 	return 0;
-error:
-	switch (rte_eal_process_type()) {
-	case RTE_PROC_PRIMARY:
-		mlx4_uar_uninit_primary();
-		mlx4_mp_uninit_primary();
-		rte_mem_event_callback_unregister("MLX4_MEM_EVENT_CB", NULL);
-		break;
-	case RTE_PROC_SECONDARY:
-		mlx4_uar_uninit_secondary();
-		mlx4_mp_uninit_secondary();
-		break;
-	default:
-		break;
-	}
-	rte_spinlock_unlock(&sd->lock);
-	mlx4_uninit_shared_data();
-	return -rte_errno;
 }
 
 /**
@@ -987,6 +819,9 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 		snprintf(name, sizeof(name), "%s port %u",
 			 mlx4_glue->get_device_name(ibv_dev), port);
 		if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+			struct mlx4_proc_priv *ppriv;
+			size_t ppriv_size;
+
 			eth_dev = rte_eth_dev_attach_secondary(name);
 			if (eth_dev == NULL) {
 				ERROR("can not attach rte ethdev");
@@ -1003,6 +838,25 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 				err = rte_errno;
 				goto error;
 			}
+			/*
+			 * UAR register table follows the process private
+			 * structure.  BlueFlame registers for Tx queues come
+			 * first and registers for Rx queues follows.
+			 */
+			ppriv_size =
+				sizeof(struct mlx4_proc_priv) +
+				(eth_dev->data->nb_rx_queues +
+				 eth_dev->data->nb_tx_queues) * sizeof(void *);
+			ppriv = rte_malloc_socket("mlx4_proc_priv", ppriv_size,
+						  RTE_CACHE_LINE_SIZE,
+						  pci_dev->device.numa_node);
+			if (!ppriv) {
+				rte_errno = ENOMEM;
+				err = rte_errno;
+				goto error;
+			}
+			ppriv->uar_table_sz = ppriv_size;
+			eth_dev->process_private = ppriv;
 			eth_dev->device = &pci_dev->device;
 			eth_dev->dev_ops = &mlx4_dev_sec_ops;
 			/* Receive command fd from primary process. */
@@ -1012,7 +866,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 				goto error;
 			}
 			/* Remap UAR for Tx queues. */
-			err = mlx4_tx_uar_remap(eth_dev, err);
+			err = mlx4_tx_uar_init_secondary(eth_dev, err);
 			if (err) {
 				err = rte_errno;
 				goto error;
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 1db23d6cc9..904c4f5c03 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -138,8 +138,6 @@ struct mlx4_shared_data {
 	/* Global spinlock for primary and secondary processes. */
 	int init_done; /* Whether primary has done initialization. */
 	unsigned int secondary_cnt; /* Number of secondary processes init'd. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
 	struct mlx4_dev_list mem_event_cb_list;
 	rte_rwlock_t mem_event_rwlock;
 };
@@ -147,12 +145,21 @@ struct mlx4_shared_data {
 /* Per-process data structure, not visible to other processes. */
 struct mlx4_local_data {
 	int init_done; /* Whether a secondary has done initialization. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
 };
 
 extern struct mlx4_shared_data *mlx4_shared_data;
 
+/* Per-process private structure. */
+struct mlx4_proc_priv {
+	size_t uar_table_sz;
+	/* Size of UAR register table. */
+	void *uar_table[];
+	/* Table of UAR registers for each process. */
+};
+
+#define MLX4_PROC_PRIV(port_id) \
+	((struct mlx4_proc_priv *)rte_eth_devices[port_id].process_private)
+
 /** Private data structure. */
 struct mlx4_priv {
 	LIST_ENTRY(mlx4_priv) mem_event_cb;
diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h
index b3e11dde25..16ae6db82d 100644
--- a/drivers/net/mlx4/mlx4_prm.h
+++ b/drivers/net/mlx4/mlx4_prm.h
@@ -77,8 +77,7 @@ struct mlx4_sq {
 	uint32_t owner_opcode;
 	/**< Default owner opcode with HW valid owner bit. */
 	uint32_t stamp; /**< Stamp value with an invalid HW owner bit. */
-	volatile uint32_t *qp_sdb; /**< Pointer to the doorbell. */
-	volatile uint32_t *db; /**< Pointer to the doorbell remapped. */
+	uint32_t *db; /**< Pointer to the doorbell. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
 	uint32_t doorbell_qpn; /**< qp number to write to the doorbell. */
 };
diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index f22f1ba559..391271a616 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -1048,7 +1048,7 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	/* Make sure that descriptors are written before doorbell record. */
 	rte_wmb();
 	/* Ring QP doorbell. */
-	rte_write32(txq->msq.doorbell_qpn, txq->msq.db);
+	rte_write32(txq->msq.doorbell_qpn, MLX4_TX_BFREG(txq));
 	txq->elts_head += i;
 	return i;
 }
diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
index 7d7a8988ed..8baf33fa94 100644
--- a/drivers/net/mlx4/mlx4_rxtx.h
+++ b/drivers/net/mlx4/mlx4_rxtx.h
@@ -97,6 +97,7 @@ struct mlx4_txq_stats {
 struct txq {
 	struct mlx4_sq msq; /**< Info for directly manipulating the SQ. */
 	struct mlx4_cq mcq; /**< Info for directly manipulating the CQ. */
+	uint16_t port_id; /**< Port ID of device. */
 	unsigned int elts_head; /**< Current index in (*elts)[]. */
 	unsigned int elts_tail; /**< First element awaiting completion. */
 	int elts_comp_cd; /**< Countdown for next completion. */
@@ -118,6 +119,9 @@ struct txq {
 	uint8_t data[]; /**< Remaining queue resources. */
 };
 
+#define MLX4_TX_BFREG(txq) \
+		(MLX4_PROC_PRIV((txq)->port_id)->uar_table[(txq)->stats.idx])
+
 /* mlx4_rxq.c */
 
 uint8_t mlx4_rss_hash_key_default[MLX4_RSS_HASH_KEY_SIZE];
@@ -152,7 +156,7 @@ uint16_t mlx4_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts,
 
 /* mlx4_txq.c */
 
-int mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd);
+int mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
 uint64_t mlx4_get_tx_port_offloads(struct mlx4_priv *priv);
 int mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
 			uint16_t desc, unsigned int socket,
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index 698a648c8d..01a5efd80d 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -40,11 +40,88 @@
 #include "mlx4_utils.h"
 
 /**
- * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
- * Both primary and secondary process do mmap to make UAR address
- * aligned.
+ * Initialize Tx UAR registers for primary process.
  *
- * @param[in] dev
+ * @param txq
+ *   Pointer to Tx queue structure.
+ */
+static void
+txq_uar_init(struct txq *txq)
+{
+	struct mlx4_priv *priv = txq->priv;
+	struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(priv));
+
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	assert(ppriv);
+	ppriv->uar_table[txq->stats.idx] = txq->msq.db;
+}
+
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
+/**
+ * Remap UAR register of a Tx queue for secondary process.
+ *
+ * Remapped address is stored at the table in the process private structure of
+ * the device, indexed by queue index.
+ *
+ * @param txq
+ *   Pointer to Tx queue structure.
+ * @param fd
+ *   Verbs file descriptor to map UAR pages.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+txq_uar_init_secondary(struct txq *txq, int fd)
+{
+	struct mlx4_priv *priv = txq->priv;
+	struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(priv));
+	void *addr;
+	uintptr_t uar_va;
+	uintptr_t offset;
+	const size_t page_size = sysconf(_SC_PAGESIZE);
+
+	assert(ppriv);
+	/*
+	 * As rdma-core, UARs are mapped in size of OS page
+	 * size. Ref to libmlx4 function: mlx4_init_context()
+	 */
+	uar_va = (uintptr_t)txq->msq.db;
+	offset = uar_va & (page_size - 1); /* Offset in page. */
+	addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
+			txq->msq.uar_mmap_offset);
+	if (addr == MAP_FAILED) {
+		ERROR("port %u mmap failed for BF reg of txq %u",
+		      txq->port_id, txq->stats.idx);
+		rte_errno = ENXIO;
+		return -rte_errno;
+	}
+	addr = RTE_PTR_ADD(addr, offset);
+	ppriv->uar_table[txq->stats.idx] = addr;
+	return 0;
+}
+
+/**
+ * Unmap UAR register of a Tx queue for secondary process.
+ *
+ * @param txq
+ *   Pointer to Tx queue structure.
+ */
+static void
+txq_uar_uninit_secondary(struct txq *txq)
+{
+	struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(txq->priv));
+	const size_t page_size = sysconf(_SC_PAGESIZE);
+	void *addr;
+
+	addr = ppriv->uar_table[txq->stats.idx];
+	munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
+}
+
+/**
+ * Initialize Tx UAR registers for secondary process.
+ *
+ * @param dev
  *   Pointer to Ethernet device.
  * @param fd
  *   Verbs file descriptor to map UAR pages.
@@ -52,81 +129,41 @@
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
 int
-mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd)
+mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd)
 {
-	unsigned int i, j;
 	const unsigned int txqs_n = dev->data->nb_tx_queues;
-	uintptr_t pages[txqs_n];
-	unsigned int pages_n = 0;
-	uintptr_t uar_va;
-	uintptr_t off;
-	void *addr;
-	void *ret;
 	struct txq *txq;
-	int already_mapped;
-	size_t page_size = sysconf(_SC_PAGESIZE);
+	unsigned int i;
+	int ret;
 
-	memset(pages, 0, txqs_n * sizeof(uintptr_t));
-	/*
-	 * As rdma-core, UARs are mapped in size of OS page size.
-	 * Use aligned address to avoid duplicate mmap.
-	 * Ref to libmlx4 function: mlx4_init_context()
-	 */
+	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
 	for (i = 0; i != txqs_n; ++i) {
 		txq = dev->data->tx_queues[i];
 		if (!txq)
 			continue;
-		/* UAR addr form verbs used to find dup and offset in page. */
-		uar_va = (uintptr_t)txq->msq.qp_sdb;
-		off = uar_va & (page_size - 1); /* offset in page. */
-		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
-		already_mapped = 0;
-		for (j = 0; j != pages_n; ++j) {
-			if (pages[j] == uar_va) {
-				already_mapped = 1;
-				break;
-			}
-		}
-		/* new address in reserved UAR address space. */
-		addr = RTE_PTR_ADD(mlx4_shared_data->uar_base,
-				   uar_va & (uintptr_t)(MLX4_UAR_SIZE - 1));
-		if (!already_mapped) {
-			pages[pages_n++] = uar_va;
-			/* fixed mmap to specified address in reserved
-			 * address space.
-			 */
-			ret = mmap(addr, page_size,
-				   PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
-				   txq->msq.uar_mmap_offset);
-			if (ret != addr) {
-				/* fixed mmap has to return same address. */
-				ERROR("port %u call to mmap failed on UAR"
-				      " for txq %u",
-				      dev->data->port_id, i);
-				rte_errno = ENXIO;
-				return -rte_errno;
-			}
-		}
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once. */
-			txq->msq.db = RTE_PTR_ADD((void *)addr, off);
-		else
-			assert(txq->msq.db ==
-			       RTE_PTR_ADD((void *)addr, off));
+		assert(txq->stats.idx == (uint16_t)i);
+		ret = txq_uar_init_secondary(txq, fd);
+		if (ret)
+			goto error;
 	}
 	return 0;
+error:
+	/* Rollback. */
+	do {
+		txq = dev->data->tx_queues[i];
+		if (!txq)
+			continue;
+		txq_uar_uninit_secondary(txq);
+	} while (i--);
+	return -rte_errno;
 }
 #else
 int
-mlx4_tx_uar_remap(struct rte_eth_dev *dev __rte_unused, int fd __rte_unused)
+mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev __rte_unused,
+			   int fd __rte_unused)
 {
-	/*
-	 * Even if rdma-core doesn't support UAR remap, primary process
-	 * shouldn't be interrupted.
-	 */
-	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-		return 0;
+	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
 	ERROR("UAR remap is not supported");
 	rte_errno = ENOTSUP;
 	return -rte_errno;
@@ -187,11 +224,10 @@ mlx4_txq_fill_dv_obj_info(struct txq *txq, struct mlx4dv_obj *mlxdv)
 				     (0u << MLX4_SQ_OWNER_BIT));
 #ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
 	sq->uar_mmap_offset = dqp->uar_mmap_offset;
-	sq->qp_sdb = dqp->sdb;
 #else
 	sq->uar_mmap_offset = -1; /* Make mmap() fail. */
-	sq->db = dqp->sdb;
 #endif
+	sq->db = dqp->sdb;
 	sq->doorbell_qpn = dqp->doorbell_qpn;
 	cq->buf = dcq->buf.buf;
 	cq->cqe_cnt = dcq->cqe_cnt;
@@ -314,6 +350,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	}
 	*txq = (struct txq){
 		.priv = priv,
+		.port_id = dev->data->port_id,
 		.stats = {
 			.idx = idx,
 		},
@@ -432,6 +469,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	}
 #endif
 	mlx4_txq_fill_dv_obj_info(txq, &mlxdv);
+	txq_uar_init(txq);
 	/* Save first wqe pointer in the first element. */
 	(&(*txq->elts)[0])->wqe =
 		(volatile struct mlx4_wqe_ctrl_seg *)txq->msq.buf;
-- 
2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [dpdk-dev] [PATCH v3 2/4] net/mlx5: remove redundant queue index
  2019-04-05  1:33   ` [dpdk-dev] [PATCH v3 2/4] net/mlx5: remove redundant queue index Yongseok Koh
  2019-04-05  1:33     ` Yongseok Koh
@ 2019-04-08  5:24     ` Shahaf Shuler
  2019-04-08  5:24       ` Shahaf Shuler
  1 sibling, 1 reply; 66+ messages in thread
From: Shahaf Shuler @ 2019-04-08  5:24 UTC (permalink / raw)
  To: Yongseok Koh; +Cc: dev

Friday, April 5, 2019 4:34 AM, Yongseok Koh:
> Subject: [dpdk-dev] [PATCH v3 2/4] net/mlx5: remove redundant queue
> index
> 
> Queue index is redundantly stored for both Rx and Tx structures.
> E.g. txq_ctrl->idx and txq->stats.idx. Both are consolidated to single storage -
> rxq->idx and txq->idx.
> 
> Also, rxq and txq are moved to the beginning of its control structure (rxq_ctrl
> and txq_ctrl) for cacheline alignment.
> 
> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>

Acked-by: Shahaf Shuler <shahafs@mellanox.com>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [dpdk-dev] [PATCH v3 2/4] net/mlx5: remove redundant queue index
  2019-04-08  5:24     ` Shahaf Shuler
@ 2019-04-08  5:24       ` Shahaf Shuler
  0 siblings, 0 replies; 66+ messages in thread
From: Shahaf Shuler @ 2019-04-08  5:24 UTC (permalink / raw)
  To: Yongseok Koh; +Cc: dev

Friday, April 5, 2019 4:34 AM, Yongseok Koh:
> Subject: [dpdk-dev] [PATCH v3 2/4] net/mlx5: remove redundant queue
> index
> 
> Queue index is redundantly stored for both Rx and Tx structures.
> E.g. txq_ctrl->idx and txq->stats.idx. Both are consolidated to single storage -
> rxq->idx and txq->idx.
> 
> Also, rxq and txq are moved to the beginning of its control structure (rxq_ctrl
> and txq_ctrl) for cacheline alignment.
> 
> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>

Acked-by: Shahaf Shuler <shahafs@mellanox.com>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [dpdk-dev] [PATCH v3 3/4] net/mlx5: remove device register remap
  2019-04-05  1:33   ` [dpdk-dev] [PATCH v3 3/4] net/mlx5: remove device register remap Yongseok Koh
  2019-04-05  1:33     ` Yongseok Koh
@ 2019-04-08  5:48     ` Shahaf Shuler
  2019-04-08  5:48       ` Shahaf Shuler
  2019-04-09 19:36       ` Yongseok Koh
  1 sibling, 2 replies; 66+ messages in thread
From: Shahaf Shuler @ 2019-04-08  5:48 UTC (permalink / raw)
  To: Yongseok Koh; +Cc: dev

Hi Koh,

See a few small comments below. The same applies to the mlx4 patch.


Friday, April 5, 2019 4:34 AM, Yongseok Koh:
> Subject: [dpdk-dev] [PATCH v3 3/4] net/mlx5: remove device register remap
> 
> UAR (User Access Region) register does not need to be remapped for
> primary process but it should be remapped only for secondary process. UAR
> register table is in the process private structure in rte_eth_devices[],
> 	(struct mlx5_proc_priv *)rte_eth_devices[port_id].process_private
> 
> The actual UAR table follows the data structure and the table is used for both
> Tx and Rx.
> 
> For Tx, BlueFlame in UAR is used to ring the doorbell. MLX5_TX_BFREG(txq)
> is defined to get a register for the txq. Processes access its own private data
> to acquire the register from the UAR table.
> 
> For Rx, the doorbell in UAR is required in arming CQ event. However, it is a
> known issue that the register isn't remapped for secondary process.
> 
> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5.c         | 198 +++++-----------------------------------
>  drivers/net/mlx5/mlx5.h         |  15 ++-
>  drivers/net/mlx5/mlx5_ethdev.c  |  17 ++++
>  drivers/net/mlx5/mlx5_rxtx.h    |  11 ++-
>  drivers/net/mlx5/mlx5_trigger.c |   6 --
>  drivers/net/mlx5/mlx5_txq.c     | 180 ++++++++++++++++++++++-------------
> -

[...]

>  /**
> @@ -1182,12 +1010,32 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
>  	}
>  	DRV_LOG(DEBUG, "naming Ethernet device \"%s\"", name);
>  	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
> +		struct mlx5_proc_priv *ppriv;
> +		size_t ppriv_size;
> +
>  		eth_dev = rte_eth_dev_attach_secondary(name);
>  		if (eth_dev == NULL) {
>  			DRV_LOG(ERR, "can not attach rte ethdev");
>  			rte_errno = ENOMEM;
>  			return NULL;
>  		}
> +		priv = eth_dev->data->dev_private;
> +		/*
> +		 * UAR register table follows the process private structure.
> +		 * BlueFlame registers for Tx queues come first and registers
> +		 * for Rx queues follows.
> +		 */
> +		ppriv_size = sizeof(struct mlx5_proc_priv) +
> +			     (priv->rxqs_n + priv->txqs_n) * sizeof(void *);

Why do you also add rxqs_n? Why not only txqs_n?

> +		ppriv = rte_malloc_socket("mlx5_proc_priv", ppriv_size,
> +					  RTE_CACHE_LINE_SIZE,
> +					  dpdk_dev->numa_node);
> +		if (!ppriv) {
> +			rte_errno = ENOMEM;
> +			return NULL;
> +		}
> +		ppriv->uar_table_sz = ppriv_size;
> +		eth_dev->process_private = ppriv;
>  		eth_dev->device = dpdk_dev;
>  		eth_dev->dev_ops = &mlx5_dev_sec_ops;
>  		/* Receive command fd from primary process */ @@ -1195,7
> +1043,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
>  		if (err < 0)
>  			return NULL;
>  		/* Remap UAR for Tx queues. */
> -		err = mlx5_tx_uar_remap(eth_dev, err);
> +		err = mlx5_tx_uar_init_secondary(eth_dev, err);
>  		if (err)
>  			return NULL;
>  		/*
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> 699c8fcf6d..1ac4ad71b1 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -97,8 +97,6 @@ struct mlx5_shared_data {
>  	/* Global spinlock for primary and secondary processes. */
>  	int init_done; /* Whether primary has done initialization. */
>  	unsigned int secondary_cnt; /* Number of secondary processes
> init'd. */
> -	void *uar_base;
> -	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping.
> */
>  	struct mlx5_dev_list mem_event_cb_list;
>  	rte_rwlock_t mem_event_rwlock;
>  };
> @@ -106,8 +104,6 @@ struct mlx5_shared_data {
>  /* Per-process data structure, not visible to other processes. */  struct
> mlx5_local_data {
>  	int init_done; /* Whether a secondary has done initialization. */
> -	void *uar_base;
> -	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping.
> */
>  };
> 
>  extern struct mlx5_shared_data *mlx5_shared_data; @@ -282,6 +278,17
> @@ struct mlx5_ibv_shared {
>  	struct mlx5_ibv_shared_port port[]; /* per device port data array. */
> };
> 
> +/* Per-process private structure. */
> +struct mlx5_proc_priv {
> +	size_t uar_table_sz;
> +	/* Size of UAR register table. */
> +	void *uar_table[];
> +	/* Table of UAR registers for each process. */ };
> +
> +#define MLX5_PROC_PRIV(port_id) \
> +	((struct mlx5_proc_priv *)rte_eth_devices[port_id].process_private)
> +
>  struct mlx5_priv {
>  	LIST_ENTRY(mlx5_priv) mem_event_cb;
>  	/**< Called by memory event callback. */ diff --git
> a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c index
> 9ae9dddd3c..42297f11c9 100644
> --- a/drivers/net/mlx5/mlx5_ethdev.c
> +++ b/drivers/net/mlx5/mlx5_ethdev.c
> @@ -382,6 +382,8 @@ int
>  mlx5_dev_configure(struct rte_eth_dev *dev)  {
>  	struct mlx5_priv *priv = dev->data->dev_private;
> +	struct mlx5_proc_priv *ppriv;
> +	size_t ppriv_size;
>  	unsigned int rxqs_n = dev->data->nb_rx_queues;
>  	unsigned int txqs_n = dev->data->nb_tx_queues;
>  	unsigned int i;
> @@ -450,6 +452,21 @@ mlx5_dev_configure(struct rte_eth_dev *dev)
>  		if (++j == rxqs_n)
>  			j = 0;
>  	}
> +	/*
> +	 * UAR register table follows the process private structure. BlueFlame
> +	 * registers for Tx queues come first and registers for Rx queues
> +	 * follows.
> +	 */
> +	ppriv_size = sizeof(struct mlx5_proc_priv) +
> +		     (priv->rxqs_n + priv->txqs_n) * sizeof(void *);

Ditto. 

> +	ppriv = rte_malloc_socket("mlx5_proc_priv", ppriv_size,
> +				  RTE_CACHE_LINE_SIZE, dev->device-
> >numa_node);
> +	if (!ppriv) {
> +		rte_errno = ENOMEM;
> +		return -rte_errno;
> +	}
> +	ppriv->uar_table_sz = ppriv_size;
> +	dev->process_private = ppriv;
>  	return 0;
>  }
> 
> diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
> index 7b58063ceb..5d49892429 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.h
> +++ b/drivers/net/mlx5/mlx5_rxtx.h
> @@ -201,8 +201,8 @@ struct mlx5_txq_data {
>  	volatile void *wqes; /* Work queue (use volatile to write into). */
>  	volatile uint32_t *qp_db; /* Work queue doorbell. */
>  	volatile uint32_t *cq_db; /* Completion queue doorbell. */
> -	volatile void *bf_reg; /* Blueflame register remapped. */
>  	struct rte_mbuf *(*elts)[]; /* TX elements. */
> +	uint16_t port_id; /* Port ID of device. */
>  	uint16_t idx; /* Queue index. */
>  	struct mlx5_txq_stats stats; /* TX queue counters. */  #ifndef
> RTE_ARCH_64 @@ -231,9 +231,12 @@ struct mlx5_txq_ctrl {
>  	struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
>  	struct mlx5_priv *priv; /* Back pointer to private data. */
>  	off_t uar_mmap_offset; /* UAR mmap offset for non-primary
> process. */
> -	volatile void *bf_reg_orig; /* Blueflame register from verbs. */
> +	void *bf_reg; /* BlueFlame register from Verbs. */

I guess you keep this one in order to get the VA offset for the secondary mapping, right? Otherwise we could take the bf_reg from the UAR table in the process private structure.

If so, it would be better to rename it to uar_page_offset (or another name you like) in order to avoid field duplication.

>  };
> 
> +#define MLX5_TX_BFREG(txq) \
> +		(MLX5_PROC_PRIV((txq)->port_id)->uar_table[(txq)->idx])
> +
>  /* mlx5_rxq.c */
> 
>  extern uint8_t rss_hash_default_key[];
> @@ -301,7 +304,7 @@ uint64_t mlx5_get_rx_queue_offloads(struct
> rte_eth_dev *dev);  int mlx5_tx_queue_setup(struct rte_eth_dev *dev,
> uint16_t idx, uint16_t desc,
>  			unsigned int socket, const struct rte_eth_txconf
> *conf);  void mlx5_tx_queue_release(void *dpdk_txq); -int
> mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd);
> +int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
>  struct mlx5_txq_ibv *mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t
> idx);  struct mlx5_txq_ibv *mlx5_txq_ibv_get(struct rte_eth_dev *dev,
> uint16_t idx);  int mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv); @@
> -704,7 +707,7 @@ static __rte_always_inline void
> mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct
> mlx5_wqe *wqe,
>  		       int cond)
>  {
> -	uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
> +	uint64_t *dst = MLX5_TX_BFREG(txq);

I guess there is no perf penalty due to this change, right?
Would you consider prefetching the address before the doorbell logic, just to be on the safe side?

>  	volatile uint64_t *src = ((volatile uint64_t *)wqe);
> 
>  	rte_cio_wmb();
> diff --git a/drivers/net/mlx5/mlx5_trigger.c
> b/drivers/net/mlx5/mlx5_trigger.c index 7c1e5594d6..b7fde35758 100644
> --- a/drivers/net/mlx5/mlx5_trigger.c
> +++ b/drivers/net/mlx5/mlx5_trigger.c
> @@ -58,12 +58,6 @@ mlx5_txq_start(struct rte_eth_dev *dev)
>  			goto error;
>  		}
>  	}
> -	ret = mlx5_tx_uar_remap(dev, priv->sh->ctx->cmd_fd);
> -	if (ret) {
> -		/* Adjust index for rollback. */
> -		i = priv->txqs_n - 1;
> -		goto error;
> -	}
>  	return 0;
>  error:
>  	ret = rte_errno; /* Save rte_errno before cleanup. */ diff --git
> a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c index
> 4bd08cb035..5fb1761955 100644
> --- a/drivers/net/mlx5/mlx5_txq.c
> +++ b/drivers/net/mlx5/mlx5_txq.c
> @@ -229,13 +229,99 @@ mlx5_tx_queue_release(void *dpdk_txq)
>  		}
>  }
> 
> +/**
> + * Initialize Tx UAR registers for primary process.
> + *
> + * @param txq_ctrl
> + *   Pointer to Tx queue control structure.
> + */
> +static void
> +txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl) {
> +	struct mlx5_priv *priv = txq_ctrl->priv;
> +	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
> +
> +	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
> +	assert(ppriv);
> +	ppriv->uar_table[txq_ctrl->txq.idx] = txq_ctrl->bf_reg; #ifndef
> +RTE_ARCH_64
> +	struct mlx5_priv *priv = txq_ctrl->priv;
> +	struct mlx5_txq_data *txq = &txq_ctrl->txq;
> +	unsigned int lock_idx;
> +	/* Assign an UAR lock according to UAR page number */
> +	lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
> +		   MLX5_UAR_PAGE_NUM_MASK;
> +	txq->uar_lock = &priv->uar_lock[lock_idx]; #endif }
> 
>  /**
> - * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
> - * Both primary and secondary process do mmap to make UAR address
> - * aligned.
> + * Remap UAR register of a Tx queue for secondary process.
>   *
> - * @param[in] dev
> + * Remapped address is stored at the table in the process private
> +structure of
> + * the device, indexed by queue index.
> + *
> + * @param txq_ctrl
> + *   Pointer to Tx queue control structure.
> + * @param fd
> + *   Verbs file descriptor to map UAR pages.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +static int
> +txq_uar_init_secondary(struct mlx5_txq_ctrl *txq_ctrl, int fd) {
> +	struct mlx5_priv *priv = txq_ctrl->priv;
> +	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
> +	struct mlx5_txq_data *txq = &txq_ctrl->txq;
> +	void *addr;
> +	uintptr_t uar_va;
> +	uintptr_t offset;
> +	const size_t page_size = sysconf(_SC_PAGESIZE);
> +
> +	assert(ppriv);
> +	/*
> +	 * As rdma-core, UARs are mapped in size of OS page
> +	 * size. Ref to libmlx5 function: mlx5_init_context()
> +	 */
> +	uar_va = (uintptr_t)txq_ctrl->bf_reg;
> +	offset = uar_va & (page_size - 1); /* Offset in page. */
> +	addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
> +			txq_ctrl->uar_mmap_offset);
> +	if (addr == MAP_FAILED) {
> +		DRV_LOG(ERR,
> +			"port %u mmap failed for BF reg of txq %u",
> +			txq->port_id, txq->idx);
> +		rte_errno = ENXIO;
> +		return -rte_errno;
> +	}
> +	addr = RTE_PTR_ADD(addr, offset);
> +	ppriv->uar_table[txq->idx] = addr;
> +	return 0;
> +}
> +
> +/**
> + * Unmap UAR register of a Tx queue for secondary process.
> + *
> + * @param txq_ctrl
> + *   Pointer to Tx queue control structure.
> + */
> +static void
> +txq_uar_uninit_secondary(struct mlx5_txq_ctrl *txq_ctrl) {
> +	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(txq_ctrl-
> >priv));
> +	const size_t page_size = sysconf(_SC_PAGESIZE);
> +	void *addr;
> +
> +	addr = ppriv->uar_table[txq_ctrl->txq.idx];
> +	munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size); }
> +
> +/**
> + * Initialize Tx UAR registers for secondary process.
> + *
> + * @param dev
>   *   Pointer to Ethernet device.
>   * @param fd
>   *   Verbs file descriptor to map UAR pages.
> @@ -244,81 +330,36 @@ mlx5_tx_queue_release(void *dpdk_txq)
>   *   0 on success, a negative errno value otherwise and rte_errno is set.
>   */
>  int
> -mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
> +mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd)
>  {
>  	struct mlx5_priv *priv = dev->data->dev_private;
> -	unsigned int i, j;
> -	uintptr_t pages[priv->txqs_n];
> -	unsigned int pages_n = 0;
> -	uintptr_t uar_va;
> -	uintptr_t off;
> -	void *addr;
> -	void *ret;
>  	struct mlx5_txq_data *txq;
>  	struct mlx5_txq_ctrl *txq_ctrl;
> -	int already_mapped;
> -	size_t page_size = sysconf(_SC_PAGESIZE);
> -#ifndef RTE_ARCH_64
> -	unsigned int lock_idx;
> -#endif
> +	unsigned int i;
> +	int ret;
> 
> -	memset(pages, 0, priv->txqs_n * sizeof(uintptr_t));
> -	/*
> -	 * As rdma-core, UARs are mapped in size of OS page size.
> -	 * Use aligned address to avoid duplicate mmap.
> -	 * Ref to libmlx5 function: mlx5_init_context()
> -	 */
> +	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
>  	for (i = 0; i != priv->txqs_n; ++i) {
>  		if (!(*priv->txqs)[i])
>  			continue;
>  		txq = (*priv->txqs)[i];
>  		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
>  		assert(txq->idx == (uint16_t)i);
> -		/* UAR addr form verbs used to find dup and offset in page.
> */
> -		uar_va = (uintptr_t)txq_ctrl->bf_reg_orig;
> -		off = uar_va & (page_size - 1); /* offset in page. */
> -		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page
> addr. */
> -		already_mapped = 0;
> -		for (j = 0; j != pages_n; ++j) {
> -			if (pages[j] == uar_va) {
> -				already_mapped = 1;
> -				break;
> -			}
> -		}
> -		/* new address in reserved UAR address space. */
> -		addr = RTE_PTR_ADD(mlx5_shared_data->uar_base,
> -				   uar_va & (uintptr_t)(MLX5_UAR_SIZE - 1));
> -		if (!already_mapped) {
> -			pages[pages_n++] = uar_va;
> -			/* fixed mmap to specified address in reserved
> -			 * address space.
> -			 */
> -			ret = mmap(addr, page_size,
> -				   PROT_WRITE, MAP_FIXED | MAP_SHARED,
> fd,
> -				   txq_ctrl->uar_mmap_offset);
> -			if (ret != addr) {
> -				/* fixed mmap have to return same address
> */
> -				DRV_LOG(ERR,
> -					"port %u call to mmap failed on UAR"
> -					" for txq %u",
> -					dev->data->port_id, txq->idx);
> -				rte_errno = ENXIO;
> -				return -rte_errno;
> -			}
> -		}
> -		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save
> once */
> -			txq_ctrl->txq.bf_reg = RTE_PTR_ADD((void *)addr,
> off);
> -		else
> -			assert(txq_ctrl->txq.bf_reg ==
> -			       RTE_PTR_ADD((void *)addr, off));
> -#ifndef RTE_ARCH_64
> -		/* Assign a UAR lock according to UAR page number */
> -		lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
> -			   MLX5_UAR_PAGE_NUM_MASK;
> -		txq->uar_lock = &priv->uar_lock[lock_idx];
> -#endif
> +		ret = txq_uar_init_secondary(txq_ctrl, fd);
> +		if (ret)
> +			goto error;
>  	}
>  	return 0;
> +error:
> +	/* Rollback. */
> +	do {
> +		if (!(*priv->txqs)[i])
> +			continue;
> +		txq = (*priv->txqs)[i];
> +		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
> +		txq_uar_uninit_secondary(txq_ctrl);
> +	} while (i--);
> +	return -rte_errno;
>  }
> 
>  /**
> @@ -507,7 +548,6 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev,
> uint16_t idx)
>  	txq_data->wqes = qp.sq.buf;
>  	txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
>  	txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
> -	txq_ctrl->bf_reg_orig = qp.bf.reg;
>  	txq_data->cq_db = cq_info.dbrec;
>  	txq_data->cqes =
>  		(volatile struct mlx5_cqe (*)[])
> @@ -521,6 +561,8 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev,
> uint16_t idx)
>  	txq_ibv->qp = tmpl.qp;
>  	txq_ibv->cq = tmpl.cq;
>  	rte_atomic32_inc(&txq_ibv->refcnt);
> +	txq_ctrl->bf_reg = qp.bf.reg;
> +	txq_uar_init(txq_ctrl);
>  	if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
>  		txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
>  		DRV_LOG(DEBUG, "port %u: uar_mmap_offset 0x%lx", @@ -
> 778,6 +820,7 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx,
> uint16_t desc,
>  	tmpl->priv = priv;
>  	tmpl->socket = socket;
>  	tmpl->txq.elts_n = log2above(desc);
> +	tmpl->txq.port_id = dev->data->port_id;
>  	tmpl->txq.idx = idx;
>  	txq_set_params(tmpl);
>  	DRV_LOG(DEBUG, "port %u device_attr.max_qp_wr is %d", @@ -
> 836,15 +879,12 @@ mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx)
> {
>  	struct mlx5_priv *priv = dev->data->dev_private;
>  	struct mlx5_txq_ctrl *txq;
> -	size_t page_size = sysconf(_SC_PAGESIZE);
> 
>  	if (!(*priv->txqs)[idx])
>  		return 0;
>  	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
>  	if (txq->ibv && !mlx5_txq_ibv_release(txq->ibv))
>  		txq->ibv = NULL;
> -	munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->txq.bf_reg,
> page_size),
> -	       page_size);
>  	if (rte_atomic32_dec_and_test(&txq->refcnt)) {
>  		txq_free_elts(txq);
>  		mlx5_mr_btree_free(&txq->txq.mr_ctrl.cache_bh);
> --
> 2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [dpdk-dev] [PATCH v3 3/4] net/mlx5: remove device register remap
  2019-04-08  5:48     ` Shahaf Shuler
@ 2019-04-08  5:48       ` Shahaf Shuler
  2019-04-09 19:36       ` Yongseok Koh
  1 sibling, 0 replies; 66+ messages in thread
From: Shahaf Shuler @ 2019-04-08  5:48 UTC (permalink / raw)
  To: Yongseok Koh; +Cc: dev

Hi Koh,

See a few small comments below. The same applies to the mlx4 patch.


Friday, April 5, 2019 4:34 AM, Yongseok Koh:
> Subject: [dpdk-dev] [PATCH v3 3/4] net/mlx5: remove device register remap
> 
> UAR (User Access Region) register does not need to be remapped for
> primary process but it should be remapped only for secondary process. UAR
> register table is in the process private structure in rte_eth_devices[],
> 	(struct mlx5_proc_priv *)rte_eth_devices[port_id].process_private
> 
> The actual UAR table follows the data structure and the table is used for both
> Tx and Rx.
> 
> For Tx, BlueFlame in UAR is used to ring the doorbell. MLX5_TX_BFREG(txq)
> is defined to get a register for the txq. Processes access its own private data
> to acquire the register from the UAR table.
> 
> For Rx, the doorbell in UAR is required in arming CQ event. However, it is a
> known issue that the register isn't remapped for secondary process.
> 
> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5.c         | 198 +++++-----------------------------------
>  drivers/net/mlx5/mlx5.h         |  15 ++-
>  drivers/net/mlx5/mlx5_ethdev.c  |  17 ++++
>  drivers/net/mlx5/mlx5_rxtx.h    |  11 ++-
>  drivers/net/mlx5/mlx5_trigger.c |   6 --
>  drivers/net/mlx5/mlx5_txq.c     | 180 ++++++++++++++++++++++-------------
> -

[...]

>  /**
> @@ -1182,12 +1010,32 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
>  	}
>  	DRV_LOG(DEBUG, "naming Ethernet device \"%s\"", name);
>  	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
> +		struct mlx5_proc_priv *ppriv;
> +		size_t ppriv_size;
> +
>  		eth_dev = rte_eth_dev_attach_secondary(name);
>  		if (eth_dev == NULL) {
>  			DRV_LOG(ERR, "can not attach rte ethdev");
>  			rte_errno = ENOMEM;
>  			return NULL;
>  		}
> +		priv = eth_dev->data->dev_private;
> +		/*
> +		 * UAR register table follows the process private structure.
> +		 * BlueFlame registers for Tx queues come first and registers
> +		 * for Rx queues follows.
> +		 */
> +		ppriv_size = sizeof(struct mlx5_proc_priv) +
> +			     (priv->rxqs_n + priv->txqs_n) * sizeof(void *);

Why do you also add rxqs_n? Why not only txqs_n?

> +		ppriv = rte_malloc_socket("mlx5_proc_priv", ppriv_size,
> +					  RTE_CACHE_LINE_SIZE,
> +					  dpdk_dev->numa_node);
> +		if (!ppriv) {
> +			rte_errno = ENOMEM;
> +			return NULL;
> +		}
> +		ppriv->uar_table_sz = ppriv_size;
> +		eth_dev->process_private = ppriv;
>  		eth_dev->device = dpdk_dev;
>  		eth_dev->dev_ops = &mlx5_dev_sec_ops;
>  		/* Receive command fd from primary process */ @@ -1195,7
> +1043,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
>  		if (err < 0)
>  			return NULL;
>  		/* Remap UAR for Tx queues. */
> -		err = mlx5_tx_uar_remap(eth_dev, err);
> +		err = mlx5_tx_uar_init_secondary(eth_dev, err);
>  		if (err)
>  			return NULL;
>  		/*
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> 699c8fcf6d..1ac4ad71b1 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -97,8 +97,6 @@ struct mlx5_shared_data {
>  	/* Global spinlock for primary and secondary processes. */
>  	int init_done; /* Whether primary has done initialization. */
>  	unsigned int secondary_cnt; /* Number of secondary processes
> init'd. */
> -	void *uar_base;
> -	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping.
> */
>  	struct mlx5_dev_list mem_event_cb_list;
>  	rte_rwlock_t mem_event_rwlock;
>  };
> @@ -106,8 +104,6 @@ struct mlx5_shared_data {
>  /* Per-process data structure, not visible to other processes. */  struct
> mlx5_local_data {
>  	int init_done; /* Whether a secondary has done initialization. */
> -	void *uar_base;
> -	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping.
> */
>  };
> 
>  extern struct mlx5_shared_data *mlx5_shared_data; @@ -282,6 +278,17
> @@ struct mlx5_ibv_shared {
>  	struct mlx5_ibv_shared_port port[]; /* per device port data array. */
> };
> 
> +/* Per-process private structure. */
> +struct mlx5_proc_priv {
> +	size_t uar_table_sz;
> +	/* Size of UAR register table. */
> +	void *uar_table[];
> +	/* Table of UAR registers for each process. */ };
> +
> +#define MLX5_PROC_PRIV(port_id) \
> +	((struct mlx5_proc_priv *)rte_eth_devices[port_id].process_private)
> +
>  struct mlx5_priv {
>  	LIST_ENTRY(mlx5_priv) mem_event_cb;
>  	/**< Called by memory event callback. */ diff --git
> a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c index
> 9ae9dddd3c..42297f11c9 100644
> --- a/drivers/net/mlx5/mlx5_ethdev.c
> +++ b/drivers/net/mlx5/mlx5_ethdev.c
> @@ -382,6 +382,8 @@ int
>  mlx5_dev_configure(struct rte_eth_dev *dev)  {
>  	struct mlx5_priv *priv = dev->data->dev_private;
> +	struct mlx5_proc_priv *ppriv;
> +	size_t ppriv_size;
>  	unsigned int rxqs_n = dev->data->nb_rx_queues;
>  	unsigned int txqs_n = dev->data->nb_tx_queues;
>  	unsigned int i;
> @@ -450,6 +452,21 @@ mlx5_dev_configure(struct rte_eth_dev *dev)
>  		if (++j == rxqs_n)
>  			j = 0;
>  	}
> +	/*
> +	 * UAR register table follows the process private structure. BlueFlame
> +	 * registers for Tx queues come first and registers for Rx queues
> +	 * follows.
> +	 */
> +	ppriv_size = sizeof(struct mlx5_proc_priv) +
> +		     (priv->rxqs_n + priv->txqs_n) * sizeof(void *);

Ditto. 

> +	ppriv = rte_malloc_socket("mlx5_proc_priv", ppriv_size,
> +				  RTE_CACHE_LINE_SIZE, dev->device-
> >numa_node);
> +	if (!ppriv) {
> +		rte_errno = ENOMEM;
> +		return -rte_errno;
> +	}
> +	ppriv->uar_table_sz = ppriv_size;
> +	dev->process_private = ppriv;
>  	return 0;
>  }
> 
> diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
> index 7b58063ceb..5d49892429 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.h
> +++ b/drivers/net/mlx5/mlx5_rxtx.h
> @@ -201,8 +201,8 @@ struct mlx5_txq_data {
>  	volatile void *wqes; /* Work queue (use volatile to write into). */
>  	volatile uint32_t *qp_db; /* Work queue doorbell. */
>  	volatile uint32_t *cq_db; /* Completion queue doorbell. */
> -	volatile void *bf_reg; /* Blueflame register remapped. */
>  	struct rte_mbuf *(*elts)[]; /* TX elements. */
> +	uint16_t port_id; /* Port ID of device. */
>  	uint16_t idx; /* Queue index. */
>  	struct mlx5_txq_stats stats; /* TX queue counters. */  #ifndef
> RTE_ARCH_64 @@ -231,9 +231,12 @@ struct mlx5_txq_ctrl {
>  	struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
>  	struct mlx5_priv *priv; /* Back pointer to private data. */
>  	off_t uar_mmap_offset; /* UAR mmap offset for non-primary
> process. */
> -	volatile void *bf_reg_orig; /* Blueflame register from verbs. */
> +	void *bf_reg; /* BlueFlame register from Verbs. */

I guess you keep this one in order to get the VA offset for the secondary mapping, right? Otherwise we could take the bf_reg from the UAR table in the process private structure.

If so, it would be better to rename it to uar_page_offset (or another name you like) in order to avoid field duplication.

>  };
> 
> +#define MLX5_TX_BFREG(txq) \
> +		(MLX5_PROC_PRIV((txq)->port_id)->uar_table[(txq)->idx])
> +
>  /* mlx5_rxq.c */
> 
>  extern uint8_t rss_hash_default_key[];
> @@ -301,7 +304,7 @@ uint64_t mlx5_get_rx_queue_offloads(struct
> rte_eth_dev *dev);  int mlx5_tx_queue_setup(struct rte_eth_dev *dev,
> uint16_t idx, uint16_t desc,
>  			unsigned int socket, const struct rte_eth_txconf
> *conf);  void mlx5_tx_queue_release(void *dpdk_txq); -int
> mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd);
> +int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
>  struct mlx5_txq_ibv *mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t
> idx);  struct mlx5_txq_ibv *mlx5_txq_ibv_get(struct rte_eth_dev *dev,
> uint16_t idx);  int mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv); @@
> -704,7 +707,7 @@ static __rte_always_inline void
> mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct
> mlx5_wqe *wqe,
>  		       int cond)
>  {
> -	uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
> +	uint64_t *dst = MLX5_TX_BFREG(txq);

I guess there is no perf penalty due to this change, right?
Would you consider prefetching the address before the doorbell logic, just to be on the safe side?

>  	volatile uint64_t *src = ((volatile uint64_t *)wqe);
> 
>  	rte_cio_wmb();
> diff --git a/drivers/net/mlx5/mlx5_trigger.c
> b/drivers/net/mlx5/mlx5_trigger.c index 7c1e5594d6..b7fde35758 100644
> --- a/drivers/net/mlx5/mlx5_trigger.c
> +++ b/drivers/net/mlx5/mlx5_trigger.c
> @@ -58,12 +58,6 @@ mlx5_txq_start(struct rte_eth_dev *dev)
>  			goto error;
>  		}
>  	}
> -	ret = mlx5_tx_uar_remap(dev, priv->sh->ctx->cmd_fd);
> -	if (ret) {
> -		/* Adjust index for rollback. */
> -		i = priv->txqs_n - 1;
> -		goto error;
> -	}
>  	return 0;
>  error:
>  	ret = rte_errno; /* Save rte_errno before cleanup. */ diff --git
> a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c index
> 4bd08cb035..5fb1761955 100644
> --- a/drivers/net/mlx5/mlx5_txq.c
> +++ b/drivers/net/mlx5/mlx5_txq.c
> @@ -229,13 +229,99 @@ mlx5_tx_queue_release(void *dpdk_txq)
>  		}
>  }
> 
> +/**
> + * Initialize Tx UAR registers for primary process.
> + *
> + * @param txq_ctrl
> + *   Pointer to Tx queue control structure.
> + */
> +static void
> +txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl) {
> +	struct mlx5_priv *priv = txq_ctrl->priv;
> +	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
> +
> +	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
> +	assert(ppriv);
> +	ppriv->uar_table[txq_ctrl->txq.idx] = txq_ctrl->bf_reg; #ifndef
> +RTE_ARCH_64
> +	struct mlx5_priv *priv = txq_ctrl->priv;
> +	struct mlx5_txq_data *txq = &txq_ctrl->txq;
> +	unsigned int lock_idx;
> +	/* Assign an UAR lock according to UAR page number */
> +	lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
> +		   MLX5_UAR_PAGE_NUM_MASK;
> +	txq->uar_lock = &priv->uar_lock[lock_idx]; #endif }
> 
>  /**
> - * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
> - * Both primary and secondary process do mmap to make UAR address
> - * aligned.
> + * Remap UAR register of a Tx queue for secondary process.
>   *
> - * @param[in] dev
> + * Remapped address is stored at the table in the process private
> +structure of
> + * the device, indexed by queue index.
> + *
> + * @param txq_ctrl
> + *   Pointer to Tx queue control structure.
> + * @param fd
> + *   Verbs file descriptor to map UAR pages.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +static int
> +txq_uar_init_secondary(struct mlx5_txq_ctrl *txq_ctrl, int fd) {
> +	struct mlx5_priv *priv = txq_ctrl->priv;
> +	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
> +	struct mlx5_txq_data *txq = &txq_ctrl->txq;
> +	void *addr;
> +	uintptr_t uar_va;
> +	uintptr_t offset;
> +	const size_t page_size = sysconf(_SC_PAGESIZE);
> +
> +	assert(ppriv);
> +	/*
> +	 * As rdma-core, UARs are mapped in size of OS page
> +	 * size. Ref to libmlx5 function: mlx5_init_context()
> +	 */
> +	uar_va = (uintptr_t)txq_ctrl->bf_reg;
> +	offset = uar_va & (page_size - 1); /* Offset in page. */
> +	addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
> +			txq_ctrl->uar_mmap_offset);
> +	if (addr == MAP_FAILED) {
> +		DRV_LOG(ERR,
> +			"port %u mmap failed for BF reg of txq %u",
> +			txq->port_id, txq->idx);
> +		rte_errno = ENXIO;
> +		return -rte_errno;
> +	}
> +	addr = RTE_PTR_ADD(addr, offset);
> +	ppriv->uar_table[txq->idx] = addr;
> +	return 0;
> +}
> +
> +/**
> + * Unmap UAR register of a Tx queue for secondary process.
> + *
> + * @param txq_ctrl
> + *   Pointer to Tx queue control structure.
> + */
> +static void
> +txq_uar_uninit_secondary(struct mlx5_txq_ctrl *txq_ctrl) {
> +	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(txq_ctrl-
> >priv));
> +	const size_t page_size = sysconf(_SC_PAGESIZE);
> +	void *addr;
> +
> +	addr = ppriv->uar_table[txq_ctrl->txq.idx];
> +	munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size); }
> +
> +/**
> + * Initialize Tx UAR registers for secondary process.
> + *
> + * @param dev
>   *   Pointer to Ethernet device.
>   * @param fd
>   *   Verbs file descriptor to map UAR pages.
> @@ -244,81 +330,36 @@ mlx5_tx_queue_release(void *dpdk_txq)
>   *   0 on success, a negative errno value otherwise and rte_errno is set.
>   */
>  int
> -mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
> +mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd)
>  {
>  	struct mlx5_priv *priv = dev->data->dev_private;
> -	unsigned int i, j;
> -	uintptr_t pages[priv->txqs_n];
> -	unsigned int pages_n = 0;
> -	uintptr_t uar_va;
> -	uintptr_t off;
> -	void *addr;
> -	void *ret;
>  	struct mlx5_txq_data *txq;
>  	struct mlx5_txq_ctrl *txq_ctrl;
> -	int already_mapped;
> -	size_t page_size = sysconf(_SC_PAGESIZE);
> -#ifndef RTE_ARCH_64
> -	unsigned int lock_idx;
> -#endif
> +	unsigned int i;
> +	int ret;
> 
> -	memset(pages, 0, priv->txqs_n * sizeof(uintptr_t));
> -	/*
> -	 * As rdma-core, UARs are mapped in size of OS page size.
> -	 * Use aligned address to avoid duplicate mmap.
> -	 * Ref to libmlx5 function: mlx5_init_context()
> -	 */
> +	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
>  	for (i = 0; i != priv->txqs_n; ++i) {
>  		if (!(*priv->txqs)[i])
>  			continue;
>  		txq = (*priv->txqs)[i];
>  		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
>  		assert(txq->idx == (uint16_t)i);
> -		/* UAR addr form verbs used to find dup and offset in page.
> */
> -		uar_va = (uintptr_t)txq_ctrl->bf_reg_orig;
> -		off = uar_va & (page_size - 1); /* offset in page. */
> -		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page
> addr. */
> -		already_mapped = 0;
> -		for (j = 0; j != pages_n; ++j) {
> -			if (pages[j] == uar_va) {
> -				already_mapped = 1;
> -				break;
> -			}
> -		}
> -		/* new address in reserved UAR address space. */
> -		addr = RTE_PTR_ADD(mlx5_shared_data->uar_base,
> -				   uar_va & (uintptr_t)(MLX5_UAR_SIZE - 1));
> -		if (!already_mapped) {
> -			pages[pages_n++] = uar_va;
> -			/* fixed mmap to specified address in reserved
> -			 * address space.
> -			 */
> -			ret = mmap(addr, page_size,
> -				   PROT_WRITE, MAP_FIXED | MAP_SHARED,
> fd,
> -				   txq_ctrl->uar_mmap_offset);
> -			if (ret != addr) {
> -				/* fixed mmap have to return same address
> */
> -				DRV_LOG(ERR,
> -					"port %u call to mmap failed on UAR"
> -					" for txq %u",
> -					dev->data->port_id, txq->idx);
> -				rte_errno = ENXIO;
> -				return -rte_errno;
> -			}
> -		}
> -		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save
> once */
> -			txq_ctrl->txq.bf_reg = RTE_PTR_ADD((void *)addr,
> off);
> -		else
> -			assert(txq_ctrl->txq.bf_reg ==
> -			       RTE_PTR_ADD((void *)addr, off));
> -#ifndef RTE_ARCH_64
> -		/* Assign a UAR lock according to UAR page number */
> -		lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
> -			   MLX5_UAR_PAGE_NUM_MASK;
> -		txq->uar_lock = &priv->uar_lock[lock_idx];
> -#endif
> +		ret = txq_uar_init_secondary(txq_ctrl, fd);
> +		if (ret)
> +			goto error;
>  	}
>  	return 0;
> +error:
> +	/* Rollback. */
> +	do {
> +		if (!(*priv->txqs)[i])
> +			continue;
> +		txq = (*priv->txqs)[i];
> +		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
> +		txq_uar_uninit_secondary(txq_ctrl);
> +	} while (i--);
> +	return -rte_errno;
>  }
> 
>  /**
> @@ -507,7 +548,6 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev,
> uint16_t idx)
>  	txq_data->wqes = qp.sq.buf;
>  	txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
>  	txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
> -	txq_ctrl->bf_reg_orig = qp.bf.reg;
>  	txq_data->cq_db = cq_info.dbrec;
>  	txq_data->cqes =
>  		(volatile struct mlx5_cqe (*)[])
> @@ -521,6 +561,8 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev,
> uint16_t idx)
>  	txq_ibv->qp = tmpl.qp;
>  	txq_ibv->cq = tmpl.cq;
>  	rte_atomic32_inc(&txq_ibv->refcnt);
> +	txq_ctrl->bf_reg = qp.bf.reg;
> +	txq_uar_init(txq_ctrl);
>  	if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
>  		txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
>  		DRV_LOG(DEBUG, "port %u: uar_mmap_offset 0x%lx", @@ -
> 778,6 +820,7 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx,
> uint16_t desc,
>  	tmpl->priv = priv;
>  	tmpl->socket = socket;
>  	tmpl->txq.elts_n = log2above(desc);
> +	tmpl->txq.port_id = dev->data->port_id;
>  	tmpl->txq.idx = idx;
>  	txq_set_params(tmpl);
>  	DRV_LOG(DEBUG, "port %u device_attr.max_qp_wr is %d", @@ -
> 836,15 +879,12 @@ mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx)
> {
>  	struct mlx5_priv *priv = dev->data->dev_private;
>  	struct mlx5_txq_ctrl *txq;
> -	size_t page_size = sysconf(_SC_PAGESIZE);
> 
>  	if (!(*priv->txqs)[idx])
>  		return 0;
>  	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
>  	if (txq->ibv && !mlx5_txq_ibv_release(txq->ibv))
>  		txq->ibv = NULL;
> -	munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->txq.bf_reg,
> page_size),
> -	       page_size);
>  	if (rte_atomic32_dec_and_test(&txq->refcnt)) {
>  		txq_free_elts(txq);
>  		mlx5_mr_btree_free(&txq->txq.mr_ctrl.cache_bh);
> --
> 2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [dpdk-dev] [PATCH v3 3/4] net/mlx5: remove device register remap
  2019-04-08  5:48     ` Shahaf Shuler
  2019-04-08  5:48       ` Shahaf Shuler
@ 2019-04-09 19:36       ` Yongseok Koh
  2019-04-09 19:36         ` Yongseok Koh
  1 sibling, 1 reply; 66+ messages in thread
From: Yongseok Koh @ 2019-04-09 19:36 UTC (permalink / raw)
  To: Shahaf Shuler; +Cc: dev


> On Apr 7, 2019, at 10:48 PM, Shahaf Shuler <shahafs@mellanox.com> wrote:
> 
> Hi Koh,
> 
> See small comments below. Same for mlx4 patch.
> 
> 
> Friday, April 5, 2019 4:34 AM, Yongseok Koh:
>> Subject: [dpdk-dev] [PATCH v3 3/4] net/mlx5: remove device register remap
>> 
>> UAR (User Access Region) register does not need to be remapped for
>> primary process but it should be remapped only for secondary process. UAR
>> register table is in the process private structure in rte_eth_devices[],
>> 	(struct mlx5_proc_priv *)rte_eth_devices[port_id].process_private
>> 
>> The actual UAR table follows the data structure and the table is used for both
>> Tx and Rx.
>> 
>> For Tx, BlueFlame in UAR is used to ring the doorbell. MLX5_TX_BFREG(txq)
>> is defined to get a register for the txq. Processes access its own private data
>> to acquire the register from the UAR table.
>> 
>> For Rx, the doorbell in UAR is required in arming CQ event. However, it is a
>> known issue that the register isn't remapped for secondary process.
>> 
>> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
>> ---
>> drivers/net/mlx5/mlx5.c         | 198 +++++-----------------------------------
>> drivers/net/mlx5/mlx5.h         |  15 ++-
>> drivers/net/mlx5/mlx5_ethdev.c  |  17 ++++
>> drivers/net/mlx5/mlx5_rxtx.h    |  11 ++-
>> drivers/net/mlx5/mlx5_trigger.c |   6 --
>> drivers/net/mlx5/mlx5_txq.c     | 180 ++++++++++++++++++++++-------------
>> -
> 
> [...]
> 
>> /**
>> @@ -1182,12 +1010,32 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
>> 	}
>> 	DRV_LOG(DEBUG, "naming Ethernet device \"%s\"", name);
>> 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
>> +		struct mlx5_proc_priv *ppriv;
>> +		size_t ppriv_size;
>> +
>> 		eth_dev = rte_eth_dev_attach_secondary(name);
>> 		if (eth_dev == NULL) {
>> 			DRV_LOG(ERR, "can not attach rte ethdev");
>> 			rte_errno = ENOMEM;
>> 			return NULL;
>> 		}
>> +		priv = eth_dev->data->dev_private;
>> +		/*
>> +		 * UAR register table follows the process private structure.
>> +		 * BlueFlame registers for Tx queues come first and registers
>> +		 * for Rx queues follows.
>> +		 */
>> +		ppriv_size = sizeof(struct mlx5_proc_priv) +
>> +			     (priv->rxqs_n + priv->txqs_n) * sizeof(void *);
> 
> Why you add also the rxqs_n? why not only the txqs?

I wanted to make room for the registers for arming Rx CQ, which will be fixed
soon. But I think it would be better to avoid confusion in this patch. Will remove.

[...]
>> +	ppriv = rte_malloc_socket("mlx5_proc_priv", ppriv_size,
>> +				  RTE_CACHE_LINE_SIZE, dev->device-
>>> numa_node);
>> +	if (!ppriv) {
>> +		rte_errno = ENOMEM;
>> +		return -rte_errno;
>> +	}
>> +	ppriv->uar_table_sz = ppriv_size;
>> +	dev->process_private = ppriv;
>> 	return 0;
>> }
>> 
>> diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
>> index 7b58063ceb..5d49892429 100644
>> --- a/drivers/net/mlx5/mlx5_rxtx.h
>> +++ b/drivers/net/mlx5/mlx5_rxtx.h
>> @@ -201,8 +201,8 @@ struct mlx5_txq_data {
>> 	volatile void *wqes; /* Work queue (use volatile to write into). */
>> 	volatile uint32_t *qp_db; /* Work queue doorbell. */
>> 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
>> -	volatile void *bf_reg; /* Blueflame register remapped. */
>> 	struct rte_mbuf *(*elts)[]; /* TX elements. */
>> +	uint16_t port_id; /* Port ID of device. */
>> 	uint16_t idx; /* Queue index. */
>> 	struct mlx5_txq_stats stats; /* TX queue counters. */  #ifndef
>> RTE_ARCH_64 @@ -231,9 +231,12 @@ struct mlx5_txq_ctrl {
>> 	struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
>> 	struct mlx5_priv *priv; /* Back pointer to private data. */
>> 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary
>> process. */
>> -	volatile void *bf_reg_orig; /* Blueflame register from verbs. */
>> +	void *bf_reg; /* BlueFlame register from Verbs. */
> 
> I guess you keep this one in order to get the VA offset for the secondary mapping, right? Because otherwise we can take the bf_reg from the UAR table on the process private.
> 
> If so, better to rename it to uar_page_offset (or other name you like) in order to avoid fields duplication. 

It doesn't hold the offset (the offset can be calculated when remapping).
It is the original BF register address obtained from QP creation.

>> };
>> 
>> +#define MLX5_TX_BFREG(txq) \
>> +		(MLX5_PROC_PRIV((txq)->port_id)->uar_table[(txq)->idx])
>> +
>> /* mlx5_rxq.c */
>> 
>> extern uint8_t rss_hash_default_key[];
>> @@ -301,7 +304,7 @@ uint64_t mlx5_get_rx_queue_offloads(struct
>> rte_eth_dev *dev);  int mlx5_tx_queue_setup(struct rte_eth_dev *dev,
>> uint16_t idx, uint16_t desc,
>> 			unsigned int socket, const struct rte_eth_txconf
>> *conf);  void mlx5_tx_queue_release(void *dpdk_txq); -int
>> mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd);
>> +int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
>> struct mlx5_txq_ibv *mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t
>> idx);  struct mlx5_txq_ibv *mlx5_txq_ibv_get(struct rte_eth_dev *dev,
>> uint16_t idx);  int mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv); @@
>> -704,7 +707,7 @@ static __rte_always_inline void
>> mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct
>> mlx5_wqe *wqe,
>> 		       int cond)
>> {
>> -	uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
>> +	uint64_t *dst = MLX5_TX_BFREG(txq);
> 
> I guess no perf penalty due to this change right?

I've confirmed no perf drop on x86 and ARM (BlueField).
Any drop, if there was one, was smaller than the jitter.

> Would you consider to prefetch the addr before the db logic just to be on the safe side?

As it is not a random/sequential access but a repeated access to the same
cacheline, I wouldn't add a prefetch here. Sometimes a prefetch has a small cost.

Will send out a new version just in case you agree and want to merge it.
If you still want to change something, please comment on the new one.


Thanks,
Yongseok

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [dpdk-dev] [PATCH v3 3/4] net/mlx5: remove device register remap
  2019-04-09 19:36       ` Yongseok Koh
@ 2019-04-09 19:36         ` Yongseok Koh
  0 siblings, 0 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-09 19:36 UTC (permalink / raw)
  To: Shahaf Shuler; +Cc: dev


> On Apr 7, 2019, at 10:48 PM, Shahaf Shuler <shahafs@mellanox.com> wrote:
> 
> Hi Koh,
> 
> See small comments below. Same for mlx4 patch.
> 
> 
> Friday, April 5, 2019 4:34 AM, Yongseok Koh:
>> Subject: [dpdk-dev] [PATCH v3 3/4] net/mlx5: remove device register remap
>> 
>> UAR (User Access Region) register does not need to be remapped for
>> primary process but it should be remapped only for secondary process. UAR
>> register table is in the process private structure in rte_eth_devices[],
>> 	(struct mlx5_proc_priv *)rte_eth_devices[port_id].process_private
>> 
>> The actual UAR table follows the data structure and the table is used for both
>> Tx and Rx.
>> 
>> For Tx, BlueFlame in UAR is used to ring the doorbell. MLX5_TX_BFREG(txq)
>> is defined to get a register for the txq. Processes access its own private data
>> to acquire the register from the UAR table.
>> 
>> For Rx, the doorbell in UAR is required in arming CQ event. However, it is a
>> known issue that the register isn't remapped for secondary process.
>> 
>> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
>> ---
>> drivers/net/mlx5/mlx5.c         | 198 +++++-----------------------------------
>> drivers/net/mlx5/mlx5.h         |  15 ++-
>> drivers/net/mlx5/mlx5_ethdev.c  |  17 ++++
>> drivers/net/mlx5/mlx5_rxtx.h    |  11 ++-
>> drivers/net/mlx5/mlx5_trigger.c |   6 --
>> drivers/net/mlx5/mlx5_txq.c     | 180 ++++++++++++++++++++++-------------
>> -
> 
> [...]
> 
>> /**
>> @@ -1182,12 +1010,32 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
>> 	}
>> 	DRV_LOG(DEBUG, "naming Ethernet device \"%s\"", name);
>> 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
>> +		struct mlx5_proc_priv *ppriv;
>> +		size_t ppriv_size;
>> +
>> 		eth_dev = rte_eth_dev_attach_secondary(name);
>> 		if (eth_dev == NULL) {
>> 			DRV_LOG(ERR, "can not attach rte ethdev");
>> 			rte_errno = ENOMEM;
>> 			return NULL;
>> 		}
>> +		priv = eth_dev->data->dev_private;
>> +		/*
>> +		 * UAR register table follows the process private structure.
>> +		 * BlueFlame registers for Tx queues come first and registers
>> +		 * for Rx queues follows.
>> +		 */
>> +		ppriv_size = sizeof(struct mlx5_proc_priv) +
>> +			     (priv->rxqs_n + priv->txqs_n) * sizeof(void *);
> 
> Why you add also the rxqs_n? why not only the txqs?

I wanted to make room for the registers for arming Rx CQ, which will be fixed
soon. But I think it would be better to avoid confusion in this patch. Will remove.

[...]
>> +	ppriv = rte_malloc_socket("mlx5_proc_priv", ppriv_size,
>> +				  RTE_CACHE_LINE_SIZE, dev->device-
>>> numa_node);
>> +	if (!ppriv) {
>> +		rte_errno = ENOMEM;
>> +		return -rte_errno;
>> +	}
>> +	ppriv->uar_table_sz = ppriv_size;
>> +	dev->process_private = ppriv;
>> 	return 0;
>> }
>> 
>> diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
>> index 7b58063ceb..5d49892429 100644
>> --- a/drivers/net/mlx5/mlx5_rxtx.h
>> +++ b/drivers/net/mlx5/mlx5_rxtx.h
>> @@ -201,8 +201,8 @@ struct mlx5_txq_data {
>> 	volatile void *wqes; /* Work queue (use volatile to write into). */
>> 	volatile uint32_t *qp_db; /* Work queue doorbell. */
>> 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
>> -	volatile void *bf_reg; /* Blueflame register remapped. */
>> 	struct rte_mbuf *(*elts)[]; /* TX elements. */
>> +	uint16_t port_id; /* Port ID of device. */
>> 	uint16_t idx; /* Queue index. */
>> 	struct mlx5_txq_stats stats; /* TX queue counters. */  #ifndef
>> RTE_ARCH_64 @@ -231,9 +231,12 @@ struct mlx5_txq_ctrl {
>> 	struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
>> 	struct mlx5_priv *priv; /* Back pointer to private data. */
>> 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary
>> process. */
>> -	volatile void *bf_reg_orig; /* Blueflame register from verbs. */
>> +	void *bf_reg; /* BlueFlame register from Verbs. */
> 
> I guess you keep this one in order to get the VA offset for the secondary mapping, right? Because otherwise we can take the bf_reg from the UAR table on the process private.
> 
> If so, better to rename it to uar_page_offset (or other name you like) in order to avoid fields duplication. 

It doesn't hold the offset (the offset can be calculated when remapping).
It is the original BF register address obtained from QP creation.

>> };
>> 
>> +#define MLX5_TX_BFREG(txq) \
>> +		(MLX5_PROC_PRIV((txq)->port_id)->uar_table[(txq)->idx])
>> +
>> /* mlx5_rxq.c */
>> 
>> extern uint8_t rss_hash_default_key[];
>> @@ -301,7 +304,7 @@ uint64_t mlx5_get_rx_queue_offloads(struct
>> rte_eth_dev *dev);  int mlx5_tx_queue_setup(struct rte_eth_dev *dev,
>> uint16_t idx, uint16_t desc,
>> 			unsigned int socket, const struct rte_eth_txconf
>> *conf);  void mlx5_tx_queue_release(void *dpdk_txq); -int
>> mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd);
>> +int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
>> struct mlx5_txq_ibv *mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t
>> idx);  struct mlx5_txq_ibv *mlx5_txq_ibv_get(struct rte_eth_dev *dev,
>> uint16_t idx);  int mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv); @@
>> -704,7 +707,7 @@ static __rte_always_inline void
>> mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct
>> mlx5_wqe *wqe,
>> 		       int cond)
>> {
>> -	uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
>> +	uint64_t *dst = MLX5_TX_BFREG(txq);
> 
> I guess no perf penalty due to this change right?

I've confirmed no perf drop on x86 and ARM (BlueField).
Any drop, if there was one, was smaller than the jitter.

> Would you consider to prefetch the addr before the db logic just to be on the safe side?

As it is not a random/sequential access but a repeated access to the same
cacheline, I wouldn't add a prefetch here. Sometimes a prefetch has a small cost.

Will send out a new version just in case you agree and want to merge it.
If you still want to change something, please comment on the new one.


Thanks,
Yongseok

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v4 0/4] net/mlx: remove device register remap
  2019-03-25 19:36 [dpdk-dev] [PATCH 0/3] net/mlx: remove device register remap Yongseok Koh
                   ` (5 preceding siblings ...)
  2019-04-05  1:33 ` [dpdk-dev] [PATCH v3 0/4] net/mlx: " Yongseok Koh
@ 2019-04-09 23:13 ` Yongseok Koh
  2019-04-09 23:13   ` Yongseok Koh
                     ` (5 more replies)
  2019-04-10 18:41 ` [dpdk-dev] [PATCH v5 " Yongseok Koh
  7 siblings, 6 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-09 23:13 UTC (permalink / raw)
  To: shahafs; +Cc: dev

This patchset lifts the requirement of reserving a huge virtual address space
and remapping the device UAR registers onto it in order to use the same address
in both the primary and secondary processes.

v4:
* add mlx4_proc_priv_[init|uninit]() to avoid duplicate code
* remove the number of Rx queues from the ppriv size calculation
* move freeing ppriv to mlx4_dev_close() from mlx4_dev_stop()
* rebase on top of "net/mlx4: fix Tx doorbell register unmap" [1]

v3:
* move UAR table to per-process storage

v2:
* rebase on the latest branch tip
* fix a bug

[1] http://patches.dpdk.org/patch/52435/

Yongseok Koh (4):
  net/mlx5: fix recursive inclusion of header file
  net/mlx5: remove redundant queue index
  net/mlx5: remove device register remap
  net/mlx4: remove device register remap

 drivers/net/mlx4/mlx4.c            | 255 +++++++++----------------------------
 drivers/net/mlx4/mlx4.h            |  15 ++-
 drivers/net/mlx4/mlx4_prm.h        |   3 +-
 drivers/net/mlx4/mlx4_rxtx.c       |   2 +-
 drivers/net/mlx4/mlx4_rxtx.h       |   6 +-
 drivers/net/mlx4/mlx4_txq.c        | 170 +++++++++++++++----------
 drivers/net/mlx5/mlx5.c            | 228 ++++++++-------------------------
 drivers/net/mlx5/mlx5.h            |  17 ++-
 drivers/net/mlx5/mlx5_ethdev.c     |   3 +
 drivers/net/mlx5/mlx5_flow.c       |   5 +-
 drivers/net/mlx5/mlx5_flow_dv.c    |   4 +-
 drivers/net/mlx5/mlx5_flow_verbs.c |   5 +-
 drivers/net/mlx5/mlx5_rxq.c        |  29 ++---
 drivers/net/mlx5/mlx5_rxtx.h       |  21 +--
 drivers/net/mlx5/mlx5_stats.c      |  15 +--
 drivers/net/mlx5/mlx5_trigger.c    |   8 +-
 drivers/net/mlx5/mlx5_txq.c        | 199 +++++++++++++++++------------
 drivers/net/mlx5/mlx5_vlan.c       |   3 +-
 18 files changed, 413 insertions(+), 575 deletions(-)

-- 
2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v4 0/4] net/mlx: remove device register remap
  2019-04-09 23:13 ` [dpdk-dev] [PATCH v4 0/4] net/mlx: " Yongseok Koh
@ 2019-04-09 23:13   ` Yongseok Koh
  2019-04-09 23:13   ` [dpdk-dev] [PATCH v4 1/4] net/mlx5: fix recursive inclusion of header file Yongseok Koh
                     ` (4 subsequent siblings)
  5 siblings, 0 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-09 23:13 UTC (permalink / raw)
  To: shahafs; +Cc: dev

This patchset lifts the requirement of reserving a huge virtual address space
and remapping the device UAR registers onto it in order to use the same address
in both the primary and secondary processes.

v4:
* add mlx4_proc_priv_[init|uninit]() to avoid duplicate code
* remove the number of Rx queues from the ppriv size calculation
* move freeing ppriv to mlx4_dev_close() from mlx4_dev_stop()
* rebase on top of "net/mlx4: fix Tx doorbell register unmap" [1]

v3:
* move UAR table to per-process storage

v2:
* rebase on the latest branch tip
* fix a bug

[1] http://patches.dpdk.org/patch/52435/

Yongseok Koh (4):
  net/mlx5: fix recursive inclusion of header file
  net/mlx5: remove redundant queue index
  net/mlx5: remove device register remap
  net/mlx4: remove device register remap

 drivers/net/mlx4/mlx4.c            | 255 +++++++++----------------------------
 drivers/net/mlx4/mlx4.h            |  15 ++-
 drivers/net/mlx4/mlx4_prm.h        |   3 +-
 drivers/net/mlx4/mlx4_rxtx.c       |   2 +-
 drivers/net/mlx4/mlx4_rxtx.h       |   6 +-
 drivers/net/mlx4/mlx4_txq.c        | 170 +++++++++++++++----------
 drivers/net/mlx5/mlx5.c            | 228 ++++++++-------------------------
 drivers/net/mlx5/mlx5.h            |  17 ++-
 drivers/net/mlx5/mlx5_ethdev.c     |   3 +
 drivers/net/mlx5/mlx5_flow.c       |   5 +-
 drivers/net/mlx5/mlx5_flow_dv.c    |   4 +-
 drivers/net/mlx5/mlx5_flow_verbs.c |   5 +-
 drivers/net/mlx5/mlx5_rxq.c        |  29 ++---
 drivers/net/mlx5/mlx5_rxtx.h       |  21 +--
 drivers/net/mlx5/mlx5_stats.c      |  15 +--
 drivers/net/mlx5/mlx5_trigger.c    |   8 +-
 drivers/net/mlx5/mlx5_txq.c        | 199 +++++++++++++++++------------
 drivers/net/mlx5/mlx5_vlan.c       |   3 +-
 18 files changed, 413 insertions(+), 575 deletions(-)

-- 
2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v4 1/4] net/mlx5: fix recursive inclusion of header file
  2019-04-09 23:13 ` [dpdk-dev] [PATCH v4 0/4] net/mlx: " Yongseok Koh
  2019-04-09 23:13   ` Yongseok Koh
@ 2019-04-09 23:13   ` Yongseok Koh
  2019-04-09 23:13     ` Yongseok Koh
  2019-04-09 23:13   ` [dpdk-dev] [PATCH v4 2/4] net/mlx5: remove redundant queue index Yongseok Koh
                     ` (3 subsequent siblings)
  5 siblings, 1 reply; 66+ messages in thread
From: Yongseok Koh @ 2019-04-09 23:13 UTC (permalink / raw)
  To: shahafs; +Cc: dev

mlx5.h includes mlx5_rxtx.h and mlx5_rxtx.h includes mlx5.h recursively.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Shahaf Shuler <shahafs@mellanox.com>
---
 drivers/net/mlx5/mlx5.h            | 1 -
 drivers/net/mlx5/mlx5_flow.c       | 5 +++--
 drivers/net/mlx5/mlx5_flow_dv.c    | 4 +++-
 drivers/net/mlx5/mlx5_flow_verbs.c | 5 +++--
 drivers/net/mlx5/mlx5_vlan.c       | 3 ++-
 5 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index c9b2251bf2..960a2f8191 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -33,7 +33,6 @@
 
 #include "mlx5_utils.h"
 #include "mlx5_mr.h"
-#include "mlx5_rxtx.h"
 #include "mlx5_autoconf.h"
 #include "mlx5_defs.h"
 
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 9dc492ad2d..1c78a5f8ea 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -30,9 +30,10 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
-#include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_glue.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
 
 /* Dev ops structure defined in mlx5.c */
 extern const struct eth_dev_ops mlx5_dev_ops;
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 52be8b32c1..ccb2f7593f 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -29,9 +29,11 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
 #include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
+
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 
 #ifndef HAVE_IBV_FLOW_DEVX_COUNTERS
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index 49dd13e6d2..3956df1a7e 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -29,9 +29,10 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
-#include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_glue.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
 
 #define VERBS_SPEC_INNER(item_flags) \
 	(!!((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ? IBV_FLOW_SPEC_INNER : 0)
diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c
index 6568a3a475..4004930942 100644
--- a/drivers/net/mlx5/mlx5_vlan.c
+++ b/drivers/net/mlx5/mlx5_vlan.c
@@ -27,10 +27,11 @@
 #include <rte_ethdev_driver.h>
 #include <rte_common.h>
 
-#include "mlx5_utils.h"
 #include "mlx5.h"
 #include "mlx5_autoconf.h"
 #include "mlx5_glue.h"
+#include "mlx5_rxtx.h"
+#include "mlx5_utils.h"
 
 /**
  * DPDK callback to configure a VLAN filter.
-- 
2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v4 1/4] net/mlx5: fix recursive inclusion of header file
  2019-04-09 23:13   ` [dpdk-dev] [PATCH v4 1/4] net/mlx5: fix recursive inclusion of header file Yongseok Koh
@ 2019-04-09 23:13     ` Yongseok Koh
  0 siblings, 0 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-09 23:13 UTC (permalink / raw)
  To: shahafs; +Cc: dev

mlx5.h includes mlx5_rxtx.h and mlx5_rxtx.h includes mlx5.h recursively.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Shahaf Shuler <shahafs@mellanox.com>
---
 drivers/net/mlx5/mlx5.h            | 1 -
 drivers/net/mlx5/mlx5_flow.c       | 5 +++--
 drivers/net/mlx5/mlx5_flow_dv.c    | 4 +++-
 drivers/net/mlx5/mlx5_flow_verbs.c | 5 +++--
 drivers/net/mlx5/mlx5_vlan.c       | 3 ++-
 5 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index c9b2251bf2..960a2f8191 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -33,7 +33,6 @@
 
 #include "mlx5_utils.h"
 #include "mlx5_mr.h"
-#include "mlx5_rxtx.h"
 #include "mlx5_autoconf.h"
 #include "mlx5_defs.h"
 
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 9dc492ad2d..1c78a5f8ea 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -30,9 +30,10 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
-#include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_glue.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
 
 /* Dev ops structure defined in mlx5.c */
 extern const struct eth_dev_ops mlx5_dev_ops;
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 52be8b32c1..ccb2f7593f 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -29,9 +29,11 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
 #include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
+
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 
 #ifndef HAVE_IBV_FLOW_DEVX_COUNTERS
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index 49dd13e6d2..3956df1a7e 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -29,9 +29,10 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
-#include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_glue.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
 
 #define VERBS_SPEC_INNER(item_flags) \
 	(!!((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ? IBV_FLOW_SPEC_INNER : 0)
diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c
index 6568a3a475..4004930942 100644
--- a/drivers/net/mlx5/mlx5_vlan.c
+++ b/drivers/net/mlx5/mlx5_vlan.c
@@ -27,10 +27,11 @@
 #include <rte_ethdev_driver.h>
 #include <rte_common.h>
 
-#include "mlx5_utils.h"
 #include "mlx5.h"
 #include "mlx5_autoconf.h"
 #include "mlx5_glue.h"
+#include "mlx5_rxtx.h"
+#include "mlx5_utils.h"
 
 /**
  * DPDK callback to configure a VLAN filter.
-- 
2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v4 2/4] net/mlx5: remove redundant queue index
  2019-04-09 23:13 ` [dpdk-dev] [PATCH v4 0/4] net/mlx: " Yongseok Koh
  2019-04-09 23:13   ` Yongseok Koh
  2019-04-09 23:13   ` [dpdk-dev] [PATCH v4 1/4] net/mlx5: fix recursive inclusion of header file Yongseok Koh
@ 2019-04-09 23:13   ` Yongseok Koh
  2019-04-09 23:13     ` Yongseok Koh
  2019-04-09 23:13   ` [dpdk-dev] [PATCH v4 3/4] net/mlx5: remove device register remap Yongseok Koh
                     ` (2 subsequent siblings)
  5 siblings, 1 reply; 66+ messages in thread
From: Yongseok Koh @ 2019-04-09 23:13 UTC (permalink / raw)
  To: shahafs; +Cc: dev

Queue index is redundantly stored for both Rx and Tx structures,
e.g. txq_ctrl->idx and txq->stats.idx. Both are consolidated into a single
storage - rxq->idx and txq->idx.

Also, rxq and txq are moved to the beginning of their control structures
(rxq_ctrl and txq_ctrl) for cacheline alignment.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Shahaf Shuler <shahafs@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxq.c     | 29 ++++++++++++++---------------
 drivers/net/mlx5/mlx5_rxtx.h    | 10 ++++------
 drivers/net/mlx5/mlx5_stats.c   | 15 ++++++---------
 drivers/net/mlx5/mlx5_trigger.c |  2 +-
 drivers/net/mlx5/mlx5_txq.c     | 21 ++++++++++-----------
 5 files changed, 35 insertions(+), 42 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index dcb97c2100..8a84b0a1b5 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -156,7 +156,7 @@ rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 	}
 	DRV_LOG(DEBUG,
 		"port %u Rx queue %u allocated and configured %u segments",
-		rxq->port_id, rxq_ctrl->idx, wqe_n);
+		rxq->port_id, rxq->idx, wqe_n);
 	return 0;
 error:
 	err = rte_errno; /* Save rte_errno before cleanup. */
@@ -168,7 +168,7 @@ rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 		(*rxq->mprq_bufs)[i] = NULL;
 	}
 	DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
-		rxq->port_id, rxq_ctrl->idx);
+		rxq->port_id, rxq->idx);
 	rte_errno = err; /* Restore rte_errno. */
 	return -rte_errno;
 }
@@ -241,7 +241,7 @@ rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 	DRV_LOG(DEBUG,
 		"port %u Rx queue %u allocated and configured %u segments"
 		" (max %u packets)",
-		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx, elts_n,
+		PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx, elts_n,
 		elts_n / (1 << rxq_ctrl->rxq.sges_n));
 	return 0;
 error:
@@ -253,7 +253,7 @@ rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 		(*rxq_ctrl->rxq.elts)[i] = NULL;
 	}
 	DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
-		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx);
+		PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx);
 	rte_errno = err; /* Restore rte_errno. */
 	return -rte_errno;
 }
@@ -287,7 +287,7 @@ rxq_free_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 	uint16_t i;
 
 	DRV_LOG(DEBUG, "port %u Multi-Packet Rx queue %u freeing WRs",
-		rxq->port_id, rxq_ctrl->idx);
+		rxq->port_id, rxq->idx);
 	if (rxq->mprq_bufs == NULL)
 		return;
 	assert(mlx5_rxq_check_vec_support(rxq) < 0);
@@ -318,7 +318,7 @@ rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 	uint16_t i;
 
 	DRV_LOG(DEBUG, "port %u Rx queue %u freeing WRs",
-		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx);
+		PORT_ID(rxq_ctrl->priv), rxq->idx);
 	if (rxq->elts == NULL)
 		return;
 	/**
@@ -364,7 +364,7 @@ void
 mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
 {
 	DRV_LOG(DEBUG, "port %u cleaning up Rx queue %u",
-		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx);
+		PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx);
 	if (rxq_ctrl->ibv)
 		mlx5_rxq_ibv_release(rxq_ctrl->ibv);
 	memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
@@ -495,11 +495,11 @@ mlx5_rx_queue_release(void *dpdk_rxq)
 		return;
 	rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	priv = rxq_ctrl->priv;
-	if (!mlx5_rxq_releasable(ETH_DEV(priv), rxq_ctrl->rxq.stats.idx))
+	if (!mlx5_rxq_releasable(ETH_DEV(priv), rxq_ctrl->rxq.idx))
 		rte_panic("port %u Rx queue %u is still used by a flow and"
 			  " cannot be removed\n",
-			  PORT_ID(priv), rxq_ctrl->idx);
-	mlx5_rxq_release(ETH_DEV(priv), rxq_ctrl->rxq.stats.idx);
+			  PORT_ID(priv), rxq->idx);
+	mlx5_rxq_release(ETH_DEV(priv), rxq_ctrl->rxq.idx);
 }
 
 /**
@@ -793,7 +793,7 @@ mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	if (!tmpl) {
 		DRV_LOG(ERR,
 			"port %u Rx queue %u cannot allocate verbs resources",
-			dev->data->port_id, rxq_ctrl->idx);
+			dev->data->port_id, rxq_data->idx);
 		rte_errno = ENOMEM;
 		goto error;
 	}
@@ -1104,7 +1104,7 @@ mlx5_rxq_ibv_verify(struct rte_eth_dev *dev)
 
 	LIST_FOREACH(rxq_ibv, &priv->rxqsibv, next) {
 		DRV_LOG(DEBUG, "port %u Verbs Rx queue %u still referenced",
-			dev->data->port_id, rxq_ibv->rxq_ctrl->idx);
+			dev->data->port_id, rxq_ibv->rxq_ctrl->rxq.idx);
 		++ret;
 	}
 	return ret;
@@ -1470,7 +1470,6 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	tmpl->rxq.port_id = dev->data->port_id;
 	tmpl->priv = priv;
 	tmpl->rxq.mp = mp;
-	tmpl->rxq.stats.idx = idx;
 	tmpl->rxq.elts_n = log2above(desc);
 	tmpl->rxq.rq_repl_thresh =
 		MLX5_VPMD_RXQ_RPLNSH_THRESH(1 << tmpl->rxq.elts_n);
@@ -1479,7 +1478,7 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 #ifndef RTE_ARCH_64
 	tmpl->rxq.uar_lock_cq = &priv->uar_lock_cq;
 #endif
-	tmpl->idx = idx;
+	tmpl->rxq.idx = idx;
 	rte_atomic32_inc(&tmpl->refcnt);
 	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
 	return tmpl;
@@ -1592,7 +1591,7 @@ mlx5_rxq_verify(struct rte_eth_dev *dev)
 
 	LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
 		DRV_LOG(DEBUG, "port %u Rx Queue %u still referenced",
-			dev->data->port_id, rxq_ctrl->idx);
+			dev->data->port_id, rxq_ctrl->rxq.idx);
 		++ret;
 	}
 	return ret;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index ced9945888..7b58063ceb 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -41,7 +41,6 @@
 #define MLX5_FLOW_TUNNEL 5
 
 struct mlx5_rxq_stats {
-	unsigned int idx; /**< Mapping index. */
 #ifdef MLX5_PMD_SOFT_COUNTERS
 	uint64_t ipackets; /**< Total of successfully received packets. */
 	uint64_t ibytes; /**< Total of successfully received bytes. */
@@ -51,7 +50,6 @@ struct mlx5_rxq_stats {
 };
 
 struct mlx5_txq_stats {
-	unsigned int idx; /**< Mapping index. */
 #ifdef MLX5_PMD_SOFT_COUNTERS
 	uint64_t opackets; /**< Total of successfully sent packets. */
 	uint64_t obytes; /**< Total of successfully sent bytes. */
@@ -116,6 +114,7 @@ struct mlx5_rxq_data {
 	struct rte_mempool *mp;
 	struct rte_mempool *mprq_mp; /* Mempool for Multi-Packet RQ. */
 	struct mlx5_mprq_buf *mprq_repl; /* Stashed mbuf for replenish. */
+	uint16_t idx; /* Queue index. */
 	struct mlx5_rxq_stats stats;
 	uint64_t mbuf_initializer; /* Default rearm_data for vectorized Rx. */
 	struct rte_mbuf fake_mbuf; /* elts padding for vectorized Rx. */
@@ -141,14 +140,13 @@ struct mlx5_rxq_ibv {
 
 /* RX queue control descriptor. */
 struct mlx5_rxq_ctrl {
+	struct mlx5_rxq_data rxq; /* Data path structure. */
 	LIST_ENTRY(mlx5_rxq_ctrl) next; /* Pointer to the next element. */
 	rte_atomic32_t refcnt; /* Reference counter. */
 	struct mlx5_rxq_ibv *ibv; /* Verbs elements. */
 	struct mlx5_priv *priv; /* Back pointer to private data. */
-	struct mlx5_rxq_data rxq; /* Data path structure. */
 	unsigned int socket; /* CPU socket ID for allocations. */
 	unsigned int irq:1; /* Whether IRQ is enabled. */
-	uint16_t idx; /* Queue index. */
 	uint32_t flow_mark_n; /* Number of Mark/Flag flows using this Queue. */
 	uint32_t flow_tunnels_n[MLX5_FLOW_TUNNEL]; /* Tunnels counters. */
 };
@@ -205,6 +203,7 @@ struct mlx5_txq_data {
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
 	volatile void *bf_reg; /* Blueflame register remapped. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
+	uint16_t idx; /* Queue index. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
 #ifndef RTE_ARCH_64
 	rte_spinlock_t *uar_lock;
@@ -223,6 +222,7 @@ struct mlx5_txq_ibv {
 
 /* TX queue control descriptor. */
 struct mlx5_txq_ctrl {
+	struct mlx5_txq_data txq; /* Data path structure. */
 	LIST_ENTRY(mlx5_txq_ctrl) next; /* Pointer to the next element. */
 	rte_atomic32_t refcnt; /* Reference counter. */
 	unsigned int socket; /* CPU socket ID for allocations. */
@@ -230,10 +230,8 @@ struct mlx5_txq_ctrl {
 	unsigned int max_tso_header; /* Max TSO header size. */
 	struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
 	struct mlx5_priv *priv; /* Back pointer to private data. */
-	struct mlx5_txq_data txq; /* Data path structure. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
 	volatile void *bf_reg_orig; /* Blueflame register from verbs. */
-	uint16_t idx; /* Queue index. */
 };
 
 /* mlx5_rxq.c */
diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
index 5af199d0d5..ed50667f45 100644
--- a/drivers/net/mlx5/mlx5_stats.c
+++ b/drivers/net/mlx5/mlx5_stats.c
@@ -386,7 +386,7 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 
 		if (rxq == NULL)
 			continue;
-		idx = rxq->stats.idx;
+		idx = rxq->idx;
 		if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
 #ifdef MLX5_PMD_SOFT_COUNTERS
 			tmp.q_ipackets[idx] += rxq->stats.ipackets;
@@ -407,7 +407,7 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 
 		if (txq == NULL)
 			continue;
-		idx = txq->stats.idx;
+		idx = txq->idx;
 		if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
 #ifdef MLX5_PMD_SOFT_COUNTERS
 			tmp.q_opackets[idx] += txq->stats.opackets;
@@ -442,21 +442,18 @@ mlx5_stats_reset(struct rte_eth_dev *dev)
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_stats_ctrl *stats_ctrl = &priv->stats_ctrl;
 	unsigned int i;
-	unsigned int idx;
 
 	for (i = 0; (i != priv->rxqs_n); ++i) {
 		if ((*priv->rxqs)[i] == NULL)
 			continue;
-		idx = (*priv->rxqs)[i]->stats.idx;
-		(*priv->rxqs)[i]->stats =
-			(struct mlx5_rxq_stats){ .idx = idx };
+		memset(&(*priv->rxqs)[i]->stats, 0,
+		       sizeof(struct mlx5_rxq_stats));
 	}
 	for (i = 0; (i != priv->txqs_n); ++i) {
 		if ((*priv->txqs)[i] == NULL)
 			continue;
-		idx = (*priv->txqs)[i]->stats.idx;
-		(*priv->txqs)[i]->stats =
-			(struct mlx5_txq_stats){ .idx = idx };
+		memset(&(*priv->txqs)[i]->stats, 0,
+		       sizeof(struct mlx5_txq_stats));
 	}
 	mlx5_read_ib_stat(priv, "out_of_buffer", &stats_ctrl->imissed_base);
 #ifndef MLX5_PMD_SOFT_COUNTERS
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 5b73f0ff03..7c1e5594d6 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -123,7 +123,7 @@ mlx5_rxq_start(struct rte_eth_dev *dev)
 		DRV_LOG(DEBUG,
 			"port %u Rx queue %u registering"
 			" mp %s having %u chunks",
-			dev->data->port_id, rxq_ctrl->idx,
+			dev->data->port_id, rxq_ctrl->rxq.idx,
 			mp->name, mp->nb_mem_chunks);
 		mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl, mp);
 		ret = rxq_alloc_elts(rxq_ctrl);
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 1b3d89f2f6..4bd08cb035 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -48,7 +48,7 @@ txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl)
 	for (i = 0; (i != elts_n); ++i)
 		(*txq_ctrl->txq.elts)[i] = NULL;
 	DRV_LOG(DEBUG, "port %u Tx queue %u allocated and configured %u WRs",
-		PORT_ID(txq_ctrl->priv), txq_ctrl->idx, elts_n);
+		PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx, elts_n);
 	txq_ctrl->txq.elts_head = 0;
 	txq_ctrl->txq.elts_tail = 0;
 	txq_ctrl->txq.elts_comp = 0;
@@ -70,7 +70,7 @@ txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl)
 	struct rte_mbuf *(*elts)[elts_n] = txq_ctrl->txq.elts;
 
 	DRV_LOG(DEBUG, "port %u Tx queue %u freeing WRs",
-		PORT_ID(txq_ctrl->priv), txq_ctrl->idx);
+		PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx);
 	txq_ctrl->txq.elts_head = 0;
 	txq_ctrl->txq.elts_tail = 0;
 	txq_ctrl->txq.elts_comp = 0;
@@ -224,7 +224,7 @@ mlx5_tx_queue_release(void *dpdk_txq)
 		if ((*priv->txqs)[i] == txq) {
 			mlx5_txq_release(ETH_DEV(priv), i);
 			DRV_LOG(DEBUG, "port %u removing Tx queue %u from list",
-				PORT_ID(priv), txq_ctrl->idx);
+				PORT_ID(priv), txq->idx);
 			break;
 		}
 }
@@ -273,7 +273,7 @@ mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
 			continue;
 		txq = (*priv->txqs)[i];
 		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
-		assert(txq_ctrl->idx == (uint16_t)i);
+		assert(txq->idx == (uint16_t)i);
 		/* UAR addr form verbs used to find dup and offset in page. */
 		uar_va = (uintptr_t)txq_ctrl->bf_reg_orig;
 		off = uar_va & (page_size - 1); /* offset in page. */
@@ -301,7 +301,7 @@ mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
 				DRV_LOG(ERR,
 					"port %u call to mmap failed on UAR"
 					" for txq %u",
-					dev->data->port_id, txq_ctrl->idx);
+					dev->data->port_id, txq->idx);
 				rte_errno = ENXIO;
 				return -rte_errno;
 			}
@@ -629,7 +629,7 @@ mlx5_txq_ibv_verify(struct rte_eth_dev *dev)
 
 	LIST_FOREACH(txq_ibv, &priv->txqsibv, next) {
 		DRV_LOG(DEBUG, "port %u Verbs Tx queue %u still referenced",
-			dev->data->port_id, txq_ibv->txq_ctrl->idx);
+			dev->data->port_id, txq_ibv->txq_ctrl->txq.idx);
 		++ret;
 	}
 	return ret;
@@ -778,7 +778,7 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	tmpl->priv = priv;
 	tmpl->socket = socket;
 	tmpl->txq.elts_n = log2above(desc);
-	tmpl->idx = idx;
+	tmpl->txq.idx = idx;
 	txq_set_params(tmpl);
 	DRV_LOG(DEBUG, "port %u device_attr.max_qp_wr is %d",
 		dev->data->port_id, priv->sh->device_attr.orig_attr.max_qp_wr);
@@ -786,7 +786,6 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		dev->data->port_id, priv->sh->device_attr.orig_attr.max_sge);
 	tmpl->txq.elts =
 		(struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])(tmpl + 1);
-	tmpl->txq.stats.idx = idx;
 	rte_atomic32_inc(&tmpl->refcnt);
 	LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
 	return tmpl;
@@ -893,12 +892,12 @@ int
 mlx5_txq_verify(struct rte_eth_dev *dev)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_txq_ctrl *txq;
+	struct mlx5_txq_ctrl *txq_ctrl;
 	int ret = 0;
 
-	LIST_FOREACH(txq, &priv->txqsctrl, next) {
+	LIST_FOREACH(txq_ctrl, &priv->txqsctrl, next) {
 		DRV_LOG(DEBUG, "port %u Tx queue %u still referenced",
-			dev->data->port_id, txq->idx);
+			dev->data->port_id, txq_ctrl->txq.idx);
 		++ret;
 	}
 	return ret;
-- 
2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v4 2/4] net/mlx5: remove redundant queue index
  2019-04-09 23:13   ` [dpdk-dev] [PATCH v4 2/4] net/mlx5: remove redundant queue index Yongseok Koh
@ 2019-04-09 23:13     ` Yongseok Koh
  0 siblings, 0 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-09 23:13 UTC (permalink / raw)
  To: shahafs; +Cc: dev

The queue index is stored redundantly in both the Rx and Tx structures,
e.g. txq_ctrl->idx and txq->stats.idx. Both are consolidated into a single
storage location - rxq->idx and txq->idx.

Also, rxq and txq are moved to the beginning of their control structures
(rxq_ctrl and txq_ctrl) for cacheline alignment.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Shahaf Shuler <shahafs@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxq.c     | 29 ++++++++++++++---------------
 drivers/net/mlx5/mlx5_rxtx.h    | 10 ++++------
 drivers/net/mlx5/mlx5_stats.c   | 15 ++++++---------
 drivers/net/mlx5/mlx5_trigger.c |  2 +-
 drivers/net/mlx5/mlx5_txq.c     | 21 ++++++++++-----------
 5 files changed, 35 insertions(+), 42 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index dcb97c2100..8a84b0a1b5 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -156,7 +156,7 @@ rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 	}
 	DRV_LOG(DEBUG,
 		"port %u Rx queue %u allocated and configured %u segments",
-		rxq->port_id, rxq_ctrl->idx, wqe_n);
+		rxq->port_id, rxq->idx, wqe_n);
 	return 0;
 error:
 	err = rte_errno; /* Save rte_errno before cleanup. */
@@ -168,7 +168,7 @@ rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 		(*rxq->mprq_bufs)[i] = NULL;
 	}
 	DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
-		rxq->port_id, rxq_ctrl->idx);
+		rxq->port_id, rxq->idx);
 	rte_errno = err; /* Restore rte_errno. */
 	return -rte_errno;
 }
@@ -241,7 +241,7 @@ rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 	DRV_LOG(DEBUG,
 		"port %u Rx queue %u allocated and configured %u segments"
 		" (max %u packets)",
-		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx, elts_n,
+		PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx, elts_n,
 		elts_n / (1 << rxq_ctrl->rxq.sges_n));
 	return 0;
 error:
@@ -253,7 +253,7 @@ rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 		(*rxq_ctrl->rxq.elts)[i] = NULL;
 	}
 	DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
-		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx);
+		PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx);
 	rte_errno = err; /* Restore rte_errno. */
 	return -rte_errno;
 }
@@ -287,7 +287,7 @@ rxq_free_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 	uint16_t i;
 
 	DRV_LOG(DEBUG, "port %u Multi-Packet Rx queue %u freeing WRs",
-		rxq->port_id, rxq_ctrl->idx);
+		rxq->port_id, rxq->idx);
 	if (rxq->mprq_bufs == NULL)
 		return;
 	assert(mlx5_rxq_check_vec_support(rxq) < 0);
@@ -318,7 +318,7 @@ rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 	uint16_t i;
 
 	DRV_LOG(DEBUG, "port %u Rx queue %u freeing WRs",
-		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx);
+		PORT_ID(rxq_ctrl->priv), rxq->idx);
 	if (rxq->elts == NULL)
 		return;
 	/**
@@ -364,7 +364,7 @@ void
 mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
 {
 	DRV_LOG(DEBUG, "port %u cleaning up Rx queue %u",
-		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx);
+		PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx);
 	if (rxq_ctrl->ibv)
 		mlx5_rxq_ibv_release(rxq_ctrl->ibv);
 	memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
@@ -495,11 +495,11 @@ mlx5_rx_queue_release(void *dpdk_rxq)
 		return;
 	rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	priv = rxq_ctrl->priv;
-	if (!mlx5_rxq_releasable(ETH_DEV(priv), rxq_ctrl->rxq.stats.idx))
+	if (!mlx5_rxq_releasable(ETH_DEV(priv), rxq_ctrl->rxq.idx))
 		rte_panic("port %u Rx queue %u is still used by a flow and"
 			  " cannot be removed\n",
-			  PORT_ID(priv), rxq_ctrl->idx);
-	mlx5_rxq_release(ETH_DEV(priv), rxq_ctrl->rxq.stats.idx);
+			  PORT_ID(priv), rxq->idx);
+	mlx5_rxq_release(ETH_DEV(priv), rxq_ctrl->rxq.idx);
 }
 
 /**
@@ -793,7 +793,7 @@ mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	if (!tmpl) {
 		DRV_LOG(ERR,
 			"port %u Rx queue %u cannot allocate verbs resources",
-			dev->data->port_id, rxq_ctrl->idx);
+			dev->data->port_id, rxq_data->idx);
 		rte_errno = ENOMEM;
 		goto error;
 	}
@@ -1104,7 +1104,7 @@ mlx5_rxq_ibv_verify(struct rte_eth_dev *dev)
 
 	LIST_FOREACH(rxq_ibv, &priv->rxqsibv, next) {
 		DRV_LOG(DEBUG, "port %u Verbs Rx queue %u still referenced",
-			dev->data->port_id, rxq_ibv->rxq_ctrl->idx);
+			dev->data->port_id, rxq_ibv->rxq_ctrl->rxq.idx);
 		++ret;
 	}
 	return ret;
@@ -1470,7 +1470,6 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	tmpl->rxq.port_id = dev->data->port_id;
 	tmpl->priv = priv;
 	tmpl->rxq.mp = mp;
-	tmpl->rxq.stats.idx = idx;
 	tmpl->rxq.elts_n = log2above(desc);
 	tmpl->rxq.rq_repl_thresh =
 		MLX5_VPMD_RXQ_RPLNSH_THRESH(1 << tmpl->rxq.elts_n);
@@ -1479,7 +1478,7 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 #ifndef RTE_ARCH_64
 	tmpl->rxq.uar_lock_cq = &priv->uar_lock_cq;
 #endif
-	tmpl->idx = idx;
+	tmpl->rxq.idx = idx;
 	rte_atomic32_inc(&tmpl->refcnt);
 	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
 	return tmpl;
@@ -1592,7 +1591,7 @@ mlx5_rxq_verify(struct rte_eth_dev *dev)
 
 	LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
 		DRV_LOG(DEBUG, "port %u Rx Queue %u still referenced",
-			dev->data->port_id, rxq_ctrl->idx);
+			dev->data->port_id, rxq_ctrl->rxq.idx);
 		++ret;
 	}
 	return ret;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index ced9945888..7b58063ceb 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -41,7 +41,6 @@
 #define MLX5_FLOW_TUNNEL 5
 
 struct mlx5_rxq_stats {
-	unsigned int idx; /**< Mapping index. */
 #ifdef MLX5_PMD_SOFT_COUNTERS
 	uint64_t ipackets; /**< Total of successfully received packets. */
 	uint64_t ibytes; /**< Total of successfully received bytes. */
@@ -51,7 +50,6 @@ struct mlx5_rxq_stats {
 };
 
 struct mlx5_txq_stats {
-	unsigned int idx; /**< Mapping index. */
 #ifdef MLX5_PMD_SOFT_COUNTERS
 	uint64_t opackets; /**< Total of successfully sent packets. */
 	uint64_t obytes; /**< Total of successfully sent bytes. */
@@ -116,6 +114,7 @@ struct mlx5_rxq_data {
 	struct rte_mempool *mp;
 	struct rte_mempool *mprq_mp; /* Mempool for Multi-Packet RQ. */
 	struct mlx5_mprq_buf *mprq_repl; /* Stashed mbuf for replenish. */
+	uint16_t idx; /* Queue index. */
 	struct mlx5_rxq_stats stats;
 	uint64_t mbuf_initializer; /* Default rearm_data for vectorized Rx. */
 	struct rte_mbuf fake_mbuf; /* elts padding for vectorized Rx. */
@@ -141,14 +140,13 @@ struct mlx5_rxq_ibv {
 
 /* RX queue control descriptor. */
 struct mlx5_rxq_ctrl {
+	struct mlx5_rxq_data rxq; /* Data path structure. */
 	LIST_ENTRY(mlx5_rxq_ctrl) next; /* Pointer to the next element. */
 	rte_atomic32_t refcnt; /* Reference counter. */
 	struct mlx5_rxq_ibv *ibv; /* Verbs elements. */
 	struct mlx5_priv *priv; /* Back pointer to private data. */
-	struct mlx5_rxq_data rxq; /* Data path structure. */
 	unsigned int socket; /* CPU socket ID for allocations. */
 	unsigned int irq:1; /* Whether IRQ is enabled. */
-	uint16_t idx; /* Queue index. */
 	uint32_t flow_mark_n; /* Number of Mark/Flag flows using this Queue. */
 	uint32_t flow_tunnels_n[MLX5_FLOW_TUNNEL]; /* Tunnels counters. */
 };
@@ -205,6 +203,7 @@ struct mlx5_txq_data {
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
 	volatile void *bf_reg; /* Blueflame register remapped. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
+	uint16_t idx; /* Queue index. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
 #ifndef RTE_ARCH_64
 	rte_spinlock_t *uar_lock;
@@ -223,6 +222,7 @@ struct mlx5_txq_ibv {
 
 /* TX queue control descriptor. */
 struct mlx5_txq_ctrl {
+	struct mlx5_txq_data txq; /* Data path structure. */
 	LIST_ENTRY(mlx5_txq_ctrl) next; /* Pointer to the next element. */
 	rte_atomic32_t refcnt; /* Reference counter. */
 	unsigned int socket; /* CPU socket ID for allocations. */
@@ -230,10 +230,8 @@ struct mlx5_txq_ctrl {
 	unsigned int max_tso_header; /* Max TSO header size. */
 	struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
 	struct mlx5_priv *priv; /* Back pointer to private data. */
-	struct mlx5_txq_data txq; /* Data path structure. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
 	volatile void *bf_reg_orig; /* Blueflame register from verbs. */
-	uint16_t idx; /* Queue index. */
 };
 
 /* mlx5_rxq.c */
diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
index 5af199d0d5..ed50667f45 100644
--- a/drivers/net/mlx5/mlx5_stats.c
+++ b/drivers/net/mlx5/mlx5_stats.c
@@ -386,7 +386,7 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 
 		if (rxq == NULL)
 			continue;
-		idx = rxq->stats.idx;
+		idx = rxq->idx;
 		if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
 #ifdef MLX5_PMD_SOFT_COUNTERS
 			tmp.q_ipackets[idx] += rxq->stats.ipackets;
@@ -407,7 +407,7 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 
 		if (txq == NULL)
 			continue;
-		idx = txq->stats.idx;
+		idx = txq->idx;
 		if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
 #ifdef MLX5_PMD_SOFT_COUNTERS
 			tmp.q_opackets[idx] += txq->stats.opackets;
@@ -442,21 +442,18 @@ mlx5_stats_reset(struct rte_eth_dev *dev)
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_stats_ctrl *stats_ctrl = &priv->stats_ctrl;
 	unsigned int i;
-	unsigned int idx;
 
 	for (i = 0; (i != priv->rxqs_n); ++i) {
 		if ((*priv->rxqs)[i] == NULL)
 			continue;
-		idx = (*priv->rxqs)[i]->stats.idx;
-		(*priv->rxqs)[i]->stats =
-			(struct mlx5_rxq_stats){ .idx = idx };
+		memset(&(*priv->rxqs)[i]->stats, 0,
+		       sizeof(struct mlx5_rxq_stats));
 	}
 	for (i = 0; (i != priv->txqs_n); ++i) {
 		if ((*priv->txqs)[i] == NULL)
 			continue;
-		idx = (*priv->txqs)[i]->stats.idx;
-		(*priv->txqs)[i]->stats =
-			(struct mlx5_txq_stats){ .idx = idx };
+		memset(&(*priv->txqs)[i]->stats, 0,
+		       sizeof(struct mlx5_txq_stats));
 	}
 	mlx5_read_ib_stat(priv, "out_of_buffer", &stats_ctrl->imissed_base);
 #ifndef MLX5_PMD_SOFT_COUNTERS
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 5b73f0ff03..7c1e5594d6 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -123,7 +123,7 @@ mlx5_rxq_start(struct rte_eth_dev *dev)
 		DRV_LOG(DEBUG,
 			"port %u Rx queue %u registering"
 			" mp %s having %u chunks",
-			dev->data->port_id, rxq_ctrl->idx,
+			dev->data->port_id, rxq_ctrl->rxq.idx,
 			mp->name, mp->nb_mem_chunks);
 		mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl, mp);
 		ret = rxq_alloc_elts(rxq_ctrl);
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 1b3d89f2f6..4bd08cb035 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -48,7 +48,7 @@ txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl)
 	for (i = 0; (i != elts_n); ++i)
 		(*txq_ctrl->txq.elts)[i] = NULL;
 	DRV_LOG(DEBUG, "port %u Tx queue %u allocated and configured %u WRs",
-		PORT_ID(txq_ctrl->priv), txq_ctrl->idx, elts_n);
+		PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx, elts_n);
 	txq_ctrl->txq.elts_head = 0;
 	txq_ctrl->txq.elts_tail = 0;
 	txq_ctrl->txq.elts_comp = 0;
@@ -70,7 +70,7 @@ txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl)
 	struct rte_mbuf *(*elts)[elts_n] = txq_ctrl->txq.elts;
 
 	DRV_LOG(DEBUG, "port %u Tx queue %u freeing WRs",
-		PORT_ID(txq_ctrl->priv), txq_ctrl->idx);
+		PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx);
 	txq_ctrl->txq.elts_head = 0;
 	txq_ctrl->txq.elts_tail = 0;
 	txq_ctrl->txq.elts_comp = 0;
@@ -224,7 +224,7 @@ mlx5_tx_queue_release(void *dpdk_txq)
 		if ((*priv->txqs)[i] == txq) {
 			mlx5_txq_release(ETH_DEV(priv), i);
 			DRV_LOG(DEBUG, "port %u removing Tx queue %u from list",
-				PORT_ID(priv), txq_ctrl->idx);
+				PORT_ID(priv), txq->idx);
 			break;
 		}
 }
@@ -273,7 +273,7 @@ mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
 			continue;
 		txq = (*priv->txqs)[i];
 		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
-		assert(txq_ctrl->idx == (uint16_t)i);
+		assert(txq->idx == (uint16_t)i);
 		/* UAR addr form verbs used to find dup and offset in page. */
 		uar_va = (uintptr_t)txq_ctrl->bf_reg_orig;
 		off = uar_va & (page_size - 1); /* offset in page. */
@@ -301,7 +301,7 @@ mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
 				DRV_LOG(ERR,
 					"port %u call to mmap failed on UAR"
 					" for txq %u",
-					dev->data->port_id, txq_ctrl->idx);
+					dev->data->port_id, txq->idx);
 				rte_errno = ENXIO;
 				return -rte_errno;
 			}
@@ -629,7 +629,7 @@ mlx5_txq_ibv_verify(struct rte_eth_dev *dev)
 
 	LIST_FOREACH(txq_ibv, &priv->txqsibv, next) {
 		DRV_LOG(DEBUG, "port %u Verbs Tx queue %u still referenced",
-			dev->data->port_id, txq_ibv->txq_ctrl->idx);
+			dev->data->port_id, txq_ibv->txq_ctrl->txq.idx);
 		++ret;
 	}
 	return ret;
@@ -778,7 +778,7 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	tmpl->priv = priv;
 	tmpl->socket = socket;
 	tmpl->txq.elts_n = log2above(desc);
-	tmpl->idx = idx;
+	tmpl->txq.idx = idx;
 	txq_set_params(tmpl);
 	DRV_LOG(DEBUG, "port %u device_attr.max_qp_wr is %d",
 		dev->data->port_id, priv->sh->device_attr.orig_attr.max_qp_wr);
@@ -786,7 +786,6 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		dev->data->port_id, priv->sh->device_attr.orig_attr.max_sge);
 	tmpl->txq.elts =
 		(struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])(tmpl + 1);
-	tmpl->txq.stats.idx = idx;
 	rte_atomic32_inc(&tmpl->refcnt);
 	LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
 	return tmpl;
@@ -893,12 +892,12 @@ int
 mlx5_txq_verify(struct rte_eth_dev *dev)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_txq_ctrl *txq;
+	struct mlx5_txq_ctrl *txq_ctrl;
 	int ret = 0;
 
-	LIST_FOREACH(txq, &priv->txqsctrl, next) {
+	LIST_FOREACH(txq_ctrl, &priv->txqsctrl, next) {
 		DRV_LOG(DEBUG, "port %u Tx queue %u still referenced",
-			dev->data->port_id, txq->idx);
+			dev->data->port_id, txq_ctrl->txq.idx);
 		++ret;
 	}
 	return ret;
-- 
2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v4 3/4] net/mlx5: remove device register remap
  2019-04-09 23:13 ` [dpdk-dev] [PATCH v4 0/4] net/mlx: " Yongseok Koh
                     ` (2 preceding siblings ...)
  2019-04-09 23:13   ` [dpdk-dev] [PATCH v4 2/4] net/mlx5: remove redundant queue index Yongseok Koh
@ 2019-04-09 23:13   ` Yongseok Koh
  2019-04-09 23:13     ` Yongseok Koh
  2019-04-10 17:50     ` Ferruh Yigit
  2019-04-09 23:13   ` [dpdk-dev] [PATCH v4 4/4] net/mlx4: " Yongseok Koh
  2019-04-10  6:58   ` [dpdk-dev] [PATCH v4 0/4] net/mlx: " Shahaf Shuler
  5 siblings, 2 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-09 23:13 UTC (permalink / raw)
  To: shahafs; +Cc: dev

The UAR (User Access Region) register does not need to be remapped for the
primary process; it has to be remapped only for secondary processes. The UAR
register table is in the process private structure in rte_eth_devices[],
	(struct mlx5_proc_priv *)rte_eth_devices[port_id].process_private

The actual UAR table follows the data structure and the table is used for
both Tx and Rx.

For Tx, BlueFlame in UAR is used to ring the doorbell. MLX5_TX_BFREG(txq)
is defined to get a register for the txq. Each process accesses its own
private data to acquire the register from the UAR table.

For Rx, the doorbell in UAR is required for arming the CQ event. However, it
is a known issue that the register isn't remapped for secondary processes.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.c         | 228 ++++++++++------------------------------
 drivers/net/mlx5/mlx5.h         |  16 ++-
 drivers/net/mlx5/mlx5_ethdev.c  |   3 +
 drivers/net/mlx5/mlx5_rxtx.h    |  11 +-
 drivers/net/mlx5/mlx5_trigger.c |   6 --
 drivers/net/mlx5/mlx5_txq.c     | 180 +++++++++++++++++++------------
 6 files changed, 185 insertions(+), 259 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 475c93ddf9..9ff50dfbe4 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -449,30 +449,6 @@ mlx5_init_shared_data(void)
 }
 
 /**
- * Uninitialize shared data between primary and secondary process.
- *
- * The pointer of secondary process is dereferenced and primary process frees
- * the memzone.
- */
-static void
-mlx5_uninit_shared_data(void)
-{
-	const struct rte_memzone *mz;
-
-	rte_spinlock_lock(&mlx5_shared_data_lock);
-	if (mlx5_shared_data) {
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-			mz = rte_memzone_lookup(MZ_MLX5_PMD_SHARED_DATA);
-			rte_memzone_free(mz);
-		} else {
-			memset(&mlx5_local_data, 0, sizeof(mlx5_local_data));
-		}
-		mlx5_shared_data = NULL;
-	}
-	rte_spinlock_unlock(&mlx5_shared_data_lock);
-}
-
-/**
  * Retrieve integer value from environment variable.
  *
  * @param[in] name
@@ -546,6 +522,54 @@ mlx5_free_verbs_buf(void *ptr, void *data __rte_unused)
 }
 
 /**
+ * Initialize process private data structure.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_proc_priv_init(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_proc_priv *ppriv;
+	size_t ppriv_size;
+
+	/*
+	 * UAR register table follows the process private structure. BlueFlame
+	 * registers for Tx queues are stored in the table.
+	 */
+	ppriv_size =
+		sizeof(struct mlx5_proc_priv) + priv->txqs_n * sizeof(void *);
+	ppriv = rte_malloc_socket("mlx5_proc_priv", ppriv_size,
+				  RTE_CACHE_LINE_SIZE, dev->device->numa_node);
+	if (!ppriv) {
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	ppriv->uar_table_sz = ppriv_size;
+	dev->process_private = ppriv;
+	return 0;
+}
+
+/**
+ * Un-initialize process private data structure.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ */
+static void
+mlx5_proc_priv_uninit(struct rte_eth_dev *dev)
+{
+	if (!dev->process_private)
+		return;
+	rte_free(dev->process_private);
+	dev->process_private = NULL;
+}
+
+/**
  * DPDK callback to close the device.
  *
  * Destroy all queues and objects, free memory.
@@ -589,6 +613,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 		priv->txqs_n = 0;
 		priv->txqs = NULL;
 	}
+	mlx5_proc_priv_uninit(dev);
 	mlx5_mprq_free_mp(dev);
 	mlx5_mr_release(dev);
 	assert(priv->sh);
@@ -913,132 +938,6 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
 
 static struct rte_pci_driver mlx5_driver;
 
-static int
-find_lower_va_bound(const struct rte_memseg_list *msl,
-		const struct rte_memseg *ms, void *arg)
-{
-	void **addr = arg;
-
-	if (msl->external)
-		return 0;
-	if (*addr == NULL)
-		*addr = ms->addr;
-	else
-		*addr = RTE_MIN(*addr, ms->addr);
-
-	return 0;
-}
-
-/**
- * Reserve UAR address space for primary process.
- *
- * Process local resource is used by both primary and secondary to avoid
- * duplicate reservation. The space has to be available on both primary and
- * secondary process, TXQ UAR maps to this area using fixed mmap w/o double
- * check.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_uar_init_primary(void)
-{
-	struct mlx5_shared_data *sd = mlx5_shared_data;
-	void *addr = (void *)0;
-
-	if (sd->uar_base)
-		return 0;
-	/* find out lower bound of hugepage segments */
-	rte_memseg_walk(find_lower_va_bound, &addr);
-	/* keep distance to hugepages to minimize potential conflicts. */
-	addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX5_UAR_OFFSET + MLX5_UAR_SIZE));
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(addr, MLX5_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		DRV_LOG(ERR,
-			"Failed to reserve UAR address space, please"
-			" adjust MLX5_UAR_SIZE or try --base-virtaddr");
-		rte_errno = ENOMEM;
-		return -rte_errno;
-	}
-	/* Accept either same addr or a new addr returned from mmap if target
-	 * range occupied.
-	 */
-	DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
-	sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
-	return 0;
-}
-
-/**
- * Unmap UAR address space reserved for primary process.
- */
-static void
-mlx5_uar_uninit_primary(void)
-{
-	struct mlx5_shared_data *sd = mlx5_shared_data;
-
-	if (!sd->uar_base)
-		return;
-	munmap(sd->uar_base, MLX5_UAR_SIZE);
-	sd->uar_base = NULL;
-}
-
-/**
- * Reserve UAR address space for secondary process, align with primary process.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_uar_init_secondary(void)
-{
-	struct mlx5_shared_data *sd = mlx5_shared_data;
-	struct mlx5_local_data *ld = &mlx5_local_data;
-	void *addr;
-
-	if (ld->uar_base) { /* Already reserved. */
-		assert(sd->uar_base == ld->uar_base);
-		return 0;
-	}
-	assert(sd->uar_base);
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(sd->uar_base, MLX5_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		DRV_LOG(ERR, "UAR mmap failed: %p size: %llu",
-			sd->uar_base, MLX5_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
-	}
-	if (sd->uar_base != addr) {
-		DRV_LOG(ERR,
-			"UAR address %p size %llu occupied, please"
-			" adjust MLX5_UAR_OFFSET or try EAL parameter"
-			" --base-virtaddr",
-			sd->uar_base, MLX5_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
-	}
-	ld->uar_base = addr;
-	DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
-	return 0;
-}
-
-/**
- * Unmap UAR address space reserved for secondary process.
- */
-static void
-mlx5_uar_uninit_secondary(void)
-{
-	struct mlx5_local_data *ld = &mlx5_local_data;
-
-	if (!ld->uar_base)
-		return;
-	munmap(ld->uar_base, MLX5_UAR_SIZE);
-	ld->uar_base = NULL;
-}
-
 /**
  * PMD global initialization.
  *
@@ -1054,7 +953,6 @@ mlx5_init_once(void)
 {
 	struct mlx5_shared_data *sd;
 	struct mlx5_local_data *ld = &mlx5_local_data;
-	int ret;
 
 	if (mlx5_init_shared_data())
 		return -rte_errno;
@@ -1070,18 +968,12 @@ mlx5_init_once(void)
 		rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
 						mlx5_mr_mem_event_cb, NULL);
 		mlx5_mp_init_primary();
-		ret = mlx5_uar_init_primary();
-		if (ret)
-			goto error;
 		sd->init_done = true;
 		break;
 	case RTE_PROC_SECONDARY:
 		if (ld->init_done)
 			break;
 		mlx5_mp_init_secondary();
-		ret = mlx5_uar_init_secondary();
-		if (ret)
-			goto error;
 		++sd->secondary_cnt;
 		ld->init_done = true;
 		break;
@@ -1090,23 +982,6 @@ mlx5_init_once(void)
 	}
 	rte_spinlock_unlock(&sd->lock);
 	return 0;
-error:
-	switch (rte_eal_process_type()) {
-	case RTE_PROC_PRIMARY:
-		mlx5_uar_uninit_primary();
-		mlx5_mp_uninit_primary();
-		rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB", NULL);
-		break;
-	case RTE_PROC_SECONDARY:
-		mlx5_uar_uninit_secondary();
-		mlx5_mp_uninit_secondary();
-		break;
-	default:
-		break;
-	}
-	rte_spinlock_unlock(&sd->lock);
-	mlx5_uninit_shared_data();
-	return -rte_errno;
 }
 
 /**
@@ -1197,12 +1072,15 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 		}
 		eth_dev->device = dpdk_dev;
 		eth_dev->dev_ops = &mlx5_dev_sec_ops;
+		err = mlx5_proc_priv_init(eth_dev);
+		if (err)
+			return NULL;
 		/* Receive command fd from primary process */
 		err = mlx5_mp_req_verbs_cmd_fd(eth_dev);
 		if (err < 0)
 			return NULL;
 		/* Remap UAR for Tx queues. */
-		err = mlx5_tx_uar_remap(eth_dev, err);
+		err = mlx5_tx_uar_init_secondary(eth_dev, err);
 		if (err)
 			return NULL;
 		/*
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 960a2f8191..14c7f3c6fb 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -97,8 +97,6 @@ struct mlx5_shared_data {
 	/* Global spinlock for primary and secondary processes. */
 	int init_done; /* Whether primary has done initialization. */
 	unsigned int secondary_cnt; /* Number of secondary processes init'd. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
 	struct mlx5_dev_list mem_event_cb_list;
 	rte_rwlock_t mem_event_rwlock;
 };
@@ -106,8 +104,6 @@ struct mlx5_shared_data {
 /* Per-process data structure, not visible to other processes. */
 struct mlx5_local_data {
 	int init_done; /* Whether a secondary has done initialization. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
 };
 
 extern struct mlx5_shared_data *mlx5_shared_data;
@@ -282,6 +278,17 @@ struct mlx5_ibv_shared {
 	struct mlx5_ibv_shared_port port[]; /* per device port data array. */
 };
 
+/* Per-process private structure. */
+struct mlx5_proc_priv {
+	size_t uar_table_sz;
+	/* Size of UAR register table. */
+	void *uar_table[];
+	/* Table of UAR registers for each process. */
+};
+
+#define MLX5_PROC_PRIV(port_id) \
+	((struct mlx5_proc_priv *)rte_eth_devices[port_id].process_private)
+
 struct mlx5_priv {
 	LIST_ENTRY(mlx5_priv) mem_event_cb;
 	/**< Called by memory event callback. */
@@ -359,6 +366,7 @@ struct mlx5_priv {
 /* mlx5.c */
 
 int mlx5_getenv_int(const char *);
+int mlx5_proc_priv_init(struct rte_eth_dev *dev);
 
 /* mlx5_ethdev.c */
 
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 1e6fe192a6..3992918c57 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -450,6 +450,9 @@ mlx5_dev_configure(struct rte_eth_dev *dev)
 		if (++j == rxqs_n)
 			j = 0;
 	}
+	ret = mlx5_proc_priv_init(dev);
+	if (ret)
+		return ret;
 	return 0;
 }
 
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 7b58063ceb..5d49892429 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -201,8 +201,8 @@ struct mlx5_txq_data {
 	volatile void *wqes; /* Work queue (use volatile to write into). */
 	volatile uint32_t *qp_db; /* Work queue doorbell. */
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
-	volatile void *bf_reg; /* Blueflame register remapped. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
+	uint16_t port_id; /* Port ID of device. */
 	uint16_t idx; /* Queue index. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
 #ifndef RTE_ARCH_64
@@ -231,9 +231,12 @@ struct mlx5_txq_ctrl {
 	struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
 	struct mlx5_priv *priv; /* Back pointer to private data. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
-	volatile void *bf_reg_orig; /* Blueflame register from verbs. */
+	void *bf_reg; /* BlueFlame register from Verbs. */
 };
 
+#define MLX5_TX_BFREG(txq) \
+		(MLX5_PROC_PRIV((txq)->port_id)->uar_table[(txq)->idx])
+
 /* mlx5_rxq.c */
 
 extern uint8_t rss_hash_default_key[];
@@ -301,7 +304,7 @@ uint64_t mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev);
 int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 			unsigned int socket, const struct rte_eth_txconf *conf);
 void mlx5_tx_queue_release(void *dpdk_txq);
-int mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd);
+int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
 struct mlx5_txq_ibv *mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx);
 struct mlx5_txq_ibv *mlx5_txq_ibv_get(struct rte_eth_dev *dev, uint16_t idx);
 int mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv);
@@ -704,7 +707,7 @@ static __rte_always_inline void
 mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe,
 		       int cond)
 {
-	uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
+	uint64_t *dst = MLX5_TX_BFREG(txq);
 	volatile uint64_t *src = ((volatile uint64_t *)wqe);
 
 	rte_cio_wmb();
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 7c1e5594d6..b7fde35758 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -58,12 +58,6 @@ mlx5_txq_start(struct rte_eth_dev *dev)
 			goto error;
 		}
 	}
-	ret = mlx5_tx_uar_remap(dev, priv->sh->ctx->cmd_fd);
-	if (ret) {
-		/* Adjust index for rollback. */
-		i = priv->txqs_n - 1;
-		goto error;
-	}
 	return 0;
 error:
 	ret = rte_errno; /* Save rte_errno before cleanup. */
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 4bd08cb035..5fb1761955 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -229,13 +229,99 @@ mlx5_tx_queue_release(void *dpdk_txq)
 		}
 }
 
+/**
+ * Initialize Tx UAR registers for primary process.
+ *
+ * @param txq_ctrl
+ *   Pointer to Tx queue control structure.
+ */
+static void
+txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl)
+{
+	struct mlx5_priv *priv = txq_ctrl->priv;
+	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
+
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	assert(ppriv);
+	ppriv->uar_table[txq_ctrl->txq.idx] = txq_ctrl->bf_reg;
+#ifndef RTE_ARCH_64
+	struct mlx5_priv *priv = txq_ctrl->priv;
+	struct mlx5_txq_data *txq = &txq_ctrl->txq;
+	unsigned int lock_idx;
+	/* Assign an UAR lock according to UAR page number */
+	lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
+		   MLX5_UAR_PAGE_NUM_MASK;
+	txq->uar_lock = &priv->uar_lock[lock_idx];
+#endif
+}
 
 /**
- * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
- * Both primary and secondary process do mmap to make UAR address
- * aligned.
+ * Remap UAR register of a Tx queue for secondary process.
  *
- * @param[in] dev
+ * Remapped address is stored at the table in the process private structure of
+ * the device, indexed by queue index.
+ *
+ * @param txq_ctrl
+ *   Pointer to Tx queue control structure.
+ * @param fd
+ *   Verbs file descriptor to map UAR pages.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+txq_uar_init_secondary(struct mlx5_txq_ctrl *txq_ctrl, int fd)
+{
+	struct mlx5_priv *priv = txq_ctrl->priv;
+	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
+	struct mlx5_txq_data *txq = &txq_ctrl->txq;
+	void *addr;
+	uintptr_t uar_va;
+	uintptr_t offset;
+	const size_t page_size = sysconf(_SC_PAGESIZE);
+
+	assert(ppriv);
+	/*
+	 * As rdma-core, UARs are mapped in size of OS page
+	 * size. Ref to libmlx5 function: mlx5_init_context()
+	 */
+	uar_va = (uintptr_t)txq_ctrl->bf_reg;
+	offset = uar_va & (page_size - 1); /* Offset in page. */
+	addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
+			txq_ctrl->uar_mmap_offset);
+	if (addr == MAP_FAILED) {
+		DRV_LOG(ERR,
+			"port %u mmap failed for BF reg of txq %u",
+			txq->port_id, txq->idx);
+		rte_errno = ENXIO;
+		return -rte_errno;
+	}
+	addr = RTE_PTR_ADD(addr, offset);
+	ppriv->uar_table[txq->idx] = addr;
+	return 0;
+}
+
+/**
+ * Unmap UAR register of a Tx queue for secondary process.
+ *
+ * @param txq_ctrl
+ *   Pointer to Tx queue control structure.
+ */
+static void
+txq_uar_uninit_secondary(struct mlx5_txq_ctrl *txq_ctrl)
+{
+	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(txq_ctrl->priv));
+	const size_t page_size = sysconf(_SC_PAGESIZE);
+	void *addr;
+
+	addr = ppriv->uar_table[txq_ctrl->txq.idx];
+	munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
+}
+
+/**
+ * Initialize Tx UAR registers for secondary process.
+ *
+ * @param dev
  *   Pointer to Ethernet device.
  * @param fd
  *   Verbs file descriptor to map UAR pages.
@@ -244,81 +330,36 @@ mlx5_tx_queue_release(void *dpdk_txq)
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
+mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	unsigned int i, j;
-	uintptr_t pages[priv->txqs_n];
-	unsigned int pages_n = 0;
-	uintptr_t uar_va;
-	uintptr_t off;
-	void *addr;
-	void *ret;
 	struct mlx5_txq_data *txq;
 	struct mlx5_txq_ctrl *txq_ctrl;
-	int already_mapped;
-	size_t page_size = sysconf(_SC_PAGESIZE);
-#ifndef RTE_ARCH_64
-	unsigned int lock_idx;
-#endif
+	unsigned int i;
+	int ret;
 
-	memset(pages, 0, priv->txqs_n * sizeof(uintptr_t));
-	/*
-	 * As rdma-core, UARs are mapped in size of OS page size.
-	 * Use aligned address to avoid duplicate mmap.
-	 * Ref to libmlx5 function: mlx5_init_context()
-	 */
+	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
 	for (i = 0; i != priv->txqs_n; ++i) {
 		if (!(*priv->txqs)[i])
 			continue;
 		txq = (*priv->txqs)[i];
 		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
 		assert(txq->idx == (uint16_t)i);
-		/* UAR addr form verbs used to find dup and offset in page. */
-		uar_va = (uintptr_t)txq_ctrl->bf_reg_orig;
-		off = uar_va & (page_size - 1); /* offset in page. */
-		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
-		already_mapped = 0;
-		for (j = 0; j != pages_n; ++j) {
-			if (pages[j] == uar_va) {
-				already_mapped = 1;
-				break;
-			}
-		}
-		/* new address in reserved UAR address space. */
-		addr = RTE_PTR_ADD(mlx5_shared_data->uar_base,
-				   uar_va & (uintptr_t)(MLX5_UAR_SIZE - 1));
-		if (!already_mapped) {
-			pages[pages_n++] = uar_va;
-			/* fixed mmap to specified address in reserved
-			 * address space.
-			 */
-			ret = mmap(addr, page_size,
-				   PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
-				   txq_ctrl->uar_mmap_offset);
-			if (ret != addr) {
-				/* fixed mmap have to return same address */
-				DRV_LOG(ERR,
-					"port %u call to mmap failed on UAR"
-					" for txq %u",
-					dev->data->port_id, txq->idx);
-				rte_errno = ENXIO;
-				return -rte_errno;
-			}
-		}
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once */
-			txq_ctrl->txq.bf_reg = RTE_PTR_ADD((void *)addr, off);
-		else
-			assert(txq_ctrl->txq.bf_reg ==
-			       RTE_PTR_ADD((void *)addr, off));
-#ifndef RTE_ARCH_64
-		/* Assign a UAR lock according to UAR page number */
-		lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
-			   MLX5_UAR_PAGE_NUM_MASK;
-		txq->uar_lock = &priv->uar_lock[lock_idx];
-#endif
+		ret = txq_uar_init_secondary(txq_ctrl, fd);
+		if (ret)
+			goto error;
 	}
 	return 0;
+error:
+	/* Rollback. */
+	do {
+		if (!(*priv->txqs)[i])
+			continue;
+		txq = (*priv->txqs)[i];
+		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
+		txq_uar_uninit_secondary(txq_ctrl);
+	} while (i--);
+	return -rte_errno;
 }
 
 /**
@@ -507,7 +548,6 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	txq_data->wqes = qp.sq.buf;
 	txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
 	txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
-	txq_ctrl->bf_reg_orig = qp.bf.reg;
 	txq_data->cq_db = cq_info.dbrec;
 	txq_data->cqes =
 		(volatile struct mlx5_cqe (*)[])
@@ -521,6 +561,8 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	txq_ibv->qp = tmpl.qp;
 	txq_ibv->cq = tmpl.cq;
 	rte_atomic32_inc(&txq_ibv->refcnt);
+	txq_ctrl->bf_reg = qp.bf.reg;
+	txq_uar_init(txq_ctrl);
 	if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
 		txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
 		DRV_LOG(DEBUG, "port %u: uar_mmap_offset 0x%lx",
@@ -778,6 +820,7 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	tmpl->priv = priv;
 	tmpl->socket = socket;
 	tmpl->txq.elts_n = log2above(desc);
+	tmpl->txq.port_id = dev->data->port_id;
 	tmpl->txq.idx = idx;
 	txq_set_params(tmpl);
 	DRV_LOG(DEBUG, "port %u device_attr.max_qp_wr is %d",
@@ -836,15 +879,12 @@ mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_txq_ctrl *txq;
-	size_t page_size = sysconf(_SC_PAGESIZE);
 
 	if (!(*priv->txqs)[idx])
 		return 0;
 	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
 	if (txq->ibv && !mlx5_txq_ibv_release(txq->ibv))
 		txq->ibv = NULL;
-	munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->txq.bf_reg, page_size),
-	       page_size);
 	if (rte_atomic32_dec_and_test(&txq->refcnt)) {
 		txq_free_elts(txq);
 		mlx5_mr_btree_free(&txq->txq.mr_ctrl.cache_bh);
-- 
2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v4 3/4] net/mlx5: remove device register remap
  2019-04-09 23:13   ` [dpdk-dev] [PATCH v4 3/4] net/mlx5: remove device register remap Yongseok Koh
@ 2019-04-09 23:13     ` Yongseok Koh
  2019-04-10 17:50     ` Ferruh Yigit
  1 sibling, 0 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-09 23:13 UTC (permalink / raw)
  To: shahafs; +Cc: dev

The UAR (User Access Region) register does not need to be remapped for the
primary process; it needs to be remapped only for secondary processes. The UAR
register table is in the process private structure in rte_eth_devices[]:
	(struct mlx5_proc_priv *)rte_eth_devices[port_id].process_private

The actual UAR table follows the data structure and the table is used for
both Tx and Rx.

For Tx, BlueFlame in UAR is used to ring the doorbell. MLX5_TX_BFREG(txq)
is defined to get the register for the txq. Each process accesses its own
private data to acquire the register from the UAR table.

For Rx, the doorbell in UAR is required for arming CQ events. However, it is
a known issue that the register isn't remapped for secondary processes.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.c         | 228 ++++++++++------------------------------
 drivers/net/mlx5/mlx5.h         |  16 ++-
 drivers/net/mlx5/mlx5_ethdev.c  |   3 +
 drivers/net/mlx5/mlx5_rxtx.h    |  11 +-
 drivers/net/mlx5/mlx5_trigger.c |   6 --
 drivers/net/mlx5/mlx5_txq.c     | 180 +++++++++++++++++++------------
 6 files changed, 185 insertions(+), 259 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 475c93ddf9..9ff50dfbe4 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -449,30 +449,6 @@ mlx5_init_shared_data(void)
 }
 
 /**
- * Uninitialize shared data between primary and secondary process.
- *
- * The pointer of secondary process is dereferenced and primary process frees
- * the memzone.
- */
-static void
-mlx5_uninit_shared_data(void)
-{
-	const struct rte_memzone *mz;
-
-	rte_spinlock_lock(&mlx5_shared_data_lock);
-	if (mlx5_shared_data) {
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-			mz = rte_memzone_lookup(MZ_MLX5_PMD_SHARED_DATA);
-			rte_memzone_free(mz);
-		} else {
-			memset(&mlx5_local_data, 0, sizeof(mlx5_local_data));
-		}
-		mlx5_shared_data = NULL;
-	}
-	rte_spinlock_unlock(&mlx5_shared_data_lock);
-}
-
-/**
  * Retrieve integer value from environment variable.
  *
  * @param[in] name
@@ -546,6 +522,54 @@ mlx5_free_verbs_buf(void *ptr, void *data __rte_unused)
 }
 
 /**
+ * Initialize process private data structure.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_proc_priv_init(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_proc_priv *ppriv;
+	size_t ppriv_size;
+
+	/*
+	 * UAR register table follows the process private structure. BlueFlame
+	 * registers for Tx queues are stored in the table.
+	 */
+	ppriv_size =
+		sizeof(struct mlx5_proc_priv) + priv->txqs_n * sizeof(void *);
+	ppriv = rte_malloc_socket("mlx5_proc_priv", ppriv_size,
+				  RTE_CACHE_LINE_SIZE, dev->device->numa_node);
+	if (!ppriv) {
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	ppriv->uar_table_sz = ppriv_size;
+	dev->process_private = ppriv;
+	return 0;
+}
+
+/**
+ * Un-initialize process private data structure.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ */
+static void
+mlx5_proc_priv_uninit(struct rte_eth_dev *dev)
+{
+	if (!dev->process_private)
+		return;
+	rte_free(dev->process_private);
+	dev->process_private = NULL;
+}
+
+/**
  * DPDK callback to close the device.
  *
  * Destroy all queues and objects, free memory.
@@ -589,6 +613,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 		priv->txqs_n = 0;
 		priv->txqs = NULL;
 	}
+	mlx5_proc_priv_uninit(dev);
 	mlx5_mprq_free_mp(dev);
 	mlx5_mr_release(dev);
 	assert(priv->sh);
@@ -913,132 +938,6 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
 
 static struct rte_pci_driver mlx5_driver;
 
-static int
-find_lower_va_bound(const struct rte_memseg_list *msl,
-		const struct rte_memseg *ms, void *arg)
-{
-	void **addr = arg;
-
-	if (msl->external)
-		return 0;
-	if (*addr == NULL)
-		*addr = ms->addr;
-	else
-		*addr = RTE_MIN(*addr, ms->addr);
-
-	return 0;
-}
-
-/**
- * Reserve UAR address space for primary process.
- *
- * Process local resource is used by both primary and secondary to avoid
- * duplicate reservation. The space has to be available on both primary and
- * secondary process, TXQ UAR maps to this area using fixed mmap w/o double
- * check.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_uar_init_primary(void)
-{
-	struct mlx5_shared_data *sd = mlx5_shared_data;
-	void *addr = (void *)0;
-
-	if (sd->uar_base)
-		return 0;
-	/* find out lower bound of hugepage segments */
-	rte_memseg_walk(find_lower_va_bound, &addr);
-	/* keep distance to hugepages to minimize potential conflicts. */
-	addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX5_UAR_OFFSET + MLX5_UAR_SIZE));
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(addr, MLX5_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		DRV_LOG(ERR,
-			"Failed to reserve UAR address space, please"
-			" adjust MLX5_UAR_SIZE or try --base-virtaddr");
-		rte_errno = ENOMEM;
-		return -rte_errno;
-	}
-	/* Accept either same addr or a new addr returned from mmap if target
-	 * range occupied.
-	 */
-	DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
-	sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
-	return 0;
-}
-
-/**
- * Unmap UAR address space reserved for primary process.
- */
-static void
-mlx5_uar_uninit_primary(void)
-{
-	struct mlx5_shared_data *sd = mlx5_shared_data;
-
-	if (!sd->uar_base)
-		return;
-	munmap(sd->uar_base, MLX5_UAR_SIZE);
-	sd->uar_base = NULL;
-}
-
-/**
- * Reserve UAR address space for secondary process, align with primary process.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_uar_init_secondary(void)
-{
-	struct mlx5_shared_data *sd = mlx5_shared_data;
-	struct mlx5_local_data *ld = &mlx5_local_data;
-	void *addr;
-
-	if (ld->uar_base) { /* Already reserved. */
-		assert(sd->uar_base == ld->uar_base);
-		return 0;
-	}
-	assert(sd->uar_base);
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(sd->uar_base, MLX5_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		DRV_LOG(ERR, "UAR mmap failed: %p size: %llu",
-			sd->uar_base, MLX5_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
-	}
-	if (sd->uar_base != addr) {
-		DRV_LOG(ERR,
-			"UAR address %p size %llu occupied, please"
-			" adjust MLX5_UAR_OFFSET or try EAL parameter"
-			" --base-virtaddr",
-			sd->uar_base, MLX5_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
-	}
-	ld->uar_base = addr;
-	DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
-	return 0;
-}
-
-/**
- * Unmap UAR address space reserved for secondary process.
- */
-static void
-mlx5_uar_uninit_secondary(void)
-{
-	struct mlx5_local_data *ld = &mlx5_local_data;
-
-	if (!ld->uar_base)
-		return;
-	munmap(ld->uar_base, MLX5_UAR_SIZE);
-	ld->uar_base = NULL;
-}
-
 /**
  * PMD global initialization.
  *
@@ -1054,7 +953,6 @@ mlx5_init_once(void)
 {
 	struct mlx5_shared_data *sd;
 	struct mlx5_local_data *ld = &mlx5_local_data;
-	int ret;
 
 	if (mlx5_init_shared_data())
 		return -rte_errno;
@@ -1070,18 +968,12 @@ mlx5_init_once(void)
 		rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
 						mlx5_mr_mem_event_cb, NULL);
 		mlx5_mp_init_primary();
-		ret = mlx5_uar_init_primary();
-		if (ret)
-			goto error;
 		sd->init_done = true;
 		break;
 	case RTE_PROC_SECONDARY:
 		if (ld->init_done)
 			break;
 		mlx5_mp_init_secondary();
-		ret = mlx5_uar_init_secondary();
-		if (ret)
-			goto error;
 		++sd->secondary_cnt;
 		ld->init_done = true;
 		break;
@@ -1090,23 +982,6 @@ mlx5_init_once(void)
 	}
 	rte_spinlock_unlock(&sd->lock);
 	return 0;
-error:
-	switch (rte_eal_process_type()) {
-	case RTE_PROC_PRIMARY:
-		mlx5_uar_uninit_primary();
-		mlx5_mp_uninit_primary();
-		rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB", NULL);
-		break;
-	case RTE_PROC_SECONDARY:
-		mlx5_uar_uninit_secondary();
-		mlx5_mp_uninit_secondary();
-		break;
-	default:
-		break;
-	}
-	rte_spinlock_unlock(&sd->lock);
-	mlx5_uninit_shared_data();
-	return -rte_errno;
 }
 
 /**
@@ -1197,12 +1072,15 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 		}
 		eth_dev->device = dpdk_dev;
 		eth_dev->dev_ops = &mlx5_dev_sec_ops;
+		err = mlx5_proc_priv_init(eth_dev);
+		if (err)
+			return NULL;
 		/* Receive command fd from primary process */
 		err = mlx5_mp_req_verbs_cmd_fd(eth_dev);
 		if (err < 0)
 			return NULL;
 		/* Remap UAR for Tx queues. */
-		err = mlx5_tx_uar_remap(eth_dev, err);
+		err = mlx5_tx_uar_init_secondary(eth_dev, err);
 		if (err)
 			return NULL;
 		/*
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 960a2f8191..14c7f3c6fb 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -97,8 +97,6 @@ struct mlx5_shared_data {
 	/* Global spinlock for primary and secondary processes. */
 	int init_done; /* Whether primary has done initialization. */
 	unsigned int secondary_cnt; /* Number of secondary processes init'd. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
 	struct mlx5_dev_list mem_event_cb_list;
 	rte_rwlock_t mem_event_rwlock;
 };
@@ -106,8 +104,6 @@ struct mlx5_shared_data {
 /* Per-process data structure, not visible to other processes. */
 struct mlx5_local_data {
 	int init_done; /* Whether a secondary has done initialization. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
 };
 
 extern struct mlx5_shared_data *mlx5_shared_data;
@@ -282,6 +278,17 @@ struct mlx5_ibv_shared {
 	struct mlx5_ibv_shared_port port[]; /* per device port data array. */
 };
 
+/* Per-process private structure. */
+struct mlx5_proc_priv {
+	size_t uar_table_sz;
+	/* Size of UAR register table. */
+	void *uar_table[];
+	/* Table of UAR registers for each process. */
+};
+
+#define MLX5_PROC_PRIV(port_id) \
+	((struct mlx5_proc_priv *)rte_eth_devices[port_id].process_private)
+
 struct mlx5_priv {
 	LIST_ENTRY(mlx5_priv) mem_event_cb;
 	/**< Called by memory event callback. */
@@ -359,6 +366,7 @@ struct mlx5_priv {
 /* mlx5.c */
 
 int mlx5_getenv_int(const char *);
+int mlx5_proc_priv_init(struct rte_eth_dev *dev);
 
 /* mlx5_ethdev.c */
 
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 1e6fe192a6..3992918c57 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -450,6 +450,9 @@ mlx5_dev_configure(struct rte_eth_dev *dev)
 		if (++j == rxqs_n)
 			j = 0;
 	}
+	ret = mlx5_proc_priv_init(dev);
+	if (ret)
+		return ret;
 	return 0;
 }
 
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 7b58063ceb..5d49892429 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -201,8 +201,8 @@ struct mlx5_txq_data {
 	volatile void *wqes; /* Work queue (use volatile to write into). */
 	volatile uint32_t *qp_db; /* Work queue doorbell. */
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
-	volatile void *bf_reg; /* Blueflame register remapped. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
+	uint16_t port_id; /* Port ID of device. */
 	uint16_t idx; /* Queue index. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
 #ifndef RTE_ARCH_64
@@ -231,9 +231,12 @@ struct mlx5_txq_ctrl {
 	struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
 	struct mlx5_priv *priv; /* Back pointer to private data. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
-	volatile void *bf_reg_orig; /* Blueflame register from verbs. */
+	void *bf_reg; /* BlueFlame register from Verbs. */
 };
 
+#define MLX5_TX_BFREG(txq) \
+		(MLX5_PROC_PRIV((txq)->port_id)->uar_table[(txq)->idx])
+
 /* mlx5_rxq.c */
 
 extern uint8_t rss_hash_default_key[];
@@ -301,7 +304,7 @@ uint64_t mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev);
 int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 			unsigned int socket, const struct rte_eth_txconf *conf);
 void mlx5_tx_queue_release(void *dpdk_txq);
-int mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd);
+int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
 struct mlx5_txq_ibv *mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx);
 struct mlx5_txq_ibv *mlx5_txq_ibv_get(struct rte_eth_dev *dev, uint16_t idx);
 int mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv);
@@ -704,7 +707,7 @@ static __rte_always_inline void
 mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe,
 		       int cond)
 {
-	uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
+	uint64_t *dst = MLX5_TX_BFREG(txq);
 	volatile uint64_t *src = ((volatile uint64_t *)wqe);
 
 	rte_cio_wmb();
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 7c1e5594d6..b7fde35758 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -58,12 +58,6 @@ mlx5_txq_start(struct rte_eth_dev *dev)
 			goto error;
 		}
 	}
-	ret = mlx5_tx_uar_remap(dev, priv->sh->ctx->cmd_fd);
-	if (ret) {
-		/* Adjust index for rollback. */
-		i = priv->txqs_n - 1;
-		goto error;
-	}
 	return 0;
 error:
 	ret = rte_errno; /* Save rte_errno before cleanup. */
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 4bd08cb035..5fb1761955 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -229,13 +229,99 @@ mlx5_tx_queue_release(void *dpdk_txq)
 		}
 }
 
+/**
+ * Initialize Tx UAR registers for primary process.
+ *
+ * @param txq_ctrl
+ *   Pointer to Tx queue control structure.
+ */
+static void
+txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl)
+{
+	struct mlx5_priv *priv = txq_ctrl->priv;
+	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
+
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	assert(ppriv);
+	ppriv->uar_table[txq_ctrl->txq.idx] = txq_ctrl->bf_reg;
+#ifndef RTE_ARCH_64
+	struct mlx5_priv *priv = txq_ctrl->priv;
+	struct mlx5_txq_data *txq = &txq_ctrl->txq;
+	unsigned int lock_idx;
+	/* Assign an UAR lock according to UAR page number */
+	lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
+		   MLX5_UAR_PAGE_NUM_MASK;
+	txq->uar_lock = &priv->uar_lock[lock_idx];
+#endif
+}
 
 /**
- * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
- * Both primary and secondary process do mmap to make UAR address
- * aligned.
+ * Remap UAR register of a Tx queue for secondary process.
  *
- * @param[in] dev
+ * Remapped address is stored at the table in the process private structure of
+ * the device, indexed by queue index.
+ *
+ * @param txq_ctrl
+ *   Pointer to Tx queue control structure.
+ * @param fd
+ *   Verbs file descriptor to map UAR pages.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+txq_uar_init_secondary(struct mlx5_txq_ctrl *txq_ctrl, int fd)
+{
+	struct mlx5_priv *priv = txq_ctrl->priv;
+	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
+	struct mlx5_txq_data *txq = &txq_ctrl->txq;
+	void *addr;
+	uintptr_t uar_va;
+	uintptr_t offset;
+	const size_t page_size = sysconf(_SC_PAGESIZE);
+
+	assert(ppriv);
+	/*
+	 * As rdma-core, UARs are mapped in size of OS page
+	 * size. Ref to libmlx5 function: mlx5_init_context()
+	 */
+	uar_va = (uintptr_t)txq_ctrl->bf_reg;
+	offset = uar_va & (page_size - 1); /* Offset in page. */
+	addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
+			txq_ctrl->uar_mmap_offset);
+	if (addr == MAP_FAILED) {
+		DRV_LOG(ERR,
+			"port %u mmap failed for BF reg of txq %u",
+			txq->port_id, txq->idx);
+		rte_errno = ENXIO;
+		return -rte_errno;
+	}
+	addr = RTE_PTR_ADD(addr, offset);
+	ppriv->uar_table[txq->idx] = addr;
+	return 0;
+}
+
+/**
+ * Unmap UAR register of a Tx queue for secondary process.
+ *
+ * @param txq_ctrl
+ *   Pointer to Tx queue control structure.
+ */
+static void
+txq_uar_uninit_secondary(struct mlx5_txq_ctrl *txq_ctrl)
+{
+	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(txq_ctrl->priv));
+	const size_t page_size = sysconf(_SC_PAGESIZE);
+	void *addr;
+
+	addr = ppriv->uar_table[txq_ctrl->txq.idx];
+	munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
+}
+
+/**
+ * Initialize Tx UAR registers for secondary process.
+ *
+ * @param dev
  *   Pointer to Ethernet device.
  * @param fd
  *   Verbs file descriptor to map UAR pages.
@@ -244,81 +330,36 @@ mlx5_tx_queue_release(void *dpdk_txq)
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
+mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	unsigned int i, j;
-	uintptr_t pages[priv->txqs_n];
-	unsigned int pages_n = 0;
-	uintptr_t uar_va;
-	uintptr_t off;
-	void *addr;
-	void *ret;
 	struct mlx5_txq_data *txq;
 	struct mlx5_txq_ctrl *txq_ctrl;
-	int already_mapped;
-	size_t page_size = sysconf(_SC_PAGESIZE);
-#ifndef RTE_ARCH_64
-	unsigned int lock_idx;
-#endif
+	unsigned int i;
+	int ret;
 
-	memset(pages, 0, priv->txqs_n * sizeof(uintptr_t));
-	/*
-	 * As rdma-core, UARs are mapped in size of OS page size.
-	 * Use aligned address to avoid duplicate mmap.
-	 * Ref to libmlx5 function: mlx5_init_context()
-	 */
+	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
 	for (i = 0; i != priv->txqs_n; ++i) {
 		if (!(*priv->txqs)[i])
 			continue;
 		txq = (*priv->txqs)[i];
 		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
 		assert(txq->idx == (uint16_t)i);
-		/* UAR addr form verbs used to find dup and offset in page. */
-		uar_va = (uintptr_t)txq_ctrl->bf_reg_orig;
-		off = uar_va & (page_size - 1); /* offset in page. */
-		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
-		already_mapped = 0;
-		for (j = 0; j != pages_n; ++j) {
-			if (pages[j] == uar_va) {
-				already_mapped = 1;
-				break;
-			}
-		}
-		/* new address in reserved UAR address space. */
-		addr = RTE_PTR_ADD(mlx5_shared_data->uar_base,
-				   uar_va & (uintptr_t)(MLX5_UAR_SIZE - 1));
-		if (!already_mapped) {
-			pages[pages_n++] = uar_va;
-			/* fixed mmap to specified address in reserved
-			 * address space.
-			 */
-			ret = mmap(addr, page_size,
-				   PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
-				   txq_ctrl->uar_mmap_offset);
-			if (ret != addr) {
-				/* fixed mmap have to return same address */
-				DRV_LOG(ERR,
-					"port %u call to mmap failed on UAR"
-					" for txq %u",
-					dev->data->port_id, txq->idx);
-				rte_errno = ENXIO;
-				return -rte_errno;
-			}
-		}
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once */
-			txq_ctrl->txq.bf_reg = RTE_PTR_ADD((void *)addr, off);
-		else
-			assert(txq_ctrl->txq.bf_reg ==
-			       RTE_PTR_ADD((void *)addr, off));
-#ifndef RTE_ARCH_64
-		/* Assign a UAR lock according to UAR page number */
-		lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
-			   MLX5_UAR_PAGE_NUM_MASK;
-		txq->uar_lock = &priv->uar_lock[lock_idx];
-#endif
+		ret = txq_uar_init_secondary(txq_ctrl, fd);
+		if (ret)
+			goto error;
 	}
 	return 0;
+error:
+	/* Rollback. */
+	do {
+		if (!(*priv->txqs)[i])
+			continue;
+		txq = (*priv->txqs)[i];
+		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
+		txq_uar_uninit_secondary(txq_ctrl);
+	} while (i--);
+	return -rte_errno;
 }
 
 /**
@@ -507,7 +548,6 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	txq_data->wqes = qp.sq.buf;
 	txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
 	txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
-	txq_ctrl->bf_reg_orig = qp.bf.reg;
 	txq_data->cq_db = cq_info.dbrec;
 	txq_data->cqes =
 		(volatile struct mlx5_cqe (*)[])
@@ -521,6 +561,8 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	txq_ibv->qp = tmpl.qp;
 	txq_ibv->cq = tmpl.cq;
 	rte_atomic32_inc(&txq_ibv->refcnt);
+	txq_ctrl->bf_reg = qp.bf.reg;
+	txq_uar_init(txq_ctrl);
 	if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
 		txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
 		DRV_LOG(DEBUG, "port %u: uar_mmap_offset 0x%lx",
@@ -778,6 +820,7 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	tmpl->priv = priv;
 	tmpl->socket = socket;
 	tmpl->txq.elts_n = log2above(desc);
+	tmpl->txq.port_id = dev->data->port_id;
 	tmpl->txq.idx = idx;
 	txq_set_params(tmpl);
 	DRV_LOG(DEBUG, "port %u device_attr.max_qp_wr is %d",
@@ -836,15 +879,12 @@ mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_txq_ctrl *txq;
-	size_t page_size = sysconf(_SC_PAGESIZE);
 
 	if (!(*priv->txqs)[idx])
 		return 0;
 	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
 	if (txq->ibv && !mlx5_txq_ibv_release(txq->ibv))
 		txq->ibv = NULL;
-	munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->txq.bf_reg, page_size),
-	       page_size);
 	if (rte_atomic32_dec_and_test(&txq->refcnt)) {
 		txq_free_elts(txq);
 		mlx5_mr_btree_free(&txq->txq.mr_ctrl.cache_bh);
-- 
2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v4 4/4] net/mlx4: remove device register remap
  2019-04-09 23:13 ` [dpdk-dev] [PATCH v4 0/4] net/mlx: " Yongseok Koh
                     ` (3 preceding siblings ...)
  2019-04-09 23:13   ` [dpdk-dev] [PATCH v4 3/4] net/mlx5: remove device register remap Yongseok Koh
@ 2019-04-09 23:13   ` Yongseok Koh
  2019-04-09 23:13     ` Yongseok Koh
  2019-04-10  6:58   ` [dpdk-dev] [PATCH v4 0/4] net/mlx: " Shahaf Shuler
  5 siblings, 1 reply; 66+ messages in thread
From: Yongseok Koh @ 2019-04-09 23:13 UTC (permalink / raw)
  To: shahafs; +Cc: dev

The UAR (User Access Region) register does not need to be remapped for the
primary process; it has to be remapped only for secondary processes. The UAR
register table is in the process private structure in rte_eth_devices[],
	(struct mlx4_proc_priv *)rte_eth_devices[port_id].process_private

The actual UAR table follows the data structure and the table is used for
both Tx and Rx.

For Tx, BlueFlame in UAR is used to ring the doorbell. MLX4_TX_BFREG(txq)
is defined to get a register for the txq. Each process accesses its own
private data to acquire the register from the UAR table.

For Rx, the doorbell in UAR is required for arming CQ events. However, it is
a known issue that the register isn't remapped for secondary processes.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx4/mlx4.c      | 255 +++++++++++--------------------------------
 drivers/net/mlx4/mlx4.h      |  15 ++-
 drivers/net/mlx4/mlx4_prm.h  |   3 +-
 drivers/net/mlx4/mlx4_rxtx.c |   2 +-
 drivers/net/mlx4/mlx4_rxtx.h |   6 +-
 drivers/net/mlx4/mlx4_txq.c  | 170 ++++++++++++++++++-----------
 6 files changed, 183 insertions(+), 268 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 9bca0ce9cd..17dfcd5a3b 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -126,30 +126,6 @@ mlx4_init_shared_data(void)
 	return ret;
 }
 
-/**
- * Uninitialize shared data between primary and secondary process.
- *
- * The pointer of secondary process is dereferenced and primary process frees
- * the memzone.
- */
-static void
-mlx4_uninit_shared_data(void)
-{
-	const struct rte_memzone *mz;
-
-	rte_spinlock_lock(&mlx4_shared_data_lock);
-	if (mlx4_shared_data) {
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-			mz = rte_memzone_lookup(MZ_MLX4_PMD_SHARED_DATA);
-			rte_memzone_free(mz);
-		} else {
-			memset(&mlx4_local_data, 0, sizeof(mlx4_local_data));
-		}
-		mlx4_shared_data = NULL;
-	}
-	rte_spinlock_unlock(&mlx4_shared_data_lock);
-}
-
 #ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS
 /**
  * Verbs callback to allocate a memory. This function should allocate the space
@@ -207,6 +183,53 @@ mlx4_free_verbs_buf(void *ptr, void *data __rte_unused)
 #endif
 
 /**
+ * Initialize process private data structure.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_proc_priv_init(struct rte_eth_dev *dev)
+{
+	struct mlx4_proc_priv *ppriv;
+	size_t ppriv_size;
+
+	/*
+	 * UAR register table follows the process private structure. BlueFlame
+	 * registers for Tx queues are stored in the table.
+	 */
+	ppriv_size = sizeof(struct mlx4_proc_priv) +
+		     dev->data->nb_tx_queues * sizeof(void *);
+	ppriv = rte_malloc_socket("mlx4_proc_priv", ppriv_size,
+				  RTE_CACHE_LINE_SIZE, dev->device->numa_node);
+	if (!ppriv) {
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	ppriv->uar_table_sz = ppriv_size;
+	dev->process_private = ppriv;
+	return 0;
+}
+
+/**
+ * Un-initialize process private data structure.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ */
+static void
+mlx4_proc_priv_uninit(struct rte_eth_dev *dev)
+{
+	if (!dev->process_private)
+		return;
+	rte_free(dev->process_private);
+	dev->process_private = NULL;
+}
+
+/**
  * DPDK callback for Ethernet device configuration.
  *
  * @param dev
@@ -232,9 +255,17 @@ mlx4_dev_configure(struct rte_eth_dev *dev)
 		goto exit;
 	}
 	ret = mlx4_intr_install(priv);
-	if (ret)
+	if (ret) {
 		ERROR("%p: interrupt handler installation failed",
 		      (void *)dev);
+		goto exit;
+	}
+	ret = mlx4_proc_priv_init(dev);
+	if (ret) {
+		ERROR("%p: process private data allocation failed",
+		      (void *)dev);
+		goto exit;
+	}
 exit:
 	return ret;
 }
@@ -262,11 +293,6 @@ mlx4_dev_start(struct rte_eth_dev *dev)
 		return 0;
 	DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
 	priv->started = 1;
-	ret = mlx4_tx_uar_remap(dev, priv->ctx->cmd_fd);
-	if (ret) {
-		ERROR("%p: cannot remap UAR", (void *)dev);
-		goto err;
-	}
 	ret = mlx4_rss_init(priv);
 	if (ret) {
 		ERROR("%p: cannot initialize RSS resources: %s",
@@ -314,10 +340,6 @@ static void
 mlx4_dev_stop(struct rte_eth_dev *dev)
 {
 	struct mlx4_priv *priv = dev->data->dev_private;
-#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
-	const size_t page_size = sysconf(_SC_PAGESIZE);
-	int i;
-#endif
 
 	if (!priv->started)
 		return;
@@ -331,17 +353,6 @@ mlx4_dev_stop(struct rte_eth_dev *dev)
 	mlx4_flow_sync(priv, NULL);
 	mlx4_rxq_intr_disable(priv);
 	mlx4_rss_deinit(priv);
-#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
-	for (i = 0; i != dev->data->nb_tx_queues; ++i) {
-		struct txq *txq;
-
-		txq = dev->data->tx_queues[i];
-		if (!txq)
-			continue;
-		munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->msq.db,
-					       page_size), page_size);
-	}
-#endif
 }
 
 /**
@@ -372,6 +383,7 @@ mlx4_dev_close(struct rte_eth_dev *dev)
 		mlx4_rx_queue_release(dev->data->rx_queues[i]);
 	for (i = 0; i != dev->data->nb_tx_queues; ++i)
 		mlx4_tx_queue_release(dev->data->tx_queues[i]);
+	mlx4_proc_priv_uninit(dev);
 	mlx4_mr_release(dev);
 	if (priv->pd != NULL) {
 		assert(priv->ctx != NULL);
@@ -666,130 +678,6 @@ mlx4_hw_rss_sup(struct ibv_context *ctx, struct ibv_pd *pd,
 
 static struct rte_pci_driver mlx4_driver;
 
-static int
-find_lower_va_bound(const struct rte_memseg_list *msl,
-		const struct rte_memseg *ms, void *arg)
-{
-	void **addr = arg;
-
-	if (msl->external)
-		return 0;
-	if (*addr == NULL)
-		*addr = ms->addr;
-	else
-		*addr = RTE_MIN(*addr, ms->addr);
-
-	return 0;
-}
-
-/**
- * Reserve UAR address space for primary process.
- *
- * Process local resource is used by both primary and secondary to avoid
- * duplicate reservation. The space has to be available on both primary and
- * secondary process, TXQ UAR maps to this area using fixed mmap w/o double
- * check.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx4_uar_init_primary(void)
-{
-	struct mlx4_shared_data *sd = mlx4_shared_data;
-	void *addr = (void *)0;
-
-	if (sd->uar_base)
-		return 0;
-	/* find out lower bound of hugepage segments */
-	rte_memseg_walk(find_lower_va_bound, &addr);
-	/* keep distance to hugepages to minimize potential conflicts. */
-	addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX4_UAR_OFFSET + MLX4_UAR_SIZE));
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(addr, MLX4_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		ERROR("failed to reserve UAR address space, please"
-		      " adjust MLX4_UAR_SIZE or try --base-virtaddr");
-		rte_errno = ENOMEM;
-		return -rte_errno;
-	}
-	/* Accept either same addr or a new addr returned from mmap if target
-	 * range occupied.
-	 */
-	INFO("reserved UAR address space: %p", addr);
-	sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
-	return 0;
-}
-
-/**
- * Unmap UAR address space reserved for primary process.
- */
-static void
-mlx4_uar_uninit_primary(void)
-{
-	struct mlx4_shared_data *sd = mlx4_shared_data;
-
-	if (!sd->uar_base)
-		return;
-	munmap(sd->uar_base, MLX4_UAR_SIZE);
-	sd->uar_base = NULL;
-}
-
-/**
- * Reserve UAR address space for secondary process, align with primary process.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx4_uar_init_secondary(void)
-{
-	struct mlx4_shared_data *sd = mlx4_shared_data;
-	struct mlx4_local_data *ld = &mlx4_local_data;
-	void *addr;
-
-	if (ld->uar_base) { /* Already reserved. */
-		assert(sd->uar_base == ld->uar_base);
-		return 0;
-	}
-	assert(sd->uar_base);
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(sd->uar_base, MLX4_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		ERROR("UAR mmap failed: %p size: %llu",
-		      sd->uar_base, MLX4_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
-	}
-	if (sd->uar_base != addr) {
-		ERROR("UAR address %p size %llu occupied, please"
-		      " adjust MLX4_UAR_OFFSET or try EAL parameter"
-		      " --base-virtaddr",
-		      sd->uar_base, MLX4_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
-	}
-	ld->uar_base = addr;
-	INFO("reserved UAR address space: %p", addr);
-	return 0;
-}
-
-/**
- * Unmap UAR address space reserved for secondary process.
- */
-static void
-mlx4_uar_uninit_secondary(void)
-{
-	struct mlx4_local_data *ld = &mlx4_local_data;
-
-	if (!ld->uar_base)
-		return;
-	munmap(ld->uar_base, MLX4_UAR_SIZE);
-	ld->uar_base = NULL;
-}
-
 /**
  * PMD global initialization.
  *
@@ -805,7 +693,6 @@ mlx4_init_once(void)
 {
 	struct mlx4_shared_data *sd;
 	struct mlx4_local_data *ld = &mlx4_local_data;
-	int ret;
 
 	if (mlx4_init_shared_data())
 		return -rte_errno;
@@ -821,18 +708,12 @@ mlx4_init_once(void)
 		rte_mem_event_callback_register("MLX4_MEM_EVENT_CB",
 						mlx4_mr_mem_event_cb, NULL);
 		mlx4_mp_init_primary();
-		ret = mlx4_uar_init_primary();
-		if (ret)
-			goto error;
 		sd->init_done = true;
 		break;
 	case RTE_PROC_SECONDARY:
 		if (ld->init_done)
 			break;
 		mlx4_mp_init_secondary();
-		ret = mlx4_uar_init_secondary();
-		if (ret)
-			goto error;
 		++sd->secondary_cnt;
 		ld->init_done = true;
 		break;
@@ -841,23 +722,6 @@ mlx4_init_once(void)
 	}
 	rte_spinlock_unlock(&sd->lock);
 	return 0;
-error:
-	switch (rte_eal_process_type()) {
-	case RTE_PROC_PRIMARY:
-		mlx4_uar_uninit_primary();
-		mlx4_mp_uninit_primary();
-		rte_mem_event_callback_unregister("MLX4_MEM_EVENT_CB", NULL);
-		break;
-	case RTE_PROC_SECONDARY:
-		mlx4_uar_uninit_secondary();
-		mlx4_mp_uninit_secondary();
-		break;
-	default:
-		break;
-	}
-	rte_spinlock_unlock(&sd->lock);
-	mlx4_uninit_shared_data();
-	return -rte_errno;
 }
 
 /**
@@ -1009,6 +873,9 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 			}
 			eth_dev->device = &pci_dev->device;
 			eth_dev->dev_ops = &mlx4_dev_sec_ops;
+			err = mlx4_proc_priv_init(eth_dev);
+			if (err)
+				goto error;
 			/* Receive command fd from primary process. */
 			err = mlx4_mp_req_verbs_cmd_fd(eth_dev);
 			if (err < 0) {
@@ -1016,7 +883,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 				goto error;
 			}
 			/* Remap UAR for Tx queues. */
-			err = mlx4_tx_uar_remap(eth_dev, err);
+			err = mlx4_tx_uar_init_secondary(eth_dev, err);
 			if (err) {
 				err = rte_errno;
 				goto error;
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 1db23d6cc9..904c4f5c03 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -138,8 +138,6 @@ struct mlx4_shared_data {
 	/* Global spinlock for primary and secondary processes. */
 	int init_done; /* Whether primary has done initialization. */
 	unsigned int secondary_cnt; /* Number of secondary processes init'd. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
 	struct mlx4_dev_list mem_event_cb_list;
 	rte_rwlock_t mem_event_rwlock;
 };
@@ -147,12 +145,21 @@ struct mlx4_shared_data {
 /* Per-process data structure, not visible to other processes. */
 struct mlx4_local_data {
 	int init_done; /* Whether a secondary has done initialization. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
 };
 
 extern struct mlx4_shared_data *mlx4_shared_data;
 
+/* Per-process private structure. */
+struct mlx4_proc_priv {
+	size_t uar_table_sz;
+	/* Size of UAR register table. */
+	void *uar_table[];
+	/* Table of UAR registers for each process. */
+};
+
+#define MLX4_PROC_PRIV(port_id) \
+	((struct mlx4_proc_priv *)rte_eth_devices[port_id].process_private)
+
 /** Private data structure. */
 struct mlx4_priv {
 	LIST_ENTRY(mlx4_priv) mem_event_cb;
diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h
index b3e11dde25..16ae6db82d 100644
--- a/drivers/net/mlx4/mlx4_prm.h
+++ b/drivers/net/mlx4/mlx4_prm.h
@@ -77,8 +77,7 @@ struct mlx4_sq {
 	uint32_t owner_opcode;
 	/**< Default owner opcode with HW valid owner bit. */
 	uint32_t stamp; /**< Stamp value with an invalid HW owner bit. */
-	volatile uint32_t *qp_sdb; /**< Pointer to the doorbell. */
-	volatile uint32_t *db; /**< Pointer to the doorbell remapped. */
+	uint32_t *db; /**< Pointer to the doorbell. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
 	uint32_t doorbell_qpn; /**< qp number to write to the doorbell. */
 };
diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index f22f1ba559..391271a616 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -1048,7 +1048,7 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	/* Make sure that descriptors are written before doorbell record. */
 	rte_wmb();
 	/* Ring QP doorbell. */
-	rte_write32(txq->msq.doorbell_qpn, txq->msq.db);
+	rte_write32(txq->msq.doorbell_qpn, MLX4_TX_BFREG(txq));
 	txq->elts_head += i;
 	return i;
 }
diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
index 7d7a8988ed..8baf33fa94 100644
--- a/drivers/net/mlx4/mlx4_rxtx.h
+++ b/drivers/net/mlx4/mlx4_rxtx.h
@@ -97,6 +97,7 @@ struct mlx4_txq_stats {
 struct txq {
 	struct mlx4_sq msq; /**< Info for directly manipulating the SQ. */
 	struct mlx4_cq mcq; /**< Info for directly manipulating the CQ. */
+	uint16_t port_id; /**< Port ID of device. */
 	unsigned int elts_head; /**< Current index in (*elts)[]. */
 	unsigned int elts_tail; /**< First element awaiting completion. */
 	int elts_comp_cd; /**< Countdown for next completion. */
@@ -118,6 +119,9 @@ struct txq {
 	uint8_t data[]; /**< Remaining queue resources. */
 };
 
+#define MLX4_TX_BFREG(txq) \
+		(MLX4_PROC_PRIV((txq)->port_id)->uar_table[(txq)->stats.idx])
+
 /* mlx4_rxq.c */
 
 uint8_t mlx4_rss_hash_key_default[MLX4_RSS_HASH_KEY_SIZE];
@@ -152,7 +156,7 @@ uint16_t mlx4_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts,
 
 /* mlx4_txq.c */
 
-int mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd);
+int mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
 uint64_t mlx4_get_tx_port_offloads(struct mlx4_priv *priv);
 int mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
 			uint16_t desc, unsigned int socket,
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index 698a648c8d..01a5efd80d 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -40,11 +40,88 @@
 #include "mlx4_utils.h"
 
 /**
- * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
- * Both primary and secondary process do mmap to make UAR address
- * aligned.
+ * Initialize Tx UAR registers for primary process.
  *
- * @param[in] dev
+ * @param txq
+ *   Pointer to Tx queue structure.
+ */
+static void
+txq_uar_init(struct txq *txq)
+{
+	struct mlx4_priv *priv = txq->priv;
+	struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(priv));
+
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	assert(ppriv);
+	ppriv->uar_table[txq->stats.idx] = txq->msq.db;
+}
+
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
+/**
+ * Remap UAR register of a Tx queue for secondary process.
+ *
+ * Remapped address is stored at the table in the process private structure of
+ * the device, indexed by queue index.
+ *
+ * @param txq
+ *   Pointer to Tx queue structure.
+ * @param fd
+ *   Verbs file descriptor to map UAR pages.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+txq_uar_init_secondary(struct txq *txq, int fd)
+{
+	struct mlx4_priv *priv = txq->priv;
+	struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(priv));
+	void *addr;
+	uintptr_t uar_va;
+	uintptr_t offset;
+	const size_t page_size = sysconf(_SC_PAGESIZE);
+
+	assert(ppriv);
+	/*
+	 * As rdma-core, UARs are mapped in size of OS page
+	 * size. Ref to libmlx4 function: mlx4_init_context()
+	 */
+	uar_va = (uintptr_t)txq->msq.db;
+	offset = uar_va & (page_size - 1); /* Offset in page. */
+	addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
+			txq->msq.uar_mmap_offset);
+	if (addr == MAP_FAILED) {
+		ERROR("port %u mmap failed for BF reg of txq %u",
+		      txq->port_id, txq->stats.idx);
+		rte_errno = ENXIO;
+		return -rte_errno;
+	}
+	addr = RTE_PTR_ADD(addr, offset);
+	ppriv->uar_table[txq->stats.idx] = addr;
+	return 0;
+}
+
+/**
+ * Unmap UAR register of a Tx queue for secondary process.
+ *
+ * @param txq
+ *   Pointer to Tx queue structure.
+ */
+static void
+txq_uar_uninit_secondary(struct txq *txq)
+{
+	struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(txq->priv));
+	const size_t page_size = sysconf(_SC_PAGESIZE);
+	void *addr;
+
+	addr = ppriv->uar_table[txq->stats.idx];
+	munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
+}
+
+/**
+ * Initialize Tx UAR registers for secondary process.
+ *
+ * @param dev
  *   Pointer to Ethernet device.
  * @param fd
  *   Verbs file descriptor to map UAR pages.
@@ -52,81 +129,41 @@
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
 int
-mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd)
+mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd)
 {
-	unsigned int i, j;
 	const unsigned int txqs_n = dev->data->nb_tx_queues;
-	uintptr_t pages[txqs_n];
-	unsigned int pages_n = 0;
-	uintptr_t uar_va;
-	uintptr_t off;
-	void *addr;
-	void *ret;
 	struct txq *txq;
-	int already_mapped;
-	size_t page_size = sysconf(_SC_PAGESIZE);
+	unsigned int i;
+	int ret;
 
-	memset(pages, 0, txqs_n * sizeof(uintptr_t));
-	/*
-	 * As rdma-core, UARs are mapped in size of OS page size.
-	 * Use aligned address to avoid duplicate mmap.
-	 * Ref to libmlx4 function: mlx4_init_context()
-	 */
+	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
 	for (i = 0; i != txqs_n; ++i) {
 		txq = dev->data->tx_queues[i];
 		if (!txq)
 			continue;
-		/* UAR addr form verbs used to find dup and offset in page. */
-		uar_va = (uintptr_t)txq->msq.qp_sdb;
-		off = uar_va & (page_size - 1); /* offset in page. */
-		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
-		already_mapped = 0;
-		for (j = 0; j != pages_n; ++j) {
-			if (pages[j] == uar_va) {
-				already_mapped = 1;
-				break;
-			}
-		}
-		/* new address in reserved UAR address space. */
-		addr = RTE_PTR_ADD(mlx4_shared_data->uar_base,
-				   uar_va & (uintptr_t)(MLX4_UAR_SIZE - 1));
-		if (!already_mapped) {
-			pages[pages_n++] = uar_va;
-			/* fixed mmap to specified address in reserved
-			 * address space.
-			 */
-			ret = mmap(addr, page_size,
-				   PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
-				   txq->msq.uar_mmap_offset);
-			if (ret != addr) {
-				/* fixed mmap has to return same address. */
-				ERROR("port %u call to mmap failed on UAR"
-				      " for txq %u",
-				      dev->data->port_id, i);
-				rte_errno = ENXIO;
-				return -rte_errno;
-			}
-		}
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once. */
-			txq->msq.db = RTE_PTR_ADD((void *)addr, off);
-		else
-			assert(txq->msq.db ==
-			       RTE_PTR_ADD((void *)addr, off));
+		assert(txq->stats.idx == (uint16_t)i);
+		ret = txq_uar_init_secondary(txq, fd);
+		if (ret)
+			goto error;
 	}
 	return 0;
+error:
+	/* Rollback. */
+	do {
+		txq = dev->data->tx_queues[i];
+		if (!txq)
+			continue;
+		txq_uar_uninit_secondary(txq);
+	} while (i--);
+	return -rte_errno;
 }
 #else
 int
-mlx4_tx_uar_remap(struct rte_eth_dev *dev __rte_unused, int fd __rte_unused)
+mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev __rte_unused,
+			   int fd __rte_unused)
 {
-	/*
-	 * Even if rdma-core doesn't support UAR remap, primary process
-	 * shouldn't be interrupted.
-	 */
-	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-		return 0;
+	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
 	ERROR("UAR remap is not supported");
 	rte_errno = ENOTSUP;
 	return -rte_errno;
@@ -187,11 +224,10 @@ mlx4_txq_fill_dv_obj_info(struct txq *txq, struct mlx4dv_obj *mlxdv)
 				     (0u << MLX4_SQ_OWNER_BIT));
 #ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
 	sq->uar_mmap_offset = dqp->uar_mmap_offset;
-	sq->qp_sdb = dqp->sdb;
 #else
 	sq->uar_mmap_offset = -1; /* Make mmap() fail. */
-	sq->db = dqp->sdb;
 #endif
+	sq->db = dqp->sdb;
 	sq->doorbell_qpn = dqp->doorbell_qpn;
 	cq->buf = dcq->buf.buf;
 	cq->cqe_cnt = dcq->cqe_cnt;
@@ -314,6 +350,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	}
 	*txq = (struct txq){
 		.priv = priv,
+		.port_id = dev->data->port_id,
 		.stats = {
 			.idx = idx,
 		},
@@ -432,6 +469,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	}
 #endif
 	mlx4_txq_fill_dv_obj_info(txq, &mlxdv);
+	txq_uar_init(txq);
 	/* Save first wqe pointer in the first element. */
 	(&(*txq->elts)[0])->wqe =
 		(volatile struct mlx4_wqe_ctrl_seg *)txq->msq.buf;
-- 
2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v4 4/4] net/mlx4: remove device register remap
  2019-04-09 23:13   ` [dpdk-dev] [PATCH v4 4/4] net/mlx4: " Yongseok Koh
@ 2019-04-09 23:13     ` Yongseok Koh
  0 siblings, 0 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-09 23:13 UTC (permalink / raw)
  To: shahafs; +Cc: dev

The UAR (User Access Region) register does not need to be remapped for the
primary process; it has to be remapped only for secondary processes. The UAR
register table is in the process private structure in rte_eth_devices[],
	(struct mlx4_proc_priv *)rte_eth_devices[port_id].process_private

The actual UAR table follows the data structure and the table is used for
both Tx and Rx.

For Tx, BlueFlame in UAR is used to ring the doorbell. MLX4_TX_BFREG(txq)
is defined to get a register for the txq. Each process accesses its own
private data to acquire the register from the UAR table.

For Rx, the doorbell in UAR is required for arming CQ events. However, it is
a known issue that the register isn't remapped for secondary processes.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx4/mlx4.c      | 255 +++++++++++--------------------------------
 drivers/net/mlx4/mlx4.h      |  15 ++-
 drivers/net/mlx4/mlx4_prm.h  |   3 +-
 drivers/net/mlx4/mlx4_rxtx.c |   2 +-
 drivers/net/mlx4/mlx4_rxtx.h |   6 +-
 drivers/net/mlx4/mlx4_txq.c  | 170 ++++++++++++++++++-----------
 6 files changed, 183 insertions(+), 268 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 9bca0ce9cd..17dfcd5a3b 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -126,30 +126,6 @@ mlx4_init_shared_data(void)
 	return ret;
 }
 
-/**
- * Uninitialize shared data between primary and secondary process.
- *
- * The pointer of secondary process is dereferenced and primary process frees
- * the memzone.
- */
-static void
-mlx4_uninit_shared_data(void)
-{
-	const struct rte_memzone *mz;
-
-	rte_spinlock_lock(&mlx4_shared_data_lock);
-	if (mlx4_shared_data) {
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-			mz = rte_memzone_lookup(MZ_MLX4_PMD_SHARED_DATA);
-			rte_memzone_free(mz);
-		} else {
-			memset(&mlx4_local_data, 0, sizeof(mlx4_local_data));
-		}
-		mlx4_shared_data = NULL;
-	}
-	rte_spinlock_unlock(&mlx4_shared_data_lock);
-}
-
 #ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS
 /**
  * Verbs callback to allocate a memory. This function should allocate the space
@@ -207,6 +183,53 @@ mlx4_free_verbs_buf(void *ptr, void *data __rte_unused)
 #endif
 
 /**
+ * Initialize process private data structure.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_proc_priv_init(struct rte_eth_dev *dev)
+{
+	struct mlx4_proc_priv *ppriv;
+	size_t ppriv_size;
+
+	/*
+	 * UAR register table follows the process private structure. BlueFlame
+	 * registers for Tx queues are stored in the table.
+	 */
+	ppriv_size = sizeof(struct mlx4_proc_priv) +
+		     dev->data->nb_tx_queues * sizeof(void *);
+	ppriv = rte_malloc_socket("mlx4_proc_priv", ppriv_size,
+				  RTE_CACHE_LINE_SIZE, dev->device->numa_node);
+	if (!ppriv) {
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	ppriv->uar_table_sz = ppriv_size;
+	dev->process_private = ppriv;
+	return 0;
+}
+
+/**
+ * Un-initialize process private data structure.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ */
+static void
+mlx4_proc_priv_uninit(struct rte_eth_dev *dev)
+{
+	if (!dev->process_private)
+		return;
+	rte_free(dev->process_private);
+	dev->process_private = NULL;
+}
+
+/**
  * DPDK callback for Ethernet device configuration.
  *
  * @param dev
@@ -232,9 +255,17 @@ mlx4_dev_configure(struct rte_eth_dev *dev)
 		goto exit;
 	}
 	ret = mlx4_intr_install(priv);
-	if (ret)
+	if (ret) {
 		ERROR("%p: interrupt handler installation failed",
 		      (void *)dev);
+		goto exit;
+	}
+	ret = mlx4_proc_priv_init(dev);
+	if (ret) {
+		ERROR("%p: process private data allocation failed",
+		      (void *)dev);
+		goto exit;
+	}
 exit:
 	return ret;
 }
@@ -262,11 +293,6 @@ mlx4_dev_start(struct rte_eth_dev *dev)
 		return 0;
 	DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
 	priv->started = 1;
-	ret = mlx4_tx_uar_remap(dev, priv->ctx->cmd_fd);
-	if (ret) {
-		ERROR("%p: cannot remap UAR", (void *)dev);
-		goto err;
-	}
 	ret = mlx4_rss_init(priv);
 	if (ret) {
 		ERROR("%p: cannot initialize RSS resources: %s",
@@ -314,10 +340,6 @@ static void
 mlx4_dev_stop(struct rte_eth_dev *dev)
 {
 	struct mlx4_priv *priv = dev->data->dev_private;
-#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
-	const size_t page_size = sysconf(_SC_PAGESIZE);
-	int i;
-#endif
 
 	if (!priv->started)
 		return;
@@ -331,17 +353,6 @@ mlx4_dev_stop(struct rte_eth_dev *dev)
 	mlx4_flow_sync(priv, NULL);
 	mlx4_rxq_intr_disable(priv);
 	mlx4_rss_deinit(priv);
-#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
-	for (i = 0; i != dev->data->nb_tx_queues; ++i) {
-		struct txq *txq;
-
-		txq = dev->data->tx_queues[i];
-		if (!txq)
-			continue;
-		munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->msq.db,
-					       page_size), page_size);
-	}
-#endif
 }
 
 /**
@@ -372,6 +383,7 @@ mlx4_dev_close(struct rte_eth_dev *dev)
 		mlx4_rx_queue_release(dev->data->rx_queues[i]);
 	for (i = 0; i != dev->data->nb_tx_queues; ++i)
 		mlx4_tx_queue_release(dev->data->tx_queues[i]);
+	mlx4_proc_priv_uninit(dev);
 	mlx4_mr_release(dev);
 	if (priv->pd != NULL) {
 		assert(priv->ctx != NULL);
@@ -666,130 +678,6 @@ mlx4_hw_rss_sup(struct ibv_context *ctx, struct ibv_pd *pd,
 
 static struct rte_pci_driver mlx4_driver;
 
-static int
-find_lower_va_bound(const struct rte_memseg_list *msl,
-		const struct rte_memseg *ms, void *arg)
-{
-	void **addr = arg;
-
-	if (msl->external)
-		return 0;
-	if (*addr == NULL)
-		*addr = ms->addr;
-	else
-		*addr = RTE_MIN(*addr, ms->addr);
-
-	return 0;
-}
-
-/**
- * Reserve UAR address space for primary process.
- *
- * Process local resource is used by both primary and secondary to avoid
- * duplicate reservation. The space has to be available on both primary and
- * secondary process, TXQ UAR maps to this area using fixed mmap w/o double
- * check.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx4_uar_init_primary(void)
-{
-	struct mlx4_shared_data *sd = mlx4_shared_data;
-	void *addr = (void *)0;
-
-	if (sd->uar_base)
-		return 0;
-	/* find out lower bound of hugepage segments */
-	rte_memseg_walk(find_lower_va_bound, &addr);
-	/* keep distance to hugepages to minimize potential conflicts. */
-	addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX4_UAR_OFFSET + MLX4_UAR_SIZE));
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(addr, MLX4_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		ERROR("failed to reserve UAR address space, please"
-		      " adjust MLX4_UAR_SIZE or try --base-virtaddr");
-		rte_errno = ENOMEM;
-		return -rte_errno;
-	}
-	/* Accept either same addr or a new addr returned from mmap if target
-	 * range occupied.
-	 */
-	INFO("reserved UAR address space: %p", addr);
-	sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
-	return 0;
-}
-
-/**
- * Unmap UAR address space reserved for primary process.
- */
-static void
-mlx4_uar_uninit_primary(void)
-{
-	struct mlx4_shared_data *sd = mlx4_shared_data;
-
-	if (!sd->uar_base)
-		return;
-	munmap(sd->uar_base, MLX4_UAR_SIZE);
-	sd->uar_base = NULL;
-}
-
-/**
- * Reserve UAR address space for secondary process, align with primary process.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx4_uar_init_secondary(void)
-{
-	struct mlx4_shared_data *sd = mlx4_shared_data;
-	struct mlx4_local_data *ld = &mlx4_local_data;
-	void *addr;
-
-	if (ld->uar_base) { /* Already reserved. */
-		assert(sd->uar_base == ld->uar_base);
-		return 0;
-	}
-	assert(sd->uar_base);
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(sd->uar_base, MLX4_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		ERROR("UAR mmap failed: %p size: %llu",
-		      sd->uar_base, MLX4_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
-	}
-	if (sd->uar_base != addr) {
-		ERROR("UAR address %p size %llu occupied, please"
-		      " adjust MLX4_UAR_OFFSET or try EAL parameter"
-		      " --base-virtaddr",
-		      sd->uar_base, MLX4_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
-	}
-	ld->uar_base = addr;
-	INFO("reserved UAR address space: %p", addr);
-	return 0;
-}
-
-/**
- * Unmap UAR address space reserved for secondary process.
- */
-static void
-mlx4_uar_uninit_secondary(void)
-{
-	struct mlx4_local_data *ld = &mlx4_local_data;
-
-	if (!ld->uar_base)
-		return;
-	munmap(ld->uar_base, MLX4_UAR_SIZE);
-	ld->uar_base = NULL;
-}
-
 /**
  * PMD global initialization.
  *
@@ -805,7 +693,6 @@ mlx4_init_once(void)
 {
 	struct mlx4_shared_data *sd;
 	struct mlx4_local_data *ld = &mlx4_local_data;
-	int ret;
 
 	if (mlx4_init_shared_data())
 		return -rte_errno;
@@ -821,18 +708,12 @@ mlx4_init_once(void)
 		rte_mem_event_callback_register("MLX4_MEM_EVENT_CB",
 						mlx4_mr_mem_event_cb, NULL);
 		mlx4_mp_init_primary();
-		ret = mlx4_uar_init_primary();
-		if (ret)
-			goto error;
 		sd->init_done = true;
 		break;
 	case RTE_PROC_SECONDARY:
 		if (ld->init_done)
 			break;
 		mlx4_mp_init_secondary();
-		ret = mlx4_uar_init_secondary();
-		if (ret)
-			goto error;
 		++sd->secondary_cnt;
 		ld->init_done = true;
 		break;
@@ -841,23 +722,6 @@ mlx4_init_once(void)
 	}
 	rte_spinlock_unlock(&sd->lock);
 	return 0;
-error:
-	switch (rte_eal_process_type()) {
-	case RTE_PROC_PRIMARY:
-		mlx4_uar_uninit_primary();
-		mlx4_mp_uninit_primary();
-		rte_mem_event_callback_unregister("MLX4_MEM_EVENT_CB", NULL);
-		break;
-	case RTE_PROC_SECONDARY:
-		mlx4_uar_uninit_secondary();
-		mlx4_mp_uninit_secondary();
-		break;
-	default:
-		break;
-	}
-	rte_spinlock_unlock(&sd->lock);
-	mlx4_uninit_shared_data();
-	return -rte_errno;
 }
 
 /**
@@ -1009,6 +873,9 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 			}
 			eth_dev->device = &pci_dev->device;
 			eth_dev->dev_ops = &mlx4_dev_sec_ops;
+			err = mlx4_proc_priv_init(eth_dev);
+			if (err)
+				goto error;
 			/* Receive command fd from primary process. */
 			err = mlx4_mp_req_verbs_cmd_fd(eth_dev);
 			if (err < 0) {
@@ -1016,7 +883,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 				goto error;
 			}
 			/* Remap UAR for Tx queues. */
-			err = mlx4_tx_uar_remap(eth_dev, err);
+			err = mlx4_tx_uar_init_secondary(eth_dev, err);
 			if (err) {
 				err = rte_errno;
 				goto error;
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 1db23d6cc9..904c4f5c03 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -138,8 +138,6 @@ struct mlx4_shared_data {
 	/* Global spinlock for primary and secondary processes. */
 	int init_done; /* Whether primary has done initialization. */
 	unsigned int secondary_cnt; /* Number of secondary processes init'd. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
 	struct mlx4_dev_list mem_event_cb_list;
 	rte_rwlock_t mem_event_rwlock;
 };
@@ -147,12 +145,21 @@ struct mlx4_shared_data {
 /* Per-process data structure, not visible to other processes. */
 struct mlx4_local_data {
 	int init_done; /* Whether a secondary has done initialization. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
 };
 
 extern struct mlx4_shared_data *mlx4_shared_data;
 
+/* Per-process private structure. */
+struct mlx4_proc_priv {
+	size_t uar_table_sz;
+	/* Size of UAR register table. */
+	void *uar_table[];
+	/* Table of UAR registers for each process. */
+};
+
+#define MLX4_PROC_PRIV(port_id) \
+	((struct mlx4_proc_priv *)rte_eth_devices[port_id].process_private)
+
 /** Private data structure. */
 struct mlx4_priv {
 	LIST_ENTRY(mlx4_priv) mem_event_cb;
diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h
index b3e11dde25..16ae6db82d 100644
--- a/drivers/net/mlx4/mlx4_prm.h
+++ b/drivers/net/mlx4/mlx4_prm.h
@@ -77,8 +77,7 @@ struct mlx4_sq {
 	uint32_t owner_opcode;
 	/**< Default owner opcode with HW valid owner bit. */
 	uint32_t stamp; /**< Stamp value with an invalid HW owner bit. */
-	volatile uint32_t *qp_sdb; /**< Pointer to the doorbell. */
-	volatile uint32_t *db; /**< Pointer to the doorbell remapped. */
+	uint32_t *db; /**< Pointer to the doorbell. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
 	uint32_t doorbell_qpn; /**< qp number to write to the doorbell. */
 };
diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index f22f1ba559..391271a616 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -1048,7 +1048,7 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	/* Make sure that descriptors are written before doorbell record. */
 	rte_wmb();
 	/* Ring QP doorbell. */
-	rte_write32(txq->msq.doorbell_qpn, txq->msq.db);
+	rte_write32(txq->msq.doorbell_qpn, MLX4_TX_BFREG(txq));
 	txq->elts_head += i;
 	return i;
 }
diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
index 7d7a8988ed..8baf33fa94 100644
--- a/drivers/net/mlx4/mlx4_rxtx.h
+++ b/drivers/net/mlx4/mlx4_rxtx.h
@@ -97,6 +97,7 @@ struct mlx4_txq_stats {
 struct txq {
 	struct mlx4_sq msq; /**< Info for directly manipulating the SQ. */
 	struct mlx4_cq mcq; /**< Info for directly manipulating the CQ. */
+	uint16_t port_id; /**< Port ID of device. */
 	unsigned int elts_head; /**< Current index in (*elts)[]. */
 	unsigned int elts_tail; /**< First element awaiting completion. */
 	int elts_comp_cd; /**< Countdown for next completion. */
@@ -118,6 +119,9 @@ struct txq {
 	uint8_t data[]; /**< Remaining queue resources. */
 };
 
+#define MLX4_TX_BFREG(txq) \
+		(MLX4_PROC_PRIV((txq)->port_id)->uar_table[(txq)->stats.idx])
+
 /* mlx4_rxq.c */
 
 uint8_t mlx4_rss_hash_key_default[MLX4_RSS_HASH_KEY_SIZE];
@@ -152,7 +156,7 @@ uint16_t mlx4_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts,
 
 /* mlx4_txq.c */
 
-int mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd);
+int mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
 uint64_t mlx4_get_tx_port_offloads(struct mlx4_priv *priv);
 int mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
 			uint16_t desc, unsigned int socket,
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index 698a648c8d..01a5efd80d 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -40,11 +40,88 @@
 #include "mlx4_utils.h"
 
 /**
- * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
- * Both primary and secondary process do mmap to make UAR address
- * aligned.
+ * Initialize Tx UAR registers for primary process.
  *
- * @param[in] dev
+ * @param txq
+ *   Pointer to Tx queue structure.
+ */
+static void
+txq_uar_init(struct txq *txq)
+{
+	struct mlx4_priv *priv = txq->priv;
+	struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(priv));
+
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	assert(ppriv);
+	ppriv->uar_table[txq->stats.idx] = txq->msq.db;
+}
+
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
+/**
+ * Remap UAR register of a Tx queue for secondary process.
+ *
+ * Remapped address is stored at the table in the process private structure of
+ * the device, indexed by queue index.
+ *
+ * @param txq
+ *   Pointer to Tx queue structure.
+ * @param fd
+ *   Verbs file descriptor to map UAR pages.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+txq_uar_init_secondary(struct txq *txq, int fd)
+{
+	struct mlx4_priv *priv = txq->priv;
+	struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(priv));
+	void *addr;
+	uintptr_t uar_va;
+	uintptr_t offset;
+	const size_t page_size = sysconf(_SC_PAGESIZE);
+
+	assert(ppriv);
+	/*
+	 * As rdma-core, UARs are mapped in size of OS page
+	 * size. Ref to libmlx4 function: mlx4_init_context()
+	 */
+	uar_va = (uintptr_t)txq->msq.db;
+	offset = uar_va & (page_size - 1); /* Offset in page. */
+	addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
+			txq->msq.uar_mmap_offset);
+	if (addr == MAP_FAILED) {
+		ERROR("port %u mmap failed for BF reg of txq %u",
+		      txq->port_id, txq->stats.idx);
+		rte_errno = ENXIO;
+		return -rte_errno;
+	}
+	addr = RTE_PTR_ADD(addr, offset);
+	ppriv->uar_table[txq->stats.idx] = addr;
+	return 0;
+}
+
+/**
+ * Unmap UAR register of a Tx queue for secondary process.
+ *
+ * @param txq
+ *   Pointer to Tx queue structure.
+ */
+static void
+txq_uar_uninit_secondary(struct txq *txq)
+{
+	struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(txq->priv));
+	const size_t page_size = sysconf(_SC_PAGESIZE);
+	void *addr;
+
+	addr = ppriv->uar_table[txq->stats.idx];
+	munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
+}
+
+/**
+ * Initialize Tx UAR registers for secondary process.
+ *
+ * @param dev
  *   Pointer to Ethernet device.
  * @param fd
  *   Verbs file descriptor to map UAR pages.
@@ -52,81 +129,41 @@
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
 int
-mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd)
+mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd)
 {
-	unsigned int i, j;
 	const unsigned int txqs_n = dev->data->nb_tx_queues;
-	uintptr_t pages[txqs_n];
-	unsigned int pages_n = 0;
-	uintptr_t uar_va;
-	uintptr_t off;
-	void *addr;
-	void *ret;
 	struct txq *txq;
-	int already_mapped;
-	size_t page_size = sysconf(_SC_PAGESIZE);
+	unsigned int i;
+	int ret;
 
-	memset(pages, 0, txqs_n * sizeof(uintptr_t));
-	/*
-	 * As rdma-core, UARs are mapped in size of OS page size.
-	 * Use aligned address to avoid duplicate mmap.
-	 * Ref to libmlx4 function: mlx4_init_context()
-	 */
+	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
 	for (i = 0; i != txqs_n; ++i) {
 		txq = dev->data->tx_queues[i];
 		if (!txq)
 			continue;
-		/* UAR addr form verbs used to find dup and offset in page. */
-		uar_va = (uintptr_t)txq->msq.qp_sdb;
-		off = uar_va & (page_size - 1); /* offset in page. */
-		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
-		already_mapped = 0;
-		for (j = 0; j != pages_n; ++j) {
-			if (pages[j] == uar_va) {
-				already_mapped = 1;
-				break;
-			}
-		}
-		/* new address in reserved UAR address space. */
-		addr = RTE_PTR_ADD(mlx4_shared_data->uar_base,
-				   uar_va & (uintptr_t)(MLX4_UAR_SIZE - 1));
-		if (!already_mapped) {
-			pages[pages_n++] = uar_va;
-			/* fixed mmap to specified address in reserved
-			 * address space.
-			 */
-			ret = mmap(addr, page_size,
-				   PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
-				   txq->msq.uar_mmap_offset);
-			if (ret != addr) {
-				/* fixed mmap has to return same address. */
-				ERROR("port %u call to mmap failed on UAR"
-				      " for txq %u",
-				      dev->data->port_id, i);
-				rte_errno = ENXIO;
-				return -rte_errno;
-			}
-		}
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once. */
-			txq->msq.db = RTE_PTR_ADD((void *)addr, off);
-		else
-			assert(txq->msq.db ==
-			       RTE_PTR_ADD((void *)addr, off));
+		assert(txq->stats.idx == (uint16_t)i);
+		ret = txq_uar_init_secondary(txq, fd);
+		if (ret)
+			goto error;
 	}
 	return 0;
+error:
+	/* Rollback. */
+	do {
+		txq = dev->data->tx_queues[i];
+		if (!txq)
+			continue;
+		txq_uar_uninit_secondary(txq);
+	} while (i--);
+	return -rte_errno;
 }
 #else
 int
-mlx4_tx_uar_remap(struct rte_eth_dev *dev __rte_unused, int fd __rte_unused)
+mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev __rte_unused,
+			   int fd __rte_unused)
 {
-	/*
-	 * Even if rdma-core doesn't support UAR remap, primary process
-	 * shouldn't be interrupted.
-	 */
-	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-		return 0;
+	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
 	ERROR("UAR remap is not supported");
 	rte_errno = ENOTSUP;
 	return -rte_errno;
@@ -187,11 +224,10 @@ mlx4_txq_fill_dv_obj_info(struct txq *txq, struct mlx4dv_obj *mlxdv)
 				     (0u << MLX4_SQ_OWNER_BIT));
 #ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
 	sq->uar_mmap_offset = dqp->uar_mmap_offset;
-	sq->qp_sdb = dqp->sdb;
 #else
 	sq->uar_mmap_offset = -1; /* Make mmap() fail. */
-	sq->db = dqp->sdb;
 #endif
+	sq->db = dqp->sdb;
 	sq->doorbell_qpn = dqp->doorbell_qpn;
 	cq->buf = dcq->buf.buf;
 	cq->cqe_cnt = dcq->cqe_cnt;
@@ -314,6 +350,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	}
 	*txq = (struct txq){
 		.priv = priv,
+		.port_id = dev->data->port_id,
 		.stats = {
 			.idx = idx,
 		},
@@ -432,6 +469,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	}
 #endif
 	mlx4_txq_fill_dv_obj_info(txq, &mlxdv);
+	txq_uar_init(txq);
 	/* Save first wqe pointer in the first element. */
 	(&(*txq->elts)[0])->wqe =
 		(volatile struct mlx4_wqe_ctrl_seg *)txq->msq.buf;
-- 
2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [dpdk-dev] [PATCH v4 0/4] net/mlx: remove device register remap
  2019-04-09 23:13 ` [dpdk-dev] [PATCH v4 0/4] net/mlx: " Yongseok Koh
                     ` (4 preceding siblings ...)
  2019-04-09 23:13   ` [dpdk-dev] [PATCH v4 4/4] net/mlx4: " Yongseok Koh
@ 2019-04-10  6:58   ` Shahaf Shuler
  2019-04-10  6:58     ` Shahaf Shuler
  2019-04-10 17:50     ` Ferruh Yigit
  5 siblings, 2 replies; 66+ messages in thread
From: Shahaf Shuler @ 2019-04-10  6:58 UTC (permalink / raw)
  To: Yongseok Koh; +Cc: dev

Wednesday, April 10, 2019 2:13 AM, Yongseok Koh:
> Subject: [PATCH v4 0/4] net/mlx: remove device register remap
> 
> This patchset lifts the requirement of reserving huge virtual address space
> and remapping device UAR register on to it in order to use the same address
> between primary and secondary process.
> 

Series applied to next-net-mlx, thanks. 

> v4:
> * add mlx4_proc_priv_[init|uninit]() to avoid duplicate code
> * remove the number of Rx queues from the ppriv size calculation
> * move freeing ppriv to mlx4_dev_close() from mlx4_dev_stop()
> * rebase on top of "net/mlx4: fix Tx doorbell register unmap" [1]
> 
> v3:
> * move UAR table to per-process storage
> 
> v2:
> * rebase on the latest branch tip
> * fix a bug
> 
> [1] http://patches.dpdk.org/patch/52435/
> 
> Yongseok Koh (4):
>   net/mlx5: fix recursive inclusion of header file
>   net/mlx5: remove redundant queue index
>   net/mlx5: remove device register remap
>   net/mlx4: remove device register remap
> 
>  drivers/net/mlx4/mlx4.c            | 255 +++++++++----------------------------
>  drivers/net/mlx4/mlx4.h            |  15 ++-
>  drivers/net/mlx4/mlx4_prm.h        |   3 +-
>  drivers/net/mlx4/mlx4_rxtx.c       |   2 +-
>  drivers/net/mlx4/mlx4_rxtx.h       |   6 +-
>  drivers/net/mlx4/mlx4_txq.c        | 170 +++++++++++++++----------
>  drivers/net/mlx5/mlx5.c            | 228 ++++++++-------------------------
>  drivers/net/mlx5/mlx5.h            |  17 ++-
>  drivers/net/mlx5/mlx5_ethdev.c     |   3 +
>  drivers/net/mlx5/mlx5_flow.c       |   5 +-
>  drivers/net/mlx5/mlx5_flow_dv.c    |   4 +-
>  drivers/net/mlx5/mlx5_flow_verbs.c |   5 +-
>  drivers/net/mlx5/mlx5_rxq.c        |  29 ++---
>  drivers/net/mlx5/mlx5_rxtx.h       |  21 +--
>  drivers/net/mlx5/mlx5_stats.c      |  15 +--
>  drivers/net/mlx5/mlx5_trigger.c    |   8 +-
>  drivers/net/mlx5/mlx5_txq.c        | 199 +++++++++++++++++------------
>  drivers/net/mlx5/mlx5_vlan.c       |   3 +-
>  18 files changed, 413 insertions(+), 575 deletions(-)
> 
> --
> 2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [dpdk-dev] [PATCH v4 0/4] net/mlx: remove device register remap
  2019-04-10  6:58   ` [dpdk-dev] [PATCH v4 0/4] net/mlx: " Shahaf Shuler
@ 2019-04-10  6:58     ` Shahaf Shuler
  2019-04-10 17:50     ` Ferruh Yigit
  1 sibling, 0 replies; 66+ messages in thread
From: Shahaf Shuler @ 2019-04-10  6:58 UTC (permalink / raw)
  To: Yongseok Koh; +Cc: dev

Wednesday, April 10, 2019 2:13 AM, Yongseok Koh:
> Subject: [PATCH v4 0/4] net/mlx: remove device register remap
> 
> This patchset lifts the requirement of reserving huge virtual address space
> and remapping device UAR register on to it in order to use the same address
> between primary and secondary process.
> 

Series applied to next-net-mlx, thanks. 

> v4:
> * add mlx4_proc_priv_[init|uninit]() to avoid duplicate code
> * remove the number of Rx queues from the ppriv size calculation
> * move freeing ppriv to mlx4_dev_close() from mlx4_dev_stop()
> * rebase on top of "net/mlx4: fix Tx doorbell register unmap" [1]
> 
> v3:
> * move UAR table to per-process storage
> 
> v2:
> * rebase on the latest branch tip
> * fix a bug
> 
> [1] http://patches.dpdk.org/patch/52435/
> 
> Yongseok Koh (4):
>   net/mlx5: fix recursive inclusion of header file
>   net/mlx5: remove redundant queue index
>   net/mlx5: remove device register remap
>   net/mlx4: remove device register remap
> 
>  drivers/net/mlx4/mlx4.c            | 255 +++++++++----------------------------
>  drivers/net/mlx4/mlx4.h            |  15 ++-
>  drivers/net/mlx4/mlx4_prm.h        |   3 +-
>  drivers/net/mlx4/mlx4_rxtx.c       |   2 +-
>  drivers/net/mlx4/mlx4_rxtx.h       |   6 +-
>  drivers/net/mlx4/mlx4_txq.c        | 170 +++++++++++++++----------
>  drivers/net/mlx5/mlx5.c            | 228 ++++++++-------------------------
>  drivers/net/mlx5/mlx5.h            |  17 ++-
>  drivers/net/mlx5/mlx5_ethdev.c     |   3 +
>  drivers/net/mlx5/mlx5_flow.c       |   5 +-
>  drivers/net/mlx5/mlx5_flow_dv.c    |   4 +-
>  drivers/net/mlx5/mlx5_flow_verbs.c |   5 +-
>  drivers/net/mlx5/mlx5_rxq.c        |  29 ++---
>  drivers/net/mlx5/mlx5_rxtx.h       |  21 +--
>  drivers/net/mlx5/mlx5_stats.c      |  15 +--
>  drivers/net/mlx5/mlx5_trigger.c    |   8 +-
>  drivers/net/mlx5/mlx5_txq.c        | 199 +++++++++++++++++------------
>  drivers/net/mlx5/mlx5_vlan.c       |   3 +-
>  18 files changed, 413 insertions(+), 575 deletions(-)
> 
> --
> 2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [dpdk-dev] [PATCH v4 3/4] net/mlx5: remove device register remap
  2019-04-09 23:13   ` [dpdk-dev] [PATCH v4 3/4] net/mlx5: remove device register remap Yongseok Koh
  2019-04-09 23:13     ` Yongseok Koh
@ 2019-04-10 17:50     ` Ferruh Yigit
  2019-04-10 17:50       ` Ferruh Yigit
  2019-04-10 19:12       ` Yongseok Koh
  1 sibling, 2 replies; 66+ messages in thread
From: Ferruh Yigit @ 2019-04-10 17:50 UTC (permalink / raw)
  To: Yongseok Koh, shahafs; +Cc: dev

On 4/10/2019 12:13 AM, Yongseok Koh wrote:
> UAR (User Access Region) register does not need to be remapped for primary
> process but it should be remapped only for secondary process. UAR register
> table is in the process private structure in rte_eth_devices[],
> 	(struct mlx5_proc_priv *)rte_eth_devices[port_id].process_private
> 
> The actual UAR table follows the data structure and the table is used for
> both Tx and Rx.
> 
> For Tx, BlueFlame in UAR is used to ring the doorbell. MLX5_TX_BFREG(txq)
> is defined to get a register for the txq. Each process accesses its own
> private data to acquire the register from the UAR table.
> 
> For Rx, the doorbell in UAR is required in arming CQ event. However, it is
> a known issue that the register isn't remapped for secondary process.
> 
> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>

<...>

> @@ -229,13 +229,99 @@ mlx5_tx_queue_release(void *dpdk_txq)
>  		}
>  }
>  
> +/**
> + * Initialize Tx UAR registers for primary process.
> + *
> + * @param txq_ctrl
> + *   Pointer to Tx queue control structure.
> + */
> +static void
> +txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl)
> +{
> +	struct mlx5_priv *priv = txq_ctrl->priv;
> +	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
> +
> +	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
> +	assert(ppriv);
> +	ppriv->uar_table[txq_ctrl->txq.idx] = txq_ctrl->bf_reg;
> +#ifndef RTE_ARCH_64
> +	struct mlx5_priv *priv = txq_ctrl->priv;
> +	struct mlx5_txq_data *txq = &txq_ctrl->txq;
> +	unsigned int lock_idx;
> +	/* Assign an UAR lock according to UAR page number */
> +	lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
> +		   MLX5_UAR_PAGE_NUM_MASK;
> +	txq->uar_lock = &priv->uar_lock[lock_idx];
> +#endif
> +}

This won't compile when the arch is not 64-bit, since 'page_size' is not
defined in that block.

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [dpdk-dev] [PATCH v4 3/4] net/mlx5: remove device register remap
  2019-04-10 17:50     ` Ferruh Yigit
@ 2019-04-10 17:50       ` Ferruh Yigit
  2019-04-10 19:12       ` Yongseok Koh
  1 sibling, 0 replies; 66+ messages in thread
From: Ferruh Yigit @ 2019-04-10 17:50 UTC (permalink / raw)
  To: Yongseok Koh, shahafs; +Cc: dev

On 4/10/2019 12:13 AM, Yongseok Koh wrote:
> UAR (User Access Region) register does not need to be remapped for primary
> process but it should be remapped only for secondary process. UAR register
> table is in the process private structure in rte_eth_devices[],
> 	(struct mlx5_proc_priv *)rte_eth_devices[port_id].process_private
> 
> The actual UAR table follows the data structure and the table is used for
> both Tx and Rx.
> 
> For Tx, BlueFlame in UAR is used to ring the doorbell. MLX5_TX_BFREG(txq)
> is defined to get a register for the txq. Each process accesses its own
> private data to acquire the register from the UAR table.
> 
> For Rx, the doorbell in UAR is required in arming CQ event. However, it is
> a known issue that the register isn't remapped for secondary process.
> 
> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>

<...>

> @@ -229,13 +229,99 @@ mlx5_tx_queue_release(void *dpdk_txq)
>  		}
>  }
>  
> +/**
> + * Initialize Tx UAR registers for primary process.
> + *
> + * @param txq_ctrl
> + *   Pointer to Tx queue control structure.
> + */
> +static void
> +txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl)
> +{
> +	struct mlx5_priv *priv = txq_ctrl->priv;
> +	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
> +
> +	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
> +	assert(ppriv);
> +	ppriv->uar_table[txq_ctrl->txq.idx] = txq_ctrl->bf_reg;
> +#ifndef RTE_ARCH_64
> +	struct mlx5_priv *priv = txq_ctrl->priv;
> +	struct mlx5_txq_data *txq = &txq_ctrl->txq;
> +	unsigned int lock_idx;
> +	/* Assign an UAR lock according to UAR page number */
> +	lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
> +		   MLX5_UAR_PAGE_NUM_MASK;
> +	txq->uar_lock = &priv->uar_lock[lock_idx];
> +#endif
> +}

This won't compile when the arch is not 64-bit, since 'page_size' is not
defined in that block.



^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [dpdk-dev] [PATCH v4 0/4] net/mlx: remove device register remap
  2019-04-10  6:58   ` [dpdk-dev] [PATCH v4 0/4] net/mlx: " Shahaf Shuler
  2019-04-10  6:58     ` Shahaf Shuler
@ 2019-04-10 17:50     ` Ferruh Yigit
  2019-04-10 17:50       ` Ferruh Yigit
  1 sibling, 1 reply; 66+ messages in thread
From: Ferruh Yigit @ 2019-04-10 17:50 UTC (permalink / raw)
  To: Shahaf Shuler, Yongseok Koh; +Cc: dev

On 4/10/2019 7:58 AM, Shahaf Shuler wrote:
> Wednesday, April 10, 2019 2:13 AM, Yongseok Koh:
>> Subject: [PATCH v4 0/4] net/mlx: remove device register remap
>>
>> This patchset lifts the requirement of reserving huge virtual address space
>> and remapping device UAR register on to it in order to use the same address
>> between primary and secondary process.
>>
> 
> Series applied to next-net-mlx, thanks. 

Hi Shahaf,

The patchset was not pulled because of a build error in patch 3/4, FYI.

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [dpdk-dev] [PATCH v4 0/4] net/mlx: remove device register remap
  2019-04-10 17:50     ` Ferruh Yigit
@ 2019-04-10 17:50       ` Ferruh Yigit
  0 siblings, 0 replies; 66+ messages in thread
From: Ferruh Yigit @ 2019-04-10 17:50 UTC (permalink / raw)
  To: Shahaf Shuler, Yongseok Koh; +Cc: dev

On 4/10/2019 7:58 AM, Shahaf Shuler wrote:
> Wednesday, April 10, 2019 2:13 AM, Yongseok Koh:
>> Subject: [PATCH v4 0/4] net/mlx: remove device register remap
>>
>> This patchset lifts the requirement of reserving huge virtual address space
>> and remapping device UAR register on to it in order to use the same address
>> between primary and secondary process.
>>
> 
> Series applied to next-net-mlx, thanks. 

Hi Shahaf,

The patchset was not pulled because of a build error in patch 3/4, FYI.



^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v5 0/4] net/mlx: remove device register remap
  2019-03-25 19:36 [dpdk-dev] [PATCH 0/3] net/mlx: remove device register remap Yongseok Koh
                   ` (6 preceding siblings ...)
  2019-04-09 23:13 ` [dpdk-dev] [PATCH v4 0/4] net/mlx: " Yongseok Koh
@ 2019-04-10 18:41 ` Yongseok Koh
  2019-04-10 18:41   ` Yongseok Koh
                     ` (5 more replies)
  7 siblings, 6 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-10 18:41 UTC (permalink / raw)
  To: shahafs; +Cc: dev

This patchset lifts the requirement of reserving huge virtual address space
and remapping device UAR register on to it in order to use the same address
between primary and secondary process.

v5:
* fix 32-bit build issue
* remove MLX[4|5]_UAR_SIZE and MLX[4|5]_UAR_OFFSET

v4:
* add mlx4_proc_priv_[init|uninit]() to avoid duplicate code
* remove the number of Rx queues from the ppriv size calculation
* move freeing ppriv to mlx4_dev_close() from mlx4_dev_stop()
* rebase on top of "net/mlx4: fix Tx doorbell register unmap" [1]

v3:
* move UAR table to per-process storage

v2:
* rebase on the latest branch tip
* fix a bug

[1] http://patches.dpdk.org/patch/52435/

Yongseok Koh (4):
  net/mlx5: fix recursive inclusion of header file
  net/mlx5: remove redundant queue index
  net/mlx5: remove device register remap
  net/mlx4: remove device register remap

 drivers/net/mlx4/mlx4.c            | 255 +++++++++----------------------------
 drivers/net/mlx4/mlx4.h            |  25 ++--
 drivers/net/mlx4/mlx4_prm.h        |   3 +-
 drivers/net/mlx4/mlx4_rxtx.c       |   2 +-
 drivers/net/mlx4/mlx4_rxtx.h       |   6 +-
 drivers/net/mlx4/mlx4_txq.c        | 170 +++++++++++++++----------
 drivers/net/mlx5/mlx5.c            | 228 ++++++++-------------------------
 drivers/net/mlx5/mlx5.h            |  17 ++-
 drivers/net/mlx5/mlx5_defs.h       |  10 --
 drivers/net/mlx5/mlx5_ethdev.c     |   3 +
 drivers/net/mlx5/mlx5_flow.c       |   5 +-
 drivers/net/mlx5/mlx5_flow_dv.c    |   4 +-
 drivers/net/mlx5/mlx5_flow_verbs.c |   5 +-
 drivers/net/mlx5/mlx5_rxq.c        |  29 ++---
 drivers/net/mlx5/mlx5_rxtx.h       |  21 +--
 drivers/net/mlx5/mlx5_stats.c      |  15 +--
 drivers/net/mlx5/mlx5_trigger.c    |   8 +-
 drivers/net/mlx5/mlx5_txq.c        | 200 +++++++++++++++++------------
 drivers/net/mlx5/mlx5_vlan.c       |   3 +-
 19 files changed, 414 insertions(+), 595 deletions(-)

-- 
2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v5 0/4] net/mlx: remove device register remap
  2019-04-10 18:41 ` [dpdk-dev] [PATCH v5 " Yongseok Koh
@ 2019-04-10 18:41   ` Yongseok Koh
  2019-04-10 18:41   ` [dpdk-dev] [PATCH v5 1/4] net/mlx5: fix recursive inclusion of header file Yongseok Koh
                     ` (4 subsequent siblings)
  5 siblings, 0 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-10 18:41 UTC (permalink / raw)
  To: shahafs; +Cc: dev

This patchset lifts the requirement of reserving huge virtual address space
and remapping device UAR register on to it in order to use the same address
between primary and secondary process.

v5:
* fix 32-bit build issue
* remove MLX[4|5]_UAR_SIZE and MLX[4|5]_UAR_OFFSET

v4:
* add mlx4_proc_priv_[init|uninit]() to avoid duplicate code
* remove the number of Rx queues from the ppriv size calculation
* move freeing ppriv to mlx4_dev_close() from mlx4_dev_stop()
* rebase on top of "net/mlx4: fix Tx doorbell register unmap" [1]

v3:
* move UAR table to per-process storage

v2:
* rebase on the latest branch tip
* fix a bug

[1] http://patches.dpdk.org/patch/52435/

Yongseok Koh (4):
  net/mlx5: fix recursive inclusion of header file
  net/mlx5: remove redundant queue index
  net/mlx5: remove device register remap
  net/mlx4: remove device register remap

 drivers/net/mlx4/mlx4.c            | 255 +++++++++----------------------------
 drivers/net/mlx4/mlx4.h            |  25 ++--
 drivers/net/mlx4/mlx4_prm.h        |   3 +-
 drivers/net/mlx4/mlx4_rxtx.c       |   2 +-
 drivers/net/mlx4/mlx4_rxtx.h       |   6 +-
 drivers/net/mlx4/mlx4_txq.c        | 170 +++++++++++++++----------
 drivers/net/mlx5/mlx5.c            | 228 ++++++++-------------------------
 drivers/net/mlx5/mlx5.h            |  17 ++-
 drivers/net/mlx5/mlx5_defs.h       |  10 --
 drivers/net/mlx5/mlx5_ethdev.c     |   3 +
 drivers/net/mlx5/mlx5_flow.c       |   5 +-
 drivers/net/mlx5/mlx5_flow_dv.c    |   4 +-
 drivers/net/mlx5/mlx5_flow_verbs.c |   5 +-
 drivers/net/mlx5/mlx5_rxq.c        |  29 ++---
 drivers/net/mlx5/mlx5_rxtx.h       |  21 +--
 drivers/net/mlx5/mlx5_stats.c      |  15 +--
 drivers/net/mlx5/mlx5_trigger.c    |   8 +-
 drivers/net/mlx5/mlx5_txq.c        | 200 +++++++++++++++++------------
 drivers/net/mlx5/mlx5_vlan.c       |   3 +-
 19 files changed, 414 insertions(+), 595 deletions(-)

-- 
2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v5 1/4] net/mlx5: fix recursive inclusion of header file
  2019-04-10 18:41 ` [dpdk-dev] [PATCH v5 " Yongseok Koh
  2019-04-10 18:41   ` Yongseok Koh
@ 2019-04-10 18:41   ` Yongseok Koh
  2019-04-10 18:41     ` Yongseok Koh
  2019-04-10 18:41   ` [dpdk-dev] [PATCH v5 2/4] net/mlx5: remove redundant queue index Yongseok Koh
                     ` (3 subsequent siblings)
  5 siblings, 1 reply; 66+ messages in thread
From: Yongseok Koh @ 2019-04-10 18:41 UTC (permalink / raw)
  To: shahafs; +Cc: dev

mlx5.h includes mlx5_rxtx.h and mlx5_rxtx.h includes mlx5.h recursively.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Shahaf Shuler <shahafs@mellanox.com>
---
 drivers/net/mlx5/mlx5.h            | 1 -
 drivers/net/mlx5/mlx5_flow.c       | 5 +++--
 drivers/net/mlx5/mlx5_flow_dv.c    | 4 +++-
 drivers/net/mlx5/mlx5_flow_verbs.c | 5 +++--
 drivers/net/mlx5/mlx5_vlan.c       | 3 ++-
 5 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index c9b2251bf2..960a2f8191 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -33,7 +33,6 @@
 
 #include "mlx5_utils.h"
 #include "mlx5_mr.h"
-#include "mlx5_rxtx.h"
 #include "mlx5_autoconf.h"
 #include "mlx5_defs.h"
 
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 9dc492ad2d..1c78a5f8ea 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -30,9 +30,10 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
-#include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_glue.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
 
 /* Dev ops structure defined in mlx5.c */
 extern const struct eth_dev_ops mlx5_dev_ops;
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 52be8b32c1..ccb2f7593f 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -29,9 +29,11 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
 #include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
+
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 
 #ifndef HAVE_IBV_FLOW_DEVX_COUNTERS
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index 49dd13e6d2..3956df1a7e 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -29,9 +29,10 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
-#include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_glue.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
 
 #define VERBS_SPEC_INNER(item_flags) \
 	(!!((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ? IBV_FLOW_SPEC_INNER : 0)
diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c
index 6568a3a475..4004930942 100644
--- a/drivers/net/mlx5/mlx5_vlan.c
+++ b/drivers/net/mlx5/mlx5_vlan.c
@@ -27,10 +27,11 @@
 #include <rte_ethdev_driver.h>
 #include <rte_common.h>
 
-#include "mlx5_utils.h"
 #include "mlx5.h"
 #include "mlx5_autoconf.h"
 #include "mlx5_glue.h"
+#include "mlx5_rxtx.h"
+#include "mlx5_utils.h"
 
 /**
  * DPDK callback to configure a VLAN filter.
-- 
2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v5 1/4] net/mlx5: fix recursive inclusion of header file
  2019-04-10 18:41   ` [dpdk-dev] [PATCH v5 1/4] net/mlx5: fix recursive inclusion of header file Yongseok Koh
@ 2019-04-10 18:41     ` Yongseok Koh
  0 siblings, 0 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-10 18:41 UTC (permalink / raw)
  To: shahafs; +Cc: dev

mlx5.h includes mlx5_rxtx.h and mlx5_rxtx.h includes mlx5.h recursively.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Shahaf Shuler <shahafs@mellanox.com>
---
 drivers/net/mlx5/mlx5.h            | 1 -
 drivers/net/mlx5/mlx5_flow.c       | 5 +++--
 drivers/net/mlx5/mlx5_flow_dv.c    | 4 +++-
 drivers/net/mlx5/mlx5_flow_verbs.c | 5 +++--
 drivers/net/mlx5/mlx5_vlan.c       | 3 ++-
 5 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index c9b2251bf2..960a2f8191 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -33,7 +33,6 @@
 
 #include "mlx5_utils.h"
 #include "mlx5_mr.h"
-#include "mlx5_rxtx.h"
 #include "mlx5_autoconf.h"
 #include "mlx5_defs.h"
 
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 9dc492ad2d..1c78a5f8ea 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -30,9 +30,10 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
-#include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_glue.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
 
 /* Dev ops structure defined in mlx5.c */
 extern const struct eth_dev_ops mlx5_dev_ops;
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 52be8b32c1..ccb2f7593f 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -29,9 +29,11 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
 #include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
+
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
 
 #ifndef HAVE_IBV_FLOW_DEVX_COUNTERS
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index 49dd13e6d2..3956df1a7e 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -29,9 +29,10 @@
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
-#include "mlx5_prm.h"
-#include "mlx5_glue.h"
 #include "mlx5_flow.h"
+#include "mlx5_glue.h"
+#include "mlx5_prm.h"
+#include "mlx5_rxtx.h"
 
 #define VERBS_SPEC_INNER(item_flags) \
 	(!!((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ? IBV_FLOW_SPEC_INNER : 0)
diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c
index 6568a3a475..4004930942 100644
--- a/drivers/net/mlx5/mlx5_vlan.c
+++ b/drivers/net/mlx5/mlx5_vlan.c
@@ -27,10 +27,11 @@
 #include <rte_ethdev_driver.h>
 #include <rte_common.h>
 
-#include "mlx5_utils.h"
 #include "mlx5.h"
 #include "mlx5_autoconf.h"
 #include "mlx5_glue.h"
+#include "mlx5_rxtx.h"
+#include "mlx5_utils.h"
 
 /**
  * DPDK callback to configure a VLAN filter.
-- 
2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v5 2/4] net/mlx5: remove redundant queue index
  2019-04-10 18:41 ` [dpdk-dev] [PATCH v5 " Yongseok Koh
  2019-04-10 18:41   ` Yongseok Koh
  2019-04-10 18:41   ` [dpdk-dev] [PATCH v5 1/4] net/mlx5: fix recursive inclusion of header file Yongseok Koh
@ 2019-04-10 18:41   ` Yongseok Koh
  2019-04-10 18:41     ` Yongseok Koh
  2019-04-10 18:41   ` [dpdk-dev] [PATCH v5 3/4] net/mlx5: remove device register remap Yongseok Koh
                     ` (2 subsequent siblings)
  5 siblings, 1 reply; 66+ messages in thread
From: Yongseok Koh @ 2019-04-10 18:41 UTC (permalink / raw)
  To: shahafs; +Cc: dev

Queue index is redundantly stored in both the Rx and Tx structures, e.g.
txq_ctrl->idx and txq->stats.idx. Both are consolidated into a single
field - rxq->idx and txq->idx.

Also, rxq and txq are moved to the beginning of their control structures
(rxq_ctrl and txq_ctrl) for cacheline alignment.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Shahaf Shuler <shahafs@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxq.c     | 29 ++++++++++++++---------------
 drivers/net/mlx5/mlx5_rxtx.h    | 10 ++++------
 drivers/net/mlx5/mlx5_stats.c   | 15 ++++++---------
 drivers/net/mlx5/mlx5_trigger.c |  2 +-
 drivers/net/mlx5/mlx5_txq.c     | 21 ++++++++++-----------
 5 files changed, 35 insertions(+), 42 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index dcb97c2100..8a84b0a1b5 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -156,7 +156,7 @@ rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 	}
 	DRV_LOG(DEBUG,
 		"port %u Rx queue %u allocated and configured %u segments",
-		rxq->port_id, rxq_ctrl->idx, wqe_n);
+		rxq->port_id, rxq->idx, wqe_n);
 	return 0;
 error:
 	err = rte_errno; /* Save rte_errno before cleanup. */
@@ -168,7 +168,7 @@ rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 		(*rxq->mprq_bufs)[i] = NULL;
 	}
 	DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
-		rxq->port_id, rxq_ctrl->idx);
+		rxq->port_id, rxq->idx);
 	rte_errno = err; /* Restore rte_errno. */
 	return -rte_errno;
 }
@@ -241,7 +241,7 @@ rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 	DRV_LOG(DEBUG,
 		"port %u Rx queue %u allocated and configured %u segments"
 		" (max %u packets)",
-		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx, elts_n,
+		PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx, elts_n,
 		elts_n / (1 << rxq_ctrl->rxq.sges_n));
 	return 0;
 error:
@@ -253,7 +253,7 @@ rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 		(*rxq_ctrl->rxq.elts)[i] = NULL;
 	}
 	DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
-		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx);
+		PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx);
 	rte_errno = err; /* Restore rte_errno. */
 	return -rte_errno;
 }
@@ -287,7 +287,7 @@ rxq_free_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 	uint16_t i;
 
 	DRV_LOG(DEBUG, "port %u Multi-Packet Rx queue %u freeing WRs",
-		rxq->port_id, rxq_ctrl->idx);
+		rxq->port_id, rxq->idx);
 	if (rxq->mprq_bufs == NULL)
 		return;
 	assert(mlx5_rxq_check_vec_support(rxq) < 0);
@@ -318,7 +318,7 @@ rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 	uint16_t i;
 
 	DRV_LOG(DEBUG, "port %u Rx queue %u freeing WRs",
-		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx);
+		PORT_ID(rxq_ctrl->priv), rxq->idx);
 	if (rxq->elts == NULL)
 		return;
 	/**
@@ -364,7 +364,7 @@ void
 mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
 {
 	DRV_LOG(DEBUG, "port %u cleaning up Rx queue %u",
-		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx);
+		PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx);
 	if (rxq_ctrl->ibv)
 		mlx5_rxq_ibv_release(rxq_ctrl->ibv);
 	memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
@@ -495,11 +495,11 @@ mlx5_rx_queue_release(void *dpdk_rxq)
 		return;
 	rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	priv = rxq_ctrl->priv;
-	if (!mlx5_rxq_releasable(ETH_DEV(priv), rxq_ctrl->rxq.stats.idx))
+	if (!mlx5_rxq_releasable(ETH_DEV(priv), rxq_ctrl->rxq.idx))
 		rte_panic("port %u Rx queue %u is still used by a flow and"
 			  " cannot be removed\n",
-			  PORT_ID(priv), rxq_ctrl->idx);
-	mlx5_rxq_release(ETH_DEV(priv), rxq_ctrl->rxq.stats.idx);
+			  PORT_ID(priv), rxq->idx);
+	mlx5_rxq_release(ETH_DEV(priv), rxq_ctrl->rxq.idx);
 }
 
 /**
@@ -793,7 +793,7 @@ mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	if (!tmpl) {
 		DRV_LOG(ERR,
 			"port %u Rx queue %u cannot allocate verbs resources",
-			dev->data->port_id, rxq_ctrl->idx);
+			dev->data->port_id, rxq_data->idx);
 		rte_errno = ENOMEM;
 		goto error;
 	}
@@ -1104,7 +1104,7 @@ mlx5_rxq_ibv_verify(struct rte_eth_dev *dev)
 
 	LIST_FOREACH(rxq_ibv, &priv->rxqsibv, next) {
 		DRV_LOG(DEBUG, "port %u Verbs Rx queue %u still referenced",
-			dev->data->port_id, rxq_ibv->rxq_ctrl->idx);
+			dev->data->port_id, rxq_ibv->rxq_ctrl->rxq.idx);
 		++ret;
 	}
 	return ret;
@@ -1470,7 +1470,6 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	tmpl->rxq.port_id = dev->data->port_id;
 	tmpl->priv = priv;
 	tmpl->rxq.mp = mp;
-	tmpl->rxq.stats.idx = idx;
 	tmpl->rxq.elts_n = log2above(desc);
 	tmpl->rxq.rq_repl_thresh =
 		MLX5_VPMD_RXQ_RPLNSH_THRESH(1 << tmpl->rxq.elts_n);
@@ -1479,7 +1478,7 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 #ifndef RTE_ARCH_64
 	tmpl->rxq.uar_lock_cq = &priv->uar_lock_cq;
 #endif
-	tmpl->idx = idx;
+	tmpl->rxq.idx = idx;
 	rte_atomic32_inc(&tmpl->refcnt);
 	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
 	return tmpl;
@@ -1592,7 +1591,7 @@ mlx5_rxq_verify(struct rte_eth_dev *dev)
 
 	LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
 		DRV_LOG(DEBUG, "port %u Rx Queue %u still referenced",
-			dev->data->port_id, rxq_ctrl->idx);
+			dev->data->port_id, rxq_ctrl->rxq.idx);
 		++ret;
 	}
 	return ret;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index ced9945888..7b58063ceb 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -41,7 +41,6 @@
 #define MLX5_FLOW_TUNNEL 5
 
 struct mlx5_rxq_stats {
-	unsigned int idx; /**< Mapping index. */
 #ifdef MLX5_PMD_SOFT_COUNTERS
 	uint64_t ipackets; /**< Total of successfully received packets. */
 	uint64_t ibytes; /**< Total of successfully received bytes. */
@@ -51,7 +50,6 @@ struct mlx5_rxq_stats {
 };
 
 struct mlx5_txq_stats {
-	unsigned int idx; /**< Mapping index. */
 #ifdef MLX5_PMD_SOFT_COUNTERS
 	uint64_t opackets; /**< Total of successfully sent packets. */
 	uint64_t obytes; /**< Total of successfully sent bytes. */
@@ -116,6 +114,7 @@ struct mlx5_rxq_data {
 	struct rte_mempool *mp;
 	struct rte_mempool *mprq_mp; /* Mempool for Multi-Packet RQ. */
 	struct mlx5_mprq_buf *mprq_repl; /* Stashed mbuf for replenish. */
+	uint16_t idx; /* Queue index. */
 	struct mlx5_rxq_stats stats;
 	uint64_t mbuf_initializer; /* Default rearm_data for vectorized Rx. */
 	struct rte_mbuf fake_mbuf; /* elts padding for vectorized Rx. */
@@ -141,14 +140,13 @@ struct mlx5_rxq_ibv {
 
 /* RX queue control descriptor. */
 struct mlx5_rxq_ctrl {
+	struct mlx5_rxq_data rxq; /* Data path structure. */
 	LIST_ENTRY(mlx5_rxq_ctrl) next; /* Pointer to the next element. */
 	rte_atomic32_t refcnt; /* Reference counter. */
 	struct mlx5_rxq_ibv *ibv; /* Verbs elements. */
 	struct mlx5_priv *priv; /* Back pointer to private data. */
-	struct mlx5_rxq_data rxq; /* Data path structure. */
 	unsigned int socket; /* CPU socket ID for allocations. */
 	unsigned int irq:1; /* Whether IRQ is enabled. */
-	uint16_t idx; /* Queue index. */
 	uint32_t flow_mark_n; /* Number of Mark/Flag flows using this Queue. */
 	uint32_t flow_tunnels_n[MLX5_FLOW_TUNNEL]; /* Tunnels counters. */
 };
@@ -205,6 +203,7 @@ struct mlx5_txq_data {
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
 	volatile void *bf_reg; /* Blueflame register remapped. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
+	uint16_t idx; /* Queue index. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
 #ifndef RTE_ARCH_64
 	rte_spinlock_t *uar_lock;
@@ -223,6 +222,7 @@ struct mlx5_txq_ibv {
 
 /* TX queue control descriptor. */
 struct mlx5_txq_ctrl {
+	struct mlx5_txq_data txq; /* Data path structure. */
 	LIST_ENTRY(mlx5_txq_ctrl) next; /* Pointer to the next element. */
 	rte_atomic32_t refcnt; /* Reference counter. */
 	unsigned int socket; /* CPU socket ID for allocations. */
@@ -230,10 +230,8 @@ struct mlx5_txq_ctrl {
 	unsigned int max_tso_header; /* Max TSO header size. */
 	struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
 	struct mlx5_priv *priv; /* Back pointer to private data. */
-	struct mlx5_txq_data txq; /* Data path structure. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
 	volatile void *bf_reg_orig; /* Blueflame register from verbs. */
-	uint16_t idx; /* Queue index. */
 };
 
 /* mlx5_rxq.c */
diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
index 5af199d0d5..ed50667f45 100644
--- a/drivers/net/mlx5/mlx5_stats.c
+++ b/drivers/net/mlx5/mlx5_stats.c
@@ -386,7 +386,7 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 
 		if (rxq == NULL)
 			continue;
-		idx = rxq->stats.idx;
+		idx = rxq->idx;
 		if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
 #ifdef MLX5_PMD_SOFT_COUNTERS
 			tmp.q_ipackets[idx] += rxq->stats.ipackets;
@@ -407,7 +407,7 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 
 		if (txq == NULL)
 			continue;
-		idx = txq->stats.idx;
+		idx = txq->idx;
 		if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
 #ifdef MLX5_PMD_SOFT_COUNTERS
 			tmp.q_opackets[idx] += txq->stats.opackets;
@@ -442,21 +442,18 @@ mlx5_stats_reset(struct rte_eth_dev *dev)
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_stats_ctrl *stats_ctrl = &priv->stats_ctrl;
 	unsigned int i;
-	unsigned int idx;
 
 	for (i = 0; (i != priv->rxqs_n); ++i) {
 		if ((*priv->rxqs)[i] == NULL)
 			continue;
-		idx = (*priv->rxqs)[i]->stats.idx;
-		(*priv->rxqs)[i]->stats =
-			(struct mlx5_rxq_stats){ .idx = idx };
+		memset(&(*priv->rxqs)[i]->stats, 0,
+		       sizeof(struct mlx5_rxq_stats));
 	}
 	for (i = 0; (i != priv->txqs_n); ++i) {
 		if ((*priv->txqs)[i] == NULL)
 			continue;
-		idx = (*priv->txqs)[i]->stats.idx;
-		(*priv->txqs)[i]->stats =
-			(struct mlx5_txq_stats){ .idx = idx };
+		memset(&(*priv->txqs)[i]->stats, 0,
+		       sizeof(struct mlx5_txq_stats));
 	}
 	mlx5_read_ib_stat(priv, "out_of_buffer", &stats_ctrl->imissed_base);
 #ifndef MLX5_PMD_SOFT_COUNTERS
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 5b73f0ff03..7c1e5594d6 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -123,7 +123,7 @@ mlx5_rxq_start(struct rte_eth_dev *dev)
 		DRV_LOG(DEBUG,
 			"port %u Rx queue %u registering"
 			" mp %s having %u chunks",
-			dev->data->port_id, rxq_ctrl->idx,
+			dev->data->port_id, rxq_ctrl->rxq.idx,
 			mp->name, mp->nb_mem_chunks);
 		mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl, mp);
 		ret = rxq_alloc_elts(rxq_ctrl);
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 1b3d89f2f6..4bd08cb035 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -48,7 +48,7 @@ txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl)
 	for (i = 0; (i != elts_n); ++i)
 		(*txq_ctrl->txq.elts)[i] = NULL;
 	DRV_LOG(DEBUG, "port %u Tx queue %u allocated and configured %u WRs",
-		PORT_ID(txq_ctrl->priv), txq_ctrl->idx, elts_n);
+		PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx, elts_n);
 	txq_ctrl->txq.elts_head = 0;
 	txq_ctrl->txq.elts_tail = 0;
 	txq_ctrl->txq.elts_comp = 0;
@@ -70,7 +70,7 @@ txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl)
 	struct rte_mbuf *(*elts)[elts_n] = txq_ctrl->txq.elts;
 
 	DRV_LOG(DEBUG, "port %u Tx queue %u freeing WRs",
-		PORT_ID(txq_ctrl->priv), txq_ctrl->idx);
+		PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx);
 	txq_ctrl->txq.elts_head = 0;
 	txq_ctrl->txq.elts_tail = 0;
 	txq_ctrl->txq.elts_comp = 0;
@@ -224,7 +224,7 @@ mlx5_tx_queue_release(void *dpdk_txq)
 		if ((*priv->txqs)[i] == txq) {
 			mlx5_txq_release(ETH_DEV(priv), i);
 			DRV_LOG(DEBUG, "port %u removing Tx queue %u from list",
-				PORT_ID(priv), txq_ctrl->idx);
+				PORT_ID(priv), txq->idx);
 			break;
 		}
 }
@@ -273,7 +273,7 @@ mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
 			continue;
 		txq = (*priv->txqs)[i];
 		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
-		assert(txq_ctrl->idx == (uint16_t)i);
+		assert(txq->idx == (uint16_t)i);
 		/* UAR addr form verbs used to find dup and offset in page. */
 		uar_va = (uintptr_t)txq_ctrl->bf_reg_orig;
 		off = uar_va & (page_size - 1); /* offset in page. */
@@ -301,7 +301,7 @@ mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
 				DRV_LOG(ERR,
 					"port %u call to mmap failed on UAR"
 					" for txq %u",
-					dev->data->port_id, txq_ctrl->idx);
+					dev->data->port_id, txq->idx);
 				rte_errno = ENXIO;
 				return -rte_errno;
 			}
@@ -629,7 +629,7 @@ mlx5_txq_ibv_verify(struct rte_eth_dev *dev)
 
 	LIST_FOREACH(txq_ibv, &priv->txqsibv, next) {
 		DRV_LOG(DEBUG, "port %u Verbs Tx queue %u still referenced",
-			dev->data->port_id, txq_ibv->txq_ctrl->idx);
+			dev->data->port_id, txq_ibv->txq_ctrl->txq.idx);
 		++ret;
 	}
 	return ret;
@@ -778,7 +778,7 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	tmpl->priv = priv;
 	tmpl->socket = socket;
 	tmpl->txq.elts_n = log2above(desc);
-	tmpl->idx = idx;
+	tmpl->txq.idx = idx;
 	txq_set_params(tmpl);
 	DRV_LOG(DEBUG, "port %u device_attr.max_qp_wr is %d",
 		dev->data->port_id, priv->sh->device_attr.orig_attr.max_qp_wr);
@@ -786,7 +786,6 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		dev->data->port_id, priv->sh->device_attr.orig_attr.max_sge);
 	tmpl->txq.elts =
 		(struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])(tmpl + 1);
-	tmpl->txq.stats.idx = idx;
 	rte_atomic32_inc(&tmpl->refcnt);
 	LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
 	return tmpl;
@@ -893,12 +892,12 @@ int
 mlx5_txq_verify(struct rte_eth_dev *dev)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_txq_ctrl *txq;
+	struct mlx5_txq_ctrl *txq_ctrl;
 	int ret = 0;
 
-	LIST_FOREACH(txq, &priv->txqsctrl, next) {
+	LIST_FOREACH(txq_ctrl, &priv->txqsctrl, next) {
 		DRV_LOG(DEBUG, "port %u Tx queue %u still referenced",
-			dev->data->port_id, txq->idx);
+			dev->data->port_id, txq_ctrl->txq.idx);
 		++ret;
 	}
 	return ret;
-- 
2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v5 2/4] net/mlx5: remove redundant queue index
  2019-04-10 18:41   ` [dpdk-dev] [PATCH v5 2/4] net/mlx5: remove redundant queue index Yongseok Koh
@ 2019-04-10 18:41     ` Yongseok Koh
  0 siblings, 0 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-10 18:41 UTC (permalink / raw)
  To: shahafs; +Cc: dev

Queue index is redundantly stored in both the Rx and Tx structures, e.g.
txq_ctrl->idx and txq->stats.idx. Both are consolidated into a single
field - rxq->idx and txq->idx.

Also, rxq and txq are moved to the beginning of their control structures
(rxq_ctrl and txq_ctrl) for cacheline alignment.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Shahaf Shuler <shahafs@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxq.c     | 29 ++++++++++++++---------------
 drivers/net/mlx5/mlx5_rxtx.h    | 10 ++++------
 drivers/net/mlx5/mlx5_stats.c   | 15 ++++++---------
 drivers/net/mlx5/mlx5_trigger.c |  2 +-
 drivers/net/mlx5/mlx5_txq.c     | 21 ++++++++++-----------
 5 files changed, 35 insertions(+), 42 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index dcb97c2100..8a84b0a1b5 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -156,7 +156,7 @@ rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 	}
 	DRV_LOG(DEBUG,
 		"port %u Rx queue %u allocated and configured %u segments",
-		rxq->port_id, rxq_ctrl->idx, wqe_n);
+		rxq->port_id, rxq->idx, wqe_n);
 	return 0;
 error:
 	err = rte_errno; /* Save rte_errno before cleanup. */
@@ -168,7 +168,7 @@ rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 		(*rxq->mprq_bufs)[i] = NULL;
 	}
 	DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
-		rxq->port_id, rxq_ctrl->idx);
+		rxq->port_id, rxq->idx);
 	rte_errno = err; /* Restore rte_errno. */
 	return -rte_errno;
 }
@@ -241,7 +241,7 @@ rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 	DRV_LOG(DEBUG,
 		"port %u Rx queue %u allocated and configured %u segments"
 		" (max %u packets)",
-		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx, elts_n,
+		PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx, elts_n,
 		elts_n / (1 << rxq_ctrl->rxq.sges_n));
 	return 0;
 error:
@@ -253,7 +253,7 @@ rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 		(*rxq_ctrl->rxq.elts)[i] = NULL;
 	}
 	DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
-		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx);
+		PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx);
 	rte_errno = err; /* Restore rte_errno. */
 	return -rte_errno;
 }
@@ -287,7 +287,7 @@ rxq_free_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 	uint16_t i;
 
 	DRV_LOG(DEBUG, "port %u Multi-Packet Rx queue %u freeing WRs",
-		rxq->port_id, rxq_ctrl->idx);
+		rxq->port_id, rxq->idx);
 	if (rxq->mprq_bufs == NULL)
 		return;
 	assert(mlx5_rxq_check_vec_support(rxq) < 0);
@@ -318,7 +318,7 @@ rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
 	uint16_t i;
 
 	DRV_LOG(DEBUG, "port %u Rx queue %u freeing WRs",
-		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx);
+		PORT_ID(rxq_ctrl->priv), rxq->idx);
 	if (rxq->elts == NULL)
 		return;
 	/**
@@ -364,7 +364,7 @@ void
 mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
 {
 	DRV_LOG(DEBUG, "port %u cleaning up Rx queue %u",
-		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx);
+		PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx);
 	if (rxq_ctrl->ibv)
 		mlx5_rxq_ibv_release(rxq_ctrl->ibv);
 	memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
@@ -495,11 +495,11 @@ mlx5_rx_queue_release(void *dpdk_rxq)
 		return;
 	rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	priv = rxq_ctrl->priv;
-	if (!mlx5_rxq_releasable(ETH_DEV(priv), rxq_ctrl->rxq.stats.idx))
+	if (!mlx5_rxq_releasable(ETH_DEV(priv), rxq_ctrl->rxq.idx))
 		rte_panic("port %u Rx queue %u is still used by a flow and"
 			  " cannot be removed\n",
-			  PORT_ID(priv), rxq_ctrl->idx);
-	mlx5_rxq_release(ETH_DEV(priv), rxq_ctrl->rxq.stats.idx);
+			  PORT_ID(priv), rxq->idx);
+	mlx5_rxq_release(ETH_DEV(priv), rxq_ctrl->rxq.idx);
 }
 
 /**
@@ -793,7 +793,7 @@ mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	if (!tmpl) {
 		DRV_LOG(ERR,
 			"port %u Rx queue %u cannot allocate verbs resources",
-			dev->data->port_id, rxq_ctrl->idx);
+			dev->data->port_id, rxq_data->idx);
 		rte_errno = ENOMEM;
 		goto error;
 	}
@@ -1104,7 +1104,7 @@ mlx5_rxq_ibv_verify(struct rte_eth_dev *dev)
 
 	LIST_FOREACH(rxq_ibv, &priv->rxqsibv, next) {
 		DRV_LOG(DEBUG, "port %u Verbs Rx queue %u still referenced",
-			dev->data->port_id, rxq_ibv->rxq_ctrl->idx);
+			dev->data->port_id, rxq_ibv->rxq_ctrl->rxq.idx);
 		++ret;
 	}
 	return ret;
@@ -1470,7 +1470,6 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	tmpl->rxq.port_id = dev->data->port_id;
 	tmpl->priv = priv;
 	tmpl->rxq.mp = mp;
-	tmpl->rxq.stats.idx = idx;
 	tmpl->rxq.elts_n = log2above(desc);
 	tmpl->rxq.rq_repl_thresh =
 		MLX5_VPMD_RXQ_RPLNSH_THRESH(1 << tmpl->rxq.elts_n);
@@ -1479,7 +1478,7 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 #ifndef RTE_ARCH_64
 	tmpl->rxq.uar_lock_cq = &priv->uar_lock_cq;
 #endif
-	tmpl->idx = idx;
+	tmpl->rxq.idx = idx;
 	rte_atomic32_inc(&tmpl->refcnt);
 	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
 	return tmpl;
@@ -1592,7 +1591,7 @@ mlx5_rxq_verify(struct rte_eth_dev *dev)
 
 	LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
 		DRV_LOG(DEBUG, "port %u Rx Queue %u still referenced",
-			dev->data->port_id, rxq_ctrl->idx);
+			dev->data->port_id, rxq_ctrl->rxq.idx);
 		++ret;
 	}
 	return ret;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index ced9945888..7b58063ceb 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -41,7 +41,6 @@
 #define MLX5_FLOW_TUNNEL 5
 
 struct mlx5_rxq_stats {
-	unsigned int idx; /**< Mapping index. */
 #ifdef MLX5_PMD_SOFT_COUNTERS
 	uint64_t ipackets; /**< Total of successfully received packets. */
 	uint64_t ibytes; /**< Total of successfully received bytes. */
@@ -51,7 +50,6 @@ struct mlx5_rxq_stats {
 };
 
 struct mlx5_txq_stats {
-	unsigned int idx; /**< Mapping index. */
 #ifdef MLX5_PMD_SOFT_COUNTERS
 	uint64_t opackets; /**< Total of successfully sent packets. */
 	uint64_t obytes; /**< Total of successfully sent bytes. */
@@ -116,6 +114,7 @@ struct mlx5_rxq_data {
 	struct rte_mempool *mp;
 	struct rte_mempool *mprq_mp; /* Mempool for Multi-Packet RQ. */
 	struct mlx5_mprq_buf *mprq_repl; /* Stashed mbuf for replenish. */
+	uint16_t idx; /* Queue index. */
 	struct mlx5_rxq_stats stats;
 	uint64_t mbuf_initializer; /* Default rearm_data for vectorized Rx. */
 	struct rte_mbuf fake_mbuf; /* elts padding for vectorized Rx. */
@@ -141,14 +140,13 @@ struct mlx5_rxq_ibv {
 
 /* RX queue control descriptor. */
 struct mlx5_rxq_ctrl {
+	struct mlx5_rxq_data rxq; /* Data path structure. */
 	LIST_ENTRY(mlx5_rxq_ctrl) next; /* Pointer to the next element. */
 	rte_atomic32_t refcnt; /* Reference counter. */
 	struct mlx5_rxq_ibv *ibv; /* Verbs elements. */
 	struct mlx5_priv *priv; /* Back pointer to private data. */
-	struct mlx5_rxq_data rxq; /* Data path structure. */
 	unsigned int socket; /* CPU socket ID for allocations. */
 	unsigned int irq:1; /* Whether IRQ is enabled. */
-	uint16_t idx; /* Queue index. */
 	uint32_t flow_mark_n; /* Number of Mark/Flag flows using this Queue. */
 	uint32_t flow_tunnels_n[MLX5_FLOW_TUNNEL]; /* Tunnels counters. */
 };
@@ -205,6 +203,7 @@ struct mlx5_txq_data {
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
 	volatile void *bf_reg; /* Blueflame register remapped. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
+	uint16_t idx; /* Queue index. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
 #ifndef RTE_ARCH_64
 	rte_spinlock_t *uar_lock;
@@ -223,6 +222,7 @@ struct mlx5_txq_ibv {
 
 /* TX queue control descriptor. */
 struct mlx5_txq_ctrl {
+	struct mlx5_txq_data txq; /* Data path structure. */
 	LIST_ENTRY(mlx5_txq_ctrl) next; /* Pointer to the next element. */
 	rte_atomic32_t refcnt; /* Reference counter. */
 	unsigned int socket; /* CPU socket ID for allocations. */
@@ -230,10 +230,8 @@ struct mlx5_txq_ctrl {
 	unsigned int max_tso_header; /* Max TSO header size. */
 	struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
 	struct mlx5_priv *priv; /* Back pointer to private data. */
-	struct mlx5_txq_data txq; /* Data path structure. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
 	volatile void *bf_reg_orig; /* Blueflame register from verbs. */
-	uint16_t idx; /* Queue index. */
 };
 
 /* mlx5_rxq.c */
diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
index 5af199d0d5..ed50667f45 100644
--- a/drivers/net/mlx5/mlx5_stats.c
+++ b/drivers/net/mlx5/mlx5_stats.c
@@ -386,7 +386,7 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 
 		if (rxq == NULL)
 			continue;
-		idx = rxq->stats.idx;
+		idx = rxq->idx;
 		if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
 #ifdef MLX5_PMD_SOFT_COUNTERS
 			tmp.q_ipackets[idx] += rxq->stats.ipackets;
@@ -407,7 +407,7 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 
 		if (txq == NULL)
 			continue;
-		idx = txq->stats.idx;
+		idx = txq->idx;
 		if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
 #ifdef MLX5_PMD_SOFT_COUNTERS
 			tmp.q_opackets[idx] += txq->stats.opackets;
@@ -442,21 +442,18 @@ mlx5_stats_reset(struct rte_eth_dev *dev)
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_stats_ctrl *stats_ctrl = &priv->stats_ctrl;
 	unsigned int i;
-	unsigned int idx;
 
 	for (i = 0; (i != priv->rxqs_n); ++i) {
 		if ((*priv->rxqs)[i] == NULL)
 			continue;
-		idx = (*priv->rxqs)[i]->stats.idx;
-		(*priv->rxqs)[i]->stats =
-			(struct mlx5_rxq_stats){ .idx = idx };
+		memset(&(*priv->rxqs)[i]->stats, 0,
+		       sizeof(struct mlx5_rxq_stats));
 	}
 	for (i = 0; (i != priv->txqs_n); ++i) {
 		if ((*priv->txqs)[i] == NULL)
 			continue;
-		idx = (*priv->txqs)[i]->stats.idx;
-		(*priv->txqs)[i]->stats =
-			(struct mlx5_txq_stats){ .idx = idx };
+		memset(&(*priv->txqs)[i]->stats, 0,
+		       sizeof(struct mlx5_txq_stats));
 	}
 	mlx5_read_ib_stat(priv, "out_of_buffer", &stats_ctrl->imissed_base);
 #ifndef MLX5_PMD_SOFT_COUNTERS
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 5b73f0ff03..7c1e5594d6 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -123,7 +123,7 @@ mlx5_rxq_start(struct rte_eth_dev *dev)
 		DRV_LOG(DEBUG,
 			"port %u Rx queue %u registering"
 			" mp %s having %u chunks",
-			dev->data->port_id, rxq_ctrl->idx,
+			dev->data->port_id, rxq_ctrl->rxq.idx,
 			mp->name, mp->nb_mem_chunks);
 		mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl, mp);
 		ret = rxq_alloc_elts(rxq_ctrl);
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 1b3d89f2f6..4bd08cb035 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -48,7 +48,7 @@ txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl)
 	for (i = 0; (i != elts_n); ++i)
 		(*txq_ctrl->txq.elts)[i] = NULL;
 	DRV_LOG(DEBUG, "port %u Tx queue %u allocated and configured %u WRs",
-		PORT_ID(txq_ctrl->priv), txq_ctrl->idx, elts_n);
+		PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx, elts_n);
 	txq_ctrl->txq.elts_head = 0;
 	txq_ctrl->txq.elts_tail = 0;
 	txq_ctrl->txq.elts_comp = 0;
@@ -70,7 +70,7 @@ txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl)
 	struct rte_mbuf *(*elts)[elts_n] = txq_ctrl->txq.elts;
 
 	DRV_LOG(DEBUG, "port %u Tx queue %u freeing WRs",
-		PORT_ID(txq_ctrl->priv), txq_ctrl->idx);
+		PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx);
 	txq_ctrl->txq.elts_head = 0;
 	txq_ctrl->txq.elts_tail = 0;
 	txq_ctrl->txq.elts_comp = 0;
@@ -224,7 +224,7 @@ mlx5_tx_queue_release(void *dpdk_txq)
 		if ((*priv->txqs)[i] == txq) {
 			mlx5_txq_release(ETH_DEV(priv), i);
 			DRV_LOG(DEBUG, "port %u removing Tx queue %u from list",
-				PORT_ID(priv), txq_ctrl->idx);
+				PORT_ID(priv), txq->idx);
 			break;
 		}
 }
@@ -273,7 +273,7 @@ mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
 			continue;
 		txq = (*priv->txqs)[i];
 		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
-		assert(txq_ctrl->idx == (uint16_t)i);
+		assert(txq->idx == (uint16_t)i);
 		/* UAR addr form verbs used to find dup and offset in page. */
 		uar_va = (uintptr_t)txq_ctrl->bf_reg_orig;
 		off = uar_va & (page_size - 1); /* offset in page. */
@@ -301,7 +301,7 @@ mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
 				DRV_LOG(ERR,
 					"port %u call to mmap failed on UAR"
 					" for txq %u",
-					dev->data->port_id, txq_ctrl->idx);
+					dev->data->port_id, txq->idx);
 				rte_errno = ENXIO;
 				return -rte_errno;
 			}
@@ -629,7 +629,7 @@ mlx5_txq_ibv_verify(struct rte_eth_dev *dev)
 
 	LIST_FOREACH(txq_ibv, &priv->txqsibv, next) {
 		DRV_LOG(DEBUG, "port %u Verbs Tx queue %u still referenced",
-			dev->data->port_id, txq_ibv->txq_ctrl->idx);
+			dev->data->port_id, txq_ibv->txq_ctrl->txq.idx);
 		++ret;
 	}
 	return ret;
@@ -778,7 +778,7 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	tmpl->priv = priv;
 	tmpl->socket = socket;
 	tmpl->txq.elts_n = log2above(desc);
-	tmpl->idx = idx;
+	tmpl->txq.idx = idx;
 	txq_set_params(tmpl);
 	DRV_LOG(DEBUG, "port %u device_attr.max_qp_wr is %d",
 		dev->data->port_id, priv->sh->device_attr.orig_attr.max_qp_wr);
@@ -786,7 +786,6 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		dev->data->port_id, priv->sh->device_attr.orig_attr.max_sge);
 	tmpl->txq.elts =
 		(struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])(tmpl + 1);
-	tmpl->txq.stats.idx = idx;
 	rte_atomic32_inc(&tmpl->refcnt);
 	LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
 	return tmpl;
@@ -893,12 +892,12 @@ int
 mlx5_txq_verify(struct rte_eth_dev *dev)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_txq_ctrl *txq;
+	struct mlx5_txq_ctrl *txq_ctrl;
 	int ret = 0;
 
-	LIST_FOREACH(txq, &priv->txqsctrl, next) {
+	LIST_FOREACH(txq_ctrl, &priv->txqsctrl, next) {
 		DRV_LOG(DEBUG, "port %u Tx queue %u still referenced",
-			dev->data->port_id, txq->idx);
+			dev->data->port_id, txq_ctrl->txq.idx);
 		++ret;
 	}
 	return ret;
-- 
2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v5 3/4] net/mlx5: remove device register remap
  2019-04-10 18:41 ` [dpdk-dev] [PATCH v5 " Yongseok Koh
                     ` (2 preceding siblings ...)
  2019-04-10 18:41   ` [dpdk-dev] [PATCH v5 2/4] net/mlx5: remove redundant queue index Yongseok Koh
@ 2019-04-10 18:41   ` Yongseok Koh
  2019-04-10 18:41     ` Yongseok Koh
  2019-04-10 18:41   ` [dpdk-dev] [PATCH v5 4/4] net/mlx4: " Yongseok Koh
  2019-04-11  8:40   ` [dpdk-dev] [PATCH v5 0/4] net/mlx: " Shahaf Shuler
  5 siblings, 1 reply; 66+ messages in thread
From: Yongseok Koh @ 2019-04-10 18:41 UTC (permalink / raw)
  To: shahafs; +Cc: dev

The UAR (User Access Region) register does not need to be remapped for the
primary process; it needs to be remapped only for secondary processes. The
UAR register table is in the process private structure in rte_eth_devices[],
	(struct mlx5_proc_priv *)rte_eth_devices[port_id].process_private

The actual UAR table follows the data structure and the table is used for
both Tx and Rx.

For Tx, BlueFlame in UAR is used to ring the doorbell. MLX5_TX_BFREG(txq)
is defined to get a register for the txq. Each process accesses its own
private data to acquire the register from the UAR table.

For Rx, the doorbell in UAR is required for arming CQ events. However, it
is a known issue that the register isn't remapped for secondary processes.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.c         | 228 ++++++++++------------------------------
 drivers/net/mlx5/mlx5.h         |  16 ++-
 drivers/net/mlx5/mlx5_defs.h    |  10 --
 drivers/net/mlx5/mlx5_ethdev.c  |   3 +
 drivers/net/mlx5/mlx5_rxtx.h    |  11 +-
 drivers/net/mlx5/mlx5_trigger.c |   6 --
 drivers/net/mlx5/mlx5_txq.c     | 181 +++++++++++++++++++------------
 7 files changed, 186 insertions(+), 269 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 475c93ddf9..9ff50dfbe4 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -449,30 +449,6 @@ mlx5_init_shared_data(void)
 }
 
 /**
- * Uninitialize shared data between primary and secondary process.
- *
- * The pointer of secondary process is dereferenced and primary process frees
- * the memzone.
- */
-static void
-mlx5_uninit_shared_data(void)
-{
-	const struct rte_memzone *mz;
-
-	rte_spinlock_lock(&mlx5_shared_data_lock);
-	if (mlx5_shared_data) {
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-			mz = rte_memzone_lookup(MZ_MLX5_PMD_SHARED_DATA);
-			rte_memzone_free(mz);
-		} else {
-			memset(&mlx5_local_data, 0, sizeof(mlx5_local_data));
-		}
-		mlx5_shared_data = NULL;
-	}
-	rte_spinlock_unlock(&mlx5_shared_data_lock);
-}
-
-/**
  * Retrieve integer value from environment variable.
  *
  * @param[in] name
@@ -546,6 +522,54 @@ mlx5_free_verbs_buf(void *ptr, void *data __rte_unused)
 }
 
 /**
+ * Initialize process private data structure.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_proc_priv_init(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_proc_priv *ppriv;
+	size_t ppriv_size;
+
+	/*
+	 * UAR register table follows the process private structure. BlueFlame
+	 * registers for Tx queues are stored in the table.
+	 */
+	ppriv_size =
+		sizeof(struct mlx5_proc_priv) + priv->txqs_n * sizeof(void *);
+	ppriv = rte_malloc_socket("mlx5_proc_priv", ppriv_size,
+				  RTE_CACHE_LINE_SIZE, dev->device->numa_node);
+	if (!ppriv) {
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	ppriv->uar_table_sz = ppriv_size;
+	dev->process_private = ppriv;
+	return 0;
+}
+
+/**
+ * Un-initialize process private data structure.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ */
+static void
+mlx5_proc_priv_uninit(struct rte_eth_dev *dev)
+{
+	if (!dev->process_private)
+		return;
+	rte_free(dev->process_private);
+	dev->process_private = NULL;
+}
+
+/**
  * DPDK callback to close the device.
  *
  * Destroy all queues and objects, free memory.
@@ -589,6 +613,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 		priv->txqs_n = 0;
 		priv->txqs = NULL;
 	}
+	mlx5_proc_priv_uninit(dev);
 	mlx5_mprq_free_mp(dev);
 	mlx5_mr_release(dev);
 	assert(priv->sh);
@@ -913,132 +938,6 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
 
 static struct rte_pci_driver mlx5_driver;
 
-static int
-find_lower_va_bound(const struct rte_memseg_list *msl,
-		const struct rte_memseg *ms, void *arg)
-{
-	void **addr = arg;
-
-	if (msl->external)
-		return 0;
-	if (*addr == NULL)
-		*addr = ms->addr;
-	else
-		*addr = RTE_MIN(*addr, ms->addr);
-
-	return 0;
-}
-
-/**
- * Reserve UAR address space for primary process.
- *
- * Process local resource is used by both primary and secondary to avoid
- * duplicate reservation. The space has to be available on both primary and
- * secondary process, TXQ UAR maps to this area using fixed mmap w/o double
- * check.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_uar_init_primary(void)
-{
-	struct mlx5_shared_data *sd = mlx5_shared_data;
-	void *addr = (void *)0;
-
-	if (sd->uar_base)
-		return 0;
-	/* find out lower bound of hugepage segments */
-	rte_memseg_walk(find_lower_va_bound, &addr);
-	/* keep distance to hugepages to minimize potential conflicts. */
-	addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX5_UAR_OFFSET + MLX5_UAR_SIZE));
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(addr, MLX5_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		DRV_LOG(ERR,
-			"Failed to reserve UAR address space, please"
-			" adjust MLX5_UAR_SIZE or try --base-virtaddr");
-		rte_errno = ENOMEM;
-		return -rte_errno;
-	}
-	/* Accept either same addr or a new addr returned from mmap if target
-	 * range occupied.
-	 */
-	DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
-	sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
-	return 0;
-}
-
-/**
- * Unmap UAR address space reserved for primary process.
- */
-static void
-mlx5_uar_uninit_primary(void)
-{
-	struct mlx5_shared_data *sd = mlx5_shared_data;
-
-	if (!sd->uar_base)
-		return;
-	munmap(sd->uar_base, MLX5_UAR_SIZE);
-	sd->uar_base = NULL;
-}
-
-/**
- * Reserve UAR address space for secondary process, align with primary process.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_uar_init_secondary(void)
-{
-	struct mlx5_shared_data *sd = mlx5_shared_data;
-	struct mlx5_local_data *ld = &mlx5_local_data;
-	void *addr;
-
-	if (ld->uar_base) { /* Already reserved. */
-		assert(sd->uar_base == ld->uar_base);
-		return 0;
-	}
-	assert(sd->uar_base);
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(sd->uar_base, MLX5_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		DRV_LOG(ERR, "UAR mmap failed: %p size: %llu",
-			sd->uar_base, MLX5_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
-	}
-	if (sd->uar_base != addr) {
-		DRV_LOG(ERR,
-			"UAR address %p size %llu occupied, please"
-			" adjust MLX5_UAR_OFFSET or try EAL parameter"
-			" --base-virtaddr",
-			sd->uar_base, MLX5_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
-	}
-	ld->uar_base = addr;
-	DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
-	return 0;
-}
-
-/**
- * Unmap UAR address space reserved for secondary process.
- */
-static void
-mlx5_uar_uninit_secondary(void)
-{
-	struct mlx5_local_data *ld = &mlx5_local_data;
-
-	if (!ld->uar_base)
-		return;
-	munmap(ld->uar_base, MLX5_UAR_SIZE);
-	ld->uar_base = NULL;
-}
-
 /**
  * PMD global initialization.
  *
@@ -1054,7 +953,6 @@ mlx5_init_once(void)
 {
 	struct mlx5_shared_data *sd;
 	struct mlx5_local_data *ld = &mlx5_local_data;
-	int ret;
 
 	if (mlx5_init_shared_data())
 		return -rte_errno;
@@ -1070,18 +968,12 @@ mlx5_init_once(void)
 		rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
 						mlx5_mr_mem_event_cb, NULL);
 		mlx5_mp_init_primary();
-		ret = mlx5_uar_init_primary();
-		if (ret)
-			goto error;
 		sd->init_done = true;
 		break;
 	case RTE_PROC_SECONDARY:
 		if (ld->init_done)
 			break;
 		mlx5_mp_init_secondary();
-		ret = mlx5_uar_init_secondary();
-		if (ret)
-			goto error;
 		++sd->secondary_cnt;
 		ld->init_done = true;
 		break;
@@ -1090,23 +982,6 @@ mlx5_init_once(void)
 	}
 	rte_spinlock_unlock(&sd->lock);
 	return 0;
-error:
-	switch (rte_eal_process_type()) {
-	case RTE_PROC_PRIMARY:
-		mlx5_uar_uninit_primary();
-		mlx5_mp_uninit_primary();
-		rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB", NULL);
-		break;
-	case RTE_PROC_SECONDARY:
-		mlx5_uar_uninit_secondary();
-		mlx5_mp_uninit_secondary();
-		break;
-	default:
-		break;
-	}
-	rte_spinlock_unlock(&sd->lock);
-	mlx5_uninit_shared_data();
-	return -rte_errno;
 }
 
 /**
@@ -1197,12 +1072,15 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 		}
 		eth_dev->device = dpdk_dev;
 		eth_dev->dev_ops = &mlx5_dev_sec_ops;
+		err = mlx5_proc_priv_init(eth_dev);
+		if (err)
+			return NULL;
 		/* Receive command fd from primary process */
 		err = mlx5_mp_req_verbs_cmd_fd(eth_dev);
 		if (err < 0)
 			return NULL;
 		/* Remap UAR for Tx queues. */
-		err = mlx5_tx_uar_remap(eth_dev, err);
+		err = mlx5_tx_uar_init_secondary(eth_dev, err);
 		if (err)
 			return NULL;
 		/*
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 960a2f8191..14c7f3c6fb 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -97,8 +97,6 @@ struct mlx5_shared_data {
 	/* Global spinlock for primary and secondary processes. */
 	int init_done; /* Whether primary has done initialization. */
 	unsigned int secondary_cnt; /* Number of secondary processes init'd. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
 	struct mlx5_dev_list mem_event_cb_list;
 	rte_rwlock_t mem_event_rwlock;
 };
@@ -106,8 +104,6 @@ struct mlx5_shared_data {
 /* Per-process data structure, not visible to other processes. */
 struct mlx5_local_data {
 	int init_done; /* Whether a secondary has done initialization. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
 };
 
 extern struct mlx5_shared_data *mlx5_shared_data;
@@ -282,6 +278,17 @@ struct mlx5_ibv_shared {
 	struct mlx5_ibv_shared_port port[]; /* per device port data array. */
 };
 
+/* Per-process private structure. */
+struct mlx5_proc_priv {
+	size_t uar_table_sz;
+	/* Size of UAR register table. */
+	void *uar_table[];
+	/* Table of UAR registers for each process. */
+};
+
+#define MLX5_PROC_PRIV(port_id) \
+	((struct mlx5_proc_priv *)rte_eth_devices[port_id].process_private)
+
 struct mlx5_priv {
 	LIST_ENTRY(mlx5_priv) mem_event_cb;
 	/**< Called by memory event callback. */
@@ -359,6 +366,7 @@ struct mlx5_priv {
 /* mlx5.c */
 
 int mlx5_getenv_int(const char *);
+int mlx5_proc_priv_init(struct rte_eth_dev *dev);
 
 /* mlx5_ethdev.c */
 
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index bfe6655800..69b6960e94 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -91,16 +91,6 @@
 /* Timeout in seconds to get a valid link status. */
 #define MLX5_LINK_STATUS_TIMEOUT 10
 
-/* Reserved address space for UAR mapping. */
-#define MLX5_UAR_SIZE (1ULL << (sizeof(uintptr_t) * 4))
-
-/* Offset of reserved UAR address space to hugepage memory. Offset is used here
- * to minimize possibility of address next to hugepage being used by other code
- * in either primary or secondary process, failing to map TX UAR would make TX
- * packets invisible to HW.
- */
-#define MLX5_UAR_OFFSET (1ULL << (sizeof(uintptr_t) * 4))
-
 /* Maximum number of UAR pages used by a port,
  * These are the size and mask for an array of mutexes used to synchronize
  * the access to port's UARs on platforms that do not support 64 bit writes.
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 1e6fe192a6..3992918c57 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -450,6 +450,9 @@ mlx5_dev_configure(struct rte_eth_dev *dev)
 		if (++j == rxqs_n)
 			j = 0;
 	}
+	ret = mlx5_proc_priv_init(dev);
+	if (ret)
+		return ret;
 	return 0;
 }
 
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 7b58063ceb..5d49892429 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -201,8 +201,8 @@ struct mlx5_txq_data {
 	volatile void *wqes; /* Work queue (use volatile to write into). */
 	volatile uint32_t *qp_db; /* Work queue doorbell. */
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
-	volatile void *bf_reg; /* Blueflame register remapped. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
+	uint16_t port_id; /* Port ID of device. */
 	uint16_t idx; /* Queue index. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
 #ifndef RTE_ARCH_64
@@ -231,9 +231,12 @@ struct mlx5_txq_ctrl {
 	struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
 	struct mlx5_priv *priv; /* Back pointer to private data. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
-	volatile void *bf_reg_orig; /* Blueflame register from verbs. */
+	void *bf_reg; /* BlueFlame register from Verbs. */
 };
 
+#define MLX5_TX_BFREG(txq) \
+		(MLX5_PROC_PRIV((txq)->port_id)->uar_table[(txq)->idx])
+
 /* mlx5_rxq.c */
 
 extern uint8_t rss_hash_default_key[];
@@ -301,7 +304,7 @@ uint64_t mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev);
 int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 			unsigned int socket, const struct rte_eth_txconf *conf);
 void mlx5_tx_queue_release(void *dpdk_txq);
-int mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd);
+int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
 struct mlx5_txq_ibv *mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx);
 struct mlx5_txq_ibv *mlx5_txq_ibv_get(struct rte_eth_dev *dev, uint16_t idx);
 int mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv);
@@ -704,7 +707,7 @@ static __rte_always_inline void
 mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe,
 		       int cond)
 {
-	uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
+	uint64_t *dst = MLX5_TX_BFREG(txq);
 	volatile uint64_t *src = ((volatile uint64_t *)wqe);
 
 	rte_cio_wmb();
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 7c1e5594d6..b7fde35758 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -58,12 +58,6 @@ mlx5_txq_start(struct rte_eth_dev *dev)
 			goto error;
 		}
 	}
-	ret = mlx5_tx_uar_remap(dev, priv->sh->ctx->cmd_fd);
-	if (ret) {
-		/* Adjust index for rollback. */
-		i = priv->txqs_n - 1;
-		goto error;
-	}
 	return 0;
 error:
 	ret = rte_errno; /* Save rte_errno before cleanup. */
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 4bd08cb035..9965b2b771 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -229,13 +229,100 @@ mlx5_tx_queue_release(void *dpdk_txq)
 		}
 }
 
+/**
+ * Initialize Tx UAR registers for primary process.
+ *
+ * @param txq_ctrl
+ *   Pointer to Tx queue control structure.
+ */
+static void
+txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl)
+{
+	struct mlx5_priv *priv = txq_ctrl->priv;
+	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
+#ifndef RTE_ARCH_64
+	unsigned int lock_idx;
+	const size_t page_size = sysconf(_SC_PAGESIZE);
+#endif
+
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	assert(ppriv);
+	ppriv->uar_table[txq_ctrl->txq.idx] = txq_ctrl->bf_reg;
+#ifndef RTE_ARCH_64
+	/* Assign an UAR lock according to UAR page number */
+	lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
+		   MLX5_UAR_PAGE_NUM_MASK;
+	txq_ctrl->txq.uar_lock = &priv->uar_lock[lock_idx];
+#endif
+}
+
+/**
+ * Remap UAR register of a Tx queue for secondary process.
+ *
+ * Remapped address is stored at the table in the process private structure of
+ * the device, indexed by queue index.
+ *
+ * @param txq_ctrl
+ *   Pointer to Tx queue control structure.
+ * @param fd
+ *   Verbs file descriptor to map UAR pages.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+txq_uar_init_secondary(struct mlx5_txq_ctrl *txq_ctrl, int fd)
+{
+	struct mlx5_priv *priv = txq_ctrl->priv;
+	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
+	struct mlx5_txq_data *txq = &txq_ctrl->txq;
+	void *addr;
+	uintptr_t uar_va;
+	uintptr_t offset;
+	const size_t page_size = sysconf(_SC_PAGESIZE);
+
+	assert(ppriv);
+	/*
+	 * As rdma-core, UARs are mapped in size of OS page
+	 * size. Ref to libmlx5 function: mlx5_init_context()
+	 */
+	uar_va = (uintptr_t)txq_ctrl->bf_reg;
+	offset = uar_va & (page_size - 1); /* Offset in page. */
+	addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
+			txq_ctrl->uar_mmap_offset);
+	if (addr == MAP_FAILED) {
+		DRV_LOG(ERR,
+			"port %u mmap failed for BF reg of txq %u",
+			txq->port_id, txq->idx);
+		rte_errno = ENXIO;
+		return -rte_errno;
+	}
+	addr = RTE_PTR_ADD(addr, offset);
+	ppriv->uar_table[txq->idx] = addr;
+	return 0;
+}
+
+/**
+ * Unmap UAR register of a Tx queue for secondary process.
+ *
+ * @param txq_ctrl
+ *   Pointer to Tx queue control structure.
+ */
+static void
+txq_uar_uninit_secondary(struct mlx5_txq_ctrl *txq_ctrl)
+{
+	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(txq_ctrl->priv));
+	const size_t page_size = sysconf(_SC_PAGESIZE);
+	void *addr;
+
+	addr = ppriv->uar_table[txq_ctrl->txq.idx];
+	munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
+}
 
 /**
- * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
- * Both primary and secondary process do mmap to make UAR address
- * aligned.
+ * Initialize Tx UAR registers for secondary process.
  *
- * @param[in] dev
+ * @param dev
  *   Pointer to Ethernet device.
  * @param fd
  *   Verbs file descriptor to map UAR pages.
@@ -244,81 +331,36 @@ mlx5_tx_queue_release(void *dpdk_txq)
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
+mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	unsigned int i, j;
-	uintptr_t pages[priv->txqs_n];
-	unsigned int pages_n = 0;
-	uintptr_t uar_va;
-	uintptr_t off;
-	void *addr;
-	void *ret;
 	struct mlx5_txq_data *txq;
 	struct mlx5_txq_ctrl *txq_ctrl;
-	int already_mapped;
-	size_t page_size = sysconf(_SC_PAGESIZE);
-#ifndef RTE_ARCH_64
-	unsigned int lock_idx;
-#endif
+	unsigned int i;
+	int ret;
 
-	memset(pages, 0, priv->txqs_n * sizeof(uintptr_t));
-	/*
-	 * As rdma-core, UARs are mapped in size of OS page size.
-	 * Use aligned address to avoid duplicate mmap.
-	 * Ref to libmlx5 function: mlx5_init_context()
-	 */
+	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
 	for (i = 0; i != priv->txqs_n; ++i) {
 		if (!(*priv->txqs)[i])
 			continue;
 		txq = (*priv->txqs)[i];
 		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
 		assert(txq->idx == (uint16_t)i);
-		/* UAR addr form verbs used to find dup and offset in page. */
-		uar_va = (uintptr_t)txq_ctrl->bf_reg_orig;
-		off = uar_va & (page_size - 1); /* offset in page. */
-		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
-		already_mapped = 0;
-		for (j = 0; j != pages_n; ++j) {
-			if (pages[j] == uar_va) {
-				already_mapped = 1;
-				break;
-			}
-		}
-		/* new address in reserved UAR address space. */
-		addr = RTE_PTR_ADD(mlx5_shared_data->uar_base,
-				   uar_va & (uintptr_t)(MLX5_UAR_SIZE - 1));
-		if (!already_mapped) {
-			pages[pages_n++] = uar_va;
-			/* fixed mmap to specified address in reserved
-			 * address space.
-			 */
-			ret = mmap(addr, page_size,
-				   PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
-				   txq_ctrl->uar_mmap_offset);
-			if (ret != addr) {
-				/* fixed mmap have to return same address */
-				DRV_LOG(ERR,
-					"port %u call to mmap failed on UAR"
-					" for txq %u",
-					dev->data->port_id, txq->idx);
-				rte_errno = ENXIO;
-				return -rte_errno;
-			}
-		}
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once */
-			txq_ctrl->txq.bf_reg = RTE_PTR_ADD((void *)addr, off);
-		else
-			assert(txq_ctrl->txq.bf_reg ==
-			       RTE_PTR_ADD((void *)addr, off));
-#ifndef RTE_ARCH_64
-		/* Assign a UAR lock according to UAR page number */
-		lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
-			   MLX5_UAR_PAGE_NUM_MASK;
-		txq->uar_lock = &priv->uar_lock[lock_idx];
-#endif
+		ret = txq_uar_init_secondary(txq_ctrl, fd);
+		if (ret)
+			goto error;
 	}
 	return 0;
+error:
+	/* Rollback. */
+	do {
+		if (!(*priv->txqs)[i])
+			continue;
+		txq = (*priv->txqs)[i];
+		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
+		txq_uar_uninit_secondary(txq_ctrl);
+	} while (i--);
+	return -rte_errno;
 }
 
 /**
@@ -507,7 +549,6 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	txq_data->wqes = qp.sq.buf;
 	txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
 	txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
-	txq_ctrl->bf_reg_orig = qp.bf.reg;
 	txq_data->cq_db = cq_info.dbrec;
 	txq_data->cqes =
 		(volatile struct mlx5_cqe (*)[])
@@ -521,6 +562,8 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	txq_ibv->qp = tmpl.qp;
 	txq_ibv->cq = tmpl.cq;
 	rte_atomic32_inc(&txq_ibv->refcnt);
+	txq_ctrl->bf_reg = qp.bf.reg;
+	txq_uar_init(txq_ctrl);
 	if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
 		txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
 		DRV_LOG(DEBUG, "port %u: uar_mmap_offset 0x%lx",
@@ -778,6 +821,7 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	tmpl->priv = priv;
 	tmpl->socket = socket;
 	tmpl->txq.elts_n = log2above(desc);
+	tmpl->txq.port_id = dev->data->port_id;
 	tmpl->txq.idx = idx;
 	txq_set_params(tmpl);
 	DRV_LOG(DEBUG, "port %u device_attr.max_qp_wr is %d",
@@ -836,15 +880,12 @@ mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_txq_ctrl *txq;
-	size_t page_size = sysconf(_SC_PAGESIZE);
 
 	if (!(*priv->txqs)[idx])
 		return 0;
 	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
 	if (txq->ibv && !mlx5_txq_ibv_release(txq->ibv))
 		txq->ibv = NULL;
-	munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->txq.bf_reg, page_size),
-	       page_size);
 	if (rte_atomic32_dec_and_test(&txq->refcnt)) {
 		txq_free_elts(txq);
 		mlx5_mr_btree_free(&txq->txq.mr_ctrl.cache_bh);
-- 
2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v5 3/4] net/mlx5: remove device register remap
  2019-04-10 18:41   ` [dpdk-dev] [PATCH v5 3/4] net/mlx5: remove device register remap Yongseok Koh
@ 2019-04-10 18:41     ` Yongseok Koh
  0 siblings, 0 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-10 18:41 UTC (permalink / raw)
  To: shahafs; +Cc: dev

The UAR (User Access Region) register does not need to be remapped for the
primary process; it needs to be remapped only for secondary processes. The
UAR register table is in the process private structure in rte_eth_devices[],
	(struct mlx5_proc_priv *)rte_eth_devices[port_id].process_private

The actual UAR table follows the data structure and the table is used for
both Tx and Rx.

For Tx, BlueFlame in UAR is used to ring the doorbell. MLX5_TX_BFREG(txq)
is defined to get a register for the txq. Each process accesses its own
private data to acquire the register from the UAR table.

For Rx, the doorbell in UAR is required for arming the CQ event. However, it
is a known issue that the register isn't remapped for secondary processes.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5.c         | 228 ++++++++++------------------------------
 drivers/net/mlx5/mlx5.h         |  16 ++-
 drivers/net/mlx5/mlx5_defs.h    |  10 --
 drivers/net/mlx5/mlx5_ethdev.c  |   3 +
 drivers/net/mlx5/mlx5_rxtx.h    |  11 +-
 drivers/net/mlx5/mlx5_trigger.c |   6 --
 drivers/net/mlx5/mlx5_txq.c     | 181 +++++++++++++++++++------------
 7 files changed, 186 insertions(+), 269 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 475c93ddf9..9ff50dfbe4 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -449,30 +449,6 @@ mlx5_init_shared_data(void)
 }
 
 /**
- * Uninitialize shared data between primary and secondary process.
- *
- * The pointer of secondary process is dereferenced and primary process frees
- * the memzone.
- */
-static void
-mlx5_uninit_shared_data(void)
-{
-	const struct rte_memzone *mz;
-
-	rte_spinlock_lock(&mlx5_shared_data_lock);
-	if (mlx5_shared_data) {
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-			mz = rte_memzone_lookup(MZ_MLX5_PMD_SHARED_DATA);
-			rte_memzone_free(mz);
-		} else {
-			memset(&mlx5_local_data, 0, sizeof(mlx5_local_data));
-		}
-		mlx5_shared_data = NULL;
-	}
-	rte_spinlock_unlock(&mlx5_shared_data_lock);
-}
-
-/**
  * Retrieve integer value from environment variable.
  *
  * @param[in] name
@@ -546,6 +522,54 @@ mlx5_free_verbs_buf(void *ptr, void *data __rte_unused)
 }
 
 /**
+ * Initialize process private data structure.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_proc_priv_init(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_proc_priv *ppriv;
+	size_t ppriv_size;
+
+	/*
+	 * UAR register table follows the process private structure. BlueFlame
+	 * registers for Tx queues are stored in the table.
+	 */
+	ppriv_size =
+		sizeof(struct mlx5_proc_priv) + priv->txqs_n * sizeof(void *);
+	ppriv = rte_malloc_socket("mlx5_proc_priv", ppriv_size,
+				  RTE_CACHE_LINE_SIZE, dev->device->numa_node);
+	if (!ppriv) {
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	ppriv->uar_table_sz = ppriv_size;
+	dev->process_private = ppriv;
+	return 0;
+}
+
+/**
+ * Un-initialize process private data structure.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ */
+static void
+mlx5_proc_priv_uninit(struct rte_eth_dev *dev)
+{
+	if (!dev->process_private)
+		return;
+	rte_free(dev->process_private);
+	dev->process_private = NULL;
+}
+
+/**
  * DPDK callback to close the device.
  *
  * Destroy all queues and objects, free memory.
@@ -589,6 +613,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 		priv->txqs_n = 0;
 		priv->txqs = NULL;
 	}
+	mlx5_proc_priv_uninit(dev);
 	mlx5_mprq_free_mp(dev);
 	mlx5_mr_release(dev);
 	assert(priv->sh);
@@ -913,132 +938,6 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
 
 static struct rte_pci_driver mlx5_driver;
 
-static int
-find_lower_va_bound(const struct rte_memseg_list *msl,
-		const struct rte_memseg *ms, void *arg)
-{
-	void **addr = arg;
-
-	if (msl->external)
-		return 0;
-	if (*addr == NULL)
-		*addr = ms->addr;
-	else
-		*addr = RTE_MIN(*addr, ms->addr);
-
-	return 0;
-}
-
-/**
- * Reserve UAR address space for primary process.
- *
- * Process local resource is used by both primary and secondary to avoid
- * duplicate reservation. The space has to be available on both primary and
- * secondary process, TXQ UAR maps to this area using fixed mmap w/o double
- * check.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_uar_init_primary(void)
-{
-	struct mlx5_shared_data *sd = mlx5_shared_data;
-	void *addr = (void *)0;
-
-	if (sd->uar_base)
-		return 0;
-	/* find out lower bound of hugepage segments */
-	rte_memseg_walk(find_lower_va_bound, &addr);
-	/* keep distance to hugepages to minimize potential conflicts. */
-	addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX5_UAR_OFFSET + MLX5_UAR_SIZE));
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(addr, MLX5_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		DRV_LOG(ERR,
-			"Failed to reserve UAR address space, please"
-			" adjust MLX5_UAR_SIZE or try --base-virtaddr");
-		rte_errno = ENOMEM;
-		return -rte_errno;
-	}
-	/* Accept either same addr or a new addr returned from mmap if target
-	 * range occupied.
-	 */
-	DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
-	sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
-	return 0;
-}
-
-/**
- * Unmap UAR address space reserved for primary process.
- */
-static void
-mlx5_uar_uninit_primary(void)
-{
-	struct mlx5_shared_data *sd = mlx5_shared_data;
-
-	if (!sd->uar_base)
-		return;
-	munmap(sd->uar_base, MLX5_UAR_SIZE);
-	sd->uar_base = NULL;
-}
-
-/**
- * Reserve UAR address space for secondary process, align with primary process.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_uar_init_secondary(void)
-{
-	struct mlx5_shared_data *sd = mlx5_shared_data;
-	struct mlx5_local_data *ld = &mlx5_local_data;
-	void *addr;
-
-	if (ld->uar_base) { /* Already reserved. */
-		assert(sd->uar_base == ld->uar_base);
-		return 0;
-	}
-	assert(sd->uar_base);
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(sd->uar_base, MLX5_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		DRV_LOG(ERR, "UAR mmap failed: %p size: %llu",
-			sd->uar_base, MLX5_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
-	}
-	if (sd->uar_base != addr) {
-		DRV_LOG(ERR,
-			"UAR address %p size %llu occupied, please"
-			" adjust MLX5_UAR_OFFSET or try EAL parameter"
-			" --base-virtaddr",
-			sd->uar_base, MLX5_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
-	}
-	ld->uar_base = addr;
-	DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
-	return 0;
-}
-
-/**
- * Unmap UAR address space reserved for secondary process.
- */
-static void
-mlx5_uar_uninit_secondary(void)
-{
-	struct mlx5_local_data *ld = &mlx5_local_data;
-
-	if (!ld->uar_base)
-		return;
-	munmap(ld->uar_base, MLX5_UAR_SIZE);
-	ld->uar_base = NULL;
-}
-
 /**
  * PMD global initialization.
  *
@@ -1054,7 +953,6 @@ mlx5_init_once(void)
 {
 	struct mlx5_shared_data *sd;
 	struct mlx5_local_data *ld = &mlx5_local_data;
-	int ret;
 
 	if (mlx5_init_shared_data())
 		return -rte_errno;
@@ -1070,18 +968,12 @@ mlx5_init_once(void)
 		rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
 						mlx5_mr_mem_event_cb, NULL);
 		mlx5_mp_init_primary();
-		ret = mlx5_uar_init_primary();
-		if (ret)
-			goto error;
 		sd->init_done = true;
 		break;
 	case RTE_PROC_SECONDARY:
 		if (ld->init_done)
 			break;
 		mlx5_mp_init_secondary();
-		ret = mlx5_uar_init_secondary();
-		if (ret)
-			goto error;
 		++sd->secondary_cnt;
 		ld->init_done = true;
 		break;
@@ -1090,23 +982,6 @@ mlx5_init_once(void)
 	}
 	rte_spinlock_unlock(&sd->lock);
 	return 0;
-error:
-	switch (rte_eal_process_type()) {
-	case RTE_PROC_PRIMARY:
-		mlx5_uar_uninit_primary();
-		mlx5_mp_uninit_primary();
-		rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB", NULL);
-		break;
-	case RTE_PROC_SECONDARY:
-		mlx5_uar_uninit_secondary();
-		mlx5_mp_uninit_secondary();
-		break;
-	default:
-		break;
-	}
-	rte_spinlock_unlock(&sd->lock);
-	mlx5_uninit_shared_data();
-	return -rte_errno;
 }
 
 /**
@@ -1197,12 +1072,15 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 		}
 		eth_dev->device = dpdk_dev;
 		eth_dev->dev_ops = &mlx5_dev_sec_ops;
+		err = mlx5_proc_priv_init(eth_dev);
+		if (err)
+			return NULL;
 		/* Receive command fd from primary process */
 		err = mlx5_mp_req_verbs_cmd_fd(eth_dev);
 		if (err < 0)
 			return NULL;
 		/* Remap UAR for Tx queues. */
-		err = mlx5_tx_uar_remap(eth_dev, err);
+		err = mlx5_tx_uar_init_secondary(eth_dev, err);
 		if (err)
 			return NULL;
 		/*
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 960a2f8191..14c7f3c6fb 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -97,8 +97,6 @@ struct mlx5_shared_data {
 	/* Global spinlock for primary and secondary processes. */
 	int init_done; /* Whether primary has done initialization. */
 	unsigned int secondary_cnt; /* Number of secondary processes init'd. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
 	struct mlx5_dev_list mem_event_cb_list;
 	rte_rwlock_t mem_event_rwlock;
 };
@@ -106,8 +104,6 @@ struct mlx5_shared_data {
 /* Per-process data structure, not visible to other processes. */
 struct mlx5_local_data {
 	int init_done; /* Whether a secondary has done initialization. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
 };
 
 extern struct mlx5_shared_data *mlx5_shared_data;
@@ -282,6 +278,17 @@ struct mlx5_ibv_shared {
 	struct mlx5_ibv_shared_port port[]; /* per device port data array. */
 };
 
+/* Per-process private structure. */
+struct mlx5_proc_priv {
+	size_t uar_table_sz;
+	/* Size of UAR register table. */
+	void *uar_table[];
+	/* Table of UAR registers for each process. */
+};
+
+#define MLX5_PROC_PRIV(port_id) \
+	((struct mlx5_proc_priv *)rte_eth_devices[port_id].process_private)
+
 struct mlx5_priv {
 	LIST_ENTRY(mlx5_priv) mem_event_cb;
 	/**< Called by memory event callback. */
@@ -359,6 +366,7 @@ struct mlx5_priv {
 /* mlx5.c */
 
 int mlx5_getenv_int(const char *);
+int mlx5_proc_priv_init(struct rte_eth_dev *dev);
 
 /* mlx5_ethdev.c */
 
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index bfe6655800..69b6960e94 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -91,16 +91,6 @@
 /* Timeout in seconds to get a valid link status. */
 #define MLX5_LINK_STATUS_TIMEOUT 10
 
-/* Reserved address space for UAR mapping. */
-#define MLX5_UAR_SIZE (1ULL << (sizeof(uintptr_t) * 4))
-
-/* Offset of reserved UAR address space to hugepage memory. Offset is used here
- * to minimize possibility of address next to hugepage being used by other code
- * in either primary or secondary process, failing to map TX UAR would make TX
- * packets invisible to HW.
- */
-#define MLX5_UAR_OFFSET (1ULL << (sizeof(uintptr_t) * 4))
-
 /* Maximum number of UAR pages used by a port,
  * These are the size and mask for an array of mutexes used to synchronize
  * the access to port's UARs on platforms that do not support 64 bit writes.
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 1e6fe192a6..3992918c57 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -450,6 +450,9 @@ mlx5_dev_configure(struct rte_eth_dev *dev)
 		if (++j == rxqs_n)
 			j = 0;
 	}
+	ret = mlx5_proc_priv_init(dev);
+	if (ret)
+		return ret;
 	return 0;
 }
 
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 7b58063ceb..5d49892429 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -201,8 +201,8 @@ struct mlx5_txq_data {
 	volatile void *wqes; /* Work queue (use volatile to write into). */
 	volatile uint32_t *qp_db; /* Work queue doorbell. */
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
-	volatile void *bf_reg; /* Blueflame register remapped. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
+	uint16_t port_id; /* Port ID of device. */
 	uint16_t idx; /* Queue index. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
 #ifndef RTE_ARCH_64
@@ -231,9 +231,12 @@ struct mlx5_txq_ctrl {
 	struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
 	struct mlx5_priv *priv; /* Back pointer to private data. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
-	volatile void *bf_reg_orig; /* Blueflame register from verbs. */
+	void *bf_reg; /* BlueFlame register from Verbs. */
 };
 
+#define MLX5_TX_BFREG(txq) \
+		(MLX5_PROC_PRIV((txq)->port_id)->uar_table[(txq)->idx])
+
 /* mlx5_rxq.c */
 
 extern uint8_t rss_hash_default_key[];
@@ -301,7 +304,7 @@ uint64_t mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev);
 int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 			unsigned int socket, const struct rte_eth_txconf *conf);
 void mlx5_tx_queue_release(void *dpdk_txq);
-int mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd);
+int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
 struct mlx5_txq_ibv *mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx);
 struct mlx5_txq_ibv *mlx5_txq_ibv_get(struct rte_eth_dev *dev, uint16_t idx);
 int mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv);
@@ -704,7 +707,7 @@ static __rte_always_inline void
 mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe,
 		       int cond)
 {
-	uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
+	uint64_t *dst = MLX5_TX_BFREG(txq);
 	volatile uint64_t *src = ((volatile uint64_t *)wqe);
 
 	rte_cio_wmb();
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 7c1e5594d6..b7fde35758 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -58,12 +58,6 @@ mlx5_txq_start(struct rte_eth_dev *dev)
 			goto error;
 		}
 	}
-	ret = mlx5_tx_uar_remap(dev, priv->sh->ctx->cmd_fd);
-	if (ret) {
-		/* Adjust index for rollback. */
-		i = priv->txqs_n - 1;
-		goto error;
-	}
 	return 0;
 error:
 	ret = rte_errno; /* Save rte_errno before cleanup. */
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 4bd08cb035..9965b2b771 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -229,13 +229,100 @@ mlx5_tx_queue_release(void *dpdk_txq)
 		}
 }
 
+/**
+ * Initialize Tx UAR registers for primary process.
+ *
+ * @param txq_ctrl
+ *   Pointer to Tx queue control structure.
+ */
+static void
+txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl)
+{
+	struct mlx5_priv *priv = txq_ctrl->priv;
+	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
+#ifndef RTE_ARCH_64
+	unsigned int lock_idx;
+	const size_t page_size = sysconf(_SC_PAGESIZE);
+#endif
+
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	assert(ppriv);
+	ppriv->uar_table[txq_ctrl->txq.idx] = txq_ctrl->bf_reg;
+#ifndef RTE_ARCH_64
+	/* Pick a UAR lock by page number; needs uar_mmap_offset set. */
+	lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
+		   MLX5_UAR_PAGE_NUM_MASK;
+	txq_ctrl->txq.uar_lock = &priv->uar_lock[lock_idx];
+#endif
+}
+
+/**
+ * Remap UAR register of a Tx queue for secondary process.
+ *
+ * Remapped address is stored at the table in the process private structure of
+ * the device, indexed by queue index.
+ *
+ * @param txq_ctrl
+ *   Pointer to Tx queue control structure.
+ * @param fd
+ *   Verbs file descriptor to map UAR pages.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+txq_uar_init_secondary(struct mlx5_txq_ctrl *txq_ctrl, int fd)
+{
+	struct mlx5_priv *priv = txq_ctrl->priv;
+	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
+	struct mlx5_txq_data *txq = &txq_ctrl->txq;
+	void *addr;
+	uintptr_t uar_va;
+	uintptr_t offset;
+	const size_t page_size = sysconf(_SC_PAGESIZE);
+
+	assert(ppriv);
+	/*
+	 * Like rdma-core, map the UAR in units of one OS page;
+	 * see mlx5_init_context() in libmlx5 for reference.
+	 */
+	uar_va = (uintptr_t)txq_ctrl->bf_reg;
+	offset = uar_va & (page_size - 1); /* Offset in page. */
+	addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
+			txq_ctrl->uar_mmap_offset);
+	if (addr == MAP_FAILED) {
+		DRV_LOG(ERR,
+			"port %u mmap failed for BF reg of txq %u",
+			txq->port_id, txq->idx);
+		rte_errno = ENXIO;
+		return -rte_errno;
+	}
+	addr = RTE_PTR_ADD(addr, offset);
+	ppriv->uar_table[txq->idx] = addr;
+	return 0;
+}
+
+/**
+ * Unmap UAR register of a Tx queue for secondary process.
+ *
+ * @param txq_ctrl
+ *   Pointer to Tx queue control structure.
+ */
+static void
+txq_uar_uninit_secondary(struct mlx5_txq_ctrl *txq_ctrl)
+{
+	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(txq_ctrl->priv));
+	const size_t page_size = sysconf(_SC_PAGESIZE);
+	void *addr;
+
+	addr = ppriv->uar_table[txq_ctrl->txq.idx];
+	munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
+}
 
 /**
- * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
- * Both primary and secondary process do mmap to make UAR address
- * aligned.
+ * Initialize Tx UAR registers for secondary process.
  *
- * @param[in] dev
+ * @param dev
  *   Pointer to Ethernet device.
  * @param fd
  *   Verbs file descriptor to map UAR pages.
@@ -244,81 +331,36 @@ mlx5_tx_queue_release(void *dpdk_txq)
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
+mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	unsigned int i, j;
-	uintptr_t pages[priv->txqs_n];
-	unsigned int pages_n = 0;
-	uintptr_t uar_va;
-	uintptr_t off;
-	void *addr;
-	void *ret;
 	struct mlx5_txq_data *txq;
 	struct mlx5_txq_ctrl *txq_ctrl;
-	int already_mapped;
-	size_t page_size = sysconf(_SC_PAGESIZE);
-#ifndef RTE_ARCH_64
-	unsigned int lock_idx;
-#endif
+	unsigned int i;
+	int ret;
 
-	memset(pages, 0, priv->txqs_n * sizeof(uintptr_t));
-	/*
-	 * As rdma-core, UARs are mapped in size of OS page size.
-	 * Use aligned address to avoid duplicate mmap.
-	 * Ref to libmlx5 function: mlx5_init_context()
-	 */
+	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
 	for (i = 0; i != priv->txqs_n; ++i) {
 		if (!(*priv->txqs)[i])
 			continue;
 		txq = (*priv->txqs)[i];
 		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
 		assert(txq->idx == (uint16_t)i);
-		/* UAR addr form verbs used to find dup and offset in page. */
-		uar_va = (uintptr_t)txq_ctrl->bf_reg_orig;
-		off = uar_va & (page_size - 1); /* offset in page. */
-		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
-		already_mapped = 0;
-		for (j = 0; j != pages_n; ++j) {
-			if (pages[j] == uar_va) {
-				already_mapped = 1;
-				break;
-			}
-		}
-		/* new address in reserved UAR address space. */
-		addr = RTE_PTR_ADD(mlx5_shared_data->uar_base,
-				   uar_va & (uintptr_t)(MLX5_UAR_SIZE - 1));
-		if (!already_mapped) {
-			pages[pages_n++] = uar_va;
-			/* fixed mmap to specified address in reserved
-			 * address space.
-			 */
-			ret = mmap(addr, page_size,
-				   PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
-				   txq_ctrl->uar_mmap_offset);
-			if (ret != addr) {
-				/* fixed mmap have to return same address */
-				DRV_LOG(ERR,
-					"port %u call to mmap failed on UAR"
-					" for txq %u",
-					dev->data->port_id, txq->idx);
-				rte_errno = ENXIO;
-				return -rte_errno;
-			}
-		}
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once */
-			txq_ctrl->txq.bf_reg = RTE_PTR_ADD((void *)addr, off);
-		else
-			assert(txq_ctrl->txq.bf_reg ==
-			       RTE_PTR_ADD((void *)addr, off));
-#ifndef RTE_ARCH_64
-		/* Assign a UAR lock according to UAR page number */
-		lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
-			   MLX5_UAR_PAGE_NUM_MASK;
-		txq->uar_lock = &priv->uar_lock[lock_idx];
-#endif
+		ret = txq_uar_init_secondary(txq_ctrl, fd);
+		if (ret)
+			goto error;
 	}
 	return 0;
+error:
+	/* Rollback: queue i failed to map, so unmap only queues below it. */
+	while (i--) {
+		if (!(*priv->txqs)[i])
+			continue;
+		txq = (*priv->txqs)[i];
+		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
+		txq_uar_uninit_secondary(txq_ctrl);
+	}
+	return -rte_errno;
 }
 
 /**
@@ -507,7 +549,6 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	txq_data->wqes = qp.sq.buf;
 	txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
 	txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
-	txq_ctrl->bf_reg_orig = qp.bf.reg;
 	txq_data->cq_db = cq_info.dbrec;
 	txq_data->cqes =
 		(volatile struct mlx5_cqe (*)[])
@@ -521,6 +562,8 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	txq_ibv->qp = tmpl.qp;
 	txq_ibv->cq = tmpl.cq;
 	rte_atomic32_inc(&txq_ibv->refcnt);
+	txq_ctrl->bf_reg = qp.bf.reg;
+	txq_uar_init(txq_ctrl);
 	if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
 		txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
 		DRV_LOG(DEBUG, "port %u: uar_mmap_offset 0x%lx",
@@ -778,6 +821,7 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	tmpl->priv = priv;
 	tmpl->socket = socket;
 	tmpl->txq.elts_n = log2above(desc);
+	tmpl->txq.port_id = dev->data->port_id;
 	tmpl->txq.idx = idx;
 	txq_set_params(tmpl);
 	DRV_LOG(DEBUG, "port %u device_attr.max_qp_wr is %d",
@@ -836,15 +880,12 @@ mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_txq_ctrl *txq;
-	size_t page_size = sysconf(_SC_PAGESIZE);
 
 	if (!(*priv->txqs)[idx])
 		return 0;
 	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
 	if (txq->ibv && !mlx5_txq_ibv_release(txq->ibv))
 		txq->ibv = NULL;
-	munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->txq.bf_reg, page_size),
-	       page_size);
 	if (rte_atomic32_dec_and_test(&txq->refcnt)) {
 		txq_free_elts(txq);
 		mlx5_mr_btree_free(&txq->txq.mr_ctrl.cache_bh);
-- 
2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v5 4/4] net/mlx4: remove device register remap
  2019-04-10 18:41 ` [dpdk-dev] [PATCH v5 " Yongseok Koh
                     ` (3 preceding siblings ...)
  2019-04-10 18:41   ` [dpdk-dev] [PATCH v5 3/4] net/mlx5: remove device register remap Yongseok Koh
@ 2019-04-10 18:41   ` Yongseok Koh
  2019-04-10 18:41     ` Yongseok Koh
  2019-04-11  8:40   ` [dpdk-dev] [PATCH v5 0/4] net/mlx: " Shahaf Shuler
  5 siblings, 1 reply; 66+ messages in thread
From: Yongseok Koh @ 2019-04-10 18:41 UTC (permalink / raw)
  To: shahafs; +Cc: dev

The UAR (User Access Region) register does not need to be remapped in the
primary process; it has to be remapped only in secondary processes. The UAR
register table lives in the process private structure of rte_eth_devices[]:
	(struct mlx4_proc_priv *)rte_eth_devices[port_id].process_private

The actual UAR table follows the data structure and the table is used for
both Tx and Rx.

For Tx, BlueFlame in UAR is used to ring the doorbell. MLX4_TX_BFREG(txq)
is defined to get the register for a txq. Each process accesses its own
private data to acquire the register from the UAR table.

For Rx, the doorbell in UAR is required in arming CQ event. However, it is
a known issue that the register isn't remapped for secondary process.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Shahaf Shuler <shahafs@mellanox.com>
---
 drivers/net/mlx4/mlx4.c      | 255 +++++++++++--------------------------------
 drivers/net/mlx4/mlx4.h      |  25 ++---
 drivers/net/mlx4/mlx4_prm.h  |   3 +-
 drivers/net/mlx4/mlx4_rxtx.c |   2 +-
 drivers/net/mlx4/mlx4_rxtx.h |   6 +-
 drivers/net/mlx4/mlx4_txq.c  | 170 ++++++++++++++++++-----------
 6 files changed, 183 insertions(+), 278 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 9bca0ce9cd..17dfcd5a3b 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -126,30 +126,6 @@ mlx4_init_shared_data(void)
 	return ret;
 }
 
-/**
- * Uninitialize shared data between primary and secondary process.
- *
- * The pointer of secondary process is dereferenced and primary process frees
- * the memzone.
- */
-static void
-mlx4_uninit_shared_data(void)
-{
-	const struct rte_memzone *mz;
-
-	rte_spinlock_lock(&mlx4_shared_data_lock);
-	if (mlx4_shared_data) {
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-			mz = rte_memzone_lookup(MZ_MLX4_PMD_SHARED_DATA);
-			rte_memzone_free(mz);
-		} else {
-			memset(&mlx4_local_data, 0, sizeof(mlx4_local_data));
-		}
-		mlx4_shared_data = NULL;
-	}
-	rte_spinlock_unlock(&mlx4_shared_data_lock);
-}
-
 #ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS
 /**
  * Verbs callback to allocate a memory. This function should allocate the space
@@ -207,6 +183,53 @@ mlx4_free_verbs_buf(void *ptr, void *data __rte_unused)
 #endif
 
 /**
+ * Initialize process private data structure.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_proc_priv_init(struct rte_eth_dev *dev)
+{
+	struct mlx4_proc_priv *ppriv;
+	size_t ppriv_size;
+
+	/*
+	 * The UAR table trails the structure, one BlueFlame register per Tx
+	 * queue. Zero it so unset entries are NULL, never stale heap data.
+	 */
+	ppriv_size = sizeof(struct mlx4_proc_priv) +
+		     dev->data->nb_tx_queues * sizeof(void *);
+	ppriv = rte_zmalloc_socket("mlx4_proc_priv", ppriv_size,
+				   RTE_CACHE_LINE_SIZE, dev->device->numa_node);
+	if (!ppriv) {
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	ppriv->uar_table_sz = ppriv_size;
+	dev->process_private = ppriv;
+	return 0;
+}
+
+/**
+ * Un-initialize process private data structure.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ */
+static void
+mlx4_proc_priv_uninit(struct rte_eth_dev *dev)
+{
+	if (!dev->process_private)
+		return;
+	rte_free(dev->process_private);
+	dev->process_private = NULL;
+}
+
+/**
  * DPDK callback for Ethernet device configuration.
  *
  * @param dev
@@ -232,9 +255,17 @@ mlx4_dev_configure(struct rte_eth_dev *dev)
 		goto exit;
 	}
 	ret = mlx4_intr_install(priv);
-	if (ret)
+	if (ret) {
 		ERROR("%p: interrupt handler installation failed",
 		      (void *)dev);
+		goto exit;
+	}
+	ret = mlx4_proc_priv_init(dev);
+	if (ret) {
+		ERROR("%p: process private data allocation failed",
+		      (void *)dev);
+		goto exit;
+	}
 exit:
 	return ret;
 }
@@ -262,11 +293,6 @@ mlx4_dev_start(struct rte_eth_dev *dev)
 		return 0;
 	DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
 	priv->started = 1;
-	ret = mlx4_tx_uar_remap(dev, priv->ctx->cmd_fd);
-	if (ret) {
-		ERROR("%p: cannot remap UAR", (void *)dev);
-		goto err;
-	}
 	ret = mlx4_rss_init(priv);
 	if (ret) {
 		ERROR("%p: cannot initialize RSS resources: %s",
@@ -314,10 +340,6 @@ static void
 mlx4_dev_stop(struct rte_eth_dev *dev)
 {
 	struct mlx4_priv *priv = dev->data->dev_private;
-#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
-	const size_t page_size = sysconf(_SC_PAGESIZE);
-	int i;
-#endif
 
 	if (!priv->started)
 		return;
@@ -331,17 +353,6 @@ mlx4_dev_stop(struct rte_eth_dev *dev)
 	mlx4_flow_sync(priv, NULL);
 	mlx4_rxq_intr_disable(priv);
 	mlx4_rss_deinit(priv);
-#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
-	for (i = 0; i != dev->data->nb_tx_queues; ++i) {
-		struct txq *txq;
-
-		txq = dev->data->tx_queues[i];
-		if (!txq)
-			continue;
-		munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->msq.db,
-					       page_size), page_size);
-	}
-#endif
 }
 
 /**
@@ -372,6 +383,7 @@ mlx4_dev_close(struct rte_eth_dev *dev)
 		mlx4_rx_queue_release(dev->data->rx_queues[i]);
 	for (i = 0; i != dev->data->nb_tx_queues; ++i)
 		mlx4_tx_queue_release(dev->data->tx_queues[i]);
+	mlx4_proc_priv_uninit(dev);
 	mlx4_mr_release(dev);
 	if (priv->pd != NULL) {
 		assert(priv->ctx != NULL);
@@ -666,130 +678,6 @@ mlx4_hw_rss_sup(struct ibv_context *ctx, struct ibv_pd *pd,
 
 static struct rte_pci_driver mlx4_driver;
 
-static int
-find_lower_va_bound(const struct rte_memseg_list *msl,
-		const struct rte_memseg *ms, void *arg)
-{
-	void **addr = arg;
-
-	if (msl->external)
-		return 0;
-	if (*addr == NULL)
-		*addr = ms->addr;
-	else
-		*addr = RTE_MIN(*addr, ms->addr);
-
-	return 0;
-}
-
-/**
- * Reserve UAR address space for primary process.
- *
- * Process local resource is used by both primary and secondary to avoid
- * duplicate reservation. The space has to be available on both primary and
- * secondary process, TXQ UAR maps to this area using fixed mmap w/o double
- * check.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx4_uar_init_primary(void)
-{
-	struct mlx4_shared_data *sd = mlx4_shared_data;
-	void *addr = (void *)0;
-
-	if (sd->uar_base)
-		return 0;
-	/* find out lower bound of hugepage segments */
-	rte_memseg_walk(find_lower_va_bound, &addr);
-	/* keep distance to hugepages to minimize potential conflicts. */
-	addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX4_UAR_OFFSET + MLX4_UAR_SIZE));
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(addr, MLX4_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		ERROR("failed to reserve UAR address space, please"
-		      " adjust MLX4_UAR_SIZE or try --base-virtaddr");
-		rte_errno = ENOMEM;
-		return -rte_errno;
-	}
-	/* Accept either same addr or a new addr returned from mmap if target
-	 * range occupied.
-	 */
-	INFO("reserved UAR address space: %p", addr);
-	sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
-	return 0;
-}
-
-/**
- * Unmap UAR address space reserved for primary process.
- */
-static void
-mlx4_uar_uninit_primary(void)
-{
-	struct mlx4_shared_data *sd = mlx4_shared_data;
-
-	if (!sd->uar_base)
-		return;
-	munmap(sd->uar_base, MLX4_UAR_SIZE);
-	sd->uar_base = NULL;
-}
-
-/**
- * Reserve UAR address space for secondary process, align with primary process.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx4_uar_init_secondary(void)
-{
-	struct mlx4_shared_data *sd = mlx4_shared_data;
-	struct mlx4_local_data *ld = &mlx4_local_data;
-	void *addr;
-
-	if (ld->uar_base) { /* Already reserved. */
-		assert(sd->uar_base == ld->uar_base);
-		return 0;
-	}
-	assert(sd->uar_base);
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(sd->uar_base, MLX4_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		ERROR("UAR mmap failed: %p size: %llu",
-		      sd->uar_base, MLX4_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
-	}
-	if (sd->uar_base != addr) {
-		ERROR("UAR address %p size %llu occupied, please"
-		      " adjust MLX4_UAR_OFFSET or try EAL parameter"
-		      " --base-virtaddr",
-		      sd->uar_base, MLX4_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
-	}
-	ld->uar_base = addr;
-	INFO("reserved UAR address space: %p", addr);
-	return 0;
-}
-
-/**
- * Unmap UAR address space reserved for secondary process.
- */
-static void
-mlx4_uar_uninit_secondary(void)
-{
-	struct mlx4_local_data *ld = &mlx4_local_data;
-
-	if (!ld->uar_base)
-		return;
-	munmap(ld->uar_base, MLX4_UAR_SIZE);
-	ld->uar_base = NULL;
-}
-
 /**
  * PMD global initialization.
  *
@@ -805,7 +693,6 @@ mlx4_init_once(void)
 {
 	struct mlx4_shared_data *sd;
 	struct mlx4_local_data *ld = &mlx4_local_data;
-	int ret;
 
 	if (mlx4_init_shared_data())
 		return -rte_errno;
@@ -821,18 +708,12 @@ mlx4_init_once(void)
 		rte_mem_event_callback_register("MLX4_MEM_EVENT_CB",
 						mlx4_mr_mem_event_cb, NULL);
 		mlx4_mp_init_primary();
-		ret = mlx4_uar_init_primary();
-		if (ret)
-			goto error;
 		sd->init_done = true;
 		break;
 	case RTE_PROC_SECONDARY:
 		if (ld->init_done)
 			break;
 		mlx4_mp_init_secondary();
-		ret = mlx4_uar_init_secondary();
-		if (ret)
-			goto error;
 		++sd->secondary_cnt;
 		ld->init_done = true;
 		break;
@@ -841,23 +722,6 @@ mlx4_init_once(void)
 	}
 	rte_spinlock_unlock(&sd->lock);
 	return 0;
-error:
-	switch (rte_eal_process_type()) {
-	case RTE_PROC_PRIMARY:
-		mlx4_uar_uninit_primary();
-		mlx4_mp_uninit_primary();
-		rte_mem_event_callback_unregister("MLX4_MEM_EVENT_CB", NULL);
-		break;
-	case RTE_PROC_SECONDARY:
-		mlx4_uar_uninit_secondary();
-		mlx4_mp_uninit_secondary();
-		break;
-	default:
-		break;
-	}
-	rte_spinlock_unlock(&sd->lock);
-	mlx4_uninit_shared_data();
-	return -rte_errno;
 }
 
 /**
@@ -1009,6 +873,9 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 			}
 			eth_dev->device = &pci_dev->device;
 			eth_dev->dev_ops = &mlx4_dev_sec_ops;
+			err = mlx4_proc_priv_init(eth_dev);
+			if (err)
+				goto error;
 			/* Receive command fd from primary process. */
 			err = mlx4_mp_req_verbs_cmd_fd(eth_dev);
 			if (err < 0) {
@@ -1016,7 +883,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 				goto error;
 			}
 			/* Remap UAR for Tx queues. */
-			err = mlx4_tx_uar_remap(eth_dev, err);
+			err = mlx4_tx_uar_init_secondary(eth_dev, err);
 			if (err) {
 				err = rte_errno;
 				goto error;
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 1db23d6cc9..6224b3be1a 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -56,16 +56,6 @@
 /** Enable extending memsegs when creating a MR. */
 #define MLX4_MR_EXT_MEMSEG_EN_KVARG "mr_ext_memseg_en"
 
-/* Reserved address space for UAR mapping. */
-#define MLX4_UAR_SIZE (1ULL << (sizeof(uintptr_t) * 4))
-
-/* Offset of reserved UAR address space to hugepage memory. Offset is used here
- * to minimize possibility of address next to hugepage being used by other code
- * in either primary or secondary process, failing to map TX UAR would make TX
- * packets invisible to HW.
- */
-#define MLX4_UAR_OFFSET (2ULL << (sizeof(uintptr_t) * 4))
-
 enum {
 	PCI_VENDOR_ID_MELLANOX = 0x15b3,
 };
@@ -138,8 +128,6 @@ struct mlx4_shared_data {
 	/* Global spinlock for primary and secondary processes. */
 	int init_done; /* Whether primary has done initialization. */
 	unsigned int secondary_cnt; /* Number of secondary processes init'd. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
 	struct mlx4_dev_list mem_event_cb_list;
 	rte_rwlock_t mem_event_rwlock;
 };
@@ -147,12 +135,21 @@ struct mlx4_shared_data {
 /* Per-process data structure, not visible to other processes. */
 struct mlx4_local_data {
 	int init_done; /* Whether a secondary has done initialization. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
 };
 
 extern struct mlx4_shared_data *mlx4_shared_data;
 
+/* Per-process private structure. */
+struct mlx4_proc_priv {
+	size_t uar_table_sz;
+	/* Size of UAR register table. */
+	void *uar_table[];
+	/* Table of UAR registers for each process. */
+};
+
+#define MLX4_PROC_PRIV(port_id) \
+	((struct mlx4_proc_priv *)rte_eth_devices[port_id].process_private)
+
 /** Private data structure. */
 struct mlx4_priv {
 	LIST_ENTRY(mlx4_priv) mem_event_cb;
diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h
index b3e11dde25..16ae6db82d 100644
--- a/drivers/net/mlx4/mlx4_prm.h
+++ b/drivers/net/mlx4/mlx4_prm.h
@@ -77,8 +77,7 @@ struct mlx4_sq {
 	uint32_t owner_opcode;
 	/**< Default owner opcode with HW valid owner bit. */
 	uint32_t stamp; /**< Stamp value with an invalid HW owner bit. */
-	volatile uint32_t *qp_sdb; /**< Pointer to the doorbell. */
-	volatile uint32_t *db; /**< Pointer to the doorbell remapped. */
+	uint32_t *db; /**< Pointer to the doorbell. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
 	uint32_t doorbell_qpn; /**< qp number to write to the doorbell. */
 };
diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index f22f1ba559..391271a616 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -1048,7 +1048,7 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	/* Make sure that descriptors are written before doorbell record. */
 	rte_wmb();
 	/* Ring QP doorbell. */
-	rte_write32(txq->msq.doorbell_qpn, txq->msq.db);
+	rte_write32(txq->msq.doorbell_qpn, MLX4_TX_BFREG(txq));
 	txq->elts_head += i;
 	return i;
 }
diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
index 7d7a8988ed..8baf33fa94 100644
--- a/drivers/net/mlx4/mlx4_rxtx.h
+++ b/drivers/net/mlx4/mlx4_rxtx.h
@@ -97,6 +97,7 @@ struct mlx4_txq_stats {
 struct txq {
 	struct mlx4_sq msq; /**< Info for directly manipulating the SQ. */
 	struct mlx4_cq mcq; /**< Info for directly manipulating the CQ. */
+	uint16_t port_id; /**< Port ID of device. */
 	unsigned int elts_head; /**< Current index in (*elts)[]. */
 	unsigned int elts_tail; /**< First element awaiting completion. */
 	int elts_comp_cd; /**< Countdown for next completion. */
@@ -118,6 +119,9 @@ struct txq {
 	uint8_t data[]; /**< Remaining queue resources. */
 };
 
+#define MLX4_TX_BFREG(txq) \
+		(MLX4_PROC_PRIV((txq)->port_id)->uar_table[(txq)->stats.idx])
+
 /* mlx4_rxq.c */
 
 uint8_t mlx4_rss_hash_key_default[MLX4_RSS_HASH_KEY_SIZE];
@@ -152,7 +156,7 @@ uint16_t mlx4_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts,
 
 /* mlx4_txq.c */
 
-int mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd);
+int mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
 uint64_t mlx4_get_tx_port_offloads(struct mlx4_priv *priv);
 int mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
 			uint16_t desc, unsigned int socket,
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index 698a648c8d..01a5efd80d 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -40,11 +40,88 @@
 #include "mlx4_utils.h"
 
 /**
- * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
- * Both primary and secondary process do mmap to make UAR address
- * aligned.
+ * Initialize Tx UAR registers for primary process.
  *
- * @param[in] dev
+ * @param txq
+ *   Pointer to Tx queue structure.
+ */
+static void
+txq_uar_init(struct txq *txq)
+{
+	struct mlx4_priv *priv = txq->priv;
+	struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(priv));
+
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	assert(ppriv);
+	ppriv->uar_table[txq->stats.idx] = txq->msq.db;
+}
+
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
+/**
+ * Remap UAR register of a Tx queue for secondary process.
+ *
+ * Remapped address is stored at the table in the process private structure of
+ * the device, indexed by queue index.
+ *
+ * @param txq
+ *   Pointer to Tx queue structure.
+ * @param fd
+ *   Verbs file descriptor to map UAR pages.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+txq_uar_init_secondary(struct txq *txq, int fd)
+{
+	struct mlx4_priv *priv = txq->priv;
+	struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(priv));
+	void *addr;
+	uintptr_t uar_va;
+	uintptr_t offset;
+	const size_t page_size = sysconf(_SC_PAGESIZE);
+
+	assert(ppriv);
+	/*
+	 * Like rdma-core, map the UAR in units of one OS page;
+	 * see mlx4_init_context() in libmlx4 for reference.
+	 */
+	uar_va = (uintptr_t)txq->msq.db;
+	offset = uar_va & (page_size - 1); /* Offset in page. */
+	addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
+			txq->msq.uar_mmap_offset);
+	if (addr == MAP_FAILED) {
+		ERROR("port %u mmap failed for BF reg of txq %u",
+		      txq->port_id, txq->stats.idx);
+		rte_errno = ENXIO;
+		return -rte_errno;
+	}
+	addr = RTE_PTR_ADD(addr, offset);
+	ppriv->uar_table[txq->stats.idx] = addr;
+	return 0;
+}
+
+/**
+ * Unmap UAR register of a Tx queue for secondary process.
+ *
+ * @param txq
+ *   Pointer to Tx queue structure.
+ */
+static void
+txq_uar_uninit_secondary(struct txq *txq)
+{
+	struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(txq->priv));
+	const size_t page_size = sysconf(_SC_PAGESIZE);
+	void *addr;
+
+	addr = ppriv->uar_table[txq->stats.idx];
+	munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
+}
+
+/**
+ * Initialize Tx UAR registers for secondary process.
+ *
+ * @param dev
  *   Pointer to Ethernet device.
  * @param fd
  *   Verbs file descriptor to map UAR pages.
@@ -52,81 +129,41 @@
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
 int
-mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd)
+mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd)
 {
-	unsigned int i, j;
 	const unsigned int txqs_n = dev->data->nb_tx_queues;
-	uintptr_t pages[txqs_n];
-	unsigned int pages_n = 0;
-	uintptr_t uar_va;
-	uintptr_t off;
-	void *addr;
-	void *ret;
 	struct txq *txq;
-	int already_mapped;
-	size_t page_size = sysconf(_SC_PAGESIZE);
+	unsigned int i;
+	int ret;
 
-	memset(pages, 0, txqs_n * sizeof(uintptr_t));
-	/*
-	 * As rdma-core, UARs are mapped in size of OS page size.
-	 * Use aligned address to avoid duplicate mmap.
-	 * Ref to libmlx4 function: mlx4_init_context()
-	 */
+	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
 	for (i = 0; i != txqs_n; ++i) {
 		txq = dev->data->tx_queues[i];
 		if (!txq)
 			continue;
-		/* UAR addr form verbs used to find dup and offset in page. */
-		uar_va = (uintptr_t)txq->msq.qp_sdb;
-		off = uar_va & (page_size - 1); /* offset in page. */
-		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
-		already_mapped = 0;
-		for (j = 0; j != pages_n; ++j) {
-			if (pages[j] == uar_va) {
-				already_mapped = 1;
-				break;
-			}
-		}
-		/* new address in reserved UAR address space. */
-		addr = RTE_PTR_ADD(mlx4_shared_data->uar_base,
-				   uar_va & (uintptr_t)(MLX4_UAR_SIZE - 1));
-		if (!already_mapped) {
-			pages[pages_n++] = uar_va;
-			/* fixed mmap to specified address in reserved
-			 * address space.
-			 */
-			ret = mmap(addr, page_size,
-				   PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
-				   txq->msq.uar_mmap_offset);
-			if (ret != addr) {
-				/* fixed mmap has to return same address. */
-				ERROR("port %u call to mmap failed on UAR"
-				      " for txq %u",
-				      dev->data->port_id, i);
-				rte_errno = ENXIO;
-				return -rte_errno;
-			}
-		}
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once. */
-			txq->msq.db = RTE_PTR_ADD((void *)addr, off);
-		else
-			assert(txq->msq.db ==
-			       RTE_PTR_ADD((void *)addr, off));
+		assert(txq->stats.idx == (uint16_t)i);
+		ret = txq_uar_init_secondary(txq, fd);
+		if (ret)
+			goto error;
 	}
 	return 0;
+error:
+	/* Rollback: queue i failed to map, so unmap only queues below it. */
+	while (i--) {
+		txq = dev->data->tx_queues[i];
+		if (!txq)
+			continue;
+		txq_uar_uninit_secondary(txq);
+	}
+	return -rte_errno;
 }
 #else
 int
-mlx4_tx_uar_remap(struct rte_eth_dev *dev __rte_unused, int fd __rte_unused)
+mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev __rte_unused,
+			   int fd __rte_unused)
 {
-	/*
-	 * Even if rdma-core doesn't support UAR remap, primary process
-	 * shouldn't be interrupted.
-	 */
-	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-		return 0;
+	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
 	ERROR("UAR remap is not supported");
 	rte_errno = ENOTSUP;
 	return -rte_errno;
@@ -187,11 +224,10 @@ mlx4_txq_fill_dv_obj_info(struct txq *txq, struct mlx4dv_obj *mlxdv)
 				     (0u << MLX4_SQ_OWNER_BIT));
 #ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
 	sq->uar_mmap_offset = dqp->uar_mmap_offset;
-	sq->qp_sdb = dqp->sdb;
 #else
 	sq->uar_mmap_offset = -1; /* Make mmap() fail. */
-	sq->db = dqp->sdb;
 #endif
+	sq->db = dqp->sdb;
 	sq->doorbell_qpn = dqp->doorbell_qpn;
 	cq->buf = dcq->buf.buf;
 	cq->cqe_cnt = dcq->cqe_cnt;
@@ -314,6 +350,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	}
 	*txq = (struct txq){
 		.priv = priv,
+		.port_id = dev->data->port_id,
 		.stats = {
 			.idx = idx,
 		},
@@ -432,6 +469,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	}
 #endif
 	mlx4_txq_fill_dv_obj_info(txq, &mlxdv);
+	txq_uar_init(txq);
 	/* Save first wqe pointer in the first element. */
 	(&(*txq->elts)[0])->wqe =
 		(volatile struct mlx4_wqe_ctrl_seg *)txq->msq.buf;
-- 
2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [dpdk-dev] [PATCH v5 4/4] net/mlx4: remove device register remap
  2019-04-10 18:41   ` [dpdk-dev] [PATCH v5 4/4] net/mlx4: " Yongseok Koh
@ 2019-04-10 18:41     ` Yongseok Koh
  0 siblings, 0 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-10 18:41 UTC (permalink / raw)
  To: shahafs; +Cc: dev

The UAR (User Access Region) register does not need to be remapped for the
primary process; it needs to be remapped only for secondary processes. The UAR
register table is in the process private structure in rte_eth_devices[],
	(struct mlx4_proc_priv *)rte_eth_devices[port_id].process_private

The actual UAR table follows the data structure and the table is used for
both Tx and Rx.

For Tx, BlueFlame in UAR is used to ring the doorbell. MLX4_TX_BFREG(txq)
is defined to get a register for the txq. Each process accesses its own
private data to acquire the register from the UAR table.

For Rx, the doorbell in UAR is required for arming CQ events. However, it is
a known issue that the register isn't remapped for secondary processes.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Shahaf Shuler <shahafs@mellanox.com>
---
 drivers/net/mlx4/mlx4.c      | 255 +++++++++++--------------------------------
 drivers/net/mlx4/mlx4.h      |  25 ++---
 drivers/net/mlx4/mlx4_prm.h  |   3 +-
 drivers/net/mlx4/mlx4_rxtx.c |   2 +-
 drivers/net/mlx4/mlx4_rxtx.h |   6 +-
 drivers/net/mlx4/mlx4_txq.c  | 170 ++++++++++++++++++-----------
 6 files changed, 183 insertions(+), 278 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 9bca0ce9cd..17dfcd5a3b 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -126,30 +126,6 @@ mlx4_init_shared_data(void)
 	return ret;
 }
 
-/**
- * Uninitialize shared data between primary and secondary process.
- *
- * The pointer of secondary process is dereferenced and primary process frees
- * the memzone.
- */
-static void
-mlx4_uninit_shared_data(void)
-{
-	const struct rte_memzone *mz;
-
-	rte_spinlock_lock(&mlx4_shared_data_lock);
-	if (mlx4_shared_data) {
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-			mz = rte_memzone_lookup(MZ_MLX4_PMD_SHARED_DATA);
-			rte_memzone_free(mz);
-		} else {
-			memset(&mlx4_local_data, 0, sizeof(mlx4_local_data));
-		}
-		mlx4_shared_data = NULL;
-	}
-	rte_spinlock_unlock(&mlx4_shared_data_lock);
-}
-
 #ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS
 /**
  * Verbs callback to allocate a memory. This function should allocate the space
@@ -207,6 +183,53 @@ mlx4_free_verbs_buf(void *ptr, void *data __rte_unused)
 #endif
 
 /**
+ * Initialize process private data structure.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_proc_priv_init(struct rte_eth_dev *dev)
+{
+	struct mlx4_proc_priv *ppriv;
+	size_t ppriv_size;
+
+	/*
+	 * UAR register table follows the process private structure. BlueFlame
+	 * registers for Tx queues are stored in the table.
+	 */
+	ppriv_size = sizeof(struct mlx4_proc_priv) +
+		     dev->data->nb_tx_queues * sizeof(void *);
+	ppriv = rte_malloc_socket("mlx4_proc_priv", ppriv_size,
+				  RTE_CACHE_LINE_SIZE, dev->device->numa_node);
+	if (!ppriv) {
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	ppriv->uar_table_sz = ppriv_size;
+	dev->process_private = ppriv;
+	return 0;
+}
+
+/**
+ * Un-initialize process private data structure.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ */
+static void
+mlx4_proc_priv_uninit(struct rte_eth_dev *dev)
+{
+	if (!dev->process_private)
+		return;
+	rte_free(dev->process_private);
+	dev->process_private = NULL;
+}
+
+/**
  * DPDK callback for Ethernet device configuration.
  *
  * @param dev
@@ -232,9 +255,17 @@ mlx4_dev_configure(struct rte_eth_dev *dev)
 		goto exit;
 	}
 	ret = mlx4_intr_install(priv);
-	if (ret)
+	if (ret) {
 		ERROR("%p: interrupt handler installation failed",
 		      (void *)dev);
+		goto exit;
+	}
+	ret = mlx4_proc_priv_init(dev);
+	if (ret) {
+		ERROR("%p: process private data allocation failed",
+		      (void *)dev);
+		goto exit;
+	}
 exit:
 	return ret;
 }
@@ -262,11 +293,6 @@ mlx4_dev_start(struct rte_eth_dev *dev)
 		return 0;
 	DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
 	priv->started = 1;
-	ret = mlx4_tx_uar_remap(dev, priv->ctx->cmd_fd);
-	if (ret) {
-		ERROR("%p: cannot remap UAR", (void *)dev);
-		goto err;
-	}
 	ret = mlx4_rss_init(priv);
 	if (ret) {
 		ERROR("%p: cannot initialize RSS resources: %s",
@@ -314,10 +340,6 @@ static void
 mlx4_dev_stop(struct rte_eth_dev *dev)
 {
 	struct mlx4_priv *priv = dev->data->dev_private;
-#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
-	const size_t page_size = sysconf(_SC_PAGESIZE);
-	int i;
-#endif
 
 	if (!priv->started)
 		return;
@@ -331,17 +353,6 @@ mlx4_dev_stop(struct rte_eth_dev *dev)
 	mlx4_flow_sync(priv, NULL);
 	mlx4_rxq_intr_disable(priv);
 	mlx4_rss_deinit(priv);
-#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
-	for (i = 0; i != dev->data->nb_tx_queues; ++i) {
-		struct txq *txq;
-
-		txq = dev->data->tx_queues[i];
-		if (!txq)
-			continue;
-		munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->msq.db,
-					       page_size), page_size);
-	}
-#endif
 }
 
 /**
@@ -372,6 +383,7 @@ mlx4_dev_close(struct rte_eth_dev *dev)
 		mlx4_rx_queue_release(dev->data->rx_queues[i]);
 	for (i = 0; i != dev->data->nb_tx_queues; ++i)
 		mlx4_tx_queue_release(dev->data->tx_queues[i]);
+	mlx4_proc_priv_uninit(dev);
 	mlx4_mr_release(dev);
 	if (priv->pd != NULL) {
 		assert(priv->ctx != NULL);
@@ -666,130 +678,6 @@ mlx4_hw_rss_sup(struct ibv_context *ctx, struct ibv_pd *pd,
 
 static struct rte_pci_driver mlx4_driver;
 
-static int
-find_lower_va_bound(const struct rte_memseg_list *msl,
-		const struct rte_memseg *ms, void *arg)
-{
-	void **addr = arg;
-
-	if (msl->external)
-		return 0;
-	if (*addr == NULL)
-		*addr = ms->addr;
-	else
-		*addr = RTE_MIN(*addr, ms->addr);
-
-	return 0;
-}
-
-/**
- * Reserve UAR address space for primary process.
- *
- * Process local resource is used by both primary and secondary to avoid
- * duplicate reservation. The space has to be available on both primary and
- * secondary process, TXQ UAR maps to this area using fixed mmap w/o double
- * check.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx4_uar_init_primary(void)
-{
-	struct mlx4_shared_data *sd = mlx4_shared_data;
-	void *addr = (void *)0;
-
-	if (sd->uar_base)
-		return 0;
-	/* find out lower bound of hugepage segments */
-	rte_memseg_walk(find_lower_va_bound, &addr);
-	/* keep distance to hugepages to minimize potential conflicts. */
-	addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX4_UAR_OFFSET + MLX4_UAR_SIZE));
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(addr, MLX4_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		ERROR("failed to reserve UAR address space, please"
-		      " adjust MLX4_UAR_SIZE or try --base-virtaddr");
-		rte_errno = ENOMEM;
-		return -rte_errno;
-	}
-	/* Accept either same addr or a new addr returned from mmap if target
-	 * range occupied.
-	 */
-	INFO("reserved UAR address space: %p", addr);
-	sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
-	return 0;
-}
-
-/**
- * Unmap UAR address space reserved for primary process.
- */
-static void
-mlx4_uar_uninit_primary(void)
-{
-	struct mlx4_shared_data *sd = mlx4_shared_data;
-
-	if (!sd->uar_base)
-		return;
-	munmap(sd->uar_base, MLX4_UAR_SIZE);
-	sd->uar_base = NULL;
-}
-
-/**
- * Reserve UAR address space for secondary process, align with primary process.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx4_uar_init_secondary(void)
-{
-	struct mlx4_shared_data *sd = mlx4_shared_data;
-	struct mlx4_local_data *ld = &mlx4_local_data;
-	void *addr;
-
-	if (ld->uar_base) { /* Already reserved. */
-		assert(sd->uar_base == ld->uar_base);
-		return 0;
-	}
-	assert(sd->uar_base);
-	/* anonymous mmap, no real memory consumption. */
-	addr = mmap(sd->uar_base, MLX4_UAR_SIZE,
-		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (addr == MAP_FAILED) {
-		ERROR("UAR mmap failed: %p size: %llu",
-		      sd->uar_base, MLX4_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
-	}
-	if (sd->uar_base != addr) {
-		ERROR("UAR address %p size %llu occupied, please"
-		      " adjust MLX4_UAR_OFFSET or try EAL parameter"
-		      " --base-virtaddr",
-		      sd->uar_base, MLX4_UAR_SIZE);
-		rte_errno = ENXIO;
-		return -rte_errno;
-	}
-	ld->uar_base = addr;
-	INFO("reserved UAR address space: %p", addr);
-	return 0;
-}
-
-/**
- * Unmap UAR address space reserved for secondary process.
- */
-static void
-mlx4_uar_uninit_secondary(void)
-{
-	struct mlx4_local_data *ld = &mlx4_local_data;
-
-	if (!ld->uar_base)
-		return;
-	munmap(ld->uar_base, MLX4_UAR_SIZE);
-	ld->uar_base = NULL;
-}
-
 /**
  * PMD global initialization.
  *
@@ -805,7 +693,6 @@ mlx4_init_once(void)
 {
 	struct mlx4_shared_data *sd;
 	struct mlx4_local_data *ld = &mlx4_local_data;
-	int ret;
 
 	if (mlx4_init_shared_data())
 		return -rte_errno;
@@ -821,18 +708,12 @@ mlx4_init_once(void)
 		rte_mem_event_callback_register("MLX4_MEM_EVENT_CB",
 						mlx4_mr_mem_event_cb, NULL);
 		mlx4_mp_init_primary();
-		ret = mlx4_uar_init_primary();
-		if (ret)
-			goto error;
 		sd->init_done = true;
 		break;
 	case RTE_PROC_SECONDARY:
 		if (ld->init_done)
 			break;
 		mlx4_mp_init_secondary();
-		ret = mlx4_uar_init_secondary();
-		if (ret)
-			goto error;
 		++sd->secondary_cnt;
 		ld->init_done = true;
 		break;
@@ -841,23 +722,6 @@ mlx4_init_once(void)
 	}
 	rte_spinlock_unlock(&sd->lock);
 	return 0;
-error:
-	switch (rte_eal_process_type()) {
-	case RTE_PROC_PRIMARY:
-		mlx4_uar_uninit_primary();
-		mlx4_mp_uninit_primary();
-		rte_mem_event_callback_unregister("MLX4_MEM_EVENT_CB", NULL);
-		break;
-	case RTE_PROC_SECONDARY:
-		mlx4_uar_uninit_secondary();
-		mlx4_mp_uninit_secondary();
-		break;
-	default:
-		break;
-	}
-	rte_spinlock_unlock(&sd->lock);
-	mlx4_uninit_shared_data();
-	return -rte_errno;
 }
 
 /**
@@ -1009,6 +873,9 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 			}
 			eth_dev->device = &pci_dev->device;
 			eth_dev->dev_ops = &mlx4_dev_sec_ops;
+			err = mlx4_proc_priv_init(eth_dev);
+			if (err)
+				goto error;
 			/* Receive command fd from primary process. */
 			err = mlx4_mp_req_verbs_cmd_fd(eth_dev);
 			if (err < 0) {
@@ -1016,7 +883,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 				goto error;
 			}
 			/* Remap UAR for Tx queues. */
-			err = mlx4_tx_uar_remap(eth_dev, err);
+			err = mlx4_tx_uar_init_secondary(eth_dev, err);
 			if (err) {
 				err = rte_errno;
 				goto error;
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 1db23d6cc9..6224b3be1a 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -56,16 +56,6 @@
 /** Enable extending memsegs when creating a MR. */
 #define MLX4_MR_EXT_MEMSEG_EN_KVARG "mr_ext_memseg_en"
 
-/* Reserved address space for UAR mapping. */
-#define MLX4_UAR_SIZE (1ULL << (sizeof(uintptr_t) * 4))
-
-/* Offset of reserved UAR address space to hugepage memory. Offset is used here
- * to minimize possibility of address next to hugepage being used by other code
- * in either primary or secondary process, failing to map TX UAR would make TX
- * packets invisible to HW.
- */
-#define MLX4_UAR_OFFSET (2ULL << (sizeof(uintptr_t) * 4))
-
 enum {
 	PCI_VENDOR_ID_MELLANOX = 0x15b3,
 };
@@ -138,8 +128,6 @@ struct mlx4_shared_data {
 	/* Global spinlock for primary and secondary processes. */
 	int init_done; /* Whether primary has done initialization. */
 	unsigned int secondary_cnt; /* Number of secondary processes init'd. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
 	struct mlx4_dev_list mem_event_cb_list;
 	rte_rwlock_t mem_event_rwlock;
 };
@@ -147,12 +135,21 @@ struct mlx4_shared_data {
 /* Per-process data structure, not visible to other processes. */
 struct mlx4_local_data {
 	int init_done; /* Whether a secondary has done initialization. */
-	void *uar_base;
-	/* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
 };
 
 extern struct mlx4_shared_data *mlx4_shared_data;
 
+/* Per-process private structure. */
+struct mlx4_proc_priv {
+	size_t uar_table_sz;
+	/* Size of UAR register table. */
+	void *uar_table[];
+	/* Table of UAR registers for each process. */
+};
+
+#define MLX4_PROC_PRIV(port_id) \
+	((struct mlx4_proc_priv *)rte_eth_devices[port_id].process_private)
+
 /** Private data structure. */
 struct mlx4_priv {
 	LIST_ENTRY(mlx4_priv) mem_event_cb;
diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h
index b3e11dde25..16ae6db82d 100644
--- a/drivers/net/mlx4/mlx4_prm.h
+++ b/drivers/net/mlx4/mlx4_prm.h
@@ -77,8 +77,7 @@ struct mlx4_sq {
 	uint32_t owner_opcode;
 	/**< Default owner opcode with HW valid owner bit. */
 	uint32_t stamp; /**< Stamp value with an invalid HW owner bit. */
-	volatile uint32_t *qp_sdb; /**< Pointer to the doorbell. */
-	volatile uint32_t *db; /**< Pointer to the doorbell remapped. */
+	uint32_t *db; /**< Pointer to the doorbell. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
 	uint32_t doorbell_qpn; /**< qp number to write to the doorbell. */
 };
diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index f22f1ba559..391271a616 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -1048,7 +1048,7 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	/* Make sure that descriptors are written before doorbell record. */
 	rte_wmb();
 	/* Ring QP doorbell. */
-	rte_write32(txq->msq.doorbell_qpn, txq->msq.db);
+	rte_write32(txq->msq.doorbell_qpn, MLX4_TX_BFREG(txq));
 	txq->elts_head += i;
 	return i;
 }
diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
index 7d7a8988ed..8baf33fa94 100644
--- a/drivers/net/mlx4/mlx4_rxtx.h
+++ b/drivers/net/mlx4/mlx4_rxtx.h
@@ -97,6 +97,7 @@ struct mlx4_txq_stats {
 struct txq {
 	struct mlx4_sq msq; /**< Info for directly manipulating the SQ. */
 	struct mlx4_cq mcq; /**< Info for directly manipulating the CQ. */
+	uint16_t port_id; /**< Port ID of device. */
 	unsigned int elts_head; /**< Current index in (*elts)[]. */
 	unsigned int elts_tail; /**< First element awaiting completion. */
 	int elts_comp_cd; /**< Countdown for next completion. */
@@ -118,6 +119,9 @@ struct txq {
 	uint8_t data[]; /**< Remaining queue resources. */
 };
 
+#define MLX4_TX_BFREG(txq) \
+		(MLX4_PROC_PRIV((txq)->port_id)->uar_table[(txq)->stats.idx])
+
 /* mlx4_rxq.c */
 
 uint8_t mlx4_rss_hash_key_default[MLX4_RSS_HASH_KEY_SIZE];
@@ -152,7 +156,7 @@ uint16_t mlx4_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts,
 
 /* mlx4_txq.c */
 
-int mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd);
+int mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
 uint64_t mlx4_get_tx_port_offloads(struct mlx4_priv *priv);
 int mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
 			uint16_t desc, unsigned int socket,
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
index 698a648c8d..01a5efd80d 100644
--- a/drivers/net/mlx4/mlx4_txq.c
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -40,11 +40,88 @@
 #include "mlx4_utils.h"
 
 /**
- * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
- * Both primary and secondary process do mmap to make UAR address
- * aligned.
+ * Initialize Tx UAR registers for primary process.
  *
- * @param[in] dev
+ * @param txq
+ *   Pointer to Tx queue structure.
+ */
+static void
+txq_uar_init(struct txq *txq)
+{
+	struct mlx4_priv *priv = txq->priv;
+	struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(priv));
+
+	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+	assert(ppriv);
+	ppriv->uar_table[txq->stats.idx] = txq->msq.db;
+}
+
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
+/**
+ * Remap UAR register of a Tx queue for secondary process.
+ *
+ * Remapped address is stored at the table in the process private structure of
+ * the device, indexed by queue index.
+ *
+ * @param txq
+ *   Pointer to Tx queue structure.
+ * @param fd
+ *   Verbs file descriptor to map UAR pages.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+txq_uar_init_secondary(struct txq *txq, int fd)
+{
+	struct mlx4_priv *priv = txq->priv;
+	struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(priv));
+	void *addr;
+	uintptr_t uar_va;
+	uintptr_t offset;
+	const size_t page_size = sysconf(_SC_PAGESIZE);
+
+	assert(ppriv);
+	/*
+	 * As rdma-core, UARs are mapped in size of OS page
+	 * size. Ref to libmlx4 function: mlx4_init_context()
+	 */
+	uar_va = (uintptr_t)txq->msq.db;
+	offset = uar_va & (page_size - 1); /* Offset in page. */
+	addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
+			txq->msq.uar_mmap_offset);
+	if (addr == MAP_FAILED) {
+		ERROR("port %u mmap failed for BF reg of txq %u",
+		      txq->port_id, txq->stats.idx);
+		rte_errno = ENXIO;
+		return -rte_errno;
+	}
+	addr = RTE_PTR_ADD(addr, offset);
+	ppriv->uar_table[txq->stats.idx] = addr;
+	return 0;
+}
+
+/**
+ * Unmap UAR register of a Tx queue for secondary process.
+ *
+ * @param txq
+ *   Pointer to Tx queue structure.
+ */
+static void
+txq_uar_uninit_secondary(struct txq *txq)
+{
+	struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(txq->priv));
+	const size_t page_size = sysconf(_SC_PAGESIZE);
+	void *addr;
+
+	addr = ppriv->uar_table[txq->stats.idx];
+	munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
+}
+
+/**
+ * Initialize Tx UAR registers for secondary process.
+ *
+ * @param dev
  *   Pointer to Ethernet device.
  * @param fd
  *   Verbs file descriptor to map UAR pages.
@@ -52,81 +129,41 @@
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
 int
-mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd)
+mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd)
 {
-	unsigned int i, j;
 	const unsigned int txqs_n = dev->data->nb_tx_queues;
-	uintptr_t pages[txqs_n];
-	unsigned int pages_n = 0;
-	uintptr_t uar_va;
-	uintptr_t off;
-	void *addr;
-	void *ret;
 	struct txq *txq;
-	int already_mapped;
-	size_t page_size = sysconf(_SC_PAGESIZE);
+	unsigned int i;
+	int ret;
 
-	memset(pages, 0, txqs_n * sizeof(uintptr_t));
-	/*
-	 * As rdma-core, UARs are mapped in size of OS page size.
-	 * Use aligned address to avoid duplicate mmap.
-	 * Ref to libmlx4 function: mlx4_init_context()
-	 */
+	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
 	for (i = 0; i != txqs_n; ++i) {
 		txq = dev->data->tx_queues[i];
 		if (!txq)
 			continue;
-		/* UAR addr form verbs used to find dup and offset in page. */
-		uar_va = (uintptr_t)txq->msq.qp_sdb;
-		off = uar_va & (page_size - 1); /* offset in page. */
-		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
-		already_mapped = 0;
-		for (j = 0; j != pages_n; ++j) {
-			if (pages[j] == uar_va) {
-				already_mapped = 1;
-				break;
-			}
-		}
-		/* new address in reserved UAR address space. */
-		addr = RTE_PTR_ADD(mlx4_shared_data->uar_base,
-				   uar_va & (uintptr_t)(MLX4_UAR_SIZE - 1));
-		if (!already_mapped) {
-			pages[pages_n++] = uar_va;
-			/* fixed mmap to specified address in reserved
-			 * address space.
-			 */
-			ret = mmap(addr, page_size,
-				   PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
-				   txq->msq.uar_mmap_offset);
-			if (ret != addr) {
-				/* fixed mmap has to return same address. */
-				ERROR("port %u call to mmap failed on UAR"
-				      " for txq %u",
-				      dev->data->port_id, i);
-				rte_errno = ENXIO;
-				return -rte_errno;
-			}
-		}
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once. */
-			txq->msq.db = RTE_PTR_ADD((void *)addr, off);
-		else
-			assert(txq->msq.db ==
-			       RTE_PTR_ADD((void *)addr, off));
+		assert(txq->stats.idx == (uint16_t)i);
+		ret = txq_uar_init_secondary(txq, fd);
+		if (ret)
+			goto error;
 	}
 	return 0;
+error:
+	/* Rollback. */
+	do {
+		txq = dev->data->tx_queues[i];
+		if (!txq)
+			continue;
+		txq_uar_uninit_secondary(txq);
+	} while (i--);
+	return -rte_errno;
 }
 #else
 int
-mlx4_tx_uar_remap(struct rte_eth_dev *dev __rte_unused, int fd __rte_unused)
+mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev __rte_unused,
+			   int fd __rte_unused)
 {
-	/*
-	 * Even if rdma-core doesn't support UAR remap, primary process
-	 * shouldn't be interrupted.
-	 */
-	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-		return 0;
+	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
 	ERROR("UAR remap is not supported");
 	rte_errno = ENOTSUP;
 	return -rte_errno;
@@ -187,11 +224,10 @@ mlx4_txq_fill_dv_obj_info(struct txq *txq, struct mlx4dv_obj *mlxdv)
 				     (0u << MLX4_SQ_OWNER_BIT));
 #ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
 	sq->uar_mmap_offset = dqp->uar_mmap_offset;
-	sq->qp_sdb = dqp->sdb;
 #else
 	sq->uar_mmap_offset = -1; /* Make mmap() fail. */
-	sq->db = dqp->sdb;
 #endif
+	sq->db = dqp->sdb;
 	sq->doorbell_qpn = dqp->doorbell_qpn;
 	cq->buf = dcq->buf.buf;
 	cq->cqe_cnt = dcq->cqe_cnt;
@@ -314,6 +350,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	}
 	*txq = (struct txq){
 		.priv = priv,
+		.port_id = dev->data->port_id,
 		.stats = {
 			.idx = idx,
 		},
@@ -432,6 +469,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	}
 #endif
 	mlx4_txq_fill_dv_obj_info(txq, &mlxdv);
+	txq_uar_init(txq);
 	/* Save first wqe pointer in the first element. */
 	(&(*txq->elts)[0])->wqe =
 		(volatile struct mlx4_wqe_ctrl_seg *)txq->msq.buf;
-- 
2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [dpdk-dev] [PATCH v4 3/4] net/mlx5: remove device register remap
  2019-04-10 17:50     ` Ferruh Yigit
  2019-04-10 17:50       ` Ferruh Yigit
@ 2019-04-10 19:12       ` Yongseok Koh
  2019-04-10 19:12         ` Yongseok Koh
  1 sibling, 1 reply; 66+ messages in thread
From: Yongseok Koh @ 2019-04-10 19:12 UTC (permalink / raw)
  To: Ferruh Yigit; +Cc: Shahaf Shuler, dev


> On Apr 10, 2019, at 10:50 AM, Ferruh Yigit <ferruh.yigit@intel.com> wrote:
> 
> On 4/10/2019 12:13 AM, Yongseok Koh wrote:
>> UAR (User Access Region) register does not need to be remapped for primary
>> process but it should be remapped only for secondary process. UAR register
>> table is in the process private structure in rte_eth_devices[],
>> 	(struct mlx5_proc_priv *)rte_eth_devices[port_id].process_private
>> 
>> The actual UAR table follows the data structure and the table is used for
>> both Tx and Rx.
>> 
>> For Tx, BlueFlame in UAR is used to ring the doorbell. MLX5_TX_BFREG(txq)
>> is defined to get a register for the txq. Processes access its own private
>> data to acquire the register from the UAR table.
>> 
>> For Rx, the doorbell in UAR is required in arming CQ event. However, it is
>> a known issue that the register isn't remapped for secondary process.
>> 
>> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
> 
> <...>
> 
>> @@ -229,13 +229,99 @@ mlx5_tx_queue_release(void *dpdk_txq)
>> 		}
>> }
>> 
>> +/**
>> + * Initialize Tx UAR registers for primary process.
>> + *
>> + * @param txq_ctrl
>> + *   Pointer to Tx queue control structure.
>> + */
>> +static void
>> +txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl)
>> +{
>> +	struct mlx5_priv *priv = txq_ctrl->priv;
>> +	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
>> +
>> +	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
>> +	assert(ppriv);
>> +	ppriv->uar_table[txq_ctrl->txq.idx] = txq_ctrl->bf_reg;
>> +#ifndef RTE_ARCH_64
>> +	struct mlx5_priv *priv = txq_ctrl->priv;
>> +	struct mlx5_txq_data *txq = &txq_ctrl->txq;
>> +	unsigned int lock_idx;
>> +	/* Assign an UAR lock according to UAR page number */
>> +	lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
>> +		   MLX5_UAR_PAGE_NUM_MASK;
>> +	txq->uar_lock = &priv->uar_lock[lock_idx];
>> +#endif
>> +}
> 
> This won't compile on architectures that are not 64-bit, since 'page_size'
> is not defined in that block.

It is embarrassing that I have made so many mistakes in this last patchset.
So many contexts in my head... Or, these patches must be haunted. :-(

I always test 32-bit but it looks like a mistake when rebasing it, not sure...
My apologies. I've sent out v5.

For your convenience, here's the diff.

$ git diff yskoh/upstr-remove-uar-remap
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 904c4f5c03..6224b3be1a 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -56,16 +56,6 @@
 /** Enable extending memsegs when creating a MR. */
 #define MLX4_MR_EXT_MEMSEG_EN_KVARG "mr_ext_memseg_en"

-/* Reserved address space for UAR mapping. */
-#define MLX4_UAR_SIZE (1ULL << (sizeof(uintptr_t) * 4))
-
-/* Offset of reserved UAR address space to hugepage memory. Offset is used here
- * to minimize possibility of address next to hugepage being used by other code
- * in either primary or secondary process, failing to map TX UAR would make TX
- * packets invisible to HW.
- */
-#define MLX4_UAR_OFFSET (2ULL << (sizeof(uintptr_t) * 4))
-
 enum {
        PCI_VENDOR_ID_MELLANOX = 0x15b3,
 };
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index bfe6655800..69b6960e94 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -91,16 +91,6 @@
 /* Timeout in seconds to get a valid link status. */
 #define MLX5_LINK_STATUS_TIMEOUT 10

-/* Reserved address space for UAR mapping. */
-#define MLX5_UAR_SIZE (1ULL << (sizeof(uintptr_t) * 4))
-
-/* Offset of reserved UAR address space to hugepage memory. Offset is used here
- * to minimize possibility of address next to hugepage being used by other code
- * in either primary or secondary process, failing to map TX UAR would make TX
- * packets invisible to HW.
- */
-#define MLX5_UAR_OFFSET (1ULL << (sizeof(uintptr_t) * 4))
-
 /* Maximum number of UAR pages used by a port,
  * These are the size and mask for an array of mutexes used to synchronize
  * the access to port's UARs on platforms that do not support 64 bit writes.
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 5fb1761955..9965b2b771 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -240,18 +240,19 @@ txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl)
 {
        struct mlx5_priv *priv = txq_ctrl->priv;
        struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
+#ifndef RTE_ARCH_64
+       unsigned int lock_idx;
+       const size_t page_size = sysconf(_SC_PAGESIZE);
+#endif

        assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
        assert(ppriv);
        ppriv->uar_table[txq_ctrl->txq.idx] = txq_ctrl->bf_reg;
 #ifndef RTE_ARCH_64
-       struct mlx5_priv *priv = txq_ctrl->priv;
-       struct mlx5_txq_data *txq = &txq_ctrl->txq;
-       unsigned int lock_idx;
        /* Assign an UAR lock according to UAR page number */
        lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
                   MLX5_UAR_PAGE_NUM_MASK;
-       txq->uar_lock = &priv->uar_lock[lock_idx];
+       txq_ctrl->txq.uar_lock = &priv->uar_lock[lock_idx];
 #endif
 }

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [dpdk-dev] [PATCH v4 3/4] net/mlx5: remove device register remap
  2019-04-10 19:12       ` Yongseok Koh
@ 2019-04-10 19:12         ` Yongseok Koh
  0 siblings, 0 replies; 66+ messages in thread
From: Yongseok Koh @ 2019-04-10 19:12 UTC (permalink / raw)
  To: Ferruh Yigit; +Cc: Shahaf Shuler, dev


> On Apr 10, 2019, at 10:50 AM, Ferruh Yigit <ferruh.yigit@intel.com> wrote:
> 
> On 4/10/2019 12:13 AM, Yongseok Koh wrote:
>> UAR (User Access Region) register does not need to be remapped for primary
>> process but it should be remapped only for secondary process. UAR register
>> table is in the process private structure in rte_eth_devices[],
>> 	(struct mlx5_proc_priv *)rte_eth_devices[port_id].process_private
>> 
>> The actual UAR table follows the data structure and the table is used for
>> both Tx and Rx.
>> 
>> For Tx, BlueFlame in UAR is used to ring the doorbell. MLX5_TX_BFREG(txq)
>> is defined to get a register for the txq. Processes access its own private
>> data to acquire the register from the UAR table.
>> 
>> For Rx, the doorbell in UAR is required in arming CQ event. However, it is
>> a known issue that the register isn't remapped for secondary process.
>> 
>> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
> 
> <...>
> 
>> @@ -229,13 +229,99 @@ mlx5_tx_queue_release(void *dpdk_txq)
>> 		}
>> }
>> 
>> +/**
>> + * Initialize Tx UAR registers for primary process.
>> + *
>> + * @param txq_ctrl
>> + *   Pointer to Tx queue control structure.
>> + */
>> +static void
>> +txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl)
>> +{
>> +	struct mlx5_priv *priv = txq_ctrl->priv;
>> +	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
>> +
>> +	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
>> +	assert(ppriv);
>> +	ppriv->uar_table[txq_ctrl->txq.idx] = txq_ctrl->bf_reg;
>> +#ifndef RTE_ARCH_64
>> +	struct mlx5_priv *priv = txq_ctrl->priv;
>> +	struct mlx5_txq_data *txq = &txq_ctrl->txq;
>> +	unsigned int lock_idx;
>> +	/* Assign an UAR lock according to UAR page number */
>> +	lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
>> +		   MLX5_UAR_PAGE_NUM_MASK;
>> +	txq->uar_lock = &priv->uar_lock[lock_idx];
>> +#endif
>> +}
> 
> This won't compile on architectures that are not 64-bit, since 'page_size'
> is not defined in that block.

It is embarrassing that I have made so many mistakes in this last patchset.
So many contexts in my head... Or, these patches must be haunted. :-(

I always test 32-bit but it looks like a mistake when rebasing it, not sure...
My apologies. I've sent out v5.

For your convenience, here's the diff.

$ git diff yskoh/upstr-remove-uar-remap
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 904c4f5c03..6224b3be1a 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -56,16 +56,6 @@
 /** Enable extending memsegs when creating a MR. */
 #define MLX4_MR_EXT_MEMSEG_EN_KVARG "mr_ext_memseg_en"

-/* Reserved address space for UAR mapping. */
-#define MLX4_UAR_SIZE (1ULL << (sizeof(uintptr_t) * 4))
-
-/* Offset of reserved UAR address space to hugepage memory. Offset is used here
- * to minimize possibility of address next to hugepage being used by other code
- * in either primary or secondary process, failing to map TX UAR would make TX
- * packets invisible to HW.
- */
-#define MLX4_UAR_OFFSET (2ULL << (sizeof(uintptr_t) * 4))
-
 enum {
        PCI_VENDOR_ID_MELLANOX = 0x15b3,
 };
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index bfe6655800..69b6960e94 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -91,16 +91,6 @@
 /* Timeout in seconds to get a valid link status. */
 #define MLX5_LINK_STATUS_TIMEOUT 10

-/* Reserved address space for UAR mapping. */
-#define MLX5_UAR_SIZE (1ULL << (sizeof(uintptr_t) * 4))
-
-/* Offset of reserved UAR address space to hugepage memory. Offset is used here
- * to minimize possibility of address next to hugepage being used by other code
- * in either primary or secondary process, failing to map TX UAR would make TX
- * packets invisible to HW.
- */
-#define MLX5_UAR_OFFSET (1ULL << (sizeof(uintptr_t) * 4))
-
 /* Maximum number of UAR pages used by a port,
  * These are the size and mask for an array of mutexes used to synchronize
  * the access to port's UARs on platforms that do not support 64 bit writes.
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 5fb1761955..9965b2b771 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -240,18 +240,19 @@ txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl)
 {
        struct mlx5_priv *priv = txq_ctrl->priv;
        struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
+#ifndef RTE_ARCH_64
+       unsigned int lock_idx;
+       const size_t page_size = sysconf(_SC_PAGESIZE);
+#endif

        assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
        assert(ppriv);
        ppriv->uar_table[txq_ctrl->txq.idx] = txq_ctrl->bf_reg;
 #ifndef RTE_ARCH_64
-       struct mlx5_priv *priv = txq_ctrl->priv;
-       struct mlx5_txq_data *txq = &txq_ctrl->txq;
-       unsigned int lock_idx;
        /* Assign an UAR lock according to UAR page number */
        lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
                   MLX5_UAR_PAGE_NUM_MASK;
-       txq->uar_lock = &priv->uar_lock[lock_idx];
+       txq_ctrl->txq.uar_lock = &priv->uar_lock[lock_idx];
 #endif
 }




^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [dpdk-dev] [PATCH v5 0/4] net/mlx: remove device register remap
  2019-04-10 18:41 ` [dpdk-dev] [PATCH v5 " Yongseok Koh
                     ` (4 preceding siblings ...)
  2019-04-10 18:41   ` [dpdk-dev] [PATCH v5 4/4] net/mlx4: " Yongseok Koh
@ 2019-04-11  8:40   ` Shahaf Shuler
  2019-04-11  8:40     ` Shahaf Shuler
  5 siblings, 1 reply; 66+ messages in thread
From: Shahaf Shuler @ 2019-04-11  8:40 UTC (permalink / raw)
  To: Yongseok Koh; +Cc: dev

Wednesday, April 10, 2019 9:41 PM, Yongseok Koh:
> Subject: [dpdk-dev] [PATCH v5 0/4] net/mlx: remove device register remap
> 
> This patchset lifts the requirement of reserving huge virtual address space
> and remapping device UAR register on to it in order to use the same address
> between primary and secondary process.
> 

Applied to next-net-mlx, replacing the previous v4. Sorry for that.
Thanks. 

> v5:
> * fix 32-bit build issue
> * remove MLX[4|5]_UAR_SIZE and MLX[4|5]_UAR_OFFSET
> 
> v4:
> * add mlx4_proc_priv_[init|uninit]() to avoid duplicate code
> * remove the number of Rx queues from the ppriv size calculation
> * move freeing ppriv to mlx4_dev_close() from mlx4_dev_stop()
> * rebase on top of "net/mlx4: fix Tx doorbell register unmap" [1]
> 
> v3:
> * move UAR table to per-process storage
> 
> v2:
> * rebase on the latest branch tip
> * fix a bug
> 
> [1]
> https://eur03.safelinks.protection.outlook.com/?url=http%3A%2F%2Fpatch
> es.dpdk.org%2Fpatch%2F52435%2F&amp;data=02%7C01%7Cshahafs%40mel
> lanox.com%7C8399415a5e5540a23b8608d6bde424e5%7Ca652971c7d2e4d9ba
> 6a4d149256f461b%7C0%7C0%7C636905184909290125&amp;sdata=tyL7dnToo
> KnkgSQzoFIhpSsBVcVevMbBHcZ%2BeWlXLiA%3D&amp;reserved=0
> 
> Yongseok Koh (4):
>   net/mlx5: fix recursive inclusion of header file
>   net/mlx5: remove redundant queue index
>   net/mlx5: remove device register remap
>   net/mlx4: remove device register remap
> 
>  drivers/net/mlx4/mlx4.c            | 255 +++++++++----------------------------
>  drivers/net/mlx4/mlx4.h            |  25 ++--
>  drivers/net/mlx4/mlx4_prm.h        |   3 +-
>  drivers/net/mlx4/mlx4_rxtx.c       |   2 +-
>  drivers/net/mlx4/mlx4_rxtx.h       |   6 +-
>  drivers/net/mlx4/mlx4_txq.c        | 170 +++++++++++++++----------
>  drivers/net/mlx5/mlx5.c            | 228 ++++++++-------------------------
>  drivers/net/mlx5/mlx5.h            |  17 ++-
>  drivers/net/mlx5/mlx5_defs.h       |  10 --
>  drivers/net/mlx5/mlx5_ethdev.c     |   3 +
>  drivers/net/mlx5/mlx5_flow.c       |   5 +-
>  drivers/net/mlx5/mlx5_flow_dv.c    |   4 +-
>  drivers/net/mlx5/mlx5_flow_verbs.c |   5 +-
>  drivers/net/mlx5/mlx5_rxq.c        |  29 ++---
>  drivers/net/mlx5/mlx5_rxtx.h       |  21 +--
>  drivers/net/mlx5/mlx5_stats.c      |  15 +--
>  drivers/net/mlx5/mlx5_trigger.c    |   8 +-
>  drivers/net/mlx5/mlx5_txq.c        | 200 +++++++++++++++++------------
>  drivers/net/mlx5/mlx5_vlan.c       |   3 +-
>  19 files changed, 414 insertions(+), 595 deletions(-)
> 
> --
> 2.11.0

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [dpdk-dev] [PATCH v5 0/4] net/mlx: remove device register remap
  2019-04-11  8:40   ` [dpdk-dev] [PATCH v5 0/4] net/mlx: " Shahaf Shuler
@ 2019-04-11  8:40     ` Shahaf Shuler
  0 siblings, 0 replies; 66+ messages in thread
From: Shahaf Shuler @ 2019-04-11  8:40 UTC (permalink / raw)
  To: Yongseok Koh; +Cc: dev

Wednesday, April 10, 2019 9:41 PM, Yongseok Koh:
> Subject: [dpdk-dev] [PATCH v5 0/4] net/mlx: remove device register remap
> 
> This patchset lifts the requirement of reserving huge virtual address space
> and remapping device UAR register on to it in order to use the same address
> between primary and secondary process.
> 

Applied to next-net-mlx, replacing the previous v4. Sorry for that.
Thanks. 

> v5:
> * fix 32-bit build issue
> * remove MLX[4|5]_UAR_SIZE and MLX[4|5]_UAR_OFFSET
> 
> v4:
> * add mlx4_proc_priv_[init|uninit]() to avoid duplicate code
> * remove the number of Rx queues from the ppriv size calculation
> * move freeing ppriv to mlx4_dev_close() from mlx4_dev_stop()
> * rebase on top of "net/mlx4: fix Tx doorbell register unmap" [1]
> 
> v3:
> * move UAR table to per-process storage
> 
> v2:
> * rebase on the latest branch tip
> * fix a bug
> 
> [1]
> https://eur03.safelinks.protection.outlook.com/?url=http%3A%2F%2Fpatch
> es.dpdk.org%2Fpatch%2F52435%2F&amp;data=02%7C01%7Cshahafs%40mel
> lanox.com%7C8399415a5e5540a23b8608d6bde424e5%7Ca652971c7d2e4d9ba
> 6a4d149256f461b%7C0%7C0%7C636905184909290125&amp;sdata=tyL7dnToo
> KnkgSQzoFIhpSsBVcVevMbBHcZ%2BeWlXLiA%3D&amp;reserved=0
> 
> Yongseok Koh (4):
>   net/mlx5: fix recursive inclusion of header file
>   net/mlx5: remove redundant queue index
>   net/mlx5: remove device register remap
>   net/mlx4: remove device register remap
> 
>  drivers/net/mlx4/mlx4.c            | 255 +++++++++----------------------------
>  drivers/net/mlx4/mlx4.h            |  25 ++--
>  drivers/net/mlx4/mlx4_prm.h        |   3 +-
>  drivers/net/mlx4/mlx4_rxtx.c       |   2 +-
>  drivers/net/mlx4/mlx4_rxtx.h       |   6 +-
>  drivers/net/mlx4/mlx4_txq.c        | 170 +++++++++++++++----------
>  drivers/net/mlx5/mlx5.c            | 228 ++++++++-------------------------
>  drivers/net/mlx5/mlx5.h            |  17 ++-
>  drivers/net/mlx5/mlx5_defs.h       |  10 --
>  drivers/net/mlx5/mlx5_ethdev.c     |   3 +
>  drivers/net/mlx5/mlx5_flow.c       |   5 +-
>  drivers/net/mlx5/mlx5_flow_dv.c    |   4 +-
>  drivers/net/mlx5/mlx5_flow_verbs.c |   5 +-
>  drivers/net/mlx5/mlx5_rxq.c        |  29 ++---
>  drivers/net/mlx5/mlx5_rxtx.h       |  21 +--
>  drivers/net/mlx5/mlx5_stats.c      |  15 +--
>  drivers/net/mlx5/mlx5_trigger.c    |   8 +-
>  drivers/net/mlx5/mlx5_txq.c        | 200 +++++++++++++++++------------
>  drivers/net/mlx5/mlx5_vlan.c       |   3 +-
>  19 files changed, 414 insertions(+), 595 deletions(-)
> 
> --
> 2.11.0


^ permalink raw reply	[flat|nested] 66+ messages in thread

end of thread, other threads:[~2019-04-11  8:41 UTC | newest]

Thread overview: 66+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-25 19:36 [dpdk-dev] [PATCH 0/3] net/mlx: remove device register remap Yongseok Koh
2019-03-25 19:36 ` Yongseok Koh
2019-03-25 19:36 ` [dpdk-dev] [PATCH 1/3] net/mlx5: fix recursive inclusion of header file Yongseok Koh
2019-03-25 19:36   ` Yongseok Koh
2019-03-25 19:36 ` [dpdk-dev] [PATCH 2/3] net/mlx5: remove device register remap Yongseok Koh
2019-03-25 19:36   ` Yongseok Koh
2019-03-25 19:36 ` [dpdk-dev] [PATCH 3/3] net/mlx4: " Yongseok Koh
2019-03-25 19:36   ` Yongseok Koh
2019-04-01 21:22 ` [dpdk-dev] [PATCH v2 0/3] net/mlx: " Yongseok Koh
2019-04-01 21:22   ` Yongseok Koh
2019-04-01 21:22   ` [dpdk-dev] [PATCH v2 1/3] net/mlx5: fix recursive inclusion of header file Yongseok Koh
2019-04-01 21:22     ` Yongseok Koh
2019-04-02  5:39     ` Shahaf Shuler
2019-04-02  5:39       ` Shahaf Shuler
2019-04-01 21:22   ` [dpdk-dev] [PATCH v2 2/3] net/mlx5: remove device register remap Yongseok Koh
2019-04-01 21:22     ` Yongseok Koh
2019-04-02  6:50     ` Shahaf Shuler
2019-04-02  6:50       ` Shahaf Shuler
2019-04-01 21:22   ` [dpdk-dev] [PATCH v2 3/3] net/mlx4: " Yongseok Koh
2019-04-01 21:22     ` Yongseok Koh
2019-04-05  1:33 ` [dpdk-dev] [PATCH v3 0/4] net/mlx: " Yongseok Koh
2019-04-05  1:33   ` Yongseok Koh
2019-04-05  1:33   ` [dpdk-dev] [PATCH v3 1/4] net/mlx5: fix recursive inclusion of header file Yongseok Koh
2019-04-05  1:33     ` Yongseok Koh
2019-04-05  1:33   ` [dpdk-dev] [PATCH v3 2/4] net/mlx5: remove redundant queue index Yongseok Koh
2019-04-05  1:33     ` Yongseok Koh
2019-04-08  5:24     ` Shahaf Shuler
2019-04-08  5:24       ` Shahaf Shuler
2019-04-05  1:33   ` [dpdk-dev] [PATCH v3 3/4] net/mlx5: remove device register remap Yongseok Koh
2019-04-05  1:33     ` Yongseok Koh
2019-04-08  5:48     ` Shahaf Shuler
2019-04-08  5:48       ` Shahaf Shuler
2019-04-09 19:36       ` Yongseok Koh
2019-04-09 19:36         ` Yongseok Koh
2019-04-05  1:33   ` [dpdk-dev] [PATCH v3 4/4] net/mlx4: " Yongseok Koh
2019-04-05  1:33     ` Yongseok Koh
2019-04-09 23:13 ` [dpdk-dev] [PATCH v4 0/4] net/mlx: " Yongseok Koh
2019-04-09 23:13   ` Yongseok Koh
2019-04-09 23:13   ` [dpdk-dev] [PATCH v4 1/4] net/mlx5: fix recursive inclusion of header file Yongseok Koh
2019-04-09 23:13     ` Yongseok Koh
2019-04-09 23:13   ` [dpdk-dev] [PATCH v4 2/4] net/mlx5: remove redundant queue index Yongseok Koh
2019-04-09 23:13     ` Yongseok Koh
2019-04-09 23:13   ` [dpdk-dev] [PATCH v4 3/4] net/mlx5: remove device register remap Yongseok Koh
2019-04-09 23:13     ` Yongseok Koh
2019-04-10 17:50     ` Ferruh Yigit
2019-04-10 17:50       ` Ferruh Yigit
2019-04-10 19:12       ` Yongseok Koh
2019-04-10 19:12         ` Yongseok Koh
2019-04-09 23:13   ` [dpdk-dev] [PATCH v4 4/4] net/mlx4: " Yongseok Koh
2019-04-09 23:13     ` Yongseok Koh
2019-04-10  6:58   ` [dpdk-dev] [PATCH v4 0/4] net/mlx: " Shahaf Shuler
2019-04-10  6:58     ` Shahaf Shuler
2019-04-10 17:50     ` Ferruh Yigit
2019-04-10 17:50       ` Ferruh Yigit
2019-04-10 18:41 ` [dpdk-dev] [PATCH v5 " Yongseok Koh
2019-04-10 18:41   ` Yongseok Koh
2019-04-10 18:41   ` [dpdk-dev] [PATCH v5 1/4] net/mlx5: fix recursive inclusion of header file Yongseok Koh
2019-04-10 18:41     ` Yongseok Koh
2019-04-10 18:41   ` [dpdk-dev] [PATCH v5 2/4] net/mlx5: remove redundant queue index Yongseok Koh
2019-04-10 18:41     ` Yongseok Koh
2019-04-10 18:41   ` [dpdk-dev] [PATCH v5 3/4] net/mlx5: remove device register remap Yongseok Koh
2019-04-10 18:41     ` Yongseok Koh
2019-04-10 18:41   ` [dpdk-dev] [PATCH v5 4/4] net/mlx4: " Yongseok Koh
2019-04-10 18:41     ` Yongseok Koh
2019-04-11  8:40   ` [dpdk-dev] [PATCH v5 0/4] net/mlx: " Shahaf Shuler
2019-04-11  8:40     ` Shahaf Shuler

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).