DPDK patches and discussions
From: Dmitry Kozlyuk <dkozlyuk@oss.nvidia.com>
To: <dev@dpdk.org>
Cc: Raslan Darawsheh <rasland@oss.nvidia.com>,
	Matan Azrad <matan@oss.nvidia.com>,
	Viacheslav Ovsiienko <viacheslavo@oss.nvidia.com>
Subject: [dpdk-dev] [PATCH] common/mlx5: fix external memory pool registration
Date: Tue, 2 Nov 2021 08:59:17 +0200	[thread overview]
Message-ID: <20211102065917.889267-1-dkozlyuk@nvidia.com> (raw)

Registration of packet mempools with RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF
was performed incorrectly: after population, the chunks of such
a mempool only contain memory for the rte_mbuf structures, while the
pointers to the actual external memory are not yet filled in.
MR LKeys could not be obtained for the external memory addresses
of such mempools. The Rx datapath assumes all used mempools
are registered and does not fall back to dynamic MR creation
in this case, so no packets could be received.
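
For context, a pool of this kind is typically created with
rte_pktmbuf_pool_create_extbuf(). The snippet below is a minimal
sketch of such a setup; the sizes, the names, and the use of
rte_malloc_socket() as the "external" area are illustrative only:

    #include <rte_mbuf.h>
    #include <rte_malloc.h>

    #define EXT_AREA_LEN (8u << 20) /* 8 MB external area (example) */
    #define ELT_SIZE     2048       /* data buffer per mbuf */
    #define NB_MBUFS     4096

    static struct rte_mempool *
    create_pinned_extbuf_pool(int socket)
    {
            struct rte_pktmbuf_extmem ext = { 0 };

            /* Area playing the role of external (pinned) memory. */
            ext.buf_ptr = rte_malloc_socket("ext", EXT_AREA_LEN, 4096, socket);
            if (ext.buf_ptr == NULL)
                    return NULL;
            ext.buf_iova = rte_malloc_virt2iova(ext.buf_ptr);
            ext.buf_len = EXT_AREA_LEN;
            ext.elt_size = ELT_SIZE;

            /* Mbuf structures live in ordinary mempool chunks, while
             * each mbuf's buf_addr points into 'ext'; the resulting
             * pool has RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF set.
             */
            return rte_pktmbuf_pool_create_extbuf("mb_ext", NB_MBUFS, 256,
                                                  0, ELT_SIZE, socket,
                                                  &ext, 1);
    }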

Skip registration of such mempools at population time, because it is
useless at that point. If they are used for Rx, they are registered
at port start. During this registration, recognize such mempools,
inspect their mbufs, and recover the external memory pages they
reside in.
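
Conceptually, the recovery step amounts to walking the mbufs and
collecting the pages their buf_addr pointers fall into. The sketch
below only illustrates the idea; the names (page_list,
collect_extmem_pages, ...) are made up for illustration, and the real
implementation is mlx5_mempool_get_extmem() in the diff, which keeps
the pages in a sorted heap instead of a linear list:

    #include <unistd.h>
    #include <rte_mbuf.h>
    #include <rte_memory.h>

    struct page_list {
            uintptr_t pages[1024]; /* illustrative fixed bound */
            unsigned int n;
    };

    /* Record the page holding the mbuf's external data buffer. */
    static void
    collect_page_cb(struct rte_mempool *mp, void *opaque,
                    void *obj, unsigned int obj_idx)
    {
            struct page_list *pl = opaque;
            struct rte_mbuf *mbuf = obj;
            uintptr_t addr = (uintptr_t)mbuf->buf_addr;
            struct rte_memseg_list *msl =
                    rte_mem_virt2memseg_list(mbuf->buf_addr);
            size_t page_size = msl != NULL ? msl->page_sz :
                               (size_t)sysconf(_SC_PAGESIZE);
            uintptr_t page = RTE_PTR_ALIGN_FLOOR(addr, page_size);
            unsigned int i;

            RTE_SET_USED(mp);
            RTE_SET_USED(obj_idx);
            for (i = 0; i < pl->n; i++)
                    if (pl->pages[i] == page)
                            return; /* page already recorded */
            if (pl->n < RTE_DIM(pl->pages))
                    pl->pages[pl->n++] = page;
    }

    /* Collect pages only for pinned-external-buffer pools. */
    static void
    collect_extmem_pages(struct rte_mempool *mp, struct page_list *pl)
    {
            pl->n = 0;
            if (rte_pktmbuf_priv_flags(mp) & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF)
                    rte_mempool_obj_iter(mp, collect_page_cb, pl);
    }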

While MRs for these pages may already have been created by
rte_dev_dma_map(), they are not reused in order to avoid
synchronization on the Rx datapath in case those MRs are changed
in the database.

Fixes: 690b2a88c2f7 ("common/mlx5: add mempool registration facilities")

Signed-off-by: Dmitry Kozlyuk <dkozlyuk@nvidia.com>
Reviewed-by: Matan Azrad <matan@nvidia.com>
Reviewed-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 drivers/common/mlx5/mlx5_common.c    |   4 +
 drivers/common/mlx5/mlx5_common_mr.c | 113 +++++++++++++++++++++++++--
 drivers/net/mlx5/mlx5_trigger.c      |   8 +-
 3 files changed, 117 insertions(+), 8 deletions(-)

diff --git a/drivers/common/mlx5/mlx5_common.c b/drivers/common/mlx5/mlx5_common.c
index e6ff045c95..97df19db4e 100644
--- a/drivers/common/mlx5/mlx5_common.c
+++ b/drivers/common/mlx5/mlx5_common.c
@@ -395,9 +395,13 @@ mlx5_dev_mempool_event_cb(enum rte_mempool_event event, struct rte_mempool *mp,
 			  void *arg)
 {
 	struct mlx5_common_device *cdev = arg;
+	bool extmem = rte_pktmbuf_priv_flags(mp) &
+		      RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF;
 
 	switch (event) {
 	case RTE_MEMPOOL_EVENT_READY:
+		if (extmem)
+			break;
 		if (mlx5_dev_mempool_register(cdev, mp) < 0)
 			DRV_LOG(ERR,
 				"Failed to register new mempool %s for PD %p: %s",
diff --git a/drivers/common/mlx5/mlx5_common_mr.c b/drivers/common/mlx5/mlx5_common_mr.c
index 53a3e8565d..3d7a892c9b 100644
--- a/drivers/common/mlx5/mlx5_common_mr.c
+++ b/drivers/common/mlx5/mlx5_common_mr.c
@@ -1331,6 +1331,105 @@ mlx5_range_from_mempool_chunk(struct rte_mempool *mp, void *opaque,
 	range->end = RTE_ALIGN_CEIL(range->start + memhdr->len, page_size);
 }
 
+/**
+ * Collect page-aligned memory ranges of the mempool.
+ */
+static int
+mlx5_mempool_get_chunks(struct rte_mempool *mp, struct mlx5_range **out,
+			unsigned int *out_n)
+{
+	struct mlx5_range *chunks;
+	unsigned int n;
+
+	n = mp->nb_mem_chunks;
+	chunks = calloc(sizeof(chunks[0]), n);
+	if (chunks == NULL)
+		return -1;
+	rte_mempool_mem_iter(mp, mlx5_range_from_mempool_chunk, chunks);
+	*out = chunks;
+	*out_n = n;
+	return 0;
+}
+
+struct mlx5_mempool_get_extmem_data {
+	struct mlx5_range *heap;
+	unsigned int heap_size;
+	int ret;
+};
+
+static void
+mlx5_mempool_get_extmem_cb(struct rte_mempool *mp, void *opaque,
+			   void *obj, unsigned int obj_idx)
+{
+	struct mlx5_mempool_get_extmem_data *data = opaque;
+	struct rte_mbuf *mbuf = obj;
+	uintptr_t addr = (uintptr_t)mbuf->buf_addr;
+	struct mlx5_range *seg, *heap;
+	struct rte_memseg_list *msl;
+	size_t page_size;
+	uintptr_t page_start;
+	unsigned int pos = 0, len = data->heap_size, delta;
+
+	RTE_SET_USED(mp);
+	RTE_SET_USED(obj_idx);
+	if (data->ret < 0)
+		return;
+	/* Binary search for an already visited page. */
+	while (len > 1) {
+		delta = len / 2;
+		if (addr < data->heap[pos + delta].start) {
+			len = delta;
+		} else {
+			pos += delta;
+			len -= delta;
+		}
+	}
+	if (data->heap != NULL) {
+		seg = &data->heap[pos];
+		if (seg->start <= addr && addr < seg->end)
+			return;
+	}
+	/* Determine the page boundaries and remember them. */
+	heap = realloc(data->heap, sizeof(heap[0]) * (data->heap_size + 1));
+	if (heap == NULL) {
+		free(data->heap);
+		data->heap = NULL;
+		data->ret = -1;
+		return;
+	}
+	data->heap = heap;
+	data->heap_size++;
+	seg = &heap[data->heap_size - 1];
+	msl = rte_mem_virt2memseg_list((void *)addr);
+	page_size = msl != NULL ? msl->page_sz : rte_mem_page_size();
+	page_start = RTE_PTR_ALIGN_FLOOR(addr, page_size);
+	seg->start = page_start;
+	seg->end = page_start + page_size;
+	/* Maintain the heap order. */
+	qsort(data->heap, data->heap_size, sizeof(heap[0]),
+	      mlx5_range_compare_start);
+}
+
+/**
+ * Recover, as closely as possible, the external memory pages
+ * used by a mempool created with RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF.
+ * Pages are stored in a heap for efficient search, because mbufs are many.
+ */
+static int
+mlx5_mempool_get_extmem(struct rte_mempool *mp, struct mlx5_range **out,
+			unsigned int *out_n)
+{
+	struct mlx5_mempool_get_extmem_data data;
+
+	memset(&data, 0, sizeof(data));
+	rte_mempool_obj_iter(mp, mlx5_mempool_get_extmem_cb, &data);
+	if (data.ret < 0)
+		return -1;
+	*out = data.heap;
+	*out_n = data.heap_size;
+	return 0;
+}
+
 /**
  * Get VA-contiguous ranges of the mempool memory.
  * Each range start and end is aligned to the system page size.
@@ -1350,13 +1449,15 @@ mlx5_get_mempool_ranges(struct rte_mempool *mp, struct mlx5_range **out,
 			unsigned int *out_n)
 {
 	struct mlx5_range *chunks;
-	unsigned int chunks_n = mp->nb_mem_chunks, contig_n, i;
+	unsigned int chunks_n, contig_n, i;
+	int ret;
 
-	/* Collect page-aligned memory ranges of the mempool. */
-	chunks = calloc(sizeof(chunks[0]), chunks_n);
-	if (chunks == NULL)
-		return -1;
-	rte_mempool_mem_iter(mp, mlx5_range_from_mempool_chunk, chunks);
+	/* Collect the pool underlying memory. */
+	ret = (rte_pktmbuf_priv_flags(mp) & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) ?
+	      mlx5_mempool_get_extmem(mp, &chunks, &chunks_n) :
+	      mlx5_mempool_get_chunks(mp, &chunks, &chunks_n);
+	if (ret < 0)
+		return ret;
 	/* Merge adjacent chunks and place them at the beginning. */
 	qsort(chunks, chunks_n, sizeof(chunks[0]), mlx5_range_compare_start);
 	contig_n = 1;
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index d916c8addc..546b20bf86 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -146,14 +146,18 @@ mlx5_rxq_mempool_register(struct mlx5_rxq_ctrl *rxq_ctrl)
 		return 0;
 	}
 	for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) {
+		uint32_t flags;
+
 		mp = rxq_ctrl->rxq.rxseg[s].mp;
+		flags = rte_pktmbuf_priv_flags(mp);
 		ret = mlx5_mr_mempool_register(&priv->sh->cdev->mr_scache,
 					       priv->sh->cdev->pd, mp,
 					       &priv->mp_id);
 		if (ret < 0 && rte_errno != EEXIST)
 			return ret;
-		rte_mempool_mem_iter(mp, mlx5_rxq_mempool_register_cb,
-				     &rxq_ctrl->rxq);
+		if ((flags & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) == 0)
+			rte_mempool_mem_iter(mp, mlx5_rxq_mempool_register_cb,
+					&rxq_ctrl->rxq);
 	}
 	return 0;
 }
-- 
2.25.1


Thread overview: 3+ messages
2021-11-02  6:59 Dmitry Kozlyuk [this message]
2021-11-09 10:32 ` [dpdk-dev] [PATCH v2] " Dmitry Kozlyuk
2021-11-09 13:40   ` Raslan Darawsheh
