DPDK patches and discussions
 help / color / mirror / Atom feed
From: Adrien Mazarguil <adrien.mazarguil@6wind.com>
To: dev@dpdk.org
Subject: [dpdk-dev] [PATCH v3 7/7] mlx: use aligned memory to register regions
Date: Thu,  3 Mar 2016 15:27:17 +0100	[thread overview]
Message-ID: <1457015237-2993-8-git-send-email-adrien.mazarguil@6wind.com> (raw)
In-Reply-To: <1457015237-2993-1-git-send-email-adrien.mazarguil@6wind.com>

The first and last memory pool elements are usually cache-aligned but not
page-aligned, particularly when using huge pages.

Hardware performance can be improved significantly by registering memory
regions starting and ending on page boundaries.

Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
---
 drivers/net/mlx4/mlx4.c      | 58 +++++++++++++++++++++++++++++++++++++-------
 drivers/net/mlx5/mlx5_rxq.c  |  6 +----
 drivers/net/mlx5/mlx5_rxtx.c | 52 ++++++++++++++++++++++++++++++++++++---
 drivers/net/mlx5/mlx5_rxtx.h |  1 +
 4 files changed, 99 insertions(+), 18 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 6688f66..3c1f4c2 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -86,6 +86,7 @@
 #include <rte_version.h>
 #include <rte_log.h>
 #include <rte_alarm.h>
+#include <rte_memory.h>
 #ifdef PEDANTIC
 #pragma GCC diagnostic error "-pedantic"
 #endif
@@ -1177,6 +1178,52 @@ txq_complete(struct txq *txq)
 	return 0;
 }
 
+/* For best performance, this function should not be inlined. */
+static struct ibv_mr *mlx4_mp2mr(struct ibv_pd *, const struct rte_mempool *)
+	__attribute__((noinline));
+
+/**
+ * Register mempool as a memory region.
+ *
+ * Start and end are rounded out to the page size of the segment they are in.
+ *
+ * @param pd
+ *   Pointer to protection domain.
+ * @param mp
+ *   Pointer to memory pool.
+ *
+ * @return
+ *   Memory region pointer, NULL in case of error.
+ */
+static struct ibv_mr *
+mlx4_mp2mr(struct ibv_pd *pd, const struct rte_mempool *mp)
+{
+	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+	uintptr_t start = mp->elt_va_start;
+	uintptr_t end = mp->elt_va_end;
+	unsigned int i;
+
+	DEBUG("mempool %p area start=%p end=%p size=%zu", (const void *)mp,
+	      (void *)start, (void *)end, (size_t)(end - start));
+	/* Round start and end to page boundary if found in memory segments. */
+	for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
+		uintptr_t addr = (uintptr_t)ms[i].addr;
+		size_t len = ms[i].len;
+		/* Keep full width: hugepage_sz may not fit in unsigned int. */
+		uintptr_t align = ms[i].hugepage_sz;
+
+		if ((start > addr) && (start < addr + len))
+			start = RTE_ALIGN_FLOOR(start, align);
+		if ((end > addr) && (end < addr + len))
+			end = RTE_ALIGN_CEIL(end, align);
+	}
+	DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
+	      (const void *)mp, (void *)start, (void *)end,
+	      (size_t)(end - start));
+	return ibv_reg_mr(pd, (void *)start, end - start,
+			  IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE);
+}
+
 /**
  * Get Memory Pool (MP) from mbuf. If mbuf is indirect, the pool from which
  * the cloned mbuf is allocated is returned instead.
@@ -1228,10 +1275,7 @@ txq_mp2mr(struct txq *txq, const struct rte_mempool *mp)
 	/* Add a new entry, register MR first. */
 	DEBUG("%p: discovered new memory pool \"%s\" (%p)",
 	      (void *)txq, mp->name, (const void *)mp);
-	mr = ibv_reg_mr(txq->priv->pd,
-			(void *)mp->elt_va_start,
-			(mp->elt_va_end - mp->elt_va_start),
-			(IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE));
+	mr = mlx4_mp2mr(txq->priv->pd, mp);
 	if (unlikely(mr == NULL)) {
 		DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.",
 		      (void *)txq);
@@ -3713,11 +3757,7 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
 	DEBUG("%p: %s scattered packets support (%u WRs)",
 	      (void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc);
 	/* Use the entire RX mempool as the memory region. */
-	tmpl.mr = ibv_reg_mr(priv->pd,
-			     (void *)mp->elt_va_start,
-			     (mp->elt_va_end - mp->elt_va_start),
-			     (IBV_ACCESS_LOCAL_WRITE |
-			      IBV_ACCESS_REMOTE_WRITE));
+	tmpl.mr = mlx4_mp2mr(priv->pd, mp);
 	if (tmpl.mr == NULL) {
 		ret = EINVAL;
 		ERROR("%p: MR creation failure: %s",
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 55d002e..0f5ac65 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1190,11 +1190,7 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
 	DEBUG("%p: %s scattered packets support (%u WRs)",
 	      (void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc);
 	/* Use the entire RX mempool as the memory region. */
-	tmpl.mr = ibv_reg_mr(priv->pd,
-			     (void *)mp->elt_va_start,
-			     (mp->elt_va_end - mp->elt_va_start),
-			     (IBV_ACCESS_LOCAL_WRITE |
-			      IBV_ACCESS_REMOTE_WRITE));
+	tmpl.mr = mlx5_mp2mr(priv->pd, mp);
 	if (tmpl.mr == NULL) {
 		ret = EINVAL;
 		ERROR("%p: MR creation failure: %s",
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 622ac17..4c53c7a 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -55,6 +55,7 @@
 #include <rte_prefetch.h>
 #include <rte_common.h>
 #include <rte_branch_prediction.h>
+#include <rte_memory.h>
 #ifdef PEDANTIC
 #pragma GCC diagnostic error "-pedantic"
 #endif
@@ -135,6 +136,52 @@ txq_complete(struct txq *txq)
 	return 0;
 }
 
+/* For best performance, this function should not be inlined. */
+struct ibv_mr *mlx5_mp2mr(struct ibv_pd *, const struct rte_mempool *)
+	__attribute__((noinline));
+
+/**
+ * Register mempool as a memory region.
+ *
+ * Start and end are rounded out to the page size of the segment they are in.
+ *
+ * @param pd
+ *   Pointer to protection domain.
+ * @param mp
+ *   Pointer to memory pool.
+ *
+ * @return
+ *   Memory region pointer, NULL in case of error.
+ */
+struct ibv_mr *
+mlx5_mp2mr(struct ibv_pd *pd, const struct rte_mempool *mp)
+{
+	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+	uintptr_t start = mp->elt_va_start;
+	uintptr_t end = mp->elt_va_end;
+	unsigned int i;
+
+	DEBUG("mempool %p area start=%p end=%p size=%zu", (const void *)mp,
+	      (void *)start, (void *)end, (size_t)(end - start));
+	/* Round start and end to page boundary if found in memory segments. */
+	for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
+		uintptr_t addr = (uintptr_t)ms[i].addr;
+		size_t len = ms[i].len;
+		/* Keep full width: hugepage_sz may not fit in unsigned int. */
+		uintptr_t align = ms[i].hugepage_sz;
+
+		if ((start > addr) && (start < addr + len))
+			start = RTE_ALIGN_FLOOR(start, align);
+		if ((end > addr) && (end < addr + len))
+			end = RTE_ALIGN_CEIL(end, align);
+	}
+	DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
+	      (const void *)mp, (void *)start, (void *)end,
+	      (size_t)(end - start));
+	return ibv_reg_mr(pd, (void *)start, end - start,
+			  IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE);
+}
+
 /**
  * Get Memory Pool (MP) from mbuf. If mbuf is indirect, the pool from which
  * the cloned mbuf is allocated is returned instead.
@@ -186,10 +233,7 @@ txq_mp2mr(struct txq *txq, const struct rte_mempool *mp)
 	/* Add a new entry, register MR first. */
 	DEBUG("%p: discovered new memory pool \"%s\" (%p)",
 	      (void *)txq, mp->name, (const void *)mp);
-	mr = ibv_reg_mr(txq->priv->pd,
-			(void *)mp->elt_va_start,
-			(mp->elt_va_end - mp->elt_va_start),
-			(IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE));
+	mr = mlx5_mp2mr(txq->priv->pd, mp);
 	if (unlikely(mr == NULL)) {
 		DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.",
 		      (void *)txq);
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index b239ebf..e85cf93 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -317,6 +317,7 @@ void mlx5_tx_queue_release(void *);
 
 /* mlx5_rxtx.c */
 
+struct ibv_mr *mlx5_mp2mr(struct ibv_pd *, const struct rte_mempool *);
 void txq_mp2mr_iter(const struct rte_mempool *, void *);
 uint16_t mlx5_tx_burst(void *, struct rte_mbuf **, uint16_t);
 uint16_t mlx5_rx_burst_sp(void *, struct rte_mbuf **, uint16_t);
-- 
2.1.4

  parent reply	other threads:[~2016-03-03 14:27 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-01-29 10:32 [dpdk-dev] [PATCH 0/6] Performance optimizations for mlx5 Adrien Mazarguil
2016-01-29 10:32 ` [dpdk-dev] [PATCH 1/6] mlx5: prefetch next TX mbuf header and data Adrien Mazarguil
2016-01-29 10:32 ` [dpdk-dev] [PATCH 2/6] mlx5: reorder TX/RX queue structure Adrien Mazarguil
2016-01-29 10:32 ` [dpdk-dev] [PATCH 3/6] mlx5: remove one indirection level from RX/TX functions Adrien Mazarguil
2016-01-29 10:32 ` [dpdk-dev] [PATCH 4/6] mlx5: process offload flags only when requested Adrien Mazarguil
2016-01-29 10:32 ` [dpdk-dev] [PATCH 5/6] mlx5: avoid lkey retrieval for inlined packets Adrien Mazarguil
2016-01-29 10:32 ` [dpdk-dev] [PATCH 6/6] mlx5: free buffers immediately after completion Adrien Mazarguil
2016-02-22 18:17 ` [dpdk-dev] [PATCH v2 0/7] Performance optimizations for mlx5 and mlx4 Adrien Mazarguil
2016-02-22 18:17   ` [dpdk-dev] [PATCH v2 1/7] mlx5: prefetch next TX mbuf header and data Adrien Mazarguil
2016-02-22 18:17   ` [dpdk-dev] [PATCH v2 2/7] mlx5: reorder TX/RX queue structure Adrien Mazarguil
2016-02-22 18:17   ` [dpdk-dev] [PATCH v2 3/7] mlx5: remove one indirection level from RX/TX functions Adrien Mazarguil
2016-02-22 18:18   ` [dpdk-dev] [PATCH v2 4/7] mlx5: process offload flags only when requested Adrien Mazarguil
2016-02-22 18:18   ` [dpdk-dev] [PATCH v2 5/7] mlx5: avoid lkey retrieval for inlined packets Adrien Mazarguil
2016-02-22 18:18   ` [dpdk-dev] [PATCH v2 6/7] mlx5: free buffers immediately after completion Adrien Mazarguil
2016-02-22 18:18   ` [dpdk-dev] [PATCH v2 7/7] mlx: use aligned memory to register regions Adrien Mazarguil
2016-03-03 14:27   ` [dpdk-dev] [PATCH v3 0/7] Performance optimizations for mlx5 and mlx4 Adrien Mazarguil
2016-03-03 14:27     ` [dpdk-dev] [PATCH v3 1/7] mlx5: prefetch next TX mbuf header and data Adrien Mazarguil
2016-03-03 14:27     ` [dpdk-dev] [PATCH v3 2/7] mlx5: reorder TX/RX queue structure Adrien Mazarguil
2016-03-03 14:27     ` [dpdk-dev] [PATCH v3 3/7] mlx5: remove one indirection level from RX/TX functions Adrien Mazarguil
2016-03-03 14:27     ` [dpdk-dev] [PATCH v3 4/7] mlx5: process offload flags only when requested Adrien Mazarguil
2016-03-03 14:27     ` [dpdk-dev] [PATCH v3 5/7] mlx5: avoid lkey retrieval for inlined packets Adrien Mazarguil
2016-03-03 14:27     ` [dpdk-dev] [PATCH v3 6/7] mlx5: free buffers immediately after completion Adrien Mazarguil
2016-03-03 14:27     ` Adrien Mazarguil [this message]
2016-03-09 16:28     ` [dpdk-dev] [PATCH v3 0/7] Performance optimizations for mlx5 and mlx4 Bruce Richardson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1457015237-2993-8-git-send-email-adrien.mazarguil@6wind.com \
    --to=adrien.mazarguil@6wind.com \
    --cc=dev@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).