DPDK patches and discussions
 help / color / mirror / Atom feed
From: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
To: dev@dpdk.org
Cc: Adrien Mazarguil <adrien.mazarguil@6wind.com>,
	Bruce Richardson <bruce.richardson@intel.com>
Subject: [dpdk-dev] [PATCH v3 5/6] net/mlx5: reduce memory overhead for WQE handling
Date: Tue, 20 Sep 2016 10:53:50 +0200	[thread overview]
Message-ID: <0e58bd2798d879d106ef6e486083b0e5c764de8e.1474360134.git.nelio.laranjeiro@6wind.com> (raw)
In-Reply-To: <cover.1474360134.git.nelio.laranjeiro@6wind.com>

The PMD uses only a power-of-two number of Work Queue Elements (aka WQE);
storing the count in log2 form reduces the size of the container needed to
hold it.

Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5_rxtx.c | 23 ++++++++++++-----------
 drivers/net/mlx5/mlx5_rxtx.h |  2 +-
 drivers/net/mlx5/mlx5_txq.c  |  4 ++--
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 214922b..9d00ddc 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -199,9 +199,10 @@ txq_complete(struct txq *txq)
 	} while (1);
 	if (unlikely(cqe == NULL))
 		return;
-	wqe = &(*txq->wqes)[htons(cqe->wqe_counter) & (txq->wqe_n - 1)].hdr;
+	wqe = &(*txq->wqes)[htons(cqe->wqe_counter) &
+			    ((1 << txq->wqe_n) - 1)].hdr;
 	elts_tail = wqe->ctrl[3];
-	assert(elts_tail < txq->wqe_n);
+	assert(elts_tail < (1 << txq->wqe_n));
 	/* Free buffers. */
 	while (elts_free != elts_tail) {
 		struct rte_mbuf *elt = (*txq->elts)[elts_free];
@@ -335,7 +336,7 @@ mlx5_wqe_write(struct txq *txq, volatile struct mlx5_wqe *wqe,
 	}
 	/* Inline if enough room. */
 	if (txq->max_inline != 0) {
-		uintptr_t end = (uintptr_t)&(*txq->wqes)[txq->wqe_n];
+		uintptr_t end = (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n];
 		uint16_t max_inline = txq->max_inline * RTE_CACHE_LINE_SIZE;
 		uint16_t room;
 
@@ -446,7 +447,7 @@ tx_prefetch_wqe(struct txq *txq, uint16_t ci)
 {
 	volatile struct mlx5_wqe64 *wqe;
 
-	wqe = &(*txq->wqes)[ci & (txq->wqe_n - 1)];
+	wqe = &(*txq->wqes)[ci & ((1 << txq->wqe_n) - 1)];
 	rte_prefetch0(wqe);
 }
 
@@ -504,7 +505,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		max -= segs_n;
 		--pkts_n;
 		elts_head_next = (elts_head + 1) & (elts_n - 1);
-		wqe = &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)].hdr;
+		wqe = &(*txq->wqes)[txq->wqe_ci & ((1 << txq->wqe_n) - 1)].hdr;
 		tx_prefetch_wqe(txq, txq->wqe_ci);
 		tx_prefetch_wqe(txq, txq->wqe_ci + 1);
 		if (pkts_n)
@@ -540,7 +541,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE)))
 				dseg = (volatile void *)
 					&(*txq->wqes)[txq->wqe_ci++ &
-						      (txq->wqe_n - 1)];
+						      ((1 << txq->wqe_n) - 1)];
 			else
 				++dseg;
 			++ds;
@@ -607,10 +608,10 @@ skip_segs:
 static inline void
 mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
 {
-	uint16_t idx = txq->wqe_ci & (txq->wqe_n - 1);
+	uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
 	volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] =
 		(volatile struct mlx5_wqe_data_seg (*)[])
-		(uintptr_t)&(*txq->wqes)[(idx + 1) & (txq->wqe_n - 1)];
+		(uintptr_t)&(*txq->wqes)[(idx + 1) & ((1 << txq->wqe_n) - 1)];
 
 	mpw->state = MLX5_MPW_STATE_OPENED;
 	mpw->pkts_n = 0;
@@ -815,7 +816,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 static inline void
 mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
 {
-	uint16_t idx = txq->wqe_ci & (txq->wqe_n - 1);
+	uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
 	struct mlx5_wqe_inl_small *inl;
 
 	mpw->state = MLX5_MPW_INL_STATE_OPENED;
@@ -1000,7 +1001,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 			addr = rte_pktmbuf_mtod(buf, uintptr_t);
 			(*txq->elts)[elts_head] = buf;
 			/* Maximum number of bytes before wrapping. */
-			max = ((uintptr_t)&(*txq->wqes)[txq->wqe_n] -
+			max = ((uintptr_t)&(*txq->wqes)[1 << txq->wqe_n] -
 			       (uintptr_t)mpw.data.raw);
 			if (length > max) {
 				rte_memcpy((void *)(uintptr_t)mpw.data.raw,
@@ -1019,7 +1020,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 				mpw.data.raw += length;
 			}
 			if ((uintptr_t)mpw.data.raw ==
-			    (uintptr_t)&(*txq->wqes)[txq->wqe_n])
+			    (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n])
 				mpw.data.raw =
 					(volatile void *)&(*txq->wqes)[0];
 			++mpw.pkts_n;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 3dca8ca..9828aef 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -246,9 +246,9 @@ struct txq {
 	uint16_t elts_comp; /* Counter since last completion request. */
 	uint16_t cq_ci; /* Consumer index for completion queue. */
 	uint16_t wqe_ci; /* Consumer index for work queue. */
-	uint16_t wqe_n; /* Number of WQ elements. */
 	uint16_t elts_n:4; /* (*elts)[] length (in log2). */
 	uint16_t cqe_n:4; /* Number of CQ elements (in log2). */
+	uint16_t wqe_n:4; /* Number of WQ elements (in log2). */
 	uint16_t bf_buf_size:4; /* Log2 Blueflame size. */
 	uint16_t bf_offset; /* Blueflame offset. */
 	uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 9919e37..3d2d132 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -81,7 +81,7 @@ txq_alloc_elts(struct txq_ctrl *txq_ctrl, unsigned int elts_n)
 
 	for (i = 0; (i != elts_n); ++i)
 		(*txq_ctrl->txq.elts)[i] = NULL;
-	for (i = 0; (i != txq_ctrl->txq.wqe_n); ++i) {
+	for (i = 0; (i != (1u << txq_ctrl->txq.wqe_n)); ++i) {
 		volatile struct mlx5_wqe64 *wqe = &(*txq_ctrl->txq.wqes)[i];
 
 		memset((void *)(uintptr_t)wqe, 0x0, sizeof(*wqe));
@@ -217,7 +217,7 @@ txq_setup(struct txq_ctrl *tmpl, struct txq_ctrl *txq_ctrl)
 	tmpl->txq.wqes =
 		(volatile struct mlx5_wqe64 (*)[])
 		(uintptr_t)qp->gen_data.sqstart;
-	tmpl->txq.wqe_n = qp->sq.wqe_cnt;
+	tmpl->txq.wqe_n = log2above(qp->sq.wqe_cnt);
 	tmpl->txq.qp_db = &qp->gen_data.db[MLX5_SND_DBR];
 	tmpl->txq.bf_reg = qp->gen_data.bf->reg;
 	tmpl->txq.bf_offset = qp->gen_data.bf->offset;
-- 
2.1.4

  parent reply	other threads:[~2016-09-20  8:54 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-09-07  7:09 [dpdk-dev] [PATCH 0/6] net/mlx5: performance improvement Nelio Laranjeiro
2016-09-07  7:09 ` [dpdk-dev] [PATCH 1/6] net/mlx5: rework hardware structures Nelio Laranjeiro
2016-09-07  7:09 ` [dpdk-dev] [PATCH 2/6] net/mlx5: reduce Tx and Rx structure size Nelio Laranjeiro
2016-09-07  7:09 ` [dpdk-dev] [PATCH 3/6] " Nelio Laranjeiro
2016-09-07  7:09 ` [dpdk-dev] [PATCH 4/6] net/mlx5: reduce Tx " Nelio Laranjeiro
2016-09-07  7:09 ` [dpdk-dev] [PATCH 5/6] net/mlx5: reduce Tx and Rx " Nelio Laranjeiro
2016-09-07  7:09 ` [dpdk-dev] [PATCH 6/6] net/mlx5: remove gather loop on segments Nelio Laranjeiro
2016-09-14 12:18 ` [dpdk-dev] [PATCH v2 0/6] net/mlx5: performance improvement Nelio Laranjeiro
2016-09-19 16:17   ` Bruce Richardson
2016-09-20  7:25     ` Nélio Laranjeiro
2016-09-20  8:53   ` [dpdk-dev] [PATCH v3 " Nelio Laranjeiro
2016-09-20  8:53     ` [dpdk-dev] [PATCH v3 1/6] net/mlx5: rework hardware structures Nelio Laranjeiro
2016-09-20  8:53     ` [dpdk-dev] [PATCH v3 2/6] net/mlx5: reduce memory overhead of Rx/Tx descriptors Nelio Laranjeiro
2016-09-20  8:53     ` [dpdk-dev] [PATCH v3 3/6] net/mlx5: reduce memory overhead for CQE handling Nelio Laranjeiro
2016-09-20  8:53     ` [dpdk-dev] [PATCH v3 4/6] net/mlx5: reduce memory overhead for BF handling Nelio Laranjeiro
2016-09-20  8:53     ` Nelio Laranjeiro [this message]
2016-09-20  8:53     ` [dpdk-dev] [PATCH v3 6/6] net/mlx5: remove gather loop on segments Nelio Laranjeiro
2016-09-21 10:22     ` [dpdk-dev] [PATCH v3 0/6] net/mlx5: performance improvement Bruce Richardson
2016-09-14 12:18 ` [dpdk-dev] [PATCH v2 1/6] net/mlx5: rework hardware structures Nelio Laranjeiro
2016-09-19 16:14   ` Bruce Richardson
2016-09-20  7:09     ` Nélio Laranjeiro
2016-09-14 12:18 ` [dpdk-dev] [PATCH v2 2/6] net/mlx5: reduce Tx and Rx structure size Nelio Laranjeiro
2016-09-14 12:18 ` [dpdk-dev] [PATCH v2 3/6] " Nelio Laranjeiro
2016-09-14 12:18 ` [dpdk-dev] [PATCH v2 4/6] net/mlx5: reduce Tx " Nelio Laranjeiro
2016-09-14 12:18 ` [dpdk-dev] [PATCH v2 5/6] net/mlx5: reduce Tx and Rx " Nelio Laranjeiro
2016-09-14 12:18 ` [dpdk-dev] [PATCH v2 6/6] net/mlx5: remove gather loop on segments Nelio Laranjeiro

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=0e58bd2798d879d106ef6e486083b0e5c764de8e.1474360134.git.nelio.laranjeiro@6wind.com \
    --to=nelio.laranjeiro@6wind.com \
    --cc=adrien.mazarguil@6wind.com \
    --cc=bruce.richardson@intel.com \
    --cc=dev@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).