DPDK patches and discussions
 help / color / mirror / Atom feed
From: Yongseok Koh <yskoh@mellanox.com>
To: adrien.mazarguil@6wind.com, nelio.laranjeiro@6wind.com
Cc: dev@dpdk.org, shahafs@mellanox.com, Yongseok Koh <yskoh@mellanox.com>
Subject: [dpdk-dev] [PATCH v2 4/5] net/mlx5: use stride index in Rx completion entry
Date: Tue, 26 Jun 2018 05:39:24 -0700	[thread overview]
Message-ID: <20180626123925.37439-5-yskoh@mellanox.com> (raw)
In-Reply-To: <20180626123925.37439-1-yskoh@mellanox.com>

Multi-Packet Receive Queue (MPRQ) receives multiple packets into a single
large buffer. Currently, the number of consumed strides reported in each CQE
is accumulated to keep track of the current stride index. However, it is
safer to use the stride index from the CQE directly, to avoid out-of-order
situations that could be caused by introducing LRO in the future.

If Rx CQE compression is enabled, HW can be configured to store the stride
index in a mini-CQE, but this requires a newer version of the library/driver.
Therefore, as of this change, MPRQ is only supported with the newer
library/driver, and the Rx hash result is not supported if MPRQ is enabled
along with Rx CQE compression.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Shahaf Shuler <shahafs@mellanox.com>
---
 doc/guides/nics/mlx5.rst     |  4 ++++
 drivers/net/mlx5/Makefile    |  2 +-
 drivers/net/mlx5/mlx5_rxq.c  |  8 +++++++-
 drivers/net/mlx5/mlx5_rxtx.c | 41 +++++++++++++++++++++++------------------
 drivers/net/mlx5/mlx5_rxtx.h |  2 +-
 5 files changed, 36 insertions(+), 21 deletions(-)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 79c982e29..7dd9c1c5e 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -132,6 +132,10 @@ Limitations
   the external buffers will be freed by PMD and the application which still
   holds the external buffers may be corrupted.
 
+- If Multi-Packet Rx queue is configured (``mprq_en``) and Rx CQE compression is
+  enabled (``rxq_cqe_comp_en``) at the same time, RSS hash result is not fully
+  supported. Some Rx packets may not have PKT_RX_RSS_HASH.
+
 Statistics
 ----------
 
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 8a5229e61..955861a41 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -100,7 +100,7 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
 	$Q sh -- '$<' '$@' \
 		HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT \
 		infiniband/mlx5dv.h \
-		enum MLX5DV_CONTEXT_MASK_STRIDING_RQ \
+		enum MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX \
 		$(AUTOCONF_OUTPUT)
 	$Q sh -- '$<' '$@' \
 		HAVE_IBV_DEVICE_TUNNEL_SUPPORT \
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 17db7c160..08dd5596b 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -818,7 +818,13 @@ mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	if (config->cqe_comp && !rxq_data->hw_timestamp) {
 		attr.cq.mlx5.comp_mask |=
 			MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
+#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
+		attr.cq.mlx5.cqe_comp_res_format =
+			mprq_en ? MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX :
+				  MLX5DV_CQE_RES_FORMAT_HASH;
+#else
 		attr.cq.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH;
+#endif
 		/*
 		 * For vectorized Rx, it must not be doubled in order to
 		 * make cq_ci and rq_ci aligned.
@@ -976,7 +982,7 @@ mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 	rxq_data->rq_db = rwq.dbrec;
 	rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
 	rxq_data->cq_ci = 0;
-	rxq_data->strd_ci = 0;
+	rxq_data->consumed_strd = 0;
 	rxq_data->rq_pi = 0;
 	rxq_data->zip = (struct rxq_zip){
 		.ai = 0,
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index a4e15c519..a7ed8d8e4 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -2108,7 +2108,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
 	unsigned int i = 0;
 	uint16_t rq_ci = rxq->rq_ci;
-	uint16_t strd_idx = rxq->strd_ci;
+	uint16_t consumed_strd = rxq->consumed_strd;
 	struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
 
 	while (i < pkts_n) {
@@ -2116,13 +2116,14 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		void *addr;
 		int ret;
 		unsigned int len;
-		uint16_t consumed_strd;
+		uint16_t strd_cnt;
+		uint16_t strd_idx;
 		uint32_t offset;
 		uint32_t byte_cnt;
 		volatile struct mlx5_mini_cqe8 *mcqe = NULL;
-		uint32_t rss_hash_res;
+		uint32_t rss_hash_res = 0;
 
-		if (strd_idx == strd_n) {
+		if (consumed_strd == strd_n) {
 			/* Replace WQE only if the buffer is still in use. */
 			if (rte_atomic16_read(&buf->refcnt) > 1) {
 				mprq_buf_replace(rxq, rq_ci & wq_mask);
@@ -2142,7 +2143,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 					rxq->mprq_repl = rep;
 			}
 			/* Advance to the next WQE. */
-			strd_idx = 0;
+			consumed_strd = 0;
 			++rq_ci;
 			buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
 		}
@@ -2156,14 +2157,21 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			continue;
 		}
 		byte_cnt = ret;
-		consumed_strd = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >>
-				MLX5_MPRQ_STRIDE_NUM_SHIFT;
-		assert(consumed_strd);
-		/* Calculate offset before adding up stride index. */
-		offset = strd_idx * strd_sz + strd_shift;
-		strd_idx += consumed_strd;
+		strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >>
+			   MLX5_MPRQ_STRIDE_NUM_SHIFT;
+		assert(strd_cnt);
+		consumed_strd += strd_cnt;
 		if (byte_cnt & MLX5_MPRQ_FILLER_MASK)
 			continue;
+		if (mcqe == NULL) {
+			rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res);
+			strd_idx = rte_be_to_cpu_16(cqe->wqe_counter);
+		} else {
+			/* mini-CQE for MPRQ doesn't have hash result. */
+			strd_idx = rte_be_to_cpu_16(mcqe->stride_idx);
+		}
+		assert(strd_idx < strd_n);
+		assert(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) & wq_mask));
 		/*
 		 * Currently configured to receive a packet per a stride. But if
 		 * MTU is adjusted through kernel interface, device could
@@ -2171,7 +2179,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		 * case, the packet should be dropped because it is bigger than
 		 * the max_rx_pkt_len.
 		 */
-		if (unlikely(consumed_strd > 1)) {
+		if (unlikely(strd_cnt > 1)) {
 			++rxq->stats.idropped;
 			continue;
 		}
@@ -2184,6 +2192,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		assert((int)len >= (rxq->crc_present << 2));
 		if (rxq->crc_present)
 			len -= ETHER_CRC_LEN;
+		offset = strd_idx * strd_sz + strd_shift;
 		addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf), offset);
 		/* Initialize the offload flag. */
 		pkt->ol_flags = 0;
@@ -2206,7 +2215,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		} else {
 			rte_iova_t buf_iova;
 			struct rte_mbuf_ext_shared_info *shinfo;
-			uint16_t buf_len = consumed_strd * strd_sz;
+			uint16_t buf_len = strd_cnt * strd_sz;
 
 			/* Increment the refcnt of the whole chunk. */
 			rte_atomic16_add_return(&buf->refcnt, 1);
@@ -2242,10 +2251,6 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 				continue;
 			}
 		}
-		/* If compressed, take hash result from mini-CQE. */
-		rss_hash_res = rte_be_to_cpu_32(mcqe == NULL ?
-						cqe->rx_hash_res :
-						mcqe->rx_hash_result);
 		rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
 		PKT_LEN(pkt) = len;
 		DATA_LEN(pkt) = len;
@@ -2259,7 +2264,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		++i;
 	}
 	/* Update the consumer indexes. */
-	rxq->strd_ci = strd_idx;
+	rxq->consumed_strd = consumed_strd;
 	rte_cio_wmb();
 	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
 	if (rq_ci != rxq->rq_ci) {
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 04a432adf..0007be08b 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -93,7 +93,7 @@ struct mlx5_rxq_data {
 	volatile uint32_t *cq_db;
 	uint16_t port_id;
 	uint16_t rq_ci;
-	uint16_t strd_ci; /* Stride index in a WQE for Multi-Packet RQ. */
+	uint16_t consumed_strd; /* Number of consumed strides in WQE. */
 	uint16_t rq_pi;
 	uint16_t cq_ci;
 	struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */
-- 
2.11.0

  parent reply	other threads:[~2018-06-26 12:40 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-06-26 12:39 [dpdk-dev] [PATCH v2 0/5] net/mlx5: support new completion field for Multi-Packet RQ Yongseok Koh
2018-06-26 12:39 ` [dpdk-dev] [PATCH v2 1/5] net/mlx5: change return value of Rx completion poll Yongseok Koh
2018-06-26 12:39 ` [dpdk-dev] [PATCH v2 2/5] net/mlx5: add new fields in Rx completion entry Yongseok Koh
2018-06-26 12:39 ` [dpdk-dev] [PATCH v2 3/5] net/mlx5: add warning message for Multi-Packet RQ Yongseok Koh
2018-06-26 12:39 ` Yongseok Koh [this message]
2018-06-26 12:39 ` [dpdk-dev] [PATCH v2 5/5] net/mlx5: increase number of strides Yongseok Koh
2018-06-27 10:46 ` [dpdk-dev] [PATCH v2 0/5] net/mlx5: support new completion field for Multi-Packet RQ Shahaf Shuler

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180626123925.37439-5-yskoh@mellanox.com \
    --to=yskoh@mellanox.com \
    --cc=adrien.mazarguil@6wind.com \
    --cc=dev@dpdk.org \
    --cc=nelio.laranjeiro@6wind.com \
    --cc=shahafs@mellanox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).