DPDK patches and discussions
 help / color / mirror / Atom feed
From: Alexander Kozyrev <akozyrev@mellanox.com>
To: dev@dpdk.org
Cc: rasland@mellanox.com, matan@mellanox.com,
	viacheslavo@mellanox.com, ferruh.yigit@intel.com,
	thomas@monjalon.net
Subject: [dpdk-dev] [PATCH 2/4] net/mlx5: enable MPRQ multi-stride operations
Date: Tue, 31 Mar 2020 21:52:37 +0000	[thread overview]
Message-ID: <1585691559-17409-3-git-send-email-akozyrev@mellanox.com> (raw)
In-Reply-To: <1585691559-17409-1-git-send-email-akozyrev@mellanox.com>

MPRQ feature should be updated to allow a packet to be received
into multiple strides in order to support the MTU exceeding 8KB.
Special care is needed to prevent the headroom corruption in the
multi-stride mode since the headroom space is borrowed by the PMD
from the tail of the preceding stride. Copy the whole packet into
a separate mbuf in this case or just the overlapping data if the
Rx scattering is supported by an application.

Signed-off-by: Alexander Kozyrev <akozyrev@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxq.c  | 25 ++++------------
 drivers/net/mlx5/mlx5_rxtx.c | 68 +++++++++++++++++++-------------------------
 drivers/net/mlx5/mlx5_rxtx.h |  2 +-
 3 files changed, 35 insertions(+), 60 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 85fcfe6..a64f536 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1793,9 +1793,7 @@ struct mlx5_rxq_ctrl *
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_rxq_ctrl *tmpl;
 	unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
-	unsigned int mprq_stride_size;
 	struct mlx5_dev_config *config = &priv->config;
-	unsigned int strd_headroom_en;
 	/*
 	 * Always allocate extra slots, even if eventually
 	 * the vector Rx will not be used.
@@ -1841,27 +1839,13 @@ struct mlx5_rxq_ctrl *
 	tmpl->socket = socket;
 	if (dev->data->dev_conf.intr_conf.rxq)
 		tmpl->irq = 1;
-	/*
-	 * LRO packet may consume all the stride memory, hence we cannot
-	 * guaranty head-room near the packet memory in the stride.
-	 * In this case scatter is, for sure, enabled and an empty mbuf may be
-	 * added in the start for the head-room.
-	 */
-	if (lro_on_queue && RTE_PKTMBUF_HEADROOM > 0 &&
-	    non_scatter_min_mbuf_size > mb_len) {
-		strd_headroom_en = 0;
-		mprq_stride_size = RTE_MIN(max_rx_pkt_len,
-					1u << config->mprq.max_stride_size_n);
-	} else {
-		strd_headroom_en = 1;
-		mprq_stride_size = non_scatter_min_mbuf_size;
-	}
 	if (!config->mprq.stride_num_n)
 		config->mprq.stride_num_n = MLX5_MPRQ_STRIDE_NUM_N;
 	if (!config->mprq.stride_size_n)
-		config->mprq.stride_size_n = (mprq_stride_size <=
+		config->mprq.stride_size_n = (non_scatter_min_mbuf_size <=
 				(1U << config->mprq.max_stride_size_n)) ?
-			log2above(mprq_stride_size) : MLX5_MPRQ_STRIDE_SIZE_N;
+			log2above(non_scatter_min_mbuf_size) :
+			MLX5_MPRQ_STRIDE_SIZE_N;
 	/*
 	 * This Rx queue can be configured as a Multi-Packet RQ if all of the
 	 * following conditions are met:
@@ -1877,7 +1861,8 @@ struct mlx5_rxq_ctrl *
 		tmpl->rxq.strd_num_n = config->mprq.stride_num_n;
 		tmpl->rxq.strd_sz_n = config->mprq.stride_size_n;
 		tmpl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT;
-		tmpl->rxq.strd_headroom_en = strd_headroom_en;
+		tmpl->rxq.strd_scatter_en =
+				!!(offloads & DEV_RX_OFFLOAD_SCATTER);
 		tmpl->rxq.mprq_max_memcpy_len = RTE_MIN(first_mb_free_size,
 				config->mprq.max_memcpy_len);
 		max_lro_size = RTE_MIN(max_rx_pkt_len,
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index f3bf763..4c27952 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -1658,21 +1658,20 @@ enum mlx5_txcmp_code {
 	unsigned int i = 0;
 	uint32_t rq_ci = rxq->rq_ci;
 	uint16_t consumed_strd = rxq->consumed_strd;
-	uint16_t headroom_sz = rxq->strd_headroom_en * RTE_PKTMBUF_HEADROOM;
 	struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
 
 	while (i < pkts_n) {
 		struct rte_mbuf *pkt;
 		void *addr;
 		int ret;
-		unsigned int len;
+		uint32_t len;
 		uint16_t strd_cnt;
 		uint16_t strd_idx;
 		uint32_t offset;
 		uint32_t byte_cnt;
+		int32_t hdrm_overlap;
 		volatile struct mlx5_mini_cqe8 *mcqe = NULL;
 		uint32_t rss_hash_res = 0;
-		uint8_t lro_num_seg;
 
 		if (consumed_strd == strd_n) {
 			/* Replace WQE only if the buffer is still in use. */
@@ -1719,18 +1718,6 @@ enum mlx5_txcmp_code {
 		MLX5_ASSERT(strd_idx < strd_n);
 		MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) &
 			    wq_mask));
-		lro_num_seg = cqe->lro_num_seg;
-		/*
-		 * Currently configured to receive a packet per a stride. But if
-		 * MTU is adjusted through kernel interface, device could
-		 * consume multiple strides without raising an error. In this
-		 * case, the packet should be dropped because it is bigger than
-		 * the max_rx_pkt_len.
-		 */
-		if (unlikely(!lro_num_seg && strd_cnt > 1)) {
-			++rxq->stats.idropped;
-			continue;
-		}
 		pkt = rte_pktmbuf_alloc(rxq->mp);
 		if (unlikely(pkt == NULL)) {
 			++rxq->stats.rx_nombuf;
@@ -1742,12 +1729,16 @@ enum mlx5_txcmp_code {
 			len -= RTE_ETHER_CRC_LEN;
 		offset = strd_idx * strd_sz + strd_shift;
 		addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf, strd_n), offset);
+		hdrm_overlap = len + RTE_PKTMBUF_HEADROOM - strd_cnt * strd_sz;
 		/*
 		 * Memcpy packets to the target mbuf if:
 		 * - The size of packet is smaller than mprq_max_memcpy_len.
 		 * - Out of buffer in the Mempool for Multi-Packet RQ.
+		 * - There is no space for a headroom and scatter is disabled.
 		 */
-		if (len <= rxq->mprq_max_memcpy_len || rxq->mprq_repl == NULL) {
+		if (len <= rxq->mprq_max_memcpy_len ||
+		    rxq->mprq_repl == NULL ||
+		    (hdrm_overlap > 0 && !rxq->strd_scatter_en)) {
 			/*
 			 * When memcpy'ing packet due to out-of-buffer, the
 			 * packet must be smaller than the target mbuf.
@@ -1769,7 +1760,7 @@ enum mlx5_txcmp_code {
 			rte_atomic16_add_return(&buf->refcnt, 1);
 			MLX5_ASSERT((uint16_t)rte_atomic16_read(&buf->refcnt) <=
 				    strd_n + 1);
-			buf_addr = RTE_PTR_SUB(addr, headroom_sz);
+			buf_addr = RTE_PTR_SUB(addr, RTE_PKTMBUF_HEADROOM);
 			/*
 			 * MLX5 device doesn't use iova but it is necessary in a
 			 * case where the Rx packet is transmitted via a
@@ -1788,43 +1779,42 @@ enum mlx5_txcmp_code {
 			rte_pktmbuf_attach_extbuf(pkt, buf_addr, buf_iova,
 						  buf_len, shinfo);
 			/* Set mbuf head-room. */
-			pkt->data_off = headroom_sz;
+			SET_DATA_OFF(pkt, RTE_PKTMBUF_HEADROOM);
 			MLX5_ASSERT(pkt->ol_flags == EXT_ATTACHED_MBUF);
-			/*
-			 * Prevent potential overflow due to MTU change through
-			 * kernel interface.
-			 */
-			if (unlikely(rte_pktmbuf_tailroom(pkt) < len)) {
-				rte_pktmbuf_free_seg(pkt);
-				++rxq->stats.idropped;
-				continue;
-			}
+			MLX5_ASSERT(rte_pktmbuf_tailroom(pkt) <
+				len - (hdrm_overlap > 0 ? hdrm_overlap : 0));
 			DATA_LEN(pkt) = len;
 			/*
-			 * LRO packet may consume all the stride memory, in this
-			 * case packet head-room space is not guaranteed so must
-			 * to add an empty mbuf for the head-room.
+			 * Copy the last fragment of a packet (up to headroom
+			 * size bytes) in case there is a stride overlap with
+			 * a next packet's headroom. Allocate a separate mbuf
+			 * to store this fragment and link it. Scatter is on.
 			 */
-			if (!rxq->strd_headroom_en) {
-				struct rte_mbuf *headroom_mbuf =
-						rte_pktmbuf_alloc(rxq->mp);
+			if (hdrm_overlap > 0) {
+				MLX5_ASSERT(rxq->strd_scatter_en);
+				struct rte_mbuf *seg =
+					rte_pktmbuf_alloc(rxq->mp);
 
-				if (unlikely(headroom_mbuf == NULL)) {
+				if (unlikely(seg == NULL)) {
 					rte_pktmbuf_free_seg(pkt);
 					++rxq->stats.rx_nombuf;
 					break;
 				}
-				PORT(pkt) = rxq->port_id;
-				NEXT(headroom_mbuf) = pkt;
-				pkt = headroom_mbuf;
+				SET_DATA_OFF(seg, 0);
+				rte_memcpy(rte_pktmbuf_mtod(seg, void *),
+					RTE_PTR_ADD(addr, len - hdrm_overlap),
+					hdrm_overlap);
+				DATA_LEN(seg) = hdrm_overlap;
+				DATA_LEN(pkt) = len - hdrm_overlap;
+				NEXT(pkt) = seg;
 				NB_SEGS(pkt) = 2;
 			}
 		}
 		rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
-		if (lro_num_seg > 1) {
+		if (cqe->lro_num_seg > 1) {
 			mlx5_lro_update_hdr(addr, cqe, len);
 			pkt->ol_flags |= PKT_RX_LRO;
-			pkt->tso_segsz = strd_sz;
+			pkt->tso_segsz = len / cqe->lro_num_seg;
 		}
 		PKT_LEN(pkt) = len;
 		PORT(pkt) = rxq->port_id;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 939778a..d155c24 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -119,7 +119,7 @@ struct mlx5_rxq_data {
 	unsigned int strd_sz_n:4; /* Log 2 of stride size. */
 	unsigned int strd_shift_en:1; /* Enable 2bytes shift on a stride. */
 	unsigned int err_state:2; /* enum mlx5_rxq_err_state. */
-	unsigned int strd_headroom_en:1; /* Enable mbuf headroom in MPRQ. */
+	unsigned int strd_scatter_en:1; /* Scattered packets from a stride. */
 	unsigned int lro:1; /* Enable LRO. */
 	unsigned int :1; /* Remaining bits. */
 	volatile uint32_t *rq_db;
-- 
1.8.3.1


  parent reply	other threads:[~2020-03-31 21:53 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-31 21:52 [dpdk-dev] [PATCH 0/4] net/mlx5: add large packet size support to MPRQ Alexander Kozyrev
2020-03-31 21:52 ` [dpdk-dev] [PATCH 1/4] net/mlx5: add a devarg to specify MPRQ stride size Alexander Kozyrev
2020-04-02 10:00   ` Slava Ovsiienko
2020-03-31 21:52 ` Alexander Kozyrev [this message]
2020-04-02 10:01   ` [dpdk-dev] [PATCH 2/4] net/mlx5: enable MPRQ multi-stride operations Slava Ovsiienko
2020-03-31 21:52 ` [dpdk-dev] [PATCH 3/4] doc: add a decsription for MPRQ stride size devarg Alexander Kozyrev
2020-03-31 21:52 ` [dpdk-dev] [PATCH 4/4] net/mlx5: add multi-segment packets in MPRQ mode Alexander Kozyrev
2020-04-02 10:02   ` Slava Ovsiienko
2020-04-02 18:11 ` [dpdk-dev] [PATCH 0/3] net/mlx5: add large packet size support to MPRQ Alexander Kozyrev
2020-04-02 18:11   ` [dpdk-dev] [PATCH 1/3] net/mlx5: add a devarg to specify MPRQ stride size Alexander Kozyrev
2020-04-02 18:11   ` [dpdk-dev] [PATCH 2/3] net/mlx5: enable MPRQ multi-stride operations Alexander Kozyrev
2020-04-02 18:11   ` [dpdk-dev] [PATCH 3/3] net/mlx5: add multi-segment packets in MPRQ mode Alexander Kozyrev
2020-04-09 22:23   ` [dpdk-dev] [PATCH v4 0/3] net/mlx5: add large packet size support to MPRQ Alexander Kozyrev
2020-04-09 22:23     ` [dpdk-dev] [PATCH v4 1/3] net/mlx5: add a devarg to specify MPRQ stride size Alexander Kozyrev
2020-04-14 11:42       ` Ferruh Yigit
2020-04-14 12:52         ` Thomas Monjalon
2020-04-15 11:01           ` Ferruh Yigit
2020-04-15 11:25             ` Luca Boccassi
2020-04-15 15:34               ` Alexander Kozyrev
2020-04-15 15:52                 ` [dpdk-dev] [dpdk-stable] " Luca Boccassi
2020-04-09 22:23     ` [dpdk-dev] [PATCH v4 2/3] net/mlx5: enable MPRQ multi-stride operations Alexander Kozyrev
2020-04-09 22:23     ` [dpdk-dev] [PATCH v4 3/3] net/mlx5: add multi-segment packets in MPRQ mode Alexander Kozyrev
2020-04-10 14:01     ` [dpdk-dev] [PATCH v4 0/3] net/mlx5: add large packet size support to MPRQ Matan Azrad
2020-04-13 10:57     ` Raslan Darawsheh
2020-04-09 21:24 ` [dpdk-dev] [PATCH v3 " Alexander Kozyrev
2020-04-09 21:24   ` [dpdk-dev] [PATCH v3 1/3] net/mlx5: add a devarg to specify MPRQ stride size Alexander Kozyrev
2020-04-09 21:24   ` [dpdk-dev] [PATCH v3 2/3] net/mlx5: enable MPRQ multi-stride operations Alexander Kozyrev
2020-04-09 21:24   ` [dpdk-dev] [PATCH v3 3/3] net/mlx5: add multi-segment packets in MPRQ mode Alexander Kozyrev

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1585691559-17409-3-git-send-email-akozyrev@mellanox.com \
    --to=akozyrev@mellanox.com \
    --cc=dev@dpdk.org \
    --cc=ferruh.yigit@intel.com \
    --cc=matan@mellanox.com \
    --cc=rasland@mellanox.com \
    --cc=thomas@monjalon.net \
    --cc=viacheslavo@mellanox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).