DPDK patches and discussions
 help / color / mirror / Atom feed
From: Zhichao Zeng <zhichaox.zeng@intel.com>
To: dev@dpdk.org
Cc: qiming.yang@intel.com, yidingx.zhou@intel.com,
	qi.z.zhang@intel.com, Zhichao Zeng <zhichaox.zeng@intel.com>,
	Jingjing Wu <jingjing.wu@intel.com>,
	Beilei Xing <beilei.xing@intel.com>,
	Radu Nicolau <radu.nicolau@intel.com>,
	Abhijit Sinha <abhijit.sinha@intel.com>,
	Declan Doherty <declan.doherty@intel.com>
Subject: [PATCH v4] net/iavf: fix TSO offload for tunnel case
Date: Thu, 29 Sep 2022 13:27:02 +0800	[thread overview]
Message-ID: <20220929052702.668519-1-zhichaox.zeng@intel.com> (raw)
In-Reply-To: <20220927095634.448796-1-zhichaox.zeng@intel.com>

This patch is to fix the tunnel TSO not enabling issue, simplify
the logic of calculating 'Tx Buffer Size' of data descriptor with IPSec,
and fix handling that the mbuf size exceeds the TX descriptor
hardware limit(1B-16KB) which causes malicious behavior to the NIC.

Fixes: 1e728b01120c ("net/iavf: rework Tx path")

Signed-off-by: Zhichao Zeng <zhichaox.zeng@intel.com>

---
v4: fix the IPsec TSO issue
---
v3: move macros to iavf header file
---
v2: rework patch
---
 drivers/net/iavf/iavf_rxtx.c | 99 ++++++++++++++++++++++--------------
 drivers/net/iavf/iavf_rxtx.h |  4 ++
 2 files changed, 65 insertions(+), 38 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index fc5d9e38cc..f317399ca2 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -2463,7 +2463,7 @@ iavf_fill_ctx_desc_segmentation_field(volatile uint64_t *field,
 		total_length = m->pkt_len - (m->l2_len + m->l3_len + m->l4_len);
 
 		if (m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)
-			total_length -= m->outer_l3_len;
+			total_length -= m->outer_l3_len + m->outer_l2_len;
 	}
 
 #ifdef RTE_LIBRTE_IAVF_DEBUG_TX
@@ -2634,50 +2634,36 @@ iavf_build_data_desc_cmd_offset_fields(volatile uint64_t *qw1,
 		((uint64_t)l2tag1 << IAVF_TXD_DATA_QW1_L2TAG1_SHIFT));
 }
 
+/* Calculate the number of TX descriptors needed for each pkt */
+static inline uint16_t
+iavf_calc_pkt_desc(struct rte_mbuf *tx_pkt)
+{
+	struct rte_mbuf *txd = tx_pkt;
+	uint16_t count = 0;
+
+	while (txd != NULL) {
+		count += (txd->data_len + IAVF_MAX_DATA_PER_TXD - 1) /
+			IAVF_MAX_DATA_PER_TXD;
+		txd = txd->next;
+	}
+
+	return count;
+}
+
 static inline void
 iavf_fill_data_desc(volatile struct iavf_tx_desc *desc,
-	struct rte_mbuf *m, uint64_t desc_template,
-	uint16_t tlen, uint16_t ipseclen)
+	uint64_t desc_template,	uint16_t buffsz,
+	uint64_t buffer_addr)
 {
-	uint32_t hdrlen = m->l2_len;
-	uint32_t bufsz = 0;
-
 	/* fill data descriptor qw1 from template */
 	desc->cmd_type_offset_bsz = desc_template;
 
-	/* set data buffer address */
-	desc->buffer_addr = rte_mbuf_data_iova(m);
-
-	/* calculate data buffer size less set header lengths */
-	if ((m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) &&
-			(m->ol_flags & (RTE_MBUF_F_TX_TCP_SEG |
-					RTE_MBUF_F_TX_UDP_SEG))) {
-		hdrlen += m->outer_l3_len;
-		if (m->ol_flags & RTE_MBUF_F_TX_L4_MASK)
-			hdrlen += m->l3_len + m->l4_len;
-		else
-			hdrlen += m->l3_len;
-		if (m->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD)
-			hdrlen += ipseclen;
-		bufsz = hdrlen + tlen;
-	} else if ((m->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD) &&
-			(m->ol_flags & (RTE_MBUF_F_TX_TCP_SEG |
-					RTE_MBUF_F_TX_UDP_SEG))) {
-		hdrlen += m->outer_l3_len + m->l3_len + ipseclen;
-		if (m->ol_flags & RTE_MBUF_F_TX_L4_MASK)
-			hdrlen += m->l4_len;
-		bufsz = hdrlen + tlen;
-
-	} else {
-		bufsz = m->data_len;
-	}
-
 	/* set data buffer size */
 	desc->cmd_type_offset_bsz |=
-		(((uint64_t)bufsz << IAVF_TXD_DATA_QW1_TX_BUF_SZ_SHIFT) &
+		(((uint64_t)buffsz << IAVF_TXD_DATA_QW1_TX_BUF_SZ_SHIFT) &
 		IAVF_TXD_DATA_QW1_TX_BUF_SZ_MASK);
 
-	desc->buffer_addr = rte_cpu_to_le_64(desc->buffer_addr);
+	desc->buffer_addr = rte_cpu_to_le_64(buffer_addr);
 	desc->cmd_type_offset_bsz = rte_cpu_to_le_64(desc->cmd_type_offset_bsz);
 }
 
@@ -2702,8 +2688,10 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	struct iavf_tx_entry *txe_ring = txq->sw_ring;
 	struct iavf_tx_entry *txe, *txn;
 	struct rte_mbuf *mb, *mb_seg;
+	uint64_t buf_dma_addr;
 	uint16_t desc_idx, desc_idx_last;
 	uint16_t idx;
+	uint16_t slen;
 
 
 	/* Check if the descriptor ring needs to be cleaned. */
@@ -2742,8 +2730,14 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 		 * The number of descriptors that must be allocated for
 		 * a packet equals to the number of the segments of that
 		 * packet plus the context and ipsec descriptors if needed.
+		 * Recalculate the needed tx descs when TSO enabled in case
+		 * the mbuf data size exceeds max data size that hw allows
+		 * per tx desc.
 		 */
-		nb_desc_required = nb_desc_data + nb_desc_ctx + nb_desc_ipsec;
+		if (mb->ol_flags & RTE_MBUF_F_TX_TCP_SEG)
+			nb_desc_required = iavf_calc_pkt_desc(mb) + nb_desc_ctx + nb_desc_ipsec;
+		else
+			nb_desc_required = nb_desc_data + nb_desc_ctx + nb_desc_ipsec;
 
 		desc_idx_last = (uint16_t)(desc_idx + nb_desc_required - 1);
 
@@ -2839,8 +2833,37 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 				rte_pktmbuf_free_seg(txe->mbuf);
 
 			txe->mbuf = mb_seg;
-			iavf_fill_data_desc(ddesc, mb_seg,
-					ddesc_template, tlen, ipseclen);
+
+			if (mb_seg->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD) {
+				slen = tlen + mb_seg->l2_len + mb_seg->l3_len +
+						mb_seg->outer_l3_len + ipseclen;
+				if (mb_seg->ol_flags & RTE_MBUF_F_TX_L4_MASK)
+					slen += mb_seg->l4_len;
+			} else {
+				slen = mb_seg->data_len;
+			}
+
+			buf_dma_addr = rte_mbuf_data_iova(mb_seg);
+			while ((mb_seg->ol_flags & (RTE_MBUF_F_TX_TCP_SEG |
+					RTE_MBUF_F_TX_UDP_SEG)) &&
+					unlikely(slen > IAVF_MAX_DATA_PER_TXD)) {
+				iavf_fill_data_desc(ddesc, ddesc_template,
+					IAVF_MAX_DATA_PER_TXD, buf_dma_addr);
+
+				IAVF_DUMP_TX_DESC(txq, ddesc, desc_idx);
+
+				buf_dma_addr += IAVF_MAX_DATA_PER_TXD;
+				slen -= IAVF_MAX_DATA_PER_TXD;
+
+				txe->last_id = desc_idx_last;
+				desc_idx = txe->next_id;
+				txe = txn;
+				ddesc = &txr[desc_idx];
+				txn = &txe_ring[txe->next_id];
+			}
+
+			iavf_fill_data_desc(ddesc, ddesc_template,
+					slen, buf_dma_addr);
 
 			IAVF_DUMP_TX_DESC(txq, ddesc, desc_idx);
 
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index 66e832713c..ae871467ab 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -79,6 +79,10 @@
 #define IAVF_TX_OFFLOAD_NOTSUP_MASK \
 		(RTE_MBUF_F_TX_OFFLOAD_MASK ^ IAVF_TX_OFFLOAD_MASK)
 
+/* HW requires that TX buffer size ranges from 1B up to (16K-1)B. */
+#define IAVF_MAX_DATA_PER_TXD \
+	(IAVF_TXD_QW1_TX_BUF_SZ_MASK >> IAVF_TXD_QW1_TX_BUF_SZ_SHIFT)
+
 extern uint64_t iavf_timestamp_dynflag;
 extern int iavf_timestamp_dynfield_offset;
 
-- 
2.25.1


  reply	other threads:[~2022-09-29  5:25 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-08-12 16:52 [PATCH 1/2] net/iavf: enable TSO offloading for tunnel cases peng1x.zhang
2022-08-12 16:52 ` [PATCH 2/2] net/iavf: support inner and outer checksum offload peng1x.zhang
2022-08-30  8:12   ` Yang, Qiming
2022-09-01  9:33   ` [PATCH v2] net/iavf: enable inner and outer Tx " Peng Zhang
2022-09-01 11:04     ` Zhang, Qi Z
2022-09-05  2:25     ` Yang, Qiming
2022-09-20  9:14     ` [PATCH v3] " Zhichao Zeng
2022-09-22  9:02       ` Xu, Ke1
2022-09-25  5:58         ` Zhang, Qi Z
2022-08-30  7:52 ` [PATCH 1/2] net/iavf: enable TSO offloading for tunnel cases Yang, Qiming
2022-09-26  5:17 ` [PATCH v2] net/iavf: fix TSO offload for tunnel case Zhichao Zeng
2022-09-26  9:48   ` Xu, Ke1
2022-09-27  2:33   ` Zhang, Qi Z
2022-09-27  9:56   ` [PATCH v3] " Zhichao Zeng
2022-09-29  5:27     ` Zhichao Zeng [this message]
2022-09-30  3:46       ` [PATCH v4] " Xu, Ke1
2022-09-30  9:05       ` Nicolau, Radu
2022-10-08  7:55         ` Zhang, Qi Z

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220929052702.668519-1-zhichaox.zeng@intel.com \
    --to=zhichaox.zeng@intel.com \
    --cc=abhijit.sinha@intel.com \
    --cc=beilei.xing@intel.com \
    --cc=declan.doherty@intel.com \
    --cc=dev@dpdk.org \
    --cc=jingjing.wu@intel.com \
    --cc=qi.z.zhang@intel.com \
    --cc=qiming.yang@intel.com \
    --cc=radu.nicolau@intel.com \
    --cc=yidingx.zhou@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).