DPDK patches and discussions
From: "Zhang, Qi Z" <qi.z.zhang@intel.com>
To: "Zeng, ZhichaoX" <zhichaox.zeng@intel.com>,
	"dev@dpdk.org" <dev@dpdk.org>
Cc: "Yang, Qiming" <qiming.yang@intel.com>,
	"Zhou, YidingX" <yidingx.zhou@intel.com>,
	"Wu, Jingjing" <jingjing.wu@intel.com>,
	"Xing, Beilei" <beilei.xing@intel.com>,
	"Sinha, Abhijit" <abhijit.sinha@intel.com>,
	"Doherty, Declan" <declan.doherty@intel.com>,
	"Nicolau, Radu" <radu.nicolau@intel.com>
Subject: RE: [PATCH v2] net/iavf: fix TSO offload for tunnel case
Date: Tue, 27 Sep 2022 02:33:57 +0000	[thread overview]
Message-ID: <DM4PR11MB5994D9FE03DD74722E9E1C9DD7559@DM4PR11MB5994.namprd11.prod.outlook.com> (raw)
In-Reply-To: <20220926051725.261950-1-zhichaox.zeng@intel.com>



> -----Original Message-----
> From: Zeng, ZhichaoX <zhichaox.zeng@intel.com>
> Sent: Monday, September 26, 2022 1:17 PM
> To: dev@dpdk.org
> Cc: Yang, Qiming <qiming.yang@intel.com>; Zhou, YidingX
> <yidingx.zhou@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>; Zeng,
> ZhichaoX <zhichaox.zeng@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>;
> Xing, Beilei <beilei.xing@intel.com>; Sinha, Abhijit <abhijit.sinha@intel.com>;
> Doherty, Declan <declan.doherty@intel.com>; Nicolau, Radu
> <radu.nicolau@intel.com>
> Subject: [PATCH v2] net/iavf: fix TSO offload for tunnel case
> 
> This patch fixes the issue of tunnel TSO not being enabled, simplifies
> the logic for calculating the data descriptor 'Tx Buffer Size' with
> IPsec, and fixes the handling of mbufs whose data size exceeds the Tx
> descriptor hardware limit of 1B to (16K-1)B, which causes the NIC to
> flag malicious driver behavior.
> 
> Fixes: 1e728b01120c ("net/iavf: rework Tx path")
> 
> ---
> v2: rework patch
> 
> Signed-off-by: Zhichao Zeng <zhichaox.zeng@intel.com>
> ---
>  drivers/common/iavf/iavf_osdep.h |  2 +
>  drivers/net/iavf/iavf_rxtx.c     | 95 +++++++++++++++++++-------------
>  2 files changed, 59 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/common/iavf/iavf_osdep.h b/drivers/common/iavf/iavf_osdep.h
> index 31d3d809f9..bf1436dfc6 100644
> --- a/drivers/common/iavf/iavf_osdep.h
> +++ b/drivers/common/iavf/iavf_osdep.h
> @@ -126,6 +126,8 @@ writeq(uint64_t value, volatile void *addr)
>  #define iavf_memset(a, b, c, d) memset((a), (b), (c))
>  #define iavf_memcpy(a, b, c, d) rte_memcpy((a), (b), (c))
> 
> +#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
> +

This does not look like something that needs to be added in osdep.h.
Can we simply make it local, or put it in some header file in net/iavf, so we don't need a patch that crosses modules?
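
For illustration, a minimal sketch of the suggested move; the header chosen here (iavf_rxtx.h) and the #ifndef guard are assumptions for the example, not a statement of where it must go:

/* Hypothetical: define the helper inside net/iavf, e.g. in
 * drivers/net/iavf/iavf_rxtx.h, so the shared base-code header
 * (iavf_osdep.h) stays untouched and the patch no longer crosses
 * the common/iavf module boundary.
 */
#ifndef DIV_ROUND_UP
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
#endif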

>  #define iavf_usec_delay(x) rte_delay_us_sleep(x)
>  #define iavf_msec_delay(x) iavf_usec_delay(1000 * (x))
> 
> diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
> index 109ba756f8..a06d9d3da6 100644
> --- a/drivers/net/iavf/iavf_rxtx.c
> +++ b/drivers/net/iavf/iavf_rxtx.c
> @@ -2417,7 +2417,7 @@ iavf_fill_ctx_desc_segmentation_field(volatile uint64_t *field,
>  		total_length = m->pkt_len - (m->l2_len + m->l3_len + m->l4_len);
> 
>  		if (m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)
> -			total_length -= m->outer_l3_len;
> +			total_length -= m->outer_l3_len + m->outer_l2_len;
>  	}
> 
>  #ifdef RTE_LIBRTE_IAVF_DEBUG_TX
> @@ -2581,50 +2581,39 @@ iavf_build_data_desc_cmd_offset_fields(volatile uint64_t *qw1,
>  		((uint64_t)l2tag1 << IAVF_TXD_DATA_QW1_L2TAG1_SHIFT));
>  }
> 
> +/* HW requires that TX buffer size ranges from 1B up to (16K-1)B. */
> +#define IAVF_MAX_DATA_PER_TXD \
> +	(IAVF_TXD_QW1_TX_BUF_SZ_MASK >> IAVF_TXD_QW1_TX_BUF_SZ_SHIFT)
> +
> +/* Calculate the number of TX descriptors needed for each pkt */
> +static inline uint16_t
> +iavf_calc_pkt_desc(struct rte_mbuf *tx_pkt)
> +{
> +	struct rte_mbuf *txd = tx_pkt;
> +	uint16_t count = 0;
> +
> +	while (txd != NULL) {
> +		count += DIV_ROUND_UP(txd->data_len, IAVF_MAX_DATA_PER_TXD);
> +		txd = txd->next;
> +	}
> +
> +	return count;
> +}
> +
>  static inline void
>  iavf_fill_data_desc(volatile struct iavf_tx_desc *desc,
> -	struct rte_mbuf *m, uint64_t desc_template,
> -	uint16_t tlen, uint16_t ipseclen)
> +	uint64_t desc_template,	uint16_t buffsz,
> +	uint64_t buffer_addr)
>  {
> -	uint32_t hdrlen = m->l2_len;
> -	uint32_t bufsz = 0;
> -
>  	/* fill data descriptor qw1 from template */
>  	desc->cmd_type_offset_bsz = desc_template;
> 
> -	/* set data buffer address */
> -	desc->buffer_addr = rte_mbuf_data_iova(m);
> -
> -	/* calculate data buffer size less set header lengths */
> -	if ((m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) &&
> -			(m->ol_flags & (RTE_MBUF_F_TX_TCP_SEG |
> -					RTE_MBUF_F_TX_UDP_SEG))) {
> -		hdrlen += m->outer_l3_len;
> -		if (m->ol_flags & RTE_MBUF_F_TX_L4_MASK)
> -			hdrlen += m->l3_len + m->l4_len;
> -		else
> -			hdrlen += m->l3_len;
> -		if (m->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD)
> -			hdrlen += ipseclen;
> -		bufsz = hdrlen + tlen;
> -	} else if ((m->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD) &&
> -			(m->ol_flags & (RTE_MBUF_F_TX_TCP_SEG |
> -					RTE_MBUF_F_TX_UDP_SEG))) {
> -		hdrlen += m->outer_l3_len + m->l3_len + ipseclen;
> -		if (m->ol_flags & RTE_MBUF_F_TX_L4_MASK)
> -			hdrlen += m->l4_len;
> -		bufsz = hdrlen + tlen;
> -
> -	} else {
> -		bufsz = m->data_len;
> -	}
> -
>  	/* set data buffer size */
>  	desc->cmd_type_offset_bsz |=
> -		(((uint64_t)bufsz << IAVF_TXD_DATA_QW1_TX_BUF_SZ_SHIFT) &
> +		(((uint64_t)buffsz << IAVF_TXD_DATA_QW1_TX_BUF_SZ_SHIFT) &
>  		IAVF_TXD_DATA_QW1_TX_BUF_SZ_MASK);
> 
> -	desc->buffer_addr = rte_cpu_to_le_64(desc->buffer_addr);
> +	desc->buffer_addr = rte_cpu_to_le_64(buffer_addr);
>  	desc->cmd_type_offset_bsz = rte_cpu_to_le_64(desc->cmd_type_offset_bsz);
>  }
> 
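
As a quick sanity check of the descriptor math in iavf_calc_pkt_desc() above — a standalone sketch, not part of the patch; 16383 mirrors the IAVF_TXD_QW1_TX_BUF_SZ_MASK >> _SHIFT value and the segment sizes are invented for the example:

/* Standalone, illustrative only: mimics the per-packet descriptor
 * count for a 3-segment TSO mbuf chain of 9000B + 20000B + 512B.
 */
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define MAX_DATA_PER_TXD 16383U
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	const uint32_t seg_len[] = { 9000, 20000, 512 };
	unsigned int count = 0;
	size_t i;

	for (i = 0; i < sizeof(seg_len) / sizeof(seg_len[0]); i++)
		count += DIV_ROUND_UP(seg_len[i], MAX_DATA_PER_TXD);

	/* 1 + 2 + 1 = 4, one more than the 3 mbuf segments */
	printf("descriptors needed: %u\n", count);
	return 0;
}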
> @@ -2649,8 +2638,10 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
>  	struct iavf_tx_entry *txe_ring = txq->sw_ring;
>  	struct iavf_tx_entry *txe, *txn;
>  	struct rte_mbuf *mb, *mb_seg;
> +	uint64_t buf_dma_addr;
>  	uint16_t desc_idx, desc_idx_last;
>  	uint16_t idx;
> +	uint16_t slen;
> 
> 
>  	/* Check if the descriptor ring needs to be cleaned. */
> @@ -2689,8 +2680,14 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
>  		 * The number of descriptors that must be allocated for
>  		 * a packet equals to the number of the segments of that
>  		 * packet plus the context and ipsec descriptors if needed.
> +		 * Recalculate the needed tx descs when TSO enabled in case
> +		 * the mbuf data size exceeds max data size that hw allows
> +		 * per tx desc.
>  		 */
> -		nb_desc_required = nb_desc_data + nb_desc_ctx + nb_desc_ipsec;
> +		if (mb->ol_flags & RTE_MBUF_F_TX_TCP_SEG)
> +			nb_desc_required = iavf_calc_pkt_desc(mb) + nb_desc_ctx + nb_desc_ipsec;
> +		else
> +			nb_desc_required = nb_desc_data + nb_desc_ctx + nb_desc_ipsec;
> 
>  		desc_idx_last = (uint16_t)(desc_idx + nb_desc_required - 1);
> 
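
To make the recalculation concrete (same illustrative numbers as the sketch above): a 3-segment TSO chain of 9000B + 20000B + 512B gives nb_desc_data = 3 by segment count, but needs 4 data descriptors once the 20000-byte segment is split at the 16383-byte cap, so reserving by segment count alone could overrun the ring.
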
> @@ -2786,8 +2783,30 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
>  				rte_pktmbuf_free_seg(txe->mbuf);
> 
>  			txe->mbuf = mb_seg;
> -			iavf_fill_data_desc(ddesc, mb_seg,
> -					ddesc_template, tlen, ipseclen);
> +			slen = mb_seg->data_len;
> +			if (mb_seg->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD)
> +				slen += ipseclen;
> +			buf_dma_addr = rte_mbuf_data_iova(mb_seg);
> +			while ((mb_seg->ol_flags & (RTE_MBUF_F_TX_TCP_SEG |
> +					RTE_MBUF_F_TX_UDP_SEG)) &&
> +					unlikely(slen > IAVF_MAX_DATA_PER_TXD)) {
> +				iavf_fill_data_desc(ddesc, ddesc_template,
> +					IAVF_MAX_DATA_PER_TXD, buf_dma_addr);
> +
> +				IAVF_DUMP_TX_DESC(txq, ddesc, desc_idx);
> +
> +				buf_dma_addr += IAVF_MAX_DATA_PER_TXD;
> +				slen -= IAVF_MAX_DATA_PER_TXD;
> +
> +				txe->last_id = desc_idx_last;
> +				desc_idx = txe->next_id;
> +				txe = txn;
> +				ddesc = &txr[desc_idx];
> +				txn = &txe_ring[txe->next_id];
> +			}
> +
> +			iavf_fill_data_desc(ddesc, ddesc_template,
> +					slen, buf_dma_addr);
> 
>  			IAVF_DUMP_TX_DESC(txq, ddesc, desc_idx);
> 
> --
> 2.25.1
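
For reference, the splitting loop reads cleanly in isolation. Below is a minimal self-contained sketch of the same chunking pattern; fill_desc() is a hypothetical stand-in for iavf_fill_data_desc(), and the sw-ring bookkeeping (txe/txn advancement, descriptor dumping) is deliberately omitted:

/* Hypothetical distillation of the while-loop above: carve one
 * oversized TSO segment into chunks the hardware accepts.
 */
#include <inttypes.h>
#include <stdio.h>

#define MAX_DATA_PER_TXD 16383U

/* stand-in for iavf_fill_data_desc(): just record one descriptor */
static void fill_desc(uint64_t addr, uint32_t len)
{
	printf("desc: addr=0x%" PRIx64 " len=%" PRIu32 "\n", addr, len);
}

int main(void)
{
	uint64_t addr = 0x100000;	/* pretend IOVA of the segment */
	uint32_t slen = 20000;		/* oversized TSO segment */

	/* emit full-size chunks while the remainder exceeds the cap */
	while (slen > MAX_DATA_PER_TXD) {
		fill_desc(addr, MAX_DATA_PER_TXD);
		addr += MAX_DATA_PER_TXD;
		slen -= MAX_DATA_PER_TXD;
	}
	fill_desc(addr, slen);	/* tail chunk: 20000 - 16383 = 3617B */
	return 0;
}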


Thread overview: 18+ messages
2022-08-12 16:52 [PATCH 1/2] net/iavf: enable TSO offloading for tunnel cases peng1x.zhang
2022-08-12 16:52 ` [PATCH 2/2] net/iavf: support inner and outer checksum offload peng1x.zhang
2022-08-30  8:12   ` Yang, Qiming
2022-09-01  9:33   ` [PATCH v2] net/iavf: enable inner and outer Tx " Peng Zhang
2022-09-01 11:04     ` Zhang, Qi Z
2022-09-05  2:25     ` Yang, Qiming
2022-09-20  9:14     ` [PATCH v3] " Zhichao Zeng
2022-09-22  9:02       ` Xu, Ke1
2022-09-25  5:58         ` Zhang, Qi Z
2022-08-30  7:52 ` [PATCH 1/2] net/iavf: enable TSO offloading for tunnel cases Yang, Qiming
2022-09-26  5:17 ` [PATCH v2] net/iavf: fix TSO offload for tunnel case Zhichao Zeng
2022-09-26  9:48   ` Xu, Ke1
2022-09-27  2:33   ` Zhang, Qi Z [this message]
2022-09-27  9:56   ` [PATCH v3] " Zhichao Zeng
2022-09-29  5:27     ` [PATCH v4] " Zhichao Zeng
2022-09-30  3:46       ` Xu, Ke1
2022-09-30  9:05       ` Nicolau, Radu
2022-10-08  7:55         ` Zhang, Qi Z
