DPDK patches and discussions
From: "Min Hu (Connor)" <humin29@huawei.com>
To: <dev@dpdk.org>
Cc: <ferruh.yigit@intel.com>, <thomas@monjalon.net>
Subject: [PATCH 2/2] net/hns3: optimized Tx performance
Date: Thu, 11 Nov 2021 21:38:59 +0800
Message-ID: <20211111133859.13705-3-humin29@huawei.com>
In-Reply-To: <20211111133859.13705-1-humin29@huawei.com>

From: Chengwen Feng <fengchengwen@huawei.com>

The PMD should check whether a descriptor has been completed by
hardware before freeing the corresponding mbuf. Currently, the common
xmit path tries to free mbufs every time it is invoked. Because
hardware may not have finished sending, this results in many wasted
queries for descriptors that are not yet done.

This patch uses tx_free_thresh to control when mbuf freeing is
invoked, and frees tx_rs_thresh mbufs at a time.
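
The pattern is roughly the following (a minimal sketch for
illustration only; the struct and function names below are simplified
stand-ins, not the actual hns3 definitions):

	#include <stdint.h>
	#include <stdbool.h>

	struct sketch_txq {
		uint16_t tx_bd_ready;    /* BDs available to the app */
		uint16_t tx_free_thresh; /* start cleaning below this */
		uint16_t tx_rs_thresh;   /* mbufs freed per batch */
	};

	/* Returns true when one batch of tx_rs_thresh descriptors was
	 * completed by hardware and its mbufs were reclaimed.
	 */
	static bool
	sketch_free_one_batch(struct sketch_txq *q)
	{
		/* The real driver checks the VLD bit of every
		 * descriptor in the batch before freeing anything;
		 * modeled here as an unconditional success.
		 */
		q->tx_bd_ready += q->tx_rs_thresh;
		return true;
	}

	static void
	sketch_xmit_prologue(struct sketch_txq *q)
	{
		/* Skip cleanup entirely while enough BDs remain. */
		if (q->tx_bd_ready < q->tx_free_thresh)
			(void)sketch_free_one_batch(q);
	}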

This patch also reworks the PMD's tx_done_cleanup implementation,
since the mbuf free algorithm has changed.
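
For reference, an application reaches this reclaim path through the
generic ethdev API; a typical call looks like the following (the
port/queue/count values are arbitrary examples):

	#include <stdio.h>
	#include <rte_ethdev.h>

	static void
	try_tx_cleanup(uint16_t port_id, uint16_t queue_id)
	{
		/* Ask the PMD to free up to 64 already-transmitted
		 * mbufs; with this patch, hns3 reclaims them
		 * tx_rs_thresh at a time. Returns the number freed,
		 * or a negative errno on failure.
		 */
		int freed = rte_eth_tx_done_cleanup(port_id, queue_id, 64);

		if (freed < 0)
			printf("tx_done_cleanup failed: %d\n", freed);
	}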

In the testpmd single-core MAC forwarding scenario, performance with
64B packets improves by 10% on the Kunpeng920 platform.
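
A comparable setup can be reproduced with testpmd (the EAL core list
and device address below are illustrative and system-specific):

	dpdk-testpmd -l 0-1 -n 4 -a 0000:7d:00.0 -- \
		--forward-mode=mac --txd=1024 --rxd=1024 \
		--txfreet=32 --txrst=32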

Cc: stable@dpdk.org

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
---
 drivers/net/hns3/hns3_rxtx.c | 118 ++++++++++++++++-------------------
 1 file changed, 54 insertions(+), 64 deletions(-)

diff --git a/drivers/net/hns3/hns3_rxtx.c b/drivers/net/hns3/hns3_rxtx.c
index 78227a139f..114b2f397f 100644
--- a/drivers/net/hns3/hns3_rxtx.c
+++ b/drivers/net/hns3/hns3_rxtx.c
@@ -3077,40 +3077,51 @@ hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,
 	return 0;
 }
 
-static void
+static int
 hns3_tx_free_useless_buffer(struct hns3_tx_queue *txq)
 {
 	uint16_t tx_next_clean = txq->next_to_clean;
-	uint16_t tx_next_use   = txq->next_to_use;
-	uint16_t tx_bd_ready   = txq->tx_bd_ready;
-	uint16_t tx_bd_max     = txq->nb_tx_desc;
-	struct hns3_entry *tx_bak_pkt = &txq->sw_ring[tx_next_clean];
+	uint16_t tx_next_use = txq->next_to_use;
+	struct hns3_entry *tx_entry = &txq->sw_ring[tx_next_clean];
 	struct hns3_desc *desc = &txq->tx_ring[tx_next_clean];
-	struct rte_mbuf *mbuf;
+	int i;
 
-	while ((!(desc->tx.tp_fe_sc_vld_ra_ri &
-		rte_cpu_to_le_16(BIT(HNS3_TXD_VLD_B)))) &&
-		tx_next_use != tx_next_clean) {
-		mbuf = tx_bak_pkt->mbuf;
-		if (mbuf) {
-			rte_pktmbuf_free_seg(mbuf);
-			tx_bak_pkt->mbuf = NULL;
-		}
+	if (tx_next_use >= tx_next_clean &&
+	    tx_next_use < tx_next_clean + txq->tx_rs_thresh)
+		return -1;
 
-		desc++;
-		tx_bak_pkt++;
-		tx_next_clean++;
-		tx_bd_ready++;
-
-		if (tx_next_clean >= tx_bd_max) {
-			tx_next_clean = 0;
-			desc = txq->tx_ring;
-			tx_bak_pkt = txq->sw_ring;
-		}
+	/*
+	 * All mbufs can be released only when the VLD bits of all
+	 * descriptors in a batch are cleared.
+	 */
+	for (i = 0; i < txq->tx_rs_thresh; i++) {
+		if (desc[i].tx.tp_fe_sc_vld_ra_ri &
+			rte_le_to_cpu_16(BIT(HNS3_TXD_VLD_B)))
+			return -1;
 	}
 
-	txq->next_to_clean = tx_next_clean;
-	txq->tx_bd_ready   = tx_bd_ready;
+	for (i = 0; i < txq->tx_rs_thresh; i++) {
+		rte_pktmbuf_free_seg(tx_entry[i].mbuf);
+		tx_entry[i].mbuf = NULL;
+	}
+
+	/* Update the count of available descriptors after freeing buffers */
+	txq->tx_bd_ready += txq->tx_rs_thresh;
+	txq->next_to_clean += txq->tx_rs_thresh;
+	if (txq->next_to_clean >= txq->nb_tx_desc)
+		txq->next_to_clean = 0;
+
+	return 0;
+}
+
+static inline int
+hns3_tx_free_required_buffer(struct hns3_tx_queue *txq, uint16_t required_bds)
+{
+	while (required_bds > txq->tx_bd_ready) {
+		if (hns3_tx_free_useless_buffer(txq) != 0)
+			return -1;
+	}
+	return 0;
 }
 
 int
@@ -4147,8 +4158,8 @@ hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	uint16_t nb_tx;
 	uint16_t i;
 
-	/* free useless buffer */
-	hns3_tx_free_useless_buffer(txq);
+	if (txq->tx_bd_ready < txq->tx_free_thresh)
+		(void)hns3_tx_free_useless_buffer(txq);
 
 	tx_next_use   = txq->next_to_use;
 	tx_bd_max     = txq->nb_tx_desc;
@@ -4163,11 +4174,14 @@ hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 		nb_buf = tx_pkt->nb_segs;
 
 		if (nb_buf > txq->tx_bd_ready) {
-			txq->dfx_stats.queue_full_cnt++;
-			if (nb_tx == 0)
-				return 0;
-
-			goto end_of_tx;
+			/* Try to free only the required number of mbufs.
+			 * Avoid freeing them all at once, which could
+			 * take a long time and cause jitter.
+			 */
+			if (hns3_tx_free_required_buffer(txq, nb_buf) != 0) {
+				txq->dfx_stats.queue_full_cnt++;
+				goto end_of_tx;
+			}
 		}
 
 		/*
@@ -4577,46 +4591,22 @@ hns3_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 static int
 hns3_tx_done_cleanup_full(struct hns3_tx_queue *txq, uint32_t free_cnt)
 {
-	uint16_t next_to_clean = txq->next_to_clean;
-	uint16_t next_to_use   = txq->next_to_use;
-	uint16_t tx_bd_ready   = txq->tx_bd_ready;
-	struct hns3_entry *tx_pkt = &txq->sw_ring[next_to_clean];
-	struct hns3_desc *desc = &txq->tx_ring[next_to_clean];
+	uint16_t round_free_cnt;
 	uint32_t idx;
 
 	if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
 		free_cnt = txq->nb_tx_desc;
 
-	for (idx = 0; idx < free_cnt; idx++) {
-		if (next_to_clean == next_to_use)
-			break;
+	if (txq->tx_rs_thresh == 0)
+		return 0;
 
-		if (desc->tx.tp_fe_sc_vld_ra_ri &
-		    rte_cpu_to_le_16(BIT(HNS3_TXD_VLD_B)))
+	round_free_cnt = roundup(free_cnt, txq->tx_rs_thresh);
+	for (idx = 0; idx < round_free_cnt; idx += txq->tx_rs_thresh) {
+		if (hns3_tx_free_useless_buffer(txq) != 0)
 			break;
-
-		if (tx_pkt->mbuf != NULL) {
-			rte_pktmbuf_free_seg(tx_pkt->mbuf);
-			tx_pkt->mbuf = NULL;
-		}
-
-		next_to_clean++;
-		tx_bd_ready++;
-		tx_pkt++;
-		desc++;
-		if (next_to_clean == txq->nb_tx_desc) {
-			tx_pkt = txq->sw_ring;
-			desc = txq->tx_ring;
-			next_to_clean = 0;
-		}
-	}
-
-	if (idx > 0) {
-		txq->next_to_clean = next_to_clean;
-		txq->tx_bd_ready = tx_bd_ready;
 	}
 
-	return (int)idx;
+	return RTE_MIN(idx, free_cnt);
 }
 
 int
-- 
2.33.0



Thread overview: 14+ messages
2021-11-11 13:38 [PATCH 0/2] performance optimized for hns3 PMD Min Hu (Connor)
2021-11-11 13:38 ` [PATCH 1/2] net/hns3: optimized Tx performance by mbuf fast free Min Hu (Connor)
2021-11-15 17:30   ` Ferruh Yigit
2021-11-16  1:24     ` Min Hu (Connor)
2021-11-11 13:38 ` Min Hu (Connor) [this message]
2021-11-15 17:32   ` [PATCH 2/2] net/hns3: optimized Tx performance Ferruh Yigit
2021-11-16  1:22 ` [PATCH v2 0/2] performance optimized for hns3 PMD Min Hu (Connor)
2021-11-16  1:22   ` [PATCH v2 1/2] net/hns3: optimized Tx performance by mbuf fast free Min Hu (Connor)
2021-11-16  1:22   ` [PATCH v2 2/2] net/hns3: optimized Tx performance Min Hu (Connor)
2021-11-16 14:36   ` [PATCH v2 0/2] performance optimized for hns3 PMD Ferruh Yigit
2021-11-16 15:04     ` Fengchengwen
2021-11-16 15:12     ` humin (Q)
2021-11-16 15:38     ` Ferruh Yigit
2021-11-16 15:43   ` Ferruh Yigit
