DPDK patches and discussions
From: "Min Hu (Connor)" <humin29@huawei.com>
To: <dev@dpdk.org>
Cc: <ferruh.yigit@intel.com>, <thomas@monjalon.net>
Subject: [PATCH v2 2/2] net/hns3: optimized Tx performance
Date: Tue, 16 Nov 2021 09:22:12 +0800	[thread overview]
Message-ID: <20211116012212.64819-3-humin29@huawei.com> (raw)
In-Reply-To: <20211116012212.64819-1-humin29@huawei.com>

From: Chengwen Feng <fengchengwen@huawei.com>

This patch uses tx_free_thresh to control the freeing of mbufs when the
common xmit algorithm is used.
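
As a rough illustration of the idea (a simplified sketch, not the driver
code: the struct, its field subset and the sketch_* helpers are invented
for the example), reclaiming is now gated on tx_free_thresh instead of
happening on every burst:

	#include <stdint.h>

	/*
	 * Simplified stand-in for the hns3 Tx queue; only the fields
	 * needed to illustrate the threshold-gated free are kept.  The
	 * field names mirror the driver, everything else is illustrative.
	 */
	struct sketch_txq {
		uint16_t nb_tx_desc;      /* ring size */
		uint16_t tx_bd_ready;     /* descriptors free for new packets */
		uint16_t tx_free_thresh;  /* reclaim below this many free BDs */
		uint16_t tx_rs_thresh;    /* descriptors reclaimed per batch */
	};

	/*
	 * Hypothetical batch-free helper standing in for
	 * hns3_tx_free_useless_buffer(): returns 0 when a batch of
	 * tx_rs_thresh descriptors could be reclaimed, -1 otherwise.
	 */
	static int
	sketch_free_done(struct sketch_txq *txq)
	{
		(void)txq;
		return 0;
	}

	static void
	sketch_xmit_prologue(struct sketch_txq *txq)
	{
		/*
		 * Old behaviour: scan for completed mbufs on every burst.
		 * New behaviour: only reclaim when free descriptors run low.
		 */
		if (txq->tx_bd_ready < txq->tx_free_thresh)
			(void)sketch_free_done(txq);
	}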

This patch also modifies the implementation of the PMD's tx_done_cleanup
because the mbuf free algorithm has changed.
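
A similarly simplified sketch of the reworked cleanup path, reusing the
invented struct and sketch_free_done() helper above; it mirrors the
round-up-and-batch logic of the diff below without being the actual
implementation:

	static int
	sketch_tx_done_cleanup(struct sketch_txq *txq, uint32_t free_cnt)
	{
		uint32_t round_cnt;
		uint32_t idx;

		if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
			free_cnt = txq->nb_tx_desc;

		if (txq->tx_rs_thresh == 0)
			return 0;

		/* Round up so a partial request still attempts one full batch. */
		round_cnt = (free_cnt + txq->tx_rs_thresh - 1) /
			    txq->tx_rs_thresh * txq->tx_rs_thresh;
		for (idx = 0; idx < round_cnt; idx += txq->tx_rs_thresh) {
			if (sketch_free_done(txq) != 0)
				break;	/* this batch is still owned by hardware */
		}

		/* Never report more descriptors than the caller asked for. */
		return (int)(idx < free_cnt ? idx : free_cnt);
	}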

In the testpmd single-core MAC forwarding scenario, performance is
improved by 10% for 64B packets on the Kunpeng920 platform.

Cc: stable@dpdk.org

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
 drivers/net/hns3/hns3_rxtx.c | 118 ++++++++++++++++-------------------
 1 file changed, 54 insertions(+), 64 deletions(-)

diff --git a/drivers/net/hns3/hns3_rxtx.c b/drivers/net/hns3/hns3_rxtx.c
index f0a57611ec..40cc4e9c1a 100644
--- a/drivers/net/hns3/hns3_rxtx.c
+++ b/drivers/net/hns3/hns3_rxtx.c
@@ -3077,40 +3077,51 @@ hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,
 	return 0;
 }
 
-static void
+static int
 hns3_tx_free_useless_buffer(struct hns3_tx_queue *txq)
 {
 	uint16_t tx_next_clean = txq->next_to_clean;
-	uint16_t tx_next_use   = txq->next_to_use;
-	uint16_t tx_bd_ready   = txq->tx_bd_ready;
-	uint16_t tx_bd_max     = txq->nb_tx_desc;
-	struct hns3_entry *tx_bak_pkt = &txq->sw_ring[tx_next_clean];
+	uint16_t tx_next_use = txq->next_to_use;
+	struct hns3_entry *tx_entry = &txq->sw_ring[tx_next_clean];
 	struct hns3_desc *desc = &txq->tx_ring[tx_next_clean];
-	struct rte_mbuf *mbuf;
+	int i;
 
-	while ((!(desc->tx.tp_fe_sc_vld_ra_ri &
-		rte_cpu_to_le_16(BIT(HNS3_TXD_VLD_B)))) &&
-		tx_next_use != tx_next_clean) {
-		mbuf = tx_bak_pkt->mbuf;
-		if (mbuf) {
-			rte_pktmbuf_free_seg(mbuf);
-			tx_bak_pkt->mbuf = NULL;
-		}
+	if (tx_next_use >= tx_next_clean &&
+	    tx_next_use < tx_next_clean + txq->tx_rs_thresh)
+		return -1;
 
-		desc++;
-		tx_bak_pkt++;
-		tx_next_clean++;
-		tx_bd_ready++;
-
-		if (tx_next_clean >= tx_bd_max) {
-			tx_next_clean = 0;
-			desc = txq->tx_ring;
-			tx_bak_pkt = txq->sw_ring;
-		}
+	/*
+	 * All mbufs can be released only when the VLD bits of all
+	 * descriptors in a batch are cleared.
+	 */
+	for (i = 0; i < txq->tx_rs_thresh; i++) {
+		if (desc[i].tx.tp_fe_sc_vld_ra_ri &
+			rte_le_to_cpu_16(BIT(HNS3_TXD_VLD_B)))
+			return -1;
 	}
 
-	txq->next_to_clean = tx_next_clean;
-	txq->tx_bd_ready   = tx_bd_ready;
+	for (i = 0; i < txq->tx_rs_thresh; i++) {
+		rte_pktmbuf_free_seg(tx_entry[i].mbuf);
+		tx_entry[i].mbuf = NULL;
+	}
+
+	/* Update numbers of available descriptor due to buffer freed */
+	txq->tx_bd_ready += txq->tx_rs_thresh;
+	txq->next_to_clean += txq->tx_rs_thresh;
+	if (txq->next_to_clean >= txq->nb_tx_desc)
+		txq->next_to_clean = 0;
+
+	return 0;
+}
+
+static inline int
+hns3_tx_free_required_buffer(struct hns3_tx_queue *txq, uint16_t required_bds)
+{
+	while (required_bds > txq->tx_bd_ready) {
+		if (hns3_tx_free_useless_buffer(txq) != 0)
+			return -1;
+	}
+	return 0;
 }
 
 int
@@ -4147,8 +4158,8 @@ hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	uint16_t nb_tx;
 	uint16_t i;
 
-	/* free useless buffer */
-	hns3_tx_free_useless_buffer(txq);
+	if (txq->tx_bd_ready < txq->tx_free_thresh)
+		(void)hns3_tx_free_useless_buffer(txq);
 
 	tx_next_use   = txq->next_to_use;
 	tx_bd_max     = txq->nb_tx_desc;
@@ -4163,11 +4174,14 @@ hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 		nb_buf = tx_pkt->nb_segs;
 
 		if (nb_buf > txq->tx_bd_ready) {
-			txq->dfx_stats.queue_full_cnt++;
-			if (nb_tx == 0)
-				return 0;
-
-			goto end_of_tx;
+			/* Try to release the required MBUF, but avoid releasing
+			 * all MBUFs, otherwise, the MBUFs will be released for
+			 * a long time and may cause jitter.
+			 */
+			if (hns3_tx_free_required_buffer(txq, nb_buf) != 0) {
+				txq->dfx_stats.queue_full_cnt++;
+				goto end_of_tx;
+			}
 		}
 
 		/*
@@ -4577,46 +4591,22 @@ hns3_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 static int
 hns3_tx_done_cleanup_full(struct hns3_tx_queue *txq, uint32_t free_cnt)
 {
-	uint16_t next_to_clean = txq->next_to_clean;
-	uint16_t next_to_use   = txq->next_to_use;
-	uint16_t tx_bd_ready   = txq->tx_bd_ready;
-	struct hns3_entry *tx_pkt = &txq->sw_ring[next_to_clean];
-	struct hns3_desc *desc = &txq->tx_ring[next_to_clean];
+	uint16_t round_free_cnt;
 	uint32_t idx;
 
 	if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
 		free_cnt = txq->nb_tx_desc;
 
-	for (idx = 0; idx < free_cnt; idx++) {
-		if (next_to_clean == next_to_use)
-			break;
+	if (txq->tx_rs_thresh == 0)
+		return 0;
 
-		if (desc->tx.tp_fe_sc_vld_ra_ri &
-		    rte_cpu_to_le_16(BIT(HNS3_TXD_VLD_B)))
+	round_free_cnt = roundup(free_cnt, txq->tx_rs_thresh);
+	for (idx = 0; idx < round_free_cnt; idx += txq->tx_rs_thresh) {
+		if (hns3_tx_free_useless_buffer(txq) != 0)
 			break;
-
-		if (tx_pkt->mbuf != NULL) {
-			rte_pktmbuf_free_seg(tx_pkt->mbuf);
-			tx_pkt->mbuf = NULL;
-		}
-
-		next_to_clean++;
-		tx_bd_ready++;
-		tx_pkt++;
-		desc++;
-		if (next_to_clean == txq->nb_tx_desc) {
-			tx_pkt = txq->sw_ring;
-			desc = txq->tx_ring;
-			next_to_clean = 0;
-		}
-	}
-
-	if (idx > 0) {
-		txq->next_to_clean = next_to_clean;
-		txq->tx_bd_ready = tx_bd_ready;
 	}
 
-	return (int)idx;
+	return RTE_MIN(idx, free_cnt);
 }
 
 int
-- 
2.33.0


Thread overview: 14+ messages
2021-11-11 13:38 [PATCH 0/2] performance optimized for hns3 PMD Min Hu (Connor)
2021-11-11 13:38 ` [PATCH 1/2] net/hns3: optimized Tx performance by mbuf fast free Min Hu (Connor)
2021-11-15 17:30   ` Ferruh Yigit
2021-11-16  1:24     ` Min Hu (Connor)
2021-11-11 13:38 ` [PATCH 2/2] net/hns3: optimized Tx performance Min Hu (Connor)
2021-11-15 17:32   ` Ferruh Yigit
2021-11-16  1:22 ` [PATCH v2 0/2] performance optimized for hns3 PMD Min Hu (Connor)
2021-11-16  1:22   ` [PATCH v2 1/2] net/hns3: optimized Tx performance by mbuf fast free Min Hu (Connor)
2021-11-16  1:22   ` Min Hu (Connor) [this message]
2021-11-16 14:36   ` [PATCH v2 0/2] performance optimized for hns3 PMD Ferruh Yigit
2021-11-16 15:04     ` Fengchengwen
2021-11-16 15:12     ` humin (Q)
2021-11-16 15:38     ` Ferruh Yigit
2021-11-16 15:43   ` Ferruh Yigit
