From mboxrd@z Thu Jan 1 00:00:00 1970
From: "Min Hu (Connor)"
Subject: [PATCH v2 2/2] net/hns3: optimized Tx performance
Date: Tue, 16 Nov 2021 09:22:12 +0800
Message-ID: <20211116012212.64819-3-humin29@huawei.com>
X-Mailer: git-send-email 2.33.0
In-Reply-To: <20211116012212.64819-1-humin29@huawei.com>
References: <20211111133859.13705-1-humin29@huawei.com> <20211116012212.64819-1-humin29@huawei.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Content-Type: text/plain
List-Id: DPDK patches and discussions

From: Chengwen Feng

This patch uses tx_free_thresh to control when mbufs are freed while the
common xmit algorithm is in use. It also modifies the implementation of the
PMD's tx_done_cleanup, because the mbuf free algorithm has changed.

In the testpmd single-core MAC forwarding scenario, performance improves by
10% at 64B on the Kunpeng 920 platform.
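Below is a standalone sketch, in plain C, of the reclaim policy described
above. The names, the simplified ring model and the threshold values are
hypothetical examples, not the driver code: mbufs are reclaimed lazily, only
once the number of ready BDs drops below tx_free_thresh, and only in whole
tx_rs_thresh-sized batches whose VLD bits the hardware has already cleared.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NB_TX_DESC     64U  /* ring size used by this sketch */
#define TX_RS_THRESH   8U   /* mbufs are reclaimed in batches of this size */
#define TX_FREE_THRESH 48U  /* reclaim once ready BDs drop below this */

struct sketch_txq {
	bool     vld[NB_TX_DESC];   /* true = descriptor still owned by HW */
	uint16_t next_to_clean;
	uint16_t next_to_use;
	uint16_t tx_bd_ready;
};

/*
 * Free one batch of TX_RS_THRESH descriptors, but only when hardware has
 * cleared the VLD flag on every descriptor in the batch and the batch does
 * not overlap the slots software is still filling.
 */
static int
sketch_free_batch(struct sketch_txq *q)
{
	uint16_t start = q->next_to_clean;
	uint16_t i;

	if (q->next_to_use >= start && q->next_to_use < start + TX_RS_THRESH)
		return -1;	/* batch overlaps descriptors still in use */

	for (i = 0; i < TX_RS_THRESH; i++)
		if (q->vld[(start + i) % NB_TX_DESC])
			return -1;	/* hardware still owns part of it */

	/* the real driver would rte_pktmbuf_free_seg() each mbuf here */
	q->tx_bd_ready += TX_RS_THRESH;
	q->next_to_clean = (start + TX_RS_THRESH) % NB_TX_DESC;
	return 0;
}

/* Burst entry point: reclaim lazily, only when ready BDs run low. */
static void
sketch_xmit(struct sketch_txq *q)
{
	if (q->tx_bd_ready < TX_FREE_THRESH)
		(void)sketch_free_batch(q);
	/* ... descriptor filling would follow ... */
}

int
main(void)
{
	struct sketch_txq q;
	uint16_t i;

	memset(&q, 0, sizeof(q));
	q.tx_bd_ready = NB_TX_DESC;

	for (i = 0; i < 20; i++) {	/* "send" 20 single-BD packets */
		q.vld[q.next_to_use] = true;
		q.next_to_use = (q.next_to_use + 1) % NB_TX_DESC;
		q.tx_bd_ready--;
	}
	for (i = 0; i < 16; i++)	/* hardware completes 16 of them */
		q.vld[i] = false;

	sketch_xmit(&q);		/* reclaims one 8-BD batch */
	printf("ready BDs after xmit: %u\n", (unsigned)q.tx_bd_ready);
	return 0;
}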
Cc: stable@dpdk.org
Signed-off-by: Chengwen Feng
---
 drivers/net/hns3/hns3_rxtx.c | 118 ++++++++++++++++-------------------
 1 file changed, 54 insertions(+), 64 deletions(-)

diff --git a/drivers/net/hns3/hns3_rxtx.c b/drivers/net/hns3/hns3_rxtx.c
index f0a57611ec..40cc4e9c1a 100644
--- a/drivers/net/hns3/hns3_rxtx.c
+++ b/drivers/net/hns3/hns3_rxtx.c
@@ -3077,40 +3077,51 @@ hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,
 	return 0;
 }
 
-static void
+static int
 hns3_tx_free_useless_buffer(struct hns3_tx_queue *txq)
 {
 	uint16_t tx_next_clean = txq->next_to_clean;
-	uint16_t tx_next_use = txq->next_to_use;
-	uint16_t tx_bd_ready = txq->tx_bd_ready;
-	uint16_t tx_bd_max = txq->nb_tx_desc;
-	struct hns3_entry *tx_bak_pkt = &txq->sw_ring[tx_next_clean];
+	uint16_t tx_next_use = txq->next_to_use;
+	struct hns3_entry *tx_entry = &txq->sw_ring[tx_next_clean];
 	struct hns3_desc *desc = &txq->tx_ring[tx_next_clean];
-	struct rte_mbuf *mbuf;
+	int i;
 
-	while ((!(desc->tx.tp_fe_sc_vld_ra_ri &
-		rte_cpu_to_le_16(BIT(HNS3_TXD_VLD_B)))) &&
-		tx_next_use != tx_next_clean) {
-		mbuf = tx_bak_pkt->mbuf;
-		if (mbuf) {
-			rte_pktmbuf_free_seg(mbuf);
-			tx_bak_pkt->mbuf = NULL;
-		}
+	if (tx_next_use >= tx_next_clean &&
+	    tx_next_use < tx_next_clean + txq->tx_rs_thresh)
+		return -1;
 
-		desc++;
-		tx_bak_pkt++;
-		tx_next_clean++;
-		tx_bd_ready++;
-
-		if (tx_next_clean >= tx_bd_max) {
-			tx_next_clean = 0;
-			desc = txq->tx_ring;
-			tx_bak_pkt = txq->sw_ring;
-		}
+	/*
+	 * All mbufs can be released only when the VLD bits of all
+	 * descriptors in a batch are cleared.
+	 */
+	for (i = 0; i < txq->tx_rs_thresh; i++) {
+		if (desc[i].tx.tp_fe_sc_vld_ra_ri &
+			rte_le_to_cpu_16(BIT(HNS3_TXD_VLD_B)))
+			return -1;
 	}
 
-	txq->next_to_clean = tx_next_clean;
-	txq->tx_bd_ready = tx_bd_ready;
+	for (i = 0; i < txq->tx_rs_thresh; i++) {
+		rte_pktmbuf_free_seg(tx_entry[i].mbuf);
+		tx_entry[i].mbuf = NULL;
+	}
+
+	/* Update numbers of available descriptor due to buffer freed */
+	txq->tx_bd_ready += txq->tx_rs_thresh;
+	txq->next_to_clean += txq->tx_rs_thresh;
+	if (txq->next_to_clean >= txq->nb_tx_desc)
+		txq->next_to_clean = 0;
+
+	return 0;
+}
+
+static inline int
+hns3_tx_free_required_buffer(struct hns3_tx_queue *txq, uint16_t required_bds)
+{
+	while (required_bds > txq->tx_bd_ready) {
+		if (hns3_tx_free_useless_buffer(txq) != 0)
+			return -1;
+	}
+	return 0;
 }
 
 int
@@ -4147,8 +4158,8 @@ hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	uint16_t nb_tx;
 	uint16_t i;
 
-	/* free useless buffer */
-	hns3_tx_free_useless_buffer(txq);
+	if (txq->tx_bd_ready < txq->tx_free_thresh)
+		(void)hns3_tx_free_useless_buffer(txq);
 
 	tx_next_use = txq->next_to_use;
 	tx_bd_max = txq->nb_tx_desc;
@@ -4163,11 +4174,14 @@ hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 		nb_buf = tx_pkt->nb_segs;
 
 		if (nb_buf > txq->tx_bd_ready) {
-			txq->dfx_stats.queue_full_cnt++;
-			if (nb_tx == 0)
-				return 0;
-
-			goto end_of_tx;
+			/* Try to release the required MBUF, but avoid releasing
+			 * all MBUFs, otherwise, the MBUFs will be released for
+			 * a long time and may cause jitter.
+			 */
+			if (hns3_tx_free_required_buffer(txq, nb_buf) != 0) {
+				txq->dfx_stats.queue_full_cnt++;
+				goto end_of_tx;
+			}
 		}
 
 		/*
@@ -4577,46 +4591,22 @@ hns3_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 static int
 hns3_tx_done_cleanup_full(struct hns3_tx_queue *txq, uint32_t free_cnt)
 {
-	uint16_t next_to_clean = txq->next_to_clean;
-	uint16_t next_to_use = txq->next_to_use;
-	uint16_t tx_bd_ready = txq->tx_bd_ready;
-	struct hns3_entry *tx_pkt = &txq->sw_ring[next_to_clean];
-	struct hns3_desc *desc = &txq->tx_ring[next_to_clean];
+	uint16_t round_free_cnt;
 	uint32_t idx;
 
 	if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
 		free_cnt = txq->nb_tx_desc;
 
-	for (idx = 0; idx < free_cnt; idx++) {
-		if (next_to_clean == next_to_use)
-			break;
+	if (txq->tx_rs_thresh == 0)
+		return 0;
 
-		if (desc->tx.tp_fe_sc_vld_ra_ri &
-		    rte_cpu_to_le_16(BIT(HNS3_TXD_VLD_B)))
+	round_free_cnt = roundup(free_cnt, txq->tx_rs_thresh);
+	for (idx = 0; idx < round_free_cnt; idx += txq->tx_rs_thresh) {
+		if (hns3_tx_free_useless_buffer(txq) != 0)
 			break;
-
-		if (tx_pkt->mbuf != NULL) {
-			rte_pktmbuf_free_seg(tx_pkt->mbuf);
-			tx_pkt->mbuf = NULL;
-		}
-
-		next_to_clean++;
-		tx_bd_ready++;
-		tx_pkt++;
-		desc++;
-		if (next_to_clean == txq->nb_tx_desc) {
-			tx_pkt = txq->sw_ring;
-			desc = txq->tx_ring;
-			next_to_clean = 0;
-		}
-	}
-
-	if (idx > 0) {
-		txq->next_to_clean = next_to_clean;
-		txq->tx_bd_ready = tx_bd_ready;
 	}
 
-	return (int)idx;
+	return RTE_MIN(idx, free_cnt);
 }
 
 int
-- 
2.33.0
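
For context, the hns3_tx_done_cleanup_full() function reworked above backs
the generic rte_eth_tx_done_cleanup() ethdev API. A minimal application-side
usage sketch follows; the port/queue IDs and the request of 128 mbufs are
placeholder values, not something taken from this patch.

#include <stdio.h>
#include <rte_ethdev.h>

static void
reclaim_tx_mbufs(uint16_t port_id, uint16_t queue_id)
{
	/* Ask the PMD to free up to 128 already-transmitted mbufs; with this
	 * patch the hns3 PMD frees them in tx_rs_thresh-sized batches and
	 * reports at most the requested count.
	 */
	int nb = rte_eth_tx_done_cleanup(port_id, queue_id, 128);

	if (nb < 0)
		printf("tx_done_cleanup() failed: %d\n", nb);
}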