DPDK patches and discussions
 help / color / mirror / Atom feed
From: "Min Hu (Connor)" <humin29@huawei.com>
To: <dev@dpdk.org>
Cc: <ferruh.yigit@intel.com>
Subject: [dpdk-dev] [PATCH v2 2/9] net/hns3: support Tx push quick doorbell to improve perf
Date: Wed, 10 Mar 2021 14:16:18 +0800	[thread overview]
Message-ID: <1615356985-24722-3-git-send-email-humin29@huawei.com> (raw)
In-Reply-To: <1615356985-24722-1-git-send-email-humin29@huawei.com>

From: Chengwen Feng <fengchengwen@huawei.com>

Kunpeng 930 supports Tx push mode, which can improve performance. It
works as follows:
1. Add PCIe bar45, which lets the driver directly write the Tx descriptor
or tail reg to it.
2. Support three operations: a) directly write one Tx descriptor, b)
directly write two Tx descriptors, c) directly write the tail reg.
3. The original tail reg is located at bar23; the bar45 tail reg above
can provide better bandwidth from the hardware perspective.

The hns3 driver only supports directly writing the tail reg (also known
as the quick doorbell). The details:
1. For compatibility, the firmware reports the Tx push capability if the
hardware supports it.
2. Add the control macro RTE_LIBRTE_HNS3_ENABLE_TX_PUSH, which is not
defined by default.
3. If the user defines the macro RTE_LIBRTE_HNS3_ENABLE_TX_PUSH and the
hardware supports Tx push, the driver directly writes the bar45 tail reg
to inform the hardware.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
---
 doc/guides/rel_notes/release_21_05.rst |  1 +
 drivers/net/hns3/hns3_ethdev.c         |  4 +-
 drivers/net/hns3/hns3_ethdev_vf.c      |  4 +-
 drivers/net/hns3/hns3_rxtx.c           | 85 +++++++++++++++++++++++++++++++++-
 drivers/net/hns3/hns3_rxtx.h           | 24 ++++++++++
 drivers/net/hns3/hns3_rxtx_vec_neon.h  |  2 +-
 drivers/net/hns3/hns3_rxtx_vec_sve.c   |  2 +-
 7 files changed, 116 insertions(+), 6 deletions(-)

diff --git a/doc/guides/rel_notes/release_21_05.rst b/doc/guides/rel_notes/release_21_05.rst
index 1d85942..10f6dd0 100644
--- a/doc/guides/rel_notes/release_21_05.rst
+++ b/doc/guides/rel_notes/release_21_05.rst
@@ -61,6 +61,7 @@ New Features
   * Added support for freeing Tx mbuf on demand.
   * Added support for copper port in Kunpeng930.
   * Added support for runtime config to select IO burst function.
+  * Added support for Tx push quick doorbell to improve performance.
 
 * **Updated NXP DPAA2 driver.**
 
diff --git a/drivers/net/hns3/hns3_ethdev.c b/drivers/net/hns3/hns3_ethdev.c
index e921924..f725f5c 100644
--- a/drivers/net/hns3/hns3_ethdev.c
+++ b/drivers/net/hns3/hns3_ethdev.c
@@ -4886,6 +4886,8 @@ hns3_init_pf(struct rte_eth_dev *eth_dev)
 		goto err_cmd_init;
 	}
 
+	hns3_tx_push_init(eth_dev);
+
 	/*
 	 * To ensure that the hardware environment is clean during
 	 * initialization, the driver actively clear the hardware environment
@@ -6682,8 +6684,8 @@ hns3_dev_init(struct rte_eth_dev *eth_dev)
 				     "process, ret = %d", ret);
 			goto err_mp_init_secondary;
 		}
-
 		hw->secondary_cnt++;
+		hns3_tx_push_init(eth_dev);
 		return 0;
 	}
 
diff --git a/drivers/net/hns3/hns3_ethdev_vf.c b/drivers/net/hns3/hns3_ethdev_vf.c
index 5dd17c2..6a90cd5 100644
--- a/drivers/net/hns3/hns3_ethdev_vf.c
+++ b/drivers/net/hns3/hns3_ethdev_vf.c
@@ -1841,6 +1841,8 @@ hns3vf_init_vf(struct rte_eth_dev *eth_dev)
 		goto err_cmd_init;
 	}
 
+	hns3_tx_push_init(eth_dev);
+
 	/* Get VF resource */
 	ret = hns3_query_vf_resource(hw);
 	if (ret)
@@ -2818,8 +2820,8 @@ hns3vf_dev_init(struct rte_eth_dev *eth_dev)
 					  "process, ret = %d", ret);
 			goto err_mp_init_secondary;
 		}
-
 		hw->secondary_cnt++;
+		hns3_tx_push_init(eth_dev);
 		return 0;
 	}
 
diff --git a/drivers/net/hns3/hns3_rxtx.c b/drivers/net/hns3/hns3_rxtx.c
index f5c7d71..ffd8331 100644
--- a/drivers/net/hns3/hns3_rxtx.c
+++ b/drivers/net/hns3/hns3_rxtx.c
@@ -2758,6 +2758,81 @@ hns3_tx_queue_conf_check(struct hns3_hw *hw, const struct rte_eth_txconf *conf,
 	return 0;
 }
 
+#ifdef RTE_LIBRTE_HNS3_ENABLE_TX_PUSH
+static void *
+hns3_tx_push_get_queue_tail_reg(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+#define HNS3_TX_PUSH_TQP_REGION_SIZE		0x10000 /* 64KB per TQP */
+#define HNS3_TX_PUSH_QUICK_DOORBELL_OFFSET	64 /* bytes into the TQP region */
+#define HNS3_TX_PUSH_PCI_BAR_INDEX		4 /* mem_resource index of bar45 */
+
+	struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device);
+	uint8_t bar_id = HNS3_TX_PUSH_PCI_BAR_INDEX;
+
+	/*
+	 * Compute the address of the queue's quick doorbell (tail reg).
+	 *
+	 * If the device supports Tx push, its PCIe bar45 must exist, and the
+	 * DPDK framework mmaps bar45 by default in the PCI probe stage. The
+	 * first half of bar45 is for RoCE and the second half is for the NIC;
+	 * every TQP occupies 64KB, and the quick doorbell is located at a 64B
+	 * offset in the TQP region.
+	 */
+	return (void *)((char *)pci_dev->mem_resource[bar_id].addr +
+			(pci_dev->mem_resource[bar_id].len >> 1) +
+			HNS3_TX_PUSH_TQP_REGION_SIZE * queue_id +
+			HNS3_TX_PUSH_QUICK_DOORBELL_OFFSET);
+}
+#endif /* RTE_LIBRTE_HNS3_ENABLE_TX_PUSH */
+
+void
+hns3_tx_push_init(struct rte_eth_dev *dev)
+{
+#ifdef RTE_LIBRTE_HNS3_ENABLE_TX_PUSH
+	struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	volatile uint32_t *reg;
+	uint32_t val;
+
+	if (!hns3_dev_tx_push_supported(hw))
+		return;
+
+	reg = (volatile uint32_t *)hns3_tx_push_get_queue_tail_reg(dev, 0);
+	/*
+	 * Pre-fault bar45 here: it is about 8GB in size, so taking the page
+	 * fault in the Tx path may take a long time when working with
+	 * vfio-pci. One read makes the kernel set up the page table mapping
+	 * for bar45 in the init stage.
+	 * Note: bar45 is readable, but the result is all 1s.
+	 */
+	val = *reg;
+	RTE_SET_USED(val);
+#else /* Tx push compiled out: only mark dev as used */
+	RTE_SET_USED(dev);
+#endif /* RTE_LIBRTE_HNS3_ENABLE_TX_PUSH */
+}
+
+static void
+hns3_tx_push_queue_init(struct rte_eth_dev *dev,
+			uint16_t queue_id, /* Tx queue whose doorbell is selected */
+			struct hns3_tx_queue *txq)
+{
+#ifdef RTE_LIBRTE_HNS3_ENABLE_TX_PUSH
+	struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	if (!hns3_dev_tx_push_supported(hw)) {
+		txq->tx_push_enable = false; /* io_tail_reg is left untouched */
+		return;
+	}
+
+	txq->io_tail_reg = (volatile void *)hns3_tx_push_get_queue_tail_reg(dev,
+						queue_id);
+	txq->tx_push_enable = true; /* io_tail_reg now points into bar45 */
+#else /* Tx push compiled out */
+	RTE_SET_USED(dev);
+	RTE_SET_USED(queue_id);
+	txq->tx_push_enable = false; /* io_tail_reg is left untouched */
+#endif /* RTE_LIBRTE_HNS3_ENABLE_TX_PUSH */
+}
+
 int
 hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,
 		    unsigned int socket_id, const struct rte_eth_txconf *conf)
@@ -2848,6 +2923,12 @@ hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,
 	memset(&txq->basic_stats, 0, sizeof(struct hns3_tx_basic_stats));
 	memset(&txq->dfx_stats, 0, sizeof(struct hns3_tx_dfx_stats));
 
+	/*
+	 * Call hns3_tx_push_queue_init after assigned io_tail_reg field because
+	 * it may overwrite the io_tail_reg field.
+	 */
+	hns3_tx_push_queue_init(dev, idx, txq);
+
 	rte_spinlock_lock(&hw->lock);
 	dev->data->tx_queues[idx] = txq;
 	rte_spinlock_unlock(&hw->lock);
@@ -3770,7 +3851,7 @@ hns3_xmit_pkts_simple(void *tx_queue,
 	hns3_tx_fill_hw_ring(txq, tx_pkts + nb_tx, nb_pkts - nb_tx);
 	txq->next_to_use += nb_pkts - nb_tx;
 
-	hns3_write_reg_opt(txq->io_tail_reg, nb_pkts);
+	hns3_write_txq_tail_reg(txq, nb_pkts);
 
 	return nb_pkts;
 }
@@ -3887,7 +3968,7 @@ hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 end_of_tx:
 
 	if (likely(nb_tx))
-		hns3_write_reg_opt(txq->io_tail_reg, nb_hold);
+		hns3_write_txq_tail_reg(txq, nb_hold);
 
 	return nb_tx;
 }
diff --git a/drivers/net/hns3/hns3_rxtx.h b/drivers/net/hns3/hns3_rxtx.h
index 9adeb24..6ce89cc 100644
--- a/drivers/net/hns3/hns3_rxtx.h
+++ b/drivers/net/hns3/hns3_rxtx.h
@@ -406,6 +406,7 @@ struct hns3_tx_dfx_stats {
 
 struct hns3_tx_queue {
 	void *io_base;
+	/* The io_tail_reg is write-only if working in tx push mode */
 	volatile void *io_tail_reg;
 	struct hns3_adapter *hns;
 	struct hns3_desc *tx_ring;
@@ -471,6 +472,7 @@ struct hns3_tx_queue {
 
 	uint8_t max_non_tso_bd_num; /* max BD number of one non-TSO packet */
 	bool tx_deferred_start; /* don't start this queue in dev start */
+	bool tx_push_enable;    /* check whether the tx push is enabled */
 	bool configured;        /* indicate if tx queue has been configured */
 	/*
 	 * Indicate whether add the vlan_tci of the mbuf to the inner VLAN field
@@ -638,6 +640,27 @@ hns3_rx_calc_ptype(struct hns3_rx_queue *rxq, const uint32_t l234_info,
 			ptype_tbl->l4table[l4id];
 }
 
+/*
+ * Ring the Tx doorbell. If the tx push feature is enabled and the device
+ * supports it, use the quick doorbell (bar45), written as one 64-bit store.
+ *
+ * In the other cases (e.g. the device doesn't support it or the user doesn't
+ * enable it), use the normal doorbell (bar23), written as one 32-bit store.
+ */
+static inline void
+hns3_write_txq_tail_reg(struct hns3_tx_queue *txq, uint32_t value)
+{
+	rte_io_wmb(); /* write barrier ahead of the relaxed doorbell store */
+#ifdef RTE_LIBRTE_HNS3_ENABLE_TX_PUSH
+	if (txq->tx_push_enable) /* NOTE(review): le_32 value, 64-bit store - confirm */
+		rte_write64_relaxed(rte_cpu_to_le_32(value), txq->io_tail_reg);
+	else
+		rte_write32_relaxed(rte_cpu_to_le_32(value), txq->io_tail_reg);
+#else /* Tx push compiled out: always the normal doorbell */
+	rte_write32_relaxed(rte_cpu_to_le_32(value), txq->io_tail_reg);
+#endif /* RTE_LIBRTE_HNS3_ENABLE_TX_PUSH */
+}
+
 void hns3_dev_rx_queue_release(void *queue);
 void hns3_dev_tx_queue_release(void *queue);
 void hns3_free_all_queues(struct rte_eth_dev *dev);
@@ -718,5 +741,6 @@ void hns3_stop_all_txqs(struct rte_eth_dev *dev);
 void hns3_restore_tqp_enable_state(struct hns3_hw *hw);
 int hns3_tx_done_cleanup(void *txq, uint32_t free_cnt);
 void hns3_enable_rxd_adv_layout(struct hns3_hw *hw);
+void hns3_tx_push_init(struct rte_eth_dev *dev);
 
 #endif /* _HNS3_RXTX_H_ */
diff --git a/drivers/net/hns3/hns3_rxtx_vec_neon.h b/drivers/net/hns3/hns3_rxtx_vec_neon.h
index 68f098f..b5047e7 100644
--- a/drivers/net/hns3/hns3_rxtx_vec_neon.h
+++ b/drivers/net/hns3/hns3_rxtx_vec_neon.h
@@ -84,7 +84,7 @@ hns3_xmit_fixed_burst_vec(void *__restrict tx_queue,
 	txq->next_to_use = next_to_use;
 	txq->tx_bd_ready -= nb_tx;
 
-	hns3_write_reg_opt(txq->io_tail_reg, nb_tx);
+	hns3_write_txq_tail_reg(txq, nb_tx);
 
 	return nb_tx;
 }
diff --git a/drivers/net/hns3/hns3_rxtx_vec_sve.c b/drivers/net/hns3/hns3_rxtx_vec_sve.c
index 2a22a1a..6a20378 100644
--- a/drivers/net/hns3/hns3_rxtx_vec_sve.c
+++ b/drivers/net/hns3/hns3_rxtx_vec_sve.c
@@ -452,7 +452,7 @@ hns3_xmit_fixed_burst_vec_sve(void *__restrict tx_queue,
 	txq->next_to_use += nb_pkts - nb_tx;
 
 	txq->tx_bd_ready -= nb_pkts;
-	hns3_write_reg_opt(txq->io_tail_reg, nb_pkts);
+	hns3_write_txq_tail_reg(txq, nb_pkts);
 
 	return nb_pkts;
 }
-- 
2.7.4


  parent reply	other threads:[~2021-03-10  6:16 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-10  6:16 [dpdk-dev] [PATCH v2 0/9] features and bugfixes for hns3 Min Hu (Connor)
2021-03-10  6:16 ` [dpdk-dev] [PATCH v2 1/9] net/hns3: support runtime config to select IO burst func Min Hu (Connor)
2021-03-11 17:31   ` Ferruh Yigit
2021-03-12  0:59     ` Min Hu (Connor)
2021-03-11 17:39   ` Ferruh Yigit
2021-03-12  1:01     ` Min Hu (Connor)
2021-03-10  6:16 ` Min Hu (Connor) [this message]
2021-03-11 18:05   ` [dpdk-dev] [PATCH v2 2/9] net/hns3: support Tx push quick doorbell to improve perf Ferruh Yigit
2021-03-12 11:02     ` Min Hu (Connor)
2021-03-10  6:16 ` [dpdk-dev] [PATCH v2 3/9] net/hns3: support for outer UDP cksum Min Hu (Connor)
2021-03-10  6:16 ` [dpdk-dev] [PATCH v2 4/9] net/hns3: adjust the format of RAS related structures Min Hu (Connor)
2021-03-11 18:25   ` Ferruh Yigit
2021-03-12  1:51     ` Min Hu (Connor)
2021-03-12 10:02       ` Ferruh Yigit
2021-03-12 10:51         ` Min Hu (Connor)
2021-03-16 11:32           ` Min Hu (Connor)
2021-03-10  6:16 ` [dpdk-dev] [PATCH v2 5/9] net/hns3: delete redundant xstats RAS statistics Min Hu (Connor)
2021-03-10  6:16 ` [dpdk-dev] [PATCH v2 6/9] net/hns3: support imissed stats for PF/VF Min Hu (Connor)
2021-03-10  6:16 ` [dpdk-dev] [PATCH v2 7/9] net/hns3: support oerrors stats in PF Min Hu (Connor)
2021-03-10  6:16 ` [dpdk-dev] [PATCH v2 8/9] net/hns3: support query Tx descriptor status Min Hu (Connor)
2021-03-10  6:16 ` [dpdk-dev] [PATCH v2 9/9] net/hns3: support query Rx " Min Hu (Connor)
2021-03-12 11:51 ` [dpdk-dev] [PATCH v3 0/8] features and bugfixes for hns3 Min Hu (Connor)
2021-03-12 11:51   ` [dpdk-dev] [PATCH v3 1/8] net/hns3: support runtime config to select IO burst func Min Hu (Connor)
2021-03-16 12:40     ` Ferruh Yigit
2021-03-17  1:14       ` Min Hu (Connor)
2021-03-17 15:28         ` Ferruh Yigit
2021-03-19  1:08           ` Min Hu (Connor)
2021-03-12 11:51   ` [dpdk-dev] [PATCH v3 2/8] net/hns3: support for outer UDP cksum Min Hu (Connor)
2021-03-12 11:51   ` [dpdk-dev] [PATCH v3 3/8] net/hns3: adjust the format of RAS related structures Min Hu (Connor)
2021-03-12 11:51   ` [dpdk-dev] [PATCH v3 4/8] net/hns3: delete redundant xstats RAS statistics Min Hu (Connor)
2021-03-12 11:51   ` [dpdk-dev] [PATCH v3 5/8] net/hns3: support imissed stats for PF/VF Min Hu (Connor)
2021-03-12 11:51   ` [dpdk-dev] [PATCH v3 6/8] net/hns3: support oerrors stats in PF Min Hu (Connor)
2021-03-12 11:51   ` [dpdk-dev] [PATCH v3 7/8] net/hns3: support query Tx descriptor status Min Hu (Connor)
2021-03-12 11:51   ` [dpdk-dev] [PATCH v3 8/8] net/hns3: support query Rx " Min Hu (Connor)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1615356985-24722-3-git-send-email-humin29@huawei.com \
    --to=humin29@huawei.com \
    --cc=dev@dpdk.org \
    --cc=ferruh.yigit@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).