From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 28F5CA04B9; Mon, 7 Sep 2020 11:09:53 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id BB3711C120; Mon, 7 Sep 2020 11:09:20 +0200 (CEST) Received: from mail.chinasoftinc.com (unknown [114.113.233.8]) by dpdk.org (Postfix) with ESMTP id 75BF61BF8A for ; Mon, 7 Sep 2020 11:09:18 +0200 (CEST) Received: from localhost.localdomain (65.49.108.226) by INCCAS002.ito.icss (10.168.0.60) with Microsoft SMTP Server id 14.3.487.0; Mon, 7 Sep 2020 17:09:11 +0800 From: "Wei Hu (Xavier)" To: CC: Date: Mon, 7 Sep 2020 17:08:22 +0800 Message-ID: <20200907090825.1761-6-huwei013@chinasoftinc.com> X-Mailer: git-send-email 2.9.5 In-Reply-To: <20200907090825.1761-1-huwei013@chinasoftinc.com> References: <20200907090825.1761-1-huwei013@chinasoftinc.com> MIME-Version: 1.0 Content-Type: text/plain X-Originating-IP: [65.49.108.226] Subject: [dpdk-dev] [PATCH 5/8] net/hns3: add vector Tx burst with NEON instructions X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: "Wei Hu (Xavier)" This patch adds a vector Tx burst function that uses NEON instructions to optimize the Tx burst process. 
Signed-off-by: Huisong Li Signed-off-by: Wei Hu (Xavier) Signed-off-by: Chengwen Feng --- config/common_base | 1 + config/common_linux | 1 + drivers/net/hns3/Makefile | 5 +++ drivers/net/hns3/hns3_ethdev.c | 2 + drivers/net/hns3/hns3_ethdev.h | 2 + drivers/net/hns3/hns3_ethdev_vf.c | 2 + drivers/net/hns3/hns3_rxtx.c | 33 ++++++++++++++ drivers/net/hns3/hns3_rxtx.h | 20 ++++++++- drivers/net/hns3/hns3_rxtx_vec.c | 47 ++++++++++++++++++++ drivers/net/hns3/hns3_rxtx_vec.h | 57 ++++++++++++++++++++++++ drivers/net/hns3/hns3_rxtx_vec_neon.h | 81 +++++++++++++++++++++++++++++++++++ drivers/net/hns3/meson.build | 4 ++ 12 files changed, 254 insertions(+), 1 deletion(-) create mode 100644 drivers/net/hns3/hns3_rxtx_vec.c create mode 100644 drivers/net/hns3/hns3_rxtx_vec.h create mode 100644 drivers/net/hns3/hns3_rxtx_vec_neon.h diff --git a/config/common_base b/config/common_base index fbf0ee7..af1dea6 100644 --- a/config/common_base +++ b/config/common_base @@ -292,6 +292,7 @@ CONFIG_RTE_LIBRTE_HINIC_PMD=n # Compile burst-oriented HNS3 PMD driver # CONFIG_RTE_LIBRTE_HNS3_PMD=n +CONFIG_RTE_LIBRTE_HNS3_INC_VECTOR=n # # Compile Pensando IONIC PMD driver diff --git a/config/common_linux b/config/common_linux index 8168106..e88a404 100644 --- a/config/common_linux +++ b/config/common_linux @@ -66,3 +66,4 @@ CONFIG_RTE_LIBRTE_HINIC_PMD=y # Hisilicon HNS3 PMD driver # CONFIG_RTE_LIBRTE_HNS3_PMD=y +CONFIG_RTE_LIBRTE_HNS3_INC_VECTOR=y diff --git a/drivers/net/hns3/Makefile b/drivers/net/hns3/Makefile index d7798a4..d08d8fa 100644 --- a/drivers/net/hns3/Makefile +++ b/drivers/net/hns3/Makefile @@ -30,6 +30,11 @@ SRCS-$(CONFIG_RTE_LIBRTE_HNS3_PMD) += hns3_ethdev_vf.c SRCS-$(CONFIG_RTE_LIBRTE_HNS3_PMD) += hns3_cmd.c SRCS-$(CONFIG_RTE_LIBRTE_HNS3_PMD) += hns3_mbx.c SRCS-$(CONFIG_RTE_LIBRTE_HNS3_PMD) += hns3_rxtx.c + +ifeq ($(CONFIG_RTE_ARCH_ARM64),y) +SRCS-$(CONFIG_RTE_LIBRTE_HNS3_INC_VECTOR) += hns3_rxtx_vec.c +endif + SRCS-$(CONFIG_RTE_LIBRTE_HNS3_PMD) += hns3_rss.c 
SRCS-$(CONFIG_RTE_LIBRTE_HNS3_PMD) += hns3_flow.c SRCS-$(CONFIG_RTE_LIBRTE_HNS3_PMD) += hns3_fdir.c diff --git a/drivers/net/hns3/hns3_ethdev.c b/drivers/net/hns3/hns3_ethdev.c index 8701994..68239f5 100644 --- a/drivers/net/hns3/hns3_ethdev.c +++ b/drivers/net/hns3/hns3_ethdev.c @@ -2353,6 +2353,8 @@ hns3_dev_configure(struct rte_eth_dev *dev) hns->rx_simple_allowed = true; hns->tx_simple_allowed = true; + hns->tx_vec_allowed = true; + hns3_init_rx_ptype_tble(dev); hw->adapter_state = HNS3_NIC_CONFIGURED; diff --git a/drivers/net/hns3/hns3_ethdev.h b/drivers/net/hns3/hns3_ethdev.h index ef85034..098b6ce 100644 --- a/drivers/net/hns3/hns3_ethdev.h +++ b/drivers/net/hns3/hns3_ethdev.h @@ -644,6 +644,8 @@ struct hns3_adapter { bool rx_simple_allowed; bool tx_simple_allowed; + bool tx_vec_allowed; + struct hns3_ptype_table ptype_tbl __rte_cache_min_aligned; }; diff --git a/drivers/net/hns3/hns3_ethdev_vf.c b/drivers/net/hns3/hns3_ethdev_vf.c index 915b896..f3e6aea 100644 --- a/drivers/net/hns3/hns3_ethdev_vf.c +++ b/drivers/net/hns3/hns3_ethdev_vf.c @@ -823,6 +823,8 @@ hns3vf_dev_configure(struct rte_eth_dev *dev) hns->rx_simple_allowed = true; hns->tx_simple_allowed = true; + hns->tx_vec_allowed = true; + hns3_init_rx_ptype_tble(dev); hw->adapter_state = HNS3_NIC_CONFIGURED; diff --git a/drivers/net/hns3/hns3_rxtx.c b/drivers/net/hns3/hns3_rxtx.c index 08a3dcd..a537fbe 100644 --- a/drivers/net/hns3/hns3_rxtx.c +++ b/drivers/net/hns3/hns3_rxtx.c @@ -95,6 +95,8 @@ hns3_tx_queue_release(void *queue) rte_memzone_free(txq->mz); if (txq->sw_ring) rte_free(txq->sw_ring); + if (txq->free) + rte_free(txq->free); rte_free(txq); } } @@ -1020,6 +1022,7 @@ hns3_fake_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, /* Don't need alloc sw_ring, because upper applications don't use it */ txq->sw_ring = NULL; + txq->free = NULL; txq->hns = hns; txq->tx_deferred_start = false; @@ -2052,6 +2055,15 @@ hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, 
txq->tx_bd_ready = txq->nb_tx_desc - 1; txq->tx_free_thresh = tx_free_thresh; txq->tx_rs_thresh = tx_rs_thresh; + txq->free = rte_zmalloc_socket("hns3 TX mbuf free array", + sizeof(struct rte_mbuf *) * txq->tx_rs_thresh, + RTE_CACHE_LINE_SIZE, socket_id); + if (!txq->free) { + hns3_err(hw, "failed to allocate tx mbuf free array!"); + hns3_tx_queue_release(txq); + return -ENOMEM; + } + txq->port_id = dev->data->port_id; txq->pvid_state = hw->port_base_vlan_cfg.state; txq->configured = true; @@ -3105,6 +3117,20 @@ hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) return nb_tx; } +int __rte_weak +hns3_tx_check_vec_support(__rte_unused struct rte_eth_dev *dev) +{ + return -ENOTSUP; +} + +uint16_t __rte_weak +hns3_xmit_pkts_vec(__rte_unused void *tx_queue, + __rte_unused struct rte_mbuf **tx_pkts, + __rte_unused uint16_t nb_pkts) +{ + return 0; +} + int hns3_tx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id, struct rte_eth_burst_mode *mode) @@ -3116,6 +3142,8 @@ hns3_tx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id, info = "Scalar Simple"; else if (pkt_burst == hns3_xmit_pkts) info = "Scalar"; + else if (pkt_burst == hns3_xmit_pkts_vec) + info = "Vector Neon"; if (info == NULL) return -EINVAL; @@ -3131,6 +3159,11 @@ hns3_get_tx_function(struct rte_eth_dev *dev, eth_tx_prep_t *prep) uint64_t offloads = dev->data->dev_conf.txmode.offloads; struct hns3_adapter *hns = dev->data->dev_private; + if (hns->tx_vec_allowed && hns3_tx_check_vec_support(dev) == 0) { + *prep = NULL; + return hns3_xmit_pkts_vec; + } + if (hns->tx_simple_allowed && offloads == (offloads & DEV_TX_OFFLOAD_MBUF_FAST_FREE)) { *prep = NULL; diff --git a/drivers/net/hns3/hns3_rxtx.h b/drivers/net/hns3/hns3_rxtx.h index 9933494..c5a510b 100644 --- a/drivers/net/hns3/hns3_rxtx.h +++ b/drivers/net/hns3/hns3_rxtx.h @@ -17,6 +17,10 @@ #define HNS3_DEFAULT_TX_RS_THRESH 32 #define HNS3_TX_FAST_FREE_AHEAD 64 +#define HNS3_UINT8_BIT 8 
+#define HNS3_UINT16_BIT 16 +#define HNS3_UINT32_BIT 32 + #define HNS3_512_BD_BUF_SIZE 512 #define HNS3_1K_BD_BUF_SIZE 1024 #define HNS3_2K_BD_BUF_SIZE 2048 @@ -132,6 +136,13 @@ #define HNS3_L3_LEN_UNIT 2UL #define HNS3_L4_LEN_UNIT 2UL +#define HNS3_TXD_DEFAULT_BDTYPE 0 +#define HNS3_TXD_VLD_CMD (0x1 << HNS3_TXD_VLD_B) +#define HNS3_TXD_FE_CMD (0x1 << HNS3_TXD_FE_B) +#define HNS3_TXD_DEFAULT_VLD_FE_BDTYPE \ + (HNS3_TXD_VLD_CMD | HNS3_TXD_FE_CMD | HNS3_TXD_DEFAULT_BDTYPE) +#define HNS3_TXD_SEND_SIZE_SHIFT 16 + enum hns3_pkt_l2t_type { HNS3_L2_TYPE_UNICAST, HNS3_L2_TYPE_MULTICAST, @@ -317,9 +328,13 @@ struct hns3_tx_queue { * all descriptors are cleared. and then free all mbufs in the batch. * - tx_rs_thresh * Number of mbufs released at a time. - + * + * - free + * Tx mbuf free array used to temporarily hold the addresses of mbufs + * released back to the mempool when freeing mbufs in batches. */ uint16_t tx_rs_thresh; + struct rte_mbuf **free; /* * port based vlan configuration state. @@ -558,6 +573,8 @@ uint16_t hns3_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); uint16_t hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t hns3_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); int hns3_tx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id, struct rte_eth_burst_mode *mode); @@ -577,6 +594,7 @@ int hns3_restore_gro_conf(struct hns3_hw *hw); void hns3_update_all_queues_pvid_state(struct hns3_hw *hw); void hns3_rx_scattered_reset(struct rte_eth_dev *dev); void hns3_rx_scattered_calc(struct rte_eth_dev *dev); +int hns3_tx_check_vec_support(struct rte_eth_dev *dev); void hns3_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, struct rte_eth_rxq_info *qinfo); void hns3_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, diff --git a/drivers/net/hns3/hns3_rxtx_vec.c b/drivers/net/hns3/hns3_rxtx_vec.c new file mode 100644 index 0000000..1154b6f --- 
/dev/null +++ b/drivers/net/hns3/hns3_rxtx_vec.c @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2020 Hisilicon Limited. + */ + +#include +#include + +#include "hns3_ethdev.h" +#include "hns3_rxtx.h" +#include "hns3_rxtx_vec.h" + +#if defined RTE_ARCH_ARM64 +#include "hns3_rxtx_vec_neon.h" +#endif + +int +hns3_tx_check_vec_support(struct rte_eth_dev *dev) +{ + struct rte_eth_txmode *txmode = &dev->data->dev_conf.txmode; + + /* Only support DEV_TX_OFFLOAD_MBUF_FAST_FREE */ + if (txmode->offloads != DEV_TX_OFFLOAD_MBUF_FAST_FREE) + return -ENOTSUP; + + return 0; +} + +uint16_t +hns3_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + struct hns3_tx_queue *txq = (struct hns3_tx_queue *)tx_queue; + uint16_t nb_tx = 0; + + while (nb_pkts) { + uint16_t ret, new_burst; + + new_burst = RTE_MIN(nb_pkts, txq->tx_rs_thresh); + ret = hns3_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx], + new_burst); + nb_tx += ret; + nb_pkts -= ret; + if (ret < new_burst) + break; + } + + return nb_tx; +} diff --git a/drivers/net/hns3/hns3_rxtx_vec.h b/drivers/net/hns3/hns3_rxtx_vec.h new file mode 100644 index 0000000..90679bf --- /dev/null +++ b/drivers/net/hns3/hns3_rxtx_vec.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2020 Hisilicon Limited. + */ + +#ifndef _HNS3_RXTX_VEC_H_ +#define _HNS3_RXTX_VEC_H_ + +#include "hns3_rxtx.h" +#include "hns3_ethdev.h" + +static inline void +hns3_tx_free_buffers(struct hns3_tx_queue *txq) +{ + struct rte_mbuf **free = txq->free; + struct hns3_entry *tx_entry; + struct hns3_desc *tx_desc; + struct rte_mbuf *m; + int nb_free = 0; + int i; + + /* + * All mbufs can be released only when the VLD bits of all + * descriptors in a batch are cleared. 
+ */ + tx_desc = &txq->tx_ring[txq->next_to_clean]; + for (i = 0; i < txq->tx_rs_thresh; i++, tx_desc++) { + if (tx_desc->tx.tp_fe_sc_vld_ra_ri & + rte_le_to_cpu_16(BIT(HNS3_TXD_VLD_B))) + return; + } + + tx_entry = &txq->sw_ring[txq->next_to_clean]; + for (i = 0; i < txq->tx_rs_thresh; i++, tx_entry++) { + m = rte_pktmbuf_prefree_seg(tx_entry->mbuf); + tx_entry->mbuf = NULL; + + if (m == NULL) + continue; + + if (nb_free && m->pool != free[0]->pool) { + rte_mempool_put_bulk(free[0]->pool, (void **)free, + nb_free); + nb_free = 0; + } + free[nb_free++] = m; + } + + if (nb_free) + rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free); + + /* Update the number of available descriptors after freeing buffers */ + txq->tx_bd_ready += txq->tx_rs_thresh; + txq->next_to_clean += txq->tx_rs_thresh; + if (txq->next_to_clean >= txq->nb_tx_desc) + txq->next_to_clean = 0; +} +#endif /* _HNS3_RXTX_VEC_H_ */ diff --git a/drivers/net/hns3/hns3_rxtx_vec_neon.h b/drivers/net/hns3/hns3_rxtx_vec_neon.h new file mode 100644 index 0000000..2bd2b35 --- /dev/null +++ b/drivers/net/hns3/hns3_rxtx_vec_neon.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2020 Hisilicon Limited. 
+ */ + +#ifndef _HNS3_RXTX_VEC_NEON_H_ +#define _HNS3_RXTX_VEC_NEON_H_ + +#include + +#pragma GCC diagnostic ignored "-Wcast-qual" + +static inline void +hns3_vec_tx(volatile struct hns3_desc *desc, struct rte_mbuf *pkt) +{ + uint64x2_t val1 = { pkt->buf_iova + pkt->data_off, + ((uint64_t)pkt->data_len) << HNS3_TXD_SEND_SIZE_SHIFT }; + uint64x2_t val2 = { 0, + ((uint64_t)HNS3_TXD_DEFAULT_VLD_FE_BDTYPE) << HNS3_UINT32_BIT }; + vst1q_u64((uint64_t *)&desc->addr, val1); + vst1q_u64((uint64_t *)&desc->tx.outer_vlan_tag, val2); +} + +static uint16_t +hns3_xmit_fixed_burst_vec(void *__restrict tx_queue, + struct rte_mbuf **__restrict tx_pkts, + uint16_t nb_pkts) +{ + struct hns3_tx_queue *txq = (struct hns3_tx_queue *)tx_queue; + volatile struct hns3_desc *tx_desc; + struct hns3_entry *tx_entry; + uint16_t next_to_use; + uint16_t nb_commit; + uint16_t nb_tx; + uint16_t n, i; + + if (txq->tx_bd_ready < txq->tx_free_thresh) + hns3_tx_free_buffers(txq); + + nb_commit = RTE_MIN(txq->tx_bd_ready, nb_pkts); + if (unlikely(nb_commit == 0)) { + txq->queue_full_cnt++; + return 0; + } + nb_tx = nb_commit; + + next_to_use = txq->next_to_use; + tx_desc = &txq->tx_ring[next_to_use]; + tx_entry = &txq->sw_ring[next_to_use]; + + /* + * We need to deal with n descriptors first for better performance, + * if nb_commit is greater than the difference between txq->nb_tx_desc + * and next_to_use in sw_ring and tx_ring. 
+ */ + n = txq->nb_tx_desc - next_to_use; + if (nb_commit >= n) { + for (i = 0; i < n; i++, tx_pkts++, tx_desc++) { + hns3_vec_tx(tx_desc, *tx_pkts); + tx_entry[i].mbuf = *tx_pkts; + } + + nb_commit -= n; + next_to_use = 0; + tx_desc = &txq->tx_ring[next_to_use]; + tx_entry = &txq->sw_ring[next_to_use]; + } + + for (i = 0; i < nb_commit; i++, tx_pkts++, tx_desc++) { + hns3_vec_tx(tx_desc, *tx_pkts); + tx_entry[i].mbuf = *tx_pkts; + } + + next_to_use += nb_commit; + txq->next_to_use = next_to_use; + txq->tx_bd_ready -= nb_tx; + + hns3_write_reg_opt(txq->io_tail_reg, nb_tx); + + return nb_tx; +} +#endif /* _HNS3_RXTX_VEC_NEON_H_ */ diff --git a/drivers/net/hns3/meson.build b/drivers/net/hns3/meson.build index e01e6ce..19aee71 100644 --- a/drivers/net/hns3/meson.build +++ b/drivers/net/hns3/meson.build @@ -27,4 +27,8 @@ sources = files('hns3_cmd.c', 'hns3_stats.c', 'hns3_mp.c') +if (dpdk_conf.has('RTE_ARCH_ARM64')) + sources += files('hns3_rxtx_vec.c') +endif + deps += ['hash'] -- 2.9.5