Subject: Re: [PATCH v11 2/4] net/i40e: implement mbufs recycle mode
From: Konstantin Ananyev
To: Konstantin Ananyev, Feifei Wang
Cc: "dev@dpdk.org", nd, Honnappa Nagarahalli, Ruifeng Wang, Yuying Zhang, Beilei Xing
Date: Fri, 1 Sep 2023 00:49:51 +0100
Message-ID: <596e6bef-5cb8-4754-2910-2c89c39d67e7@yandex.ru>
In-Reply-To:
References: <20220420081650.2043183-1-feifei.wang2@arm.com>
 <20230822072710.1945027-1-feifei.wang2@arm.com>
 <20230822072710.1945027-3-feifei.wang2@arm.com>

31/08/2023 18:24, Konstantin Ananyev wrote:
>
>
>>>
>>> Define specific function implementation for i40e driver.
>>> Currently, mbufs recycle mode can support 128bit vector path and avx2 path.
>>> It can be enabled in both fast free and no fast free mode.
>>>
>>> Suggested-by: Honnappa Nagarahalli
>>> Signed-off-by: Feifei Wang
>>> Reviewed-by: Ruifeng Wang
>>> Reviewed-by: Honnappa Nagarahalli
>>> ---
>>>  drivers/net/i40e/i40e_ethdev.c                |   1 +
>>>  drivers/net/i40e/i40e_ethdev.h                |   2 +
>>>  .../net/i40e/i40e_recycle_mbufs_vec_common.c  | 147 ++++++++++++++++++
>>>  drivers/net/i40e/i40e_rxtx.c                  |  32 ++++
>>>  drivers/net/i40e/i40e_rxtx.h                  |   4 +
>>>  drivers/net/i40e/meson.build                  |   1 +
>>>  6 files changed, 187 insertions(+)
>>>  create mode 100644 drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
>>>
>>> diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
>>> index 8271bbb394..50ba9aac94 100644
>>> --- a/drivers/net/i40e/i40e_ethdev.c
>>> +++ b/drivers/net/i40e/i40e_ethdev.c
>>> @@ -496,6 +496,7 @@ static const struct eth_dev_ops i40e_eth_dev_ops = {
>>>  	.flow_ops_get                 = i40e_dev_flow_ops_get,
>>>  	.rxq_info_get                 = i40e_rxq_info_get,
>>>  	.txq_info_get                 = i40e_txq_info_get,
>>> +	.recycle_rxq_info_get         = i40e_recycle_rxq_info_get,
>>>  	.rx_burst_mode_get            = i40e_rx_burst_mode_get,
>>>  	.tx_burst_mode_get            = i40e_tx_burst_mode_get,
>>>  	.timesync_enable              = i40e_timesync_enable,
>>> diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
>>> index 6f65d5e0ac..af758798e1 100644
>>> --- a/drivers/net/i40e/i40e_ethdev.h
>>> +++ b/drivers/net/i40e/i40e_ethdev.h
>>> @@ -1355,6 +1355,8 @@ void i40e_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
>>>  	struct rte_eth_rxq_info *qinfo);
>>>  void i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
>>>  	struct rte_eth_txq_info *qinfo);
>>> +void i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
>>> +	struct rte_eth_recycle_rxq_info *recycle_rxq_info);
>>>  int i40e_rx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id,
>>>  	struct rte_eth_burst_mode *mode);
>>>  int i40e_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id,
>>> diff --git a/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
>>> new file mode 100644
>>> index 0000000000..5663ecccde
>>> --- /dev/null
>>> +++ b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
>>> @@ -0,0 +1,147 @@
>>> +/* SPDX-License-Identifier: BSD-3-Clause
>>> + * Copyright (c) 2023 Arm Limited.
>>> + */
>>> +
>>> +#include <stdint.h>
>>> +#include <ethdev_driver.h>
>>> +
>>> +#include "base/i40e_prototype.h"
>>> +#include "base/i40e_type.h"
>>> +#include "i40e_ethdev.h"
>>> +#include "i40e_rxtx.h"
>>> +
>>> +#pragma GCC diagnostic ignored "-Wcast-qual"
>>> +
>>> +void
>>> +i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
>>> +{
>>> +	struct i40e_rx_queue *rxq = rx_queue;
>>> +	struct i40e_rx_entry *rxep;
>>> +	volatile union i40e_rx_desc *rxdp;
>>> +	uint16_t rx_id;
>>> +	uint64_t paddr;
>>> +	uint64_t dma_addr;
>>> +	uint16_t i;
>>> +
>>> +	rxdp = rxq->rx_ring + rxq->rxrearm_start;
>>> +	rxep = &rxq->sw_ring[rxq->rxrearm_start];
>>> +
>>> +	for (i = 0; i < nb_mbufs; i++) {
>>> +		/* Initialize rxdp descs. */
>>> +		paddr = (rxep[i].mbuf)->buf_iova + RTE_PKTMBUF_HEADROOM;
>>> +		dma_addr = rte_cpu_to_le_64(paddr);
>>> +		/* flush desc with pa dma_addr */
>>> +		rxdp[i].read.hdr_addr = 0;
>>> +		rxdp[i].read.pkt_addr = dma_addr;
>>> +	}
>>> +
>>> +	/* Update the descriptor initializer index */
>>> +	rxq->rxrearm_start += nb_mbufs;
>>> +	rx_id = rxq->rxrearm_start - 1;
>>> +
>>> +	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
>>> +		rxq->rxrearm_start = 0;
>>> +		rx_id = rxq->nb_rx_desc - 1;
>>> +	}
>>> +
>>> +	rxq->rxrearm_nb -= nb_mbufs;
>>> +
>>> +	rte_io_wmb();
>>> +	/* Update the tail pointer on the NIC */
>>> +	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
>>> +}
>>> +
>>> +uint16_t
>>> +i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
>>> +	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
>>> +{
>>> +	struct i40e_tx_queue *txq = tx_queue;
>>> +	struct i40e_tx_entry *txep;
>>> +	struct rte_mbuf **rxep;
>>> +	int i, n;
>>> +	uint16_t nb_recycle_mbufs;
>>> +	uint16_t avail = 0;
>>> +	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
>>> +	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
>>> +	uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
>>> +	uint16_t refill_head = *recycle_rxq_info->refill_head;
>>> +	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
>>> +
>>> +	/* Get available recycling Rx buffers. */
>>> +	avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
>>> +
>>> +	/* Check Tx free thresh and Rx available space. */
>>> +	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
>>> +		return 0;
>>> +
>>> +	/* check DD bits on threshold descriptor */
>>> +	if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
>>> +			rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
>>> +			rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
>>> +		return 0;
>>> +
>>> +	n = txq->tx_rs_thresh;
>>> +	nb_recycle_mbufs = n;
>>> +
>>> +	/* Mbufs recycle mode can only support no ring buffer wrapping around.
>>> +	 * Two cases for this:
>>> +	 *
>>> +	 * case 1: The refill head of the Rx buffer ring needs to be aligned with
>>> +	 * the mbuf ring size. In this case, the number of Tx freed buffers
>>> +	 * should be equal to refill_requirement.
>>> +	 *
>>> +	 * case 2: The refill head of the Rx buffer ring does not need to be aligned
>>> +	 * with the mbuf ring size. In this case, the update of the refill head
>>> +	 * cannot exceed the Rx mbuf ring size.
>>> +	 */
>>> +	if (refill_requirement != n ||
>>> +			(!refill_requirement && (refill_head + n > mbuf_ring_size)))
>>> +		return 0;
>>> +
>>> +	/* First buffer to free from S/W ring is at index
>>> +	 * tx_next_dd - (tx_rs_thresh - 1).
>>> +	 */
>>> +	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
>>> +	rxep = recycle_rxq_info->mbuf_ring;
>>> +	rxep += refill_head;
>>> +
>>> +	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
>>> +		/* Avoid txq containing buffers from unexpected mempool. */
>>> +		if (unlikely(recycle_rxq_info->mp
>>> +				!= txep[0].mbuf->pool))
>>> +			return 0;
>>> +
>>> +		/* Directly put mbufs from Tx to Rx. */
>>> +		for (i = 0; i < n; i++)
>>> +			rxep[i] = txep[i].mbuf;
>>> +	} else {
>>> +		for (i = 0; i < n; i++) {
>>> +			rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
>>> +
>>> +			/* If Tx buffers are not the last reference or from
>>> +			 * unexpected mempool, previously copied buffers are
>>> +			 * considered as invalid.
>>> +			 */
>>> +			if (unlikely((rxep[i] == NULL && refill_requirement) ||
>> [Konstantin]
>> Could you pls remind me why it is ok to have rxep[i]==NULL when
>> refill_requirement is not set?
>>
>> If refill_requirement is not zero, it means each Tx freed buffer must be valid and can be put into the Rx
>> sw_ring. Then the refill head of the Rx buffer ring can be aligned with the mbuf ring size. Briefly speaking,
>> the number of valid Tx freed buffers must be equal to the Rx refill_requirement. For example, the i40e driver.
>>
>> If refill_requirement is zero, it means that the refill head of the Rx buffer ring does not need to be aligned
>> with the mbuf ring size; thus if Tx has n valid freed buffers, we just need to put these n buffers into the Rx
>> sw_ring, and the count does not have to be equal to the Rx rearm setting. For example, the mlx5 driver.
>>
>> In conclusion, the above difference comes from PMD drivers having different strategies to update their Rx
>> rearm (refill) head. For the i40e driver, if rearm_head exceeds 1024, it is reset to 0, because the number
>> rearmed each time is a fixed value by default. For the mlx5 driver, its rearm_head can exceed 1024, and a
>> mask is used to obtain the real index; thus its rearm count can be a different value.
>
> Ok, but if rte_pktmbuf_prefree_seg(txep[i].mbuf) returns NULL, it means that this mbuf is not free yet and can't be reused.
> Shouldn't we then set nb_recycle_mbufs = 0 in that case too?
> Or probably it would be enough to skip that mbuf?
> Might be something like that:
>
> for (i = 0, j = 0; i < n; i++) {
>
> 	rxep[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> 	if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j]->pool) {
> 		if (refill_requirement) {
> 			nb_recycle_mbufs = 0;
> 			break;
> 		}
> 	} else
> 		j++;
> }
>
> /* now j contains actual number of recycled mbufs */
>
> ?

After another thought, it might be easier and cleaner just to:

if (rxep[i] == NULL || recycle_rxq_info->mp != rxep[i]->pool)
	nb_recycle_mbufs = 0;

Anyway, from my understanding - if rte_pktmbuf_prefree_seg(mbuf) returns NULL, then we can't recycle that mbuf.

>
>>
>>> +					recycle_rxq_info->mp != txep[i].mbuf->pool))
>>> +				nb_recycle_mbufs = 0;
>>> +		}
>>> +		/* If Tx buffers are not the last reference or
>>> +		 * from unexpected mempool, all recycled buffers
>>> +		 * are put into mempool.
>>> +		 */
>>> +		if (nb_recycle_mbufs == 0)
>>> +			for (i = 0; i < n; i++) {
>>> +				if (rxep[i] != NULL)
>>> +					rte_mempool_put(rxep[i]->pool, rxep[i]);
>>> +			}
>>> +	}
>>> +
>>> +	/* Update counters for Tx. */
>>> +	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
>>> +	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
>>> +	if (txq->tx_next_dd >= txq->nb_tx_desc)
>>> +		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
>>> +
>>> +	return nb_recycle_mbufs;
>>> +}
>>> diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
>>> index b4f65b58fa..a9c9eb331c 100644
>>> --- a/drivers/net/i40e/i40e_rxtx.c
>>> +++ b/drivers/net/i40e/i40e_rxtx.c
>>> @@ -3199,6 +3199,30 @@ i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
>>>  	qinfo->conf.offloads = txq->offloads;
>>>  }
>>>
>>> +void
>>> +i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
>>> +	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
>>> +{
>>> +	struct i40e_rx_queue *rxq;
>>> +	struct i40e_adapter *ad =
>>> +		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
>>> +
>>> +	rxq = dev->data->rx_queues[queue_id];
>>> +
>>> +	recycle_rxq_info->mbuf_ring = (void *)rxq->sw_ring;
>>> +	recycle_rxq_info->mp = rxq->mp;
>>> +	recycle_rxq_info->mbuf_ring_size = rxq->nb_rx_desc;
>>> +	recycle_rxq_info->receive_tail = &rxq->rx_tail;
>>> +
>>> +	if (ad->rx_vec_allowed) {
>>> +		recycle_rxq_info->refill_requirement = RTE_I40E_RXQ_REARM_THRESH;
>>> +		recycle_rxq_info->refill_head = &rxq->rxrearm_start;
>>> +	} else {
>>> +		recycle_rxq_info->refill_requirement = rxq->rx_free_thresh;
>>> +		recycle_rxq_info->refill_head = &rxq->rx_free_trigger;
>>> +	}
>>> +}
>>> +
>>>  #ifdef RTE_ARCH_X86
>>>  static inline bool
>>>  get_avx_supported(bool request_avx512)
>>> @@ -3293,6 +3317,8 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
>>>  				dev->rx_pkt_burst = ad->rx_use_avx2 ?
>>>  					i40e_recv_scattered_pkts_vec_avx2 :
>>>  					i40e_recv_scattered_pkts_vec;
>>> +				dev->recycle_rx_descriptors_refill =
>>> +					i40e_recycle_rx_descriptors_refill_vec;
>>>  			}
>>>  		} else {
>>>  			if (ad->rx_use_avx512) {
>>> @@ -3311,9 +3337,12 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
>>>  				dev->rx_pkt_burst = ad->rx_use_avx2 ?
>>>  					i40e_recv_pkts_vec_avx2 :
>>>  					i40e_recv_pkts_vec;
>>> +				dev->recycle_rx_descriptors_refill =
>>> +					i40e_recycle_rx_descriptors_refill_vec;
>>>  			}
>>>  		}
>>>  #else /* RTE_ARCH_X86 */
>>> +	dev->recycle_rx_descriptors_refill = i40e_recycle_rx_descriptors_refill_vec;
>>>  	if (dev->data->scattered_rx) {
>>>  		PMD_INIT_LOG(DEBUG,
>>>  			"Using Vector Scattered Rx (port %d).",
>>> @@ -3481,15 +3510,18 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
>>>  				dev->tx_pkt_burst = ad->tx_use_avx2 ?
>>>  					i40e_xmit_pkts_vec_avx2 :
>>>  					i40e_xmit_pkts_vec;
>>> +			dev->recycle_tx_mbufs_reuse = i40e_recycle_tx_mbufs_reuse_vec;
>>>  		}
>>>  #else /* RTE_ARCH_X86 */
>>>  		PMD_INIT_LOG(DEBUG, "Using Vector Tx (port %d).",
>>>  			dev->data->port_id);
>>>  		dev->tx_pkt_burst = i40e_xmit_pkts_vec;
>>> +		dev->recycle_tx_mbufs_reuse = i40e_recycle_tx_mbufs_reuse_vec;
>>>  #endif /* RTE_ARCH_X86 */
>>>  	} else {
>>>  		PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
>>>  		dev->tx_pkt_burst = i40e_xmit_pkts_simple;
>>> +		dev->recycle_tx_mbufs_reuse = i40e_recycle_tx_mbufs_reuse_vec;
>>>  	}
>>>  	dev->tx_pkt_prepare = i40e_simple_prep_pkts;
>>>  } else {
>>> diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h
>>> index a8686224e5..b191f23e1f 100644
>>> --- a/drivers/net/i40e/i40e_rxtx.h
>>> +++ b/drivers/net/i40e/i40e_rxtx.h
>>> @@ -236,6 +236,10 @@ uint32_t i40e_dev_rx_queue_count(void *rx_queue);
>>>  int i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t offset);
>>>  int i40e_dev_tx_descriptor_status(void *tx_queue, uint16_t offset);
>>>
>>> +uint16_t i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
>>> +	struct rte_eth_recycle_rxq_info *recycle_rxq_info);
>>> +void i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs);
>>> +
>>>  uint16_t i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
>>>  	uint16_t nb_pkts);
>>>  uint16_t i40e_recv_scattered_pkts_vec(void *rx_queue,
>>> diff --git a/drivers/net/i40e/meson.build b/drivers/net/i40e/meson.build
>>> index 8e53b87a65..3b1a233c84 100644
>>> --- a/drivers/net/i40e/meson.build
>>> +++ b/drivers/net/i40e/meson.build
>>> @@ -34,6 +34,7 @@ sources = files(
>>>          'i40e_tm.c',
>>>          'i40e_hash.c',
>>>          'i40e_vf_representor.c',
>>> +        'i40e_recycle_mbufs_vec_common.c',
>>>          'rte_pmd_i40e.c',
>>>  )
>>>
>>> --
>>> 2.25.1
>
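To illustrate the two refill-head strategies discussed above, here is a minimal standalone sketch. The helper names and parameters are made up for the illustration; they are not the actual i40e or mlx5 driver fields:

#include <stdint.h>

/* i40e-style fixed rearm: the head wraps back to zero explicitly, so
 * each refill must be exactly refill_requirement mbufs to keep the
 * head aligned with the ring size.
 */
static inline uint16_t
advance_head_fixed(uint16_t head, uint16_t refill_requirement,
		uint16_t ring_size)
{
	head += refill_requirement;
	if (head >= ring_size)
		head = 0;
	return head;
}

/* mlx5-style free-running head: the head only grows (uint16_t
 * arithmetic wraps naturally at 2^16), so any refill count up to the
 * ring size works; hence refill_requirement == 0.
 */
static inline uint16_t
advance_head_free_running(uint16_t head, uint16_t nb_freed)
{
	return head + nb_freed; /* never reset; masked on access */
}

/* The real ring index is recovered with a mask when the ring is
 * accessed; ring_size must be a power of two for this to be valid.
 */
static inline uint16_t
ring_index(uint16_t head, uint16_t ring_size)
{
	return head & (ring_size - 1);
}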
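And to make the suggested change concrete, here is an untested sketch of how the whole no-fast-free branch of i40e_recycle_tx_mbufs_reuse_vec() could read with that check folded in, reusing the variable names from the patch above:

	for (i = 0; i < n; i++) {
		rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);

		/* An mbuf that is still referenced elsewhere
		 * (prefree_seg returned NULL) or that belongs to an
		 * unexpected mempool can never be handed to the Rx
		 * ring, regardless of refill_requirement.
		 */
		if (rxep[i] == NULL || recycle_rxq_info->mp != rxep[i]->pool)
			nb_recycle_mbufs = 0;
	}

	/* If the burst can't be recycled, flush everything collected so
	 * far back to its mempool; the Tx counters are still updated
	 * afterwards, since the Tx descriptors themselves are done.
	 */
	if (nb_recycle_mbufs == 0) {
		for (i = 0; i < n; i++) {
			if (rxep[i] != NULL)
				rte_mempool_put(rxep[i]->pool, rxep[i]);
		}
	}

Note the short-circuit in the condition matters: rxep[i]->pool is only dereferenced when prefree_seg actually returned a buffer.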