From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id E673BA00C2; Mon, 22 Aug 2022 05:48:17 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 4F7054284D; Mon, 22 Aug 2022 05:48:12 +0200 (CEST) Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by mails.dpdk.org (Postfix) with ESMTP id 974CB4282F for ; Mon, 22 Aug 2022 05:48:10 +0200 (CEST) Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 1D1BF1424; Sun, 21 Aug 2022 20:48:13 -0700 (PDT) Received: from net-arm-n1amp-02.shanghai.arm.com (net-arm-n1amp-02.shanghai.arm.com [10.169.210.108]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id 795283F718; Sun, 21 Aug 2022 20:48:07 -0700 (PDT) From: Joyce Kong To: jgrajcia@cisco.com, stephen@networkplumber.org, huzaifa.rahman@emumba.com Cc: dev@dpdk.org, nd@arm.com, mb@smartsharesystems.com, ruifeng.wang@arm.com, Joyce Kong Subject: [PATCH v3 1/2] net/memif: add a Rx fast path Date: Mon, 22 Aug 2022 03:47:30 +0000 Message-Id: <20220822034731.528424-2-joyce.kong@arm.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20220822034731.528424-1-joyce.kong@arm.com> References: <20220701102815.1444223-2-joyce.kong@arm.com> <20220822034731.528424-1-joyce.kong@arm.com> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org For memif non-zero-copy mode, there is a branch that compares the mbuf and memif buffer sizes during memory copying. The mbuf and memif buffer sizes are defined at compile time. 
If memif buf size <= mbuf size, add a fast Rx memory copy path that removes this branch and allocates mbufs in bulk. Removing the branch and using bulk allocation lead to a considerable performance uplift. Test with 1p1q on N1SDP AArch64 server, -------------------------------------------- buf size | memif <= mbuf | memif > mbuf | -------------------------------------------- non-zc gain | 26.85% | -0.37% | -------------------------------------------- zc gain | 8.57% | 3.04% | -------------------------------------------- Test with 1p1q on Cascade Lake Xeon X86 server, -------------------------------------------- buf size | memif <= mbuf | memif > mbuf | -------------------------------------------- non-zc gain | 17.54% | -0.42% | -------------------------------------------- zc gain | 10.67% | 0.26% | -------------------------------------------- Signed-off-by: Joyce Kong Reviewed-by: Ruifeng Wang Acked-by: Morten Brørup --- drivers/net/memif/rte_eth_memif.c | 137 +++++++++++++++++++++--------- 1 file changed, 96 insertions(+), 41 deletions(-) diff --git a/drivers/net/memif/rte_eth_memif.c b/drivers/net/memif/rte_eth_memif.c index dd951b8296..2ea2a8e266 100644 --- a/drivers/net/memif/rte_eth_memif.c +++ b/drivers/net/memif/rte_eth_memif.c @@ -342,66 +342,122 @@ eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) goto refill; n_slots = last_slot - cur_slot; - while (n_slots && n_rx_pkts < nb_pkts) { - mbuf_head = rte_pktmbuf_alloc(mq->mempool); - if (unlikely(mbuf_head == NULL)) - goto no_free_bufs; - mbuf = mbuf_head; - mbuf->port = mq->in_port; - dst_off = 0; + if (likely(mbuf_size >= pmd->cfg.pkt_buffer_size)) { + struct rte_mbuf *mbufs[nb_pkts]; + ret = rte_pktmbuf_alloc_bulk(mq->mempool, mbufs, nb_pkts); + if (unlikely(ret < 0)) + goto no_free_bufs; + + while (n_slots && n_rx_pkts < nb_pkts) { + mbuf_head = mbufs[n_rx_pkts]; + mbuf = mbuf_head; + +next_slot1: + mbuf->port = mq->in_port; + s0 = cur_slot & mask; + d0 = &ring->desc[s0]; -next_slot: - s0 = cur_slot & 
mask; - d0 = &ring->desc[s0]; + cp_len = d0->length; - src_len = d0->length; - src_off = 0; + rte_pktmbuf_data_len(mbuf) = cp_len; + rte_pktmbuf_pkt_len(mbuf) = cp_len; + if (mbuf != mbuf_head) + rte_pktmbuf_pkt_len(mbuf_head) += cp_len; - do { - dst_len = mbuf_size - dst_off; - if (dst_len == 0) { - dst_off = 0; - dst_len = mbuf_size; + rte_memcpy(rte_pktmbuf_mtod(mbuf, void *), + (uint8_t *)memif_get_buffer(proc_private, d0), cp_len); - /* store pointer to tail */ + cur_slot++; + n_slots--; + + if (d0->flags & MEMIF_DESC_FLAG_NEXT) { mbuf_tail = mbuf; mbuf = rte_pktmbuf_alloc(mq->mempool); - if (unlikely(mbuf == NULL)) + if (unlikely(mbuf == NULL)) { + rte_pktmbuf_free_bulk(mbufs + n_rx_pkts, + nb_pkts - n_rx_pkts); goto no_free_bufs; - mbuf->port = mq->in_port; + } ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf); if (unlikely(ret < 0)) { MIF_LOG(ERR, "number-of-segments-overflow"); rte_pktmbuf_free(mbuf); + rte_pktmbuf_free_bulk(mbufs + n_rx_pkts, + nb_pkts - n_rx_pkts); goto no_free_bufs; } + goto next_slot1; } - cp_len = RTE_MIN(dst_len, src_len); - rte_pktmbuf_data_len(mbuf) += cp_len; - rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf); - if (mbuf != mbuf_head) - rte_pktmbuf_pkt_len(mbuf_head) += cp_len; + mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head); + *bufs++ = mbuf_head; + n_rx_pkts++; + } - rte_memcpy(rte_pktmbuf_mtod_offset(mbuf, void *, - dst_off), - (uint8_t *)memif_get_buffer(proc_private, d0) + - src_off, cp_len); + if (n_rx_pkts < nb_pkts) + rte_pktmbuf_free_bulk(mbufs + n_rx_pkts, nb_pkts - n_rx_pkts); + } else { + while (n_slots && n_rx_pkts < nb_pkts) { + mbuf_head = rte_pktmbuf_alloc(mq->mempool); + if (unlikely(mbuf_head == NULL)) + goto no_free_bufs; + mbuf = mbuf_head; + mbuf->port = mq->in_port; + +next_slot2: + s0 = cur_slot & mask; + d0 = &ring->desc[s0]; - src_off += cp_len; - dst_off += cp_len; - src_len -= cp_len; - } while (src_len); + src_len = d0->length; + dst_off = 0; + src_off = 0; - cur_slot++; - n_slots--; + do { + 
dst_len = mbuf_size - dst_off; + if (dst_len == 0) { + dst_off = 0; + dst_len = mbuf_size; + + /* store pointer to tail */ + mbuf_tail = mbuf; + mbuf = rte_pktmbuf_alloc(mq->mempool); + if (unlikely(mbuf == NULL)) + goto no_free_bufs; + mbuf->port = mq->in_port; + ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf); + if (unlikely(ret < 0)) { + MIF_LOG(ERR, "number-of-segments-overflow"); + rte_pktmbuf_free(mbuf); + goto no_free_bufs; + } + } + cp_len = RTE_MIN(dst_len, src_len); - if (d0->flags & MEMIF_DESC_FLAG_NEXT) - goto next_slot; + rte_pktmbuf_data_len(mbuf) += cp_len; + rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf); + if (mbuf != mbuf_head) + rte_pktmbuf_pkt_len(mbuf_head) += cp_len; - mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head); - *bufs++ = mbuf_head; - n_rx_pkts++; + rte_memcpy(rte_pktmbuf_mtod_offset(mbuf, void *, + dst_off), + (uint8_t *)memif_get_buffer(proc_private, d0) + + src_off, cp_len); + + src_off += cp_len; + dst_off += cp_len; + src_len -= cp_len; + } while (src_len); + + cur_slot++; + n_slots--; + + if (d0->flags & MEMIF_DESC_FLAG_NEXT) + goto next_slot2; + + mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head); + *bufs++ = mbuf_head; + n_rx_pkts++; + } } no_free_bufs: @@ -694,7 +750,6 @@ eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) return n_tx_pkts; } - static int memif_tx_one_zc(struct pmd_process_private *proc_private, struct memif_queue *mq, memif_ring_t *ring, struct rte_mbuf *mbuf, const uint16_t mask, -- 2.25.1