From mboxrd@z Thu Jan  1 00:00:00 1970
From: Kathleen Capella <kathleen.capella@arm.com>
To: Jingjing Wu, Beilei Xing
Cc: dev@dpdk.org, nd@arm.com, honnappa.nagarahalli@arm.com,
 dharmik.thakkar@arm.com, Kathleen Capella
Subject: [PATCH] net/iavf: remove extra copy step in Rx bulk path
Date: Thu, 24 Mar 2022 22:11:32 +0000
Message-Id: <20220324221132.10055-1-kathleen.capella@arm.com>
X-Mailer: git-send-email 2.17.1
List-Id: DPDK patches and discussions

In the Rx bulk path, packets taken from the HW ring are first copied
into the stage data structure and only later copied from the stage
into the rx_pkts array. For the packets the receive call requests up
front, this two-step copy adds unnecessary overhead. Instead, put the
requested number of packets directly into the rx_pkts array and stage
only the excess packets.

On N1SDP with 1 core/port, l3fwd saw up to a 4% performance
improvement. On x86, no difference in performance was observed.
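To make the mechanism concrete, the sketch below contrasts the two
flows. This is a minimal illustration, not the driver code: struct pkt,
RING_SZ, scan_ring_two_step() and scan_ring_direct() are invented
stand-ins for struct rte_mbuf and the iavf_rx_scan_hw_ring() /
iavf_rx_fill_from_stage() machinery, and the IAVF_LOOK_AHEAD descriptor
scanning and DD-bit checks are omitted.

    #include <stdint.h>

    #define RING_SZ 64

    struct pkt;                           /* stand-in for struct rte_mbuf */

    static struct pkt *sw_ring[RING_SZ];  /* mbufs taken off the HW ring  */
    static struct pkt *rx_stage[RING_SZ]; /* driver-private staging area  */
    static uint16_t rx_nb_avail;          /* staged pkts kept for later   */

    /* Old flow: every completed packet is staged, then a second loop
     * copies the requested ones out to the caller. */
    static uint16_t
    scan_ring_two_step(struct pkt **rx_pkts, uint16_t nb_pkts, uint16_t nb_done)
    {
        uint16_t i, n = nb_pkts < nb_done ? nb_pkts : nb_done;

        for (i = 0; i < nb_done; i++)      /* copy 1: ring -> stage   */
            rx_stage[i] = sw_ring[i];
        for (i = 0; i < n; i++)            /* copy 2: stage -> caller */
            rx_pkts[i] = rx_stage[i];
        rx_nb_avail = nb_done - n;         /* leftovers stay staged   */
        return n;
    }

    /* New flow: requested packets go straight into the caller's array;
     * only the excess is staged for the next call. */
    static uint16_t
    scan_ring_direct(struct pkt **rx_pkts, uint16_t nb_pkts, uint16_t nb_done)
    {
        uint16_t i, nb_rx = 0, nb_staged = 0;

        for (i = 0; i < nb_done; i++) {
            if (i < nb_pkts)
                rx_pkts[nb_rx++] = sw_ring[i];      /* single copy */
            else
                rx_stage[nb_staged++] = sw_ring[i]; /* excess only */
        }
        rx_nb_avail = nb_staged;
        return nb_rx;
    }

In the two-step flow every mbuf pointer is written twice; in the direct
flow each requested packet is written exactly once, which is where the
gain measured on N1SDP comes from.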
Signed-off-by: Kathleen Capella <kathleen.capella@arm.com>
Suggested-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
---
 drivers/net/iavf/iavf_rxtx.c | 74 ++++++++++++++++++++++++------------
 1 file changed, 49 insertions(+), 25 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 16e8d021f9..245dd225fd 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -1813,7 +1813,9 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 
 #define IAVF_LOOK_AHEAD 8
 static inline int
-iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq)
+iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq,
+			      struct rte_mbuf **rx_pkts,
+			      uint16_t nb_pkts)
 {
 	volatile union iavf_rx_flex_desc *rxdp;
 	struct rte_mbuf **rxep;
@@ -1822,6 +1824,7 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq)
 	uint16_t pkt_len;
 	int32_t s[IAVF_LOOK_AHEAD], var, nb_dd;
 	int32_t i, j, nb_rx = 0;
+	int32_t nb_staged = 0;
 	uint64_t pkt_flags;
 	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
 
@@ -1867,8 +1870,6 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq)
 #endif
 		}
 
-		nb_rx += nb_dd;
-
 		/* Translate descriptor info to mbuf parameters */
 		for (j = 0; j < nb_dd; j++) {
 			IAVF_DUMP_RX_DESC(rxq, &rxdp[j],
@@ -1892,24 +1893,34 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq)
 			pkt_flags = iavf_flex_rxd_error_to_pkt_flags(stat_err0);
 
 			mb->ol_flags |= pkt_flags;
-		}
 
-		for (j = 0; j < IAVF_LOOK_AHEAD; j++)
-			rxq->rx_stage[i + j] = rxep[j];
+			/* Put up to nb_pkts directly into buffers */
+			if ((i + j) < nb_pkts) {
+				rx_pkts[i + j] = rxep[j];
+				nb_rx++;
+			} else {
+				/* Stage excess pkts received */
+				rxq->rx_stage[nb_staged] = rxep[j];
+				nb_staged++;
+			}
+		}
 
 		if (nb_dd != IAVF_LOOK_AHEAD)
 			break;
 	}
 
+	/* Update rxq->rx_nb_avail to reflect number of staged pkts */
+	rxq->rx_nb_avail = nb_staged;
+
 	/* Clear software ring entries */
-	for (i = 0; i < nb_rx; i++)
+	for (i = 0; i < (nb_rx + nb_staged); i++)
 		rxq->sw_ring[rxq->rx_tail + i] = NULL;
 
 	return nb_rx;
 }
 
 static inline int
-iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq)
+iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
 	volatile union iavf_rx_desc *rxdp;
 	struct rte_mbuf **rxep;
@@ -1919,6 +1930,7 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq)
 	uint32_t rx_status;
 	int32_t s[IAVF_LOOK_AHEAD], var, nb_dd;
 	int32_t i, j, nb_rx = 0;
+	int32_t nb_staged = 0;
 	uint64_t pkt_flags;
 	const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
 
@@ -1970,8 +1982,6 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq)
 #endif
 		}
 
-		nb_rx += nb_dd;
-
 		/* Translate descriptor info to mbuf parameters */
 		for (j = 0; j < nb_dd; j++) {
 			IAVF_DUMP_RX_DESC(rxq, &rxdp[j],
@@ -2000,17 +2010,26 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq)
 				pkt_flags |= iavf_rxd_build_fdir(&rxdp[j], mb);
 
 			mb->ol_flags |= pkt_flags;
-		}
 
-		for (j = 0; j < IAVF_LOOK_AHEAD; j++)
-			rxq->rx_stage[i + j] = rxep[j];
+			/* Put up to nb_pkts directly into buffers */
+			if ((i + j) < nb_pkts) {
+				rx_pkts[i + j] = rxep[j];
+				nb_rx++;
+			} else { /* Stage excess pkts received */
+				rxq->rx_stage[nb_staged] = rxep[j];
+				nb_staged++;
+			}
+		}
 
 		if (nb_dd != IAVF_LOOK_AHEAD)
 			break;
 	}
 
+	/* Update rxq->rx_nb_avail to reflect number of staged pkts */
+	rxq->rx_nb_avail = nb_staged;
+
 	/* Clear software ring entries */
-	for (i = 0; i < nb_rx; i++)
+	for (i = 0; i < (nb_rx + nb_staged); i++)
 		rxq->sw_ring[rxq->rx_tail + i] = NULL;
 
 	return nb_rx;
@@ -2098,23 +2117,31 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		return iavf_rx_fill_from_stage(rxq,
 					       rx_pkts, nb_pkts);
 
 	if (rxq->rxdid >= IAVF_RXDID_FLEX_NIC && rxq->rxdid <= IAVF_RXDID_LAST)
-		nb_rx = (uint16_t)iavf_rx_scan_hw_ring_flex_rxd(rxq);
+		nb_rx = (uint16_t)iavf_rx_scan_hw_ring_flex_rxd(rxq, rx_pkts, nb_pkts);
 	else
-		nb_rx = (uint16_t)iavf_rx_scan_hw_ring(rxq);
+		nb_rx = (uint16_t)iavf_rx_scan_hw_ring(rxq, rx_pkts, nb_pkts);
+
 	rxq->rx_next_avail = 0;
-	rxq->rx_nb_avail = nb_rx;
-	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
+	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx + rxq->rx_nb_avail);
 
 	if (rxq->rx_tail > rxq->rx_free_trigger) {
 		if (iavf_rx_alloc_bufs(rxq) != 0) {
-			uint16_t i, j;
+			uint16_t i, j, nb_staged;
 
 			/* TODO: count rx_mbuf_alloc_failed here */
+			nb_staged = rxq->rx_nb_avail;
 			rxq->rx_nb_avail = 0;
-			rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
-			for (i = 0, j = rxq->rx_tail; i < nb_rx; i++, j++)
+
+			rxq->rx_tail = (uint16_t)(rxq->rx_tail - (nb_rx + nb_staged));
+			for (i = 0, j = rxq->rx_tail; i < nb_rx; i++, j++) {
+				rxq->sw_ring[j] = rx_pkts[i];
+				rx_pkts[i] = NULL;
+			}
+			for (i = 0, j = rxq->rx_tail + nb_rx; i < nb_staged; i++, j++) {
 				rxq->sw_ring[j] = rxq->rx_stage[i];
+				rx_pkts[i] = NULL;
+			}
 
 			return 0;
 		}
@@ -2127,10 +2154,7 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		   rxq->port_id, rxq->queue_id,
 		   rxq->rx_tail, nb_rx);
 
-	if (rxq->rx_nb_avail)
-		return iavf_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
-
-	return 0;
+	return nb_rx;
 }
 
 static uint16_t
-- 
2.31.1