From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id B0204A04D7 for ; Thu, 3 Sep 2020 00:51:57 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 7E296E07; Thu, 3 Sep 2020 00:51:57 +0200 (CEST) Received: from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129]) by dpdk.org (Postfix) with ESMTP id 9379C137D for ; Thu, 3 Sep 2020 00:51:56 +0200 (CEST) Received: from Internal Mail-Server by MTLPINE1 (envelope-from akozyrev@nvidia.com) with SMTP; 3 Sep 2020 01:51:55 +0300 Received: from nvidia.com (pegasus02.mtr.labs.mlnx [10.210.16.122]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 082MptUo031449; Thu, 3 Sep 2020 01:51:55 +0300 From: Alexander Kozyrev To: stable@dpdk.org Cc: ktraynor@redhat.com, Alexander Kozyrev , Slava Ovsiienko , Matan Azrad Date: Wed, 2 Sep 2020 22:51:48 +0000 Message-Id: <20200902225148.17532-1-akozyrev@nvidia.com> X-Mailer: git-send-email 2.24.1 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [dpdk-stable] [PATCH 18.11] net/mlx5: fix vectorized Rx burst termination X-BeenThere: stable@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches for DPDK stable branches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: stable-bounces@dpdk.org Sender: "stable" From: Alexander Kozyrev [ upstream commit c9cc554ba423641d3515671269c5648dab3bb9ad ] Maximum burst size of Vectorized Rx burst routine is set to MLX5_VPMD_RX_MAX_BURST(64). This limits the performance of any application that would like to gather more than 64 packets from the single Rx burst for batch processing (i.e. VPP). The situation gets worse with a mix of zipped and unzipped CQEs. They are processed separately and the Rx burst function returns small number of packets every call. Repeat the cycle of gathering packets from the vectorized Rx routine until a requested number of packets are collected or there are no more CQEs left to process. Fixes: 6cb559d67b83 ("net/mlx5: add vectorized Rx/Tx burst for x86") Cc: stable@dpdk.org Signed-off-by: Alexander Kozyrev Acked-by: Slava Ovsiienko Acked-by: Matan Azrad --- drivers/net/mlx5/mlx5_rxtx_vec.c | 17 ++++++++++++----- drivers/net/mlx5/mlx5_rxtx_vec_neon.h | 13 ++++++++++--- drivers/net/mlx5/mlx5_rxtx_vec_sse.h | 13 ++++++++++--- 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c index b8d624d14..ca2389b5d 100644 --- a/drivers/net/mlx5/mlx5_rxtx_vec.c +++ b/drivers/net/mlx5/mlx5_rxtx_vec.c @@ -232,13 +232,20 @@ uint16_t mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) { struct mlx5_rxq_data *rxq = dpdk_rxq; - uint16_t nb_rx; + uint16_t nb_rx = 0; + uint16_t tn = 0; uint64_t err = 0; + bool no_cq = false; - nb_rx = rxq_burst_v(rxq, pkts, pkts_n, &err); - if (unlikely(err)) - nb_rx = rxq_handle_pending_error(rxq, pkts, nb_rx); - return nb_rx; + do { + nb_rx = rxq_burst_v(rxq, pkts + tn, pkts_n - tn, &err, &no_cq); + if (unlikely(err)) + nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx); + tn += nb_rx; + if (unlikely(no_cq)) + break; + } while (tn != pkts_n); + return tn; } /** diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h index cdf9c65f1..566b3ea24 100644 --- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h +++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h @@ -641,13 +641,15 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, * @param[out] err * Pointer to a flag. Set non-zero value if pkts array has at least one error * packet to handle. + * @param[out] no_cq + * Pointer to a boolean. Set true if no new CQE seen. * * @return * Number of packets received including errors (<= pkts_n). */ static inline uint16_t rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, - uint64_t *err) + uint64_t *err, bool *no_cq) { const uint16_t q_n = 1 << rxq->cqe_n; const uint16_t q_mask = q_n - 1; @@ -754,8 +756,10 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, pkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP); /* Not to cross queue end. */ pkts_n = RTE_MIN(pkts_n, q_n - elts_idx); - if (!pkts_n) + if (!pkts_n) { + *no_cq = !rcvd_pkt; return rcvd_pkt; + } /* At this point, there shouldn't be any remained packets. */ assert(rxq->rq_pi == rxq->cq_ci); /* @@ -989,8 +993,10 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, break; } /* If no new CQE seen, return without updating cq_db. */ - if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) + if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) { + *no_cq = true; return rcvd_pkt; + } /* Update the consumer indexes for non-compressed CQEs. */ assert(nocmp_n <= pkts_n); rxq->cq_ci += nocmp_n; @@ -1016,6 +1022,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, } rte_cio_wmb(); *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); + *no_cq = !rcvd_pkt; return rcvd_pkt; } diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h index fb384efde..b83fd7f9b 100644 --- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h +++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h @@ -643,13 +643,15 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4], * @param[out] err * Pointer to a flag. Set non-zero value if pkts array has at least one error * packet to handle. + * @param[out] no_cq + * Pointer to a boolean. Set true if no new CQE seen. * * @return * Number of packets received including errors (<= pkts_n). */ static inline uint16_t rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, - uint64_t *err) + uint64_t *err, bool *no_cq) { const uint16_t q_n = 1 << rxq->cqe_n; const uint16_t q_mask = q_n - 1; @@ -737,8 +739,10 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, pkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP); /* Not to cross queue end. */ pkts_n = RTE_MIN(pkts_n, q_n - elts_idx); - if (!pkts_n) + if (!pkts_n) { + *no_cq = !rcvd_pkt; return rcvd_pkt; + } /* At this point, there shouldn't be any remained packets. */ assert(rxq->rq_pi == rxq->cq_ci); /* @@ -939,8 +943,10 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, break; } /* If no new CQE seen, return without updating cq_db. */ - if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) + if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) { + *no_cq = true; return rcvd_pkt; + } /* Update the consumer indexes for non-compressed CQEs. */ assert(nocmp_n <= pkts_n); rxq->cq_ci += nocmp_n; @@ -966,6 +972,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, } rte_compiler_barrier(); *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); + *no_cq = !rcvd_pkt; return rcvd_pkt; } -- 2.24.1