From mboxrd@z Thu Jan 1 00:00:00 1970
From: Nelio Laranjeiro
To: dev@dpdk.org
Cc: Bruce Richardson, Ferruh Yigit, Adrien Mazarguil
Date: Fri, 24 Jun 2016 10:50:57 +0200
Message-Id: <1466758261-25986-22-git-send-email-nelio.laranjeiro@6wind.com>
X-Mailer: git-send-email 2.1.4
In-Reply-To: <1466758261-25986-1-git-send-email-nelio.laranjeiro@6wind.com>
References: <1466700801-10383-1-git-send-email-nelio.laranjeiro@6wind.com>
 <1466758261-25986-1-git-send-email-nelio.laranjeiro@6wind.com>
Subject: [dpdk-dev] [PATCH v6 21/25] mlx5: resurrect Tx gather support

From: Adrien Mazarguil

Compared to its previous incarnation, the software limit on the number of
mbuf segments (previously MLX5_PMD_SGE_WR_N, set to 4 by default) is gone,
so there is no need for the linearization code and the related buffers that
permanently consumed a non-negligible amount of memory to handle oversized
mbufs. The resulting code is both lighter and faster.

Signed-off-by: Adrien Mazarguil
Signed-off-by: Nelio Laranjeiro
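
For readers unfamiliar with the term, the "Tx gather" the log describes can
be sketched as follows (illustration only, not part of the patch; everything
except the rte_mbuf accessors is hypothetical): instead of copying a chained
mbuf into one linear buffer, the driver walks buf->next and emits one
scatter entry per segment.

    #include <stdint.h>
    #include <rte_mbuf.h>

    /* Hypothetical scatter entry, for illustration only. */
    struct sg_entry {
            uintptr_t addr;
            uint32_t length;
            uint32_t lkey;
    };

    /* Map each segment of a chained mbuf to one scatter entry (no copy). */
    static unsigned int
    sketch_gather(struct rte_mbuf *buf, struct sg_entry *sg, unsigned int max_sg)
    {
            unsigned int n = 0;

            for (; buf != NULL && n != max_sg; buf = buf->next, ++n) {
                    sg[n].addr = rte_pktmbuf_mtod(buf, uintptr_t);
                    sg[n].length = rte_pktmbuf_data_len(buf);
                    sg[n].lkey = 0; /* the real code resolves this via txq_mp2mr() */
            }
            return n;
    }

The removed code path instead copied such chains into preallocated
linearization buffers, which is the memory overhead the log refers to.
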
---
 drivers/net/mlx5/mlx5_rxtx.c | 235 +++++++++++++++++++++++++++++++++----------
 drivers/net/mlx5/mlx5_txq.c  |   8 +-
 2 files changed, 188 insertions(+), 55 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index fadc182..c72e7ce 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -303,6 +303,7 @@ mlx5_wqe_write(struct txq *txq, volatile union mlx5_wqe *wqe,
 {
 	wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
 	wqe->wqe.ctrl.data[1] = htonl((txq->qp_num_8s) | 4);
+	wqe->wqe.ctrl.data[2] = 0;
 	wqe->wqe.ctrl.data[3] = 0;
 	wqe->inl.eseg.rsvd0 = 0;
 	wqe->inl.eseg.rsvd1 = 0;
@@ -348,6 +349,7 @@ mlx5_wqe_write_vlan(struct txq *txq, volatile union mlx5_wqe *wqe,
 	wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
 	wqe->wqe.ctrl.data[1] = htonl((txq->qp_num_8s) | 4);
+	wqe->wqe.ctrl.data[2] = 0;
 	wqe->wqe.ctrl.data[3] = 0;
 	wqe->inl.eseg.rsvd0 = 0;
 	wqe->inl.eseg.rsvd1 = 0;
@@ -425,6 +427,7 @@ mlx5_wqe_write_inline(struct txq *txq, volatile union mlx5_wqe *wqe,
 	assert(size < 64);
 	wqe->inl.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
 	wqe->inl.ctrl.data[1] = htonl(txq->qp_num_8s | size);
+	wqe->inl.ctrl.data[2] = 0;
 	wqe->inl.ctrl.data[3] = 0;
 	wqe->inl.eseg.rsvd0 = 0;
 	wqe->inl.eseg.rsvd1 = 0;
@@ -498,6 +501,7 @@ mlx5_wqe_write_inline_vlan(struct txq *txq, volatile union mlx5_wqe *wqe,
 	assert(size < 64);
 	wqe->inl.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
 	wqe->inl.ctrl.data[1] = htonl(txq->qp_num_8s | size);
+	wqe->inl.ctrl.data[2] = 0;
 	wqe->inl.ctrl.data[3] = 0;
 	wqe->inl.eseg.rsvd0 = 0;
 	wqe->inl.eseg.rsvd1 = 0;
@@ -586,6 +590,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	uint16_t elts_head = txq->elts_head;
 	const unsigned int elts_n = txq->elts_n;
 	unsigned int i = 0;
+	unsigned int j = 0;
 	unsigned int max;
 	unsigned int comp;
 	volatile union mlx5_wqe *wqe;
@@ -602,23 +607,27 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	if (max > elts_n)
 		max -= elts_n;
 	do {
-		struct rte_mbuf *buf;
+		struct rte_mbuf *buf = *(pkts++);
 		unsigned int elts_head_next;
 		uintptr_t addr;
 		uint32_t length;
 		uint32_t lkey;
+		unsigned int segs_n = buf->nb_segs;
+		volatile struct mlx5_wqe_data_seg *dseg;
+		unsigned int ds = sizeof(*wqe) / 16;
 
 		/*
 		 * Make sure there is enough room to store this packet and
 		 * that one ring entry remains unused.
 		 */
-		if (max < 1 + 1)
+		assert(segs_n);
+		if (max < segs_n + 1)
 			break;
-		--max;
+		max -= segs_n;
 		--pkts_n;
-		buf = *(pkts++);
 		elts_head_next = (elts_head + 1) & (elts_n - 1);
 		wqe = &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)];
+		dseg = &wqe->wqe.dseg;
 		rte_prefetch0(wqe);
 		if (pkts_n)
 			rte_prefetch0(*pkts);
@@ -638,7 +647,6 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 					    buf->vlan_tci);
 		else
 			mlx5_wqe_write(txq, wqe, addr, length, lkey);
-		wqe->wqe.ctrl.data[2] = 0;
 		/* Should we enable HW CKSUM offload */
 		if (buf->ol_flags &
 		    (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
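
As context for the hunks above (a sketch inferred from this patch alone, not
an authoritative description of the hardware format): the four 32-bit words
of the control segment are filled as data[0] = WQE index plus opcode,
data[1] = QP number plus the WQE size in 16-byte units, data[2] =
completion-request flags (now cleared by the helpers and only set to
htonl(8) when the completion threshold is reached, see below), data[3] = 0.
Roughly:

    #include <stdint.h>
    #include <arpa/inet.h>	/* htonl() */

    /* Placeholder; the real value lives in the PMD's PRM header. */
    #ifndef MLX5_OPCODE_SEND
    #define MLX5_OPCODE_SEND 0x0a
    #endif

    /* Sketch of how the control words above are composed (inferred, not the
     * driver's actual helper). */
    static inline void
    sketch_ctrl_fill(uint32_t data[4], uint16_t wqe_ci, uint32_t qp_num_8s,
                     unsigned int ds, int request_comp)
    {
            data[0] = htonl(((uint32_t)wqe_ci << 8) | MLX5_OPCODE_SEND);
            data[1] = htonl(qp_num_8s | (ds & 0x3f));
            data[2] = request_comp ? htonl(8) : 0;
            data[3] = 0;
    }

Once the helpers initialize data[2], the per-packet "wqe->wqe.ctrl.data[2] =
0;" in the burst loop becomes redundant, which is why the hunk above removes
it.
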
@@ -648,6 +656,37 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		} else {
 			wqe->wqe.eseg.cs_flags = 0;
 		}
+		while (--segs_n) {
+			/*
+			 * Spill on next WQE when the current one does not have
+			 * enough room left. Size of WQE must be a multiple
+			 * of data segment size.
+			 */
+			assert(!(sizeof(*wqe) % sizeof(*dseg)));
+			if (!(ds % (sizeof(*wqe) / 16)))
+				dseg = (volatile void *)
+					&(*txq->wqes)[txq->wqe_ci++ &
+						      (txq->wqe_n - 1)];
+			else
+				++dseg;
+			++ds;
+			buf = buf->next;
+			assert(buf);
+			/* Store segment information. */
+			dseg->byte_count = htonl(DATA_LEN(buf));
+			dseg->lkey = txq_mp2mr(txq, txq_mb2mp(buf));
+			dseg->addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t));
+			(*txq->elts)[elts_head_next] = buf;
+			elts_head_next = (elts_head_next + 1) & (elts_n - 1);
+#ifdef MLX5_PMD_SOFT_COUNTERS
+			length += DATA_LEN(buf);
+#endif
+			++j;
+		}
+		/* Update DS field in WQE. */
+		wqe->wqe.ctrl.data[1] &= htonl(0xffffffc0);
+		wqe->wqe.ctrl.data[1] |= htonl(ds & 0x3f);
+		elts_head = elts_head_next;
 #ifdef MLX5_PMD_SOFT_COUNTERS
 		/* Increment sent bytes counter. */
 		txq->stats.obytes += length;
@@ -659,7 +698,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	if (unlikely(i == 0))
 		return 0;
 	/* Check whether completion threshold has been reached. */
-	comp = txq->elts_comp + i;
+	comp = txq->elts_comp + i + j;
 	if (comp >= MLX5_TX_COMP_THRESH) {
 		/* Request completion on last WQE. */
 		wqe->wqe.ctrl.data[2] = htonl(8);
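
A worked example of the DS accounting above may help (assumptions: the WQE
basic block is 64 bytes and a data segment is 16 bytes, which is what
"ds = sizeof(*wqe) / 16" and the 0x3f mask suggest; treat the numbers as
illustrative). The first WQE already accounts for four 16-byte units, each
extra segment adds one, and whenever "ds" reaches a multiple of four the
next data segment spills into the following WQE slot, which is why
txq->wqe_ci is incremented inside the loop:

    #include <assert.h>
    #include <stdio.h>

    #define WQE_SIZE	64u	/* assumed WQE basic block size in bytes */
    #define DSEG_SIZE	16u	/* assumed data segment size in bytes */

    /* How many 16-byte units and extra WQE slots an N-segment packet uses,
     * mirroring the loop above (sketch, not driver code). */
    static void
    sketch_ds_usage(unsigned int nb_segs)
    {
            unsigned int ds = WQE_SIZE / DSEG_SIZE;	/* ctrl + eseg + first dseg */
            unsigned int extra_wqe = 0;
            unsigned int seg;

            assert(nb_segs >= 1);
            for (seg = 1; seg < nb_segs; ++seg) {
                    if (!(ds % (WQE_SIZE / DSEG_SIZE)))
                            ++extra_wqe;	/* spill into the next WQE slot */
                    ++ds;
            }
            printf("%u segment(s): DS=%u (low 6 bits of ctrl.data[1]), %u extra WQE slot(s)\n",
                   nb_segs, ds & 0x3f, extra_wqe);
    }

For instance, a 2-segment packet ends up with DS = 5 and its second data
segment placed at the start of the next WQE slot.
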
@@ -699,6 +738,7 @@ mlx5_tx_burst_inline(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	uint16_t elts_head = txq->elts_head;
 	const unsigned int elts_n = txq->elts_n;
 	unsigned int i = 0;
+	unsigned int j = 0;
 	unsigned int max;
 	unsigned int comp;
 	volatile union mlx5_wqe *wqe;
@@ -716,23 +756,27 @@ mlx5_tx_burst_inline(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	if (max > elts_n)
 		max -= elts_n;
 	do {
-		struct rte_mbuf *buf;
+		struct rte_mbuf *buf = *(pkts++);
 		unsigned int elts_head_next;
 		uintptr_t addr;
 		uint32_t length;
 		uint32_t lkey;
+		unsigned int segs_n = buf->nb_segs;
+		volatile struct mlx5_wqe_data_seg *dseg;
+		unsigned int ds = sizeof(*wqe) / 16;
 
 		/*
 		 * Make sure there is enough room to store this packet and
 		 * that one ring entry remains unused.
 		 */
-		if (max < 1 + 1)
+		assert(segs_n);
+		if (max < segs_n + 1)
 			break;
-		--max;
+		max -= segs_n;
 		--pkts_n;
-		buf = *(pkts++);
 		elts_head_next = (elts_head + 1) & (elts_n - 1);
 		wqe = &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)];
+		dseg = &wqe->wqe.dseg;
 		tx_prefetch_wqe(txq, txq->wqe_ci);
 		tx_prefetch_wqe(txq, txq->wqe_ci + 1);
 		if (pkts_n)
@@ -755,13 +799,14 @@ mlx5_tx_burst_inline(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		if (pkts_n)
 			rte_prefetch0(rte_pktmbuf_mtod(*pkts,
 						       volatile void *));
-		if (length <= max_inline) {
+		if ((length <= max_inline) && (segs_n == 1)) {
 			if (buf->ol_flags & PKT_TX_VLAN_PKT)
 				mlx5_wqe_write_inline_vlan(txq, wqe, addr, length,
 							   buf->vlan_tci);
 			else
 				mlx5_wqe_write_inline(txq, wqe, addr, length);
+			goto skip_segs;
 		} else {
 			/* Retrieve Memory Region key for this memory pool. */
 			lkey = txq_mp2mr(txq, txq_mb2mp(buf));
@@ -771,7 +816,37 @@ mlx5_tx_burst_inline(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			else
 				mlx5_wqe_write(txq, wqe, addr, length, lkey);
 		}
-		wqe->inl.ctrl.data[2] = 0;
+		while (--segs_n) {
+			/*
+			 * Spill on next WQE when the current one does not have
+			 * enough room left. Size of WQE must be a multiple
+			 * of data segment size.
+			 */
+			assert(!(sizeof(*wqe) % sizeof(*dseg)));
+			if (!(ds % (sizeof(*wqe) / 16)))
+				dseg = (volatile void *)
+					&(*txq->wqes)[txq->wqe_ci++ &
+						      (txq->wqe_n - 1)];
+			else
+				++dseg;
+			++ds;
+			buf = buf->next;
+			assert(buf);
+			/* Store segment information. */
+			dseg->byte_count = htonl(DATA_LEN(buf));
+			dseg->lkey = txq_mp2mr(txq, txq_mb2mp(buf));
+			dseg->addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t));
+			(*txq->elts)[elts_head_next] = buf;
+			elts_head_next = (elts_head_next + 1) & (elts_n - 1);
+#ifdef MLX5_PMD_SOFT_COUNTERS
+			length += DATA_LEN(buf);
+#endif
+			++j;
+		}
+		/* Update DS field in WQE. */
+		wqe->inl.ctrl.data[1] &= htonl(0xffffffc0);
+		wqe->inl.ctrl.data[1] |= htonl(ds & 0x3f);
+skip_segs:
 		elts_head = elts_head_next;
 #ifdef MLX5_PMD_SOFT_COUNTERS
 		/* Increment sent bytes counter. */
@@ -783,7 +858,7 @@ mlx5_tx_burst_inline(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	if (unlikely(i == 0))
 		return 0;
 	/* Check whether completion threshold has been reached. */
-	comp = txq->elts_comp + i;
+	comp = txq->elts_comp + i + j;
 	if (comp >= MLX5_TX_COMP_THRESH) {
 		/* Request completion on last WQE. */
 		wqe->inl.ctrl.data[2] = htonl(8);
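
The decision the inline burst path makes above can be condensed as follows
(sketch only; max_inline stands for txq->max_inline, and the two write paths
stand for mlx5_wqe_write_inline*() and mlx5_wqe_write() respectively):

    #include <stdbool.h>
    #include <stdint.h>

    /* Inline the packet data into the WQE only when it is a single segment
     * and small enough; otherwise fall back to gather. */
    static inline bool
    sketch_use_inline(uint32_t pkt_len, unsigned int nb_segs,
                      unsigned int max_inline)
    {
            return (pkt_len <= max_inline) && (nb_segs == 1);
    }

    static void
    sketch_tx_one(uint32_t pkt_len, unsigned int nb_segs, unsigned int max_inline)
    {
            if (sketch_use_inline(pkt_len, nb_segs, max_inline)) {
                    /* copy the packet bytes into the WQE */
                    return;	/* nothing left to append: "goto skip_segs" */
            }
            /* write the first data segment, then append one data segment per
             * remaining mbuf segment, as in the while (--segs_n) loop above */
    }

When the packet is inlined there are no extra data segments to append, hence
the new "goto skip_segs" jumping over the segment loop.
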
@@ -890,6 +965,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	uint16_t elts_head = txq->elts_head;
 	const unsigned int elts_n = txq->elts_n;
 	unsigned int i = 0;
+	unsigned int j = 0;
 	unsigned int max;
 	unsigned int comp;
 	struct mlx5_mpw mpw = {
@@ -908,48 +984,69 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	if (max > elts_n)
 		max -= elts_n;
 	do {
-		struct rte_mbuf *buf;
-		volatile struct mlx5_wqe_data_seg *dseg;
+		struct rte_mbuf *buf = *(pkts++);
 		unsigned int elts_head_next;
-		uintptr_t addr;
 		uint32_t length;
+		unsigned int segs_n = buf->nb_segs;
 		uint32_t cs_flags = 0;
 
 		/*
 		 * Make sure there is enough room to store this packet and
 		 * that one ring entry remains unused.
 		 */
-		if (max < 1 + 1)
+		assert(segs_n);
+		if (max < segs_n + 1)
 			break;
-		--max;
+		/* Do not bother with large packets MPW cannot handle. */
+		if (segs_n > MLX5_MPW_DSEG_MAX)
+			break;
+		max -= segs_n;
 		--pkts_n;
-		buf = *(pkts++);
-		elts_head_next = (elts_head + 1) & (elts_n - 1);
 		/* Should we enable HW CKSUM offload */
 		if (buf->ol_flags &
 		    (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))
 			cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
-		/* Retrieve buffer information. */
-		addr = rte_pktmbuf_mtod(buf, uintptr_t);
-		length = DATA_LEN(buf);
-		/* Update element. */
-		(*txq->elts)[elts_head] = buf;
+		/* Retrieve packet information. */
+		length = PKT_LEN(buf);
+		assert(length);
 		/* Start new session if packet differs. */
 		if ((mpw.state == MLX5_MPW_STATE_OPENED) &&
 		    ((mpw.len != length) ||
+		     (segs_n != 1) ||
 		     (mpw.wqe->mpw.eseg.cs_flags != cs_flags)))
 			mlx5_mpw_close(txq, &mpw);
 		if (mpw.state == MLX5_MPW_STATE_CLOSED) {
 			mlx5_mpw_new(txq, &mpw, length);
 			mpw.wqe->mpw.eseg.cs_flags = cs_flags;
 		}
-		dseg = mpw.data.dseg[mpw.pkts_n];
-		*dseg = (struct mlx5_wqe_data_seg){
-			.byte_count = htonl(length),
-			.lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
-			.addr = htonll(addr),
-		};
-		++mpw.pkts_n;
+		/* Multi-segment packets must be alone in their MPW. */
+		assert((segs_n == 1) || (mpw.pkts_n == 0));
+#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
+		length = 0;
+#endif
+		do {
+			volatile struct mlx5_wqe_data_seg *dseg;
+			uintptr_t addr;
+
+			elts_head_next = (elts_head + 1) & (elts_n - 1);
+			assert(buf);
+			(*txq->elts)[elts_head] = buf;
+			dseg = mpw.data.dseg[mpw.pkts_n];
+			addr = rte_pktmbuf_mtod(buf, uintptr_t);
+			*dseg = (struct mlx5_wqe_data_seg){
+				.byte_count = htonl(DATA_LEN(buf)),
+				.lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
+				.addr = htonll(addr),
+			};
+			elts_head = elts_head_next;
+#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
+			length += DATA_LEN(buf);
+#endif
+			buf = buf->next;
+			++mpw.pkts_n;
+			++j;
+		} while (--segs_n);
+		assert(length == mpw.len);
 		if (mpw.pkts_n == MLX5_MPW_DSEG_MAX)
 			mlx5_mpw_close(txq, &mpw);
 		elts_head = elts_head_next;
@@ -963,7 +1060,8 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	if (unlikely(i == 0))
 		return 0;
 	/* Check whether completion threshold has been reached. */
-	comp = txq->elts_comp + i;
+	/* "j" includes both packets and segments. */
+	comp = txq->elts_comp + j;
 	if (comp >= MLX5_TX_COMP_THRESH) {
 		volatile union mlx5_wqe *wqe = mpw.wqe;
 
@@ -1067,6 +1165,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 	uint16_t elts_head = txq->elts_head;
 	const unsigned int elts_n = txq->elts_n;
 	unsigned int i = 0;
+	unsigned int j = 0;
 	unsigned int max;
 	unsigned int comp;
 	unsigned int inline_room = txq->max_inline;
@@ -1086,38 +1185,40 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 	if (max > elts_n)
 		max -= elts_n;
 	do {
-		struct rte_mbuf *buf;
+		struct rte_mbuf *buf = *(pkts++);
 		unsigned int elts_head_next;
 		uintptr_t addr;
 		uint32_t length;
+		unsigned int segs_n = buf->nb_segs;
 		uint32_t cs_flags = 0;
 
 		/*
 		 * Make sure there is enough room to store this packet and
 		 * that one ring entry remains unused.
 		 */
-		if (max < 1 + 1)
+		assert(segs_n);
+		if (max < segs_n + 1)
+			break;
+		/* Do not bother with large packets MPW cannot handle. */
+		if (segs_n > MLX5_MPW_DSEG_MAX)
 			break;
-		--max;
+		max -= segs_n;
 		--pkts_n;
-		buf = *(pkts++);
-		elts_head_next = (elts_head + 1) & (elts_n - 1);
 		/* Should we enable HW CKSUM offload */
 		if (buf->ol_flags &
 		    (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))
 			cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
-		/* Retrieve buffer information. */
-		addr = rte_pktmbuf_mtod(buf, uintptr_t);
-		length = DATA_LEN(buf);
-		/* Update element. */
-		(*txq->elts)[elts_head] = buf;
+		/* Retrieve packet information. */
+		length = PKT_LEN(buf);
 		/* Start new session if packet differs. */
 		if (mpw.state == MLX5_MPW_STATE_OPENED) {
 			if ((mpw.len != length) ||
+			    (segs_n != 1) ||
 			    (mpw.wqe->mpw.eseg.cs_flags != cs_flags))
 				mlx5_mpw_close(txq, &mpw);
 		} else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) {
 			if ((mpw.len != length) ||
+			    (segs_n != 1) ||
 			    (length > inline_room) ||
 			    (mpw.wqe->mpw_inl.eseg.cs_flags != cs_flags)) {
 				mlx5_mpw_inline_close(txq, &mpw);
@@ -1125,7 +1226,8 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 			}
 		}
 		if (mpw.state == MLX5_MPW_STATE_CLOSED) {
-			if (length > inline_room) {
+			if ((segs_n != 1) ||
+			    (length > inline_room)) {
 				mlx5_mpw_new(txq, &mpw, length);
 				mpw.wqe->mpw.eseg.cs_flags = cs_flags;
 			} else {
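
The session rules added above boil down to a small predicate (sketch;
mpw_len and mpw_cs_flags stand for the open session's mpw.len and
eseg.cs_flags): a packet may join the currently open multi-packet WQE only
if it is single-segment, has the same length and the same checksum flags;
anything else closes the session, and a multi-segment packet always gets a
session of its own, with at most MLX5_MPW_DSEG_MAX segments.

    #include <stdbool.h>
    #include <stdint.h>

    /* Sketch of the "start new session if packet differs" checks above. */
    static inline bool
    sketch_mpw_can_join(uint32_t mpw_len, uint8_t mpw_cs_flags,
                        uint32_t pkt_len, uint8_t pkt_cs_flags,
                        unsigned int nb_segs)
    {
            return (nb_segs == 1) &&
                   (pkt_len == mpw_len) &&
                   (pkt_cs_flags == mpw_cs_flags);
    }

The inline MPW variant additionally requires pkt_len <= inline_room before
reusing an inline session, as the second condition block above shows.
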
@@ -1133,17 +1235,36 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 				mpw.wqe->mpw_inl.eseg.cs_flags = cs_flags;
 			}
 		}
+		/* Multi-segment packets must be alone in their MPW. */
+		assert((segs_n == 1) || (mpw.pkts_n == 0));
 		if (mpw.state == MLX5_MPW_STATE_OPENED) {
-			volatile struct mlx5_wqe_data_seg *dseg;
-
 			assert(inline_room == txq->max_inline);
-			dseg = mpw.data.dseg[mpw.pkts_n];
-			*dseg = (struct mlx5_wqe_data_seg){
-				.byte_count = htonl(length),
-				.lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
-				.addr = htonll(addr),
-			};
-			++mpw.pkts_n;
+#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
+			length = 0;
+#endif
+			do {
+				volatile struct mlx5_wqe_data_seg *dseg;
+
+				elts_head_next =
+					(elts_head + 1) & (elts_n - 1);
+				assert(buf);
+				(*txq->elts)[elts_head] = buf;
+				dseg = mpw.data.dseg[mpw.pkts_n];
+				addr = rte_pktmbuf_mtod(buf, uintptr_t);
+				*dseg = (struct mlx5_wqe_data_seg){
+					.byte_count = htonl(DATA_LEN(buf)),
+					.lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
+					.addr = htonll(addr),
+				};
+				elts_head = elts_head_next;
+#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
+				length += DATA_LEN(buf);
+#endif
+				buf = buf->next;
+				++mpw.pkts_n;
+				++j;
+			} while (--segs_n);
+			assert(length == mpw.len);
 			if (mpw.pkts_n == MLX5_MPW_DSEG_MAX)
 				mlx5_mpw_close(txq, &mpw);
 		} else {
@@ -1151,6 +1272,10 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 			assert(mpw.state == MLX5_MPW_INL_STATE_OPENED);
 			assert(length <= inline_room);
+			assert(length == DATA_LEN(buf));
+			elts_head_next = (elts_head + 1) & (elts_n - 1);
+			addr = rte_pktmbuf_mtod(buf, uintptr_t);
+			(*txq->elts)[elts_head] = buf;
 			/* Maximum number of bytes before wrapping. */
 			max = ((uintptr_t)&(*txq->wqes)[txq->wqe_n] -
 			       (uintptr_t)mpw.data.raw);
@@ -1175,6 +1300,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 				mpw.data.raw =
 					(volatile void *)&(*txq->wqes)[0];
 			++mpw.pkts_n;
+			++j;
 			if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) {
 				mlx5_mpw_inline_close(txq, &mpw);
 				inline_room = txq->max_inline;
@@ -1194,7 +1320,8 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 	if (unlikely(i == 0))
 		return 0;
 	/* Check whether completion threshold has been reached. */
-	comp = txq->elts_comp + i;
+	/* "j" includes both packets and segments. */
+	comp = txq->elts_comp + j;
 	if (comp >= MLX5_TX_COMP_THRESH) {
 		volatile union mlx5_wqe *wqe = mpw.wqe;
 
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index bae9f3d..6fe61c4 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -320,7 +320,13 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
 			.max_send_wr = ((priv->device_attr.max_qp_wr < desc) ?
 					priv->device_attr.max_qp_wr :
 					desc),
-			/* Max number of scatter/gather elements in a WR. */
+			/*
+			 * Max number of scatter/gather elements in a WR,
+			 * must be 1 to prevent libmlx5 from trying to affect
+			 * too much memory. TX gather is not impacted by the
+			 * priv->device_attr.max_sge limit and will still work
+			 * properly.
+			 */
 			.max_send_sge = 1,
 		},
 		.qp_type = IBV_QPT_RAW_PACKET,
-- 
2.1.4
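
To exercise this path from an application, a chained (multi-segment) mbuf is
handed to the usual burst API. The sketch below is illustrative only: pool
creation, error handling and payload contents are omitted, "mp", "port_id"
and "queue_id" are assumed to exist, the port_id type matches the DPDK API
of that era, and the pool's data room is assumed large enough for the sizes
used.

    #include <stdint.h>
    #include <rte_mbuf.h>
    #include <rte_ethdev.h>

    /* Build a two-segment packet and send it; the Tx gather path above emits
     * one data segment per mbuf segment instead of linearizing the chain. */
    static uint16_t
    sketch_send_chained(struct rte_mempool *mp, uint8_t port_id, uint16_t queue_id)
    {
            struct rte_mbuf *head = rte_pktmbuf_alloc(mp);
            struct rte_mbuf *tail = rte_pktmbuf_alloc(mp);

            if (head == NULL || tail == NULL) {
                    rte_pktmbuf_free(head);
                    rte_pktmbuf_free(tail);
                    return 0;
            }
            rte_pktmbuf_append(head, 64);	/* first segment, e.g. headers */
            rte_pktmbuf_append(tail, 1024);	/* second segment, e.g. payload */
            /* Chain the segments by hand (rte_pktmbuf_chain() would also do). */
            head->next = tail;
            head->nb_segs = 2;
            head->pkt_len = head->data_len + tail->data_len;
            return rte_eth_tx_burst(port_id, queue_id, &head, 1);
    }

Note that the MPW paths above skip packets with more than MLX5_MPW_DSEG_MAX
segments, and every path requires segs_n + 1 free ring entries, so very long
chains may be left unsent by a given burst call.
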