DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH 3/3] net/i40e: fix segment fault in AVX512
@ 2021-03-12  1:27 Wenzhuo Lu
  2021-03-15 17:38 ` Coyle, David
  0 siblings, 1 reply; 3+ messages in thread
From: Wenzhuo Lu @ 2021-03-12  1:27 UTC (permalink / raw)
  To: dev; +Cc: Wenzhuo Lu, stable

Fix segment fault when failing to get the memory from the pool.

Fixes: e6a6a138919f ("net/i40e: add AVX512 vector path")
Cc: stable@dpdk.org

Reported-by: David Coyle <David.Coyle@intel.com>
Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
---
 drivers/net/i40e/i40e_rxtx_vec_avx512.c | 128 ++++++++++++++++++++++++++++++++
 1 file changed, 128 insertions(+)

diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
index 862c916..36521da 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
@@ -32,6 +32,9 @@
 
 	rxdp = rxq->rx_ring + rxq->rxrearm_start;
 
+	if (!cache)
+		goto normal;
+
 	/* We need to pull 'n' more MBUFs into the software ring from mempool
 	 * We inline the mempool function here, so we can vectorize the copy
 	 * from the cache into the shadow ring.
@@ -132,7 +135,132 @@
 #endif
 		rxep += 8, rxdp += 8, cache->len -= 8;
 	}
+	goto done;
+
+normal:
+	/* Pull 'n' more MBUFs into the software ring */
+	if (rte_mempool_get_bulk(rxq->mp,
+				 (void *)rxep,
+				 RTE_I40E_RXQ_REARM_THRESH) < 0) {
+		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
+		    rxq->nb_rx_desc) {
+			__m128i dma_addr0;
+
+			dma_addr0 = _mm_setzero_si128();
+			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+				rxep[i].mbuf = &rxq->fake_mbuf;
+				_mm_store_si128((__m128i *)&rxdp[i].read,
+						dma_addr0);
+			}
+		}
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+			RTE_I40E_RXQ_REARM_THRESH;
+		return;
+	}
+
+#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
+	struct rte_mbuf *mb0, *mb1;
+	__m128i dma_addr0, dma_addr1;
+	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
+			RTE_PKTMBUF_HEADROOM);
+	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
+	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+		__m128i vaddr0, vaddr1;
+
+		mb0 = rxep[0].mbuf;
+		mb1 = rxep[1].mbuf;
+
+		/* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+
+		/* convert pa to dma_addr hdr/data */
+		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
+		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
+
+		/* add headroom to pa values */
+		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
+		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
+
+		/* flush desc with pa dma_addr */
+		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
+		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
+	}
+#else
+	struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+	struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
+	__m512i dma_addr0_3, dma_addr4_7;
+	__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
+	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
+			i += 8, rxep += 8, rxdp += 8) {
+		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+		__m128i vaddr4, vaddr5, vaddr6, vaddr7;
+		__m256i vaddr0_1, vaddr2_3;
+		__m256i vaddr4_5, vaddr6_7;
+		__m512i vaddr0_3, vaddr4_7;
+
+		mb0 = rxep[0].mbuf;
+		mb1 = rxep[1].mbuf;
+		mb2 = rxep[2].mbuf;
+		mb3 = rxep[3].mbuf;
+		mb4 = rxep[4].mbuf;
+		mb5 = rxep[5].mbuf;
+		mb6 = rxep[6].mbuf;
+		mb7 = rxep[7].mbuf;
+
+		/* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+		vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
+		vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
+		vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
+		vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+
+		/**
+		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+		 * into the high lanes. Similarly for 2 & 3
+		 */
+		vaddr0_1 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+						vaddr1, 1);
+		vaddr2_3 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+						vaddr3, 1);
+		vaddr4_5 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
+						vaddr5, 1);
+		vaddr6_7 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
+						vaddr7, 1);
+		vaddr0_3 =
+			_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
+						vaddr2_3, 1);
+		vaddr4_7 =
+			_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
+						vaddr6_7, 1);
+
+		/* convert pa to dma_addr hdr/data */
+		dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
+		dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
+
+		/* add headroom to pa values */
+		dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
+		dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
+
+		/* flush desc with pa dma_addr */
+		_mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
+		_mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
+	}
+#endif
 
+done:
 	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
 	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
 		rxq->rxrearm_start = 0;
-- 
1.9.3


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [dpdk-dev] [PATCH 3/3] net/i40e: fix segment fault in AVX512
  2021-03-12  1:27 [dpdk-dev] [PATCH 3/3] net/i40e: fix segment fault in AVX512 Wenzhuo Lu
@ 2021-03-15 17:38 ` Coyle, David
  2021-03-26  1:50   ` Lu, Wenzhuo
  0 siblings, 1 reply; 3+ messages in thread
From: Coyle, David @ 2021-03-15 17:38 UTC (permalink / raw)
  To: Lu, Wenzhuo, dev; +Cc: Lu, Wenzhuo, stable

Hi Wenzhuo

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Wenzhuo Lu
> Sent: Friday, March 12, 2021 1:27 AM
> To: dev@dpdk.org
> Cc: Lu, Wenzhuo <wenzhuo.lu@intel.com>; stable@dpdk.org
> Subject: [dpdk-dev] [PATCH 3/3] net/i40e: fix segment fault in AVX512
> 
> Fix segment fault when failing to get the memory from the pool.
> 
> Fixes: e6a6a138919f ("net/i40e: add AVX512 vector path")
> Cc: stable@dpdk.org
> 
> Reported-by: David Coyle <David.Coyle@intel.com>
> Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
> ---
>  drivers/net/i40e/i40e_rxtx_vec_avx512.c | 128
> ++++++++++++++++++++++++++++++++
>  1 file changed, 128 insertions(+)
> 
> diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx512.c
> b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
> index 862c916..36521da 100644
> --- a/drivers/net/i40e/i40e_rxtx_vec_avx512.c
> +++ b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
> @@ -32,6 +32,9 @@
> 
>  	rxdp = rxq->rx_ring + rxq->rxrearm_start;
> 
> +	if (!cache)
> +		goto normal;

[DC] Like in IAVF and ICE, should we also check for cache->len == 0, like is done in Tx path?

> +
>  	/* We need to pull 'n' more MBUFs into the software ring from
> mempool
>  	 * We inline the mempool function here, so we can vectorize the
> copy
>  	 * from the cache into the shadow ring.
> @@ -132,7 +135,132 @@
>  #endif
>  		rxep += 8, rxdp += 8, cache->len -= 8;
>  	}
> +	goto done;
> +
> +normal:
> +	/* Pull 'n' more MBUFs into the software ring */
> +	if (rte_mempool_get_bulk(rxq->mp,
> +				 (void *)rxep,
> +				 RTE_I40E_RXQ_REARM_THRESH) < 0) {
> +		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
> +		    rxq->nb_rx_desc) {
> +			__m128i dma_addr0;
> +
> +			dma_addr0 = _mm_setzero_si128();
> +			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
> +				rxep[i].mbuf = &rxq->fake_mbuf;
> +				_mm_store_si128((__m128i *)&rxdp[i].read,
> +						dma_addr0);
> +			}
> +		}
> +		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed
> +=
> +			RTE_I40E_RXQ_REARM_THRESH;
> +		return;
> +	}
> +
> +#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
> +	struct rte_mbuf *mb0, *mb1;
> +	__m128i dma_addr0, dma_addr1;
> +	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
> +			RTE_PKTMBUF_HEADROOM);
> +	/* Initialize the mbufs in vector, process 4 mbufs in one loop */

[DC] Comment should say 2 mbufs

> +	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
> +		__m128i vaddr0, vaddr1;
> +
> +		mb0 = rxep[0].mbuf;
> +		mb1 = rxep[1].mbuf;
> +
> +		/* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */
> +		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
> +				offsetof(struct rte_mbuf, buf_addr) + 8);
> +		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
> +		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
> +
> +		/* convert pa to dma_addr hdr/data */
> +		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
> +		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
> +
> +		/* add headroom to pa values */
> +		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
> +		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
> +
> +		/* flush desc with pa dma_addr */
> +		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
> +		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
> +	}
> +#else
> +	struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
> +	struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
> +	__m512i dma_addr0_3, dma_addr4_7;
> +	__m512i hdr_room =
> _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
> +	/* Initialize the mbufs in vector, process 4 mbufs in one loop */

[DC] Comment should say 8 mbufs

> +	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH;
> +			i += 8, rxep += 8, rxdp += 8) {
> +		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
> +		__m128i vaddr4, vaddr5, vaddr6, vaddr7;

<snip>

> +		vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
> +		vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
> +
> +		/**
> +		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
> +		 * into the high lanes. Similarly for 2 & 3
> +		 */

[DC] Comment should say "Similarly for 2 & 3, 4 & 5, 6 & 7"

> +		vaddr0_1 =
> +
> 	_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
> +						vaddr1, 1);

<snip>

> +		/* flush desc with pa dma_addr */
> +		_mm512_store_si512((__m512i *)&rxdp->read,
> dma_addr0_3);
> +		_mm512_store_si512((__m512i *)&(rxdp + 4)->read,
> dma_addr4_7);
> +	}
> +#endif

[DC] Again, there's common code here with the avx2 file and also with the IAVF and ICE PMDs.

As I said in other reviews, maybe it's not practical to share code across PMDs.
But might be good to have some common functions within each PMD for avx2 and avx512 paths

> 
> +done:
>  	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
>  	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
>  		rxq->rxrearm_start = 0;

The patch fixes the seg fault, but note I have only tested the default '#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC ' path

Tested-by: David Coyle <david.coyle@intel.com>

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [dpdk-dev] [PATCH 3/3] net/i40e: fix segment fault in AVX512
  2021-03-15 17:38 ` Coyle, David
@ 2021-03-26  1:50   ` Lu, Wenzhuo
  0 siblings, 0 replies; 3+ messages in thread
From: Lu, Wenzhuo @ 2021-03-26  1:50 UTC (permalink / raw)
  To: Coyle, David, dev; +Cc: stable

Hi David,
Sorry for the late response.

> > +	if (!cache)
> > +		goto normal;
> 
> [DC] Like in IAVF and ICE, should we also check for cache->len == 0, like is
> done in Tx path?
Not necessary because below code tries to get more buffer is length is not as long as needed, it covers the case even if the length is 0.

> > +	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
> 
> [DC] Comment should say 2 mbufs
Thanks. Will correct it.

> > _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
> > +	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
> 
> [DC] Comment should say 8 mbufs
Thanks. Will correct it.

> > +		/**
> > +		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
> > +		 * into the high lanes. Similarly for 2 & 3
> > +		 */
> 
> [DC] Comment should say "Similarly for 2 & 3, 4 & 5, 6 & 7"
Thanks. Will correct it.

> 
> > +		vaddr0_1 =
> > +
> > 	_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
> > +						vaddr1, 1);
> 
> <snip>
> 
> > +		/* flush desc with pa dma_addr */
> > +		_mm512_store_si512((__m512i *)&rxdp->read,
> > dma_addr0_3);
> > +		_mm512_store_si512((__m512i *)&(rxdp + 4)->read,
> > dma_addr4_7);
> > +	}
> > +#endif
> 
> [DC] Again, there's common code here with the avx2 file and also with the
> IAVF and ICE PMDs.
> 
> As I said in other reviews, maybe it's not practical to share code across PMDs.
> But might be good to have some common functions within each PMD for
> avx2 and avx512 paths
Most of the code looks same but not all. In this avx512 file, some avx512 instructions are used to replace the avx2 ones.
Let me check if some common code can be saved.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-03-26  1:51 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-12  1:27 [dpdk-dev] [PATCH 3/3] net/i40e: fix segment fault in AVX512 Wenzhuo Lu
2021-03-15 17:38 ` Coyle, David
2021-03-26  1:50   ` Lu, Wenzhuo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).