DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH 2/3] net/ice: fix segment fault in AVX512
@ 2021-03-12  1:27 Wenzhuo Lu
  2021-03-15 17:25 ` Coyle, David
  0 siblings, 1 reply; 3+ messages in thread
From: Wenzhuo Lu @ 2021-03-12  1:27 UTC (permalink / raw)
  To: dev; +Cc: Wenzhuo Lu, stable

Fix segment fault when failing to get the memory from the pool.

Fixes: 7f85d5ebcfe1 ("net/ice: add AVX512 vector path")
Cc: stable@dpdk.org

Reported-by: David Coyle <David.Coyle@intel.com>
Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
---
 drivers/net/ice/ice_rxtx_vec_avx512.c | 129 ++++++++++++++++++++++++++++++++++
 1 file changed, 129 insertions(+)

diff --git a/drivers/net/ice/ice_rxtx_vec_avx512.c b/drivers/net/ice/ice_rxtx_vec_avx512.c
index 0e5a676..7c458d5 100644
--- a/drivers/net/ice/ice_rxtx_vec_avx512.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx512.c
@@ -24,6 +24,9 @@
 
 	rxdp = rxq->rx_ring + rxq->rxrearm_start;
 
+	if (!cache)
+		goto normal;
+
 	/* We need to pull 'n' more MBUFs into the software ring */
 	if (cache->len < ICE_RXQ_REARM_THRESH) {
 		uint32_t req = ICE_RXQ_REARM_THRESH + (cache->size -
@@ -115,6 +118,132 @@
 		rxep += 8, rxdp += 8, cache->len -= 8;
 	}
 
+	goto done;
+
+normal:
+	/* Pull 'n' more MBUFs into the software ring */
+	if (rte_mempool_get_bulk(rxq->mp,
+				 (void *)rxep,
+				 ICE_RXQ_REARM_THRESH) < 0) {
+		if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=
+		    rxq->nb_rx_desc) {
+			__m128i dma_addr0;
+
+			dma_addr0 = _mm_setzero_si128();
+			for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
+				rxep[i].mbuf = &rxq->fake_mbuf;
+				_mm_store_si128((__m128i *)&rxdp[i].read,
+						dma_addr0);
+			}
+		}
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+			ICE_RXQ_REARM_THRESH;
+		return;
+	}
+
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+	struct rte_mbuf *mb0, *mb1;
+	__m128i dma_addr0, dma_addr1;
+	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
+			RTE_PKTMBUF_HEADROOM);
+	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
+	for (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
+		__m128i vaddr0, vaddr1;
+
+		mb0 = rxep[0].mbuf;
+		mb1 = rxep[1].mbuf;
+
+		/* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+
+		/* convert pa to dma_addr hdr/data */
+		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
+		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
+
+		/* add headroom to pa values */
+		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
+		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
+
+		/* flush desc with pa dma_addr */
+		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
+		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
+	}
+#else
+	struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+	struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
+	__m512i dma_addr0_3, dma_addr4_7;
+	__m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
+	/* Initialize the mbufs in vector, process 4 mbufs in one loop */
+	for (i = 0; i < ICE_RXQ_REARM_THRESH;
+			i += 8, rxep += 8, rxdp += 8) {
+		__m128i vaddr0, vaddr1, vaddr2, vaddr3;
+		__m128i vaddr4, vaddr5, vaddr6, vaddr7;
+		__m256i vaddr0_1, vaddr2_3;
+		__m256i vaddr4_5, vaddr6_7;
+		__m512i vaddr0_3, vaddr4_7;
+
+		mb0 = rxep[0].mbuf;
+		mb1 = rxep[1].mbuf;
+		mb2 = rxep[2].mbuf;
+		mb3 = rxep[3].mbuf;
+		mb4 = rxep[4].mbuf;
+		mb5 = rxep[5].mbuf;
+		mb6 = rxep[6].mbuf;
+		mb7 = rxep[7].mbuf;
+
+		/* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */
+		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
+				offsetof(struct rte_mbuf, buf_addr) + 8);
+		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
+		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
+		vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
+		vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
+		vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
+		vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
+		vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
+		vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);
+
+		/**
+		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
+		 * into the high lanes. Similarly for 2 & 3
+		 */
+		vaddr0_1 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
+						vaddr1, 1);
+		vaddr2_3 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
+						vaddr3, 1);
+		vaddr4_5 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
+						vaddr5, 1);
+		vaddr6_7 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
+						vaddr7, 1);
+		vaddr0_3 =
+			_mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
+						vaddr2_3, 1);
+		vaddr4_7 =
+			_mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
+						vaddr6_7, 1);
+
+		/* convert pa to dma_addr hdr/data */
+		dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
+		dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);
+
+		/* add headroom to pa values */
+		dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
+		dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);
+
+		/* flush desc with pa dma_addr */
+		_mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
+		_mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
+	}
+#endif
+
+done:
 	rxq->rxrearm_start += ICE_RXQ_REARM_THRESH;
 	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
 		rxq->rxrearm_start = 0;
-- 
1.9.3


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [dpdk-dev] [PATCH 2/3] net/ice: fix segment fault in AVX512
  2021-03-12  1:27 [dpdk-dev] [PATCH 2/3] net/ice: fix segment fault in AVX512 Wenzhuo Lu
@ 2021-03-15 17:25 ` Coyle, David
  2021-03-26  1:52   ` Lu, Wenzhuo
  0 siblings, 1 reply; 3+ messages in thread
From: Coyle, David @ 2021-03-15 17:25 UTC (permalink / raw)
  To: Lu, Wenzhuo, dev; +Cc: Lu, Wenzhuo, stable

Hi Wenzhuo

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Wenzhuo Lu
> Sent: Friday, March 12, 2021 1:27 AM
> To: dev@dpdk.org
> Cc: Lu, Wenzhuo <wenzhuo.lu@intel.com>; stable@dpdk.org
> Subject: [dpdk-dev] [PATCH 2/3] net/ice: fix segment fault in AVX512
> 
> Fix segment fault when failing to get the memory from the pool.
> 
> Fixes: 7f85d5ebcfe1 ("net/ice: add AVX512 vector path")
> Cc: stable@dpdk.org
> 
> Reported-by: David Coyle <David.Coyle@intel.com>
> Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
> ---
>  drivers/net/ice/ice_rxtx_vec_avx512.c | 129
> ++++++++++++++++++++++++++++++++++
>  1 file changed, 129 insertions(+)
> 
> diff --git a/drivers/net/ice/ice_rxtx_vec_avx512.c
> b/drivers/net/ice/ice_rxtx_vec_avx512.c
> index 0e5a676..7c458d5 100644
> --- a/drivers/net/ice/ice_rxtx_vec_avx512.c
> +++ b/drivers/net/ice/ice_rxtx_vec_avx512.c
> @@ -24,6 +24,9 @@
> 
>  	rxdp = rxq->rx_ring + rxq->rxrearm_start;
> 
> +	if (!cache)
> +		goto normal;

[DC] Same as IAVF, in the Tx path, in ice_tx_free_bufs_avx512(), it also checks for
cache->len == 0.
Not sure if the extra check is necessary though - I don't know if 'cache' can be valid pointer
but have a length of 0

                if (!cache || cache->len == 0)
                        goto normal;

> +
>  	/* We need to pull 'n' more MBUFs into the software ring */
>  	if (cache->len < ICE_RXQ_REARM_THRESH) {
>  		uint32_t req = ICE_RXQ_REARM_THRESH + (cache->size -
> @@ -115,6 +118,132 @@
>  		rxep += 8, rxdp += 8, cache->len -= 8;
>  	}
> 
> +	goto done;
> +
> +normal:
> +	/* Pull 'n' more MBUFs into the software ring */
> +	if (rte_mempool_get_bulk(rxq->mp,
> +				 (void *)rxep,
> +				 ICE_RXQ_REARM_THRESH) < 0) {
> +		if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=
> +		    rxq->nb_rx_desc) {
> +			__m128i dma_addr0;
> +
> +			dma_addr0 = _mm_setzero_si128();
> +			for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
> +				rxep[i].mbuf = &rxq->fake_mbuf;
> +				_mm_store_si128((__m128i *)&rxdp[i].read,
> +						dma_addr0);
> +			}
> +		}
> +		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed
> +=
> +			ICE_RXQ_REARM_THRESH;
> +		return;
> +	}
> +
> +#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
> +	struct rte_mbuf *mb0, *mb1;
> +	__m128i dma_addr0, dma_addr1;
> +	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
> +			RTE_PKTMBUF_HEADROOM);
> +	/* Initialize the mbufs in vector, process 4 mbufs in one loop */

[DC] Comment above should say 2 mbufs

> +	for (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
> +		__m128i vaddr0, vaddr1;
> +
> +		mb0 = rxep[0].mbuf;
> +		mb1 = rxep[1].mbuf;
> +
> +		/* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */
> +		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
> +				offsetof(struct rte_mbuf, buf_addr) + 8);
> +		vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
> +		vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
> +
> +		/* convert pa to dma_addr hdr/data */
> +		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
> +		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
> +
> +		/* add headroom to pa values */
> +		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
> +		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
> +
> +		/* flush desc with pa dma_addr */
> +		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
> +		_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
> +	}

[DC] As in IAVF, the code above is the same as in avx2 file... any possibility to have a common function
or functions for the 2 files?

And there is also commonality between IAVF and ICE PMDs.
There doesn't seem to be any shared code between net PMDs at the moment though, so maybe it's
practical to have common functions

> +#else
> +	struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
> +	struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
> +	__m512i dma_addr0_3, dma_addr4_7;
> +	__m512i hdr_room =
> _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
> +	/* Initialize the mbufs in vector, process 4 mbufs in one loop */

[DC] Comment above should say 8 mbufs

> +	for (i = 0; i < ICE_RXQ_REARM_THRESH;
> +			i += 8, rxep += 8, rxdp += 8) {

<snip>

> +
> +		/**
> +		 * merge 0 & 1, by casting 0 to 256-bit and inserting 1
> +		 * into the high lanes. Similarly for 2 & 3
> +		 */

[DC] Comment above should say "Similarly for 2 & 3, 4 & 5, 6 & 7"

> +		vaddr0_1 =
> +
> 	_mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
> +						vaddr1, 1);
> +		vaddr2_3 =

The patch fixes the seg fault, but note I have only tested the default '#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC' path

Tested-by: David Coyle <david.coyle@intel.com>

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [dpdk-dev] [PATCH 2/3] net/ice: fix segment fault in AVX512
  2021-03-15 17:25 ` Coyle, David
@ 2021-03-26  1:52   ` Lu, Wenzhuo
  0 siblings, 0 replies; 3+ messages in thread
From: Lu, Wenzhuo @ 2021-03-26  1:52 UTC (permalink / raw)
  To: Coyle, David, dev; +Cc: stable

Hi David,
Thanks for the comments. I see they're the same as the i40e patch. Will handle them in V2.

Best regards
Wenzhuo Lu

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-03-26  1:52 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-12  1:27 [dpdk-dev] [PATCH 2/3] net/ice: fix segment fault in AVX512 Wenzhuo Lu
2021-03-15 17:25 ` Coyle, David
2021-03-26  1:52   ` Lu, Wenzhuo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).