patches for DPDK stable branches
 help / color / mirror / Atom feed
* [dpdk-stable] [PATCH] net/ice: fix RSS hash offload at vector mode
@ 2020-10-15  6:03 Zhang,Alvin
  0 siblings, 0 replies; 2+ messages in thread
From: Zhang,Alvin @ 2020-10-15  6:03 UTC (permalink / raw)
  To: ZhihongX.Peng; +Cc: Alvin Zhang, stable

From: Alvin Zhang <alvinx.zhang@intel.com>

1. Use the RSS offload bit of the received packet to determine whether
   the RSS hash should be read.
2. Simplify the code of reading RSS hash value.

Fixes: 12443386a0b0 ("net/ice: support flex Rx descriptor RxDID22")
Cc: stable@dpdk.org

Signed-off-by: Alvin Zhang <alvinx.zhang@intel.com>
---
 drivers/net/ice/ice_rxtx_vec_avx2.c | 116 +++++++++---------------------------
 drivers/net/ice/ice_rxtx_vec_sse.c  |  79 +++++++-----------------
 2 files changed, 50 insertions(+), 145 deletions(-)

diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c b/drivers/net/ice/ice_rxtx_vec_avx2.c
index 5969a30..a47f38c 100644
--- a/drivers/net/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx2.c
@@ -523,94 +523,35 @@
 				_mm256_extract_epi32(fdir_id0_7, 4);
 		} /* if() on fdir_enabled */
 
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
-		/**
-		 * needs to load 2nd 16B of each desc for RSS hash parsing,
-		 * will cause performance drop to get into this context.
-		 */
-		if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &
-				DEV_RX_OFFLOAD_RSS_HASH) {
-			/* load bottom half of every 32B desc */
-			const __m128i raw_desc_bh7 =
-				_mm_load_si128
-					((void *)(&rxdp[7].wb.status_error1));
-			rte_compiler_barrier();
-			const __m128i raw_desc_bh6 =
-				_mm_load_si128
-					((void *)(&rxdp[6].wb.status_error1));
-			rte_compiler_barrier();
-			const __m128i raw_desc_bh5 =
-				_mm_load_si128
-					((void *)(&rxdp[5].wb.status_error1));
-			rte_compiler_barrier();
-			const __m128i raw_desc_bh4 =
-				_mm_load_si128
-					((void *)(&rxdp[4].wb.status_error1));
-			rte_compiler_barrier();
-			const __m128i raw_desc_bh3 =
-				_mm_load_si128
-					((void *)(&rxdp[3].wb.status_error1));
-			rte_compiler_barrier();
-			const __m128i raw_desc_bh2 =
-				_mm_load_si128
-					((void *)(&rxdp[2].wb.status_error1));
-			rte_compiler_barrier();
-			const __m128i raw_desc_bh1 =
-				_mm_load_si128
-					((void *)(&rxdp[1].wb.status_error1));
-			rte_compiler_barrier();
-			const __m128i raw_desc_bh0 =
-				_mm_load_si128
-					((void *)(&rxdp[0].wb.status_error1));
+		const __m256i dd_status = _mm256_and_si256(status0_7, dd_check);
 
-			__m256i raw_desc_bh6_7 =
-				_mm256_inserti128_si256
-					(_mm256_castsi128_si256(raw_desc_bh6),
-					raw_desc_bh7, 1);
-			__m256i raw_desc_bh4_5 =
-				_mm256_inserti128_si256
-					(_mm256_castsi128_si256(raw_desc_bh4),
-					raw_desc_bh5, 1);
-			__m256i raw_desc_bh2_3 =
-				_mm256_inserti128_si256
-					(_mm256_castsi128_si256(raw_desc_bh2),
-					raw_desc_bh3, 1);
-			__m256i raw_desc_bh0_1 =
-				_mm256_inserti128_si256
-					(_mm256_castsi128_si256(raw_desc_bh0),
-					raw_desc_bh1, 1);
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 
-			/**
-			 * to shift the 32b RSS hash value to the
-			 * highest 32b of each 128b before mask
-			 */
-			__m256i rss_hash6_7 =
-				_mm256_slli_epi64(raw_desc_bh6_7, 32);
-			__m256i rss_hash4_5 =
-				_mm256_slli_epi64(raw_desc_bh4_5, 32);
-			__m256i rss_hash2_3 =
-				_mm256_slli_epi64(raw_desc_bh2_3, 32);
-			__m256i rss_hash0_1 =
-				_mm256_slli_epi64(raw_desc_bh0_1, 32);
-
-			__m256i rss_hash_msk =
-				_mm256_set_epi32(0xFFFFFFFF, 0, 0, 0,
-						 0xFFFFFFFF, 0, 0, 0);
-
-			rss_hash6_7 = _mm256_and_si256
-					(rss_hash6_7, rss_hash_msk);
-			rss_hash4_5 = _mm256_and_si256
-					(rss_hash4_5, rss_hash_msk);
-			rss_hash2_3 = _mm256_and_si256
-					(rss_hash2_3, rss_hash_msk);
-			rss_hash0_1 = _mm256_and_si256
-					(rss_hash0_1, rss_hash_msk);
-
-			mb6_7 = _mm256_or_si256(mb6_7, rss_hash6_7);
-			mb4_5 = _mm256_or_si256(mb4_5, rss_hash4_5);
-			mb2_3 = _mm256_or_si256(mb2_3, rss_hash2_3);
-			mb0_1 = _mm256_or_si256(mb0_1, rss_hash0_1);
-		} /* if() on RSS hash parsing */
+		/* bit12 is for RSS indication.
+		 * Extract hash value will cause performance drop.
+		 */
+		if (!_mm256_testz_si256(status0_7,
+					_mm256_slli_epi32(dd_status, 12))) {
+			uint32_t hash_val[8];
+
+			hash_val[0] = *(uint32_t *)&rxdp[0].wb.flex_meta2;
+			hash_val[1] = *(uint32_t *)&rxdp[1].wb.flex_meta2;
+			hash_val[2] = *(uint32_t *)&rxdp[2].wb.flex_meta2;
+			hash_val[3] = *(uint32_t *)&rxdp[3].wb.flex_meta2;
+			hash_val[4] = *(uint32_t *)&rxdp[4].wb.flex_meta2;
+			hash_val[5] = *(uint32_t *)&rxdp[5].wb.flex_meta2;
+			hash_val[6] = *(uint32_t *)&rxdp[6].wb.flex_meta2;
+			hash_val[7] = *(uint32_t *)&rxdp[7].wb.flex_meta2;
+
+			mb0_1 = _mm256_insert_epi32(mb0_1, hash_val[0], 3);
+			mb0_1 = _mm256_insert_epi32(mb0_1, hash_val[1], 7);
+			mb2_3 = _mm256_insert_epi32(mb2_3, hash_val[2], 3);
+			mb2_3 = _mm256_insert_epi32(mb2_3, hash_val[3], 7);
+			mb4_5 = _mm256_insert_epi32(mb4_5, hash_val[4], 3);
+			mb4_5 = _mm256_insert_epi32(mb4_5, hash_val[5], 7);
+			mb6_7 = _mm256_insert_epi32(mb6_7, hash_val[6], 3);
+			mb6_7 = _mm256_insert_epi32(mb6_7, hash_val[7], 7);
+		}
 #endif
 
 		/**
@@ -728,8 +669,7 @@
 		}
 
 		/* perform dd_check */
-		status0_7 = _mm256_and_si256(status0_7, dd_check);
-		status0_7 = _mm256_packs_epi32(status0_7,
+		status0_7 = _mm256_packs_epi32(dd_status,
 					       _mm256_setzero_si256());
 
 		uint64_t burst = __builtin_popcountll
diff --git a/drivers/net/ice/ice_rxtx_vec_sse.c b/drivers/net/ice/ice_rxtx_vec_sse.c
index c4c9a91..dd3b70f 100644
--- a/drivers/net/ice/ice_rxtx_vec_sse.c
+++ b/drivers/net/ice/ice_rxtx_vec_sse.c
@@ -439,65 +439,31 @@
 		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
 		pkt_mb0 = _mm_add_epi16(pkt_mb0, crc_adjust);
 
+		/* C.2 get 4 pkts staterr value  */
+		staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);
+
+		const __m128i dd_status = _mm_and_si128(staterr, dd_check);
+
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
-		/**
-		 * needs to load 2nd 16B of each desc for RSS hash parsing,
-		 * will cause performance drop to get into this context.
+
+		/* bit12 is for RSS indication.
+		 * Extract hash value will cause performance drop.
 		 */
-		if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &
-				DEV_RX_OFFLOAD_RSS_HASH) {
-			/* load bottom half of every 32B desc */
-			const __m128i raw_desc_bh3 =
-				_mm_load_si128
-					((void *)(&rxdp[3].wb.status_error1));
-			rte_compiler_barrier();
-			const __m128i raw_desc_bh2 =
-				_mm_load_si128
-					((void *)(&rxdp[2].wb.status_error1));
-			rte_compiler_barrier();
-			const __m128i raw_desc_bh1 =
-				_mm_load_si128
-					((void *)(&rxdp[1].wb.status_error1));
-			rte_compiler_barrier();
-			const __m128i raw_desc_bh0 =
-				_mm_load_si128
-					((void *)(&rxdp[0].wb.status_error1));
-
-			/**
-			 * to shift the 32b RSS hash value to the
-			 * highest 32b of each 128b before mask
-			 */
-			__m128i rss_hash3 =
-				_mm_slli_epi64(raw_desc_bh3, 32);
-			__m128i rss_hash2 =
-				_mm_slli_epi64(raw_desc_bh2, 32);
-			__m128i rss_hash1 =
-				_mm_slli_epi64(raw_desc_bh1, 32);
-			__m128i rss_hash0 =
-				_mm_slli_epi64(raw_desc_bh0, 32);
-
-			__m128i rss_hash_msk =
-				_mm_set_epi32(0xFFFFFFFF, 0, 0, 0);
-
-			rss_hash3 = _mm_and_si128
-					(rss_hash3, rss_hash_msk);
-			rss_hash2 = _mm_and_si128
-					(rss_hash2, rss_hash_msk);
-			rss_hash1 = _mm_and_si128
-					(rss_hash1, rss_hash_msk);
-			rss_hash0 = _mm_and_si128
-					(rss_hash0, rss_hash_msk);
-
-			pkt_mb3 = _mm_or_si128(pkt_mb3, rss_hash3);
-			pkt_mb2 = _mm_or_si128(pkt_mb2, rss_hash2);
-			pkt_mb1 = _mm_or_si128(pkt_mb1, rss_hash1);
-			pkt_mb0 = _mm_or_si128(pkt_mb0, rss_hash0);
-		} /* if() on RSS hash parsing */
+		if (!_mm_testz_si128(staterr, _mm_slli_epi32(dd_status, 12))) {
+			uint32_t hash_val[4];
+
+			hash_val[0] = *(uint32_t *)&rxdp[0].wb.flex_meta2;
+			hash_val[1] = *(uint32_t *)&rxdp[1].wb.flex_meta2;
+			hash_val[2] = *(uint32_t *)&rxdp[2].wb.flex_meta2;
+			hash_val[3] = *(uint32_t *)&rxdp[3].wb.flex_meta2;
+
+			pkt_mb0 = _mm_insert_epi32(pkt_mb0, hash_val[0], 3);
+			pkt_mb1 = _mm_insert_epi32(pkt_mb0, hash_val[1], 3);
+			pkt_mb2 = _mm_insert_epi32(pkt_mb0, hash_val[2], 3);
+			pkt_mb3 = _mm_insert_epi32(pkt_mb0, hash_val[3], 3);
+		}
 #endif
 
-		/* C.2 get 4 pkts staterr value  */
-		staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);
-
 		/* D.3 copy final 3,4 data to rx_pkts */
 		_mm_storeu_si128
 			((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
@@ -522,8 +488,7 @@
 		}
 
 		/* C.3 calc available number of desc */
-		staterr = _mm_and_si128(staterr, dd_check);
-		staterr = _mm_packs_epi32(staterr, zero);
+		staterr = _mm_packs_epi32(dd_status, zero);
 
 		/* D.3 copy final 1,2 data to rx_pkts */
 		_mm_storeu_si128
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 2+ messages in thread

* [dpdk-stable] [PATCH] net/ice: fix RSS hash offload at vector mode
@ 2020-10-15 13:48 Zhang Alvin
  0 siblings, 0 replies; 2+ messages in thread
From: Zhang Alvin @ 2020-10-15 13:48 UTC (permalink / raw)
  To: jia.guo, qi.z.zhang; +Cc: dev, Alvin Zhang, stable

From: Alvin Zhang <alvinx.zhang@intel.com>

1. Use the RSS offload bit of the received packet to determine whether
   the RSS hash should be read.
2. Simplify the code of reading RSS hash value.

Fixes: 12443386a0b0 ("net/ice: support flex Rx descriptor RxDID22")
Cc: stable@dpdk.org

Signed-off-by: Alvin Zhang <alvinx.zhang@intel.com>
---
 drivers/net/ice/ice_rxtx_vec_avx2.c | 116 +++++++++---------------------------
 drivers/net/ice/ice_rxtx_vec_sse.c  |  79 +++++++-----------------
 2 files changed, 50 insertions(+), 145 deletions(-)

diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c b/drivers/net/ice/ice_rxtx_vec_avx2.c
index 5969a30..a47f38c 100644
--- a/drivers/net/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx2.c
@@ -523,94 +523,35 @@
 				_mm256_extract_epi32(fdir_id0_7, 4);
 		} /* if() on fdir_enabled */
 
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
-		/**
-		 * needs to load 2nd 16B of each desc for RSS hash parsing,
-		 * will cause performance drop to get into this context.
-		 */
-		if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &
-				DEV_RX_OFFLOAD_RSS_HASH) {
-			/* load bottom half of every 32B desc */
-			const __m128i raw_desc_bh7 =
-				_mm_load_si128
-					((void *)(&rxdp[7].wb.status_error1));
-			rte_compiler_barrier();
-			const __m128i raw_desc_bh6 =
-				_mm_load_si128
-					((void *)(&rxdp[6].wb.status_error1));
-			rte_compiler_barrier();
-			const __m128i raw_desc_bh5 =
-				_mm_load_si128
-					((void *)(&rxdp[5].wb.status_error1));
-			rte_compiler_barrier();
-			const __m128i raw_desc_bh4 =
-				_mm_load_si128
-					((void *)(&rxdp[4].wb.status_error1));
-			rte_compiler_barrier();
-			const __m128i raw_desc_bh3 =
-				_mm_load_si128
-					((void *)(&rxdp[3].wb.status_error1));
-			rte_compiler_barrier();
-			const __m128i raw_desc_bh2 =
-				_mm_load_si128
-					((void *)(&rxdp[2].wb.status_error1));
-			rte_compiler_barrier();
-			const __m128i raw_desc_bh1 =
-				_mm_load_si128
-					((void *)(&rxdp[1].wb.status_error1));
-			rte_compiler_barrier();
-			const __m128i raw_desc_bh0 =
-				_mm_load_si128
-					((void *)(&rxdp[0].wb.status_error1));
+		const __m256i dd_status = _mm256_and_si256(status0_7, dd_check);
 
-			__m256i raw_desc_bh6_7 =
-				_mm256_inserti128_si256
-					(_mm256_castsi128_si256(raw_desc_bh6),
-					raw_desc_bh7, 1);
-			__m256i raw_desc_bh4_5 =
-				_mm256_inserti128_si256
-					(_mm256_castsi128_si256(raw_desc_bh4),
-					raw_desc_bh5, 1);
-			__m256i raw_desc_bh2_3 =
-				_mm256_inserti128_si256
-					(_mm256_castsi128_si256(raw_desc_bh2),
-					raw_desc_bh3, 1);
-			__m256i raw_desc_bh0_1 =
-				_mm256_inserti128_si256
-					(_mm256_castsi128_si256(raw_desc_bh0),
-					raw_desc_bh1, 1);
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 
-			/**
-			 * to shift the 32b RSS hash value to the
-			 * highest 32b of each 128b before mask
-			 */
-			__m256i rss_hash6_7 =
-				_mm256_slli_epi64(raw_desc_bh6_7, 32);
-			__m256i rss_hash4_5 =
-				_mm256_slli_epi64(raw_desc_bh4_5, 32);
-			__m256i rss_hash2_3 =
-				_mm256_slli_epi64(raw_desc_bh2_3, 32);
-			__m256i rss_hash0_1 =
-				_mm256_slli_epi64(raw_desc_bh0_1, 32);
-
-			__m256i rss_hash_msk =
-				_mm256_set_epi32(0xFFFFFFFF, 0, 0, 0,
-						 0xFFFFFFFF, 0, 0, 0);
-
-			rss_hash6_7 = _mm256_and_si256
-					(rss_hash6_7, rss_hash_msk);
-			rss_hash4_5 = _mm256_and_si256
-					(rss_hash4_5, rss_hash_msk);
-			rss_hash2_3 = _mm256_and_si256
-					(rss_hash2_3, rss_hash_msk);
-			rss_hash0_1 = _mm256_and_si256
-					(rss_hash0_1, rss_hash_msk);
-
-			mb6_7 = _mm256_or_si256(mb6_7, rss_hash6_7);
-			mb4_5 = _mm256_or_si256(mb4_5, rss_hash4_5);
-			mb2_3 = _mm256_or_si256(mb2_3, rss_hash2_3);
-			mb0_1 = _mm256_or_si256(mb0_1, rss_hash0_1);
-		} /* if() on RSS hash parsing */
+		/* bit12 is for RSS indication.
+		 * Extract hash value will cause performance drop.
+		 */
+		if (!_mm256_testz_si256(status0_7,
+					_mm256_slli_epi32(dd_status, 12))) {
+			uint32_t hash_val[8];
+
+			hash_val[0] = *(uint32_t *)&rxdp[0].wb.flex_meta2;
+			hash_val[1] = *(uint32_t *)&rxdp[1].wb.flex_meta2;
+			hash_val[2] = *(uint32_t *)&rxdp[2].wb.flex_meta2;
+			hash_val[3] = *(uint32_t *)&rxdp[3].wb.flex_meta2;
+			hash_val[4] = *(uint32_t *)&rxdp[4].wb.flex_meta2;
+			hash_val[5] = *(uint32_t *)&rxdp[5].wb.flex_meta2;
+			hash_val[6] = *(uint32_t *)&rxdp[6].wb.flex_meta2;
+			hash_val[7] = *(uint32_t *)&rxdp[7].wb.flex_meta2;
+
+			mb0_1 = _mm256_insert_epi32(mb0_1, hash_val[0], 3);
+			mb0_1 = _mm256_insert_epi32(mb0_1, hash_val[1], 7);
+			mb2_3 = _mm256_insert_epi32(mb2_3, hash_val[2], 3);
+			mb2_3 = _mm256_insert_epi32(mb2_3, hash_val[3], 7);
+			mb4_5 = _mm256_insert_epi32(mb4_5, hash_val[4], 3);
+			mb4_5 = _mm256_insert_epi32(mb4_5, hash_val[5], 7);
+			mb6_7 = _mm256_insert_epi32(mb6_7, hash_val[6], 3);
+			mb6_7 = _mm256_insert_epi32(mb6_7, hash_val[7], 7);
+		}
 #endif
 
 		/**
@@ -728,8 +669,7 @@
 		}
 
 		/* perform dd_check */
-		status0_7 = _mm256_and_si256(status0_7, dd_check);
-		status0_7 = _mm256_packs_epi32(status0_7,
+		status0_7 = _mm256_packs_epi32(dd_status,
 					       _mm256_setzero_si256());
 
 		uint64_t burst = __builtin_popcountll
diff --git a/drivers/net/ice/ice_rxtx_vec_sse.c b/drivers/net/ice/ice_rxtx_vec_sse.c
index c4c9a91..dd3b70f 100644
--- a/drivers/net/ice/ice_rxtx_vec_sse.c
+++ b/drivers/net/ice/ice_rxtx_vec_sse.c
@@ -439,65 +439,31 @@
 		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
 		pkt_mb0 = _mm_add_epi16(pkt_mb0, crc_adjust);
 
+		/* C.2 get 4 pkts staterr value  */
+		staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);
+
+		const __m128i dd_status = _mm_and_si128(staterr, dd_check);
+
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
-		/**
-		 * needs to load 2nd 16B of each desc for RSS hash parsing,
-		 * will cause performance drop to get into this context.
+
+		/* bit12 is for RSS indication.
+		 * Extract hash value will cause performance drop.
 		 */
-		if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &
-				DEV_RX_OFFLOAD_RSS_HASH) {
-			/* load bottom half of every 32B desc */
-			const __m128i raw_desc_bh3 =
-				_mm_load_si128
-					((void *)(&rxdp[3].wb.status_error1));
-			rte_compiler_barrier();
-			const __m128i raw_desc_bh2 =
-				_mm_load_si128
-					((void *)(&rxdp[2].wb.status_error1));
-			rte_compiler_barrier();
-			const __m128i raw_desc_bh1 =
-				_mm_load_si128
-					((void *)(&rxdp[1].wb.status_error1));
-			rte_compiler_barrier();
-			const __m128i raw_desc_bh0 =
-				_mm_load_si128
-					((void *)(&rxdp[0].wb.status_error1));
-
-			/**
-			 * to shift the 32b RSS hash value to the
-			 * highest 32b of each 128b before mask
-			 */
-			__m128i rss_hash3 =
-				_mm_slli_epi64(raw_desc_bh3, 32);
-			__m128i rss_hash2 =
-				_mm_slli_epi64(raw_desc_bh2, 32);
-			__m128i rss_hash1 =
-				_mm_slli_epi64(raw_desc_bh1, 32);
-			__m128i rss_hash0 =
-				_mm_slli_epi64(raw_desc_bh0, 32);
-
-			__m128i rss_hash_msk =
-				_mm_set_epi32(0xFFFFFFFF, 0, 0, 0);
-
-			rss_hash3 = _mm_and_si128
-					(rss_hash3, rss_hash_msk);
-			rss_hash2 = _mm_and_si128
-					(rss_hash2, rss_hash_msk);
-			rss_hash1 = _mm_and_si128
-					(rss_hash1, rss_hash_msk);
-			rss_hash0 = _mm_and_si128
-					(rss_hash0, rss_hash_msk);
-
-			pkt_mb3 = _mm_or_si128(pkt_mb3, rss_hash3);
-			pkt_mb2 = _mm_or_si128(pkt_mb2, rss_hash2);
-			pkt_mb1 = _mm_or_si128(pkt_mb1, rss_hash1);
-			pkt_mb0 = _mm_or_si128(pkt_mb0, rss_hash0);
-		} /* if() on RSS hash parsing */
+		if (!_mm_testz_si128(staterr, _mm_slli_epi32(dd_status, 12))) {
+			uint32_t hash_val[4];
+
+			hash_val[0] = *(uint32_t *)&rxdp[0].wb.flex_meta2;
+			hash_val[1] = *(uint32_t *)&rxdp[1].wb.flex_meta2;
+			hash_val[2] = *(uint32_t *)&rxdp[2].wb.flex_meta2;
+			hash_val[3] = *(uint32_t *)&rxdp[3].wb.flex_meta2;
+
+			pkt_mb0 = _mm_insert_epi32(pkt_mb0, hash_val[0], 3);
+			pkt_mb1 = _mm_insert_epi32(pkt_mb0, hash_val[1], 3);
+			pkt_mb2 = _mm_insert_epi32(pkt_mb0, hash_val[2], 3);
+			pkt_mb3 = _mm_insert_epi32(pkt_mb0, hash_val[3], 3);
+		}
 #endif
 
-		/* C.2 get 4 pkts staterr value  */
-		staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);
-
 		/* D.3 copy final 3,4 data to rx_pkts */
 		_mm_storeu_si128
 			((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
@@ -522,8 +488,7 @@
 		}
 
 		/* C.3 calc available number of desc */
-		staterr = _mm_and_si128(staterr, dd_check);
-		staterr = _mm_packs_epi32(staterr, zero);
+		staterr = _mm_packs_epi32(dd_status, zero);
 
 		/* D.3 copy final 1,2 data to rx_pkts */
 		_mm_storeu_si128
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2020-10-15  6:03 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-15  6:03 [dpdk-stable] [PATCH] net/ice: fix RSS hash offload at vector mode Zhang,Alvin
2020-10-15 13:48 Zhang Alvin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).