DPDK patches and discussions
 help / color / mirror / Atom feed
* [PATCH] net/mlx5: fix RSS hash for non-RSS CQE zipping
@ 2024-11-21 13:32 Alexander Kozyrev
  2024-11-21 14:56 ` Dariusz Sosnowski
  2024-11-29 20:44 ` [PATCH v2] " Alexander Kozyrev
  0 siblings, 2 replies; 4+ messages in thread
From: Alexander Kozyrev @ 2024-11-21 13:32 UTC (permalink / raw)
  To: dev; +Cc: stable, rasland, viacheslavo, matan, dsosnowski, bingz, suanmingm

Take the RSS hash value from the title packet for
flow tag and packet header CQE zipping formats.

Fixes: 54c2d46b16 ("net/mlx5: support flow tag and packet header miniCQEs")
Cc: stable@dpdk.org

Signed-off-by: Alexander Kozyrev <akozyrev@nvidia.com>
---
 drivers/net/mlx5/mlx5_rxtx_vec_altivec.h | 9 +++++----
 drivers/net/mlx5/mlx5_rxtx_vec_neon.h    | 9 +++++----
 drivers/net/mlx5/mlx5_rxtx_vec_sse.h     | 9 +++++----
 3 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
index 240987d03d..18452cc047 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
@@ -452,6 +452,7 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 				(uint32_t)t_pkt->ol_flags,
 				(uint32_t)t_pkt->ol_flags,
 				(uint32_t)t_pkt->ol_flags};
+			const uint32_t hash_rss = t_pkt->hash.rss;
 
 			ol_flags_mask = (__vector unsigned char)
 				vec_or((__vector unsigned long)ol_flags_mask,
@@ -470,10 +471,10 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 				((__vector unsigned int)ol_flags)[2];
 			elts[pos + 3]->ol_flags =
 				((__vector unsigned int)ol_flags)[3];
-			elts[pos]->hash.rss = 0;
-			elts[pos + 1]->hash.rss = 0;
-			elts[pos + 2]->hash.rss = 0;
-			elts[pos + 3]->hash.rss = 0;
+			elts[pos]->hash.rss = hash_rss;
+			elts[pos + 1]->hash.rss = hash_rss;
+			elts[pos + 2]->hash.rss = hash_rss;
+			elts[pos + 3]->hash.rss = hash_rss;
 		}
 		if (rxq->dynf_meta) {
 			int32_t offs = rxq->flow_meta_offset;
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
index dc1d30753d..653a10867d 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
@@ -330,6 +330,7 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 				vdupq_n_u32(RTE_MBUF_F_RX_RSS_HASH);
 			const uint32x4_t rearm_flags =
 				vdupq_n_u32((uint32_t)t_pkt->ol_flags);
+			const uint32_t hash_rss = t_pkt->hash.rss;
 
 			ol_flags_mask = vorrq_u32(ol_flags_mask, hash_flags);
 			ol_flags = vorrq_u32(ol_flags,
@@ -338,10 +339,10 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 			elts[pos + 1]->ol_flags = vgetq_lane_u32(ol_flags, 2);
 			elts[pos + 2]->ol_flags = vgetq_lane_u32(ol_flags, 1);
 			elts[pos + 3]->ol_flags = vgetq_lane_u32(ol_flags, 0);
-			elts[pos]->hash.rss = 0;
-			elts[pos + 1]->hash.rss = 0;
-			elts[pos + 2]->hash.rss = 0;
-			elts[pos + 3]->hash.rss = 0;
+			elts[pos]->hash.rss = hash_rss;
+			elts[pos + 1]->hash.rss = hash_rss;
+			elts[pos + 2]->hash.rss = hash_rss;
+			elts[pos + 3]->hash.rss = hash_rss;
 		}
 		if (rxq->dynf_meta) {
 			int32_t offs = rxq->flow_meta_offset;
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
index 81a177fce7..fd47677db1 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
@@ -314,6 +314,7 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 				_mm_set1_epi32(RTE_MBUF_F_RX_RSS_HASH);
 			const __m128i rearm_flags =
 				_mm_set1_epi32((uint32_t)t_pkt->ol_flags);
+			const uint32_t hash_rss = t_pkt->hash.rss;
 
 			ol_flags_mask = _mm_or_si128(ol_flags_mask, hash_flags);
 			ol_flags = _mm_or_si128(ol_flags,
@@ -326,10 +327,10 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 				_mm_extract_epi32(ol_flags, 2);
 			elts[pos + 3]->ol_flags =
 				_mm_extract_epi32(ol_flags, 3);
-			elts[pos]->hash.rss = 0;
-			elts[pos + 1]->hash.rss = 0;
-			elts[pos + 2]->hash.rss = 0;
-			elts[pos + 3]->hash.rss = 0;
+			elts[pos]->hash.rss = hash_rss;
+			elts[pos + 1]->hash.rss = hash_rss;
+			elts[pos + 2]->hash.rss = hash_rss;
+			elts[pos + 3]->hash.rss = hash_rss;
 		}
 		if (rxq->dynf_meta) {
 			int32_t offs = rxq->flow_meta_offset;
-- 
2.43.5


^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH] net/mlx5: fix RSS hash for non-RSS CQE zipping
  2024-11-21 13:32 [PATCH] net/mlx5: fix RSS hash for non-RSS CQE zipping Alexander Kozyrev
@ 2024-11-21 14:56 ` Dariusz Sosnowski
  2024-11-29 20:44 ` [PATCH v2] " Alexander Kozyrev
  1 sibling, 0 replies; 4+ messages in thread
From: Dariusz Sosnowski @ 2024-11-21 14:56 UTC (permalink / raw)
  To: Alexander Kozyrev, dev
  Cc: stable, Raslan Darawsheh, Slava Ovsiienko, Matan Azrad,
	Bing Zhao, Suanming Mou



> -----Original Message-----
> From: Alexander Kozyrev <akozyrev@nvidia.com>
> Sent: Thursday, November 21, 2024 14:32
> To: dev@dpdk.org
> Cc: stable@dpdk.org; Raslan Darawsheh <rasland@nvidia.com>; Slava Ovsiienko
> <viacheslavo@nvidia.com>; Matan Azrad <matan@nvidia.com>; Dariusz
> Sosnowski <dsosnowski@nvidia.com>; Bing Zhao <bingz@nvidia.com>;
> Suanming Mou <suanmingm@nvidia.com>
> Subject: [PATCH] net/mlx5: fix RSS hash for non-RSS CQE zipping
> 
> Take the RSS hash value from the title packet for flow tag and packet header CQE
> zipping formats.
> 
> Fixes: 54c2d46b16 ("net/mlx5: support flow tag and packet header miniCQEs")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Alexander Kozyrev <akozyrev@nvidia.com>

Acked-by: Dariusz Sosnowski <dsosnowski@nvidia.com>

Best regards,
Dariusz Sosnowski

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH v2] net/mlx5: fix RSS hash for non-RSS CQE zipping
  2024-11-21 13:32 [PATCH] net/mlx5: fix RSS hash for non-RSS CQE zipping Alexander Kozyrev
  2024-11-21 14:56 ` Dariusz Sosnowski
@ 2024-11-29 20:44 ` Alexander Kozyrev
  2024-11-30  0:39   ` [PATCH v3] " Alexander Kozyrev
  1 sibling, 1 reply; 4+ messages in thread
From: Alexander Kozyrev @ 2024-11-29 20:44 UTC (permalink / raw)
  To: dev; +Cc: stable, rasland, viacheslavo, matan, dsosnowski, bingz, suanmingm

Take the RSS hash and flow tag values from the title packet
before they get overwritten by the decompressing routine.
For non-RSS CQE zipping formats, set the RSS hash flag
in the packet mbuf when RSS is enabled.

Signed-off-by: Alexander Kozyrev <akozyrev@nvidia.com>
---
 drivers/net/mlx5/mlx5_rxtx_vec_altivec.h | 23 +++++++++++++----------
 drivers/net/mlx5/mlx5_rxtx_vec_neon.h    | 15 ++++++++-------
 drivers/net/mlx5/mlx5_rxtx_vec_sse.h     | 15 ++++++++-------
 3 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
index 240987d03d..3b2f33d138 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
@@ -82,6 +82,8 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 		(void *)&(cq + !rxq->cqe_comp_layout)->pkt_info;
 	/* Title packet is pre-built. */
 	struct rte_mbuf *t_pkt = rxq->cqe_comp_layout ? &rxq->title_pkt : elts[0];
+	const uint32_t hash_rss = t_pkt->hash.rss * rxq->rss_hash;
+	const uint32_t flow_tag = t_pkt->hash.fdir.hi;
 	const __vector unsigned char zero = (__vector unsigned char){0};
 	/* Mask to shuffle from extracted mini CQE to mbuf. */
 	const __vector unsigned char shuf_mask1 = (__vector unsigned char){
@@ -266,8 +268,6 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 		if (rxq->mark) {
 			if (rxq->mcqe_format !=
 			    MLX5_CQE_RESP_FORMAT_FTAG_STRIDX) {
-				const uint32_t flow_tag = t_pkt->hash.fdir.hi;
-
 				/* E.1 store flow tag (rte_flow mark). */
 				elts[pos]->hash.fdir.hi = flow_tag;
 				elts[pos + 1]->hash.fdir.hi = flow_tag;
@@ -442,10 +442,10 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 			}
 			const __vector unsigned char hash_mask =
 				(__vector unsigned char)(__vector unsigned int) {
-					RTE_MBUF_F_RX_RSS_HASH,
-					RTE_MBUF_F_RX_RSS_HASH,
-					RTE_MBUF_F_RX_RSS_HASH,
-					RTE_MBUF_F_RX_RSS_HASH};
+					rxq->rss_hash * RTE_MBUF_F_RX_RSS_HASH,
+					rxq->rss_hash * RTE_MBUF_F_RX_RSS_HASH,
+					rxq->rss_hash * RTE_MBUF_F_RX_RSS_HASH,
+					rxq->rss_hash * RTE_MBUF_F_RX_RSS_HASH};
 			const __vector unsigned char rearm_flags =
 				(__vector unsigned char)(__vector unsigned int) {
 				(uint32_t)t_pkt->ol_flags,
@@ -456,6 +456,9 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 			ol_flags_mask = (__vector unsigned char)
 				vec_or((__vector unsigned long)ol_flags_mask,
 				(__vector unsigned long)hash_mask);
+			ol_flags = (__vector unsigned char)
+				vec_or((__vector unsigned long)ol_flags,
+				(__vector unsigned long)hash_mask);
 			ol_flags = (__vector unsigned char)
 				vec_or((__vector unsigned long)ol_flags,
 				(__vector unsigned long)
@@ -470,10 +473,10 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 				((__vector unsigned int)ol_flags)[2];
 			elts[pos + 3]->ol_flags =
 				((__vector unsigned int)ol_flags)[3];
-			elts[pos]->hash.rss = 0;
-			elts[pos + 1]->hash.rss = 0;
-			elts[pos + 2]->hash.rss = 0;
-			elts[pos + 3]->hash.rss = 0;
+			elts[pos]->hash.rss = hash_rss;
+			elts[pos + 1]->hash.rss = hash_rss;
+			elts[pos + 2]->hash.rss = hash_rss;
+			elts[pos + 3]->hash.rss = hash_rss;
 		}
 		if (rxq->dynf_meta) {
 			int32_t offs = rxq->flow_meta_offset;
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
index dc1d30753d..58e3918ef4 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
@@ -78,6 +78,8 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 		(void *)&(cq + !rxq->cqe_comp_layout)->pkt_info;
 	/* Title packet is pre-built. */
 	struct rte_mbuf *t_pkt = rxq->cqe_comp_layout ? &rxq->title_pkt : elts[0];
+	const uint32_t hash_rss = t_pkt->hash.rss * rxq->rss_hash;
+	const uint32_t flow_tag = t_pkt->hash.fdir.hi;
 	unsigned int pos;
 	unsigned int i;
 	unsigned int inv = 0;
@@ -211,8 +213,6 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 		if (rxq->mark) {
 			if (rxq->mcqe_format !=
 			    MLX5_CQE_RESP_FORMAT_FTAG_STRIDX) {
-				const uint32_t flow_tag = t_pkt->hash.fdir.hi;
-
 				/* E.1 store flow tag (rte_flow mark). */
 				elts[pos]->hash.fdir.hi = flow_tag;
 				elts[pos + 1]->hash.fdir.hi = flow_tag;
@@ -327,21 +327,22 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 				}
 			}
 			const uint32x4_t hash_flags =
-				vdupq_n_u32(RTE_MBUF_F_RX_RSS_HASH);
+				vdupq_n_u32(rxq->rss_hash * RTE_MBUF_F_RX_RSS_HASH);
 			const uint32x4_t rearm_flags =
 				vdupq_n_u32((uint32_t)t_pkt->ol_flags);
 
 			ol_flags_mask = vorrq_u32(ol_flags_mask, hash_flags);
+			ol_flags = vorrq_u32(ol_flags, hash_flags);
 			ol_flags = vorrq_u32(ol_flags,
 					vbicq_u32(rearm_flags, ol_flags_mask));
 			elts[pos]->ol_flags = vgetq_lane_u32(ol_flags, 3);
 			elts[pos + 1]->ol_flags = vgetq_lane_u32(ol_flags, 2);
 			elts[pos + 2]->ol_flags = vgetq_lane_u32(ol_flags, 1);
 			elts[pos + 3]->ol_flags = vgetq_lane_u32(ol_flags, 0);
-			elts[pos]->hash.rss = 0;
-			elts[pos + 1]->hash.rss = 0;
-			elts[pos + 2]->hash.rss = 0;
-			elts[pos + 3]->hash.rss = 0;
+			elts[pos]->hash.rss = hash_rss;
+			elts[pos + 1]->hash.rss = hash_rss;
+			elts[pos + 2]->hash.rss = hash_rss;
+			elts[pos + 3]->hash.rss = hash_rss;
 		}
 		if (rxq->dynf_meta) {
 			int32_t offs = rxq->flow_meta_offset;
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
index 81a177fce7..8a83a0e59d 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
@@ -78,6 +78,8 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 	volatile struct mlx5_mini_cqe8 *mcq = (void *)(cq + !rxq->cqe_comp_layout);
 	/* Title packet is pre-built. */
 	struct rte_mbuf *t_pkt = rxq->cqe_comp_layout ? &rxq->title_pkt : elts[0];
+	const uint32_t hash_rss = t_pkt->hash.rss * rxq->rss_hash;
+	const uint32_t flow_tag = t_pkt->hash.fdir.hi;
 	unsigned int pos;
 	unsigned int i;
 	unsigned int inv = 0;
@@ -194,8 +196,6 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 		if (rxq->mark) {
 			if (rxq->mcqe_format !=
 				MLX5_CQE_RESP_FORMAT_FTAG_STRIDX) {
-				const uint32_t flow_tag = t_pkt->hash.fdir.hi;
-
 				/* E.1 store flow tag (rte_flow mark). */
 				elts[pos]->hash.fdir.hi = flow_tag;
 				elts[pos + 1]->hash.fdir.hi = flow_tag;
@@ -311,11 +311,12 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 				}
 			}
 			const __m128i hash_flags =
-				_mm_set1_epi32(RTE_MBUF_F_RX_RSS_HASH);
+				_mm_set1_epi32(rxq->rss_hash * RTE_MBUF_F_RX_RSS_HASH);
 			const __m128i rearm_flags =
 				_mm_set1_epi32((uint32_t)t_pkt->ol_flags);
 
 			ol_flags_mask = _mm_or_si128(ol_flags_mask, hash_flags);
+			ol_flags = _mm_or_si128(ol_flags, hash_flags);
 			ol_flags = _mm_or_si128(ol_flags,
 				_mm_andnot_si128(ol_flags_mask, rearm_flags));
 			elts[pos]->ol_flags =
@@ -326,10 +327,10 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 				_mm_extract_epi32(ol_flags, 2);
 			elts[pos + 3]->ol_flags =
 				_mm_extract_epi32(ol_flags, 3);
-			elts[pos]->hash.rss = 0;
-			elts[pos + 1]->hash.rss = 0;
-			elts[pos + 2]->hash.rss = 0;
-			elts[pos + 3]->hash.rss = 0;
+			elts[pos]->hash.rss = hash_rss;
+			elts[pos + 1]->hash.rss = hash_rss;
+			elts[pos + 2]->hash.rss = hash_rss;
+			elts[pos + 3]->hash.rss = hash_rss;
 		}
 		if (rxq->dynf_meta) {
 			int32_t offs = rxq->flow_meta_offset;
-- 
2.43.5


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH v3] net/mlx5: fix RSS hash for non-RSS CQE zipping
  2024-11-29 20:44 ` [PATCH v2] " Alexander Kozyrev
@ 2024-11-30  0:39   ` Alexander Kozyrev
  0 siblings, 0 replies; 4+ messages in thread
From: Alexander Kozyrev @ 2024-11-30  0:39 UTC (permalink / raw)
  To: dev; +Cc: stable, rasland, viacheslavo, matan, dsosnowski, bingz, suanmingm

Take the RSS hash value from the title packet
before it gets overwritten by the decompression routine.
For non-RSS CQE zipping formats, set the RSS hash flag
in the packet mbuf when RSS is enabled.

Fixes: 54c2d46b16 ("net/mlx5: support flow tag and packet header miniCQEs")
Cc: stable@dpdk.org

Signed-off-by: Alexander Kozyrev <akozyrev@nvidia.com>
---
 drivers/net/mlx5/mlx5_rxtx_vec_altivec.h | 32 +++++++++++++-----------
 drivers/net/mlx5/mlx5_rxtx_vec_neon.h    | 18 ++++++-------
 drivers/net/mlx5/mlx5_rxtx_vec_sse.h     | 18 ++++++-------
 3 files changed, 35 insertions(+), 33 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
index 240987d03d..0f48298def 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
@@ -82,6 +82,7 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 		(void *)&(cq + !rxq->cqe_comp_layout)->pkt_info;
 	/* Title packet is pre-built. */
 	struct rte_mbuf *t_pkt = rxq->cqe_comp_layout ? &rxq->title_pkt : elts[0];
+	const uint32_t hash_rss = rxq->rss_hash * t_pkt->hash.rss;
 	const __vector unsigned char zero = (__vector unsigned char){0};
 	/* Mask to shuffle from extracted mini CQE to mbuf. */
 	const __vector unsigned char shuf_mask1 = (__vector unsigned char){
@@ -113,8 +114,18 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 	const __vector unsigned short rxdf_sel_mask =
 		(__vector unsigned short){
 			0xffff, 0xffff, 0, 0, 0, 0xffff, 0, 0};
-	__vector unsigned char ol_flags = (__vector unsigned char){0};
-	__vector unsigned char ol_flags_mask = (__vector unsigned char){0};
+	__vector unsigned char ol_flags =
+			(__vector unsigned char)(__vector unsigned int) {
+				rxq->rss_hash * RTE_MBUF_F_RX_RSS_HASH,
+				rxq->rss_hash * RTE_MBUF_F_RX_RSS_HASH,
+				rxq->rss_hash * RTE_MBUF_F_RX_RSS_HASH,
+				rxq->rss_hash * RTE_MBUF_F_RX_RSS_HASH};
+	__vector unsigned char ol_flags_mask =
+			(__vector unsigned char)(__vector unsigned int) {
+				rxq->rss_hash * RTE_MBUF_F_RX_RSS_HASH,
+				rxq->rss_hash * RTE_MBUF_F_RX_RSS_HASH,
+				rxq->rss_hash * RTE_MBUF_F_RX_RSS_HASH,
+				rxq->rss_hash * RTE_MBUF_F_RX_RSS_HASH};
 	unsigned int pos;
 	unsigned int i;
 	unsigned int inv = 0;
@@ -440,12 +451,6 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 						pkt_info) & (1 << 6));
 				}
 			}
-			const __vector unsigned char hash_mask =
-				(__vector unsigned char)(__vector unsigned int) {
-					RTE_MBUF_F_RX_RSS_HASH,
-					RTE_MBUF_F_RX_RSS_HASH,
-					RTE_MBUF_F_RX_RSS_HASH,
-					RTE_MBUF_F_RX_RSS_HASH};
 			const __vector unsigned char rearm_flags =
 				(__vector unsigned char)(__vector unsigned int) {
 				(uint32_t)t_pkt->ol_flags,
@@ -453,9 +458,6 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 				(uint32_t)t_pkt->ol_flags,
 				(uint32_t)t_pkt->ol_flags};
 
-			ol_flags_mask = (__vector unsigned char)
-				vec_or((__vector unsigned long)ol_flags_mask,
-				(__vector unsigned long)hash_mask);
 			ol_flags = (__vector unsigned char)
 				vec_or((__vector unsigned long)ol_flags,
 				(__vector unsigned long)
@@ -470,10 +472,10 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 				((__vector unsigned int)ol_flags)[2];
 			elts[pos + 3]->ol_flags =
 				((__vector unsigned int)ol_flags)[3];
-			elts[pos]->hash.rss = 0;
-			elts[pos + 1]->hash.rss = 0;
-			elts[pos + 2]->hash.rss = 0;
-			elts[pos + 3]->hash.rss = 0;
+			elts[pos]->hash.rss = hash_rss;
+			elts[pos + 1]->hash.rss = hash_rss;
+			elts[pos + 2]->hash.rss = hash_rss;
+			elts[pos + 3]->hash.rss = hash_rss;
 		}
 		if (rxq->dynf_meta) {
 			int32_t offs = rxq->flow_meta_offset;
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
index dc1d30753d..462819cb4a 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
@@ -78,6 +78,7 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 		(void *)&(cq + !rxq->cqe_comp_layout)->pkt_info;
 	/* Title packet is pre-built. */
 	struct rte_mbuf *t_pkt = rxq->cqe_comp_layout ? &rxq->title_pkt : elts[0];
+	const uint32_t hash_rss = rxq->rss_hash * t_pkt->hash.rss;
 	unsigned int pos;
 	unsigned int i;
 	unsigned int inv = 0;
@@ -117,8 +118,10 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 		rxq->crc_present * RTE_ETHER_CRC_LEN, 0,
 		0, 0
 	};
-	uint32x4_t ol_flags = {0, 0, 0, 0};
-	uint32x4_t ol_flags_mask = {0, 0, 0, 0};
+	uint32x4_t ol_flags =
+		vdupq_n_u32(rxq->rss_hash * RTE_MBUF_F_RX_RSS_HASH);
+	uint32x4_t ol_flags_mask =
+		vdupq_n_u32(rxq->rss_hash * RTE_MBUF_F_RX_RSS_HASH);
 #ifdef MLX5_PMD_SOFT_COUNTERS
 	uint32_t rcvd_byte = 0;
 #endif
@@ -326,22 +329,19 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 						pkt_info) & (1 << 6));
 				}
 			}
-			const uint32x4_t hash_flags =
-				vdupq_n_u32(RTE_MBUF_F_RX_RSS_HASH);
 			const uint32x4_t rearm_flags =
 				vdupq_n_u32((uint32_t)t_pkt->ol_flags);
 
-			ol_flags_mask = vorrq_u32(ol_flags_mask, hash_flags);
 			ol_flags = vorrq_u32(ol_flags,
 					vbicq_u32(rearm_flags, ol_flags_mask));
 			elts[pos]->ol_flags = vgetq_lane_u32(ol_flags, 3);
 			elts[pos + 1]->ol_flags = vgetq_lane_u32(ol_flags, 2);
 			elts[pos + 2]->ol_flags = vgetq_lane_u32(ol_flags, 1);
 			elts[pos + 3]->ol_flags = vgetq_lane_u32(ol_flags, 0);
-			elts[pos]->hash.rss = 0;
-			elts[pos + 1]->hash.rss = 0;
-			elts[pos + 2]->hash.rss = 0;
-			elts[pos + 3]->hash.rss = 0;
+			elts[pos]->hash.rss = hash_rss;
+			elts[pos + 1]->hash.rss = hash_rss;
+			elts[pos + 2]->hash.rss = hash_rss;
+			elts[pos + 3]->hash.rss = hash_rss;
 		}
 		if (rxq->dynf_meta) {
 			int32_t offs = rxq->flow_meta_offset;
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
index 81a177fce7..fc1b436b72 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
@@ -78,6 +78,7 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 	volatile struct mlx5_mini_cqe8 *mcq = (void *)(cq + !rxq->cqe_comp_layout);
 	/* Title packet is pre-built. */
 	struct rte_mbuf *t_pkt = rxq->cqe_comp_layout ? &rxq->title_pkt : elts[0];
+	const uint32_t hash_rss = rxq->rss_hash * t_pkt->hash.rss;
 	unsigned int pos;
 	unsigned int i;
 	unsigned int inv = 0;
@@ -108,8 +109,10 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 			      0,
 			      rxq->crc_present * RTE_ETHER_CRC_LEN,
 			      0, 0);
-	__m128i ol_flags = _mm_setzero_si128();
-	__m128i ol_flags_mask = _mm_setzero_si128();
+	__m128i ol_flags =
+		_mm_set1_epi32(rxq->rss_hash * RTE_MBUF_F_RX_RSS_HASH);
+	__m128i ol_flags_mask =
+		_mm_set1_epi32(rxq->rss_hash * RTE_MBUF_F_RX_RSS_HASH);
 #ifdef MLX5_PMD_SOFT_COUNTERS
 	const __m128i zero = _mm_setzero_si128();
 	const __m128i ones = _mm_cmpeq_epi32(zero, zero);
@@ -310,12 +313,9 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 						pkt_info) & (1 << 6));
 				}
 			}
-			const __m128i hash_flags =
-				_mm_set1_epi32(RTE_MBUF_F_RX_RSS_HASH);
 			const __m128i rearm_flags =
 				_mm_set1_epi32((uint32_t)t_pkt->ol_flags);
 
-			ol_flags_mask = _mm_or_si128(ol_flags_mask, hash_flags);
 			ol_flags = _mm_or_si128(ol_flags,
 				_mm_andnot_si128(ol_flags_mask, rearm_flags));
 			elts[pos]->ol_flags =
@@ -326,10 +326,10 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 				_mm_extract_epi32(ol_flags, 2);
 			elts[pos + 3]->ol_flags =
 				_mm_extract_epi32(ol_flags, 3);
-			elts[pos]->hash.rss = 0;
-			elts[pos + 1]->hash.rss = 0;
-			elts[pos + 2]->hash.rss = 0;
-			elts[pos + 3]->hash.rss = 0;
+			elts[pos]->hash.rss = hash_rss;
+			elts[pos + 1]->hash.rss = hash_rss;
+			elts[pos + 2]->hash.rss = hash_rss;
+			elts[pos + 3]->hash.rss = hash_rss;
 		}
 		if (rxq->dynf_meta) {
 			int32_t offs = rxq->flow_meta_offset;
-- 
2.43.5


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2024-11-30  0:39 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-11-21 13:32 [PATCH] net/mlx5: fix RSS hash for non-RSS CQE zipping Alexander Kozyrev
2024-11-21 14:56 ` Dariusz Sosnowski
2024-11-29 20:44 ` [PATCH v2] " Alexander Kozyrev
2024-11-30  0:39   ` [PATCH v3] " Alexander Kozyrev

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).