* [PATCH 20.11] net/iavf: fix outer checksum flags
@ 2022-11-04 6:19 Zhichao Zeng
0 siblings, 0 replies; only message in thread
From: Zhichao Zeng @ 2022-11-04 6:19 UTC (permalink / raw)
To: stable; +Cc: luca.boccassi, yidingx.zhou, qi.z.zhang, Zhichao Zeng, Ke Xu
[ upstream commit 3b8c645afa06fcdab8e6899f3c30b2e6f685e962 ]
When receiving tunneled packets, the testpmd output log shows 'ol_flags'
value always as 'RTE_MBUF_F_RX_OUTER_L4_CKSUM_UNKNOWN', but expected value
should be 'RX_OUTER_L4_CKSUM_GOOD' or 'RX_OUTER_L4_CKSUM_BAD'.
Adding 'RX_OUTER_L4_CKSUM_GOOD' and 'RX_OUTER_L4_CKSUM_BAD' to 'flags' for
normal path, 'l3_l4_flags_shuf' for AVX2 and AVX512 vector path and
'cksum_flags' for SSE vector path to ensure that the 'ol_flags'
can match correct flags.
Fixes: b8b4c54ef9b0 ("net/iavf: support flexible Rx descriptor in normal path")
Fixes: 1162f5a0ef31 ("net/iavf: support flexible Rx descriptor in SSE path")
Fixes: 5b6e8859081d ("net/iavf: support flexible Rx descriptor in AVX path")
Fixes: 9c9aa0040344 ("net/iavf: add offload path for Rx AVX512 flex descriptor")
Cc: stable@dpdk.org
Signed-off-by: Zhichao Zeng <zhichaox.zeng@intel.com>
Tested-by: Ke Xu <ke1.xu@intel.com>
Acked-by: Qi Zhang <qi.z.zhang@intel.com>
---
drivers/net/iavf/iavf_rxtx.c | 9 +-
drivers/net/iavf/iavf_rxtx_vec_avx2.c | 118 +++++++++++++++------
drivers/net/iavf/iavf_rxtx_vec_avx512.c | 133 ++++++++++++++++++------
drivers/net/iavf/iavf_rxtx_vec_sse.c | 76 ++++++++++----
4 files changed, 251 insertions(+), 85 deletions(-)
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 0053a36de2..539048c14e 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -1085,7 +1085,9 @@ iavf_flex_rxd_error_to_pkt_flags(uint16_t stat_err0)
return 0;
if (likely(!(stat_err0 & IAVF_RX_FLEX_ERR0_BITS))) {
- flags |= (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD);
+ flags |= (PKT_RX_IP_CKSUM_GOOD |
+ PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_OUTER_L4_CKSUM_GOOD);
return flags;
}
@@ -1102,6 +1104,11 @@ iavf_flex_rxd_error_to_pkt_flags(uint16_t stat_err0)
if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))
flags |= PKT_RX_EIP_CKSUM_BAD;
+ if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S)))
+ flags |= PKT_RX_OUTER_L4_CKSUM_BAD;
+ else
+ flags |= PKT_RX_OUTER_L4_CKSUM_GOOD;
+
return flags;
}
diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
index a006d90a24..1448ef322b 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
@@ -622,43 +622,88 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
* bit13 is for VLAN indication.
*/
const __m256i flags_mask =
- _mm256_set1_epi32((7 << 4) | (1 << 12) | (1 << 13));
+ _mm256_set1_epi32((0xF << 4) | (1 << 12) | (1 << 13));
/**
* data to be shuffled by the result of the flags mask shifted by 4
* bits. This gives use the l3_l4 flags.
*/
- const __m256i l3_l4_flags_shuf = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
- /* shift right 1 bit to make sure it not exceed 255 */
- (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
- PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
- PKT_RX_IP_CKSUM_GOOD) >> 1,
- (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
- PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
- PKT_RX_IP_CKSUM_GOOD) >> 1,
- (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
- (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
- /* second 128-bits */
- 0, 0, 0, 0, 0, 0, 0, 0,
- (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
- PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
- PKT_RX_IP_CKSUM_GOOD) >> 1,
- (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
- PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
- PKT_RX_IP_CKSUM_GOOD) >> 1,
- (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
- (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1);
+ const __m256i l3_l4_flags_shuf =
+ _mm256_set_epi8((PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
+ PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_IP_CKSUM_GOOD) >> 1,
+ /**
+ * second 128-bits
+ * shift right 20 bits to use the low two bits to indicate
+ * outer checksum status
+ * shift right 1 bit to make sure it not exceed 255
+ */
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_IP_CKSUM_GOOD) >> 1);
const __m256i cksum_mask =
- _mm256_set1_epi32(PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
- PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
- PKT_RX_EIP_CKSUM_BAD);
+ _mm256_set1_epi32(PKT_RX_IP_CKSUM_MASK |
+ PKT_RX_L4_CKSUM_MASK |
+ PKT_RX_EIP_CKSUM_BAD |
+ PKT_RX_OUTER_L4_CKSUM_MASK);
/**
* data to be shuffled by result of flag mask, shifted down 12.
* If RSS(bit12)/VLAN(bit13) are set,
@@ -824,6 +869,15 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
__m256i l3_l4_flags = _mm256_shuffle_epi8(l3_l4_flags_shuf,
_mm256_srli_epi32(flag_bits, 4));
l3_l4_flags = _mm256_slli_epi32(l3_l4_flags, 1);
+ __m256i l4_outer_mask = _mm256_set1_epi32(0x6);
+ __m256i l4_outer_flags =
+ _mm256_and_si256(l3_l4_flags, l4_outer_mask);
+ l4_outer_flags = _mm256_slli_epi32(l4_outer_flags, 20);
+
+ __m256i l3_l4_mask = _mm256_set1_epi32(~0x6);
+
+ l3_l4_flags = _mm256_and_si256(l3_l4_flags, l3_l4_mask);
+ l3_l4_flags = _mm256_or_si256(l3_l4_flags, l4_outer_flags);
l3_l4_flags = _mm256_and_si256(l3_l4_flags, cksum_mask);
/* set rss and vlan flags */
const __m256i rss_vlan_flag_bits =
diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx512.c b/drivers/net/iavf/iavf_rxtx_vec_avx512.c
index d56a523940..547c7a8204 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx512.c
@@ -757,43 +757,103 @@ _iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq,
* bit13 is for VLAN indication.
*/
const __m256i flags_mask =
- _mm256_set1_epi32((7 << 4) | (1 << 12) | (1 << 13));
+ _mm256_set1_epi32((0xF << 4) | (1 << 12) | (1 << 13));
/**
* data to be shuffled by the result of the flags mask shifted by 4
* bits. This gives use the l3_l4 flags.
*/
- const __m256i l3_l4_flags_shuf = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
- /* shift right 1 bit to make sure it not exceed 255 */
- (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
- PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
- PKT_RX_IP_CKSUM_GOOD) >> 1,
- (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
- PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
- PKT_RX_IP_CKSUM_GOOD) >> 1,
- (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
- (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
- /* second 128-bits */
- 0, 0, 0, 0, 0, 0, 0, 0,
- (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
- PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
- PKT_RX_IP_CKSUM_GOOD) >> 1,
- (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
- PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
- PKT_RX_IP_CKSUM_GOOD) >> 1,
- (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
- (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1);
+ const __m256i l3_l4_flags_shuf =
+ _mm256_set_epi8((PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
+ PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
+ PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
+ PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
+ PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
+ PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
+ PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
+ PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
+ PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+ PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+ PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+ PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+ PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+ PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+ PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+ PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+ PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+ /**
+ * second 128-bits
+ * shift right 20 bits to use the low two bits
+ * to indicate outer checksum status
+ * shift right 1 bit to make sure it not exceed 255
+ */
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
+ PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
+ PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
+ PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
+ PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
+ PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
+ PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
+ PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
+ PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+ PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+ PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+ PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+ PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+ PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+ PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+ PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
+ PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1);
const __m256i cksum_mask =
- _mm256_set1_epi32(PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
- PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
- PKT_RX_EIP_CKSUM_BAD);
+ _mm256_set1_epi32(PKT_RX_IP_CKSUM_MASK |
+ PKT_RX_L4_CKSUM_MASK |
+ PKT_RX_EIP_CKSUM_BAD |
+ PKT_RX_OUTER_L4_CKSUM_MASK);
/**
* data to be shuffled by result of flag mask, shifted down 12.
* If RSS(bit12)/VLAN(bit13) are set,
@@ -953,6 +1013,15 @@ _iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq,
__m256i l3_l4_flags = _mm256_shuffle_epi8(l3_l4_flags_shuf,
_mm256_srli_epi32(flag_bits, 4));
l3_l4_flags = _mm256_slli_epi32(l3_l4_flags, 1);
+ __m256i l4_outer_mask = _mm256_set1_epi32(0x6);
+ __m256i l4_outer_flags =
+ _mm256_and_si256(l3_l4_flags, l4_outer_mask);
+ l4_outer_flags = _mm256_slli_epi32(l4_outer_flags, 20);
+
+ __m256i l3_l4_mask = _mm256_set1_epi32(~0x6);
+
+ l3_l4_flags = _mm256_and_si256(l3_l4_flags, l3_l4_mask);
+ l3_l4_flags = _mm256_or_si256(l3_l4_flags, l4_outer_flags);
l3_l4_flags = _mm256_and_si256(l3_l4_flags, cksum_mask);
/* set rss and vlan flags */
const __m256i rss_vlan_flag_bits =
diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c
index b3ea8bc86d..fd4afe3741 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
@@ -222,39 +222,68 @@ flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
* bit12 for RSS indication.
* bit13 for VLAN indication.
*/
- const __m128i desc_mask = _mm_set_epi32(0x3070, 0x3070,
- 0x3070, 0x3070);
+ const __m128i desc_mask = _mm_set_epi32(0x30f0, 0x30f0,
+ 0x30f0, 0x30f0);
const __m128i cksum_mask = _mm_set_epi32(PKT_RX_IP_CKSUM_MASK |
PKT_RX_L4_CKSUM_MASK |
+ PKT_RX_OUTER_L4_CKSUM_MASK |
PKT_RX_EIP_CKSUM_BAD,
PKT_RX_IP_CKSUM_MASK |
PKT_RX_L4_CKSUM_MASK |
+ PKT_RX_OUTER_L4_CKSUM_MASK |
PKT_RX_EIP_CKSUM_BAD,
PKT_RX_IP_CKSUM_MASK |
PKT_RX_L4_CKSUM_MASK |
+ PKT_RX_OUTER_L4_CKSUM_MASK |
PKT_RX_EIP_CKSUM_BAD,
PKT_RX_IP_CKSUM_MASK |
PKT_RX_L4_CKSUM_MASK |
+ PKT_RX_OUTER_L4_CKSUM_MASK |
PKT_RX_EIP_CKSUM_BAD);
/* map the checksum, rss and vlan fields to the checksum, rss
* and vlan flag
*/
- const __m128i cksum_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
- /* shift right 1 bit to make sure it not exceed 255 */
- (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
- PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
- PKT_RX_IP_CKSUM_GOOD) >> 1,
- (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
- PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
- PKT_RX_IP_CKSUM_GOOD) >> 1,
- (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
- (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1);
+ const __m128i cksum_flags =
+ _mm_set_epi8((PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
+ PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_IP_CKSUM_GOOD) >> 1,
+ /**
+ * shift right 20 bits to use the low two bits to indicate
+ * outer checksum status
+ * shift right 1 bit to make sure it not exceed 255
+ */
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_IP_CKSUM_GOOD) >> 1);
const __m128i rss_vlan_flags = _mm_set_epi8(0, 0, 0, 0,
0, 0, 0, 0,
@@ -274,6 +303,13 @@ flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
flags = _mm_shuffle_epi8(cksum_flags, tmp_desc);
/* then we shift left 1 bit */
flags = _mm_slli_epi32(flags, 1);
+ __m128i l4_outer_mask = _mm_set_epi32(0x6, 0x6, 0x6, 0x6);
+ __m128i l4_outer_flags = _mm_and_si128(flags, l4_outer_mask);
+ l4_outer_flags = _mm_slli_epi32(l4_outer_flags, 20);
+
+ __m128i l3_l4_mask = _mm_set_epi32(~0x6, ~0x6, ~0x6, ~0x6);
+ __m128i l3_l4_flags = _mm_and_si128(flags, l3_l4_mask);
+ flags = _mm_or_si128(l3_l4_flags, l4_outer_flags);
/* we need to mask out the redundant bits introduced by RSS or
* VLAN fields.
*/
@@ -325,10 +361,10 @@ flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
* appropriate flags means that we have to do a shift and blend for
* each mbuf before we do the write.
*/
- rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 8), 0x10);
- rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 4), 0x10);
- rearm2 = _mm_blend_epi16(mbuf_init, flags, 0x10);
- rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(flags, 4), 0x10);
+ rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 8), 0x30);
+ rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 4), 0x30);
+ rearm2 = _mm_blend_epi16(mbuf_init, flags, 0x30);
+ rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(flags, 4), 0x30);
/* write the rearm data and the olflags in one write */
RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
--
2.25.1
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2022-11-04 6:17 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-04 6:19 [PATCH 20.11] net/iavf: fix outer checksum flags Zhichao Zeng
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).