We were reusing the 32-byte CQE macros for compressed CQE handling, which caused the packet error bits to be misinterpreted. Fix it by using the appropriate compressed CQE macros. Fixes: d58c6c077cc2 ("net/bnxt: add AVX2 Rx for compressed CQE") Signed-off-by: Ajit Khaparde --- drivers/net/bnxt/bnxt_rxtx_vec_avx2.c | 39 ++++++++++++++++----------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_avx2.c b/drivers/net/bnxt/bnxt_rxtx_vec_avx2.c index ce6b597611..976f258044 100644 --- a/drivers/net/bnxt/bnxt_rxtx_vec_avx2.c +++ b/drivers/net/bnxt/bnxt_rxtx_vec_avx2.c @@ -392,19 +392,21 @@ crx_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) 0xFF, 0xFF, 3, 2, /* pkt_len */ 0xFF, 0xFF, 0xFF, 0xFF); /* pkt_type (zeroes) */ const __m256i flags_type_mask = - _mm256_set1_epi32(RX_PKT_CMPL_FLAGS_ITYPE_MASK); + _mm256_set1_epi32(RX_PKT_COMPRESS_CMPL_FLAGS_ITYPE_MASK); const __m256i flags2_mask1 = - _mm256_set1_epi32(CMPL_FLAGS2_VLAN_TUN_MSK); + _mm256_set1_epi32(CMPL_FLAGS2_VLAN_TUN_MSK_CRX); const __m256i flags2_mask2 = - _mm256_set1_epi32(RX_PKT_CMPL_FLAGS2_IP_TYPE); + _mm256_set1_epi32(RX_PKT_COMPRESS_CMPL_FLAGS_IP_TYPE); const __m256i rss_mask = - _mm256_set1_epi32(RX_PKT_CMPL_FLAGS_RSS_VALID); + _mm256_set1_epi32(RX_PKT_COMPRESS_CMPL_FLAGS_RSS_VALID); __m256i t0, t1, flags_type, flags2, index, errors; __m256i ptype_idx, ptypes, is_tunnel; __m256i mbuf01, mbuf23, mbuf45, mbuf67; __m256i rearm0, rearm1, rearm2, rearm3, rearm4, rearm5, rearm6, rearm7; __m256i ol_flags, ol_flags_hi; __m256i rss_flags; + __m256i errors_v2; + __m256i cs_err_v2; /* Validate ptype table indexing at build time. 
*/ bnxt_check_ptype_constants(); @@ -447,7 +449,6 @@ crx_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) cons += BNXT_RX_DESCS_PER_LOOP_VEC256, mbcons += BNXT_RX_DESCS_PER_LOOP_VEC256) { __m256i rxcmp0_1, rxcmp2_3, rxcmp4_5, rxcmp6_7, info3_v; - __m256i errors_v2; uint32_t num_valid; /* Copy eight mbuf pointers to output array. */ @@ -470,6 +471,7 @@ crx_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) rxcmp2_3 = _mm256_loadu_si256((void *)&cp_desc_ring[cons + 2]); rte_compiler_barrier(); rxcmp0_1 = _mm256_loadu_si256((void *)&cp_desc_ring[cons + 0]); + rte_compiler_barrier(); /* Compute packet type table indices for eight packets. */ t0 = _mm256_unpacklo_epi32(rxcmp0_1, rxcmp2_3); @@ -477,19 +479,19 @@ crx_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) flags_type = _mm256_unpacklo_epi64(t0, t1); ptype_idx = _mm256_and_si256(flags_type, flags_type_mask); ptype_idx = _mm256_srli_epi32(ptype_idx, - RX_PKT_CMPL_FLAGS_ITYPE_SFT - + RX_PKT_COMPRESS_CMPL_FLAGS_ITYPE_SFT - BNXT_PTYPE_TBL_TYPE_SFT); - t0 = _mm256_unpacklo_epi32(rxcmp0_1, rxcmp2_3); - t1 = _mm256_unpacklo_epi32(rxcmp4_5, rxcmp6_7); - flags2 = _mm256_unpackhi_epi64(t0, t1); + t0 = _mm256_unpackhi_epi32(rxcmp0_1, rxcmp2_3); + t1 = _mm256_unpackhi_epi32(rxcmp4_5, rxcmp6_7); + cs_err_v2 = _mm256_unpacklo_epi64(t0, t1); - t0 = _mm256_srli_epi32(_mm256_and_si256(flags2, flags2_mask1), - RX_PKT_CMPL_FLAGS2_META_FORMAT_SFT - + t0 = _mm256_srli_epi32(_mm256_and_si256(cs_err_v2, flags2_mask1), + RX_PKT_COMPRESS_CMPL_METADATA1_SFT - BNXT_PTYPE_TBL_VLAN_SFT); ptype_idx = _mm256_or_si256(ptype_idx, t0); - t0 = _mm256_srli_epi32(_mm256_and_si256(flags2, flags2_mask2), + t0 = _mm256_srli_epi32(_mm256_and_si256(cs_err_v2, flags2_mask2), RX_PKT_CMPL_FLAGS2_IP_TYPE_SFT - BNXT_PTYPE_TBL_IP_VER_SFT); ptype_idx = _mm256_or_si256(ptype_idx, t0); @@ -505,17 +507,22 @@ crx_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) 
* Compute ol_flags and checksum error table indices for eight * packets. */ - is_tunnel = _mm256_and_si256(flags2, _mm256_set1_epi32(4)); + is_tunnel = _mm256_and_si256(cs_err_v2, + _mm256_set1_epi32(BNXT_CRX_TUN_CS_CALC)); is_tunnel = _mm256_slli_epi32(is_tunnel, 3); - flags2 = _mm256_and_si256(flags2, _mm256_set1_epi32(0x1F)); + + flags2 = _mm256_and_si256(cs_err_v2, + _mm256_set1_epi32(BNXT_CRX_CQE_CSUM_CALC_MASK)); + flags2 = _mm256_srli_epi64(flags2, 8); /* Extract errors_v2 fields for eight packets. */ t0 = _mm256_unpackhi_epi32(rxcmp0_1, rxcmp2_3); t1 = _mm256_unpackhi_epi32(rxcmp4_5, rxcmp6_7); errors_v2 = _mm256_unpacklo_epi64(t0, t1); - errors = _mm256_srli_epi32(errors_v2, 4); - errors = _mm256_and_si256(errors, _mm256_set1_epi32(0xF)); + /* Compute errors out of cs_err_v2 to index into flags table. */ + errors = _mm256_and_si256(cs_err_v2, _mm256_set1_epi32(0xF0)); + errors = _mm256_srli_epi32(errors, 4); errors = _mm256_and_si256(errors, flags2); index = _mm256_andnot_si256(errors, flags2); -- 2.39.2 (Apple Git-143)