From: Lance Richardson <lance.richardson@broadcom.com> Refactor offload flags mapping table to be dynamic and per-ring instead of static and global. Signed-off-by: Lance Richardson <lance.richardson@broadcom.com> Cc: stable@dpdk.org --- drivers/net/bnxt/bnxt_rxr.c | 34 +++++++++------------------ drivers/net/bnxt/bnxt_rxr.h | 12 +++++----- drivers/net/bnxt/bnxt_rxtx_vec_neon.c | 10 ++++---- drivers/net/bnxt/bnxt_rxtx_vec_sse.c | 10 ++++---- 4 files changed, 29 insertions(+), 37 deletions(-) diff --git a/drivers/net/bnxt/bnxt_rxr.c b/drivers/net/bnxt/bnxt_rxr.c index 288b403bf0..1edc8dac43 100644 --- a/drivers/net/bnxt/bnxt_rxr.c +++ b/drivers/net/bnxt/bnxt_rxr.c @@ -415,24 +415,14 @@ bnxt_parse_pkt_type(struct rx_pkt_cmpl *rxcmp, struct rx_pkt_cmpl_hi *rxcmp1) return bnxt_ptype_table[index]; } -uint32_t -bnxt_ol_flags_table[BNXT_OL_FLAGS_TBL_DIM] __rte_cache_aligned; - -uint32_t -bnxt_ol_flags_err_table[BNXT_OL_FLAGS_ERR_TBL_DIM] __rte_cache_aligned; - static void __rte_cold -bnxt_init_ol_flags_tables(void) +bnxt_init_ol_flags_tables(struct bnxt_rx_ring_info *rxr) { - static bool initialized; uint32_t *pt; int i; - if (initialized) - return; - /* Initialize ol_flags table. */ - pt = bnxt_ol_flags_table; + pt = rxr->ol_flags_table; for (i = 0; i < BNXT_OL_FLAGS_TBL_DIM; i++) { pt[i] = 0; if (i & RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN) @@ -449,7 +439,7 @@ bnxt_init_ol_flags_tables(void) } /* Initialize checksum error table. */ - pt = bnxt_ol_flags_err_table; + pt = rxr->ol_flags_err_table; for (i = 0; i < BNXT_OL_FLAGS_ERR_TBL_DIM; i++) { pt[i] = 0; if (i & (RX_PKT_CMPL_ERRORS_IP_CS_ERROR >> 4)) @@ -464,13 +454,11 @@ bnxt_init_ol_flags_tables(void) if (i & (RX_PKT_CMPL_ERRORS_T_L4_CS_ERROR >> 4)) pt[i] |= PKT_RX_OUTER_L4_CKSUM_BAD; } - - initialized = true; } static void -bnxt_set_ol_flags(struct rx_pkt_cmpl *rxcmp, struct rx_pkt_cmpl_hi *rxcmp1, - struct rte_mbuf *mbuf) +bnxt_set_ol_flags(struct bnxt_rx_ring_info *rxr, struct rx_pkt_cmpl *rxcmp, + struct rx_pkt_cmpl_hi *rxcmp1, struct rte_mbuf *mbuf) { uint16_t flags_type, errors, flags; uint64_t ol_flags; @@ -491,10 +479,10 @@ bnxt_set_ol_flags(struct rx_pkt_cmpl *rxcmp, struct rx_pkt_cmpl_hi *rxcmp1, RX_PKT_CMPL_ERRORS_T_L4_CS_ERROR); errors = (errors >> 4) & flags; - ol_flags = bnxt_ol_flags_table[flags & ~errors]; + ol_flags = rxr->ol_flags_table[flags & ~errors]; if (errors) - ol_flags |= bnxt_ol_flags_err_table[errors]; + ol_flags |= rxr->ol_flags_err_table[errors]; if (flags_type & RX_PKT_CMPL_FLAGS_RSS_VALID) { mbuf->hash.rss = rte_le_to_cpu_32(rxcmp->rss_hash); @@ -749,7 +737,7 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt, mbuf->data_len = mbuf->pkt_len; mbuf->port = rxq->port_id; - bnxt_set_ol_flags(rxcmp, rxcmp1, mbuf); + bnxt_set_ol_flags(rxr, rxcmp, rxcmp1, mbuf); #ifdef RTE_LIBRTE_IEEE1588 if (unlikely((rte_le_to_cpu_16(rxcmp->flags_type) & @@ -1127,9 +1115,6 @@ int bnxt_init_one_rx_ring(struct bnxt_rx_queue *rxq) /* Initialize packet type table. */ bnxt_init_ptype_table(); - /* Initialize offload flags parsing table. */ - bnxt_init_ol_flags_tables(); - size = rte_pktmbuf_data_room_size(rxq->mb_pool) - RTE_PKTMBUF_HEADROOM; size = RTE_MIN(BNXT_MAX_PKT_LEN, size); @@ -1139,6 +1124,9 @@ int bnxt_init_one_rx_ring(struct bnxt_rx_queue *rxq) ring = rxr->rx_ring_struct; bnxt_init_rxbds(ring, type, size); + /* Initialize offload flags parsing table. */ + bnxt_init_ol_flags_tables(rxr); + raw_prod = rxr->rx_raw_prod; for (i = 0; i < ring->ring_size; i++) { if (unlikely(!rxr->rx_buf_ring[i])) { diff --git a/drivers/net/bnxt/bnxt_rxr.h b/drivers/net/bnxt/bnxt_rxr.h index af6ff0972d..4db1e8761e 100644 --- a/drivers/net/bnxt/bnxt_rxr.h +++ b/drivers/net/bnxt/bnxt_rxr.h @@ -42,6 +42,9 @@ static inline uint16_t bnxt_tpa_start_agg_id(struct bnxt *bp, /* Number of descriptors to process per inner loop in vector mode. */ #define RTE_BNXT_DESCS_PER_LOOP 4U +#define BNXT_OL_FLAGS_TBL_DIM 32 +#define BNXT_OL_FLAGS_ERR_TBL_DIM 16 + struct bnxt_tpa_info { struct rte_mbuf *mbuf; uint16_t len; @@ -73,6 +76,9 @@ struct bnxt_rx_ring_info { struct rte_bitmap *ag_bitmap; struct bnxt_tpa_info *tpa_info; + + uint32_t ol_flags_table[BNXT_OL_FLAGS_TBL_DIM]; + uint32_t ol_flags_err_table[BNXT_OL_FLAGS_ERR_TBL_DIM]; }; uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, @@ -116,10 +122,4 @@ bnxt_cfa_code_dynfield(struct rte_mbuf *mbuf) #define BNXT_PTYPE_TBL_DIM 128 extern uint32_t bnxt_ptype_table[BNXT_PTYPE_TBL_DIM]; - -#define BNXT_OL_FLAGS_TBL_DIM 32 -extern uint32_t bnxt_ol_flags_table[BNXT_OL_FLAGS_TBL_DIM]; - -#define BNXT_OL_FLAGS_ERR_TBL_DIM 16 -extern uint32_t bnxt_ol_flags_err_table[BNXT_OL_FLAGS_ERR_TBL_DIM]; #endif diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c index 81f9a7da2b..d9ac822be8 100644 --- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c +++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c @@ -27,11 +27,11 @@ uint32_t tmp, of; \ \ of = vgetq_lane_u32((rss_flags), (pi)) | \ - bnxt_ol_flags_table[vgetq_lane_u32((ol_idx), (pi))]; \ + rxr->ol_flags_table[vgetq_lane_u32((ol_idx), (pi))]; \ \ tmp = vgetq_lane_u32((errors), (pi)); \ if (tmp) \ - of |= bnxt_ol_flags_err_table[tmp]; \ + of |= rxr->ol_flags_err_table[tmp]; \ (ol_flags) = of; \ } @@ -58,7 +58,8 @@ static void descs_to_mbufs(uint32x4_t mm_rxcmp[4], uint32x4_t mm_rxcmp1[4], - uint64x2_t mb_init, struct rte_mbuf **mbuf) + uint64x2_t mb_init, struct rte_mbuf **mbuf, + struct bnxt_rx_ring_info *rxr) { const uint8x16_t shuf_msk = { 0xFF, 0xFF, 0xFF, 0xFF, /* pkt_type (zeroes) */ @@ -286,7 +287,8 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, goto out; } - descs_to_mbufs(rxcmp, rxcmp1, mb_init, &rx_pkts[nb_rx_pkts]); + descs_to_mbufs(rxcmp, rxcmp1, mb_init, &rx_pkts[nb_rx_pkts], + rxr); nb_rx_pkts += num_valid; if (num_valid < RTE_BNXT_DESCS_PER_LOOP) diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c index ce92629ab0..7f5825d333 100644 --- a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c +++ b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c @@ -27,11 +27,11 @@ uint32_t tmp, of; \ \ of = _mm_extract_epi32((rss_flags), (pi)) | \ - bnxt_ol_flags_table[_mm_extract_epi32((ol_index), (pi))]; \ + rxr->ol_flags_table[_mm_extract_epi32((ol_index), (pi))]; \ \ tmp = _mm_extract_epi32((errors), (pi)); \ if (tmp) \ - of |= bnxt_ol_flags_err_table[tmp]; \ + of |= rxr->ol_flags_err_table[tmp]; \ (ol_flags) = of; \ } @@ -54,7 +54,8 @@ static inline void descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4], - __m128i mbuf_init, struct rte_mbuf **mbuf) + __m128i mbuf_init, struct rte_mbuf **mbuf, + struct bnxt_rx_ring_info *rxr) { const __m128i shuf_msk = _mm_set_epi8(15, 14, 13, 12, /* rss */ @@ -268,7 +269,8 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, goto out; } - descs_to_mbufs(rxcmp, rxcmp1, mbuf_init, &rx_pkts[nb_rx_pkts]); + descs_to_mbufs(rxcmp, rxcmp1, mbuf_init, &rx_pkts[nb_rx_pkts], + rxr); nb_rx_pkts += num_valid; if (num_valid < RTE_BNXT_DESCS_PER_LOOP) -- 2.25.1
From: Lance Richardson <lance.richardson@broadcom.com> The setting of the mbuf ol_flags field for tunneled packets should be different depending upon whether DEV_RX_OFFLOAD_OUTER_* offloads are enabled. Initialize ol_flags mappings based on the receive offload configuration when the receive ring is initialized. Signed-off-by: Lance Richardson <lance.richardson@broadcom.com> Cc: stable@dpdk.org --- drivers/net/bnxt/bnxt_rxr.c | 85 +++++++++++++++++++++------ drivers/net/bnxt/bnxt_rxr.h | 4 +- drivers/net/bnxt/bnxt_rxtx_vec_neon.c | 6 +- drivers/net/bnxt/bnxt_rxtx_vec_sse.c | 6 +- 4 files changed, 80 insertions(+), 21 deletions(-) diff --git a/drivers/net/bnxt/bnxt_rxr.c b/drivers/net/bnxt/bnxt_rxr.c index 1edc8dac43..14901f1b99 100644 --- a/drivers/net/bnxt/bnxt_rxr.c +++ b/drivers/net/bnxt/bnxt_rxr.c @@ -416,43 +416,91 @@ bnxt_parse_pkt_type(struct rx_pkt_cmpl *rxcmp, struct rx_pkt_cmpl_hi *rxcmp1) } static void __rte_cold -bnxt_init_ol_flags_tables(struct bnxt_rx_ring_info *rxr) +bnxt_init_ol_flags_tables(struct bnxt_rx_queue *rxq) { + struct bnxt_rx_ring_info *rxr = rxq->rx_ring; + struct rte_eth_conf *dev_conf; + bool outer_cksum_enabled; + uint64_t offloads; uint32_t *pt; int i; + dev_conf = &rxq->bp->eth_dev->data->dev_conf; + offloads = dev_conf->rxmode.offloads; + + outer_cksum_enabled = !!(offloads & (DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM | + DEV_RX_OFFLOAD_OUTER_UDP_CKSUM)); + /* Initialize ol_flags table. */ pt = rxr->ol_flags_table; for (i = 0; i < BNXT_OL_FLAGS_TBL_DIM; i++) { pt[i] = 0; + if (i & RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN) pt[i] |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; - if (i & RX_PKT_CMPL_FLAGS2_IP_CS_CALC) - pt[i] |= PKT_RX_IP_CKSUM_GOOD; + if (i & (RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC << 3)) { + /* Tunnel case. */ + if (outer_cksum_enabled) { + if (i & RX_PKT_CMPL_FLAGS2_IP_CS_CALC) + pt[i] |= PKT_RX_IP_CKSUM_GOOD; + + if (i & RX_PKT_CMPL_FLAGS2_L4_CS_CALC) + pt[i] |= PKT_RX_L4_CKSUM_GOOD; - if (i & RX_PKT_CMPL_FLAGS2_L4_CS_CALC) - pt[i] |= PKT_RX_L4_CKSUM_GOOD; + if (i & RX_PKT_CMPL_FLAGS2_T_L4_CS_CALC) + pt[i] |= PKT_RX_OUTER_L4_CKSUM_GOOD; + } else { + if (i & RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC) + pt[i] |= PKT_RX_IP_CKSUM_GOOD; + + if (i & RX_PKT_CMPL_FLAGS2_T_L4_CS_CALC) + pt[i] |= PKT_RX_L4_CKSUM_GOOD; + } + } else { + /* Non-tunnel case. */ + if (i & RX_PKT_CMPL_FLAGS2_IP_CS_CALC) + pt[i] |= PKT_RX_IP_CKSUM_GOOD; - if (i & RX_PKT_CMPL_FLAGS2_T_L4_CS_CALC) - pt[i] |= PKT_RX_OUTER_L4_CKSUM_GOOD; + if (i & RX_PKT_CMPL_FLAGS2_L4_CS_CALC) + pt[i] |= PKT_RX_L4_CKSUM_GOOD; + } } /* Initialize checksum error table. */ pt = rxr->ol_flags_err_table; for (i = 0; i < BNXT_OL_FLAGS_ERR_TBL_DIM; i++) { pt[i] = 0; - if (i & (RX_PKT_CMPL_ERRORS_IP_CS_ERROR >> 4)) - pt[i] |= PKT_RX_IP_CKSUM_BAD; - if (i & (RX_PKT_CMPL_ERRORS_L4_CS_ERROR >> 4)) - pt[i] |= PKT_RX_L4_CKSUM_BAD; + if (i & (RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC << 2)) { + /* Tunnel case. */ + if (outer_cksum_enabled) { + if (i & (RX_PKT_CMPL_ERRORS_IP_CS_ERROR >> 4)) + pt[i] |= PKT_RX_IP_CKSUM_BAD; + + if (i & (RX_PKT_CMPL_ERRORS_T_IP_CS_ERROR >> 4)) + pt[i] |= PKT_RX_EIP_CKSUM_BAD; - if (i & (RX_PKT_CMPL_ERRORS_T_IP_CS_ERROR >> 4)) - pt[i] |= PKT_RX_EIP_CKSUM_BAD; + if (i & (RX_PKT_CMPL_ERRORS_L4_CS_ERROR >> 4)) + pt[i] |= PKT_RX_L4_CKSUM_BAD; - if (i & (RX_PKT_CMPL_ERRORS_T_L4_CS_ERROR >> 4)) - pt[i] |= PKT_RX_OUTER_L4_CKSUM_BAD; + if (i & (RX_PKT_CMPL_ERRORS_T_L4_CS_ERROR >> 4)) + pt[i] |= PKT_RX_OUTER_L4_CKSUM_BAD; + } else { + if (i & (RX_PKT_CMPL_ERRORS_T_IP_CS_ERROR >> 4)) + pt[i] |= PKT_RX_IP_CKSUM_BAD; + + if (i & (RX_PKT_CMPL_ERRORS_T_L4_CS_ERROR >> 4)) + pt[i] |= PKT_RX_L4_CKSUM_BAD; + } + } else { + /* Non-tunnel case. */ + if (i & (RX_PKT_CMPL_ERRORS_IP_CS_ERROR >> 4)) + pt[i] |= PKT_RX_IP_CKSUM_BAD; + + if (i & (RX_PKT_CMPL_ERRORS_L4_CS_ERROR >> 4)) + pt[i] |= PKT_RX_L4_CKSUM_BAD; + } } } @@ -472,6 +520,7 @@ bnxt_set_ol_flags(struct bnxt_rx_ring_info *rxr, struct rx_pkt_cmpl *rxcmp, RX_PKT_CMPL_FLAGS2_T_L4_CS_CALC | RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN); + flags |= (flags & RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC) << 3; errors = rte_le_to_cpu_16(rxcmp1->errors_v2) & (RX_PKT_CMPL_ERRORS_IP_CS_ERROR | RX_PKT_CMPL_ERRORS_L4_CS_ERROR | @@ -481,8 +530,10 @@ bnxt_set_ol_flags(struct bnxt_rx_ring_info *rxr, struct rx_pkt_cmpl *rxcmp, ol_flags = rxr->ol_flags_table[flags & ~errors]; - if (errors) + if (unlikely(errors)) { + errors |= (flags & RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC) << 2; ol_flags |= rxr->ol_flags_err_table[errors]; + } if (flags_type & RX_PKT_CMPL_FLAGS_RSS_VALID) { mbuf->hash.rss = rte_le_to_cpu_32(rxcmp->rss_hash); @@ -1125,7 +1176,7 @@ int bnxt_init_one_rx_ring(struct bnxt_rx_queue *rxq) bnxt_init_rxbds(ring, type, size); /* Initialize offload flags parsing table. */ - bnxt_init_ol_flags_tables(rxr); + bnxt_init_ol_flags_tables(rxq); raw_prod = rxr->rx_raw_prod; for (i = 0; i < ring->ring_size; i++) { diff --git a/drivers/net/bnxt/bnxt_rxr.h b/drivers/net/bnxt/bnxt_rxr.h index 4db1e8761e..b2942030ab 100644 --- a/drivers/net/bnxt/bnxt_rxr.h +++ b/drivers/net/bnxt/bnxt_rxr.h @@ -42,8 +42,8 @@ static inline uint16_t bnxt_tpa_start_agg_id(struct bnxt *bp, /* Number of descriptors to process per inner loop in vector mode. */ #define RTE_BNXT_DESCS_PER_LOOP 4U -#define BNXT_OL_FLAGS_TBL_DIM 32 -#define BNXT_OL_FLAGS_ERR_TBL_DIM 16 +#define BNXT_OL_FLAGS_TBL_DIM 64 +#define BNXT_OL_FLAGS_ERR_TBL_DIM 32 struct bnxt_tpa_info { struct rte_mbuf *mbuf; diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c index d9ac822be8..4839e2a38d 100644 --- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c +++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c @@ -80,7 +80,7 @@ descs_to_mbufs(uint32x4_t mm_rxcmp[4], uint32x4_t mm_rxcmp1[4], const uint32x4_t flags2_index_mask = vdupq_n_u32(0x1F); const uint32x4_t flags2_error_mask = vdupq_n_u32(0x0F); uint32x4_t flags_type, flags2, index, errors, rss_flags; - uint32x4_t tmp, ptype_idx; + uint32x4_t tmp, ptype_idx, is_tunnel; uint64x2_t t0, t1; uint32_t ol_flags; @@ -117,10 +117,14 @@ descs_to_mbufs(uint32x4_t mm_rxcmp[4], uint32x4_t mm_rxcmp1[4], vget_low_u64(t1))); /* Compute ol_flags and checksum error indexes for four packets. */ + is_tunnel = vandq_u32(flags2, vdupq_n_u32(4)); + is_tunnel = vshlq_n_u32(is_tunnel, 3); errors = vandq_u32(vshrq_n_u32(errors, 4), flags2_error_mask); errors = vandq_u32(errors, flags2); index = vbicq_u32(flags2, errors); + errors = vorrq_u32(errors, vshrq_n_u32(is_tunnel, 1)); + index = vorrq_u32(index, is_tunnel); /* Update mbuf rearm_data for four packets. */ GET_OL_FLAGS(rss_flags, index, errors, 0, ol_flags); diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c index 7f5825d333..c2523040e8 100644 --- a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c +++ b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c @@ -73,7 +73,7 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4], const __m128i rss_mask = _mm_set1_epi32(RX_PKT_CMPL_FLAGS_RSS_VALID); __m128i t0, t1, flags_type, flags2, index, errors, rss_flags; - __m128i ptype_idx; + __m128i ptype_idx, is_tunnel; uint32_t ol_flags; /* Compute packet type table indexes for four packets */ @@ -100,6 +100,8 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4], t1 = _mm_unpackhi_epi32(mm_rxcmp1[2], mm_rxcmp1[3]); /* Compute ol_flags and checksum error indexes for four packets. */ + is_tunnel = _mm_and_si128(flags2, _mm_set1_epi32(4)); + is_tunnel = _mm_slli_epi32(is_tunnel, 3); flags2 = _mm_and_si128(flags2, _mm_set1_epi32(0x1F)); errors = _mm_srli_epi32(_mm_unpacklo_epi64(t0, t1), 4); @@ -107,6 +109,8 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4], errors = _mm_and_si128(errors, flags2); index = _mm_andnot_si128(errors, flags2); + errors = _mm_or_si128(errors, _mm_srli_epi32(is_tunnel, 1)); + index = _mm_or_si128(index, is_tunnel); /* Update mbuf rearm_data for four packets. */ GET_OL_FLAGS(rss_flags, index, errors, 0, ol_flags); -- 2.25.1