* [dpdk-dev] [PATCH] net/bnxt: use shorter SIMD initializers
@ 2020-10-22 18:50 Lance Richardson
2020-10-26 4:10 ` Ajit Khaparde
0 siblings, 1 reply; 2+ messages in thread
From: Lance Richardson @ 2020-10-22 18:50 UTC (permalink / raw)
To: Ajit Khaparde, Somnath Kotur; +Cc: dev
Make SIMD initialization code less verbose by using appropriate
intrinsics when all lanes of a vector are initialized to the
same value.
Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
drivers/net/bnxt/bnxt_rxtx_vec_neon.c | 58 +++++++--------------------
drivers/net/bnxt/bnxt_rxtx_vec_sse.c | 37 +++++------------
2 files changed, 23 insertions(+), 72 deletions(-)
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
index f49e29ccb..de1d96570 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
@@ -67,40 +67,17 @@ descs_to_mbufs(uint32x4_t mm_rxcmp[4], uint32x4_t mm_rxcmp1[4],
0xFF, 0xFF, /* vlan_tci (zeroes) */
12, 13, 14, 15 /* rss hash */
};
- const uint32x4_t flags_type_mask = {
- RX_PKT_CMPL_FLAGS_ITYPE_MASK,
- RX_PKT_CMPL_FLAGS_ITYPE_MASK,
- RX_PKT_CMPL_FLAGS_ITYPE_MASK,
- RX_PKT_CMPL_FLAGS_ITYPE_MASK
- };
- const uint32x4_t flags2_mask1 = {
- RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
- RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
- RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
- RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
- RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
- RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
- RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
- RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC
- };
- const uint32x4_t flags2_mask2 = {
- RX_PKT_CMPL_FLAGS2_IP_TYPE,
- RX_PKT_CMPL_FLAGS2_IP_TYPE,
- RX_PKT_CMPL_FLAGS2_IP_TYPE,
- RX_PKT_CMPL_FLAGS2_IP_TYPE
- };
- const uint32x4_t rss_mask = {
- RX_PKT_CMPL_FLAGS_RSS_VALID,
- RX_PKT_CMPL_FLAGS_RSS_VALID,
- RX_PKT_CMPL_FLAGS_RSS_VALID,
- RX_PKT_CMPL_FLAGS_RSS_VALID
- };
- const uint32x4_t flags2_index_mask = {
- 0x1F, 0x1F, 0x1F, 0x1F
- };
- const uint32x4_t flags2_error_mask = {
- 0xF, 0xF, 0xF, 0xF
- };
+ const uint32x4_t flags_type_mask =
+ vdupq_n_u32(RX_PKT_CMPL_FLAGS_ITYPE_MASK);
+ const uint32x4_t flags2_mask1 =
+ vdupq_n_u32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
+ RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC);
+ const uint32x4_t flags2_mask2 =
+ vdupq_n_u32(RX_PKT_CMPL_FLAGS2_IP_TYPE);
+ const uint32x4_t rss_mask =
+ vdupq_n_u32(RX_PKT_CMPL_FLAGS_RSS_VALID);
+ const uint32x4_t flags2_index_mask = vdupq_n_u32(0x1F);
+ const uint32x4_t flags2_error_mask = vdupq_n_u32(0x0F);
uint32x4_t flags_type, flags2, index, errors, rss_flags;
uint32x4_t tmp, ptype_idx;
uint64x2_t t0, t1;
@@ -180,20 +157,13 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t rx_ring_size = rxr->rx_ring_struct->ring_size;
struct cmpl_base *cp_desc_ring = cpr->cp_desc_ring;
uint64_t valid, desc_valid_mask = ~0UL;
- const uint32x4_t info3_v_mask = {
- CMPL_BASE_V, CMPL_BASE_V,
- CMPL_BASE_V, CMPL_BASE_V
- };
+ const uint32x4_t info3_v_mask = vdupq_n_u32(CMPL_BASE_V);
uint32_t raw_cons = cpr->cp_raw_cons;
uint32_t cons, mbcons;
int nb_rx_pkts = 0;
const uint64x2_t mb_init = {rxq->mbuf_initializer, 0};
- const uint32x4_t valid_target = {
- !!(raw_cons & cp_ring_size),
- !!(raw_cons & cp_ring_size),
- !!(raw_cons & cp_ring_size),
- !!(raw_cons & cp_ring_size)
- };
+ const uint32x4_t valid_target =
+ vdupq_n_u32(!!(raw_cons & cp_ring_size));
int i;
/* If Rx Q was stopped return */
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
index e4ba63551..e12bf8bb7 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
@@ -63,29 +63,14 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4],
0xFF, 0xFF, 3, 2, /* pkt_len */
0xFF, 0xFF, 0xFF, 0xFF); /* pkt_type (zeroes) */
const __m128i flags_type_mask =
- _mm_set_epi32(RX_PKT_CMPL_FLAGS_ITYPE_MASK,
- RX_PKT_CMPL_FLAGS_ITYPE_MASK,
- RX_PKT_CMPL_FLAGS_ITYPE_MASK,
- RX_PKT_CMPL_FLAGS_ITYPE_MASK);
+ _mm_set1_epi32(RX_PKT_CMPL_FLAGS_ITYPE_MASK);
const __m128i flags2_mask1 =
- _mm_set_epi32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
- RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
- RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
- RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
- RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
- RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
- RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
- RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC);
+ _mm_set1_epi32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
+ RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC);
const __m128i flags2_mask2 =
- _mm_set_epi32(RX_PKT_CMPL_FLAGS2_IP_TYPE,
- RX_PKT_CMPL_FLAGS2_IP_TYPE,
- RX_PKT_CMPL_FLAGS2_IP_TYPE,
- RX_PKT_CMPL_FLAGS2_IP_TYPE);
+ _mm_set1_epi32(RX_PKT_CMPL_FLAGS2_IP_TYPE);
const __m128i rss_mask =
- _mm_set_epi32(RX_PKT_CMPL_FLAGS_RSS_VALID,
- RX_PKT_CMPL_FLAGS_RSS_VALID,
- RX_PKT_CMPL_FLAGS_RSS_VALID,
- RX_PKT_CMPL_FLAGS_RSS_VALID);
+ _mm_set1_epi32(RX_PKT_CMPL_FLAGS_RSS_VALID);
__m128i t0, t1, flags_type, flags2, index, errors, rss_flags;
__m128i ptype_idx;
uint32_t ol_flags;
@@ -114,10 +99,10 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4],
t1 = _mm_unpackhi_epi32(mm_rxcmp1[2], mm_rxcmp1[3]);
/* Compute ol_flags and checksum error indexes for four packets. */
- flags2 = _mm_and_si128(flags2, _mm_set_epi32(0x1F, 0x1F, 0x1F, 0x1F));
+ flags2 = _mm_and_si128(flags2, _mm_set1_epi32(0x1F));
errors = _mm_srli_epi32(_mm_unpacklo_epi64(t0, t1), 4);
- errors = _mm_and_si128(errors, _mm_set_epi32(0xF, 0xF, 0xF, 0xF));
+ errors = _mm_and_si128(errors, _mm_set1_epi32(0xF));
errors = _mm_and_si128(errors, flags2);
index = _mm_andnot_si128(errors, flags2);
@@ -165,16 +150,12 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t rx_ring_size = rxr->rx_ring_struct->ring_size;
struct cmpl_base *cp_desc_ring = cpr->cp_desc_ring;
uint64_t valid, desc_valid_mask = ~0ULL;
- const __m128i info3_v_mask = _mm_set_epi32(CMPL_BASE_V, CMPL_BASE_V,
- CMPL_BASE_V, CMPL_BASE_V);
+ const __m128i info3_v_mask = _mm_set1_epi32(CMPL_BASE_V);
uint32_t raw_cons = cpr->cp_raw_cons;
uint32_t cons, mbcons;
int nb_rx_pkts = 0;
const __m128i valid_target =
- _mm_set_epi32(!!(raw_cons & cp_ring_size),
- !!(raw_cons & cp_ring_size),
- !!(raw_cons & cp_ring_size),
- !!(raw_cons & cp_ring_size));
+ _mm_set1_epi32(!!(raw_cons & cp_ring_size));
int i;
/* If Rx Q was stopped return */
--
2.25.1
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [dpdk-dev] [PATCH] net/bnxt: use shorter SIMD initializers
2020-10-22 18:50 [dpdk-dev] [PATCH] net/bnxt: use shorter SIMD initializers Lance Richardson
@ 2020-10-26 4:10 ` Ajit Khaparde
0 siblings, 0 replies; 2+ messages in thread
From: Ajit Khaparde @ 2020-10-26 4:10 UTC (permalink / raw)
To: Lance Richardson; +Cc: Somnath Kotur, dpdk-dev
On Thu, Oct 22, 2020 at 11:51 AM Lance Richardson
<lance.richardson@broadcom.com> wrote:
>
> Make SIMD initialization code less verbose by using appropriate
> intrinsics when all lanes of a vector are initialized to the
> same value.
>
> Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
> Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
Patch applied to dpdk-next-net-brcm.
> ---
> drivers/net/bnxt/bnxt_rxtx_vec_neon.c | 58 +++++++--------------------
> drivers/net/bnxt/bnxt_rxtx_vec_sse.c | 37 +++++------------
> 2 files changed, 23 insertions(+), 72 deletions(-)
>
> diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
> index f49e29ccb..de1d96570 100644
> --- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
> +++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
> @@ -67,40 +67,17 @@ descs_to_mbufs(uint32x4_t mm_rxcmp[4], uint32x4_t mm_rxcmp1[4],
> 0xFF, 0xFF, /* vlan_tci (zeroes) */
> 12, 13, 14, 15 /* rss hash */
> };
> - const uint32x4_t flags_type_mask = {
> - RX_PKT_CMPL_FLAGS_ITYPE_MASK,
> - RX_PKT_CMPL_FLAGS_ITYPE_MASK,
> - RX_PKT_CMPL_FLAGS_ITYPE_MASK,
> - RX_PKT_CMPL_FLAGS_ITYPE_MASK
> - };
> - const uint32x4_t flags2_mask1 = {
> - RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
> - RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
> - RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
> - RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC
> - };
> - const uint32x4_t flags2_mask2 = {
> - RX_PKT_CMPL_FLAGS2_IP_TYPE,
> - RX_PKT_CMPL_FLAGS2_IP_TYPE,
> - RX_PKT_CMPL_FLAGS2_IP_TYPE,
> - RX_PKT_CMPL_FLAGS2_IP_TYPE
> - };
> - const uint32x4_t rss_mask = {
> - RX_PKT_CMPL_FLAGS_RSS_VALID,
> - RX_PKT_CMPL_FLAGS_RSS_VALID,
> - RX_PKT_CMPL_FLAGS_RSS_VALID,
> - RX_PKT_CMPL_FLAGS_RSS_VALID
> - };
> - const uint32x4_t flags2_index_mask = {
> - 0x1F, 0x1F, 0x1F, 0x1F
> - };
> - const uint32x4_t flags2_error_mask = {
> - 0xF, 0xF, 0xF, 0xF
> - };
> + const uint32x4_t flags_type_mask =
> + vdupq_n_u32(RX_PKT_CMPL_FLAGS_ITYPE_MASK);
> + const uint32x4_t flags2_mask1 =
> + vdupq_n_u32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> + RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC);
> + const uint32x4_t flags2_mask2 =
> + vdupq_n_u32(RX_PKT_CMPL_FLAGS2_IP_TYPE);
> + const uint32x4_t rss_mask =
> + vdupq_n_u32(RX_PKT_CMPL_FLAGS_RSS_VALID);
> + const uint32x4_t flags2_index_mask = vdupq_n_u32(0x1F);
> + const uint32x4_t flags2_error_mask = vdupq_n_u32(0x0F);
> uint32x4_t flags_type, flags2, index, errors, rss_flags;
> uint32x4_t tmp, ptype_idx;
> uint64x2_t t0, t1;
> @@ -180,20 +157,13 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> uint16_t rx_ring_size = rxr->rx_ring_struct->ring_size;
> struct cmpl_base *cp_desc_ring = cpr->cp_desc_ring;
> uint64_t valid, desc_valid_mask = ~0UL;
> - const uint32x4_t info3_v_mask = {
> - CMPL_BASE_V, CMPL_BASE_V,
> - CMPL_BASE_V, CMPL_BASE_V
> - };
> + const uint32x4_t info3_v_mask = vdupq_n_u32(CMPL_BASE_V);
> uint32_t raw_cons = cpr->cp_raw_cons;
> uint32_t cons, mbcons;
> int nb_rx_pkts = 0;
> const uint64x2_t mb_init = {rxq->mbuf_initializer, 0};
> - const uint32x4_t valid_target = {
> - !!(raw_cons & cp_ring_size),
> - !!(raw_cons & cp_ring_size),
> - !!(raw_cons & cp_ring_size),
> - !!(raw_cons & cp_ring_size)
> - };
> + const uint32x4_t valid_target =
> + vdupq_n_u32(!!(raw_cons & cp_ring_size));
> int i;
>
> /* If Rx Q was stopped return */
> diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
> index e4ba63551..e12bf8bb7 100644
> --- a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
> +++ b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
> @@ -63,29 +63,14 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4],
> 0xFF, 0xFF, 3, 2, /* pkt_len */
> 0xFF, 0xFF, 0xFF, 0xFF); /* pkt_type (zeroes) */
> const __m128i flags_type_mask =
> - _mm_set_epi32(RX_PKT_CMPL_FLAGS_ITYPE_MASK,
> - RX_PKT_CMPL_FLAGS_ITYPE_MASK,
> - RX_PKT_CMPL_FLAGS_ITYPE_MASK,
> - RX_PKT_CMPL_FLAGS_ITYPE_MASK);
> + _mm_set1_epi32(RX_PKT_CMPL_FLAGS_ITYPE_MASK);
> const __m128i flags2_mask1 =
> - _mm_set_epi32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
> - RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
> - RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
> - RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC);
> + _mm_set1_epi32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> + RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC);
> const __m128i flags2_mask2 =
> - _mm_set_epi32(RX_PKT_CMPL_FLAGS2_IP_TYPE,
> - RX_PKT_CMPL_FLAGS2_IP_TYPE,
> - RX_PKT_CMPL_FLAGS2_IP_TYPE,
> - RX_PKT_CMPL_FLAGS2_IP_TYPE);
> + _mm_set1_epi32(RX_PKT_CMPL_FLAGS2_IP_TYPE);
> const __m128i rss_mask =
> - _mm_set_epi32(RX_PKT_CMPL_FLAGS_RSS_VALID,
> - RX_PKT_CMPL_FLAGS_RSS_VALID,
> - RX_PKT_CMPL_FLAGS_RSS_VALID,
> - RX_PKT_CMPL_FLAGS_RSS_VALID);
> + _mm_set1_epi32(RX_PKT_CMPL_FLAGS_RSS_VALID);
> __m128i t0, t1, flags_type, flags2, index, errors, rss_flags;
> __m128i ptype_idx;
> uint32_t ol_flags;
> @@ -114,10 +99,10 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4],
> t1 = _mm_unpackhi_epi32(mm_rxcmp1[2], mm_rxcmp1[3]);
>
> /* Compute ol_flags and checksum error indexes for four packets. */
> - flags2 = _mm_and_si128(flags2, _mm_set_epi32(0x1F, 0x1F, 0x1F, 0x1F));
> + flags2 = _mm_and_si128(flags2, _mm_set1_epi32(0x1F));
>
> errors = _mm_srli_epi32(_mm_unpacklo_epi64(t0, t1), 4);
> - errors = _mm_and_si128(errors, _mm_set_epi32(0xF, 0xF, 0xF, 0xF));
> + errors = _mm_and_si128(errors, _mm_set1_epi32(0xF));
> errors = _mm_and_si128(errors, flags2);
>
> index = _mm_andnot_si128(errors, flags2);
> @@ -165,16 +150,12 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> uint16_t rx_ring_size = rxr->rx_ring_struct->ring_size;
> struct cmpl_base *cp_desc_ring = cpr->cp_desc_ring;
> uint64_t valid, desc_valid_mask = ~0ULL;
> - const __m128i info3_v_mask = _mm_set_epi32(CMPL_BASE_V, CMPL_BASE_V,
> - CMPL_BASE_V, CMPL_BASE_V);
> + const __m128i info3_v_mask = _mm_set1_epi32(CMPL_BASE_V);
> uint32_t raw_cons = cpr->cp_raw_cons;
> uint32_t cons, mbcons;
> int nb_rx_pkts = 0;
> const __m128i valid_target =
> - _mm_set_epi32(!!(raw_cons & cp_ring_size),
> - !!(raw_cons & cp_ring_size),
> - !!(raw_cons & cp_ring_size),
> - !!(raw_cons & cp_ring_size));
> + _mm_set1_epi32(!!(raw_cons & cp_ring_size));
> int i;
>
> /* If Rx Q was stopped return */
> --
> 2.25.1
>
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2020-10-26 4:10 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-22 18:50 [dpdk-dev] [PATCH] net/bnxt: use shorter SIMD initializers Lance Richardson
2020-10-26 4:10 ` Ajit Khaparde
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).