DPDK patches and discussions
 help / color / mirror / Atom feed
From: Jerin Jacob <jerinjacobk@gmail.com>
To: pbhagavatula@marvell.com
Cc: jerinj@marvell.com, Vamsi Attunuru <vattunuru@marvell.com>,
	 Bruce Richardson <bruce.richardson@intel.com>,
	 Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>,
	dev@dpdk.org
Subject: Re: [PATCH v2 2/3] net/octeon_ep: use SSE instructions for Rx routine
Date: Wed, 6 Dec 2023 17:47:16 +0530	[thread overview]
Message-ID: <CALBAE1PSuSNnMVif-g0+LBUXGv3TrRT4+ELkP-gsyKfTerGh4w@mail.gmail.com> (raw)
In-Reply-To: <20231125160349.2021-2-pbhagavatula@marvell.com>

On Sat, Nov 25, 2023 at 10:52 PM <pbhagavatula@marvell.com> wrote:
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Optimize Rx routine to use SSE instructions.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> ---

> diff --git a/drivers/net/octeon_ep/cnxk_ep_rx_sse.c b/drivers/net/octeon_ep/cnxk_ep_rx_sse.c
> new file mode 100644
> index 0000000000..531f75a2e0
> --- /dev/null
> +++ b/drivers/net/octeon_ep/cnxk_ep_rx_sse.c
> @@ -0,0 +1,124 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(C) 2023 Marvell.
> + */
> +
> +#include "cnxk_ep_rx.h"
> +
> +static __rte_always_inline uint32_t
> +hadd(__m128i x)
> +{
> +       __m128i hi64 = _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2));
> +       __m128i sum64 = _mm_add_epi32(hi64, x);
> +       __m128i hi32 = _mm_shufflelo_epi16(sum64, _MM_SHUFFLE(1, 0, 3, 2));
> +       __m128i sum32 = _mm_add_epi32(sum64, hi32);
> +       return _mm_cvtsi128_si32(sum32);
> +}
> +
> +static __rte_always_inline void
> +cnxk_ep_process_pkts_vec_sse(struct rte_mbuf **rx_pkts, struct otx_ep_droq *droq, uint16_t new_pkts)
> +{
> +       struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
> +       uint32_t bytes_rsvd = 0, read_idx = droq->read_idx;
> +       uint32_t idx0, idx1, idx2, idx3;
> +       struct rte_mbuf *m0, *m1, *m2, *m3;
> +       uint16_t nb_desc = droq->nb_desc;
> +       uint16_t pkts = 0;
> +
> +       idx0 = read_idx;
> +       while (pkts < new_pkts) {
> +               const __m128i bswap_mask = _mm_set_epi8(0xFF, 0xFF, 12, 13, 0xFF, 0xFF, 8, 9, 0xFF,
> +                                                       0xFF, 4, 5, 0xFF, 0xFF, 0, 1);
> +               const __m128i cpy_mask = _mm_set_epi8(0xFF, 0xFF, 9, 8, 0xFF, 0xFF, 9, 8, 0xFF,
> +                                                     0xFF, 1, 0, 0xFF, 0xFF, 1, 0);
> +               __m128i s01, s23;
> +
> +               idx1 = otx_ep_incr_index(idx0, 1, nb_desc);
> +               idx2 = otx_ep_incr_index(idx1, 1, nb_desc);
> +               idx3 = otx_ep_incr_index(idx2, 1, nb_desc);
> +
> +               m0 = recv_buf_list[idx0];
> +               m1 = recv_buf_list[idx1];
> +               m2 = recv_buf_list[idx2];
> +               m3 = recv_buf_list[idx3];
> +

Please add some comments explaining the SSE usage in this section.

> +               s01 = _mm_set_epi32(rte_pktmbuf_mtod(m3, struct otx_ep_droq_info *)->length >> 48,
> +                                   rte_pktmbuf_mtod(m1, struct otx_ep_droq_info *)->length >> 48,
> +                                   rte_pktmbuf_mtod(m2, struct otx_ep_droq_info *)->length >> 48,
> +                                   rte_pktmbuf_mtod(m0, struct otx_ep_droq_info *)->length >> 48);
> +               s01 = _mm_shuffle_epi8(s01, bswap_mask);
> +               bytes_rsvd += hadd(s01);
> +               s23 = _mm_shuffle_epi32(s01, _MM_SHUFFLE(3, 3, 1, 1));
> +               s01 = _mm_shuffle_epi8(s01, cpy_mask);
> +               s23 = _mm_shuffle_epi8(s23, cpy_mask);

> diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.h b/drivers/net/octeon_ep/otx_ep_rxtx.h
> index b159c32cae..af657dba50 100644
> --- a/drivers/net/octeon_ep/otx_ep_rxtx.h
> +++ b/drivers/net/octeon_ep/otx_ep_rxtx.h
> @@ -48,12 +48,22 @@ cnxk_ep_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
>  uint16_t
>  cnxk_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
>
> +#ifdef RTE_ARCH_X86

We can skip the #ifdef for function declarations. The same comment applies to the AVX patch.

> +uint16_t
> +cnxk_ep_recv_pkts_sse(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
> +#endif
> +
>  uint16_t
>  cnxk_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
>
>  uint16_t
>  cn9k_ep_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
>
> +#ifdef RTE_ARCH_X86

We can skip the #ifdef for function declarations. The same comment applies to the AVX patch.

> +uint16_t
> +cn9k_ep_recv_pkts_sse(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
> +#endif
> +
>  uint16_t
>  cn9k_ep_recv_pkts_mseg(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t budget);
>  #endif /* _OTX_EP_RXTX_H_ */
> --
> 2.25.1
>

  reply	other threads:[~2023-12-06 12:17 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-11-25 16:03 [PATCH v2 1/3] net/octeon_ep: optimize Rx and Tx routines pbhagavatula
2023-11-25 16:03 ` [PATCH v2 2/3] net/octeon_ep: use SSE instructions for Rx routine pbhagavatula
2023-12-06 12:17   ` Jerin Jacob [this message]
2023-11-25 16:03 ` [PATCH v2 3/3] net/octeon_ep: use AVX2 instructions for Rx pbhagavatula
2023-12-06 12:12 ` [PATCH v2 1/3] net/octeon_ep: optimize Rx and Tx routines Jerin Jacob
2023-12-06 17:24 ` [PATCH v3 " pbhagavatula
2023-12-06 17:24   ` [PATCH v3 2/3] net/octeon_ep: use SSE instructions for Rx routine pbhagavatula
2023-12-07  6:34     ` Jerin Jacob
2023-12-06 17:24   ` [PATCH v3 3/3] net/octeon_ep: use AVX2 instructions for Rx pbhagavatula
2023-12-07  6:49   ` [PATCH v4 1/3] net/octeon_ep: optimize Rx and Tx routines pbhagavatula
2023-12-07  6:49     ` [PATCH v4 2/3] net/octeon_ep: use SSE instructions for Rx routine pbhagavatula
2023-12-07  6:49     ` [PATCH v4 3/3] net/octeon_ep: use AVX2 instructions for Rx pbhagavatula
2023-12-11 12:05       ` Jerin Jacob
2023-12-11 13:43     ` [PATCH v5 1/3] net/octeon_ep: optimize Rx and Tx routines pbhagavatula
2023-12-11 13:43       ` [PATCH v5 2/3] net/octeon_ep: use SSE instructions for Rx routine pbhagavatula
2023-12-11 13:43       ` [PATCH v5 3/3] net/octeon_ep: use AVX2 instructions for Rx pbhagavatula
2023-12-12  5:29         ` Jerin Jacob

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CALBAE1PSuSNnMVif-g0+LBUXGv3TrRT4+ELkP-gsyKfTerGh4w@mail.gmail.com \
    --to=jerinjacobk@gmail.com \
    --cc=bruce.richardson@intel.com \
    --cc=dev@dpdk.org \
    --cc=jerinj@marvell.com \
    --cc=konstantin.v.ananyev@yandex.ru \
    --cc=pbhagavatula@marvell.com \
    --cc=vattunuru@marvell.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).