DPDK patches and discussions
 help / color / mirror / Atom feed
From: Dongdong Liu <liudongdong3@huawei.com>
To: <dev@dpdk.org>, <ferruh.yigit@amd.com>, <thomas@monjalon.net>,
	<andrew.rybchenko@oktetlabs.ru>
Cc: <stable@dpdk.org>
Subject: [PATCH 2/5] net/hns3: fix the order of NEON Rx code
Date: Tue, 11 Jul 2023 18:24:45 +0800	[thread overview]
Message-ID: <20230711102448.11627-3-liudongdong3@huawei.com> (raw)
In-Reply-To: <20230711102448.11627-1-liudongdong3@huawei.com>

From: Huisong Li <lihuisong@huawei.com>

This patch reorders the order of the NEON Rx for better maintenance
and easier understanding.

Fixes: a3d4f4d291d7 ("net/hns3: support NEON Rx")
Cc: stable@dpdk.org

Signed-off-by: Huisong Li <lihuisong@huawei.com>
Signed-off-by: Dongdong Liu <liudongdong3@huawei.com>
---
 drivers/net/hns3/hns3_rxtx_vec_neon.h | 78 +++++++++++----------------
 1 file changed, 31 insertions(+), 47 deletions(-)

diff --git a/drivers/net/hns3/hns3_rxtx_vec_neon.h b/drivers/net/hns3/hns3_rxtx_vec_neon.h
index 564d831a48..0dc6b9f0a2 100644
--- a/drivers/net/hns3/hns3_rxtx_vec_neon.h
+++ b/drivers/net/hns3/hns3_rxtx_vec_neon.h
@@ -180,19 +180,12 @@ hns3_recv_burst_vec(struct hns3_rx_queue *__restrict rxq,
 		bd_vld = vset_lane_u16(rxdp[2].rx.bdtype_vld_udp0, bd_vld, 2);
 		bd_vld = vset_lane_u16(rxdp[3].rx.bdtype_vld_udp0, bd_vld, 3);
 
-		/* load 2 mbuf pointer */
-		mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);
-
 		bd_vld = vshl_n_u16(bd_vld,
 				    HNS3_UINT16_BIT - 1 - HNS3_RXD_VLD_B);
 		bd_vld = vreinterpret_u16_s16(
 				vshr_n_s16(vreinterpret_s16_u16(bd_vld),
 					   HNS3_UINT16_BIT - 1));
 		stat = ~vget_lane_u64(vreinterpret_u64_u16(bd_vld), 0);
-
-		/* load 2 mbuf pointer again */
-		mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);
-
 		if (likely(stat == 0))
 			bd_valid_num = HNS3_DEFAULT_DESCS_PER_LOOP;
 		else
@@ -200,20 +193,20 @@ hns3_recv_burst_vec(struct hns3_rx_queue *__restrict rxq,
 		if (bd_valid_num == 0)
 			break;
 
-		/* use offset to control below data load oper ordering */
-		offset = rxq->offset_table[bd_valid_num];
+		/* load 4 mbuf pointer */
+		mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);
+		mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);
 
-		/* store 2 mbuf pointer into rx_pkts */
+		/* store 4 mbuf pointer into rx_pkts */
 		vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);
+		vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);
 
-		/* read first two descs */
+		/* use offset to control below data load oper ordering */
+		offset = rxq->offset_table[bd_valid_num];
+
+		/* read 4 descs */
 		descs[0] = vld2q_u64((uint64_t *)(rxdp + offset));
 		descs[1] = vld2q_u64((uint64_t *)(rxdp + offset + 1));
-
-		/* store 2 mbuf pointer into rx_pkts again */
-		vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);
-
-		/* read remains two descs */
 		descs[2] = vld2q_u64((uint64_t *)(rxdp + offset + 2));
 		descs[3] = vld2q_u64((uint64_t *)(rxdp + offset + 3));
 
@@ -221,56 +214,47 @@ hns3_recv_burst_vec(struct hns3_rx_queue *__restrict rxq,
 		pkt_mbuf1.val[1] = vreinterpretq_u8_u64(descs[0].val[1]);
 		pkt_mbuf2.val[0] = vreinterpretq_u8_u64(descs[1].val[0]);
 		pkt_mbuf2.val[1] = vreinterpretq_u8_u64(descs[1].val[1]);
+		pkt_mbuf3.val[0] = vreinterpretq_u8_u64(descs[2].val[0]);
+		pkt_mbuf3.val[1] = vreinterpretq_u8_u64(descs[2].val[1]);
+		pkt_mbuf4.val[0] = vreinterpretq_u8_u64(descs[3].val[0]);
+		pkt_mbuf4.val[1] = vreinterpretq_u8_u64(descs[3].val[1]);
 
-		/* pkt 1,2 convert format from desc to pktmbuf */
+		/* 4 packets convert format from desc to pktmbuf */
 		pkt_mb1 = vqtbl2q_u8(pkt_mbuf1, shuf_desc_fields_msk);
 		pkt_mb2 = vqtbl2q_u8(pkt_mbuf2, shuf_desc_fields_msk);
+		pkt_mb3 = vqtbl2q_u8(pkt_mbuf3, shuf_desc_fields_msk);
+		pkt_mb4 = vqtbl2q_u8(pkt_mbuf4, shuf_desc_fields_msk);
 
-		/* store the first 8 bytes of pkt 1,2 mbuf's rearm_data */
-		*(uint64_t *)&sw_ring[pos + 0].mbuf->rearm_data =
-			rxq->mbuf_initializer;
-		*(uint64_t *)&sw_ring[pos + 1].mbuf->rearm_data =
-			rxq->mbuf_initializer;
-
-		/* pkt 1,2 remove crc */
+		/* 4 packets remove crc */
 		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb1), crc_adjust);
 		pkt_mb1 = vreinterpretq_u8_u16(tmp);
 		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb2), crc_adjust);
 		pkt_mb2 = vreinterpretq_u8_u16(tmp);
+		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust);
+		pkt_mb3 = vreinterpretq_u8_u16(tmp);
+		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust);
+		pkt_mb4 = vreinterpretq_u8_u16(tmp);
 
-		pkt_mbuf3.val[0] = vreinterpretq_u8_u64(descs[2].val[0]);
-		pkt_mbuf3.val[1] = vreinterpretq_u8_u64(descs[2].val[1]);
-		pkt_mbuf4.val[0] = vreinterpretq_u8_u64(descs[3].val[0]);
-		pkt_mbuf4.val[1] = vreinterpretq_u8_u64(descs[3].val[1]);
-
-		/* pkt 3,4 convert format from desc to pktmbuf */
-		pkt_mb3 = vqtbl2q_u8(pkt_mbuf3, shuf_desc_fields_msk);
-		pkt_mb4 = vqtbl2q_u8(pkt_mbuf4, shuf_desc_fields_msk);
-
-		/* pkt 1,2 save to rx_pkts mbuf */
+		/* save packet info to rx_pkts mbuf */
 		vst1q_u8((void *)&sw_ring[pos + 0].mbuf->rx_descriptor_fields1,
 			 pkt_mb1);
 		vst1q_u8((void *)&sw_ring[pos + 1].mbuf->rx_descriptor_fields1,
 			 pkt_mb2);
+		vst1q_u8((void *)&sw_ring[pos + 2].mbuf->rx_descriptor_fields1,
+			 pkt_mb3);
+		vst1q_u8((void *)&sw_ring[pos + 3].mbuf->rx_descriptor_fields1,
+			 pkt_mb4);
 
-		/* pkt 3,4 remove crc */
-		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust);
-		pkt_mb3 = vreinterpretq_u8_u16(tmp);
-		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust);
-		pkt_mb4 = vreinterpretq_u8_u16(tmp);
-
-		/* store the first 8 bytes of pkt 3,4 mbuf's rearm_data */
+		/* store the first 8 bytes of packets mbuf's rearm_data */
+		*(uint64_t *)&sw_ring[pos + 0].mbuf->rearm_data =
+			rxq->mbuf_initializer;
+		*(uint64_t *)&sw_ring[pos + 1].mbuf->rearm_data =
+			rxq->mbuf_initializer;
 		*(uint64_t *)&sw_ring[pos + 2].mbuf->rearm_data =
 			rxq->mbuf_initializer;
 		*(uint64_t *)&sw_ring[pos + 3].mbuf->rearm_data =
 			rxq->mbuf_initializer;
 
-		/* pkt 3,4 save to rx_pkts mbuf */
-		vst1q_u8((void *)&sw_ring[pos + 2].mbuf->rx_descriptor_fields1,
-			 pkt_mb3);
-		vst1q_u8((void *)&sw_ring[pos + 3].mbuf->rx_descriptor_fields1,
-			 pkt_mb4);
-
 		rte_prefetch_non_temporal(rxdp + HNS3_DEFAULT_DESCS_PER_LOOP);
 
 		parse_retcode = hns3_desc_parse_field(rxq, &sw_ring[pos],
-- 
2.22.0


  parent reply	other threads:[~2023-07-11 10:28 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-11 10:24 [PATCH 0/5] net/hns3: some performance optimizations Dongdong Liu
2023-07-11 10:24 ` [PATCH 1/5] net/hns3: fix incorrect index to look up table in NEON Rx Dongdong Liu
2023-07-11 12:58   ` Ferruh Yigit
2023-07-11 10:24 ` Dongdong Liu [this message]
2023-07-11 10:24 ` [PATCH 3/5] net/hns3: optimize free mbuf code for SVE Tx Dongdong Liu
2023-09-25 14:21   ` Ferruh Yigit
2023-09-26  4:03     ` lihuisong (C)
2023-07-11 10:24 ` [PATCH 4/5] net/hns3: optimize the rearm mbuf function for SVE Rx Dongdong Liu
2023-07-11 10:24 ` [PATCH 5/5] net/hns3: optimize SVE Rx performance Dongdong Liu
2023-07-11 10:48 ` [PATCH 0/5] net/hns3: some performance optimizations Ferruh Yigit
2023-07-11 11:27   ` Dongdong Liu
2023-07-11 12:26     ` Ferruh Yigit
2023-09-25 14:26       ` Ferruh Yigit
2023-09-25  2:33 ` Jie Hai

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230711102448.11627-3-liudongdong3@huawei.com \
    --to=liudongdong3@huawei.com \
    --cc=andrew.rybchenko@oktetlabs.ru \
    --cc=dev@dpdk.org \
    --cc=ferruh.yigit@amd.com \
    --cc=stable@dpdk.org \
    --cc=thomas@monjalon.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).