From mboxrd@z Thu Jan 1 00:00:00 1970
From: Nithin Dabilpuram
To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao,
	Harman Kalra
Cc: Rahul Bhansali, Pavan Nikhilesh
Subject: [PATCH 22/33] net/cnxk: support Rx burst vector for cn20k
Date: Tue, 10 Sep 2024 14:28:58 +0530
Message-ID: <20240910085909.1514457-23-ndabilpuram@marvell.com>
X-Mailer: git-send-email 2.34.1
In-Reply-To: <20240910085909.1514457-1-ndabilpuram@marvell.com>
References: <20240910085909.1514457-1-ndabilpuram@marvell.com>
MIME-Version: 1.0
Content-Type: text/plain
Content-Transfer-Encoding: 8bit

Add Rx burst vector support for cn20k.

Signed-off-by: Nithin Dabilpuram
Signed-off-by: Jerin Jacob
Signed-off-by: Rahul Bhansali
Signed-off-by: Pavan Nikhilesh
---
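Notes (not part of the commit): the fast path below recovers four mbuf
pointers per loop straight from the NIX_RX_SG_S words of four CQEs
using a NEON lane gather. A minimal standalone sketch of that idea,
assuming a hypothetical two-word descriptor layout (sg_desc, DATA_OFF
and extract_mbuf_pair are illustrative names, not part of this patch):

  #include <arm_neon.h>
  #include <stdint.h>

  /* Hypothetical two-word segment descriptor: word 0 carries sizes,
   * word 1 carries the packet buffer address (NIX_RX_SG_S-like). */
  struct sg_desc {
          uint64_t w0;
          uint64_t buf_addr;
  };

  /* Illustrative fixed distance from buffer start back to the mbuf
   * header, standing in for rxq->data_off. */
  #define DATA_OFF 128

  /* Recover two mbuf addresses from two descriptors at once:
   * vzip2q_u64(a, b) = { a[1], b[1] } gathers the buffer-address
   * lane of each descriptor, then vqsubq_u64 rebases both back to
   * the mbuf start. */
  static inline void
  extract_mbuf_pair(const struct sg_desc *d0, const struct sg_desc *d1,
                    void **m0, void **m1)
  {
          uint64x2_t w8_0 = vld1q_u64((const uint64_t *)d0);
          uint64x2_t w8_1 = vld1q_u64((const uint64_t *)d1);
          uint64x2_t bufs = vzip2q_u64(w8_0, w8_1);
          /* Saturating subtract: a zero address clamps to zero
           * instead of wrapping around. */
          uint64x2_t mbufs = vqsubq_u64(bufs, vdupq_n_u64(DATA_OFF));

          *m0 = (void *)(uintptr_t)vgetq_lane_u64(mbufs, 0);
          *m1 = (void *)(uintptr_t)vgetq_lane_u64(mbufs, 1);
  }

Two such pairs per iteration give the four packets per loop
(NIX_DESCS_PER_LOOP), with data_off broadcast once outside the loop.

The timesync block condenses a vector compare into a scalar bitmask
the same way as this isolated sketch (lanes_equal_mask is likewise an
illustrative name):

  /* Turn four per-lane compare results into a 4-bit scalar mask:
   * vceqq_u32 yields all-ones lanes on match; AND with {1,2,4,8}
   * and a horizontal add (vaddvq_u32) leave one bit per matching
   * lane. */
  static inline uint8_t
  lanes_equal_mask(uint32x4_t vals, uint32x4_t ref)
  {
          const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8};
          uint32x4_t eq = vceqq_u32(vals, ref);

          return vaddvq_u32(vandq_u32(eq, and_mask)) & 0xF;
  }
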
 drivers/net/cnxk/cn20k_rx.h | 463 +++++++++++++++++++++++++++++++++++-
 1 file changed, 459 insertions(+), 4 deletions(-)

diff --git a/drivers/net/cnxk/cn20k_rx.h b/drivers/net/cnxk/cn20k_rx.h
index 22abf7bbd8..d1bf0c615e 100644
--- a/drivers/net/cnxk/cn20k_rx.h
+++ b/drivers/net/cnxk/cn20k_rx.h
@@ -420,6 +420,463 @@ cn20k_nix_flush_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pk
 	return nb_pkts;
 }
 
+#if defined(RTE_ARCH_ARM64)
+
+static __rte_always_inline uint64_t
+nix_vlan_update(const uint64_t w2, uint64_t ol_flags, uint8x16_t *f)
+{
+	if (w2 & BIT_ULL(21) /* vtag0_gone */) {
+		ol_flags |= RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED;
+		*f = vsetq_lane_u16((uint16_t)(w2 >> 32), *f, 5);
+	}
+
+	return ol_flags;
+}
+
+static __rte_always_inline uint64_t
+nix_qinq_update(const uint64_t w2, uint64_t ol_flags, struct rte_mbuf *mbuf)
+{
+	if (w2 & BIT_ULL(23) /* vtag1_gone */) {
+		ol_flags |= RTE_MBUF_F_RX_QINQ | RTE_MBUF_F_RX_QINQ_STRIPPED;
+		mbuf->vlan_tci_outer = (uint16_t)(w2 >> 48);
+	}
+
+	return ol_flags;
+}
+
+#define NIX_PUSH_META_TO_FREE(_mbuf, _laddr, _loff_p)                          \
+	do {                                                                   \
+		*(uint64_t *)((_laddr) + (*(_loff_p) << 3)) = (uint64_t)_mbuf; \
+		*(_loff_p) = *(_loff_p) + 1;                                   \
+		/* Mark meta mbuf as put */                                    \
+		RTE_MEMPOOL_CHECK_COOKIES(_mbuf->pool, (void **)&_mbuf, 1, 0); \
+	} while (0)
+
+static __rte_always_inline uint16_t
+cn20k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, const uint16_t flags,
+			   void *lookup_mem, struct cnxk_timesync_info *tstamp, uintptr_t lmt_base,
+			   uint64_t meta_aura)
+{
+	struct cn20k_eth_rxq *rxq = args;
+	const uint64_t mbuf_initializer =
+		(flags & NIX_RX_VWQE_F) ? *(uint64_t *)args : rxq->mbuf_initializer;
+	const uint64x2_t data_off = flags & NIX_RX_VWQE_F ? vdupq_n_u64(RTE_PKTMBUF_HEADROOM) :
+							    vdupq_n_u64(rxq->data_off);
+	const uint32_t qmask = flags & NIX_RX_VWQE_F ? 0 : rxq->qmask;
+	const uint64_t wdata = flags & NIX_RX_VWQE_F ? 0 : rxq->wdata;
+	const uintptr_t desc = flags & NIX_RX_VWQE_F ? 0 : rxq->desc;
+	uint64x2_t cq0_w8, cq1_w8, cq2_w8, cq3_w8, mbuf01, mbuf23;
+	uintptr_t cpth0 = 0, cpth1 = 0, cpth2 = 0, cpth3 = 0;
+	uint64_t ol_flags0, ol_flags1, ol_flags2, ol_flags3;
+	uint64x2_t rearm0 = vdupq_n_u64(mbuf_initializer);
+	uint64x2_t rearm1 = vdupq_n_u64(mbuf_initializer);
+	uint64x2_t rearm2 = vdupq_n_u64(mbuf_initializer);
+	uint64x2_t rearm3 = vdupq_n_u64(mbuf_initializer);
+	struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3;
+	uint8x16_t f0, f1, f2, f3;
+	uintptr_t sa_base = 0;
+	uint16_t packets = 0;
+	uint16_t pkts_left;
+	uint32_t head;
+	uintptr_t cq0;
+
+	(void)lmt_base;
+	(void)meta_aura;
+
+	if (!(flags & NIX_RX_VWQE_F)) {
+		lookup_mem = rxq->lookup_mem;
+		head = rxq->head;
+
+		pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask);
+		pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
+		/* Packets have to be floor-aligned to NIX_DESCS_PER_LOOP */
+		pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
+		if (flags & NIX_RX_OFFLOAD_TSTAMP_F)
+			tstamp = rxq->tstamp;
+
+		cq0 = desc + CQE_SZ(head);
+		rte_prefetch0(CQE_PTR_OFF(cq0, 0, 64, flags));
+		rte_prefetch0(CQE_PTR_OFF(cq0, 1, 64, flags));
+		rte_prefetch0(CQE_PTR_OFF(cq0, 2, 64, flags));
+		rte_prefetch0(CQE_PTR_OFF(cq0, 3, 64, flags));
+	} else {
+		RTE_SET_USED(head);
+	}
+
+	while (packets < pkts) {
+		if (!(flags & NIX_RX_VWQE_F)) {
+			/* Exit loop if head is about to wrap and become
+			 * unaligned.
+			 */
+			if (((head + NIX_DESCS_PER_LOOP - 1) & qmask) < NIX_DESCS_PER_LOOP) {
+				pkts_left += (pkts - packets);
+				break;
+			}
+
+			cq0 = desc + CQE_SZ(head);
+		} else {
+			cq0 = (uintptr_t)&mbufs[packets];
+		}
+
+		if (flags & NIX_RX_VWQE_F) {
+			if (pkts - packets > 4) {
+				rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 4, 0, flags));
+				rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 5, 0, flags));
+				rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 6, 0, flags));
+				rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 7, 0, flags));
+
+				if (likely(pkts - packets > 8)) {
+					rte_prefetch1(CQE_PTR_OFF(cq0, 8, 0, flags));
+					rte_prefetch1(CQE_PTR_OFF(cq0, 9, 0, flags));
+					rte_prefetch1(CQE_PTR_OFF(cq0, 10, 0, flags));
+					rte_prefetch1(CQE_PTR_OFF(cq0, 11, 0, flags));
+					if (pkts - packets > 12) {
+						rte_prefetch1(CQE_PTR_OFF(cq0, 12, 0, flags));
+						rte_prefetch1(CQE_PTR_OFF(cq0, 13, 0, flags));
+						rte_prefetch1(CQE_PTR_OFF(cq0, 14, 0, flags));
+						rte_prefetch1(CQE_PTR_OFF(cq0, 15, 0, flags));
+					}
+				}
+
+				rte_prefetch0(CQE_PTR_DIFF(cq0, 4, RTE_PKTMBUF_HEADROOM, flags));
+				rte_prefetch0(CQE_PTR_DIFF(cq0, 5, RTE_PKTMBUF_HEADROOM, flags));
+				rte_prefetch0(CQE_PTR_DIFF(cq0, 6, RTE_PKTMBUF_HEADROOM, flags));
+				rte_prefetch0(CQE_PTR_DIFF(cq0, 7, RTE_PKTMBUF_HEADROOM, flags));
+
+				if (likely(pkts - packets > 8)) {
+					rte_prefetch0(
+						CQE_PTR_DIFF(cq0, 8, RTE_PKTMBUF_HEADROOM, flags));
+					rte_prefetch0(
+						CQE_PTR_DIFF(cq0, 9, RTE_PKTMBUF_HEADROOM, flags));
+					rte_prefetch0(
+						CQE_PTR_DIFF(cq0, 10, RTE_PKTMBUF_HEADROOM, flags));
+					rte_prefetch0(
+						CQE_PTR_DIFF(cq0, 11, RTE_PKTMBUF_HEADROOM, flags));
+				}
+			}
+		} else {
+			if (pkts - packets > 8) {
+				if (flags) {
+					rte_prefetch0(CQE_PTR_OFF(cq0, 8, 0, flags));
+					rte_prefetch0(CQE_PTR_OFF(cq0, 9, 0, flags));
+					rte_prefetch0(CQE_PTR_OFF(cq0, 10, 0, flags));
+					rte_prefetch0(CQE_PTR_OFF(cq0, 11, 0, flags));
+				}
+				rte_prefetch0(CQE_PTR_OFF(cq0, 8, 64, flags));
+				rte_prefetch0(CQE_PTR_OFF(cq0, 9, 64, flags));
+				rte_prefetch0(CQE_PTR_OFF(cq0, 10, 64, flags));
+				rte_prefetch0(CQE_PTR_OFF(cq0, 11, 64, flags));
+			}
+		}
+
+		if (!(flags & NIX_RX_VWQE_F)) {
+			/* Get NIX_RX_SG_S for size and buffer pointer */
+			cq0_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 0, 64, flags));
+			cq1_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 1, 64, flags));
+			cq2_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 2, 64, flags));
+			cq3_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 3, 64, flags));
+
+			/* Extract mbuf from NIX_RX_SG_S */
+			mbuf01 = vzip2q_u64(cq0_w8, cq1_w8);
+			mbuf23 = vzip2q_u64(cq2_w8, cq3_w8);
+			mbuf01 = vqsubq_u64(mbuf01, data_off);
+			mbuf23 = vqsubq_u64(mbuf23, data_off);
+		} else {
+			mbuf01 = vsubq_u64(vld1q_u64((uint64_t *)cq0),
+					   vdupq_n_u64(sizeof(struct rte_mbuf)));
+			mbuf23 = vsubq_u64(vld1q_u64((uint64_t *)(cq0 + 16)),
+					   vdupq_n_u64(sizeof(struct rte_mbuf)));
+		}
+
+		/* Move mbufs to scalar registers for future use */
+		mbuf0 = (struct rte_mbuf *)vgetq_lane_u64(mbuf01, 0);
+		mbuf1 = (struct rte_mbuf *)vgetq_lane_u64(mbuf01, 1);
+		mbuf2 = (struct rte_mbuf *)vgetq_lane_u64(mbuf23, 0);
+		mbuf3 = (struct rte_mbuf *)vgetq_lane_u64(mbuf23, 1);
+
+		/* Mark mempool obj as "get" as it is alloc'ed by NIX */
+		RTE_MEMPOOL_CHECK_COOKIES(mbuf0->pool, (void **)&mbuf0, 1, 1);
+		RTE_MEMPOOL_CHECK_COOKIES(mbuf1->pool, (void **)&mbuf1, 1, 1);
+		RTE_MEMPOOL_CHECK_COOKIES(mbuf2->pool, (void **)&mbuf2, 1, 1);
+		RTE_MEMPOOL_CHECK_COOKIES(mbuf3->pool, (void **)&mbuf3, 1, 1);
+
+		if (!(flags & NIX_RX_VWQE_F)) {
+			/* Mask to get packet len from NIX_RX_SG_S */
+			const uint8x16_t shuf_msk = {
+				0xFF, 0xFF, /* pkt_type set as unknown */
+				0xFF, 0xFF, /* pkt_type set as unknown */
+				0, 1,	    /* octet 1~0, low 16 bits pkt_len */
+				0xFF, 0xFF, /* skip high 16 bits pkt_len, zero out */
+				0, 1,	    /* octet 1~0, 16 bits data_len */
+				0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF};
+
+			/* Form the rx_descriptor_fields1 with pkt_len and data_len */
+			f0 = vqtbl1q_u8(cq0_w8, shuf_msk);
+			f1 = vqtbl1q_u8(cq1_w8, shuf_msk);
+			f2 = vqtbl1q_u8(cq2_w8, shuf_msk);
+			f3 = vqtbl1q_u8(cq3_w8, shuf_msk);
+		}
+
+		/* Load CQE word0 and word 1 */
+		const uint64_t cq0_w0 = *CQE_PTR_OFF(cq0, 0, 0, flags);
+		const uint64_t cq0_w1 = *CQE_PTR_OFF(cq0, 0, 8, flags);
+		const uint64_t cq0_w2 = *CQE_PTR_OFF(cq0, 0, 16, flags);
+		const uint64_t cq1_w0 = *CQE_PTR_OFF(cq0, 1, 0, flags);
+		const uint64_t cq1_w1 = *CQE_PTR_OFF(cq0, 1, 8, flags);
+		const uint64_t cq1_w2 = *CQE_PTR_OFF(cq0, 1, 16, flags);
+		const uint64_t cq2_w0 = *CQE_PTR_OFF(cq0, 2, 0, flags);
+		const uint64_t cq2_w1 = *CQE_PTR_OFF(cq0, 2, 8, flags);
+		const uint64_t cq2_w2 = *CQE_PTR_OFF(cq0, 2, 16, flags);
+		const uint64_t cq3_w0 = *CQE_PTR_OFF(cq0, 3, 0, flags);
+		const uint64_t cq3_w1 = *CQE_PTR_OFF(cq0, 3, 8, flags);
+		const uint64_t cq3_w2 = *CQE_PTR_OFF(cq0, 3, 16, flags);
+
+		if (flags & NIX_RX_VWQE_F) {
+			uint16_t psize0, psize1, psize2, psize3;
+
+			psize0 = (cq0_w2 & 0xFFFF) + 1;
+			psize1 = (cq1_w2 & 0xFFFF) + 1;
+			psize2 = (cq2_w2 & 0xFFFF) + 1;
+			psize3 = (cq3_w2 & 0xFFFF) + 1;
+
+			f0 = vdupq_n_u64(0);
+			f1 = vdupq_n_u64(0);
+			f2 = vdupq_n_u64(0);
+			f3 = vdupq_n_u64(0);
+
+			f0 = vsetq_lane_u16(psize0, f0, 2);
+			f0 = vsetq_lane_u16(psize0, f0, 4);
+
+			f1 = vsetq_lane_u16(psize1, f1, 2);
+			f1 = vsetq_lane_u16(psize1, f1, 4);
+
+			f2 = vsetq_lane_u16(psize2, f2, 2);
+			f2 = vsetq_lane_u16(psize2, f2, 4);
+
+			f3 = vsetq_lane_u16(psize3, f3, 2);
+			f3 = vsetq_lane_u16(psize3, f3, 4);
+		}
+
+		if (flags & NIX_RX_OFFLOAD_RSS_F) {
+			/* Fill rss in the rx_descriptor_fields1 */
+			f0 = vsetq_lane_u32(cq0_w0, f0, 3);
+			f1 = vsetq_lane_u32(cq1_w0, f1, 3);
+			f2 = vsetq_lane_u32(cq2_w0, f2, 3);
+			f3 = vsetq_lane_u32(cq3_w0, f3, 3);
+			ol_flags0 = RTE_MBUF_F_RX_RSS_HASH;
+			ol_flags1 = RTE_MBUF_F_RX_RSS_HASH;
+			ol_flags2 = RTE_MBUF_F_RX_RSS_HASH;
+			ol_flags3 = RTE_MBUF_F_RX_RSS_HASH;
+		} else {
+			ol_flags0 = 0;
+			ol_flags1 = 0;
+			ol_flags2 = 0;
+			ol_flags3 = 0;
+		}
+
+		if (flags & NIX_RX_OFFLOAD_PTYPE_F) {
+			/* Fill packet_type in the rx_descriptor_fields1 */
+			f0 = vsetq_lane_u32(nix_ptype_get(lookup_mem, cq0_w1), f0, 0);
+			f1 = vsetq_lane_u32(nix_ptype_get(lookup_mem, cq1_w1), f1, 0);
+			f2 = vsetq_lane_u32(nix_ptype_get(lookup_mem, cq2_w1), f2, 0);
+			f3 = vsetq_lane_u32(nix_ptype_get(lookup_mem, cq3_w1), f3, 0);
+		}
+
+		if (flags & NIX_RX_OFFLOAD_CHECKSUM_F) {
+			ol_flags0 |= (uint64_t)nix_rx_olflags_get(lookup_mem, cq0_w1);
+			ol_flags1 |= (uint64_t)nix_rx_olflags_get(lookup_mem, cq1_w1);
+			ol_flags2 |= (uint64_t)nix_rx_olflags_get(lookup_mem, cq2_w1);
+			ol_flags3 |= (uint64_t)nix_rx_olflags_get(lookup_mem, cq3_w1);
+		}
+
+		if (flags & NIX_RX_OFFLOAD_VLAN_STRIP_F) {
+			ol_flags0 = nix_vlan_update(cq0_w2, ol_flags0, &f0);
+			ol_flags1 = nix_vlan_update(cq1_w2, ol_flags1, &f1);
+			ol_flags2 = nix_vlan_update(cq2_w2, ol_flags2, &f2);
+			ol_flags3 = nix_vlan_update(cq3_w2, ol_flags3, &f3);
+
+			ol_flags0 = nix_qinq_update(cq0_w2, ol_flags0, mbuf0);
+			ol_flags1 = nix_qinq_update(cq1_w2, ol_flags1, mbuf1);
+			ol_flags2 = nix_qinq_update(cq2_w2, ol_flags2, mbuf2);
+			ol_flags3 = nix_qinq_update(cq3_w2, ol_flags3, mbuf3);
+		}
+
+		if (flags & NIX_RX_OFFLOAD_MARK_UPDATE_F) {
+			ol_flags0 = nix_update_match_id(*(uint16_t *)CQE_PTR_OFF(cq0, 0, 38, flags),
+							ol_flags0, mbuf0);
+			ol_flags1 = nix_update_match_id(*(uint16_t *)CQE_PTR_OFF(cq0, 1, 38, flags),
+							ol_flags1, mbuf1);
+			ol_flags2 = nix_update_match_id(*(uint16_t *)CQE_PTR_OFF(cq0, 2, 38, flags),
+							ol_flags2, mbuf2);
+			ol_flags3 = nix_update_match_id(*(uint16_t *)CQE_PTR_OFF(cq0, 3, 38, flags),
+							ol_flags3, mbuf3);
+		}
+
+		if ((flags & NIX_RX_OFFLOAD_TSTAMP_F) && ((flags & NIX_RX_VWQE_F) && tstamp)) {
+			const uint16x8_t len_off = {0, /* ptype 0:15 */
+						    0, /* ptype 16:32 */
+						    CNXK_NIX_TIMESYNC_RX_OFFSET, /* pktlen 0:15 */
+						    0, /* pktlen 16:32 */
+						    CNXK_NIX_TIMESYNC_RX_OFFSET, /* datalen 0:15 */
+						    0,
+						    0,
+						    0};
+			const uint32x4_t ptype = {
+				RTE_PTYPE_L2_ETHER_TIMESYNC, RTE_PTYPE_L2_ETHER_TIMESYNC,
+				RTE_PTYPE_L2_ETHER_TIMESYNC, RTE_PTYPE_L2_ETHER_TIMESYNC};
+			const uint64_t ts_olf = RTE_MBUF_F_RX_IEEE1588_PTP |
+						RTE_MBUF_F_RX_IEEE1588_TMST |
+						tstamp->rx_tstamp_dynflag;
+			const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8};
+			uint64x2_t ts01, ts23, mask;
+			uint64_t ts[4];
+			uint8_t res;
+
+			/* Subtract timesync length from total pkt length. */
+			f0 = vsubq_u16(f0, len_off);
+			f1 = vsubq_u16(f1, len_off);
+			f2 = vsubq_u16(f2, len_off);
+			f3 = vsubq_u16(f3, len_off);
+
+			/* Get the address of actual timestamp. */
+			ts01 = vaddq_u64(mbuf01, data_off);
+			ts23 = vaddq_u64(mbuf23, data_off);
+			/* Load timestamp from address. */
+			ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01, 0), ts01, 0);
+			ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01, 1), ts01, 1);
+			ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23, 0), ts23, 0);
+			ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23, 1), ts23, 1);
+			/* Convert from be to cpu byteorder. */
+			ts01 = vrev64q_u8(ts01);
+			ts23 = vrev64q_u8(ts23);
+			/* Store timestamp into scalar for later use. */
+			ts[0] = vgetq_lane_u64(ts01, 0);
+			ts[1] = vgetq_lane_u64(ts01, 1);
+			ts[2] = vgetq_lane_u64(ts23, 0);
+			ts[3] = vgetq_lane_u64(ts23, 1);
+
+			/* Store timestamp into dynfield. */
+			*cnxk_nix_timestamp_dynfield(mbuf0, tstamp) = ts[0];
+			*cnxk_nix_timestamp_dynfield(mbuf1, tstamp) = ts[1];
+			*cnxk_nix_timestamp_dynfield(mbuf2, tstamp) = ts[2];
+			*cnxk_nix_timestamp_dynfield(mbuf3, tstamp) = ts[3];
+
+			/* Generate ptype mask to filter L2 ether timesync */
+			mask = vdupq_n_u32(vgetq_lane_u32(f0, 0));
+			mask = vsetq_lane_u32(vgetq_lane_u32(f1, 0), mask, 1);
+			mask = vsetq_lane_u32(vgetq_lane_u32(f2, 0), mask, 2);
+			mask = vsetq_lane_u32(vgetq_lane_u32(f3, 0), mask, 3);
+
+			/* Match against L2 ether timesync. */
+			mask = vceqq_u32(mask, ptype);
+			/* Convert from vector to scalar mask. */
+			res = vaddvq_u32(vandq_u32(mask, and_mask));
+			res &= 0xF;
+
+			if (res) {
+				/* Fill in the ol_flags for any packets that
+				 * matched.
+				 */
+				ol_flags0 |= ((res & 0x1) ? ts_olf : 0);
+				ol_flags1 |= ((res & 0x2) ? ts_olf : 0);
+				ol_flags2 |= ((res & 0x4) ? ts_olf : 0);
+				ol_flags3 |= ((res & 0x8) ? ts_olf : 0);
+
+				/* Update Rxq timestamp with the latest
+				 * timestamp.
+				 */
+				tstamp->rx_ready = 1;
+				tstamp->rx_tstamp = ts[31 - rte_clz32(res)];
+			}
+		}
+
+		/* Form rearm_data with ol_flags */
+		rearm0 = vsetq_lane_u64(ol_flags0, rearm0, 1);
+		rearm1 = vsetq_lane_u64(ol_flags1, rearm1, 1);
+		rearm2 = vsetq_lane_u64(ol_flags2, rearm2, 1);
+		rearm3 = vsetq_lane_u64(ol_flags3, rearm3, 1);
+
+		/* Update rx_descriptor_fields1 */
+		vst1q_u64((uint64_t *)mbuf0->rx_descriptor_fields1, f0);
+		vst1q_u64((uint64_t *)mbuf1->rx_descriptor_fields1, f1);
+		vst1q_u64((uint64_t *)mbuf2->rx_descriptor_fields1, f2);
+		vst1q_u64((uint64_t *)mbuf3->rx_descriptor_fields1, f3);
+
+		/* Update rearm_data */
+		vst1q_u64((uint64_t *)mbuf0->rearm_data, rearm0);
+		vst1q_u64((uint64_t *)mbuf1->rearm_data, rearm1);
+		vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2);
+		vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
+
+		if (flags & NIX_RX_MULTI_SEG_F) {
+			/* Multi segment is enabled, build mseg list for
+			 * individual mbufs in scalar mode.
+			 */
+			nix_cqe_xtract_mseg((union nix_rx_parse_u *)(CQE_PTR_OFF(cq0, 0, 8, flags)),
+					    mbuf0, mbuf_initializer, cpth0, sa_base, flags);
+			nix_cqe_xtract_mseg((union nix_rx_parse_u *)(CQE_PTR_OFF(cq0, 1, 8, flags)),
+					    mbuf1, mbuf_initializer, cpth1, sa_base, flags);
+			nix_cqe_xtract_mseg((union nix_rx_parse_u *)(CQE_PTR_OFF(cq0, 2, 8, flags)),
+					    mbuf2, mbuf_initializer, cpth2, sa_base, flags);
+			nix_cqe_xtract_mseg((union nix_rx_parse_u *)(CQE_PTR_OFF(cq0, 3, 8, flags)),
+					    mbuf3, mbuf_initializer, cpth3, sa_base, flags);
+		}
+
+		/* Store the mbufs to rx_pkts */
+		vst1q_u64((uint64_t *)&mbufs[packets], mbuf01);
+		vst1q_u64((uint64_t *)&mbufs[packets + 2], mbuf23);
+
+		nix_mbuf_validate_next(mbuf0);
+		nix_mbuf_validate_next(mbuf1);
+		nix_mbuf_validate_next(mbuf2);
+		nix_mbuf_validate_next(mbuf3);
+
+		packets += NIX_DESCS_PER_LOOP;
+
+		if (!(flags & NIX_RX_VWQE_F)) {
+			/* Advance head pointer and packets */
+			head += NIX_DESCS_PER_LOOP;
+			head &= qmask;
+		}
+	}
+
+	if (flags & NIX_RX_VWQE_F)
+		return packets;
+
+	rxq->head = head;
+	rxq->available -= packets;
+
+	rte_io_wmb();
+	/* Free all the CQs that we've processed */
+	plt_write64((rxq->wdata | packets), rxq->cq_door);
+
+	if (unlikely(pkts_left))
+		packets += cn20k_nix_recv_pkts(args, &mbufs[packets], pkts_left, flags);
+
+	return packets;
+}
+
+#else
+
+static inline uint16_t
+cn20k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, const uint16_t flags,
+			   void *lookup_mem, struct cnxk_timesync_info *tstamp, uintptr_t lmt_base,
+			   uint64_t meta_aura)
+{
+	RTE_SET_USED(args);
+	RTE_SET_USED(mbufs);
+	RTE_SET_USED(pkts);
+	RTE_SET_USED(flags);
+	RTE_SET_USED(lookup_mem);
+	RTE_SET_USED(tstamp);
+	RTE_SET_USED(lmt_base);
+	RTE_SET_USED(meta_aura);
+
+	return 0;
+}
+
+#endif
+
 #define RSS_F	  NIX_RX_OFFLOAD_RSS_F
 #define PTYPE_F	  NIX_RX_OFFLOAD_PTYPE_F
 #define CKSUM_F	  NIX_RX_OFFLOAD_CHECKSUM_F
@@ -618,10 +1075,8 @@ NIX_RX_FASTPATH_MODES
 	uint16_t __rte_noinline __rte_hot fn(void *rx_queue, struct rte_mbuf **rx_pkts,            \
 					     uint16_t pkts)                                        \
 	{                                                                                          \
-		RTE_SET_USED(rx_queue);                                                            \
-		RTE_SET_USED(rx_pkts);                                                             \
-		RTE_SET_USED(pkts);                                                                \
-		return 0;                                                                          \
+		return cn20k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, (flags), NULL, NULL, 0, \
+						  0);                                              \
 	}
 
 #define NIX_RX_RECV_VEC_MSEG(fn, flags) NIX_RX_RECV_VEC(fn, flags | NIX_RX_MULTI_SEG_F)
-- 
2.34.1