From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 29757A034F; Mon, 7 Feb 2022 08:31:55 +0100 (CET) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id DE966411FB; Mon, 7 Feb 2022 08:30:35 +0100 (CET) Received: from mx0b-0016f401.pphosted.com (mx0b-0016f401.pphosted.com [67.231.156.173]) by mails.dpdk.org (Postfix) with ESMTP id A42A641203 for ; Mon, 7 Feb 2022 08:30:34 +0100 (CET) Received: from pps.filterd (m0045851.ppops.net [127.0.0.1]) by mx0b-0016f401.pphosted.com (8.16.1.2/8.16.1.2) with ESMTP id 216Mp8dg022472 for ; Sun, 6 Feb 2022 23:30:34 -0800 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=marvell.com; h=from : to : cc : subject : date : message-id : in-reply-to : references : mime-version : content-type; s=pfpt0220; bh=5aCviEKOVbUQwUZJHJVAkRZLYltYyoKClSzjdsUEBWQ=; b=YusuDFTHbRKh1c0aJ0n4CJeMoUGuZzrxopZoHgB9rtqaoyQoqVqPqtd+rX/qH4IxIVTD BUPMp82+cya+sJHjnWESw5eQE+JcuOjXcDyxXbXzfK+wdn7p/SBUfT9ivwrv6mOrJJ8k LSQUupOb55H+CXEieqaZePzRS39Xx0p0c4asOllwqfcXAGDQdkBaHsNg5NLARAVA3w+d pos0tWCQIM+2y9ariFFM26kaETaJLgDxsWJJwJ66ca/1YG3UFFIFC3zRz1T5DHkXe3AS C6VOOSphykjDgRtzZ9u74St5ZzkteV8tQ020bG17qcSJ7H+j1Hn9yR4kIqkenwrzNHlC BQ== Received: from dc5-exch01.marvell.com ([199.233.59.181]) by mx0b-0016f401.pphosted.com (PPS) with ESMTPS id 3e1smr4p64-1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-SHA384 bits=256 verify=NOT) for ; Sun, 06 Feb 2022 23:30:33 -0800 Received: from DC5-EXCH01.marvell.com (10.69.176.38) by DC5-EXCH01.marvell.com (10.69.176.38) with Microsoft SMTP Server (TLS) id 15.0.1497.2; Sun, 6 Feb 2022 23:30:31 -0800 Received: from maili.marvell.com (10.69.176.80) by DC5-EXCH01.marvell.com (10.69.176.38) with Microsoft SMTP Server id 15.0.1497.2 via Frontend Transport; Sun, 6 Feb 2022 23:30:31 -0800 Received: from hyd1588t430.marvell.com (unknown [10.29.52.204]) by maili.marvell.com (Postfix) with ESMTP id 
5826A3F705C; Sun, 6 Feb 2022 23:30:29 -0800 (PST) From: Nithin Dabilpuram To: , Pavan Nikhilesh , "Shijith Thotton" , Nithin Dabilpuram , Kiran Kumar K , Sunil Kumar Kori , Satha Rao CC: Subject: [PATCH 16/20] net/cnxk: use NPA batch burst free for meta buffers Date: Mon, 7 Feb 2022 12:59:28 +0530 Message-ID: <20220207072932.22409-16-ndabilpuram@marvell.com> X-Mailer: git-send-email 2.8.4 In-Reply-To: <20220207072932.22409-1-ndabilpuram@marvell.com> References: <20220207072932.22409-1-ndabilpuram@marvell.com> MIME-Version: 1.0 Content-Type: text/plain X-Proofpoint-GUID: B3gkJOZjiteJIEnEaIIHj44U-pK6BpW7 X-Proofpoint-ORIG-GUID: B3gkJOZjiteJIEnEaIIHj44U-pK6BpW7 X-Proofpoint-Virus-Version: vendor=baseguard engine=ICAP:2.0.205,Aquarius:18.0.816,Hydra:6.0.425,FMLib:17.11.62.513 definitions=2022-02-07_02,2022-02-03_01,2021-12-02_01 X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Currently meta buffers are freed in bursts of one LMT line, i.e. 15 pointers. Instead, free them in bursts of 16 LMT lines, which is 240 pointers, for better performance. Also mark mempool objects as get and put in the places where this was missing. 
Signed-off-by: Nithin Dabilpuram --- drivers/event/cnxk/cn10k_worker.h | 13 ++++- drivers/net/cnxk/cn10k_rx.h | 114 +++++++++++++++++++++++++++++++------- 2 files changed, 107 insertions(+), 20 deletions(-) diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index 78d029b..42be92d 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -93,12 +93,16 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws, } static __rte_always_inline void -cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id, +cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t __mbuf, uint8_t port_id, const uint32_t tag, const uint32_t flags, const void *const lookup_mem) { const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM | (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0); + struct rte_mbuf *mbuf = (struct rte_mbuf *)__mbuf; + + /* Mark mempool obj as "get" as it is alloc'ed by NIX */ + RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1); cn10k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, (struct rte_mbuf *)mbuf, lookup_mem, @@ -154,6 +158,9 @@ cn10k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags, mbuf = (struct rte_mbuf *)((char *)cqe - sizeof(struct rte_mbuf)); + /* Mark mempool obj as "get" as it is alloc'ed by NIX */ + RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1); + /* Translate meta to mbuf */ if (flags & NIX_RX_OFFLOAD_SECURITY_F) { const uint64_t cq_w1 = *((const uint64_t *)cqe + 1); @@ -275,6 +282,10 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev, *(uint64_t *)gw.u64[1] = (uint64_t)vwqe_hdr; cn10k_process_vwqe(gw.u64[1], port, flags, lookup_mem, ws->tstamp, ws->lmt_base); + /* Mark vector mempool object as get */ + RTE_MEMPOOL_CHECK_COOKIES( + rte_mempool_from_obj((void *)gw.u64[1]), + (void **)&gw.u64[1], 1, 1); } } diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h index a2442d3..a8a4e65 100644 --- 
a/drivers/net/cnxk/cn10k_rx.h +++ b/drivers/net/cnxk/cn10k_rx.h @@ -67,6 +67,24 @@ nix_get_mbuf_from_cqe(void *cq, const uint64_t data_off) } static __rte_always_inline void +nix_sec_flush_meta_burst(uint16_t lmt_id, uint64_t data, uint16_t lnum, + uintptr_t aura_handle) +{ + uint64_t pa; + + /* Prepare PA and Data */ + pa = roc_npa_aura_handle_to_base(aura_handle) + NPA_LF_AURA_BATCH_FREE0; + pa |= ((data & 0x7) << 4); + + data >>= 3; + data <<= 19; + data |= (uint64_t)lmt_id; + data |= (uint64_t)(lnum - 1) << 12; + + roc_lmt_submit_steorl(data, pa); +} + +static __rte_always_inline void nix_sec_flush_meta(uintptr_t laddr, uint16_t lmt_id, uint8_t loff, uintptr_t aura_handle) { @@ -82,7 +100,7 @@ nix_sec_flush_meta(uintptr_t laddr, uint16_t lmt_id, uint8_t loff, *(uint64_t *)laddr = (((uint64_t)(loff & 0x1) << 32) | roc_npa_aura_handle_to_aura(aura_handle)); - pa |= ((loff >> 1) << 4); + pa |= ((uint64_t)(loff >> 1) << 4); roc_lmt_submit_steorl(lmt_id, pa); } @@ -122,6 +140,12 @@ nix_sec_meta_to_mbuf_sc(uint64_t cq_w1, const uint64_t sa_base, uintptr_t laddr, *(uint64_t *)(laddr + (*loff << 3)) = (uint64_t)mbuf; *loff = *loff + 1; + /* Mark meta mbuf as put */ + RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 0); + + /* Mark inner mbuf as get */ + RTE_MEMPOOL_CHECK_COOKIES(inner->pool, (void **)&inner, 1, 1); + return inner; } return mbuf; @@ -181,6 +205,12 @@ nix_sec_meta_to_mbuf(uint64_t cq_w1, uintptr_t sa_base, uintptr_t laddr, *(uint64_t *)(laddr + (*loff << 3)) = (uint64_t)mbuf; *loff = *loff + 1; + /* Mark meta mbuf as put */ + RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 0); + + /* Mark inner mbuf as get */ + RTE_MEMPOOL_CHECK_COOKIES(inner->pool, (void **)&inner, 1, 1); + /* Return inner mbuf */ return inner; } @@ -306,9 +336,6 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag, uint16_t len = rx->pkt_lenm1 + 1; uint64_t ol_flags = 0; - /* Mark mempool obj as "get" as it is alloc'ed by NIX */ - 
RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1); - if (flag & NIX_RX_OFFLOAD_PTYPE_F) mbuf->packet_type = nix_ptype_get(lookup_mem, w1); else @@ -440,6 +467,9 @@ cn10k_nix_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts, mbuf = nix_get_mbuf_from_cqe(cq, data_off); + /* Mark mempool obj as "get" as it is alloc'ed by NIX */ + RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1); + /* Translate meta to mbuf */ if (flags & NIX_RX_OFFLOAD_SECURITY_F) { const uint64_t cq_w1 = *((const uint64_t *)cq + 1); @@ -538,7 +568,7 @@ cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, uint64x2_t rearm3 = vdupq_n_u64(mbuf_initializer); struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3; uint64_t aura_handle, lbase, laddr; - uint8_t loff = 0, lnum = 0; + uint8_t loff = 0, lnum = 0, shft = 0; uint8x16_t f0, f1, f2, f3; uint16_t lmt_id, d_off; uint16_t packets = 0; @@ -709,6 +739,12 @@ cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, ol_flags3 |= nix_rx_olflags_get(lookup_mem, cq3_w1); } + /* Mark mempool obj as "get" as it is alloc'ed by NIX */ + RTE_MEMPOOL_CHECK_COOKIES(mbuf0->pool, (void **)&mbuf0, 1, 1); + RTE_MEMPOOL_CHECK_COOKIES(mbuf1->pool, (void **)&mbuf1, 1, 1); + RTE_MEMPOOL_CHECK_COOKIES(mbuf2->pool, (void **)&mbuf2, 1, 1); + RTE_MEMPOOL_CHECK_COOKIES(mbuf3->pool, (void **)&mbuf3, 1, 1); + /* Translate meta to mbuf */ if (flags & NIX_RX_OFFLOAD_SECURITY_F) { /* Checksum ol_flags will be cleared if mbuf is meta */ @@ -905,12 +941,6 @@ cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, roc_prefetch_store_keep(mbuf2); roc_prefetch_store_keep(mbuf3); - /* Mark mempool obj as "get" as it is alloc'ed by NIX */ - RTE_MEMPOOL_CHECK_COOKIES(mbuf0->pool, (void **)&mbuf0, 1, 1); - RTE_MEMPOOL_CHECK_COOKIES(mbuf1->pool, (void **)&mbuf1, 1, 1); - RTE_MEMPOOL_CHECK_COOKIES(mbuf2->pool, (void **)&mbuf2, 1, 1); - RTE_MEMPOOL_CHECK_COOKIES(mbuf3->pool, (void **)&mbuf3, 1, 
1); - packets += NIX_DESCS_PER_LOOP; if (!(flags & NIX_RX_VWQE_F)) { @@ -920,22 +950,68 @@ cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, } if (flags & NIX_RX_OFFLOAD_SECURITY_F) { - /* Flush when we don't have space for 4 meta */ - if ((15 - loff) < 4) { - nix_sec_flush_meta(laddr, lmt_id + lnum, loff, - aura_handle); + /* Check if lmtline border is crossed and adjust lnum */ + if (loff > 15) { + /* Update aura handle */ + *(uint64_t *)(laddr - 8) = + (((uint64_t)(15 & 0x1) << 32) | + roc_npa_aura_handle_to_aura( + aura_handle)); + loff = loff - 15; + shft += 3; + lnum++; - lnum &= BIT_ULL(ROC_LMT_LINES_PER_CORE_LOG2) - - 1; - /* First pointer starts at 8B offset */ laddr = (uintptr_t)LMT_OFF(lbase, lnum, 8); + /* Pick the pointer from 16th index and put it + * at end of this new line. + */ + *(uint64_t *)(laddr + (loff << 3) - 8) = + *(uint64_t *)(laddr - 8); + } + + /* Flush it when we are in 16th line and might + * overflow it + */ + if (lnum >= 15 && loff >= 12) { + /* 16 LMT Line size m1 */ + uint64_t data = BIT_ULL(48) - 1; + + /* Update aura handle */ + *(uint64_t *)(laddr - 8) = + (((uint64_t)(loff & 0x1) << 32) | + roc_npa_aura_handle_to_aura( + aura_handle)); + + data = (data & ~(0x7UL << shft)) | + (((uint64_t)loff >> 1) << shft); + + /* Send up to 16 lmt lines of pointers */ + nix_sec_flush_meta_burst(lmt_id, data, lnum + 1, + aura_handle); + rte_io_wmb(); + lnum = 0; loff = 0; + shft = 0; + /* First pointer starts at 8B offset */ + laddr = (uintptr_t)LMT_OFF(lbase, lnum, 8); } } } if (flags & NIX_RX_OFFLOAD_SECURITY_F && loff) { - nix_sec_flush_meta(laddr, lmt_id + lnum, loff, aura_handle); + /* 16 LMT Line size m1 */ + uint64_t data = BIT_ULL(48) - 1; + + /* Update aura handle */ + *(uint64_t *)(laddr - 8) = + (((uint64_t)(loff & 0x1) << 32) | + roc_npa_aura_handle_to_aura(aura_handle)); + + data = (data & ~(0x7UL << shft)) | + (((uint64_t)loff >> 1) << shft); + + /* Send up to 16 lmt lines of pointers */ + 
nix_sec_flush_meta_burst(lmt_id, data, lnum + 1, aura_handle); if (flags & NIX_RX_VWQE_F) plt_io_wmb(); } -- 2.8.4