From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 6A705A0547; Fri, 5 Mar 2021 14:42:05 +0100 (CET) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id E7DEF22A39C; Fri, 5 Mar 2021 14:40:12 +0100 (CET) Received: from mx0b-0016f401.pphosted.com (mx0b-0016f401.pphosted.com [67.231.156.173]) by mails.dpdk.org (Postfix) with ESMTP id A0E2222A39C for ; Fri, 5 Mar 2021 14:40:11 +0100 (CET) Received: from pps.filterd (m0045851.ppops.net [127.0.0.1]) by mx0b-0016f401.pphosted.com (8.16.0.43/8.16.0.43) with SMTP id 125De000008966 for ; Fri, 5 Mar 2021 05:40:11 -0800 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=marvell.com; h=from : to : cc : subject : date : message-id : in-reply-to : references : mime-version : content-type; s=pfpt0220; bh=mewvWGUtcu/Gz6e6Y2BNGxroPUHlSY+mAp2IJss/qkk=; b=F5SicNKLYKaNMACZffls92Sh+Fe3xnPrz/jgUCPJ5jvbNoZM9o5G+h/cnUeSrulOnuDf JCLR/iE+tvMG4X7HwGCQHqVkj2yoMAIR4Utiyr/t3S6q7zV7J0cnLj3m2FsdegtGfqui 0AQ+WN23+tyMe1Njg52U8Bk19uC0QSePRzxtTe4hOiSI2VncCZQeurhcfboxyeGLGpxe pboQeI94XXEVWNzvoRH90rRN69YU9calWzUZrZye+/E0gBsGPr7/b2hZvlBb8M+AXszk 5AUBYBuPWgr/a/8EUf/4PlhRIcnMHMF37SSKNMlVedy0b0qaVLb+S7gBpdU18G+OWLYz +g== Received: from dc5-exch02.marvell.com ([199.233.59.182]) by mx0b-0016f401.pphosted.com with ESMTP id 370p7p0dc3-1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-SHA384 bits=256 verify=NOT) for ; Fri, 05 Mar 2021 05:40:11 -0800 Received: from SC-EXCH03.marvell.com (10.93.176.83) by DC5-EXCH02.marvell.com (10.69.176.39) with Microsoft SMTP Server (TLS) id 15.0.1497.2; Fri, 5 Mar 2021 05:40:09 -0800 Received: from DC5-EXCH01.marvell.com (10.69.176.38) by SC-EXCH03.marvell.com (10.93.176.83) with Microsoft SMTP Server (TLS) id 15.0.1497.2; Fri, 5 Mar 2021 05:40:08 -0800 Received: from maili.marvell.com (10.69.176.80) by DC5-EXCH01.marvell.com (10.69.176.38) with Microsoft SMTP Server id 
15.0.1497.2 via Frontend Transport; Fri, 5 Mar 2021 05:40:08 -0800 Received: from hyd1588t430.marvell.com (unknown [10.29.52.204]) by maili.marvell.com (Postfix) with ESMTP id EFDA73F7040; Fri, 5 Mar 2021 05:40:05 -0800 (PST) From: Nithin Dabilpuram To: CC: , , , , , , Date: Fri, 5 Mar 2021 19:08:39 +0530 Message-ID: <20210305133918.8005-14-ndabilpuram@marvell.com> X-Mailer: git-send-email 2.8.4 In-Reply-To: <20210305133918.8005-1-ndabilpuram@marvell.com> References: <20210305133918.8005-1-ndabilpuram@marvell.com> MIME-Version: 1.0 Content-Type: text/plain X-Proofpoint-Virus-Version: vendor=fsecure engine=2.50.10434:6.0.369, 18.0.761 definitions=2021-03-05_08:2021-03-03, 2021-03-05 signatures=0 Subject: [dpdk-dev] [PATCH 13/52] common/cnxk: add npa bulk alloc/free support X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: Ashwin Sekhar T K Add APIs to alloc/free in bulk from NPA pool. Signed-off-by: Ashwin Sekhar T K --- drivers/common/cnxk/roc_npa.h | 229 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 229 insertions(+) diff --git a/drivers/common/cnxk/roc_npa.h b/drivers/common/cnxk/roc_npa.h index 0dffede..ed63718 100644 --- a/drivers/common/cnxk/roc_npa.h +++ b/drivers/common/cnxk/roc_npa.h @@ -8,6 +8,11 @@ #define ROC_AURA_ID_MASK (BIT_ULL(16) - 1) #define ROC_AURA_OP_LIMIT_MASK (BIT_ULL(36) - 1) +/* 16 CASP instructions can be outstanding in CN9k, but we use only 15 + * outstanding CASPs as we run out of registers. + */ +#define ROC_CN9K_NPA_BULK_ALLOC_MAX_PTRS 30 + /* * Generate 64bit handle to have optimized alloc and free aura operation. * 0 - ROC_AURA_ID_MASK for storing the aura_id. 
@@ -141,6 +146,230 @@ roc_npa_aura_op_available(uint64_t aura_handle) return reg & 0xFFFFFFFFF; } +/* Free num pointers taken from buf[] to the aura, issuing one roc_npa_aura_op_free() call per pointer; fabs is passed through unchanged to every call. */ static inline void +roc_npa_aura_op_bulk_free(uint64_t aura_handle, uint64_t const *buf, + unsigned int num, const int fabs) +{ + unsigned int i; + + for (i = 0; i < num; i++) { + const uint64_t inbuf = buf[i]; + + roc_npa_aura_op_free(aura_handle, fabs, inbuf); + } +} + +/* Allocate num pointers from the aura into buf[] and return how many non-zero pointers were obtained; those are compacted to the front of buf[]. On aarch64 only num values 30, 16, 8, 4, 2 and 1 have a switch case: for any other value no case runs and the packing loop reads buf[] as the caller left it. On other architectures pointers are fetched one at a time with roc_npa_aura_op_alloc(). A non-zero drop sets bit 63 of the word written by the aarch64 path and is forwarded as-is to roc_npa_aura_op_alloc(). */ static inline unsigned int +roc_npa_aura_bulk_alloc(uint64_t aura_handle, uint64_t *buf, unsigned int num, + const int drop) +{ +#if defined(__aarch64__) + uint64_t wdata = roc_npa_aura_handle_to_aura(aura_handle); + unsigned int i, count; + uint64_t addr; + + if (drop) + wdata |= BIT_ULL(63); /* DROP */ + + addr = roc_npa_aura_handle_to_base(aura_handle) + + NPA_LF_AURA_OP_ALLOCX(0); + + /* Every asm case issues num/2 casp instructions against addr with wdata in both registers of the new-value pair, then uses stp to store the register pair named first in each casp to consecutive 16-byte slots of buf. */ switch (num) { + case 30: + /* This variant uses x0-x29 as casp result registers, so the three operands are first parked in v18/v19, x24-x30 are saved in v19-v22, and all of them are restored by the trailing mov instructions instead of being listed as clobbers. */ asm volatile( + ".cpu generic+lse\n" + "mov v18.d[0], %[dst]\n" + "mov v18.d[1], %[loc]\n" + "mov v19.d[0], %[wdata]\n" + "mov v19.d[1], x30\n" + "mov v20.d[0], x24\n" + "mov v20.d[1], x25\n" + "mov v21.d[0], x26\n" + "mov v21.d[1], x27\n" + "mov v22.d[0], x28\n" + "mov v22.d[1], x29\n" + "mov x28, v19.d[0]\n" + "mov x29, v19.d[0]\n" + "mov x30, v18.d[1]\n" + "casp x0, x1, x28, x29, [x30]\n" + "casp x2, x3, x28, x29, [x30]\n" + "casp x4, x5, x28, x29, [x30]\n" + "casp x6, x7, x28, x29, [x30]\n" + "casp x8, x9, x28, x29, [x30]\n" + "casp x10, x11, x28, x29, [x30]\n" + "casp x12, x13, x28, x29, [x30]\n" + "casp x14, x15, x28, x29, [x30]\n" + "casp x16, x17, x28, x29, [x30]\n" + "casp x18, x19, x28, x29, [x30]\n" + "casp x20, x21, x28, x29, [x30]\n" + "casp x22, x23, x28, x29, [x30]\n" + "casp x24, x25, x28, x29, [x30]\n" + "casp x26, x27, x28, x29, [x30]\n" + "casp x28, x29, x28, x29, [x30]\n" + "mov x30, v18.d[0]\n" + "stp x0, x1, [x30]\n" + "stp x2, x3, [x30, #16]\n" + "stp x4, x5, [x30, #32]\n" + "stp x6, x7, [x30, #48]\n" + "stp x8, x9, [x30, #64]\n" + "stp x10, x11, [x30, #80]\n" + "stp x12, x13, [x30, #96]\n" + "stp x14, x15, [x30, #112]\n" + "stp x16, x17, [x30, #128]\n" + "stp x18, 
x19, [x30, #144]\n" + "stp x20, x21, [x30, #160]\n" + "stp x22, x23, [x30, #176]\n" + "stp x24, x25, [x30, #192]\n" + "stp x26, x27, [x30, #208]\n" + "stp x28, x29, [x30, #224]\n" + "mov %[dst], v18.d[0]\n" + "mov %[loc], v18.d[1]\n" + "mov %[wdata], v19.d[0]\n" + "mov x30, v19.d[1]\n" + "mov x24, v20.d[0]\n" + "mov x25, v20.d[1]\n" + "mov x26, v21.d[0]\n" + "mov x27, v21.d[1]\n" + "mov x28, v22.d[0]\n" + "mov x29, v22.d[1]\n" + : + : [wdata] "r"(wdata), [loc] "r"(addr), [dst] "r"(buf) + : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6", + "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", + "x15", "x16", "x17", "x18", "x19", "x20", "x21", + "x22", "x23", "v18", "v19", "v20", "v21", "v22"); + break; + case 16: + /* From here down the operands are used directly from the registers the compiler picked, wdata is copied into x16/x17, and every register the asm writes is named in the clobber list. */ asm volatile( + ".cpu generic+lse\n" + "mov x16, %[wdata]\n" + "mov x17, %[wdata]\n" + "casp x0, x1, x16, x17, [%[loc]]\n" + "casp x2, x3, x16, x17, [%[loc]]\n" + "casp x4, x5, x16, x17, [%[loc]]\n" + "casp x6, x7, x16, x17, [%[loc]]\n" + "casp x8, x9, x16, x17, [%[loc]]\n" + "casp x10, x11, x16, x17, [%[loc]]\n" + "casp x12, x13, x16, x17, [%[loc]]\n" + "casp x14, x15, x16, x17, [%[loc]]\n" + "stp x0, x1, [%[dst]]\n" + "stp x2, x3, [%[dst], #16]\n" + "stp x4, x5, [%[dst], #32]\n" + "stp x6, x7, [%[dst], #48]\n" + "stp x8, x9, [%[dst], #64]\n" + "stp x10, x11, [%[dst], #80]\n" + "stp x12, x13, [%[dst], #96]\n" + "stp x14, x15, [%[dst], #112]\n" + : + : [wdata] "r" (wdata), [dst] "r" (buf), [loc] "r" (addr) + : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6", + "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", + "x15", "x16", "x17" + ); + break; + case 8: + asm volatile( + ".cpu generic+lse\n" + "mov x16, %[wdata]\n" + "mov x17, %[wdata]\n" + "casp x0, x1, x16, x17, [%[loc]]\n" + "casp x2, x3, x16, x17, [%[loc]]\n" + "casp x4, x5, x16, x17, [%[loc]]\n" + "casp x6, x7, x16, x17, [%[loc]]\n" + "stp x0, x1, [%[dst]]\n" + "stp x2, x3, [%[dst], #16]\n" + "stp x4, x5, [%[dst], #32]\n" + "stp x6, x7, [%[dst], #48]\n" + : + : [wdata] "r" (wdata), [dst] 
"r" (buf), [loc] "r" (addr) + : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6", + "x7", "x16", "x17" + ); + break; + case 4: + asm volatile( + ".cpu generic+lse\n" + "mov x16, %[wdata]\n" + "mov x17, %[wdata]\n" + "casp x0, x1, x16, x17, [%[loc]]\n" + "casp x2, x3, x16, x17, [%[loc]]\n" + "stp x0, x1, [%[dst]]\n" + "stp x2, x3, [%[dst], #16]\n" + : + : [wdata] "r" (wdata), [dst] "r" (buf), [loc] "r" (addr) + : "memory", "x0", "x1", "x2", "x3", "x16", "x17" + ); + break; + case 2: + asm volatile( + ".cpu generic+lse\n" + "mov x16, %[wdata]\n" + "mov x17, %[wdata]\n" + "casp x0, x1, x16, x17, [%[loc]]\n" + "stp x0, x1, [%[dst]]\n" + : + : [wdata] "r" (wdata), [dst] "r" (buf), [loc] "r" (addr) + : "memory", "x0", "x1", "x16", "x17" + ); + break; + case 1: + /* A single pointer goes through the scalar alloc helper and returns immediately: 1 if the pointer is non-zero, 0 otherwise. */ buf[0] = roc_npa_aura_op_alloc(aura_handle, drop); + return !!buf[0]; + } + + /* Pack the pointers: keep the non-zero entries of buf[] in their original order at the front and count them (zero entries are presumably allocations that failed). */ + for (i = 0, count = 0; i < num; i++) + if (buf[i]) + buf[count++] = buf[i]; + + return count; +#else + /* Build without __aarch64__: allocate one pointer per call and only advance count when the result is non-zero. */ unsigned int i, count; + + for (i = 0, count = 0; i < num; i++) { + buf[count] = roc_npa_aura_op_alloc(aura_handle, drop); + if (buf[count]) + count++; + } + + return count; +#endif +} + +/* Allocate up to num pointers from the aura into buf[], working in chunks: ROC_CN9K_NPA_BULK_ALLOC_MAX_PTRS while at least that many are still needed, otherwise plt_align32prevpow2() of the remaining count. The loop ends when num reaches zero or when a chunk returns fewer pointers than requested. Returns the number of pointers left in buf[]; when the full request was not met and partial is zero, the pointers obtained so far are freed again and 0 is returned. */ static inline unsigned int +roc_npa_aura_op_bulk_alloc(uint64_t aura_handle, uint64_t *buf, + unsigned int num, const int drop, const int partial) +{ + unsigned int chunk, count, num_alloc; + + count = 0; + while (num) { + chunk = (num >= ROC_CN9K_NPA_BULK_ALLOC_MAX_PTRS) ? + ROC_CN9K_NPA_BULK_ALLOC_MAX_PTRS : + plt_align32prevpow2(num); + + num_alloc = + roc_npa_aura_bulk_alloc(aura_handle, buf, chunk, drop); + + count += num_alloc; + buf += num_alloc; + num -= num_alloc; + + /* A short chunk ends the loop; the remainder is not retried. */ if (unlikely(num_alloc != chunk)) + break; + } + + /* If the requested number of pointers was not allocated and if partial + * alloc is not desired, then free allocated pointers. 
+ */ + if (unlikely(num != 0 && !partial)) { + /* buf was advanced by num_alloc after every chunk, count entries in total, so buf - count is the first pointer that was allocated. */ roc_npa_aura_op_bulk_free(aura_handle, buf - count, count, 1); + count = 0; + } + + return count; +} + struct roc_npa { struct plt_pci_device *pci_dev; -- 2.8.4