DPDK patches and discussions
From: vanshika.shukla@nxp.com
To: dev@dpdk.org, Hemant Agrawal <hemant.agrawal@nxp.com>,
	Sachin Saxena <sachin.saxena@nxp.com>
Cc: Jun Yang <jun.yang@nxp.com>
Subject: [v1 04/10] bus/dpaa: optimize bman acquire/release
Date: Wed, 28 May 2025 16:09:28 +0530	[thread overview]
Message-ID: <20250528103934.1001747-5-vanshika.shukla@nxp.com> (raw)
In-Reply-To: <20250528103934.1001747-1-vanshika.shukla@nxp.com>

From: Jun Yang <jun.yang@nxp.com>

1) Reduce byte swaps between big-endian and little-endian formats.
2) Reduce cache-inhibited (CI) accesses by using 128-bit read/write instructions.
These changes improve buffer acquire/release performance by roughly 10%; the 48-bit address packing they rely on is sketched below.
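
For reference, a minimal sketch (not part of the patch) of the 48-bit address
packing that bman_release_fast()/bman_hw_extract_addr() below rely on,
mirroring the HI16_OF_U48/LO32_OF_U48/U48_BY_HI16_LO32 macros; the helper
names here are hypothetical:

#include <stdint.h>
#include <rte_byteorder.h>

/* Pack a 48-bit buffer address into the big-endian hi16/lo32 fields
 * used by struct bm_hw_buf_desc. */
static inline void
pack_u48(uint64_t addr, rte_be16_t *hi, rte_be32_t *lo)
{
	*hi = rte_cpu_to_be_16((addr >> 32) & 0xffff); /* bits 47..32 */
	*lo = rte_cpu_to_be_32(addr & 0xffffffff);     /* bits 31..0 */
}

/* Rebuild the 48-bit CPU-endian address from those fields. */
static inline uint64_t
unpack_u48(rte_be16_t hi, rte_be32_t lo)
{
	return ((uint64_t)rte_be_to_cpu_16(hi) << 32) | rte_be_to_cpu_32(lo);
}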

Signed-off-by: Jun Yang <jun.yang@nxp.com>
---
 drivers/bus/dpaa/base/qbman/bman.c  | 149 ++++++++++++++++++++++++----
 drivers/bus/dpaa/include/fsl_bman.h |  20 +++-
 2 files changed, 150 insertions(+), 19 deletions(-)
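
A hedged usage sketch of the two new internal helpers from a caller's point
of view (e.g. the dpaa mempool driver updated later in this series); the
wrapper function below is illustrative only and not part of the patch:

#include <errno.h>
#include <stdint.h>
#include <fsl_bman.h>

/* Release a burst of raw 48-bit buffer addresses, then try to acquire
 * them back, using the fast-path helpers added by this patch. */
static int
bman_fast_roundtrip(struct bman_pool *pool, uint64_t *addrs, uint8_t n)
{
	int ret;

	if (n == 0 || n > FSL_BM_BURST_MAX)
		return -EINVAL;

	/* bman_release_fast() returns 0 on success or a negative errno. */
	ret = bman_release_fast(pool, addrs, n);
	if (ret)
		return ret;

	/* bman_acquire_fast() returns the number of buffers obtained,
	 * which may be fewer than requested if the pool runs short. */
	return bman_acquire_fast(pool, addrs, n);
}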

diff --git a/drivers/bus/dpaa/base/qbman/bman.c b/drivers/bus/dpaa/base/qbman/bman.c
index 8a6290734f..13f535a679 100644
--- a/drivers/bus/dpaa/base/qbman/bman.c
+++ b/drivers/bus/dpaa/base/qbman/bman.c
@@ -1,18 +1,38 @@
 /* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0)
  *
  * Copyright 2008-2016 Freescale Semiconductor Inc.
- * Copyright 2017 NXP
+ * Copyright 2017, 2024 NXP
  *
  */
+#include <rte_memcpy.h>
+#include <rte_branch_prediction.h>
+#include <eal_export.h>
 
 #include "bman.h"
-#include <rte_branch_prediction.h>
 
 /* Compilation constants */
 #define RCR_THRESH	2	/* reread h/w CI when running out of space */
 #define IRQNAME		"BMan portal %d"
 #define MAX_IRQNAME	16	/* big enough for "BMan portal %d" */
 
+#ifndef MAX_U16
+#define MAX_U16 0xffff
+#endif
+#ifndef BIT_SIZE
+#define BIT_SIZE(t) (sizeof(t) * 8)
+#endif
+#ifndef MAX_U32
+#define MAX_U32 \
+	((((uint32_t)MAX_U16) << BIT_SIZE(uint16_t)) | MAX_U16)
+#endif
+#define MAX_U48 \
+	((((uint64_t)MAX_U16) << BIT_SIZE(uint32_t)) | MAX_U32)
+#define HI16_OF_U48(x) \
+	(((x) >> BIT_SIZE(rte_be32_t)) & MAX_U16)
+#define LO32_OF_U48(x) ((x) & MAX_U32)
+#define U48_BY_HI16_LO32(hi, lo) \
+	(((hi) << BIT_SIZE(uint32_t)) | (lo))
+
 struct bman_portal {
 	struct bm_portal p;
 	/* 2-element array. pools[0] is mask, pools[1] is snapshot. */
@@ -246,7 +266,52 @@ static void update_rcr_ci(struct bman_portal *p, int avail)
 		bm_rcr_cce_update(&p->p);
 }
 
-#define BMAN_BUF_MASK 0x0000fffffffffffful
+RTE_EXPORT_INTERNAL_SYMBOL(bman_release_fast)
+int
+bman_release_fast(struct bman_pool *pool, const uint64_t *bufs,
+	uint8_t num)
+{
+	struct bman_portal *p;
+	struct bm_rcr_entry *r;
+	uint8_t i, avail;
+	uint64_t bpid = pool->params.bpid;
+	struct bm_hw_buf_desc bm_bufs[FSL_BM_BURST_MAX];
+
+#ifdef RTE_LIBRTE_DPAA_HWDEBUG
+	if (!num || (num > FSL_BM_BURST_MAX))
+		return -EINVAL;
+	if (pool->params.flags & BMAN_POOL_FLAG_NO_RELEASE)
+		return -EINVAL;
+#endif
+
+	p = get_affine_portal();
+	avail = bm_rcr_get_avail(&p->p);
+	if (avail < 2)
+		update_rcr_ci(p, avail);
+	r = bm_rcr_start(&p->p);
+	if (unlikely(!r))
+		return -EBUSY;
+
+	/*
+	 * we can copy all but the first entry, as this can trigger badness
+	 * with the valid-bit
+	 */
+	bm_bufs[0].bpid = bpid;
+	bm_bufs[0].hi_addr = cpu_to_be16(HI16_OF_U48(bufs[0]));
+	bm_bufs[0].lo_addr = cpu_to_be32(LO32_OF_U48(bufs[0]));
+	for (i = 1; i < num; i++) {
+		bm_bufs[i].hi_addr = cpu_to_be16(HI16_OF_U48(bufs[i]));
+		bm_bufs[i].lo_addr = cpu_to_be32(LO32_OF_U48(bufs[i]));
+	}
+
+	rte_memcpy(r->bufs, bm_bufs, sizeof(struct bm_buffer) * num);
+
+	bm_rcr_pvb_commit(&p->p, BM_RCR_VERB_CMD_BPID_SINGLE |
+		(num & BM_RCR_VERB_BUFCOUNT_MASK));
+
+	return 0;
+}
+
 int bman_release(struct bman_pool *pool, const struct bm_buffer *bufs, u8 num,
 		 u32 flags __maybe_unused)
 {
@@ -256,7 +321,7 @@ int bman_release(struct bman_pool *pool, const struct bm_buffer *bufs, u8 num,
 	u8 avail;
 
 #ifdef RTE_LIBRTE_DPAA_HWDEBUG
-	if (!num || (num > 8))
+	if (!num || (num > FSL_BM_BURST_MAX))
 		return -EINVAL;
 	if (pool->params.flags & BMAN_POOL_FLAG_NO_RELEASE)
 		return -EINVAL;
@@ -276,11 +341,11 @@ int bman_release(struct bman_pool *pool, const struct bm_buffer *bufs, u8 num,
 	 */
 	r->bufs[0].opaque =
 		cpu_to_be64(((u64)pool->params.bpid << 48) |
-			    (bufs[0].opaque & BMAN_BUF_MASK));
+			    (bufs[0].opaque & MAX_U48));
 	if (i) {
 		for (i = 1; i < num; i++)
 			r->bufs[i].opaque =
-				cpu_to_be64(bufs[i].opaque & BMAN_BUF_MASK);
+				cpu_to_be64(bufs[i].opaque & MAX_U48);
 	}
 
 	bm_rcr_pvb_commit(&p->p, BM_RCR_VERB_CMD_BPID_SINGLE |
@@ -289,16 +354,70 @@ int bman_release(struct bman_pool *pool, const struct bm_buffer *bufs, u8 num,
 	return 0;
 }
 
+static inline uint64_t
+bman_extract_addr(struct bm_buffer *buf)
+{
+	buf->opaque = be64_to_cpu(buf->opaque);
+
+	return buf->addr;
+}
+
+static inline uint64_t
+bman_hw_extract_addr(struct bm_hw_buf_desc *buf)
+{
+	uint64_t hi, lo;
+
+	hi = be16_to_cpu(buf->hi_addr);
+	lo = be32_to_cpu(buf->lo_addr);
+	return U48_BY_HI16_LO32(hi, lo);
+}
+
+RTE_EXPORT_INTERNAL_SYMBOL(bman_acquire_fast)
+int
+bman_acquire_fast(struct bman_pool *pool, uint64_t *bufs, uint8_t num)
+{
+	struct bman_portal *p = get_affine_portal();
+	struct bm_mc_command *mcc;
+	struct bm_mc_result *mcr;
+	uint8_t i, rst;
+	struct bm_hw_buf_desc bm_bufs[FSL_BM_BURST_MAX];
+
+#ifdef RTE_LIBRTE_DPAA_HWDEBUG
+	if (!num || (num > FSL_BM_BURST_MAX))
+		return -EINVAL;
+	if (pool->params.flags & BMAN_POOL_FLAG_ONLY_RELEASE)
+		return -EINVAL;
+#endif
+
+	mcc = bm_mc_start(&p->p);
+	mcc->acquire.bpid = pool->params.bpid;
+	bm_mc_commit(&p->p, BM_MCC_VERB_CMD_ACQUIRE |
+			(num & BM_MCC_VERB_ACQUIRE_BUFCOUNT));
+	while (!(mcr = bm_mc_result(&p->p)))
+		;
+	rst = mcr->verb & BM_MCR_VERB_ACQUIRE_BUFCOUNT;
+	if (unlikely(!rst))
+		return 0;
+
+	rte_memcpy(bm_bufs, mcr->acquire.bufs,
+		sizeof(struct bm_buffer) * rst);
+
+	for (i = 0; i < rst; i++)
+		bufs[i] = bman_hw_extract_addr(&bm_bufs[i]);
+
+	return rst;
+}
+
 int bman_acquire(struct bman_pool *pool, struct bm_buffer *bufs, u8 num,
 		 u32 flags __maybe_unused)
 {
 	struct bman_portal *p = get_affine_portal();
 	struct bm_mc_command *mcc;
 	struct bm_mc_result *mcr;
-	int ret, i;
+	uint8_t rst, i;
 
 #ifdef RTE_LIBRTE_DPAA_HWDEBUG
-	if (!num || (num > 8))
+	if (!num || (num > FSL_BM_BURST_MAX))
 		return -EINVAL;
 	if (pool->params.flags & BMAN_POOL_FLAG_ONLY_RELEASE)
 		return -EINVAL;
@@ -310,15 +429,11 @@ int bman_acquire(struct bman_pool *pool, struct bm_buffer *bufs, u8 num,
 			(num & BM_MCC_VERB_ACQUIRE_BUFCOUNT));
 	while (!(mcr = bm_mc_result(&p->p)))
 		cpu_relax();
-	ret = mcr->verb & BM_MCR_VERB_ACQUIRE_BUFCOUNT;
-	if (bufs) {
-		for (i = 0; i < num; i++)
-			bufs[i].opaque =
-				be64_to_cpu(mcr->acquire.bufs[i].opaque);
-	}
-	if (ret != num)
-		ret = -ENOMEM;
-	return ret;
+	rst = mcr->verb & BM_MCR_VERB_ACQUIRE_BUFCOUNT;
+	for (i = 0; i < rst; i++)
+		bufs[i].opaque = be64_to_cpu(mcr->acquire.bufs[i].opaque);
+
+	return rst;
 }
 
 int bman_query_pools(struct bm_pool_state *state)
diff --git a/drivers/bus/dpaa/include/fsl_bman.h b/drivers/bus/dpaa/include/fsl_bman.h
index c0760149e1..5ee9533434 100644
--- a/drivers/bus/dpaa/include/fsl_bman.h
+++ b/drivers/bus/dpaa/include/fsl_bman.h
@@ -1,6 +1,7 @@
 /* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0)
  *
  * Copyright 2008-2012 Freescale Semiconductor, Inc.
+ * Copyright 2024 NXP
  *
  */
 
@@ -68,6 +69,14 @@ struct __rte_aligned(8) bm_buffer {
 		u64 opaque;
 	};
 };
+
+struct __rte_packed bm_hw_buf_desc {
+	uint8_t rsv;
+	uint8_t bpid;
+	rte_be16_t hi_addr; /* High 16-bits of 48-bit address */
+	rte_be32_t lo_addr; /* Low 32-bits of 48-bit address */
+};
+
 static inline u64 bm_buffer_get64(const struct bm_buffer *buf)
 {
 	return buf->addr;
@@ -85,6 +94,8 @@ static inline dma_addr_t bm_buf_addr(const struct bm_buffer *buf)
 		__buf931->lo = lower_32_bits(v); \
 	} while (0)
 
+#define FSL_BM_BURST_MAX 8
+
 /* See 1.5.3.5.4: "Release Command" */
 struct __rte_packed_begin bm_rcr_entry {
 	union {
@@ -93,7 +104,7 @@ struct __rte_packed_begin bm_rcr_entry {
 			u8 bpid; /* used with BM_RCR_VERB_CMD_BPID_SINGLE */
 			u8 __reserved1[62];
 		};
-		struct bm_buffer bufs[8];
+		struct bm_buffer bufs[FSL_BM_BURST_MAX];
 	};
 } __rte_packed_end;
 #define BM_RCR_VERB_VBIT		0x80
@@ -148,7 +159,7 @@ struct __rte_packed_begin bm_mc_result {
 				u8 bpid;
 				u8 __reserved2[62];
 			};
-			struct bm_buffer bufs[8];
+			struct bm_buffer bufs[FSL_BM_BURST_MAX];
 		} acquire;
 		struct bm_pool_state query;
 	};
@@ -297,6 +308,9 @@ const struct bman_pool_params *bman_get_params(const struct bman_pool *pool);
 __rte_internal
 int bman_release(struct bman_pool *pool, const struct bm_buffer *bufs, u8 num,
 		 u32 flags);
+__rte_internal
+int bman_release_fast(struct bman_pool *pool, const uint64_t *bufs,
+	uint8_t num);
 
 /**
  * bman_acquire - Acquire buffer(s) from a buffer pool
@@ -311,6 +325,8 @@ int bman_release(struct bman_pool *pool, const struct bm_buffer *bufs, u8 num,
 __rte_internal
 int bman_acquire(struct bman_pool *pool, struct bm_buffer *bufs, u8 num,
 		 u32 flags);
+__rte_internal
+int bman_acquire_fast(struct bman_pool *pool, uint64_t *bufs, uint8_t num);
 
 /**
  * bman_query_pools - Query all buffer pool states
-- 
2.25.1


Thread overview: 14+ messages
2025-05-28 10:39 [v1 00/10] DPAA specific fixes vanshika.shukla
2025-05-28 10:39 ` [v1 01/10] bus/dpaa: avoid using same structure and variable name vanshika.shukla
2025-05-28 10:39 ` [v1 02/10] bus/dpaa: add FMan node vanshika.shukla
2025-05-28 10:39 ` [v1 03/10] bus/dpaa: enhance DPAA SoC version vanshika.shukla
2025-05-28 14:28   ` Stephen Hemminger
2025-05-28 10:39 ` vanshika.shukla [this message]
2025-05-28 14:30   ` [v1 04/10] bus/dpaa: optimize bman acquire/release Stephen Hemminger
2025-05-28 14:50     ` [EXT] " Jun Yang
2025-05-28 10:39 ` [v1 05/10] mempool/dpaa: fast acquire and release vanshika.shukla
2025-05-28 10:39 ` [v1 06/10] mempool/dpaa: adjust pool element for LS1043A errata vanshika.shukla
2025-05-28 10:39 ` [v1 07/10] net/dpaa: add Tx rate limiting DPAA PMD API vanshika.shukla
2025-05-28 10:39 ` [v1 08/10] net/dpaa: add devargs for enabling err packets on main queue vanshika.shukla
2025-05-28 10:39 ` [v1 09/10] bus/dpaa: improve DPAA cleanup vanshika.shukla
2025-05-28 10:39 ` [v1 10/10] bus/dpaa: optimize qman enqueue check vanshika.shukla
