* [PATCH 2/2] common/cnxk: add new APIs for batch operations
2023-05-26 13:45 [PATCH 1/2] mempool/cnxk: avoid indefinite wait Ashwin Sekhar T K
@ 2023-05-26 13:45 ` Ashwin Sekhar T K
2023-05-29 9:14 ` [PATCH 1/2] mempool/cnxk: avoid indefinite wait Jerin Jacob
` (2 subsequent siblings)
3 siblings, 0 replies; 7+ messages in thread
From: Ashwin Sekhar T K @ 2023-05-26 13:45 UTC (permalink / raw)
To: dev, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: jerinj, pbhagavatula, psatheesh, asekhar, anoobj, gakhil, hkalra
Add new APIs for counting and extracting allocated objects
from a single cache line in the batch alloc memory.
Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com>
---
drivers/common/cnxk/roc_npa.h | 78 ++++++++++++++++++++++++++++++-----
1 file changed, 67 insertions(+), 11 deletions(-)
diff --git a/drivers/common/cnxk/roc_npa.h b/drivers/common/cnxk/roc_npa.h
index d3caa71586..0653531198 100644
--- a/drivers/common/cnxk/roc_npa.h
+++ b/drivers/common/cnxk/roc_npa.h
@@ -209,7 +209,6 @@ roc_npa_aura_batch_alloc_issue(uint64_t aura_handle, uint64_t *buf,
unsigned int num, const int dis_wait,
const int drop)
{
- unsigned int i;
int64_t *addr;
uint64_t res;
union {
@@ -220,10 +219,6 @@ roc_npa_aura_batch_alloc_issue(uint64_t aura_handle, uint64_t *buf,
if (num > ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS)
return -1;
- /* Zero first word of every cache line */
- for (i = 0; i < num; i += (ROC_ALIGN / sizeof(uint64_t)))
- buf[i] = 0;
-
addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
NPA_LF_AURA_BATCH_ALLOC);
cmp.u = 0;
@@ -240,6 +235,9 @@ roc_npa_aura_batch_alloc_issue(uint64_t aura_handle, uint64_t *buf,
return 0;
}
+/*
+ * Wait for a batch alloc operation on a cache line to complete.
+ */
static inline void
roc_npa_batch_alloc_wait(uint64_t *cache_line, unsigned int wait_us)
{
@@ -255,6 +253,23 @@ roc_npa_batch_alloc_wait(uint64_t *cache_line, unsigned int wait_us)
break;
}
+/*
+ * Count the number of pointers in a single batch alloc cache line.
+ */
+static inline unsigned int
+roc_npa_aura_batch_alloc_count_line(uint64_t *line, unsigned int wait_us)
+{
+ struct npa_batch_alloc_status_s *status;
+
+ status = (struct npa_batch_alloc_status_s *)line;
+ roc_npa_batch_alloc_wait(line, wait_us);
+
+ return status->count;
+}
+
+/*
+ * Count the number of pointers in a sequence of batch alloc cache lines.
+ */
static inline unsigned int
roc_npa_aura_batch_alloc_count(uint64_t *aligned_buf, unsigned int num,
unsigned int wait_us)
@@ -279,6 +294,40 @@ roc_npa_aura_batch_alloc_count(uint64_t *aligned_buf, unsigned int num,
return count;
}
+/*
+ * Extract allocated pointers from a single batch alloc cache line. This API
+ * only extracts the required number of pointers from the cache line and it
+ * adjusts the status->count so that a subsequent call to this API can
+ * extract the remaining pointers in the cache line appropriately.
+ */
+static inline unsigned int
+roc_npa_aura_batch_alloc_extract_line(uint64_t *buf, uint64_t *line,
+ unsigned int num, unsigned int *rem)
+{
+ struct npa_batch_alloc_status_s *status;
+ unsigned int avail;
+
+ status = (struct npa_batch_alloc_status_s *)line;
+ roc_npa_batch_alloc_wait(line, 0);
+ avail = status->count;
+ num = avail > num ? num : avail;
+ if (num)
+ memcpy(buf, &line[avail - num], num * sizeof(uint64_t));
+ avail -= num;
+ if (avail == 0) {
+ /* Clear the lowest 7 bits of the first pointer */
+ buf[0] &= ~0x7FUL;
+ status->ccode = 0;
+ }
+ status->count = avail;
+ *rem = avail;
+
+ return num;
+}
+
+/*
+ * Extract all allocated pointers from a sequence of batch alloc cache lines.
+ */
static inline unsigned int
roc_npa_aura_batch_alloc_extract(uint64_t *buf, uint64_t *aligned_buf,
unsigned int num)
@@ -330,11 +379,15 @@ roc_npa_aura_op_bulk_free(uint64_t aura_handle, uint64_t const *buf,
}
}
+/*
+ * Issue a batch alloc operation on a sequence of cache lines, wait for the
+ * batch alloc to complete and copy the pointers out into the user buffer.
+ */
static inline unsigned int
roc_npa_aura_op_batch_alloc(uint64_t aura_handle, uint64_t *buf,
- uint64_t *aligned_buf, unsigned int num,
- const int dis_wait, const int drop,
- const int partial)
+ unsigned int num, uint64_t *aligned_buf,
+ unsigned int aligned_buf_sz, const int dis_wait,
+ const int drop, const int partial)
{
unsigned int count, chunk, num_alloc;
@@ -344,9 +397,12 @@ roc_npa_aura_op_batch_alloc(uint64_t aura_handle, uint64_t *buf,
count = 0;
while (num) {
- chunk = (num > ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS) ?
- ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS :
- num;
+ /* Make sure that the pointers allocated fit into the cache
+ * lines reserved.
+ */
+ chunk = aligned_buf_sz / sizeof(uint64_t);
+ chunk = PLT_MIN(num, chunk);
+ chunk = PLT_MIN((int)chunk, ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS);
if (roc_npa_aura_batch_alloc_issue(aura_handle, aligned_buf,
chunk, dis_wait, drop))
--
2.25.1
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 1/2] mempool/cnxk: avoid indefinite wait
2023-05-26 13:45 [PATCH 1/2] mempool/cnxk: avoid indefinite wait Ashwin Sekhar T K
2023-05-26 13:45 ` [PATCH 2/2] common/cnxk: add new APIs for batch operations Ashwin Sekhar T K
@ 2023-05-29 9:14 ` Jerin Jacob
2023-05-29 9:25 ` [PATCH v2 1/2] mempool/cnxk: fix indefinite wait in batch alloc Ashwin Sekhar T K
2023-05-30 9:12 ` [PATCH v3] " Ashwin Sekhar T K
3 siblings, 0 replies; 7+ messages in thread
From: Jerin Jacob @ 2023-05-29 9:14 UTC (permalink / raw)
To: Ashwin Sekhar T K
Cc: dev, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
Satha Rao, Pavan Nikhilesh, jerinj, psatheesh, anoobj, gakhil,
hkalra
On Fri, May 26, 2023 at 7:15 PM Ashwin Sekhar T K <asekhar@marvell.com> wrote:
>
> Avoid waiting indefinitely when counting batch alloc
> pointers by adding a wait timeout.
Please add a Fixes: tag and change the subject to start with "fix ..."
>
> Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com>
> ---
> drivers/common/cnxk/roc_npa.h | 15 +++++++++------
> drivers/mempool/cnxk/cn10k_mempool_ops.c | 3 ++-
> 2 files changed, 11 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/common/cnxk/roc_npa.h b/drivers/common/cnxk/roc_npa.h
> index 21608a40d9..d3caa71586 100644
> --- a/drivers/common/cnxk/roc_npa.h
> +++ b/drivers/common/cnxk/roc_npa.h
> @@ -241,19 +241,23 @@ roc_npa_aura_batch_alloc_issue(uint64_t aura_handle, uint64_t *buf,
> }
>
> static inline void
> -roc_npa_batch_alloc_wait(uint64_t *cache_line)
> +roc_npa_batch_alloc_wait(uint64_t *cache_line, unsigned int wait_us)
> {
> + const uint64_t ticks = (uint64_t)wait_us * plt_tsc_hz() / (uint64_t)1E6;
> + const uint64_t start = plt_tsc_cycles();
> +
> /* Batch alloc status code is updated in bits [5:6] of the first word
> * of the 128 byte cache line.
> */
> while (((__atomic_load_n(cache_line, __ATOMIC_RELAXED) >> 5) & 0x3) ==
> ALLOC_CCODE_INVAL)
> - ;
> + if (wait_us && (plt_tsc_cycles() - start) >= ticks)
> + break;
> }
>
> static inline unsigned int
> roc_npa_aura_batch_alloc_count(uint64_t *aligned_buf, unsigned int num,
> - unsigned int do_wait)
> + unsigned int wait_us)
> {
> unsigned int count, i;
>
> @@ -267,8 +271,7 @@ roc_npa_aura_batch_alloc_count(uint64_t *aligned_buf, unsigned int num,
>
> status = (struct npa_batch_alloc_status_s *)&aligned_buf[i];
>
> - if (do_wait)
> - roc_npa_batch_alloc_wait(&aligned_buf[i]);
> + roc_npa_batch_alloc_wait(&aligned_buf[i], wait_us);
>
> count += status->count;
> }
> @@ -293,7 +296,7 @@ roc_npa_aura_batch_alloc_extract(uint64_t *buf, uint64_t *aligned_buf,
>
> status = (struct npa_batch_alloc_status_s *)&aligned_buf[i];
>
> - roc_npa_batch_alloc_wait(&aligned_buf[i]);
> + roc_npa_batch_alloc_wait(&aligned_buf[i], 0);
>
> line_count = status->count;
>
> diff --git a/drivers/mempool/cnxk/cn10k_mempool_ops.c b/drivers/mempool/cnxk/cn10k_mempool_ops.c
> index ba826f0f01..ff0015d8de 100644
> --- a/drivers/mempool/cnxk/cn10k_mempool_ops.c
> +++ b/drivers/mempool/cnxk/cn10k_mempool_ops.c
> @@ -9,6 +9,7 @@
>
> #define BATCH_ALLOC_SZ ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS
> #define BATCH_OP_DATA_TABLE_MZ_NAME "batch_op_data_table_mz"
> +#define BATCH_ALLOC_WAIT_US 5
>
> enum batch_op_status {
> BATCH_ALLOC_OP_NOT_ISSUED = 0,
> @@ -178,7 +179,7 @@ cn10k_mempool_get_count(const struct rte_mempool *mp)
>
> if (mem->status == BATCH_ALLOC_OP_ISSUED)
> count += roc_npa_aura_batch_alloc_count(
> - mem->objs, BATCH_ALLOC_SZ, 1);
> + mem->objs, BATCH_ALLOC_SZ, BATCH_ALLOC_WAIT_US);
>
> if (mem->status == BATCH_ALLOC_OP_DONE)
> count += mem->sz;
> --
> 2.25.1
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH v2 1/2] mempool/cnxk: fix indefinite wait in batch alloc
2023-05-26 13:45 [PATCH 1/2] mempool/cnxk: avoid indefinite wait Ashwin Sekhar T K
2023-05-26 13:45 ` [PATCH 2/2] common/cnxk: add new APIs for batch operations Ashwin Sekhar T K
2023-05-29 9:14 ` [PATCH 1/2] mempool/cnxk: avoid indefinite wait Jerin Jacob
@ 2023-05-29 9:25 ` Ashwin Sekhar T K
2023-05-29 9:25 ` [PATCH v2 2/2] common/cnxk: add new APIs for batch operations Ashwin Sekhar T K
2023-05-30 9:12 ` [PATCH v3] " Ashwin Sekhar T K
3 siblings, 1 reply; 7+ messages in thread
From: Ashwin Sekhar T K @ 2023-05-29 9:25 UTC (permalink / raw)
To: dev, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
Satha Rao, Ashwin Sekhar T K, Pavan Nikhilesh
Cc: jerinj, psatheesh, anoobj, gakhil, hkalra
Avoid waiting indefinitely when counting batch allocated
pointers by adding a wait timeout.
Fixes: 50d08d3934ec ("common/cnxk: fix batch alloc completion poll logic")
Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com>
---
drivers/common/cnxk/roc_npa.h | 15 +++++++++------
drivers/mempool/cnxk/cn10k_mempool_ops.c | 3 ++-
2 files changed, 11 insertions(+), 7 deletions(-)
diff --git a/drivers/common/cnxk/roc_npa.h b/drivers/common/cnxk/roc_npa.h
index 21608a40d9..d3caa71586 100644
--- a/drivers/common/cnxk/roc_npa.h
+++ b/drivers/common/cnxk/roc_npa.h
@@ -241,19 +241,23 @@ roc_npa_aura_batch_alloc_issue(uint64_t aura_handle, uint64_t *buf,
}
static inline void
-roc_npa_batch_alloc_wait(uint64_t *cache_line)
+roc_npa_batch_alloc_wait(uint64_t *cache_line, unsigned int wait_us)
{
+ const uint64_t ticks = (uint64_t)wait_us * plt_tsc_hz() / (uint64_t)1E6;
+ const uint64_t start = plt_tsc_cycles();
+
/* Batch alloc status code is updated in bits [5:6] of the first word
* of the 128 byte cache line.
*/
while (((__atomic_load_n(cache_line, __ATOMIC_RELAXED) >> 5) & 0x3) ==
ALLOC_CCODE_INVAL)
- ;
+ if (wait_us && (plt_tsc_cycles() - start) >= ticks)
+ break;
}
static inline unsigned int
roc_npa_aura_batch_alloc_count(uint64_t *aligned_buf, unsigned int num,
- unsigned int do_wait)
+ unsigned int wait_us)
{
unsigned int count, i;
@@ -267,8 +271,7 @@ roc_npa_aura_batch_alloc_count(uint64_t *aligned_buf, unsigned int num,
status = (struct npa_batch_alloc_status_s *)&aligned_buf[i];
- if (do_wait)
- roc_npa_batch_alloc_wait(&aligned_buf[i]);
+ roc_npa_batch_alloc_wait(&aligned_buf[i], wait_us);
count += status->count;
}
@@ -293,7 +296,7 @@ roc_npa_aura_batch_alloc_extract(uint64_t *buf, uint64_t *aligned_buf,
status = (struct npa_batch_alloc_status_s *)&aligned_buf[i];
- roc_npa_batch_alloc_wait(&aligned_buf[i]);
+ roc_npa_batch_alloc_wait(&aligned_buf[i], 0);
line_count = status->count;
diff --git a/drivers/mempool/cnxk/cn10k_mempool_ops.c b/drivers/mempool/cnxk/cn10k_mempool_ops.c
index ba826f0f01..ff0015d8de 100644
--- a/drivers/mempool/cnxk/cn10k_mempool_ops.c
+++ b/drivers/mempool/cnxk/cn10k_mempool_ops.c
@@ -9,6 +9,7 @@
#define BATCH_ALLOC_SZ ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS
#define BATCH_OP_DATA_TABLE_MZ_NAME "batch_op_data_table_mz"
+#define BATCH_ALLOC_WAIT_US 5
enum batch_op_status {
BATCH_ALLOC_OP_NOT_ISSUED = 0,
@@ -178,7 +179,7 @@ cn10k_mempool_get_count(const struct rte_mempool *mp)
if (mem->status == BATCH_ALLOC_OP_ISSUED)
count += roc_npa_aura_batch_alloc_count(
- mem->objs, BATCH_ALLOC_SZ, 1);
+ mem->objs, BATCH_ALLOC_SZ, BATCH_ALLOC_WAIT_US);
if (mem->status == BATCH_ALLOC_OP_DONE)
count += mem->sz;
--
2.25.1
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH v2 2/2] common/cnxk: add new APIs for batch operations
2023-05-29 9:25 ` [PATCH v2 1/2] mempool/cnxk: fix indefinite wait in batch alloc Ashwin Sekhar T K
@ 2023-05-29 9:25 ` Ashwin Sekhar T K
0 siblings, 0 replies; 7+ messages in thread
From: Ashwin Sekhar T K @ 2023-05-29 9:25 UTC (permalink / raw)
To: dev, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: jerinj, pbhagavatula, psatheesh, asekhar, anoobj, gakhil, hkalra
Add new APIs for counting and extracting allocated objects
from a single cache line in the batch alloc memory.
Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com>
---
drivers/common/cnxk/roc_npa.h | 78 ++++++++++++++++++++++++++++++-----
1 file changed, 67 insertions(+), 11 deletions(-)
diff --git a/drivers/common/cnxk/roc_npa.h b/drivers/common/cnxk/roc_npa.h
index d3caa71586..0653531198 100644
--- a/drivers/common/cnxk/roc_npa.h
+++ b/drivers/common/cnxk/roc_npa.h
@@ -209,7 +209,6 @@ roc_npa_aura_batch_alloc_issue(uint64_t aura_handle, uint64_t *buf,
unsigned int num, const int dis_wait,
const int drop)
{
- unsigned int i;
int64_t *addr;
uint64_t res;
union {
@@ -220,10 +219,6 @@ roc_npa_aura_batch_alloc_issue(uint64_t aura_handle, uint64_t *buf,
if (num > ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS)
return -1;
- /* Zero first word of every cache line */
- for (i = 0; i < num; i += (ROC_ALIGN / sizeof(uint64_t)))
- buf[i] = 0;
-
addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
NPA_LF_AURA_BATCH_ALLOC);
cmp.u = 0;
@@ -240,6 +235,9 @@ roc_npa_aura_batch_alloc_issue(uint64_t aura_handle, uint64_t *buf,
return 0;
}
+/*
+ * Wait for a batch alloc operation on a cache line to complete.
+ */
static inline void
roc_npa_batch_alloc_wait(uint64_t *cache_line, unsigned int wait_us)
{
@@ -255,6 +253,23 @@ roc_npa_batch_alloc_wait(uint64_t *cache_line, unsigned int wait_us)
break;
}
+/*
+ * Count the number of pointers in a single batch alloc cache line.
+ */
+static inline unsigned int
+roc_npa_aura_batch_alloc_count_line(uint64_t *line, unsigned int wait_us)
+{
+ struct npa_batch_alloc_status_s *status;
+
+ status = (struct npa_batch_alloc_status_s *)line;
+ roc_npa_batch_alloc_wait(line, wait_us);
+
+ return status->count;
+}
+
+/*
+ * Count the number of pointers in a sequence of batch alloc cache lines.
+ */
static inline unsigned int
roc_npa_aura_batch_alloc_count(uint64_t *aligned_buf, unsigned int num,
unsigned int wait_us)
@@ -279,6 +294,40 @@ roc_npa_aura_batch_alloc_count(uint64_t *aligned_buf, unsigned int num,
return count;
}
+/*
+ * Extract allocated pointers from a single batch alloc cache line. This API
+ * only extracts the required number of pointers from the cache line and it
+ * adjusts the status->count so that a subsequent call to this API can
+ * extract the remaining pointers in the cache line appropriately.
+ */
+static inline unsigned int
+roc_npa_aura_batch_alloc_extract_line(uint64_t *buf, uint64_t *line,
+ unsigned int num, unsigned int *rem)
+{
+ struct npa_batch_alloc_status_s *status;
+ unsigned int avail;
+
+ status = (struct npa_batch_alloc_status_s *)line;
+ roc_npa_batch_alloc_wait(line, 0);
+ avail = status->count;
+ num = avail > num ? num : avail;
+ if (num)
+ memcpy(buf, &line[avail - num], num * sizeof(uint64_t));
+ avail -= num;
+ if (avail == 0) {
+ /* Clear the lowest 7 bits of the first pointer */
+ buf[0] &= ~0x7FUL;
+ status->ccode = 0;
+ }
+ status->count = avail;
+ *rem = avail;
+
+ return num;
+}
+
+/*
+ * Extract all allocated pointers from a sequence of batch alloc cache lines.
+ */
static inline unsigned int
roc_npa_aura_batch_alloc_extract(uint64_t *buf, uint64_t *aligned_buf,
unsigned int num)
@@ -330,11 +379,15 @@ roc_npa_aura_op_bulk_free(uint64_t aura_handle, uint64_t const *buf,
}
}
+/*
+ * Issue a batch alloc operation on a sequence of cache lines, wait for the
+ * batch alloc to complete and copy the pointers out into the user buffer.
+ */
static inline unsigned int
roc_npa_aura_op_batch_alloc(uint64_t aura_handle, uint64_t *buf,
- uint64_t *aligned_buf, unsigned int num,
- const int dis_wait, const int drop,
- const int partial)
+ unsigned int num, uint64_t *aligned_buf,
+ unsigned int aligned_buf_sz, const int dis_wait,
+ const int drop, const int partial)
{
unsigned int count, chunk, num_alloc;
@@ -344,9 +397,12 @@ roc_npa_aura_op_batch_alloc(uint64_t aura_handle, uint64_t *buf,
count = 0;
while (num) {
- chunk = (num > ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS) ?
- ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS :
- num;
+ /* Make sure that the pointers allocated fit into the cache
+ * lines reserved.
+ */
+ chunk = aligned_buf_sz / sizeof(uint64_t);
+ chunk = PLT_MIN(num, chunk);
+ chunk = PLT_MIN((int)chunk, ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS);
if (roc_npa_aura_batch_alloc_issue(aura_handle, aligned_buf,
chunk, dis_wait, drop))
--
2.25.1
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH v3] common/cnxk: add new APIs for batch operations
2023-05-26 13:45 [PATCH 1/2] mempool/cnxk: avoid indefinite wait Ashwin Sekhar T K
` (2 preceding siblings ...)
2023-05-29 9:25 ` [PATCH v2 1/2] mempool/cnxk: fix indefinite wait in batch alloc Ashwin Sekhar T K
@ 2023-05-30 9:12 ` Ashwin Sekhar T K
2023-05-30 16:51 ` Jerin Jacob
3 siblings, 1 reply; 7+ messages in thread
From: Ashwin Sekhar T K @ 2023-05-30 9:12 UTC (permalink / raw)
To: dev, Nithin Kumar Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao
Cc: jerinj, pbhagavatula, psatheesh, asekhar, anoobj, gakhil, hkalra
Add new APIs for counting and extracting allocated objects
from a single cache line in the batch alloc memory.
Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com>
---
drivers/common/cnxk/roc_npa.h | 78 ++++++++++++++++++++++++++++++-----
1 file changed, 67 insertions(+), 11 deletions(-)
diff --git a/drivers/common/cnxk/roc_npa.h b/drivers/common/cnxk/roc_npa.h
index e1e164499e..4ad5f044b5 100644
--- a/drivers/common/cnxk/roc_npa.h
+++ b/drivers/common/cnxk/roc_npa.h
@@ -209,7 +209,6 @@ roc_npa_aura_batch_alloc_issue(uint64_t aura_handle, uint64_t *buf,
unsigned int num, const int dis_wait,
const int drop)
{
- unsigned int i;
int64_t *addr;
uint64_t res;
union {
@@ -220,10 +219,6 @@ roc_npa_aura_batch_alloc_issue(uint64_t aura_handle, uint64_t *buf,
if (num > ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS)
return -1;
- /* Zero first word of every cache line */
- for (i = 0; i < num; i += (ROC_ALIGN / sizeof(uint64_t)))
- buf[i] = 0;
-
addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
NPA_LF_AURA_BATCH_ALLOC);
cmp.u = 0;
@@ -240,6 +235,9 @@ roc_npa_aura_batch_alloc_issue(uint64_t aura_handle, uint64_t *buf,
return 0;
}
+/*
+ * Wait for a batch alloc operation on a cache line to complete.
+ */
static inline void
roc_npa_batch_alloc_wait(uint64_t *cache_line, unsigned int wait_us)
{
@@ -255,6 +253,23 @@ roc_npa_batch_alloc_wait(uint64_t *cache_line, unsigned int wait_us)
break;
}
+/*
+ * Count the number of pointers in a single batch alloc cache line.
+ */
+static inline unsigned int
+roc_npa_aura_batch_alloc_count_line(uint64_t *line, unsigned int wait_us)
+{
+ struct npa_batch_alloc_status_s *status;
+
+ status = (struct npa_batch_alloc_status_s *)line;
+ roc_npa_batch_alloc_wait(line, wait_us);
+
+ return status->count;
+}
+
+/*
+ * Count the number of pointers in a sequence of batch alloc cache lines.
+ */
static inline unsigned int
roc_npa_aura_batch_alloc_count(uint64_t *aligned_buf, unsigned int num,
unsigned int wait_us)
@@ -279,6 +294,40 @@ roc_npa_aura_batch_alloc_count(uint64_t *aligned_buf, unsigned int num,
return count;
}
+/*
+ * Extract allocated pointers from a single batch alloc cache line. This API
+ * only extracts the required number of pointers from the cache line and it
+ * adjusts the status->count so that a subsequent call to this API can
+ * extract the remaining pointers in the cache line appropriately.
+ */
+static inline unsigned int
+roc_npa_aura_batch_alloc_extract_line(uint64_t *buf, uint64_t *line,
+ unsigned int num, unsigned int *rem)
+{
+ struct npa_batch_alloc_status_s *status;
+ unsigned int avail;
+
+ status = (struct npa_batch_alloc_status_s *)line;
+ roc_npa_batch_alloc_wait(line, 0);
+ avail = status->count;
+ num = avail > num ? num : avail;
+ if (num)
+ memcpy(buf, &line[avail - num], num * sizeof(uint64_t));
+ avail -= num;
+ if (avail == 0) {
+ /* Clear the lowest 7 bits of the first pointer */
+ buf[0] &= ~0x7FUL;
+ status->ccode = 0;
+ }
+ status->count = avail;
+ *rem = avail;
+
+ return num;
+}
+
+/*
+ * Extract all allocated pointers from a sequence of batch alloc cache lines.
+ */
static inline unsigned int
roc_npa_aura_batch_alloc_extract(uint64_t *buf, uint64_t *aligned_buf,
unsigned int num)
@@ -330,11 +379,15 @@ roc_npa_aura_op_bulk_free(uint64_t aura_handle, uint64_t const *buf,
}
}
+/*
+ * Issue a batch alloc operation on a sequence of cache lines, wait for the
+ * batch alloc to complete and copy the pointers out into the user buffer.
+ */
static inline unsigned int
roc_npa_aura_op_batch_alloc(uint64_t aura_handle, uint64_t *buf,
- uint64_t *aligned_buf, unsigned int num,
- const int dis_wait, const int drop,
- const int partial)
+ unsigned int num, uint64_t *aligned_buf,
+ unsigned int aligned_buf_sz, const int dis_wait,
+ const int drop, const int partial)
{
unsigned int count, chunk, num_alloc;
@@ -344,9 +397,12 @@ roc_npa_aura_op_batch_alloc(uint64_t aura_handle, uint64_t *buf,
count = 0;
while (num) {
- chunk = (num > ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS) ?
- ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS :
- num;
+ /* Make sure that the pointers allocated fit into the cache
+ * lines reserved.
+ */
+ chunk = aligned_buf_sz / sizeof(uint64_t);
+ chunk = PLT_MIN(num, chunk);
+ chunk = PLT_MIN((int)chunk, ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS);
if (roc_npa_aura_batch_alloc_issue(aura_handle, aligned_buf,
chunk, dis_wait, drop))
--
2.25.1
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH v3] common/cnxk: add new APIs for batch operations
2023-05-30 9:12 ` [PATCH v3] " Ashwin Sekhar T K
@ 2023-05-30 16:51 ` Jerin Jacob
0 siblings, 0 replies; 7+ messages in thread
From: Jerin Jacob @ 2023-05-30 16:51 UTC (permalink / raw)
To: Ashwin Sekhar T K
Cc: dev, Nithin Kumar Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
Satha Rao, jerinj, pbhagavatula, psatheesh, anoobj, gakhil,
hkalra
On Tue, May 30, 2023 at 2:43 PM Ashwin Sekhar T K <asekhar@marvell.com> wrote:
>
> Add new APIs for counting and extracting allocated objects
> from a single cache line in the batch alloc memory.
>
> Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com>
Applied to dpdk-next-net-mrvl/for-next-net. Thanks
> ---
> drivers/common/cnxk/roc_npa.h | 78 ++++++++++++++++++++++++++++++-----
> 1 file changed, 67 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/common/cnxk/roc_npa.h b/drivers/common/cnxk/roc_npa.h
> index e1e164499e..4ad5f044b5 100644
> --- a/drivers/common/cnxk/roc_npa.h
> +++ b/drivers/common/cnxk/roc_npa.h
> @@ -209,7 +209,6 @@ roc_npa_aura_batch_alloc_issue(uint64_t aura_handle, uint64_t *buf,
> unsigned int num, const int dis_wait,
> const int drop)
> {
> - unsigned int i;
> int64_t *addr;
> uint64_t res;
> union {
> @@ -220,10 +219,6 @@ roc_npa_aura_batch_alloc_issue(uint64_t aura_handle, uint64_t *buf,
> if (num > ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS)
> return -1;
>
> - /* Zero first word of every cache line */
> - for (i = 0; i < num; i += (ROC_ALIGN / sizeof(uint64_t)))
> - buf[i] = 0;
> -
> addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
> NPA_LF_AURA_BATCH_ALLOC);
> cmp.u = 0;
> @@ -240,6 +235,9 @@ roc_npa_aura_batch_alloc_issue(uint64_t aura_handle, uint64_t *buf,
> return 0;
> }
>
> +/*
> + * Wait for a batch alloc operation on a cache line to complete.
> + */
> static inline void
> roc_npa_batch_alloc_wait(uint64_t *cache_line, unsigned int wait_us)
> {
> @@ -255,6 +253,23 @@ roc_npa_batch_alloc_wait(uint64_t *cache_line, unsigned int wait_us)
> break;
> }
>
> +/*
> + * Count the number of pointers in a single batch alloc cache line.
> + */
> +static inline unsigned int
> +roc_npa_aura_batch_alloc_count_line(uint64_t *line, unsigned int wait_us)
> +{
> + struct npa_batch_alloc_status_s *status;
> +
> + status = (struct npa_batch_alloc_status_s *)line;
> + roc_npa_batch_alloc_wait(line, wait_us);
> +
> + return status->count;
> +}
> +
> +/*
> + * Count the number of pointers in a sequence of batch alloc cache lines.
> + */
> static inline unsigned int
> roc_npa_aura_batch_alloc_count(uint64_t *aligned_buf, unsigned int num,
> unsigned int wait_us)
> @@ -279,6 +294,40 @@ roc_npa_aura_batch_alloc_count(uint64_t *aligned_buf, unsigned int num,
> return count;
> }
>
> +/*
> + * Extract allocated pointers from a single batch alloc cache line. This API
> + * only extracts the required number of pointers from the cache line and it
> + * adjusts the status->count so that a subsequent call to this API can
> + * extract the remaining pointers in the cache line appropriately.
> + */
> +static inline unsigned int
> +roc_npa_aura_batch_alloc_extract_line(uint64_t *buf, uint64_t *line,
> + unsigned int num, unsigned int *rem)
> +{
> + struct npa_batch_alloc_status_s *status;
> + unsigned int avail;
> +
> + status = (struct npa_batch_alloc_status_s *)line;
> + roc_npa_batch_alloc_wait(line, 0);
> + avail = status->count;
> + num = avail > num ? num : avail;
> + if (num)
> + memcpy(buf, &line[avail - num], num * sizeof(uint64_t));
> + avail -= num;
> + if (avail == 0) {
> + /* Clear the lowest 7 bits of the first pointer */
> + buf[0] &= ~0x7FUL;
> + status->ccode = 0;
> + }
> + status->count = avail;
> + *rem = avail;
> +
> + return num;
> +}
> +
> +/*
> + * Extract all allocated pointers from a sequence of batch alloc cache lines.
> + */
> static inline unsigned int
> roc_npa_aura_batch_alloc_extract(uint64_t *buf, uint64_t *aligned_buf,
> unsigned int num)
> @@ -330,11 +379,15 @@ roc_npa_aura_op_bulk_free(uint64_t aura_handle, uint64_t const *buf,
> }
> }
>
> +/*
> + * Issue a batch alloc operation on a sequence of cache lines, wait for the
> + * batch alloc to complete and copy the pointers out into the user buffer.
> + */
> static inline unsigned int
> roc_npa_aura_op_batch_alloc(uint64_t aura_handle, uint64_t *buf,
> - uint64_t *aligned_buf, unsigned int num,
> - const int dis_wait, const int drop,
> - const int partial)
> + unsigned int num, uint64_t *aligned_buf,
> + unsigned int aligned_buf_sz, const int dis_wait,
> + const int drop, const int partial)
> {
> unsigned int count, chunk, num_alloc;
>
> @@ -344,9 +397,12 @@ roc_npa_aura_op_batch_alloc(uint64_t aura_handle, uint64_t *buf,
>
> count = 0;
> while (num) {
> - chunk = (num > ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS) ?
> - ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS :
> - num;
> + /* Make sure that the pointers allocated fit into the cache
> + * lines reserved.
> + */
> + chunk = aligned_buf_sz / sizeof(uint64_t);
> + chunk = PLT_MIN(num, chunk);
> + chunk = PLT_MIN((int)chunk, ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS);
>
> if (roc_npa_aura_batch_alloc_issue(aura_handle, aligned_buf,
> chunk, dis_wait, drop))
> --
> 2.25.1
>
^ permalink raw reply [flat|nested] 7+ messages in thread