DPDK patches and discussions
From: Srikanth Yalavarthi <syalavarthi@marvell.com>
To: Srikanth Yalavarthi <syalavarthi@marvell.com>
Cc: <dev@dpdk.org>, <sshankarnara@marvell.com>, <aprabhu@marvell.com>,
	<ptakkar@marvell.com>
Subject: [PATCH v1] ml/cnxk: enable support for scratch relocation
Date: Wed, 7 Jun 2023 04:43:06 -0700
Message-ID: <20230607114306.4156-1-syalavarthi@marvell.com>

Enabled support for relocation of scratch memory. Added support for
extended arguments in the load job descriptor, which carry the scratch
range start, scratch range end and scratch base address.

Signed-off-by: Srikanth Yalavarthi <syalavarthi@marvell.com>
---
Depends-on: patch-126427 ("[v1,3/3] ml/cnxk: add support for 32 I/O per model")
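
Note for reviewers: below is a minimal, standalone sketch of how the new
extended-arguments section is intended to be wired into the load job
descriptor. The union layout mirrors this patch; ap2mlip(), the scratch
buffer and main() are simplified placeholders added only for illustration
and are not part of the driver.

/*
 * Illustrative only: simplified stand-ins for the driver structures.
 * Field names follow the patch; the address translation and request
 * layout are placeholders, not the real ROC/driver API.
 */
#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

union jd_extended_args {
	struct {
		uint64_t ddr_scratch_base_address; /* MLIP view of scratch base */
		uint64_t ddr_scratch_range_start;  /* start offset from metadata */
		uint64_t ddr_scratch_range_end;    /* end offset from metadata */
		uint8_t rsvd[104];                 /* pads the section to 128 bytes */
	} start;
};

/* Hypothetical stand-in for roc_ml_addr_ap2mlip(): identity mapping here. */
static uint64_t
ap2mlip(void *addr)
{
	return (uint64_t)(uintptr_t)addr;
}

int
main(void)
{
	static uint8_t scratch[4096]; /* pretend DMA-able scratch region */
	union jd_extended_args ext = {0};

	/* The extended-args section is expected to stay 128 bytes wide. */
	assert(sizeof(ext.start) == 128);

	ext.start.ddr_scratch_base_address = ap2mlip(scratch);
	ext.start.ddr_scratch_range_start = 0x0;
	ext.start.ddr_scratch_range_end = sizeof(scratch) - 1;

	/*
	 * The load JD would then carry ap2mlip(&ext) in
	 * model_start.extended_args and set
	 * ML_CN10K_SP_FLAGS_EXTENDED_LOAD_JD in hdr.sp_flags.
	 */
	printf("extended args at 0x%" PRIx64 ", scratch range 0x%" PRIx64 "-0x%" PRIx64 "\n",
	       ap2mlip(&ext), ext.start.ddr_scratch_range_start,
	       ext.start.ddr_scratch_range_end);
	return 0;
}

In the driver itself, cn10k_ml_prep_sp_job_descriptor() fills
req->extended_args and points jd.model_start.extended_args at its
MLIP-translated address, as shown in the diff below.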

 drivers/ml/cnxk/cn10k_ml_dev.h   | 21 +++++++++++++++++++--
 drivers/ml/cnxk/cn10k_ml_model.h |  3 +++
 drivers/ml/cnxk/cn10k_ml_ops.c   | 22 ++++++++++++++++++----
 drivers/ml/cnxk/cn10k_ml_ops.h   |  3 +++
 4 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/drivers/ml/cnxk/cn10k_ml_dev.h b/drivers/ml/cnxk/cn10k_ml_dev.h
index 5a8c8206b2..6ca0b0bb6e 100644
--- a/drivers/ml/cnxk/cn10k_ml_dev.h
+++ b/drivers/ml/cnxk/cn10k_ml_dev.h
@@ -38,6 +38,7 @@
 
 /* ML slow-path job flags */
 #define ML_CN10K_SP_FLAGS_OCM_NONRELOCATABLE BIT(0)
+#define ML_CN10K_SP_FLAGS_EXTENDED_LOAD_JD   BIT(1)
 
 /* Poll mode job state */
 #define ML_CN10K_POLL_JOB_START	 0
@@ -233,6 +234,22 @@ struct cn10k_ml_jd_header {
 	uint64_t *result;
 };
 
+/* Extra arguments for job descriptor */
+union cn10k_ml_jd_extended_args {
+	struct cn10k_ml_jd_extended_args_section_start {
+		/** DDR Scratch base address */
+		uint64_t ddr_scratch_base_address;
+
+		/** DDR Scratch range start */
+		uint64_t ddr_scratch_range_start;
+
+		/** DDR Scratch range end */
+		uint64_t ddr_scratch_range_end;
+
+		uint8_t rsvd[104];
+	} start;
+};
+
 /* Job descriptor structure */
 struct cn10k_ml_jd {
 	/* Job descriptor header (32 bytes) */
@@ -256,8 +273,8 @@ struct cn10k_ml_jd {
 		} fw_load;
 
 		struct cn10k_ml_jd_section_model_start {
-			/* Source model start address in DDR relative to ML_MLR_BASE */
-			uint64_t model_src_ddr_addr;
+			/* Extended arguments */
+			uint64_t extended_args;
 
 			/* Destination model start address in DDR relative to ML_MLR_BASE */
 			uint64_t model_dst_ddr_addr;
diff --git a/drivers/ml/cnxk/cn10k_ml_model.h b/drivers/ml/cnxk/cn10k_ml_model.h
index fd3e235221..1f689363fc 100644
--- a/drivers/ml/cnxk/cn10k_ml_model.h
+++ b/drivers/ml/cnxk/cn10k_ml_model.h
@@ -398,6 +398,9 @@ struct cn10k_ml_model_addr {
 	/* Weights and bias load address */
 	void *wb_load_addr;
 
+	/* Scratch base address */
+	void *scratch_base_addr;
+
 	/* Start tile */
 	uint8_t tile_start;
 
diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c
index c9d78ef571..656467d891 100644
--- a/drivers/ml/cnxk/cn10k_ml_ops.c
+++ b/drivers/ml/cnxk/cn10k_ml_ops.c
@@ -416,8 +416,10 @@ cn10k_ml_prep_sp_job_descriptor(struct cn10k_ml_dev *mldev, struct cn10k_ml_mode
 			req->jd.hdr.sp_flags = ML_CN10K_SP_FLAGS_OCM_NONRELOCATABLE;
 		else
 			req->jd.hdr.sp_flags = 0x0;
-		req->jd.model_start.model_src_ddr_addr =
-			PLT_U64_CAST(roc_ml_addr_ap2mlip(&mldev->roc, addr->init_load_addr));
+
+		req->jd.hdr.sp_flags |= ML_CN10K_SP_FLAGS_EXTENDED_LOAD_JD;
+		req->jd.model_start.extended_args =
+			PLT_U64_CAST(roc_ml_addr_ap2mlip(&mldev->roc, &req->extended_args));
 		req->jd.model_start.model_dst_ddr_addr =
 			PLT_U64_CAST(roc_ml_addr_ap2mlip(&mldev->roc, addr->init_run_addr));
 		req->jd.model_start.model_init_offset = 0x0;
@@ -448,6 +450,13 @@ cn10k_ml_prep_sp_job_descriptor(struct cn10k_ml_dev *mldev, struct cn10k_ml_mode
 		req->jd.model_start.output.s.ddr_range_start =
 			metadata->model.ddr_output_range_start;
 		req->jd.model_start.output.s.ddr_range_end = metadata->model.ddr_output_range_end;
+
+		req->extended_args.start.ddr_scratch_base_address = PLT_U64_CAST(
+			roc_ml_addr_ap2mlip(&mldev->roc, model->addr.scratch_base_addr));
+		req->extended_args.start.ddr_scratch_range_start =
+			metadata->model.ddr_scratch_range_start;
+		req->extended_args.start.ddr_scratch_range_end =
+			metadata->model.ddr_scratch_range_end;
 	}
 }
 
@@ -1616,6 +1625,7 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
 
 	char str[RTE_MEMZONE_NAMESIZE];
 	const struct plt_memzone *mz;
+	size_t model_scratch_size;
 	size_t model_stats_size;
 	size_t model_data_size;
 	size_t model_info_size;
@@ -1657,6 +1667,9 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
 	metadata = (struct cn10k_ml_model_metadata *)params->addr;
 	model_data_size = metadata->init_model.file_size + metadata->main_model.file_size +
 			  metadata->finish_model.file_size + metadata->weights_bias.file_size;
+	model_scratch_size = PLT_ALIGN_CEIL(metadata->model.ddr_scratch_range_end -
+						    metadata->model.ddr_scratch_range_start + 1,
+					    ML_CN10K_ALIGN_SIZE);
 	model_data_size = PLT_ALIGN_CEIL(model_data_size, ML_CN10K_ALIGN_SIZE);
 	model_info_size = sizeof(struct rte_ml_model_info) +
 			  metadata->model.num_input * sizeof(struct rte_ml_io_info) +
@@ -1665,7 +1678,7 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
 	model_stats_size = (dev->data->nb_queue_pairs + 1) * sizeof(struct cn10k_ml_model_stats);
 
 	mz_size = PLT_ALIGN_CEIL(sizeof(struct cn10k_ml_model), ML_CN10K_ALIGN_SIZE) +
-		  2 * model_data_size + model_info_size +
+		  2 * model_data_size + model_scratch_size + model_info_size +
 		  PLT_ALIGN_CEIL(sizeof(struct cn10k_ml_req), ML_CN10K_ALIGN_SIZE) +
 		  model_stats_size;
 
@@ -1694,6 +1707,7 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
 	base_dma_addr = PLT_PTR_ADD(
 		mz->addr, PLT_ALIGN_CEIL(sizeof(struct cn10k_ml_model), ML_CN10K_ALIGN_SIZE));
 	cn10k_ml_model_addr_update(model, params->addr, base_dma_addr);
+	model->addr.scratch_base_addr = PLT_PTR_ADD(base_dma_addr, 2 * model_data_size);
 
 	/* Copy data from load to run. run address to be used by MLIP */
 	rte_memcpy(model->addr.base_dma_addr_run, model->addr.base_dma_addr_load, model_data_size);
@@ -1707,7 +1721,7 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
 	model->model_mem_map.scratch_pages = scratch_pages;
 
 	/* Set model info */
-	model->info = PLT_PTR_ADD(base_dma_addr, 2 * model_data_size);
+	model->info = PLT_PTR_ADD(model->addr.scratch_base_addr, model_scratch_size);
 	cn10k_ml_model_info_set(dev, model);
 
 	/* Set slow-path request address and state */
diff --git a/drivers/ml/cnxk/cn10k_ml_ops.h b/drivers/ml/cnxk/cn10k_ml_ops.h
index 58c992720a..d64a9f27e6 100644
--- a/drivers/ml/cnxk/cn10k_ml_ops.h
+++ b/drivers/ml/cnxk/cn10k_ml_ops.h
@@ -17,6 +17,9 @@ struct cn10k_ml_req {
 	/* Job descriptor */
 	struct cn10k_ml_jd jd;
 
+	/* Job descriptor extra arguments */
+	union cn10k_ml_jd_extended_args extended_args;
+
 	/* Job result */
 	struct cn10k_ml_result result;
 
-- 
2.17.1

