From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id CB16942C4D; Wed, 7 Jun 2023 13:43:12 +0200 (CEST) Received: from mails.dpdk.org (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 5B89B40A84; Wed, 7 Jun 2023 13:43:12 +0200 (CEST) Received: from mx0b-0016f401.pphosted.com (mx0b-0016f401.pphosted.com [67.231.156.173]) by mails.dpdk.org (Postfix) with ESMTP id 99EBA40698 for ; Wed, 7 Jun 2023 13:43:11 +0200 (CEST) Received: from pps.filterd (m0045851.ppops.net [127.0.0.1]) by mx0b-0016f401.pphosted.com (8.17.1.19/8.17.1.19) with ESMTP id 357BJ4pO031514 for ; Wed, 7 Jun 2023 04:43:11 -0700 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=marvell.com; h=from : to : cc : subject : date : message-id : mime-version : content-type; s=pfpt0220; bh=j4nTHeWsI9YsjE7ZR3eSYP8wMTnSuGtiv26LZLCK7Ck=; b=a8x2AEKl8pEU7D6kImBf9VTRHJPJiTTWmkruD9u3lMu+eeVF8sTohFEF/AdHH8CiNDtW SR9ByrfKmKv4vMitylikcr1aE3tc4t42uLFZYUiaO3OmCLUSwzwFeljYKPSBDhSCmlVa 34b+8oLus2DkFlLHnVLBcatymx9OLggcIw3/yvZAxicPYTa8Y5i9oeVKEjAZkuV4Kd31 llejmjtPT/kK1ABpHp8Isymx8XHltl5FVv0bud8KbfqpGmBBAxHNNhvQTzq/G8OjpqLx Q2n2amrLXp8qVSXfwKacokMZdM4+vgb6B4G93pLyLHY9iB+oefxwR5tBATeEPnNccr7l aQ== Received: from dc5-exch01.marvell.com ([199.233.59.181]) by mx0b-0016f401.pphosted.com (PPS) with ESMTPS id 3r2a75afw2-1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-SHA384 bits=256 verify=NOT) for ; Wed, 07 Jun 2023 04:43:10 -0700 Received: from DC5-EXCH02.marvell.com (10.69.176.39) by DC5-EXCH01.marvell.com (10.69.176.38) with Microsoft SMTP Server (TLS) id 15.0.1497.48; Wed, 7 Jun 2023 04:43:08 -0700 Received: from maili.marvell.com (10.69.176.80) by DC5-EXCH02.marvell.com (10.69.176.39) with Microsoft SMTP Server id 15.0.1497.48 via Frontend Transport; Wed, 7 Jun 2023 04:43:08 -0700 Received: from ml-host-33.caveonetworks.com (unknown [10.110.143.233]) by maili.marvell.com (Postfix) with ESMTP id 7DFBD3F7045; Wed, 
7 Jun 2023 04:43:08 -0700 (PDT) From: Srikanth Yalavarthi To: Srikanth Yalavarthi CC: , , , Subject: [PATCH v1] ml/cnxk: enable support for scratch relocation Date: Wed, 7 Jun 2023 04:43:06 -0700 Message-ID: <20230607114306.4156-1-syalavarthi@marvell.com> X-Mailer: git-send-email 2.17.1 MIME-Version: 1.0 Content-Type: text/plain X-Proofpoint-ORIG-GUID: T4Q3KolH9kwYS63gFgbBtbA_GiRvaX1p X-Proofpoint-GUID: T4Q3KolH9kwYS63gFgbBtbA_GiRvaX1p X-Proofpoint-Virus-Version: vendor=baseguard engine=ICAP:2.0.254,Aquarius:18.0.957,Hydra:6.0.573,FMLib:17.11.176.26 definitions=2023-06-07_06,2023-06-07_01,2023-05-22_02 X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Enabled support for relocation of scratch memory. Added support for extended arguments in the load job descriptor to pass the scratch range start, scratch range end and scratch base address. Signed-off-by: Srikanth Yalavarthi --- Depends-on: patch-126427 ("[v1,3/3] ml/cnxk: add support for 32 I/O per model") drivers/ml/cnxk/cn10k_ml_dev.h | 21 +++++++++++++++++++-- drivers/ml/cnxk/cn10k_ml_model.h | 3 +++ drivers/ml/cnxk/cn10k_ml_ops.c | 22 ++++++++++++++++++---- drivers/ml/cnxk/cn10k_ml_ops.h | 3 +++ 4 files changed, 43 insertions(+), 6 deletions(-) diff --git a/drivers/ml/cnxk/cn10k_ml_dev.h b/drivers/ml/cnxk/cn10k_ml_dev.h index 5a8c8206b2..6ca0b0bb6e 100644 --- a/drivers/ml/cnxk/cn10k_ml_dev.h +++ b/drivers/ml/cnxk/cn10k_ml_dev.h @@ -38,6 +38,7 @@ /* ML slow-path job flags */ #define ML_CN10K_SP_FLAGS_OCM_NONRELOCATABLE BIT(0) +#define ML_CN10K_SP_FLAGS_EXTENDED_LOAD_JD BIT(1) /* Poll mode job state */ #define ML_CN10K_POLL_JOB_START 0 @@ -233,6 +234,22 @@ struct cn10k_ml_jd_header { uint64_t *result; }; +/* Extra arguments for job descriptor */ +union cn10k_ml_jd_extended_args { + struct cn10k_ml_jd_extended_args_section_start { + /** DDR Scratch base address */ + uint64_t 
ddr_scratch_base_address; + + /** DDR Scratch range start */ + uint64_t ddr_scratch_range_start; + + /** DDR Scratch range end */ + uint64_t ddr_scratch_range_end; + + uint8_t rsvd[104]; + } start; +}; + /* Job descriptor structure */ struct cn10k_ml_jd { /* Job descriptor header (32 bytes) */ @@ -256,8 +273,8 @@ struct cn10k_ml_jd { } fw_load; struct cn10k_ml_jd_section_model_start { - /* Source model start address in DDR relative to ML_MLR_BASE */ - uint64_t model_src_ddr_addr; + /* Extended arguments */ + uint64_t extended_args; /* Destination model start address in DDR relative to ML_MLR_BASE */ uint64_t model_dst_ddr_addr; diff --git a/drivers/ml/cnxk/cn10k_ml_model.h b/drivers/ml/cnxk/cn10k_ml_model.h index fd3e235221..1f689363fc 100644 --- a/drivers/ml/cnxk/cn10k_ml_model.h +++ b/drivers/ml/cnxk/cn10k_ml_model.h @@ -398,6 +398,9 @@ struct cn10k_ml_model_addr { /* Weights and bias load address */ void *wb_load_addr; + /* Scratch base address */ + void *scratch_base_addr; + /* Start tile */ uint8_t tile_start; diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c index c9d78ef571..656467d891 100644 --- a/drivers/ml/cnxk/cn10k_ml_ops.c +++ b/drivers/ml/cnxk/cn10k_ml_ops.c @@ -416,8 +416,10 @@ cn10k_ml_prep_sp_job_descriptor(struct cn10k_ml_dev *mldev, struct cn10k_ml_mode req->jd.hdr.sp_flags = ML_CN10K_SP_FLAGS_OCM_NONRELOCATABLE; else req->jd.hdr.sp_flags = 0x0; - req->jd.model_start.model_src_ddr_addr = - PLT_U64_CAST(roc_ml_addr_ap2mlip(&mldev->roc, addr->init_load_addr)); + + req->jd.hdr.sp_flags |= ML_CN10K_SP_FLAGS_EXTENDED_LOAD_JD; + req->jd.model_start.extended_args = + PLT_U64_CAST(roc_ml_addr_ap2mlip(&mldev->roc, &req->extended_args)); req->jd.model_start.model_dst_ddr_addr = PLT_U64_CAST(roc_ml_addr_ap2mlip(&mldev->roc, addr->init_run_addr)); req->jd.model_start.model_init_offset = 0x0; @@ -448,6 +450,13 @@ cn10k_ml_prep_sp_job_descriptor(struct cn10k_ml_dev *mldev, struct cn10k_ml_mode 
req->jd.model_start.output.s.ddr_range_start = metadata->model.ddr_output_range_start; req->jd.model_start.output.s.ddr_range_end = metadata->model.ddr_output_range_end; + + req->extended_args.start.ddr_scratch_base_address = PLT_U64_CAST( + roc_ml_addr_ap2mlip(&mldev->roc, model->addr.scratch_base_addr)); + req->extended_args.start.ddr_scratch_range_start = + metadata->model.ddr_scratch_range_start; + req->extended_args.start.ddr_scratch_range_end = + metadata->model.ddr_scratch_range_end; } } @@ -1616,6 +1625,7 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params, char str[RTE_MEMZONE_NAMESIZE]; const struct plt_memzone *mz; + size_t model_scratch_size; size_t model_stats_size; size_t model_data_size; size_t model_info_size; @@ -1657,6 +1667,9 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params, metadata = (struct cn10k_ml_model_metadata *)params->addr; model_data_size = metadata->init_model.file_size + metadata->main_model.file_size + metadata->finish_model.file_size + metadata->weights_bias.file_size; + model_scratch_size = PLT_ALIGN_CEIL(metadata->model.ddr_scratch_range_end - + metadata->model.ddr_scratch_range_start + 1, + ML_CN10K_ALIGN_SIZE); model_data_size = PLT_ALIGN_CEIL(model_data_size, ML_CN10K_ALIGN_SIZE); model_info_size = sizeof(struct rte_ml_model_info) + metadata->model.num_input * sizeof(struct rte_ml_io_info) + @@ -1665,7 +1678,7 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params, model_stats_size = (dev->data->nb_queue_pairs + 1) * sizeof(struct cn10k_ml_model_stats); mz_size = PLT_ALIGN_CEIL(sizeof(struct cn10k_ml_model), ML_CN10K_ALIGN_SIZE) + - 2 * model_data_size + model_info_size + + 2 * model_data_size + model_scratch_size + model_info_size + PLT_ALIGN_CEIL(sizeof(struct cn10k_ml_req), ML_CN10K_ALIGN_SIZE) + model_stats_size; @@ -1694,6 +1707,7 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params, base_dma_addr = 
PLT_PTR_ADD( mz->addr, PLT_ALIGN_CEIL(sizeof(struct cn10k_ml_model), ML_CN10K_ALIGN_SIZE)); cn10k_ml_model_addr_update(model, params->addr, base_dma_addr); + model->addr.scratch_base_addr = PLT_PTR_ADD(base_dma_addr, 2 * model_data_size); /* Copy data from load to run. run address to be used by MLIP */ rte_memcpy(model->addr.base_dma_addr_run, model->addr.base_dma_addr_load, model_data_size); @@ -1707,7 +1721,7 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params, model->model_mem_map.scratch_pages = scratch_pages; /* Set model info */ - model->info = PLT_PTR_ADD(base_dma_addr, 2 * model_data_size); + model->info = PLT_PTR_ADD(model->addr.scratch_base_addr, model_scratch_size); cn10k_ml_model_info_set(dev, model); /* Set slow-path request address and state */ diff --git a/drivers/ml/cnxk/cn10k_ml_ops.h b/drivers/ml/cnxk/cn10k_ml_ops.h index 58c992720a..d64a9f27e6 100644 --- a/drivers/ml/cnxk/cn10k_ml_ops.h +++ b/drivers/ml/cnxk/cn10k_ml_ops.h @@ -17,6 +17,9 @@ struct cn10k_ml_req { /* Job descriptor */ struct cn10k_ml_jd jd; + /* Job descriptor extra arguments */ + union cn10k_ml_jd_extended_args extended_args; + /* Job result */ struct cn10k_ml_result result; -- 2.17.1