From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <dev-bounces@dpdk.org>
Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124])
	by inbox.dpdk.org (Postfix) with ESMTP id 31BB641B9D;
	Wed,  1 Feb 2023 10:25:53 +0100 (CET)
Received: from mails.dpdk.org (localhost [127.0.0.1])
	by mails.dpdk.org (Postfix) with ESMTP id A445A42FC4;
	Wed,  1 Feb 2023 10:23:45 +0100 (CET)
Received: from mx0b-0016f401.pphosted.com (mx0a-0016f401.pphosted.com
 [67.231.148.174])
 by mails.dpdk.org (Postfix) with ESMTP id 5FAD142D29
 for <dev@dpdk.org>; Wed,  1 Feb 2023 10:23:24 +0100 (CET)
Received: from pps.filterd (m0045849.ppops.net [127.0.0.1])
 by mx0a-0016f401.pphosted.com (8.17.1.19/8.17.1.19) with ESMTP id
 3116LRY3024189 for <dev@dpdk.org>; Wed, 1 Feb 2023 01:23:23 -0800
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=marvell.com;
 h=from : to : cc :
 subject : date : message-id : in-reply-to : references : mime-version :
 content-type; s=pfpt0220; bh=WI+s8waNHLHZmrv/uq349SRNHONgcYg1LjVDzGrkK98=;
 b=J8xYPQja4x4dYfKzXRGVTvvaxFDk+o/vLIsc1+m0eHnc6WGZOT80BmnQJ8B0B4q1PsSS
 kfwXggZMAZ9UnIwVD2zOxIhW4sKR06aRN6n0A891X9DIUWiUqHpBTE/MMywSh90UmRTo
 OgOIr5CnmfigzX2aQQM9tFKaOVkMbNxHm17HcFIVXc2QAc4ELjzauwXqBDoixjlFAxRu
 Rf9cHDte1HzgG8MVZo01qsFk3+TIBopGhbdxdIi5kGZgV3fHebjQ1dnqxkIO+La1aoi/
 vJu9WfPhI0zxcgB2kiNJWuuzrgCDGyNKxqhFvo6FTisqeUb5fgAr7PoU0uw2rLrofFUX vA== 
Received: from dc5-exch02.marvell.com ([199.233.59.182])
 by mx0a-0016f401.pphosted.com (PPS) with ESMTPS id 3nfjr8rgv6-4
 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-SHA384 bits=256 verify=NOT)
 for <dev@dpdk.org>; Wed, 01 Feb 2023 01:23:23 -0800
Received: from DC5-EXCH02.marvell.com (10.69.176.39) by DC5-EXCH02.marvell.com
 (10.69.176.39) with Microsoft SMTP Server (TLS) id 15.0.1497.42;
 Wed, 1 Feb 2023 01:23:19 -0800
Received: from maili.marvell.com (10.69.176.80) by DC5-EXCH02.marvell.com
 (10.69.176.39) with Microsoft SMTP Server id 15.0.1497.42 via Frontend
 Transport; Wed, 1 Feb 2023 01:23:19 -0800
Received: from ml-host-33.caveonetworks.com (unknown [10.110.143.233])
 by maili.marvell.com (Postfix) with ESMTP id 448903F70E8;
 Wed,  1 Feb 2023 01:23:17 -0800 (PST)
From: Srikanth Yalavarthi <syalavarthi@marvell.com>
To: Srikanth Yalavarthi <syalavarthi@marvell.com>
CC: <dev@dpdk.org>, <sshankarnara@marvell.com>, <jerinj@marvell.com>,
 <aprabhu@marvell.com>
Subject: [PATCH v4 14/39] ml/cnxk: add internal structures for tiles and OCM
Date: Wed, 1 Feb 2023 01:22:45 -0800
Message-ID: <20230201092310.23252-15-syalavarthi@marvell.com>
X-Mailer: git-send-email 2.17.1
In-Reply-To: <20230201092310.23252-1-syalavarthi@marvell.com>
References: <20221208200220.20267-1-syalavarthi@marvell.com>
 <20230201092310.23252-1-syalavarthi@marvell.com>
MIME-Version: 1.0
Content-Type: text/plain
X-Proofpoint-GUID: 7j0h5b2TIk2AlWmHVX57So4_VMcXooSX
X-Proofpoint-ORIG-GUID: 7j0h5b2TIk2AlWmHVX57So4_VMcXooSX
X-Proofpoint-Virus-Version: vendor=baseguard
 engine=ICAP:2.0.219,Aquarius:18.0.930,Hydra:6.0.562,FMLib:17.11.122.1
 definitions=2023-02-01_03,2023-01-31_01,2022-06-22_01
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: DPDK patches and discussions <dev.dpdk.org>
List-Unsubscribe: <https://mails.dpdk.org/options/dev>,
 <mailto:dev-request@dpdk.org?subject=unsubscribe>
List-Archive: <http://mails.dpdk.org/archives/dev/>
List-Post: <mailto:dev@dpdk.org>
List-Help: <mailto:dev-request@dpdk.org?subject=help>
List-Subscribe: <https://mails.dpdk.org/listinfo/dev>,
 <mailto:dev-request@dpdk.org?subject=subscribe>
Errors-To: dev-bounces@dpdk.org

Add internal structures to hold tile and OCM information and the
OCM-to-model memory mapping. Initialize the fields to platform-specific
defaults and compute the OCM / tile requirements of a model.

Signed-off-by: Srikanth Yalavarthi <syalavarthi@marvell.com>
---
 drivers/ml/cnxk/cn10k_ml_dev.h   |  5 ++
 drivers/ml/cnxk/cn10k_ml_model.c | 53 +++++++++++++++++++++
 drivers/ml/cnxk/cn10k_ml_model.h |  6 +++
 drivers/ml/cnxk/cn10k_ml_ocm.c   |  5 ++
 drivers/ml/cnxk/cn10k_ml_ocm.h   | 79 ++++++++++++++++++++++++++++++++
 drivers/ml/cnxk/cn10k_ml_ops.c   | 29 ++++++++++++
 drivers/ml/cnxk/meson.build      |  2 +
 7 files changed, 179 insertions(+)
 create mode 100644 drivers/ml/cnxk/cn10k_ml_ocm.c
 create mode 100644 drivers/ml/cnxk/cn10k_ml_ocm.h

diff --git a/drivers/ml/cnxk/cn10k_ml_dev.h b/drivers/ml/cnxk/cn10k_ml_dev.h
index 7cf6268115..02a4496c97 100644
--- a/drivers/ml/cnxk/cn10k_ml_dev.h
+++ b/drivers/ml/cnxk/cn10k_ml_dev.h
@@ -7,6 +7,8 @@
 
 #include <roc_api.h>
 
+#include "cn10k_ml_ocm.h"
+
 /* Marvell OCTEON CN10K ML PMD device name */
 #define MLDEV_NAME_CN10K_PMD ml_cn10k
 
@@ -215,6 +217,9 @@ struct cn10k_ml_dev {
 	/* Firmware */
 	struct cn10k_ml_fw fw;
 
+	/* OCM info */
+	struct cn10k_ml_ocm ocm;
+
 	/* Number of models loaded */
 	uint16_t nb_models_loaded;
 };
diff --git a/drivers/ml/cnxk/cn10k_ml_model.c b/drivers/ml/cnxk/cn10k_ml_model.c
index dafcae106b..30911b7ffe 100644
--- a/drivers/ml/cnxk/cn10k_ml_model.c
+++ b/drivers/ml/cnxk/cn10k_ml_model.c
@@ -8,6 +8,7 @@
 
 #include "cn10k_ml_dev.h"
 #include "cn10k_ml_model.h"
+#include "cn10k_ml_ocm.h"
 
 static enum rte_ml_io_type
 cn10k_ml_io_type_map(uint8_t type)
@@ -303,3 +304,55 @@ cn10k_ml_model_addr_update(struct cn10k_ml_model *model, uint8_t *buffer, uint8_
 			   addr->output[i].sz_d, addr->output[i].sz_q);
 	}
 }
+
+/* Compute the number of OCM pages a model needs for weights and bias (wb) and for scratch from
+ * the metadata at the start of buffer, and check that both fit within a single tile.
+ * Returns 0 with wb_pages / scratch_pages set, or -ENOMEM with both left unwritten.
+ */
+int
+cn10k_ml_model_ocm_pages_count(struct cn10k_ml_dev *mldev, int16_t model_id, uint8_t *buffer,
+			       uint16_t *wb_pages, uint16_t *scratch_pages)
+{
+	struct cn10k_ml_model_metadata *metadata;
+	struct cn10k_ml_ocm *ocm;
+	uint64_t scratch_size;
+	uint64_t nb_scratch;
+	uint64_t wb_size;
+	uint64_t nb_wb;
+
+	metadata = (struct cn10k_ml_model_metadata *)buffer;
+	ocm = &mldev->ocm;
+
+	/* Assume wb_size is zero for non-relocatable models */
+	if (metadata->model.ocm_relocatable)
+		wb_size = metadata->model.ocm_wb_range_end - metadata->model.ocm_wb_range_start + 1;
+	else
+		wb_size = 0;
+
+	/* Round up to whole pages; counts stay 64-bit until they are range checked below. */
+	nb_wb = wb_size / ocm->page_size + (wb_size % ocm->page_size ? 1 : 0);
+	plt_ml_dbg("model_id = %d, wb_size = %" PRIu64 ", wb_pages = %" PRIu64, model_id, wb_size,
+		   nb_wb);
+
+	scratch_size = ocm->size_per_tile - metadata->model.ocm_tmp_range_floor;
+	nb_scratch = scratch_size / ocm->page_size + (scratch_size % ocm->page_size ? 1 : 0);
+	plt_ml_dbg("model_id = %d, scratch_size = %" PRIu64 ", scratch_pages = %" PRIu64, model_id,
+		   scratch_size, nb_scratch);
+
+	/* Check if the model can be loaded on OCM. Done on the 64-bit counts so that oversized or
+	 * wrapped metadata values cannot truncate into a small uint16_t and pass the check.
+	 */
+	if (nb_wb > ML_CN10K_OCM_NUMPAGES || nb_scratch > ML_CN10K_OCM_NUMPAGES - nb_wb) {
+		plt_err("Cannot create the model, OCM relocatable = %u",
+			metadata->model.ocm_relocatable);
+		plt_err("wb_pages (%" PRIu64 ") + scratch_pages (%" PRIu64 ") > %u", nb_wb, nb_scratch,
+			ML_CN10K_OCM_NUMPAGES);
+		return -ENOMEM;
+	}
+
+	/* A non-relocatable model blocks the full tile, so no other model can share its OCM. */
+	*wb_pages = nb_wb;
+	*scratch_pages = metadata->model.ocm_relocatable ? nb_scratch : ML_CN10K_OCM_NUMPAGES;
+
+	return 0;
+}
diff --git a/drivers/ml/cnxk/cn10k_ml_model.h b/drivers/ml/cnxk/cn10k_ml_model.h
index 7e276c3b12..ebd296c609 100644
--- a/drivers/ml/cnxk/cn10k_ml_model.h
+++ b/drivers/ml/cnxk/cn10k_ml_model.h
@@ -10,6 +10,7 @@
 #include <roc_api.h>
 
 #include "cn10k_ml_dev.h"
+#include "cn10k_ml_ocm.h"
 
 /* Model state */
 enum cn10k_ml_model_state {
@@ -417,6 +418,9 @@ struct cn10k_ml_model {
 	/* Address structure */
 	struct cn10k_ml_model_addr addr;
 
+	/* Tile and memory information object */
+	struct cn10k_ml_ocm_model_map model_mem_map;
+
 	/* Spinlock, used to update model state */
 	plt_spinlock_t lock;
 
@@ -428,5 +432,7 @@ int cn10k_ml_model_metadata_check(uint8_t *buffer, uint64_t size);
 void cn10k_ml_model_metadata_update(struct cn10k_ml_model_metadata *metadata);
 void cn10k_ml_model_addr_update(struct cn10k_ml_model *model, uint8_t *buffer,
 				uint8_t *base_dma_addr);
+int cn10k_ml_model_ocm_pages_count(struct cn10k_ml_dev *mldev, int16_t model_id, uint8_t *buffer,
+				   uint16_t *wb_pages, uint16_t *scratch_pages);
 
 #endif /* _CN10K_ML_MODEL_H_ */
diff --git a/drivers/ml/cnxk/cn10k_ml_ocm.c b/drivers/ml/cnxk/cn10k_ml_ocm.c
new file mode 100644
index 0000000000..b1c62f2963
--- /dev/null
+++ b/drivers/ml/cnxk/cn10k_ml_ocm.c
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2022 Marvell.
+ */
+
+#include "cn10k_ml_ocm.h"
diff --git a/drivers/ml/cnxk/cn10k_ml_ocm.h b/drivers/ml/cnxk/cn10k_ml_ocm.h
new file mode 100644
index 0000000000..44390396f9
--- /dev/null
+++ b/drivers/ml/cnxk/cn10k_ml_ocm.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2022 Marvell.
+ */
+
+#ifndef _CN10K_ML_OCM_H_
+#define _CN10K_ML_OCM_H_
+
+#include <rte_mldev.h>
+
+/* OCM page size, in bytes. */
+#define ML_CN10K_OCM_PAGESIZE 0x4000
+
+/* Number of OCM tiles. */
+#define ML_CN10K_OCM_NUMTILES 0x8
+
+/* OCM size per tile, in bytes. */
+#define ML_CN10K_OCM_TILESIZE 0x100000
+
+/* Number of OCM pages per tile. */
+#define ML_CN10K_OCM_NUMPAGES (ML_CN10K_OCM_TILESIZE / ML_CN10K_OCM_PAGESIZE)
+
+/* Number of 8-bit OCM mask words per tile (NUMPAGES bits in total). */
+#define ML_CN10K_OCM_MASKWORDS (ML_CN10K_OCM_NUMPAGES / 8)
+
+/* OCM and tile information structure: page usage and page bookkeeping for one tile */
+struct cn10k_ml_ocm_tile_info {
+	/* Mask of used / allotted pages on tile's OCM, stored as 8-bit words */
+	uint8_t ocm_mask[ML_CN10K_OCM_MASKWORDS];
+
+	/* Last page in the tile's OCM used for weights and bias, default = -1 */
+	int last_wb_page;
+
+	/* Number of pages used for scratch memory on the tile's OCM */
+	uint16_t scratch_pages;
+};
+
+/* Model OCM map structure: per-model OCM reservation state and page requirements */
+struct cn10k_ml_ocm_model_map {
+	/* Status of OCM reservation, initialized to false at model load */
+	bool ocm_reserved;
+
+	/* Mask of OCM tiles for the model, initialized to 0 at model load */
+	uint64_t tilemask;
+
+	/* Start page for the model load, default = -1 */
+	int wb_page_start;
+
+	/* Number of pages required for weights and bias */
+	uint16_t wb_pages;
+
+	/* Number of pages required for scratch memory */
+	uint16_t scratch_pages;
+};
+
+/* OCM state structure: device-wide OCM dimensions and per-tile status */
+struct cn10k_ml_ocm {
+	/* OCM spinlock, used to update OCM state */
+	rte_spinlock_t lock;
+
+	/* Number of OCM tiles */
+	uint8_t num_tiles;
+
+	/* OCM size per tile, in bytes */
+	uint64_t size_per_tile;
+
+	/* Size of an OCM page, in bytes */
+	uint64_t page_size;
+
+	/* Number of OCM pages per tile */
+	uint16_t num_pages;
+
+	/* Number of 8-bit words per OCM mask */
+	uint16_t mask_words;
+
+	/* OCM memory info and status, one entry per tile */
+	struct cn10k_ml_ocm_tile_info tile_ocm_info[ML_CN10K_OCM_NUMTILES];
+};
+
+#endif /* _CN10K_ML_OCM_H_ */
diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c
index 20f15ec35d..9ccf52332f 100644
--- a/drivers/ml/cnxk/cn10k_ml_ops.c
+++ b/drivers/ml/cnxk/cn10k_ml_ops.c
@@ -126,8 +126,10 @@ cn10k_ml_dev_configure(struct rte_ml_dev *dev, const struct rte_ml_dev_config *c
 	struct rte_ml_dev_info dev_info;
 	struct cn10k_ml_model *model;
 	struct cn10k_ml_dev *mldev;
+	struct cn10k_ml_ocm *ocm;
 	struct cn10k_ml_qp *qp;
 	uint32_t mz_size;
+	uint16_t tile_id;
 	int16_t model_id;
 	uint16_t qp_id;
 	int ret;
@@ -250,6 +252,18 @@ cn10k_ml_dev_configure(struct rte_ml_dev *dev, const struct rte_ml_dev_config *c
 	}
 	dev->data->nb_models = conf->nb_models;
 
+	ocm = &mldev->ocm;
+	ocm->num_tiles = ML_CN10K_OCM_NUMTILES;
+	ocm->size_per_tile = ML_CN10K_OCM_TILESIZE;
+	ocm->page_size = ML_CN10K_OCM_PAGESIZE;
+	ocm->num_pages = ocm->size_per_tile / ocm->page_size;
+	ocm->mask_words = ocm->num_pages / (8 * sizeof(uint8_t));
+
+	for (tile_id = 0; tile_id < ocm->num_tiles; tile_id++)
+		ocm->tile_ocm_info[tile_id].last_wb_page = -1;
+
+	rte_spinlock_init(&ocm->lock);
+
 	mldev->nb_models_loaded = 0;
 	mldev->state = ML_CN10K_DEV_STATE_CONFIGURED;
 
@@ -416,6 +430,8 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
 	const struct plt_memzone *mz;
 	size_t model_data_size;
 	uint8_t *base_dma_addr;
+	uint16_t scratch_pages;
+	uint16_t wb_pages;
 	uint64_t mz_size;
 	uint16_t idx;
 	bool found;
@@ -441,6 +457,11 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
 		return -ENOMEM;
 	}
 
+	/* Get WB and scratch pages, check if model can be loaded. */
+	ret = cn10k_ml_model_ocm_pages_count(mldev, idx, params->addr, &wb_pages, &scratch_pages);
+	if (ret < 0)
+		return ret;
+
 	/* Compute memzone size */
 	metadata = (struct cn10k_ml_model_metadata *)params->addr;
 	model_data_size = metadata->init_model.file_size + metadata->main_model.file_size +
@@ -478,6 +499,14 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params,
 	/* Copy data from load to run. run address to be used by MLIP */
 	rte_memcpy(model->addr.base_dma_addr_run, model->addr.base_dma_addr_load, model_data_size);
 
+	/* Initialize model_mem_map */
+	memset(&model->model_mem_map, 0, sizeof(struct cn10k_ml_ocm_model_map));
+	model->model_mem_map.ocm_reserved = false;
+	model->model_mem_map.tilemask = 0;
+	model->model_mem_map.wb_page_start = -1;
+	model->model_mem_map.wb_pages = wb_pages;
+	model->model_mem_map.scratch_pages = scratch_pages;
+
 	plt_spinlock_init(&model->lock);
 	model->state = ML_CN10K_MODEL_STATE_LOADED;
 	dev->data->models[idx] = model;
diff --git a/drivers/ml/cnxk/meson.build b/drivers/ml/cnxk/meson.build
index 799e8f2470..393bc629b0 100644
--- a/drivers/ml/cnxk/meson.build
+++ b/drivers/ml/cnxk/meson.build
@@ -11,12 +11,14 @@ driver_sdk_headers = files(
         'cn10k_ml_dev.h',
         'cn10k_ml_ops.h',
         'cn10k_ml_model.h',
+        'cn10k_ml_ocm.h',
 )
 
 sources = files(
         'cn10k_ml_dev.c',
         'cn10k_ml_ops.c',
         'cn10k_ml_model.c',
+        'cn10k_ml_ocm.c',
 )
 
 deps += ['mldev', 'common_cnxk', 'kvargs', 'hash']
-- 
2.17.1