* [PATCH v1 0/3] Add support for 32 I/O per model
@ 2023-04-23 5:08 Srikanth Yalavarthi
2023-04-23 5:08 ` [PATCH v1 1/3] ml/cnxk: split metadata fields into sections Srikanth Yalavarthi
` (3 more replies)
0 siblings, 4 replies; 5+ messages in thread
From: Srikanth Yalavarthi @ 2023-04-23 5:08 UTC (permalink / raw)
Cc: dev, syalavarthi, sshankarnara, aprabhu, ptakkar
This patch series adds support for 32 inputs / outputs per
model. Changes required to enable this support include:
1. Splitting model metadata fields into structures.
2. Update model metadata to v2301 which supports 32 I/O.
3. Update ML driver code to support metadata v2301.
Srikanth Yalavarthi (3):
ml/cnxk: split metadata fields into sections
ml/cnxk: update model metadata to v2301
ml/cnxk: add support for 32 I/O per model
drivers/ml/cnxk/cn10k_ml_model.c | 401 +++++++++++++++++-------
drivers/ml/cnxk/cn10k_ml_model.h | 512 +++++++++++++++++--------------
drivers/ml/cnxk/cn10k_ml_ops.c | 133 ++++++--
3 files changed, 659 insertions(+), 387 deletions(-)
--
2.17.1
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH v1 1/3] ml/cnxk: split metadata fields into sections
2023-04-23 5:08 [PATCH v1 0/3] Add support for 32 I/O per model Srikanth Yalavarthi
@ 2023-04-23 5:08 ` Srikanth Yalavarthi
2023-04-23 5:08 ` [PATCH v1 2/3] ml/cnxk: update model metadata to v2301 Srikanth Yalavarthi
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Srikanth Yalavarthi @ 2023-04-23 5:08 UTC (permalink / raw)
To: Srikanth Yalavarthi; +Cc: dev, sshankarnara, aprabhu, ptakkar
Split metadata into header, model sections, weights & bias,
input / output and data sections. This is a preparatory step
to introduce v2301 of model metadata.
Signed-off-by: Srikanth Yalavarthi <syalavarthi@marvell.com>
---
drivers/ml/cnxk/cn10k_ml_model.c | 26 +-
drivers/ml/cnxk/cn10k_ml_model.h | 487 ++++++++++++++++---------------
2 files changed, 270 insertions(+), 243 deletions(-)
diff --git a/drivers/ml/cnxk/cn10k_ml_model.c b/drivers/ml/cnxk/cn10k_ml_model.c
index 2ded05c5dc..c0b7b061f5 100644
--- a/drivers/ml/cnxk/cn10k_ml_model.c
+++ b/drivers/ml/cnxk/cn10k_ml_model.c
@@ -47,42 +47,42 @@ cn10k_ml_model_metadata_check(uint8_t *buffer, uint64_t size)
metadata = (struct cn10k_ml_model_metadata *)buffer;
/* Header CRC check */
- if (metadata->metadata_header.header_crc32c != 0) {
- header_crc32c = rte_hash_crc(
- buffer, sizeof(metadata->metadata_header) - sizeof(uint32_t), 0);
+ if (metadata->header.header_crc32c != 0) {
+ header_crc32c =
+ rte_hash_crc(buffer, sizeof(metadata->header) - sizeof(uint32_t), 0);
- if (header_crc32c != metadata->metadata_header.header_crc32c) {
+ if (header_crc32c != metadata->header.header_crc32c) {
plt_err("Invalid model, Header CRC mismatch");
return -EINVAL;
}
}
/* Payload CRC check */
- if (metadata->metadata_header.payload_crc32c != 0) {
- payload_crc32c = rte_hash_crc(buffer + sizeof(metadata->metadata_header),
- size - sizeof(metadata->metadata_header), 0);
+ if (metadata->header.payload_crc32c != 0) {
+ payload_crc32c = rte_hash_crc(buffer + sizeof(metadata->header),
+ size - sizeof(metadata->header), 0);
- if (payload_crc32c != metadata->metadata_header.payload_crc32c) {
+ if (payload_crc32c != metadata->header.payload_crc32c) {
plt_err("Invalid model, Payload CRC mismatch");
return -EINVAL;
}
}
/* Model magic string */
- if (strncmp((char *)metadata->metadata_header.magic, MRVL_ML_MODEL_MAGIC_STRING, 4) != 0) {
- plt_err("Invalid model, magic = %s", metadata->metadata_header.magic);
+ if (strncmp((char *)metadata->header.magic, MRVL_ML_MODEL_MAGIC_STRING, 4) != 0) {
+ plt_err("Invalid model, magic = %s", metadata->header.magic);
return -EINVAL;
}
/* Target architecture */
- if (metadata->metadata_header.target_architecture != MRVL_ML_MODEL_TARGET_ARCH) {
+ if (metadata->header.target_architecture != MRVL_ML_MODEL_TARGET_ARCH) {
plt_err("Model target architecture (%u) not supported",
- metadata->metadata_header.target_architecture);
+ metadata->header.target_architecture);
return -ENOTSUP;
}
/* Header version */
- rte_memcpy(version, metadata->metadata_header.version, 4 * sizeof(uint8_t));
+ rte_memcpy(version, metadata->header.version, 4 * sizeof(uint8_t));
if (version[0] * 1000 + version[1] * 100 < MRVL_ML_MODEL_VERSION) {
plt_err("Metadata version = %u.%u.%u.%u (< %u.%u.%u.%u) not supported", version[0],
version[1], version[2], version[3], (MRVL_ML_MODEL_VERSION / 1000) % 10,
diff --git a/drivers/ml/cnxk/cn10k_ml_model.h b/drivers/ml/cnxk/cn10k_ml_model.h
index 1bc748265d..b30ad5a981 100644
--- a/drivers/ml/cnxk/cn10k_ml_model.h
+++ b/drivers/ml/cnxk/cn10k_ml_model.h
@@ -30,298 +30,325 @@ enum cn10k_ml_model_state {
#define MRVL_ML_OUTPUT_NAME_LEN 16
#define MRVL_ML_INPUT_OUTPUT_SIZE 8
-/* Model file metadata structure */
-struct cn10k_ml_model_metadata {
- /* Header (256-byte) */
- struct {
- /* Magic string ('M', 'R', 'V', 'L') */
- uint8_t magic[4];
+/* Header (256-byte) */
+struct cn10k_ml_model_metadata_header {
+ /* Magic string ('M', 'R', 'V', 'L') */
+ uint8_t magic[4];
- /* Metadata version */
- uint8_t version[4];
+ /* Metadata version */
+ uint8_t version[4];
- /* Metadata size */
- uint32_t metadata_size;
+ /* Metadata size */
+ uint32_t metadata_size;
- /* Unique ID */
- uint8_t uuid[128];
+ /* Unique ID */
+ uint8_t uuid[128];
- /* Model target architecture
- * 0 = Undefined
- * 1 = M1K
- * 128 = MLIP
- * 256 = Experimental
- */
- uint32_t target_architecture;
- uint8_t reserved[104];
+ /* Model target architecture
+ * 0 = Undefined
+ * 1 = M1K
+ * 128 = MLIP
+ * 256 = Experimental
+ */
+ uint32_t target_architecture;
+ uint8_t reserved[104];
- /* CRC of data after metadata_header (i.e. after first 256 bytes) */
- uint32_t payload_crc32c;
+ /* CRC of data after header (i.e. after first 256 bytes) */
+ uint32_t payload_crc32c;
- /* CRC of first 252 bytes of metadata_header, after payload_crc calculation */
- uint32_t header_crc32c;
- } metadata_header;
+ /* CRC of first 252 bytes of header, after payload_crc calculation */
+ uint32_t header_crc32c;
+};
- /* Model information (256-byte) */
- struct {
- /* Model name string */
- uint8_t name[MRVL_ML_MODEL_NAME_LEN];
+/* Model information (256-byte) */
+struct cn10k_ml_model_metadata_model {
+ /* Model name string */
+ uint8_t name[MRVL_ML_MODEL_NAME_LEN];
- /* Model version info (xx.xx.xx.xx) */
- uint8_t version[4];
+ /* Model version info (xx.xx.xx.xx) */
+ uint8_t version[4];
- /* Model code size (Init + Main + Finish) */
- uint32_t code_size;
+ /* Model code size (Init + Main + Finish) */
+ uint32_t code_size;
- /* Model data size (Weights and Bias) */
- uint32_t data_size;
+ /* Model data size (Weights and Bias) */
+ uint32_t data_size;
- /* OCM start offset, set to ocm_wb_range_start */
- uint32_t ocm_start;
+ /* OCM start offset, set to ocm_wb_range_start */
+ uint32_t ocm_start;
- /* OCM start offset, set to max OCM size */
- uint32_t ocm_end;
+ /* OCM start offset, set to max OCM size */
+ uint32_t ocm_end;
- /* Relocatable flag (always yes)
- * 0 = Not relocatable
- * 1 = Relocatable
- */
- uint8_t ocm_relocatable;
+ /* Relocatable flag (always yes)
+ * 0 = Not relocatable
+ * 1 = Relocatable
+ */
+ uint8_t ocm_relocatable;
- /* Tile relocatable flag (always yes)
- * 0 = Not relocatable
- * 1 = Relocatable
- */
- uint8_t tile_relocatable;
+ /* Tile relocatable flag (always yes)
+ * 0 = Not relocatable
+ * 1 = Relocatable
+ */
+ uint8_t tile_relocatable;
- /* Start tile (Always 0) */
- uint8_t tile_start;
+ /* Start tile (Always 0) */
+ uint8_t tile_start;
- /* End tile (num_tiles - 1) */
- uint8_t tile_end;
+ /* End tile (num_tiles - 1) */
+ uint8_t tile_end;
- /* Inference batch size */
- uint8_t batch_size;
+ /* Inference batch size */
+ uint8_t batch_size;
- /* Number of input tensors (Max 8) */
- uint8_t num_input;
+ /* Number of input tensors (Max 8) */
+ uint8_t num_input;
- /* Number of output tensors (Max 8) */
- uint8_t num_output;
- uint8_t reserved1;
+ /* Number of output tensors (Max 8) */
+ uint8_t num_output;
+ uint8_t reserved_1;
- /* Total input size in bytes */
- uint32_t input_size;
+ /* Total input size in bytes */
+ uint32_t input_size;
- /* Total output size in bytes */
- uint32_t output_size;
+ /* Total output size in bytes */
+ uint32_t output_size;
- /* Table size in bytes */
- uint32_t table_size;
+ /* Table size in bytes */
+ uint32_t table_size;
- /* Number of layers in the network */
- uint32_t num_layers;
- uint32_t reserved2;
+ /* Number of layers in the network */
+ uint32_t num_layers;
+ uint32_t reserved_2;
- /* Floor of absolute OCM region */
- uint64_t ocm_tmp_range_floor;
+ /* Floor of absolute OCM region */
+ uint64_t ocm_tmp_range_floor;
- /* Relative OCM start address of WB data block */
- uint64_t ocm_wb_range_start;
+ /* Relative OCM start address of WB data block */
+ uint64_t ocm_wb_range_start;
- /* Relative OCM end address of WB data block */
- uint64_t ocm_wb_range_end;
+ /* Relative OCM end address of WB data block */
+ uint64_t ocm_wb_range_end;
- /* Relative DDR start address of WB data block */
- uint64_t ddr_wb_range_start;
+ /* Relative DDR start address of WB data block */
+ uint64_t ddr_wb_range_start;
- /* Relative DDR end address of all outputs */
- uint64_t ddr_wb_range_end;
+ /* Relative DDR end address of all outputs */
+ uint64_t ddr_wb_range_end;
- /* Relative DDR start address of all inputs */
- uint64_t ddr_input_range_start;
+ /* Relative DDR start address of all inputs */
+ uint64_t ddr_input_range_start;
- /* Relative DDR end address of all inputs */
- uint64_t ddr_input_range_end;
+ /* Relative DDR end address of all inputs */
+ uint64_t ddr_input_range_end;
- /* Relative DDR start address of all outputs */
- uint64_t ddr_output_range_start;
+ /* Relative DDR start address of all outputs */
+ uint64_t ddr_output_range_start;
- /* Relative DDR end address of all outputs */
- uint64_t ddr_output_range_end;
+ /* Relative DDR end address of all outputs */
+ uint64_t ddr_output_range_end;
- /* Compiler version */
- uint8_t compiler_version[8];
+ /* Compiler version */
+ uint8_t compiler_version[8];
- /* CDK version */
- uint8_t cdk_version[4];
+ /* CDK version */
+ uint8_t cdk_version[4];
- /* Lower batch optimization support
- * 0 - No,
- * 1 - Yes
- */
- uint8_t supports_lower_batch_size_optimization;
- uint8_t reserved3[59];
- } model;
+ /* Lower batch optimization support
+ * 0 - No,
+ * 1 - Yes
+ */
+ uint8_t supports_lower_batch_size_optimization;
+ uint8_t reserved_3[59];
+};
- /* Init section (64-byte) */
- struct {
- uint32_t file_offset;
- uint32_t file_size;
- uint8_t reserved[56];
- } init_model;
+/* Init section (64-byte) */
+struct cn10k_ml_model_metadata_init_section {
+ uint32_t file_offset;
+ uint32_t file_size;
+ uint8_t reserved[56];
+};
- /* Main section (64-byte) */
- struct {
- uint32_t file_offset;
- uint32_t file_size;
- uint8_t reserved[56];
- } main_model;
+/* Main section (64-byte) */
+struct cn10k_ml_model_metadata_main_section {
+ uint32_t file_offset;
+ uint32_t file_size;
+ uint8_t reserved[56];
+};
- /* Finish section (64-byte) */
- struct {
- uint32_t file_offset;
- uint32_t file_size;
- uint8_t reserved[56];
- } finish_model;
+/* Finish section (64-byte) */
+struct cn10k_ml_model_metadata_finish_section {
+ uint32_t file_offset;
+ uint32_t file_size;
+ uint8_t reserved[56];
+};
- uint8_t reserved1[512]; /* End of 2k bytes */
+/* Weights and Bias (64-byte) */
+struct cn10k_ml_model_metadata_weights_bias_section {
+ /* Memory offset, set to ddr_wb_range_start */
+ uint64_t mem_offset;
+ uint32_t file_offset;
+ uint32_t file_size;
- /* Weights and Bias (64-byte) */
+ /* Relocatable flag for WB
+ * 1 = Relocatable
+ * 2 = Not relocatable
+ */
+ uint8_t relocatable;
+ uint8_t reserved[47];
+};
+
+/* Input section (64-byte per input) */
+struct cn10k_ml_model_metadata_input_section {
+ /* DDR offset (in OCM absolute addresses for input) */
+ uint64_t mem_offset;
+
+ /* Relocatable flag
+ * 1 = Relocatable
+ * 2 = Not relocatable
+ */
+ uint8_t relocatable;
+
+ /* Input quantization
+ * 1 = Requires quantization
+ * 2 = Pre-quantized
+ */
+ uint8_t quantize;
+
+ /* Type of incoming input
+ * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16,
+ * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32
+ */
+ uint8_t input_type;
+
+ /* Type of input required by model
+ * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16,
+ * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32
+ */
+ uint8_t model_input_type;
+
+ /* float_32 qscale value
+ * quantized = non-quantized * qscale
+ */
+ float qscale;
+
+ /* Input shape */
struct {
- /* Memory offset, set to ddr_wb_range_start */
- uint64_t mem_offset;
- uint32_t file_offset;
- uint32_t file_size;
-
- /* Relocatable flag for WB
- * 1 = Relocatable
- * 2 = Not relocatable
+ /* Input format
+ * 1 = NCHW
+ * 2 = NHWC
*/
- uint8_t relocatable;
- uint8_t reserved[47];
- } weights_bias;
+ uint8_t format;
+ uint8_t reserved[3];
+ uint32_t w;
+ uint32_t x;
+ uint32_t y;
+ uint32_t z;
+ } shape;
+ uint8_t reserved[4];
+
+ /* Name of input */
+ uint8_t input_name[MRVL_ML_INPUT_NAME_LEN];
+
+ /* DDR range end
+ * new = mem_offset + size_bytes - 1
+ */
+ uint64_t ddr_range_end;
+};
- /* Input (512-byte, 64-byte per input) provisioned for 8 inputs */
- struct {
- /* DDR offset (in OCM absolute addresses for input) */
- uint64_t mem_offset;
+/* Output section (64-byte per output) */
+struct cn10k_ml_model_metadata_output_section {
+ /* DDR offset in OCM absolute addresses for output */
+ uint64_t mem_offset;
- /* Relocatable flag
- * 1 = Relocatable
- * 2 = Not relocatable
- */
- uint8_t relocatable;
+ /* Relocatable flag
+ * 1 = Relocatable
+ * 2 = Not relocatable
+ */
+ uint8_t relocatable;
- /* Input quantization
- * 1 = Requires quantization
- * 2 = Pre-quantized
- */
- uint8_t quantize;
+ /* Output dequantization
+ * 1 = De-quantization required
+ * 2 = De-quantization not required
+ */
+ uint8_t dequantize;
- /* Type of incoming input
- * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16,
- * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32
- */
- uint8_t input_type;
+ /* Type of outgoing output
+ * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16
+ * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32
+ */
+ uint8_t output_type;
- /* Type of input required by model
- * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16,
- * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32
- */
- uint8_t model_input_type;
+ /* Type of output produced by model
+ * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16
+ * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32
+ */
+ uint8_t model_output_type;
- /* float_32 qscale value
- * quantized = non-quantized * qscale
- */
- float qscale;
-
- /* Input shape */
- struct {
- /* Input format
- * 1 = NCHW
- * 2 = NHWC
- */
- uint8_t format;
- uint8_t reserved[3];
- uint32_t w;
- uint32_t x;
- uint32_t y;
- uint32_t z;
- } shape;
- uint8_t reserved[4];
-
- /* Name of input */
- uint8_t input_name[MRVL_ML_INPUT_NAME_LEN];
-
- /* DDR range end
- * new = mem_offset + size_bytes - 1
- */
- uint64_t ddr_range_end;
- } input[MRVL_ML_INPUT_OUTPUT_SIZE];
+ /* float_32 dscale value
+ * dequantized = quantized * dscale
+ */
+ float dscale;
- /* Output (512 byte, 64-byte per input) provisioned for 8 outputs */
- struct {
- /* DDR offset in OCM absolute addresses for output */
- uint64_t mem_offset;
+ /* Number of items in the output */
+ uint32_t size;
+ uint8_t reserved[20];
- /* Relocatable flag
- * 1 = Relocatable
- * 2 = Not relocatable
- */
- uint8_t relocatable;
+ /* DDR range end
+ * new = mem_offset + size_bytes - 1
+ */
+ uint64_t ddr_range_end;
+ uint8_t output_name[MRVL_ML_OUTPUT_NAME_LEN];
+};
- /* Output dequantization
- * 1 = De-quantization required
- * 2 = De-quantization not required
- */
- uint8_t dequantize;
+/* Model data */
+struct cn10k_ml_model_metadata_data_section {
+ uint8_t reserved[4068];
- /* Type of outgoing output
- * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16
- * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32
- */
- uint8_t output_type;
+ /* Beta: xx.xx.xx.xx,
+ * Later: YYYYMM.xx.xx
+ */
+ uint8_t compiler_version[8];
- /* Type of output produced by model
- * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16
- * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32
- */
- uint8_t model_output_type;
+ /* M1K CDK version (xx.xx.xx.xx) */
+ uint8_t m1k_cdk_version[4];
+};
- /* float_32 dscale value
- * dequantized = quantized * dscale
- */
- float dscale;
+/* Model file metadata structure */
+struct cn10k_ml_model_metadata {
+ /* Header (256-byte) */
+ struct cn10k_ml_model_metadata_header header;
- /* Number of items in the output */
- uint32_t size;
- uint8_t reserved[20];
+ /* Model information (256-byte) */
+ struct cn10k_ml_model_metadata_model model;
- /* DDR range end
- * new = mem_offset + size_bytes - 1
- */
- uint64_t ddr_range_end;
- uint8_t output_name[MRVL_ML_OUTPUT_NAME_LEN];
- } output[MRVL_ML_INPUT_OUTPUT_SIZE];
+ /* Init section (64-byte) */
+ struct cn10k_ml_model_metadata_init_section init_model;
- uint8_t reserved2[1792];
+ /* Main section (64-byte) */
+ struct cn10k_ml_model_metadata_main_section main_model;
- /* Model data */
- struct {
- uint8_t reserved1[4068];
+ /* Finish section (64-byte) */
+ struct cn10k_ml_model_metadata_finish_section finish_model;
- /* Beta: xx.xx.xx.xx,
- * Later: YYYYMM.xx.xx
- */
- uint8_t compiler_version[8];
+ uint8_t reserved_1[512]; /* End of 2k bytes */
+
+ /* Weights and Bias (64-byte) */
+ struct cn10k_ml_model_metadata_weights_bias_section weights_bias;
+
+ /* Input (512-bytes, 64-byte per input) provisioned for 8 inputs */
+ struct cn10k_ml_model_metadata_input_section input[MRVL_ML_INPUT_OUTPUT_SIZE];
+
+ /* Output (512-bytes, 64-byte per output) provisioned for 8 outputs */
+ struct cn10k_ml_model_metadata_output_section output[MRVL_ML_INPUT_OUTPUT_SIZE];
- /* M1K CDK version (xx.xx.xx.xx) */
- uint8_t m1k_cdk_version[4];
- } data;
+ uint8_t reserved_2[1792];
+
+ /* Model data */
+ struct cn10k_ml_model_metadata_data_section data;
/* Hidden 16 bytes of magic code */
- uint8_t reserved3[16];
+ uint8_t reserved_3[16];
};
/* Model address structure */
--
2.17.1
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH v1 2/3] ml/cnxk: update model metadata to v2301
2023-04-23 5:08 [PATCH v1 0/3] Add support for 32 I/O per model Srikanth Yalavarthi
2023-04-23 5:08 ` [PATCH v1 1/3] ml/cnxk: split metadata fields into sections Srikanth Yalavarthi
@ 2023-04-23 5:08 ` Srikanth Yalavarthi
2023-04-23 5:08 ` [PATCH v1 3/3] ml/cnxk: add support for 32 I/O per model Srikanth Yalavarthi
2023-06-12 16:28 ` [PATCH v1 0/3] Add " Thomas Monjalon
3 siblings, 0 replies; 5+ messages in thread
From: Srikanth Yalavarthi @ 2023-04-23 5:08 UTC (permalink / raw)
To: Srikanth Yalavarthi; +Cc: dev, sshankarnara, aprabhu, ptakkar
Update model metadata to v2301. Revised metadata introduces
fields to support up to 32 inputs/outputs per model, scratch
relocation and updates to names of existing fields. Update
driver files to include changes in names of metadata fields.
Signed-off-by: Srikanth Yalavarthi <syalavarthi@marvell.com>
---
drivers/ml/cnxk/cn10k_ml_model.c | 111 ++++++++++++++++---------------
drivers/ml/cnxk/cn10k_ml_model.h | 36 +++++++---
drivers/ml/cnxk/cn10k_ml_ops.c | 50 +++++++-------
3 files changed, 106 insertions(+), 91 deletions(-)
diff --git a/drivers/ml/cnxk/cn10k_ml_model.c b/drivers/ml/cnxk/cn10k_ml_model.c
index c0b7b061f5..a15df700aa 100644
--- a/drivers/ml/cnxk/cn10k_ml_model.c
+++ b/drivers/ml/cnxk/cn10k_ml_model.c
@@ -83,11 +83,11 @@ cn10k_ml_model_metadata_check(uint8_t *buffer, uint64_t size)
/* Header version */
rte_memcpy(version, metadata->header.version, 4 * sizeof(uint8_t));
- if (version[0] * 1000 + version[1] * 100 < MRVL_ML_MODEL_VERSION) {
+ if (version[0] * 1000 + version[1] * 100 != MRVL_ML_MODEL_VERSION_MIN) {
plt_err("Metadata version = %u.%u.%u.%u (< %u.%u.%u.%u) not supported", version[0],
- version[1], version[2], version[3], (MRVL_ML_MODEL_VERSION / 1000) % 10,
- (MRVL_ML_MODEL_VERSION / 100) % 10, (MRVL_ML_MODEL_VERSION / 10) % 10,
- MRVL_ML_MODEL_VERSION % 10);
+ version[1], version[2], version[3], (MRVL_ML_MODEL_VERSION_MIN / 1000) % 10,
+ (MRVL_ML_MODEL_VERSION_MIN / 100) % 10,
+ (MRVL_ML_MODEL_VERSION_MIN / 10) % 10, MRVL_ML_MODEL_VERSION_MIN % 10);
return -ENOTSUP;
}
@@ -125,36 +125,36 @@ cn10k_ml_model_metadata_check(uint8_t *buffer, uint64_t size)
}
/* Check input count */
- if (metadata->model.num_input > MRVL_ML_INPUT_OUTPUT_SIZE) {
+ if (metadata->model.num_input > MRVL_ML_NUM_INPUT_OUTPUT_1) {
plt_err("Invalid metadata, num_input = %u (> %u)", metadata->model.num_input,
- MRVL_ML_INPUT_OUTPUT_SIZE);
+ MRVL_ML_NUM_INPUT_OUTPUT_1);
return -EINVAL;
}
/* Check output count */
- if (metadata->model.num_output > MRVL_ML_INPUT_OUTPUT_SIZE) {
+ if (metadata->model.num_output > MRVL_ML_NUM_INPUT_OUTPUT_1) {
plt_err("Invalid metadata, num_output = %u (> %u)", metadata->model.num_output,
- MRVL_ML_INPUT_OUTPUT_SIZE);
+ MRVL_ML_NUM_INPUT_OUTPUT_1);
return -EINVAL;
}
/* Inputs */
for (i = 0; i < metadata->model.num_input; i++) {
- if (rte_ml_io_type_size_get(cn10k_ml_io_type_map(metadata->input[i].input_type)) <=
+ if (rte_ml_io_type_size_get(cn10k_ml_io_type_map(metadata->input1[i].input_type)) <=
0) {
plt_err("Invalid metadata, input[%u] : input_type = %u", i,
- metadata->input[i].input_type);
+ metadata->input1[i].input_type);
return -EINVAL;
}
if (rte_ml_io_type_size_get(
- cn10k_ml_io_type_map(metadata->input[i].model_input_type)) <= 0) {
+ cn10k_ml_io_type_map(metadata->input1[i].model_input_type)) <= 0) {
plt_err("Invalid metadata, input[%u] : model_input_type = %u", i,
- metadata->input[i].model_input_type);
+ metadata->input1[i].model_input_type);
return -EINVAL;
}
- if (metadata->input[i].relocatable != 1) {
+ if (metadata->input1[i].relocatable != 1) {
plt_err("Model not supported, non-relocatable input: %u", i);
return -ENOTSUP;
}
@@ -163,20 +163,20 @@ cn10k_ml_model_metadata_check(uint8_t *buffer, uint64_t size)
/* Outputs */
for (i = 0; i < metadata->model.num_output; i++) {
if (rte_ml_io_type_size_get(
- cn10k_ml_io_type_map(metadata->output[i].output_type)) <= 0) {
+ cn10k_ml_io_type_map(metadata->output1[i].output_type)) <= 0) {
plt_err("Invalid metadata, output[%u] : output_type = %u", i,
- metadata->output[i].output_type);
+ metadata->output1[i].output_type);
return -EINVAL;
}
if (rte_ml_io_type_size_get(
- cn10k_ml_io_type_map(metadata->output[i].model_output_type)) <= 0) {
+ cn10k_ml_io_type_map(metadata->output1[i].model_output_type)) <= 0) {
plt_err("Invalid metadata, output[%u] : model_output_type = %u", i,
- metadata->output[i].model_output_type);
+ metadata->output1[i].model_output_type);
return -EINVAL;
}
- if (metadata->output[i].relocatable != 1) {
+ if (metadata->output1[i].relocatable != 1) {
plt_err("Model not supported, non-relocatable output: %u", i);
return -ENOTSUP;
}
@@ -191,28 +191,29 @@ cn10k_ml_model_metadata_update(struct cn10k_ml_model_metadata *metadata)
uint8_t i;
for (i = 0; i < metadata->model.num_input; i++) {
- metadata->input[i].input_type = cn10k_ml_io_type_map(metadata->input[i].input_type);
- metadata->input[i].model_input_type =
- cn10k_ml_io_type_map(metadata->input[i].model_input_type);
+ metadata->input1[i].input_type =
+ cn10k_ml_io_type_map(metadata->input1[i].input_type);
+ metadata->input1[i].model_input_type =
+ cn10k_ml_io_type_map(metadata->input1[i].model_input_type);
- if (metadata->input[i].shape.w == 0)
- metadata->input[i].shape.w = 1;
+ if (metadata->input1[i].shape.w == 0)
+ metadata->input1[i].shape.w = 1;
- if (metadata->input[i].shape.x == 0)
- metadata->input[i].shape.x = 1;
+ if (metadata->input1[i].shape.x == 0)
+ metadata->input1[i].shape.x = 1;
- if (metadata->input[i].shape.y == 0)
- metadata->input[i].shape.y = 1;
+ if (metadata->input1[i].shape.y == 0)
+ metadata->input1[i].shape.y = 1;
- if (metadata->input[i].shape.z == 0)
- metadata->input[i].shape.z = 1;
+ if (metadata->input1[i].shape.z == 0)
+ metadata->input1[i].shape.z = 1;
}
for (i = 0; i < metadata->model.num_output; i++) {
- metadata->output[i].output_type =
- cn10k_ml_io_type_map(metadata->output[i].output_type);
- metadata->output[i].model_output_type =
- cn10k_ml_io_type_map(metadata->output[i].model_output_type);
+ metadata->output1[i].output_type =
+ cn10k_ml_io_type_map(metadata->output1[i].output_type);
+ metadata->output1[i].model_output_type =
+ cn10k_ml_io_type_map(metadata->output1[i].model_output_type);
}
}
@@ -272,31 +273,31 @@ cn10k_ml_model_addr_update(struct cn10k_ml_model *model, uint8_t *buffer, uint8_
addr->total_input_sz_q = 0;
for (i = 0; i < metadata->model.num_input; i++) {
addr->input[i].nb_elements =
- metadata->input[i].shape.w * metadata->input[i].shape.x *
- metadata->input[i].shape.y * metadata->input[i].shape.z;
+ metadata->input1[i].shape.w * metadata->input1[i].shape.x *
+ metadata->input1[i].shape.y * metadata->input1[i].shape.z;
addr->input[i].sz_d = addr->input[i].nb_elements *
- rte_ml_io_type_size_get(metadata->input[i].input_type);
+ rte_ml_io_type_size_get(metadata->input1[i].input_type);
addr->input[i].sz_q = addr->input[i].nb_elements *
- rte_ml_io_type_size_get(metadata->input[i].model_input_type);
+ rte_ml_io_type_size_get(metadata->input1[i].model_input_type);
addr->total_input_sz_d += addr->input[i].sz_d;
addr->total_input_sz_q += addr->input[i].sz_q;
plt_ml_dbg("model_id = %u, input[%u] - w:%u x:%u y:%u z:%u, sz_d = %u sz_q = %u",
- model->model_id, i, metadata->input[i].shape.w,
- metadata->input[i].shape.x, metadata->input[i].shape.y,
- metadata->input[i].shape.z, addr->input[i].sz_d, addr->input[i].sz_q);
+ model->model_id, i, metadata->input1[i].shape.w,
+ metadata->input1[i].shape.x, metadata->input1[i].shape.y,
+ metadata->input1[i].shape.z, addr->input[i].sz_d, addr->input[i].sz_q);
}
/* Outputs */
addr->total_output_sz_q = 0;
addr->total_output_sz_d = 0;
for (i = 0; i < metadata->model.num_output; i++) {
- addr->output[i].nb_elements = metadata->output[i].size;
+ addr->output[i].nb_elements = metadata->output1[i].size;
addr->output[i].sz_d = addr->output[i].nb_elements *
- rte_ml_io_type_size_get(metadata->output[i].output_type);
+ rte_ml_io_type_size_get(metadata->output1[i].output_type);
addr->output[i].sz_q =
addr->output[i].nb_elements *
- rte_ml_io_type_size_get(metadata->output[i].model_output_type);
+ rte_ml_io_type_size_get(metadata->output1[i].model_output_type);
addr->total_output_sz_q += addr->output[i].sz_q;
addr->total_output_sz_d += addr->output[i].sz_d;
@@ -388,24 +389,24 @@ cn10k_ml_model_info_set(struct rte_ml_dev *dev, struct cn10k_ml_model *model)
/* Set input info */
for (i = 0; i < info->nb_inputs; i++) {
- rte_memcpy(input[i].name, metadata->input[i].input_name, MRVL_ML_INPUT_NAME_LEN);
- input[i].dtype = metadata->input[i].input_type;
- input[i].qtype = metadata->input[i].model_input_type;
- input[i].shape.format = metadata->input[i].shape.format;
- input[i].shape.w = metadata->input[i].shape.w;
- input[i].shape.x = metadata->input[i].shape.x;
- input[i].shape.y = metadata->input[i].shape.y;
- input[i].shape.z = metadata->input[i].shape.z;
+ rte_memcpy(input[i].name, metadata->input1[i].input_name, MRVL_ML_INPUT_NAME_LEN);
+ input[i].dtype = metadata->input1[i].input_type;
+ input[i].qtype = metadata->input1[i].model_input_type;
+ input[i].shape.format = metadata->input1[i].shape.format;
+ input[i].shape.w = metadata->input1[i].shape.w;
+ input[i].shape.x = metadata->input1[i].shape.x;
+ input[i].shape.y = metadata->input1[i].shape.y;
+ input[i].shape.z = metadata->input1[i].shape.z;
}
/* Set output info */
for (i = 0; i < info->nb_outputs; i++) {
- rte_memcpy(output[i].name, metadata->output[i].output_name,
+ rte_memcpy(output[i].name, metadata->output1[i].output_name,
MRVL_ML_OUTPUT_NAME_LEN);
- output[i].dtype = metadata->output[i].output_type;
- output[i].qtype = metadata->output[i].model_output_type;
+ output[i].dtype = metadata->output1[i].output_type;
+ output[i].qtype = metadata->output1[i].model_output_type;
output[i].shape.format = RTE_ML_IO_FORMAT_1D;
- output[i].shape.w = metadata->output[i].size;
+ output[i].shape.w = metadata->output1[i].size;
output[i].shape.x = 1;
output[i].shape.y = 1;
output[i].shape.z = 1;
diff --git a/drivers/ml/cnxk/cn10k_ml_model.h b/drivers/ml/cnxk/cn10k_ml_model.h
index b30ad5a981..bd863a8c12 100644
--- a/drivers/ml/cnxk/cn10k_ml_model.h
+++ b/drivers/ml/cnxk/cn10k_ml_model.h
@@ -21,14 +21,15 @@ enum cn10k_ml_model_state {
ML_CN10K_MODEL_STATE_UNKNOWN,
};
-/* Model Metadata : v 2.1.0.2 */
+/* Model Metadata : v 2.3.0.1 */
#define MRVL_ML_MODEL_MAGIC_STRING "MRVL"
#define MRVL_ML_MODEL_TARGET_ARCH 128
-#define MRVL_ML_MODEL_VERSION 2100
+#define MRVL_ML_MODEL_VERSION_MIN 2100
#define MRVL_ML_MODEL_NAME_LEN 64
#define MRVL_ML_INPUT_NAME_LEN 16
#define MRVL_ML_OUTPUT_NAME_LEN 16
-#define MRVL_ML_INPUT_OUTPUT_SIZE 8
+#define MRVL_ML_NUM_INPUT_OUTPUT_1 8
+#define MRVL_ML_NUM_INPUT_OUTPUT_2 24
/* Header (256-byte) */
struct cn10k_ml_model_metadata_header {
@@ -101,10 +102,10 @@ struct cn10k_ml_model_metadata_model {
/* Inference batch size */
uint8_t batch_size;
- /* Number of input tensors (Max 8) */
+ /* Number of input tensors (Max 32) */
uint8_t num_input;
- /* Number of output tensors (Max 8) */
+ /* Number of output tensors (Max 32) */
uint8_t num_output;
uint8_t reserved_1;
@@ -159,7 +160,14 @@ struct cn10k_ml_model_metadata_model {
* 1 - Yes
*/
uint8_t supports_lower_batch_size_optimization;
- uint8_t reserved_3[59];
+ uint8_t reserved_3[3];
+
+ /* Relative DDR start address of scratch space */
+ uint64_t ddr_scratch_range_start;
+
+ /* Relative DDR end address of scratch space */
+ uint64_t ddr_scratch_range_end;
+ uint8_t reserved_4[40];
};
/* Init section (64-byte) */
@@ -303,7 +311,7 @@ struct cn10k_ml_model_metadata_output_section {
/* Model data */
struct cn10k_ml_model_metadata_data_section {
- uint8_t reserved[4068];
+ uint8_t reserved[996];
/* Beta: xx.xx.xx.xx,
* Later: YYYYMM.xx.xx
@@ -337,13 +345,19 @@ struct cn10k_ml_model_metadata {
struct cn10k_ml_model_metadata_weights_bias_section weights_bias;
/* Input (512-bytes, 64-byte per input) provisioned for 8 inputs */
- struct cn10k_ml_model_metadata_input_section input[MRVL_ML_INPUT_OUTPUT_SIZE];
+ struct cn10k_ml_model_metadata_input_section input1[MRVL_ML_NUM_INPUT_OUTPUT_1];
/* Output (512-bytes, 64-byte per output) provisioned for 8 outputs */
- struct cn10k_ml_model_metadata_output_section output[MRVL_ML_INPUT_OUTPUT_SIZE];
+ struct cn10k_ml_model_metadata_output_section output1[MRVL_ML_NUM_INPUT_OUTPUT_1];
uint8_t reserved_2[1792];
+ /* Input (1536-bytes, 64-byte per input) provisioned for 24 inputs */
+ struct cn10k_ml_model_metadata_input_section input2[MRVL_ML_NUM_INPUT_OUTPUT_2];
+
+ /* Output (1536-bytes, 64-byte per output) provisioned for 24 outputs */
+ struct cn10k_ml_model_metadata_output_section output2[MRVL_ML_NUM_INPUT_OUTPUT_2];
+
/* Model data */
struct cn10k_ml_model_metadata_data_section data;
@@ -399,7 +413,7 @@ struct cn10k_ml_model_addr {
/* Quantized input size */
uint32_t sz_q;
- } input[MRVL_ML_INPUT_OUTPUT_SIZE];
+ } input[MRVL_ML_NUM_INPUT_OUTPUT_1];
/* Output address and size */
struct {
@@ -411,7 +425,7 @@ struct cn10k_ml_model_addr {
/* Quantized output size */
uint32_t sz_q;
- } output[MRVL_ML_INPUT_OUTPUT_SIZE];
+ } output[MRVL_ML_NUM_INPUT_OUTPUT_1];
/* Total size of quantized input */
uint32_t total_input_sz_q;
diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c
index b5eaa24e83..aecc6e74ad 100644
--- a/drivers/ml/cnxk/cn10k_ml_ops.c
+++ b/drivers/ml/cnxk/cn10k_ml_ops.c
@@ -325,13 +325,13 @@ cn10k_ml_model_print(struct rte_ml_dev *dev, uint16_t model_id, FILE *fp)
print_line(fp, LINE_LEN);
for (i = 0; i < model->metadata.model.num_input; i++) {
fprintf(fp, "%8u ", i);
- fprintf(fp, "%*s ", 16, model->metadata.input[i].input_name);
- rte_ml_io_type_to_str(model->metadata.input[i].input_type, str, STR_LEN);
+ fprintf(fp, "%*s ", 16, model->metadata.input1[i].input_name);
+ rte_ml_io_type_to_str(model->metadata.input1[i].input_type, str, STR_LEN);
fprintf(fp, "%*s ", 12, str);
- rte_ml_io_type_to_str(model->metadata.input[i].model_input_type, str, STR_LEN);
+ rte_ml_io_type_to_str(model->metadata.input1[i].model_input_type, str, STR_LEN);
fprintf(fp, "%*s ", 18, str);
- fprintf(fp, "%*s", 12, (model->metadata.input[i].quantize == 1 ? "Yes" : "No"));
- rte_ml_io_format_to_str(model->metadata.input[i].shape.format, str, STR_LEN);
+ fprintf(fp, "%*s", 12, (model->metadata.input1[i].quantize == 1 ? "Yes" : "No"));
+ rte_ml_io_format_to_str(model->metadata.input1[i].shape.format, str, STR_LEN);
fprintf(fp, "%*s", 16, str);
fprintf(fp, "\n");
}
@@ -343,12 +343,12 @@ cn10k_ml_model_print(struct rte_ml_dev *dev, uint16_t model_id, FILE *fp)
print_line(fp, LINE_LEN);
for (i = 0; i < model->metadata.model.num_output; i++) {
fprintf(fp, "%8u ", i);
- fprintf(fp, "%*s ", 16, model->metadata.output[i].output_name);
- rte_ml_io_type_to_str(model->metadata.output[i].output_type, str, STR_LEN);
+ fprintf(fp, "%*s ", 16, model->metadata.output1[i].output_name);
+ rte_ml_io_type_to_str(model->metadata.output1[i].output_type, str, STR_LEN);
fprintf(fp, "%*s ", 12, str);
- rte_ml_io_type_to_str(model->metadata.output[i].model_output_type, str, STR_LEN);
+ rte_ml_io_type_to_str(model->metadata.output1[i].model_output_type, str, STR_LEN);
fprintf(fp, "%*s ", 18, str);
- fprintf(fp, "%*s", 12, (model->metadata.output[i].dequantize == 1 ? "Yes" : "No"));
+ fprintf(fp, "%*s", 12, (model->metadata.output1[i].dequantize == 1 ? "Yes" : "No"));
fprintf(fp, "\n");
}
fprintf(fp, "\n");
@@ -1882,28 +1882,28 @@ cn10k_ml_io_quantize(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_batc
next_batch:
for (i = 0; i < model->metadata.model.num_input; i++) {
- if (model->metadata.input[i].input_type ==
- model->metadata.input[i].model_input_type) {
+ if (model->metadata.input1[i].input_type ==
+ model->metadata.input1[i].model_input_type) {
rte_memcpy(lcl_qbuffer, lcl_dbuffer, model->addr.input[i].sz_d);
} else {
- switch (model->metadata.input[i].model_input_type) {
+ switch (model->metadata.input1[i].model_input_type) {
case RTE_ML_IO_TYPE_INT8:
- ret = rte_ml_io_float32_to_int8(model->metadata.input[i].qscale,
+ ret = rte_ml_io_float32_to_int8(model->metadata.input1[i].qscale,
model->addr.input[i].nb_elements,
lcl_dbuffer, lcl_qbuffer);
break;
case RTE_ML_IO_TYPE_UINT8:
- ret = rte_ml_io_float32_to_uint8(model->metadata.input[i].qscale,
+ ret = rte_ml_io_float32_to_uint8(model->metadata.input1[i].qscale,
model->addr.input[i].nb_elements,
lcl_dbuffer, lcl_qbuffer);
break;
case RTE_ML_IO_TYPE_INT16:
- ret = rte_ml_io_float32_to_int16(model->metadata.input[i].qscale,
+ ret = rte_ml_io_float32_to_int16(model->metadata.input1[i].qscale,
model->addr.input[i].nb_elements,
lcl_dbuffer, lcl_qbuffer);
break;
case RTE_ML_IO_TYPE_UINT16:
- ret = rte_ml_io_float32_to_uint16(model->metadata.input[i].qscale,
+ ret = rte_ml_io_float32_to_uint16(model->metadata.input1[i].qscale,
model->addr.input[i].nb_elements,
lcl_dbuffer, lcl_qbuffer);
break;
@@ -1913,7 +1913,7 @@ cn10k_ml_io_quantize(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_batc
break;
default:
plt_err("Unsupported model_input_type[%u] : %u", i,
- model->metadata.input[i].model_input_type);
+ model->metadata.input1[i].model_input_type);
ret = -ENOTSUP;
}
if (ret < 0)
@@ -1955,28 +1955,28 @@ cn10k_ml_io_dequantize(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_ba
next_batch:
for (i = 0; i < model->metadata.model.num_output; i++) {
- if (model->metadata.output[i].output_type ==
- model->metadata.output[i].model_output_type) {
+ if (model->metadata.output1[i].output_type ==
+ model->metadata.output1[i].model_output_type) {
rte_memcpy(lcl_dbuffer, lcl_qbuffer, model->addr.output[i].sz_q);
} else {
- switch (model->metadata.output[i].model_output_type) {
+ switch (model->metadata.output1[i].model_output_type) {
case RTE_ML_IO_TYPE_INT8:
- ret = rte_ml_io_int8_to_float32(model->metadata.output[i].dscale,
+ ret = rte_ml_io_int8_to_float32(model->metadata.output1[i].dscale,
model->addr.output[i].nb_elements,
lcl_qbuffer, lcl_dbuffer);
break;
case RTE_ML_IO_TYPE_UINT8:
- ret = rte_ml_io_uint8_to_float32(model->metadata.output[i].dscale,
+ ret = rte_ml_io_uint8_to_float32(model->metadata.output1[i].dscale,
model->addr.output[i].nb_elements,
lcl_qbuffer, lcl_dbuffer);
break;
case RTE_ML_IO_TYPE_INT16:
- ret = rte_ml_io_int16_to_float32(model->metadata.output[i].dscale,
+ ret = rte_ml_io_int16_to_float32(model->metadata.output1[i].dscale,
model->addr.output[i].nb_elements,
lcl_qbuffer, lcl_dbuffer);
break;
case RTE_ML_IO_TYPE_UINT16:
- ret = rte_ml_io_uint16_to_float32(model->metadata.output[i].dscale,
+ ret = rte_ml_io_uint16_to_float32(model->metadata.output1[i].dscale,
model->addr.output[i].nb_elements,
lcl_qbuffer, lcl_dbuffer);
break;
@@ -1987,7 +1987,7 @@ cn10k_ml_io_dequantize(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_ba
break;
default:
plt_err("Unsupported model_output_type[%u] : %u", i,
- model->metadata.output[i].model_output_type);
+ model->metadata.output1[i].model_output_type);
ret = -ENOTSUP;
}
if (ret < 0)
--
2.17.1
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH v1 3/3] ml/cnxk: add support for 32 I/O per model
2023-04-23 5:08 [PATCH v1 0/3] Add support for 32 I/O per model Srikanth Yalavarthi
2023-04-23 5:08 ` [PATCH v1 1/3] ml/cnxk: split metadata fields into sections Srikanth Yalavarthi
2023-04-23 5:08 ` [PATCH v1 2/3] ml/cnxk: update model metadata to v2301 Srikanth Yalavarthi
@ 2023-04-23 5:08 ` Srikanth Yalavarthi
2023-06-12 16:28 ` [PATCH v1 0/3] Add " Thomas Monjalon
3 siblings, 0 replies; 5+ messages in thread
From: Srikanth Yalavarthi @ 2023-04-23 5:08 UTC (permalink / raw)
To: Srikanth Yalavarthi; +Cc: dev, sshankarnara, aprabhu, ptakkar
Added support for 32 inputs and outputs per model.
Signed-off-by: Srikanth Yalavarthi <syalavarthi@marvell.com>
---
drivers/ml/cnxk/cn10k_ml_model.c | 374 ++++++++++++++++++++++---------
drivers/ml/cnxk/cn10k_ml_model.h | 5 +-
drivers/ml/cnxk/cn10k_ml_ops.c | 125 ++++++++---
3 files changed, 367 insertions(+), 137 deletions(-)
diff --git a/drivers/ml/cnxk/cn10k_ml_model.c b/drivers/ml/cnxk/cn10k_ml_model.c
index a15df700aa..92c47d39ba 100644
--- a/drivers/ml/cnxk/cn10k_ml_model.c
+++ b/drivers/ml/cnxk/cn10k_ml_model.c
@@ -41,8 +41,9 @@ cn10k_ml_model_metadata_check(uint8_t *buffer, uint64_t size)
struct cn10k_ml_model_metadata *metadata;
uint32_t payload_crc32c;
uint32_t header_crc32c;
- uint8_t version[4];
+ uint32_t version;
uint8_t i;
+ uint8_t j;
metadata = (struct cn10k_ml_model_metadata *)buffer;
@@ -82,10 +83,13 @@ cn10k_ml_model_metadata_check(uint8_t *buffer, uint64_t size)
}
/* Header version */
- rte_memcpy(version, metadata->header.version, 4 * sizeof(uint8_t));
- if (version[0] * 1000 + version[1] * 100 != MRVL_ML_MODEL_VERSION_MIN) {
- plt_err("Metadata version = %u.%u.%u.%u (< %u.%u.%u.%u) not supported", version[0],
- version[1], version[2], version[3], (MRVL_ML_MODEL_VERSION_MIN / 1000) % 10,
+ version = metadata->header.version[0] * 1000 + metadata->header.version[1] * 100 +
+ metadata->header.version[2] * 10 + metadata->header.version[3];
+ if (version < MRVL_ML_MODEL_VERSION_MIN) {
+ plt_err("Metadata version = %u.%u.%u.%u (< %u.%u.%u.%u) not supported",
+ metadata->header.version[0], metadata->header.version[1],
+ metadata->header.version[2], metadata->header.version[3],
+ (MRVL_ML_MODEL_VERSION_MIN / 1000) % 10,
(MRVL_ML_MODEL_VERSION_MIN / 100) % 10,
(MRVL_ML_MODEL_VERSION_MIN / 10) % 10, MRVL_ML_MODEL_VERSION_MIN % 10);
return -ENOTSUP;
@@ -125,60 +129,119 @@ cn10k_ml_model_metadata_check(uint8_t *buffer, uint64_t size)
}
/* Check input count */
- if (metadata->model.num_input > MRVL_ML_NUM_INPUT_OUTPUT_1) {
- plt_err("Invalid metadata, num_input = %u (> %u)", metadata->model.num_input,
- MRVL_ML_NUM_INPUT_OUTPUT_1);
- return -EINVAL;
- }
-
- /* Check output count */
- if (metadata->model.num_output > MRVL_ML_NUM_INPUT_OUTPUT_1) {
- plt_err("Invalid metadata, num_output = %u (> %u)", metadata->model.num_output,
- MRVL_ML_NUM_INPUT_OUTPUT_1);
- return -EINVAL;
- }
-
- /* Inputs */
- for (i = 0; i < metadata->model.num_input; i++) {
- if (rte_ml_io_type_size_get(cn10k_ml_io_type_map(metadata->input1[i].input_type)) <=
- 0) {
- plt_err("Invalid metadata, input[%u] : input_type = %u", i,
- metadata->input1[i].input_type);
+ if (version < 2301) {
+ if (metadata->model.num_input > MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ plt_err("Invalid metadata, num_input = %u (> %u)",
+ metadata->model.num_input, MRVL_ML_NUM_INPUT_OUTPUT_1);
return -EINVAL;
}
- if (rte_ml_io_type_size_get(
- cn10k_ml_io_type_map(metadata->input1[i].model_input_type)) <= 0) {
- plt_err("Invalid metadata, input[%u] : model_input_type = %u", i,
- metadata->input1[i].model_input_type);
+ /* Check output count */
+ if (metadata->model.num_output > MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ plt_err("Invalid metadata, num_output = %u (> %u)",
+ metadata->model.num_output, MRVL_ML_NUM_INPUT_OUTPUT_1);
return -EINVAL;
}
-
- if (metadata->input1[i].relocatable != 1) {
- plt_err("Model not supported, non-relocatable input: %u", i);
- return -ENOTSUP;
+ } else {
+ if (metadata->model.num_input > MRVL_ML_NUM_INPUT_OUTPUT) {
+ plt_err("Invalid metadata, num_input = %u (> %u)",
+ metadata->model.num_input, MRVL_ML_NUM_INPUT_OUTPUT);
+ return -EINVAL;
}
- }
- /* Outputs */
- for (i = 0; i < metadata->model.num_output; i++) {
- if (rte_ml_io_type_size_get(
- cn10k_ml_io_type_map(metadata->output1[i].output_type)) <= 0) {
- plt_err("Invalid metadata, output[%u] : output_type = %u", i,
- metadata->output1[i].output_type);
+ /* Check output count */
+ if (metadata->model.num_output > MRVL_ML_NUM_INPUT_OUTPUT) {
+ plt_err("Invalid metadata, num_output = %u (> %u)",
+ metadata->model.num_output, MRVL_ML_NUM_INPUT_OUTPUT);
return -EINVAL;
}
+ }
- if (rte_ml_io_type_size_get(
- cn10k_ml_io_type_map(metadata->output1[i].model_output_type)) <= 0) {
- plt_err("Invalid metadata, output[%u] : model_output_type = %u", i,
- metadata->output1[i].model_output_type);
- return -EINVAL;
+ /* Inputs */
+ for (i = 0; i < metadata->model.num_input; i++) {
+ if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ if (rte_ml_io_type_size_get(
+ cn10k_ml_io_type_map(metadata->input1[i].input_type)) <= 0) {
+ plt_err("Invalid metadata, input1[%u] : input_type = %u", i,
+ metadata->input1[i].input_type);
+ return -EINVAL;
+ }
+
+ if (rte_ml_io_type_size_get(cn10k_ml_io_type_map(
+ metadata->input1[i].model_input_type)) <= 0) {
+ plt_err("Invalid metadata, input1[%u] : model_input_type = %u", i,
+ metadata->input1[i].model_input_type);
+ return -EINVAL;
+ }
+
+ if (metadata->input1[i].relocatable != 1) {
+ plt_err("Model not supported, non-relocatable input1: %u", i);
+ return -ENOTSUP;
+ }
+ } else {
+ j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
+ if (rte_ml_io_type_size_get(
+ cn10k_ml_io_type_map(metadata->input2[j].input_type)) <= 0) {
+ plt_err("Invalid metadata, input2[%u] : input_type = %u", j,
+ metadata->input2[j].input_type);
+ return -EINVAL;
+ }
+
+ if (rte_ml_io_type_size_get(cn10k_ml_io_type_map(
+ metadata->input2[j].model_input_type)) <= 0) {
+ plt_err("Invalid metadata, input2[%u] : model_input_type = %u", j,
+ metadata->input2[j].model_input_type);
+ return -EINVAL;
+ }
+
+ if (metadata->input2[j].relocatable != 1) {
+ plt_err("Model not supported, non-relocatable input2: %u", j);
+ return -ENOTSUP;
+ }
}
+ }
- if (metadata->output1[i].relocatable != 1) {
- plt_err("Model not supported, non-relocatable output: %u", i);
- return -ENOTSUP;
+ /* Outputs */
+ for (i = 0; i < metadata->model.num_output; i++) {
+ if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ if (rte_ml_io_type_size_get(
+ cn10k_ml_io_type_map(metadata->output1[i].output_type)) <= 0) {
+ plt_err("Invalid metadata, output1[%u] : output_type = %u", i,
+ metadata->output1[i].output_type);
+ return -EINVAL;
+ }
+
+ if (rte_ml_io_type_size_get(cn10k_ml_io_type_map(
+ metadata->output1[i].model_output_type)) <= 0) {
+ plt_err("Invalid metadata, output1[%u] : model_output_type = %u", i,
+ metadata->output1[i].model_output_type);
+ return -EINVAL;
+ }
+
+ if (metadata->output1[i].relocatable != 1) {
+ plt_err("Model not supported, non-relocatable output1: %u", i);
+ return -ENOTSUP;
+ }
+ } else {
+ j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
+ if (rte_ml_io_type_size_get(
+ cn10k_ml_io_type_map(metadata->output2[j].output_type)) <= 0) {
+ plt_err("Invalid metadata, output2[%u] : output_type = %u", j,
+ metadata->output2[j].output_type);
+ return -EINVAL;
+ }
+
+ if (rte_ml_io_type_size_get(cn10k_ml_io_type_map(
+ metadata->output2[j].model_output_type)) <= 0) {
+ plt_err("Invalid metadata, output2[%u] : model_output_type = %u", j,
+ metadata->output2[j].model_output_type);
+ return -EINVAL;
+ }
+
+ if (metadata->output2[j].relocatable != 1) {
+ plt_err("Model not supported, non-relocatable output2: %u", j);
+ return -ENOTSUP;
+ }
}
}
@@ -189,31 +252,60 @@ void
cn10k_ml_model_metadata_update(struct cn10k_ml_model_metadata *metadata)
{
uint8_t i;
+ uint8_t j;
for (i = 0; i < metadata->model.num_input; i++) {
- metadata->input1[i].input_type =
- cn10k_ml_io_type_map(metadata->input1[i].input_type);
- metadata->input1[i].model_input_type =
- cn10k_ml_io_type_map(metadata->input1[i].model_input_type);
+ if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ metadata->input1[i].input_type =
+ cn10k_ml_io_type_map(metadata->input1[i].input_type);
+ metadata->input1[i].model_input_type =
+ cn10k_ml_io_type_map(metadata->input1[i].model_input_type);
+
+ if (metadata->input1[i].shape.w == 0)
+ metadata->input1[i].shape.w = 1;
+
+ if (metadata->input1[i].shape.x == 0)
+ metadata->input1[i].shape.x = 1;
+
+ if (metadata->input1[i].shape.y == 0)
+ metadata->input1[i].shape.y = 1;
- if (metadata->input1[i].shape.w == 0)
- metadata->input1[i].shape.w = 1;
+ if (metadata->input1[i].shape.z == 0)
+ metadata->input1[i].shape.z = 1;
+ } else {
+ j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
+ metadata->input2[j].input_type =
+ cn10k_ml_io_type_map(metadata->input2[j].input_type);
+ metadata->input2[j].model_input_type =
+ cn10k_ml_io_type_map(metadata->input2[j].model_input_type);
- if (metadata->input1[i].shape.x == 0)
- metadata->input1[i].shape.x = 1;
+ if (metadata->input2[j].shape.w == 0)
+ metadata->input2[j].shape.w = 1;
- if (metadata->input1[i].shape.y == 0)
- metadata->input1[i].shape.y = 1;
+ if (metadata->input2[j].shape.x == 0)
+ metadata->input2[j].shape.x = 1;
- if (metadata->input1[i].shape.z == 0)
- metadata->input1[i].shape.z = 1;
+ if (metadata->input2[j].shape.y == 0)
+ metadata->input2[j].shape.y = 1;
+
+ if (metadata->input2[j].shape.z == 0)
+ metadata->input2[j].shape.z = 1;
+ }
}
for (i = 0; i < metadata->model.num_output; i++) {
- metadata->output1[i].output_type =
- cn10k_ml_io_type_map(metadata->output1[i].output_type);
- metadata->output1[i].model_output_type =
- cn10k_ml_io_type_map(metadata->output1[i].model_output_type);
+ if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ metadata->output1[i].output_type =
+ cn10k_ml_io_type_map(metadata->output1[i].output_type);
+ metadata->output1[i].model_output_type =
+ cn10k_ml_io_type_map(metadata->output1[i].model_output_type);
+ } else {
+ j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
+ metadata->output2[j].output_type =
+ cn10k_ml_io_type_map(metadata->output2[j].output_type);
+ metadata->output2[j].model_output_type =
+ cn10k_ml_io_type_map(metadata->output2[j].model_output_type);
+ }
}
}
@@ -226,6 +318,7 @@ cn10k_ml_model_addr_update(struct cn10k_ml_model *model, uint8_t *buffer, uint8_
uint8_t *dma_addr_load;
uint8_t *dma_addr_run;
uint8_t i;
+ uint8_t j;
int fpos;
metadata = &model->metadata;
@@ -272,37 +365,80 @@ cn10k_ml_model_addr_update(struct cn10k_ml_model *model, uint8_t *buffer, uint8_
addr->total_input_sz_d = 0;
addr->total_input_sz_q = 0;
for (i = 0; i < metadata->model.num_input; i++) {
- addr->input[i].nb_elements =
- metadata->input1[i].shape.w * metadata->input1[i].shape.x *
- metadata->input1[i].shape.y * metadata->input1[i].shape.z;
- addr->input[i].sz_d = addr->input[i].nb_elements *
- rte_ml_io_type_size_get(metadata->input1[i].input_type);
- addr->input[i].sz_q = addr->input[i].nb_elements *
- rte_ml_io_type_size_get(metadata->input1[i].model_input_type);
- addr->total_input_sz_d += addr->input[i].sz_d;
- addr->total_input_sz_q += addr->input[i].sz_q;
-
- plt_ml_dbg("model_id = %u, input[%u] - w:%u x:%u y:%u z:%u, sz_d = %u sz_q = %u",
- model->model_id, i, metadata->input1[i].shape.w,
- metadata->input1[i].shape.x, metadata->input1[i].shape.y,
- metadata->input1[i].shape.z, addr->input[i].sz_d, addr->input[i].sz_q);
+ if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ addr->input[i].nb_elements =
+ metadata->input1[i].shape.w * metadata->input1[i].shape.x *
+ metadata->input1[i].shape.y * metadata->input1[i].shape.z;
+ addr->input[i].sz_d =
+ addr->input[i].nb_elements *
+ rte_ml_io_type_size_get(metadata->input1[i].input_type);
+ addr->input[i].sz_q =
+ addr->input[i].nb_elements *
+ rte_ml_io_type_size_get(metadata->input1[i].model_input_type);
+ addr->total_input_sz_d += addr->input[i].sz_d;
+ addr->total_input_sz_q += addr->input[i].sz_q;
+
+ plt_ml_dbg(
+ "model_id = %u, input[%u] - w:%u x:%u y:%u z:%u, sz_d = %u sz_q = %u",
+ model->model_id, i, metadata->input1[i].shape.w,
+ metadata->input1[i].shape.x, metadata->input1[i].shape.y,
+ metadata->input1[i].shape.z, addr->input[i].sz_d,
+ addr->input[i].sz_q);
+ } else {
+ j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
+ addr->input[i].nb_elements =
+ metadata->input2[j].shape.w * metadata->input2[j].shape.x *
+ metadata->input2[j].shape.y * metadata->input2[j].shape.z;
+ addr->input[i].sz_d =
+ addr->input[i].nb_elements *
+ rte_ml_io_type_size_get(metadata->input2[j].input_type);
+ addr->input[i].sz_q =
+ addr->input[i].nb_elements *
+ rte_ml_io_type_size_get(metadata->input2[j].model_input_type);
+ addr->total_input_sz_d += addr->input[i].sz_d;
+ addr->total_input_sz_q += addr->input[i].sz_q;
+
+ plt_ml_dbg(
+ "model_id = %u, input2[%u] - w:%u x:%u y:%u z:%u, sz_d = %u sz_q = %u",
+ model->model_id, j, metadata->input2[j].shape.w,
+ metadata->input2[j].shape.x, metadata->input2[j].shape.y,
+ metadata->input2[j].shape.z, addr->input[i].sz_d,
+ addr->input[i].sz_q);
+ }
}
/* Outputs */
addr->total_output_sz_q = 0;
addr->total_output_sz_d = 0;
for (i = 0; i < metadata->model.num_output; i++) {
- addr->output[i].nb_elements = metadata->output1[i].size;
- addr->output[i].sz_d = addr->output[i].nb_elements *
- rte_ml_io_type_size_get(metadata->output1[i].output_type);
- addr->output[i].sz_q =
- addr->output[i].nb_elements *
- rte_ml_io_type_size_get(metadata->output1[i].model_output_type);
- addr->total_output_sz_q += addr->output[i].sz_q;
- addr->total_output_sz_d += addr->output[i].sz_d;
-
- plt_ml_dbg("model_id = %u, output[%u] - sz_d = %u, sz_q = %u", model->model_id, i,
- addr->output[i].sz_d, addr->output[i].sz_q);
+ if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ addr->output[i].nb_elements = metadata->output1[i].size;
+ addr->output[i].sz_d =
+ addr->output[i].nb_elements *
+ rte_ml_io_type_size_get(metadata->output1[i].output_type);
+ addr->output[i].sz_q =
+ addr->output[i].nb_elements *
+ rte_ml_io_type_size_get(metadata->output1[i].model_output_type);
+ addr->total_output_sz_q += addr->output[i].sz_q;
+ addr->total_output_sz_d += addr->output[i].sz_d;
+
+ plt_ml_dbg("model_id = %u, output[%u] - sz_d = %u, sz_q = %u",
+ model->model_id, i, addr->output[i].sz_d, addr->output[i].sz_q);
+ } else {
+ j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
+ addr->output[i].nb_elements = metadata->output2[j].size;
+ addr->output[i].sz_d =
+ addr->output[i].nb_elements *
+ rte_ml_io_type_size_get(metadata->output2[j].output_type);
+ addr->output[i].sz_q =
+ addr->output[i].nb_elements *
+ rte_ml_io_type_size_get(metadata->output2[j].model_output_type);
+ addr->total_output_sz_q += addr->output[i].sz_q;
+ addr->total_output_sz_d += addr->output[i].sz_d;
+
+ plt_ml_dbg("model_id = %u, output2[%u] - sz_d = %u, sz_q = %u",
+ model->model_id, j, addr->output[i].sz_d, addr->output[i].sz_q);
+ }
}
}
@@ -366,6 +502,7 @@ cn10k_ml_model_info_set(struct rte_ml_dev *dev, struct cn10k_ml_model *model)
struct rte_ml_io_info *output;
struct rte_ml_io_info *input;
uint8_t i;
+ uint8_t j;
metadata = &model->metadata;
info = PLT_PTR_CAST(model->info);
@@ -389,26 +526,53 @@ cn10k_ml_model_info_set(struct rte_ml_dev *dev, struct cn10k_ml_model *model)
/* Set input info */
for (i = 0; i < info->nb_inputs; i++) {
- rte_memcpy(input[i].name, metadata->input1[i].input_name, MRVL_ML_INPUT_NAME_LEN);
- input[i].dtype = metadata->input1[i].input_type;
- input[i].qtype = metadata->input1[i].model_input_type;
- input[i].shape.format = metadata->input1[i].shape.format;
- input[i].shape.w = metadata->input1[i].shape.w;
- input[i].shape.x = metadata->input1[i].shape.x;
- input[i].shape.y = metadata->input1[i].shape.y;
- input[i].shape.z = metadata->input1[i].shape.z;
+ if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ rte_memcpy(input[i].name, metadata->input1[i].input_name,
+ MRVL_ML_INPUT_NAME_LEN);
+ input[i].dtype = metadata->input1[i].input_type;
+ input[i].qtype = metadata->input1[i].model_input_type;
+ input[i].shape.format = metadata->input1[i].shape.format;
+ input[i].shape.w = metadata->input1[i].shape.w;
+ input[i].shape.x = metadata->input1[i].shape.x;
+ input[i].shape.y = metadata->input1[i].shape.y;
+ input[i].shape.z = metadata->input1[i].shape.z;
+ } else {
+ j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
+ rte_memcpy(input[i].name, metadata->input2[j].input_name,
+ MRVL_ML_INPUT_NAME_LEN);
+ input[i].dtype = metadata->input2[j].input_type;
+ input[i].qtype = metadata->input2[j].model_input_type;
+ input[i].shape.format = metadata->input2[j].shape.format;
+ input[i].shape.w = metadata->input2[j].shape.w;
+ input[i].shape.x = metadata->input2[j].shape.x;
+ input[i].shape.y = metadata->input2[j].shape.y;
+ input[i].shape.z = metadata->input2[j].shape.z;
+ }
}
/* Set output info */
for (i = 0; i < info->nb_outputs; i++) {
- rte_memcpy(output[i].name, metadata->output1[i].output_name,
- MRVL_ML_OUTPUT_NAME_LEN);
- output[i].dtype = metadata->output1[i].output_type;
- output[i].qtype = metadata->output1[i].model_output_type;
- output[i].shape.format = RTE_ML_IO_FORMAT_1D;
- output[i].shape.w = metadata->output1[i].size;
- output[i].shape.x = 1;
- output[i].shape.y = 1;
- output[i].shape.z = 1;
+ if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ rte_memcpy(output[i].name, metadata->output1[i].output_name,
+ MRVL_ML_OUTPUT_NAME_LEN);
+ output[i].dtype = metadata->output1[i].output_type;
+ output[i].qtype = metadata->output1[i].model_output_type;
+ output[i].shape.format = RTE_ML_IO_FORMAT_1D;
+ output[i].shape.w = metadata->output1[i].size;
+ output[i].shape.x = 1;
+ output[i].shape.y = 1;
+ output[i].shape.z = 1;
+ } else {
+ j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
+ rte_memcpy(output[i].name, metadata->output2[j].output_name,
+ MRVL_ML_OUTPUT_NAME_LEN);
+ output[i].dtype = metadata->output2[j].output_type;
+ output[i].qtype = metadata->output2[j].model_output_type;
+ output[i].shape.format = RTE_ML_IO_FORMAT_1D;
+ output[i].shape.w = metadata->output2[j].size;
+ output[i].shape.x = 1;
+ output[i].shape.y = 1;
+ output[i].shape.z = 1;
+ }
}
}
diff --git a/drivers/ml/cnxk/cn10k_ml_model.h b/drivers/ml/cnxk/cn10k_ml_model.h
index bd863a8c12..5c34e4d747 100644
--- a/drivers/ml/cnxk/cn10k_ml_model.h
+++ b/drivers/ml/cnxk/cn10k_ml_model.h
@@ -30,6 +30,7 @@ enum cn10k_ml_model_state {
#define MRVL_ML_OUTPUT_NAME_LEN 16
#define MRVL_ML_NUM_INPUT_OUTPUT_1 8
#define MRVL_ML_NUM_INPUT_OUTPUT_2 24
+#define MRVL_ML_NUM_INPUT_OUTPUT (MRVL_ML_NUM_INPUT_OUTPUT_1 + MRVL_ML_NUM_INPUT_OUTPUT_2)
/* Header (256-byte) */
struct cn10k_ml_model_metadata_header {
@@ -413,7 +414,7 @@ struct cn10k_ml_model_addr {
/* Quantized input size */
uint32_t sz_q;
- } input[MRVL_ML_NUM_INPUT_OUTPUT_1];
+ } input[MRVL_ML_NUM_INPUT_OUTPUT];
/* Output address and size */
struct {
@@ -425,7 +426,7 @@ struct cn10k_ml_model_addr {
/* Quantized output size */
uint32_t sz_q;
- } output[MRVL_ML_NUM_INPUT_OUTPUT_1];
+ } output[MRVL_ML_NUM_INPUT_OUTPUT];
/* Total size of quantized input */
uint32_t total_input_sz_q;
diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c
index aecc6e74ad..1033afb1b0 100644
--- a/drivers/ml/cnxk/cn10k_ml_ops.c
+++ b/drivers/ml/cnxk/cn10k_ml_ops.c
@@ -269,6 +269,7 @@ cn10k_ml_model_print(struct rte_ml_dev *dev, uint16_t model_id, FILE *fp)
struct cn10k_ml_ocm *ocm;
char str[STR_LEN];
uint8_t i;
+ uint8_t j;
mldev = dev->data->dev_private;
ocm = &mldev->ocm;
@@ -324,16 +325,36 @@ cn10k_ml_model_print(struct rte_ml_dev *dev, uint16_t model_id, FILE *fp)
"model_input_type", "quantize", "format");
print_line(fp, LINE_LEN);
for (i = 0; i < model->metadata.model.num_input; i++) {
- fprintf(fp, "%8u ", i);
- fprintf(fp, "%*s ", 16, model->metadata.input1[i].input_name);
- rte_ml_io_type_to_str(model->metadata.input1[i].input_type, str, STR_LEN);
- fprintf(fp, "%*s ", 12, str);
- rte_ml_io_type_to_str(model->metadata.input1[i].model_input_type, str, STR_LEN);
- fprintf(fp, "%*s ", 18, str);
- fprintf(fp, "%*s", 12, (model->metadata.input1[i].quantize == 1 ? "Yes" : "No"));
- rte_ml_io_format_to_str(model->metadata.input1[i].shape.format, str, STR_LEN);
- fprintf(fp, "%*s", 16, str);
- fprintf(fp, "\n");
+ if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ fprintf(fp, "%8u ", i);
+ fprintf(fp, "%*s ", 16, model->metadata.input1[i].input_name);
+ rte_ml_io_type_to_str(model->metadata.input1[i].input_type, str, STR_LEN);
+ fprintf(fp, "%*s ", 12, str);
+ rte_ml_io_type_to_str(model->metadata.input1[i].model_input_type, str,
+ STR_LEN);
+ fprintf(fp, "%*s ", 18, str);
+ fprintf(fp, "%*s", 12,
+ (model->metadata.input1[i].quantize == 1 ? "Yes" : "No"));
+ rte_ml_io_format_to_str(model->metadata.input1[i].shape.format, str,
+ STR_LEN);
+ fprintf(fp, "%*s", 16, str);
+ fprintf(fp, "\n");
+ } else {
+ j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
+ fprintf(fp, "%8u ", i);
+ fprintf(fp, "%*s ", 16, model->metadata.input2[j].input_name);
+ rte_ml_io_type_to_str(model->metadata.input2[j].input_type, str, STR_LEN);
+ fprintf(fp, "%*s ", 12, str);
+ rte_ml_io_type_to_str(model->metadata.input2[j].model_input_type, str,
+ STR_LEN);
+ fprintf(fp, "%*s ", 18, str);
+ fprintf(fp, "%*s", 12,
+ (model->metadata.input2[j].quantize == 1 ? "Yes" : "No"));
+ rte_ml_io_format_to_str(model->metadata.input2[j].shape.format, str,
+ STR_LEN);
+ fprintf(fp, "%*s", 16, str);
+ fprintf(fp, "\n");
+ }
}
fprintf(fp, "\n");
@@ -342,14 +363,30 @@ cn10k_ml_model_print(struct rte_ml_dev *dev, uint16_t model_id, FILE *fp)
"model_output_type", "dequantize");
print_line(fp, LINE_LEN);
for (i = 0; i < model->metadata.model.num_output; i++) {
- fprintf(fp, "%8u ", i);
- fprintf(fp, "%*s ", 16, model->metadata.output1[i].output_name);
- rte_ml_io_type_to_str(model->metadata.output1[i].output_type, str, STR_LEN);
- fprintf(fp, "%*s ", 12, str);
- rte_ml_io_type_to_str(model->metadata.output1[i].model_output_type, str, STR_LEN);
- fprintf(fp, "%*s ", 18, str);
- fprintf(fp, "%*s", 12, (model->metadata.output1[i].dequantize == 1 ? "Yes" : "No"));
- fprintf(fp, "\n");
+ if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ fprintf(fp, "%8u ", i);
+ fprintf(fp, "%*s ", 16, model->metadata.output1[i].output_name);
+ rte_ml_io_type_to_str(model->metadata.output1[i].output_type, str, STR_LEN);
+ fprintf(fp, "%*s ", 12, str);
+ rte_ml_io_type_to_str(model->metadata.output1[i].model_output_type, str,
+ STR_LEN);
+ fprintf(fp, "%*s ", 18, str);
+ fprintf(fp, "%*s", 12,
+ (model->metadata.output1[i].dequantize == 1 ? "Yes" : "No"));
+ fprintf(fp, "\n");
+ } else {
+ j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
+ fprintf(fp, "%8u ", i);
+ fprintf(fp, "%*s ", 16, model->metadata.output2[j].output_name);
+ rte_ml_io_type_to_str(model->metadata.output2[j].output_type, str, STR_LEN);
+ fprintf(fp, "%*s ", 12, str);
+ rte_ml_io_type_to_str(model->metadata.output2[j].model_output_type, str,
+ STR_LEN);
+ fprintf(fp, "%*s ", 18, str);
+ fprintf(fp, "%*s", 12,
+ (model->metadata.output2[j].dequantize == 1 ? "Yes" : "No"));
+ fprintf(fp, "\n");
+ }
}
fprintf(fp, "\n");
print_line(fp, LINE_LEN);
@@ -1863,10 +1900,14 @@ cn10k_ml_io_quantize(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_batc
void *qbuffer)
{
struct cn10k_ml_model *model;
+ uint8_t model_input_type;
uint8_t *lcl_dbuffer;
uint8_t *lcl_qbuffer;
+ uint8_t input_type;
uint32_t batch_id;
+ float qscale;
uint32_t i;
+ uint32_t j;
int ret;
model = dev->data->models[model_id];
@@ -1882,28 +1923,38 @@ cn10k_ml_io_quantize(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_batc
next_batch:
for (i = 0; i < model->metadata.model.num_input; i++) {
- if (model->metadata.input1[i].input_type ==
- model->metadata.input1[i].model_input_type) {
+ if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ input_type = model->metadata.input1[i].input_type;
+ model_input_type = model->metadata.input1[i].model_input_type;
+ qscale = model->metadata.input1[i].qscale;
+ } else {
+ j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
+ input_type = model->metadata.input2[j].input_type;
+ model_input_type = model->metadata.input2[j].model_input_type;
+ qscale = model->metadata.input2[j].qscale;
+ }
+
+ if (input_type == model_input_type) {
rte_memcpy(lcl_qbuffer, lcl_dbuffer, model->addr.input[i].sz_d);
} else {
switch (model->metadata.input1[i].model_input_type) {
case RTE_ML_IO_TYPE_INT8:
- ret = rte_ml_io_float32_to_int8(model->metadata.input1[i].qscale,
+ ret = rte_ml_io_float32_to_int8(qscale,
model->addr.input[i].nb_elements,
lcl_dbuffer, lcl_qbuffer);
break;
case RTE_ML_IO_TYPE_UINT8:
- ret = rte_ml_io_float32_to_uint8(model->metadata.input1[i].qscale,
+ ret = rte_ml_io_float32_to_uint8(qscale,
model->addr.input[i].nb_elements,
lcl_dbuffer, lcl_qbuffer);
break;
case RTE_ML_IO_TYPE_INT16:
- ret = rte_ml_io_float32_to_int16(model->metadata.input1[i].qscale,
+ ret = rte_ml_io_float32_to_int16(qscale,
model->addr.input[i].nb_elements,
lcl_dbuffer, lcl_qbuffer);
break;
case RTE_ML_IO_TYPE_UINT16:
- ret = rte_ml_io_float32_to_uint16(model->metadata.input1[i].qscale,
+ ret = rte_ml_io_float32_to_uint16(qscale,
model->addr.input[i].nb_elements,
lcl_dbuffer, lcl_qbuffer);
break;
@@ -1936,10 +1987,14 @@ cn10k_ml_io_dequantize(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_ba
void *qbuffer, void *dbuffer)
{
struct cn10k_ml_model *model;
+ uint8_t model_output_type;
uint8_t *lcl_qbuffer;
uint8_t *lcl_dbuffer;
+ uint8_t output_type;
uint32_t batch_id;
+ float dscale;
uint32_t i;
+ uint32_t j;
int ret;
model = dev->data->models[model_id];
@@ -1955,28 +2010,38 @@ cn10k_ml_io_dequantize(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_ba
next_batch:
for (i = 0; i < model->metadata.model.num_output; i++) {
- if (model->metadata.output1[i].output_type ==
- model->metadata.output1[i].model_output_type) {
+ if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ output_type = model->metadata.output1[i].output_type;
+ model_output_type = model->metadata.output1[i].model_output_type;
+ dscale = model->metadata.output1[i].dscale;
+ } else {
+ j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
+ output_type = model->metadata.output2[j].output_type;
+ model_output_type = model->metadata.output2[j].model_output_type;
+ dscale = model->metadata.output2[j].dscale;
+ }
+
+ if (output_type == model_output_type) {
rte_memcpy(lcl_dbuffer, lcl_qbuffer, model->addr.output[i].sz_q);
} else {
switch (model->metadata.output1[i].model_output_type) {
case RTE_ML_IO_TYPE_INT8:
- ret = rte_ml_io_int8_to_float32(model->metadata.output1[i].dscale,
+ ret = rte_ml_io_int8_to_float32(dscale,
model->addr.output[i].nb_elements,
lcl_qbuffer, lcl_dbuffer);
break;
case RTE_ML_IO_TYPE_UINT8:
- ret = rte_ml_io_uint8_to_float32(model->metadata.output1[i].dscale,
+ ret = rte_ml_io_uint8_to_float32(dscale,
model->addr.output[i].nb_elements,
lcl_qbuffer, lcl_dbuffer);
break;
case RTE_ML_IO_TYPE_INT16:
- ret = rte_ml_io_int16_to_float32(model->metadata.output1[i].dscale,
+ ret = rte_ml_io_int16_to_float32(dscale,
model->addr.output[i].nb_elements,
lcl_qbuffer, lcl_dbuffer);
break;
case RTE_ML_IO_TYPE_UINT16:
- ret = rte_ml_io_uint16_to_float32(model->metadata.output1[i].dscale,
+ ret = rte_ml_io_uint16_to_float32(dscale,
model->addr.output[i].nb_elements,
lcl_qbuffer, lcl_dbuffer);
break;
--
2.17.1
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v1 0/3] Add support for 32 I/O per model
2023-04-23 5:08 [PATCH v1 0/3] Add support for 32 I/O per model Srikanth Yalavarthi
` (2 preceding siblings ...)
2023-04-23 5:08 ` [PATCH v1 3/3] ml/cnxk: add support for 32 I/O per model Srikanth Yalavarthi
@ 2023-06-12 16:28 ` Thomas Monjalon
3 siblings, 0 replies; 5+ messages in thread
From: Thomas Monjalon @ 2023-06-12 16:28 UTC (permalink / raw)
To: Srikanth Yalavarthi; +Cc: dev, syalavarthi, sshankarnara, aprabhu, ptakkar
23/04/2023 07:08, Srikanth Yalavarthi:
> This patch series adds support for 32 inputs / outputs per each
> model. Changes required to enable the required support include:
>
> 1. Splitting model metadata fields into structures.
> 2. Update model metadata to v2301 which supports 32 I/O.
> 3. Update ML driver code to support metadata v2301.
>
>
> Srikanth Yalavarthi (3):
> ml/cnxk: split metadata fields into sections
> ml/cnxk: update model metadata to v2301
> ml/cnxk: add support for 32 I/O per model
Applied, thanks.
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2023-06-12 16:28 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-23 5:08 [PATCH v1 0/3] Add support for 32 I/O per model Srikanth Yalavarthi
2023-04-23 5:08 ` [PATCH v1 1/3] ml/cnxk: split metadata fields into sections Srikanth Yalavarthi
2023-04-23 5:08 ` [PATCH v1 2/3] ml/cnxk: update model metadata to v2301 Srikanth Yalavarthi
2023-04-23 5:08 ` [PATCH v1 3/3] ml/cnxk: add support for 32 I/O per model Srikanth Yalavarthi
2023-06-12 16:28 ` [PATCH v1 0/3] Add " Thomas Monjalon
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).