* [PATCH v1 0/3] Add support for 32 I/O per model
@ 2023-04-23 5:08 Srikanth Yalavarthi
2023-04-23 5:08 ` [PATCH v1 1/3] ml/cnxk: split metadata fields into sections Srikanth Yalavarthi
` (3 more replies)
0 siblings, 4 replies; 5+ messages in thread
From: Srikanth Yalavarthi @ 2023-04-23 5:08 UTC (permalink / raw)
Cc: dev, syalavarthi, sshankarnara, aprabhu, ptakkar
This patch series adds support for 32 inputs / outputs per
model. Changes required to enable this support include:
1. Splitting model metadata fields into structures.
2. Update model metadata to v2301 which supports 32 I/O.
3. Update ML driver code to support metadata v2301.
Srikanth Yalavarthi (3):
ml/cnxk: split metadata fields into sections
ml/cnxk: update model metadata to v2301
ml/cnxk: add support for 32 I/O per model
drivers/ml/cnxk/cn10k_ml_model.c | 401 +++++++++++++++++-------
drivers/ml/cnxk/cn10k_ml_model.h | 512 +++++++++++++++++--------------
drivers/ml/cnxk/cn10k_ml_ops.c | 133 ++++++--
3 files changed, 659 insertions(+), 387 deletions(-)
--
2.17.1
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH v1 1/3] ml/cnxk: split metadata fields into sections
2023-04-23 5:08 [PATCH v1 0/3] Add support for 32 I/O per model Srikanth Yalavarthi
@ 2023-04-23 5:08 ` Srikanth Yalavarthi
2023-04-23 5:08 ` [PATCH v1 2/3] ml/cnxk: update model metadata to v2301 Srikanth Yalavarthi
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Srikanth Yalavarthi @ 2023-04-23 5:08 UTC (permalink / raw)
To: Srikanth Yalavarthi; +Cc: dev, sshankarnara, aprabhu, ptakkar
Split metadata into header, model sections, weights & bias,
input / output and data sections. This is a preparatory step
to introduce v2301 of model metadata.
Signed-off-by: Srikanth Yalavarthi <syalavarthi@marvell.com>
---
drivers/ml/cnxk/cn10k_ml_model.c | 26 +-
drivers/ml/cnxk/cn10k_ml_model.h | 487 ++++++++++++++++---------------
2 files changed, 270 insertions(+), 243 deletions(-)
diff --git a/drivers/ml/cnxk/cn10k_ml_model.c b/drivers/ml/cnxk/cn10k_ml_model.c
index 2ded05c5dc..c0b7b061f5 100644
--- a/drivers/ml/cnxk/cn10k_ml_model.c
+++ b/drivers/ml/cnxk/cn10k_ml_model.c
@@ -47,42 +47,42 @@ cn10k_ml_model_metadata_check(uint8_t *buffer, uint64_t size)
metadata = (struct cn10k_ml_model_metadata *)buffer;
/* Header CRC check */
- if (metadata->metadata_header.header_crc32c != 0) {
- header_crc32c = rte_hash_crc(
- buffer, sizeof(metadata->metadata_header) - sizeof(uint32_t), 0);
+ if (metadata->header.header_crc32c != 0) {
+ header_crc32c =
+ rte_hash_crc(buffer, sizeof(metadata->header) - sizeof(uint32_t), 0);
- if (header_crc32c != metadata->metadata_header.header_crc32c) {
+ if (header_crc32c != metadata->header.header_crc32c) {
plt_err("Invalid model, Header CRC mismatch");
return -EINVAL;
}
}
/* Payload CRC check */
- if (metadata->metadata_header.payload_crc32c != 0) {
- payload_crc32c = rte_hash_crc(buffer + sizeof(metadata->metadata_header),
- size - sizeof(metadata->metadata_header), 0);
+ if (metadata->header.payload_crc32c != 0) {
+ payload_crc32c = rte_hash_crc(buffer + sizeof(metadata->header),
+ size - sizeof(metadata->header), 0);
- if (payload_crc32c != metadata->metadata_header.payload_crc32c) {
+ if (payload_crc32c != metadata->header.payload_crc32c) {
plt_err("Invalid model, Payload CRC mismatch");
return -EINVAL;
}
}
/* Model magic string */
- if (strncmp((char *)metadata->metadata_header.magic, MRVL_ML_MODEL_MAGIC_STRING, 4) != 0) {
- plt_err("Invalid model, magic = %s", metadata->metadata_header.magic);
+ if (strncmp((char *)metadata->header.magic, MRVL_ML_MODEL_MAGIC_STRING, 4) != 0) {
+ plt_err("Invalid model, magic = %s", metadata->header.magic);
return -EINVAL;
}
/* Target architecture */
- if (metadata->metadata_header.target_architecture != MRVL_ML_MODEL_TARGET_ARCH) {
+ if (metadata->header.target_architecture != MRVL_ML_MODEL_TARGET_ARCH) {
plt_err("Model target architecture (%u) not supported",
- metadata->metadata_header.target_architecture);
+ metadata->header.target_architecture);
return -ENOTSUP;
}
/* Header version */
- rte_memcpy(version, metadata->metadata_header.version, 4 * sizeof(uint8_t));
+ rte_memcpy(version, metadata->header.version, 4 * sizeof(uint8_t));
if (version[0] * 1000 + version[1] * 100 < MRVL_ML_MODEL_VERSION) {
plt_err("Metadata version = %u.%u.%u.%u (< %u.%u.%u.%u) not supported", version[0],
version[1], version[2], version[3], (MRVL_ML_MODEL_VERSION / 1000) % 10,
diff --git a/drivers/ml/cnxk/cn10k_ml_model.h b/drivers/ml/cnxk/cn10k_ml_model.h
index 1bc748265d..b30ad5a981 100644
--- a/drivers/ml/cnxk/cn10k_ml_model.h
+++ b/drivers/ml/cnxk/cn10k_ml_model.h
@@ -30,298 +30,325 @@ enum cn10k_ml_model_state {
#define MRVL_ML_OUTPUT_NAME_LEN 16
#define MRVL_ML_INPUT_OUTPUT_SIZE 8
-/* Model file metadata structure */
-struct cn10k_ml_model_metadata {
- /* Header (256-byte) */
- struct {
- /* Magic string ('M', 'R', 'V', 'L') */
- uint8_t magic[4];
+/* Header (256-byte) */
+struct cn10k_ml_model_metadata_header {
+ /* Magic string ('M', 'R', 'V', 'L') */
+ uint8_t magic[4];
- /* Metadata version */
- uint8_t version[4];
+ /* Metadata version */
+ uint8_t version[4];
- /* Metadata size */
- uint32_t metadata_size;
+ /* Metadata size */
+ uint32_t metadata_size;
- /* Unique ID */
- uint8_t uuid[128];
+ /* Unique ID */
+ uint8_t uuid[128];
- /* Model target architecture
- * 0 = Undefined
- * 1 = M1K
- * 128 = MLIP
- * 256 = Experimental
- */
- uint32_t target_architecture;
- uint8_t reserved[104];
+ /* Model target architecture
+ * 0 = Undefined
+ * 1 = M1K
+ * 128 = MLIP
+ * 256 = Experimental
+ */
+ uint32_t target_architecture;
+ uint8_t reserved[104];
- /* CRC of data after metadata_header (i.e. after first 256 bytes) */
- uint32_t payload_crc32c;
+ /* CRC of data after header (i.e. after first 256 bytes) */
+ uint32_t payload_crc32c;
- /* CRC of first 252 bytes of metadata_header, after payload_crc calculation */
- uint32_t header_crc32c;
- } metadata_header;
+ /* CRC of first 252 bytes of header, after payload_crc calculation */
+ uint32_t header_crc32c;
+};
- /* Model information (256-byte) */
- struct {
- /* Model name string */
- uint8_t name[MRVL_ML_MODEL_NAME_LEN];
+/* Model information (256-byte) */
+struct cn10k_ml_model_metadata_model {
+ /* Model name string */
+ uint8_t name[MRVL_ML_MODEL_NAME_LEN];
- /* Model version info (xx.xx.xx.xx) */
- uint8_t version[4];
+ /* Model version info (xx.xx.xx.xx) */
+ uint8_t version[4];
- /* Model code size (Init + Main + Finish) */
- uint32_t code_size;
+ /* Model code size (Init + Main + Finish) */
+ uint32_t code_size;
- /* Model data size (Weights and Bias) */
- uint32_t data_size;
+ /* Model data size (Weights and Bias) */
+ uint32_t data_size;
- /* OCM start offset, set to ocm_wb_range_start */
- uint32_t ocm_start;
+ /* OCM start offset, set to ocm_wb_range_start */
+ uint32_t ocm_start;
- /* OCM start offset, set to max OCM size */
- uint32_t ocm_end;
+ /* OCM start offset, set to max OCM size */
+ uint32_t ocm_end;
- /* Relocatable flag (always yes)
- * 0 = Not relocatable
- * 1 = Relocatable
- */
- uint8_t ocm_relocatable;
+ /* Relocatable flag (always yes)
+ * 0 = Not relocatable
+ * 1 = Relocatable
+ */
+ uint8_t ocm_relocatable;
- /* Tile relocatable flag (always yes)
- * 0 = Not relocatable
- * 1 = Relocatable
- */
- uint8_t tile_relocatable;
+ /* Tile relocatable flag (always yes)
+ * 0 = Not relocatable
+ * 1 = Relocatable
+ */
+ uint8_t tile_relocatable;
- /* Start tile (Always 0) */
- uint8_t tile_start;
+ /* Start tile (Always 0) */
+ uint8_t tile_start;
- /* End tile (num_tiles - 1) */
- uint8_t tile_end;
+ /* End tile (num_tiles - 1) */
+ uint8_t tile_end;
- /* Inference batch size */
- uint8_t batch_size;
+ /* Inference batch size */
+ uint8_t batch_size;
- /* Number of input tensors (Max 8) */
- uint8_t num_input;
+ /* Number of input tensors (Max 8) */
+ uint8_t num_input;
- /* Number of output tensors (Max 8) */
- uint8_t num_output;
- uint8_t reserved1;
+ /* Number of output tensors (Max 8) */
+ uint8_t num_output;
+ uint8_t reserved_1;
- /* Total input size in bytes */
- uint32_t input_size;
+ /* Total input size in bytes */
+ uint32_t input_size;
- /* Total output size in bytes */
- uint32_t output_size;
+ /* Total output size in bytes */
+ uint32_t output_size;
- /* Table size in bytes */
- uint32_t table_size;
+ /* Table size in bytes */
+ uint32_t table_size;
- /* Number of layers in the network */
- uint32_t num_layers;
- uint32_t reserved2;
+ /* Number of layers in the network */
+ uint32_t num_layers;
+ uint32_t reserved_2;
- /* Floor of absolute OCM region */
- uint64_t ocm_tmp_range_floor;
+ /* Floor of absolute OCM region */
+ uint64_t ocm_tmp_range_floor;
- /* Relative OCM start address of WB data block */
- uint64_t ocm_wb_range_start;
+ /* Relative OCM start address of WB data block */
+ uint64_t ocm_wb_range_start;
- /* Relative OCM end address of WB data block */
- uint64_t ocm_wb_range_end;
+ /* Relative OCM end address of WB data block */
+ uint64_t ocm_wb_range_end;
- /* Relative DDR start address of WB data block */
- uint64_t ddr_wb_range_start;
+ /* Relative DDR start address of WB data block */
+ uint64_t ddr_wb_range_start;
- /* Relative DDR end address of all outputs */
- uint64_t ddr_wb_range_end;
+ /* Relative DDR end address of all outputs */
+ uint64_t ddr_wb_range_end;
- /* Relative DDR start address of all inputs */
- uint64_t ddr_input_range_start;
+ /* Relative DDR start address of all inputs */
+ uint64_t ddr_input_range_start;
- /* Relative DDR end address of all inputs */
- uint64_t ddr_input_range_end;
+ /* Relative DDR end address of all inputs */
+ uint64_t ddr_input_range_end;
- /* Relative DDR start address of all outputs */
- uint64_t ddr_output_range_start;
+ /* Relative DDR start address of all outputs */
+ uint64_t ddr_output_range_start;
- /* Relative DDR end address of all outputs */
- uint64_t ddr_output_range_end;
+ /* Relative DDR end address of all outputs */
+ uint64_t ddr_output_range_end;
- /* Compiler version */
- uint8_t compiler_version[8];
+ /* Compiler version */
+ uint8_t compiler_version[8];
- /* CDK version */
- uint8_t cdk_version[4];
+ /* CDK version */
+ uint8_t cdk_version[4];
- /* Lower batch optimization support
- * 0 - No,
- * 1 - Yes
- */
- uint8_t supports_lower_batch_size_optimization;
- uint8_t reserved3[59];
- } model;
+ /* Lower batch optimization support
+ * 0 - No,
+ * 1 - Yes
+ */
+ uint8_t supports_lower_batch_size_optimization;
+ uint8_t reserved_3[59];
+};
- /* Init section (64-byte) */
- struct {
- uint32_t file_offset;
- uint32_t file_size;
- uint8_t reserved[56];
- } init_model;
+/* Init section (64-byte) */
+struct cn10k_ml_model_metadata_init_section {
+ uint32_t file_offset;
+ uint32_t file_size;
+ uint8_t reserved[56];
+};
- /* Main section (64-byte) */
- struct {
- uint32_t file_offset;
- uint32_t file_size;
- uint8_t reserved[56];
- } main_model;
+/* Main section (64-byte) */
+struct cn10k_ml_model_metadata_main_section {
+ uint32_t file_offset;
+ uint32_t file_size;
+ uint8_t reserved[56];
+};
- /* Finish section (64-byte) */
- struct {
- uint32_t file_offset;
- uint32_t file_size;
- uint8_t reserved[56];
- } finish_model;
+/* Finish section (64-byte) */
+struct cn10k_ml_model_metadata_finish_section {
+ uint32_t file_offset;
+ uint32_t file_size;
+ uint8_t reserved[56];
+};
- uint8_t reserved1[512]; /* End of 2k bytes */
+/* Weights and Bias (64-byte) */
+struct cn10k_ml_model_metadata_weights_bias_section {
+ /* Memory offset, set to ddr_wb_range_start */
+ uint64_t mem_offset;
+ uint32_t file_offset;
+ uint32_t file_size;
- /* Weights and Bias (64-byte) */
+ /* Relocatable flag for WB
+ * 1 = Relocatable
+ * 2 = Not relocatable
+ */
+ uint8_t relocatable;
+ uint8_t reserved[47];
+};
+
+/* Input section (64-byte per input) */
+struct cn10k_ml_model_metadata_input_section {
+ /* DDR offset (in OCM absolute addresses for input) */
+ uint64_t mem_offset;
+
+ /* Relocatable flag
+ * 1 = Relocatable
+ * 2 = Not relocatable
+ */
+ uint8_t relocatable;
+
+ /* Input quantization
+ * 1 = Requires quantization
+ * 2 = Pre-quantized
+ */
+ uint8_t quantize;
+
+ /* Type of incoming input
+ * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16,
+ * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32
+ */
+ uint8_t input_type;
+
+ /* Type of input required by model
+ * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16,
+ * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32
+ */
+ uint8_t model_input_type;
+
+ /* float_32 qscale value
+ * quantized = non-quantized * qscale
+ */
+ float qscale;
+
+ /* Input shape */
struct {
- /* Memory offset, set to ddr_wb_range_start */
- uint64_t mem_offset;
- uint32_t file_offset;
- uint32_t file_size;
-
- /* Relocatable flag for WB
- * 1 = Relocatable
- * 2 = Not relocatable
+ /* Input format
+ * 1 = NCHW
+ * 2 = NHWC
*/
- uint8_t relocatable;
- uint8_t reserved[47];
- } weights_bias;
+ uint8_t format;
+ uint8_t reserved[3];
+ uint32_t w;
+ uint32_t x;
+ uint32_t y;
+ uint32_t z;
+ } shape;
+ uint8_t reserved[4];
+
+ /* Name of input */
+ uint8_t input_name[MRVL_ML_INPUT_NAME_LEN];
+
+ /* DDR range end
+ * new = mem_offset + size_bytes - 1
+ */
+ uint64_t ddr_range_end;
+};
- /* Input (512-byte, 64-byte per input) provisioned for 8 inputs */
- struct {
- /* DDR offset (in OCM absolute addresses for input) */
- uint64_t mem_offset;
+/* Output section (64-byte per output) */
+struct cn10k_ml_model_metadata_output_section {
+ /* DDR offset in OCM absolute addresses for output */
+ uint64_t mem_offset;
- /* Relocatable flag
- * 1 = Relocatable
- * 2 = Not relocatable
- */
- uint8_t relocatable;
+ /* Relocatable flag
+ * 1 = Relocatable
+ * 2 = Not relocatable
+ */
+ uint8_t relocatable;
- /* Input quantization
- * 1 = Requires quantization
- * 2 = Pre-quantized
- */
- uint8_t quantize;
+ /* Output dequantization
+ * 1 = De-quantization required
+ * 2 = De-quantization not required
+ */
+ uint8_t dequantize;
- /* Type of incoming input
- * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16,
- * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32
- */
- uint8_t input_type;
+ /* Type of outgoing output
+ * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16
+ * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32
+ */
+ uint8_t output_type;
- /* Type of input required by model
- * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16,
- * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32
- */
- uint8_t model_input_type;
+ /* Type of output produced by model
+ * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16
+ * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32
+ */
+ uint8_t model_output_type;
- /* float_32 qscale value
- * quantized = non-quantized * qscale
- */
- float qscale;
-
- /* Input shape */
- struct {
- /* Input format
- * 1 = NCHW
- * 2 = NHWC
- */
- uint8_t format;
- uint8_t reserved[3];
- uint32_t w;
- uint32_t x;
- uint32_t y;
- uint32_t z;
- } shape;
- uint8_t reserved[4];
-
- /* Name of input */
- uint8_t input_name[MRVL_ML_INPUT_NAME_LEN];
-
- /* DDR range end
- * new = mem_offset + size_bytes - 1
- */
- uint64_t ddr_range_end;
- } input[MRVL_ML_INPUT_OUTPUT_SIZE];
+ /* float_32 dscale value
+ * dequantized = quantized * dscale
+ */
+ float dscale;
- /* Output (512 byte, 64-byte per input) provisioned for 8 outputs */
- struct {
- /* DDR offset in OCM absolute addresses for output */
- uint64_t mem_offset;
+ /* Number of items in the output */
+ uint32_t size;
+ uint8_t reserved[20];
- /* Relocatable flag
- * 1 = Relocatable
- * 2 = Not relocatable
- */
- uint8_t relocatable;
+ /* DDR range end
+ * new = mem_offset + size_bytes - 1
+ */
+ uint64_t ddr_range_end;
+ uint8_t output_name[MRVL_ML_OUTPUT_NAME_LEN];
+};
- /* Output dequantization
- * 1 = De-quantization required
- * 2 = De-quantization not required
- */
- uint8_t dequantize;
+/* Model data */
+struct cn10k_ml_model_metadata_data_section {
+ uint8_t reserved[4068];
- /* Type of outgoing output
- * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16
- * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32
- */
- uint8_t output_type;
+ /* Beta: xx.xx.xx.xx,
+ * Later: YYYYMM.xx.xx
+ */
+ uint8_t compiler_version[8];
- /* Type of output produced by model
- * 1 = INT8, 2 = UINT8, 3 = INT16, 4 = UINT16
- * 5 = INT32, 6 = UINT32, 7 = FP16, 8 = FP32
- */
- uint8_t model_output_type;
+ /* M1K CDK version (xx.xx.xx.xx) */
+ uint8_t m1k_cdk_version[4];
+};
- /* float_32 dscale value
- * dequantized = quantized * dscale
- */
- float dscale;
+/* Model file metadata structure */
+struct cn10k_ml_model_metadata {
+ /* Header (256-byte) */
+ struct cn10k_ml_model_metadata_header header;
- /* Number of items in the output */
- uint32_t size;
- uint8_t reserved[20];
+ /* Model information (256-byte) */
+ struct cn10k_ml_model_metadata_model model;
- /* DDR range end
- * new = mem_offset + size_bytes - 1
- */
- uint64_t ddr_range_end;
- uint8_t output_name[MRVL_ML_OUTPUT_NAME_LEN];
- } output[MRVL_ML_INPUT_OUTPUT_SIZE];
+ /* Init section (64-byte) */
+ struct cn10k_ml_model_metadata_init_section init_model;
- uint8_t reserved2[1792];
+ /* Main section (64-byte) */
+ struct cn10k_ml_model_metadata_main_section main_model;
- /* Model data */
- struct {
- uint8_t reserved1[4068];
+ /* Finish section (64-byte) */
+ struct cn10k_ml_model_metadata_finish_section finish_model;
- /* Beta: xx.xx.xx.xx,
- * Later: YYYYMM.xx.xx
- */
- uint8_t compiler_version[8];
+ uint8_t reserved_1[512]; /* End of 2k bytes */
+
+ /* Weights and Bias (64-byte) */
+ struct cn10k_ml_model_metadata_weights_bias_section weights_bias;
+
+ /* Input (512-bytes, 64-byte per input) provisioned for 8 inputs */
+ struct cn10k_ml_model_metadata_input_section input[MRVL_ML_INPUT_OUTPUT_SIZE];
+
+ /* Output (512-bytes, 64-byte per output) provisioned for 8 outputs */
+ struct cn10k_ml_model_metadata_output_section output[MRVL_ML_INPUT_OUTPUT_SIZE];
- /* M1K CDK version (xx.xx.xx.xx) */
- uint8_t m1k_cdk_version[4];
- } data;
+ uint8_t reserved_2[1792];
+
+ /* Model data */
+ struct cn10k_ml_model_metadata_data_section data;
/* Hidden 16 bytes of magic code */
- uint8_t reserved3[16];
+ uint8_t reserved_3[16];
};
/* Model address structure */
--
2.17.1
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH v1 2/3] ml/cnxk: update model metadata to v2301
2023-04-23 5:08 [PATCH v1 0/3] Add support for 32 I/O per model Srikanth Yalavarthi
2023-04-23 5:08 ` [PATCH v1 1/3] ml/cnxk: split metadata fields into sections Srikanth Yalavarthi
@ 2023-04-23 5:08 ` Srikanth Yalavarthi
2023-04-23 5:08 ` [PATCH v1 3/3] ml/cnxk: add support for 32 I/O per model Srikanth Yalavarthi
2023-06-12 16:28 ` [PATCH v1 0/3] Add " Thomas Monjalon
3 siblings, 0 replies; 5+ messages in thread
From: Srikanth Yalavarthi @ 2023-04-23 5:08 UTC (permalink / raw)
To: Srikanth Yalavarthi; +Cc: dev, sshankarnara, aprabhu, ptakkar
Update model metadata to v2301. Revised metadata introduces
fields to support up to 32 inputs/outputs per model, scratch
relocation and updates to names of existing fields. Update
driver files to include changes in names of metadata fields.
Signed-off-by: Srikanth Yalavarthi <syalavarthi@marvell.com>
---
drivers/ml/cnxk/cn10k_ml_model.c | 111 ++++++++++++++++---------------
drivers/ml/cnxk/cn10k_ml_model.h | 36 +++++++---
drivers/ml/cnxk/cn10k_ml_ops.c | 50 +++++++-------
3 files changed, 106 insertions(+), 91 deletions(-)
diff --git a/drivers/ml/cnxk/cn10k_ml_model.c b/drivers/ml/cnxk/cn10k_ml_model.c
index c0b7b061f5..a15df700aa 100644
--- a/drivers/ml/cnxk/cn10k_ml_model.c
+++ b/drivers/ml/cnxk/cn10k_ml_model.c
@@ -83,11 +83,11 @@ cn10k_ml_model_metadata_check(uint8_t *buffer, uint64_t size)
/* Header version */
rte_memcpy(version, metadata->header.version, 4 * sizeof(uint8_t));
- if (version[0] * 1000 + version[1] * 100 < MRVL_ML_MODEL_VERSION) {
+ if (version[0] * 1000 + version[1] * 100 != MRVL_ML_MODEL_VERSION_MIN) {
plt_err("Metadata version = %u.%u.%u.%u (< %u.%u.%u.%u) not supported", version[0],
- version[1], version[2], version[3], (MRVL_ML_MODEL_VERSION / 1000) % 10,
- (MRVL_ML_MODEL_VERSION / 100) % 10, (MRVL_ML_MODEL_VERSION / 10) % 10,
- MRVL_ML_MODEL_VERSION % 10);
+ version[1], version[2], version[3], (MRVL_ML_MODEL_VERSION_MIN / 1000) % 10,
+ (MRVL_ML_MODEL_VERSION_MIN / 100) % 10,
+ (MRVL_ML_MODEL_VERSION_MIN / 10) % 10, MRVL_ML_MODEL_VERSION_MIN % 10);
return -ENOTSUP;
}
@@ -125,36 +125,36 @@ cn10k_ml_model_metadata_check(uint8_t *buffer, uint64_t size)
}
/* Check input count */
- if (metadata->model.num_input > MRVL_ML_INPUT_OUTPUT_SIZE) {
+ if (metadata->model.num_input > MRVL_ML_NUM_INPUT_OUTPUT_1) {
plt_err("Invalid metadata, num_input = %u (> %u)", metadata->model.num_input,
- MRVL_ML_INPUT_OUTPUT_SIZE);
+ MRVL_ML_NUM_INPUT_OUTPUT_1);
return -EINVAL;
}
/* Check output count */
- if (metadata->model.num_output > MRVL_ML_INPUT_OUTPUT_SIZE) {
+ if (metadata->model.num_output > MRVL_ML_NUM_INPUT_OUTPUT_1) {
plt_err("Invalid metadata, num_output = %u (> %u)", metadata->model.num_output,
- MRVL_ML_INPUT_OUTPUT_SIZE);
+ MRVL_ML_NUM_INPUT_OUTPUT_1);
return -EINVAL;
}
/* Inputs */
for (i = 0; i < metadata->model.num_input; i++) {
- if (rte_ml_io_type_size_get(cn10k_ml_io_type_map(metadata->input[i].input_type)) <=
+ if (rte_ml_io_type_size_get(cn10k_ml_io_type_map(metadata->input1[i].input_type)) <=
0) {
plt_err("Invalid metadata, input[%u] : input_type = %u", i,
- metadata->input[i].input_type);
+ metadata->input1[i].input_type);
return -EINVAL;
}
if (rte_ml_io_type_size_get(
- cn10k_ml_io_type_map(metadata->input[i].model_input_type)) <= 0) {
+ cn10k_ml_io_type_map(metadata->input1[i].model_input_type)) <= 0) {
plt_err("Invalid metadata, input[%u] : model_input_type = %u", i,
- metadata->input[i].model_input_type);
+ metadata->input1[i].model_input_type);
return -EINVAL;
}
- if (metadata->input[i].relocatable != 1) {
+ if (metadata->input1[i].relocatable != 1) {
plt_err("Model not supported, non-relocatable input: %u", i);
return -ENOTSUP;
}
@@ -163,20 +163,20 @@ cn10k_ml_model_metadata_check(uint8_t *buffer, uint64_t size)
/* Outputs */
for (i = 0; i < metadata->model.num_output; i++) {
if (rte_ml_io_type_size_get(
- cn10k_ml_io_type_map(metadata->output[i].output_type)) <= 0) {
+ cn10k_ml_io_type_map(metadata->output1[i].output_type)) <= 0) {
plt_err("Invalid metadata, output[%u] : output_type = %u", i,
- metadata->output[i].output_type);
+ metadata->output1[i].output_type);
return -EINVAL;
}
if (rte_ml_io_type_size_get(
- cn10k_ml_io_type_map(metadata->output[i].model_output_type)) <= 0) {
+ cn10k_ml_io_type_map(metadata->output1[i].model_output_type)) <= 0) {
plt_err("Invalid metadata, output[%u] : model_output_type = %u", i,
- metadata->output[i].model_output_type);
+ metadata->output1[i].model_output_type);
return -EINVAL;
}
- if (metadata->output[i].relocatable != 1) {
+ if (metadata->output1[i].relocatable != 1) {
plt_err("Model not supported, non-relocatable output: %u", i);
return -ENOTSUP;
}
@@ -191,28 +191,29 @@ cn10k_ml_model_metadata_update(struct cn10k_ml_model_metadata *metadata)
uint8_t i;
for (i = 0; i < metadata->model.num_input; i++) {
- metadata->input[i].input_type = cn10k_ml_io_type_map(metadata->input[i].input_type);
- metadata->input[i].model_input_type =
- cn10k_ml_io_type_map(metadata->input[i].model_input_type);
+ metadata->input1[i].input_type =
+ cn10k_ml_io_type_map(metadata->input1[i].input_type);
+ metadata->input1[i].model_input_type =
+ cn10k_ml_io_type_map(metadata->input1[i].model_input_type);
- if (metadata->input[i].shape.w == 0)
- metadata->input[i].shape.w = 1;
+ if (metadata->input1[i].shape.w == 0)
+ metadata->input1[i].shape.w = 1;
- if (metadata->input[i].shape.x == 0)
- metadata->input[i].shape.x = 1;
+ if (metadata->input1[i].shape.x == 0)
+ metadata->input1[i].shape.x = 1;
- if (metadata->input[i].shape.y == 0)
- metadata->input[i].shape.y = 1;
+ if (metadata->input1[i].shape.y == 0)
+ metadata->input1[i].shape.y = 1;
- if (metadata->input[i].shape.z == 0)
- metadata->input[i].shape.z = 1;
+ if (metadata->input1[i].shape.z == 0)
+ metadata->input1[i].shape.z = 1;
}
for (i = 0; i < metadata->model.num_output; i++) {
- metadata->output[i].output_type =
- cn10k_ml_io_type_map(metadata->output[i].output_type);
- metadata->output[i].model_output_type =
- cn10k_ml_io_type_map(metadata->output[i].model_output_type);
+ metadata->output1[i].output_type =
+ cn10k_ml_io_type_map(metadata->output1[i].output_type);
+ metadata->output1[i].model_output_type =
+ cn10k_ml_io_type_map(metadata->output1[i].model_output_type);
}
}
@@ -272,31 +273,31 @@ cn10k_ml_model_addr_update(struct cn10k_ml_model *model, uint8_t *buffer, uint8_
addr->total_input_sz_q = 0;
for (i = 0; i < metadata->model.num_input; i++) {
addr->input[i].nb_elements =
- metadata->input[i].shape.w * metadata->input[i].shape.x *
- metadata->input[i].shape.y * metadata->input[i].shape.z;
+ metadata->input1[i].shape.w * metadata->input1[i].shape.x *
+ metadata->input1[i].shape.y * metadata->input1[i].shape.z;
addr->input[i].sz_d = addr->input[i].nb_elements *
- rte_ml_io_type_size_get(metadata->input[i].input_type);
+ rte_ml_io_type_size_get(metadata->input1[i].input_type);
addr->input[i].sz_q = addr->input[i].nb_elements *
- rte_ml_io_type_size_get(metadata->input[i].model_input_type);
+ rte_ml_io_type_size_get(metadata->input1[i].model_input_type);
addr->total_input_sz_d += addr->input[i].sz_d;
addr->total_input_sz_q += addr->input[i].sz_q;
plt_ml_dbg("model_id = %u, input[%u] - w:%u x:%u y:%u z:%u, sz_d = %u sz_q = %u",
- model->model_id, i, metadata->input[i].shape.w,
- metadata->input[i].shape.x, metadata->input[i].shape.y,
- metadata->input[i].shape.z, addr->input[i].sz_d, addr->input[i].sz_q);
+ model->model_id, i, metadata->input1[i].shape.w,
+ metadata->input1[i].shape.x, metadata->input1[i].shape.y,
+ metadata->input1[i].shape.z, addr->input[i].sz_d, addr->input[i].sz_q);
}
/* Outputs */
addr->total_output_sz_q = 0;
addr->total_output_sz_d = 0;
for (i = 0; i < metadata->model.num_output; i++) {
- addr->output[i].nb_elements = metadata->output[i].size;
+ addr->output[i].nb_elements = metadata->output1[i].size;
addr->output[i].sz_d = addr->output[i].nb_elements *
- rte_ml_io_type_size_get(metadata->output[i].output_type);
+ rte_ml_io_type_size_get(metadata->output1[i].output_type);
addr->output[i].sz_q =
addr->output[i].nb_elements *
- rte_ml_io_type_size_get(metadata->output[i].model_output_type);
+ rte_ml_io_type_size_get(metadata->output1[i].model_output_type);
addr->total_output_sz_q += addr->output[i].sz_q;
addr->total_output_sz_d += addr->output[i].sz_d;
@@ -388,24 +389,24 @@ cn10k_ml_model_info_set(struct rte_ml_dev *dev, struct cn10k_ml_model *model)
/* Set input info */
for (i = 0; i < info->nb_inputs; i++) {
- rte_memcpy(input[i].name, metadata->input[i].input_name, MRVL_ML_INPUT_NAME_LEN);
- input[i].dtype = metadata->input[i].input_type;
- input[i].qtype = metadata->input[i].model_input_type;
- input[i].shape.format = metadata->input[i].shape.format;
- input[i].shape.w = metadata->input[i].shape.w;
- input[i].shape.x = metadata->input[i].shape.x;
- input[i].shape.y = metadata->input[i].shape.y;
- input[i].shape.z = metadata->input[i].shape.z;
+ rte_memcpy(input[i].name, metadata->input1[i].input_name, MRVL_ML_INPUT_NAME_LEN);
+ input[i].dtype = metadata->input1[i].input_type;
+ input[i].qtype = metadata->input1[i].model_input_type;
+ input[i].shape.format = metadata->input1[i].shape.format;
+ input[i].shape.w = metadata->input1[i].shape.w;
+ input[i].shape.x = metadata->input1[i].shape.x;
+ input[i].shape.y = metadata->input1[i].shape.y;
+ input[i].shape.z = metadata->input1[i].shape.z;
}
/* Set output info */
for (i = 0; i < info->nb_outputs; i++) {
- rte_memcpy(output[i].name, metadata->output[i].output_name,
+ rte_memcpy(output[i].name, metadata->output1[i].output_name,
MRVL_ML_OUTPUT_NAME_LEN);
- output[i].dtype = metadata->output[i].output_type;
- output[i].qtype = metadata->output[i].model_output_type;
+ output[i].dtype = metadata->output1[i].output_type;
+ output[i].qtype = metadata->output1[i].model_output_type;
output[i].shape.format = RTE_ML_IO_FORMAT_1D;
- output[i].shape.w = metadata->output[i].size;
+ output[i].shape.w = metadata->output1[i].size;
output[i].shape.x = 1;
output[i].shape.y = 1;
output[i].shape.z = 1;
diff --git a/drivers/ml/cnxk/cn10k_ml_model.h b/drivers/ml/cnxk/cn10k_ml_model.h
index b30ad5a981..bd863a8c12 100644
--- a/drivers/ml/cnxk/cn10k_ml_model.h
+++ b/drivers/ml/cnxk/cn10k_ml_model.h
@@ -21,14 +21,15 @@ enum cn10k_ml_model_state {
ML_CN10K_MODEL_STATE_UNKNOWN,
};
-/* Model Metadata : v 2.1.0.2 */
+/* Model Metadata : v 2.3.0.1 */
#define MRVL_ML_MODEL_MAGIC_STRING "MRVL"
#define MRVL_ML_MODEL_TARGET_ARCH 128
-#define MRVL_ML_MODEL_VERSION 2100
+#define MRVL_ML_MODEL_VERSION_MIN 2100
#define MRVL_ML_MODEL_NAME_LEN 64
#define MRVL_ML_INPUT_NAME_LEN 16
#define MRVL_ML_OUTPUT_NAME_LEN 16
-#define MRVL_ML_INPUT_OUTPUT_SIZE 8
+#define MRVL_ML_NUM_INPUT_OUTPUT_1 8
+#define MRVL_ML_NUM_INPUT_OUTPUT_2 24
/* Header (256-byte) */
struct cn10k_ml_model_metadata_header {
@@ -101,10 +102,10 @@ struct cn10k_ml_model_metadata_model {
/* Inference batch size */
uint8_t batch_size;
- /* Number of input tensors (Max 8) */
+ /* Number of input tensors (Max 32) */
uint8_t num_input;
- /* Number of output tensors (Max 8) */
+ /* Number of output tensors (Max 32) */
uint8_t num_output;
uint8_t reserved_1;
@@ -159,7 +160,14 @@ struct cn10k_ml_model_metadata_model {
* 1 - Yes
*/
uint8_t supports_lower_batch_size_optimization;
- uint8_t reserved_3[59];
+ uint8_t reserved_3[3];
+
+ /* Relative DDR start address of scratch space */
+ uint64_t ddr_scratch_range_start;
+
+ /* Relative DDR end address of scratch space */
+ uint64_t ddr_scratch_range_end;
+ uint8_t reserved_4[40];
};
/* Init section (64-byte) */
@@ -303,7 +311,7 @@ struct cn10k_ml_model_metadata_output_section {
/* Model data */
struct cn10k_ml_model_metadata_data_section {
- uint8_t reserved[4068];
+ uint8_t reserved[996];
/* Beta: xx.xx.xx.xx,
* Later: YYYYMM.xx.xx
@@ -337,13 +345,19 @@ struct cn10k_ml_model_metadata {
struct cn10k_ml_model_metadata_weights_bias_section weights_bias;
/* Input (512-bytes, 64-byte per input) provisioned for 8 inputs */
- struct cn10k_ml_model_metadata_input_section input[MRVL_ML_INPUT_OUTPUT_SIZE];
+ struct cn10k_ml_model_metadata_input_section input1[MRVL_ML_NUM_INPUT_OUTPUT_1];
/* Output (512-bytes, 64-byte per output) provisioned for 8 outputs */
- struct cn10k_ml_model_metadata_output_section output[MRVL_ML_INPUT_OUTPUT_SIZE];
+ struct cn10k_ml_model_metadata_output_section output1[MRVL_ML_NUM_INPUT_OUTPUT_1];
uint8_t reserved_2[1792];
+ /* Input (1536-bytes, 64-byte per input) provisioned for 24 inputs */
+ struct cn10k_ml_model_metadata_input_section input2[MRVL_ML_NUM_INPUT_OUTPUT_2];
+
+ /* Output (1536-bytes, 64-byte per output) provisioned for 24 outputs */
+ struct cn10k_ml_model_metadata_output_section output2[MRVL_ML_NUM_INPUT_OUTPUT_2];
+
/* Model data */
struct cn10k_ml_model_metadata_data_section data;
@@ -399,7 +413,7 @@ struct cn10k_ml_model_addr {
/* Quantized input size */
uint32_t sz_q;
- } input[MRVL_ML_INPUT_OUTPUT_SIZE];
+ } input[MRVL_ML_NUM_INPUT_OUTPUT_1];
/* Output address and size */
struct {
@@ -411,7 +425,7 @@ struct cn10k_ml_model_addr {
/* Quantized output size */
uint32_t sz_q;
- } output[MRVL_ML_INPUT_OUTPUT_SIZE];
+ } output[MRVL_ML_NUM_INPUT_OUTPUT_1];
/* Total size of quantized input */
uint32_t total_input_sz_q;
diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c
index b5eaa24e83..aecc6e74ad 100644
--- a/drivers/ml/cnxk/cn10k_ml_ops.c
+++ b/drivers/ml/cnxk/cn10k_ml_ops.c
@@ -325,13 +325,13 @@ cn10k_ml_model_print(struct rte_ml_dev *dev, uint16_t model_id, FILE *fp)
print_line(fp, LINE_LEN);
for (i = 0; i < model->metadata.model.num_input; i++) {
fprintf(fp, "%8u ", i);
- fprintf(fp, "%*s ", 16, model->metadata.input[i].input_name);
- rte_ml_io_type_to_str(model->metadata.input[i].input_type, str, STR_LEN);
+ fprintf(fp, "%*s ", 16, model->metadata.input1[i].input_name);
+ rte_ml_io_type_to_str(model->metadata.input1[i].input_type, str, STR_LEN);
fprintf(fp, "%*s ", 12, str);
- rte_ml_io_type_to_str(model->metadata.input[i].model_input_type, str, STR_LEN);
+ rte_ml_io_type_to_str(model->metadata.input1[i].model_input_type, str, STR_LEN);
fprintf(fp, "%*s ", 18, str);
- fprintf(fp, "%*s", 12, (model->metadata.input[i].quantize == 1 ? "Yes" : "No"));
- rte_ml_io_format_to_str(model->metadata.input[i].shape.format, str, STR_LEN);
+ fprintf(fp, "%*s", 12, (model->metadata.input1[i].quantize == 1 ? "Yes" : "No"));
+ rte_ml_io_format_to_str(model->metadata.input1[i].shape.format, str, STR_LEN);
fprintf(fp, "%*s", 16, str);
fprintf(fp, "\n");
}
@@ -343,12 +343,12 @@ cn10k_ml_model_print(struct rte_ml_dev *dev, uint16_t model_id, FILE *fp)
print_line(fp, LINE_LEN);
for (i = 0; i < model->metadata.model.num_output; i++) {
fprintf(fp, "%8u ", i);
- fprintf(fp, "%*s ", 16, model->metadata.output[i].output_name);
- rte_ml_io_type_to_str(model->metadata.output[i].output_type, str, STR_LEN);
+ fprintf(fp, "%*s ", 16, model->metadata.output1[i].output_name);
+ rte_ml_io_type_to_str(model->metadata.output1[i].output_type, str, STR_LEN);
fprintf(fp, "%*s ", 12, str);
- rte_ml_io_type_to_str(model->metadata.output[i].model_output_type, str, STR_LEN);
+ rte_ml_io_type_to_str(model->metadata.output1[i].model_output_type, str, STR_LEN);
fprintf(fp, "%*s ", 18, str);
- fprintf(fp, "%*s", 12, (model->metadata.output[i].dequantize == 1 ? "Yes" : "No"));
+ fprintf(fp, "%*s", 12, (model->metadata.output1[i].dequantize == 1 ? "Yes" : "No"));
fprintf(fp, "\n");
}
fprintf(fp, "\n");
@@ -1882,28 +1882,28 @@ cn10k_ml_io_quantize(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_batc
next_batch:
for (i = 0; i < model->metadata.model.num_input; i++) {
- if (model->metadata.input[i].input_type ==
- model->metadata.input[i].model_input_type) {
+ if (model->metadata.input1[i].input_type ==
+ model->metadata.input1[i].model_input_type) {
rte_memcpy(lcl_qbuffer, lcl_dbuffer, model->addr.input[i].sz_d);
} else {
- switch (model->metadata.input[i].model_input_type) {
+ switch (model->metadata.input1[i].model_input_type) {
case RTE_ML_IO_TYPE_INT8:
- ret = rte_ml_io_float32_to_int8(model->metadata.input[i].qscale,
+ ret = rte_ml_io_float32_to_int8(model->metadata.input1[i].qscale,
model->addr.input[i].nb_elements,
lcl_dbuffer, lcl_qbuffer);
break;
case RTE_ML_IO_TYPE_UINT8:
- ret = rte_ml_io_float32_to_uint8(model->metadata.input[i].qscale,
+ ret = rte_ml_io_float32_to_uint8(model->metadata.input1[i].qscale,
model->addr.input[i].nb_elements,
lcl_dbuffer, lcl_qbuffer);
break;
case RTE_ML_IO_TYPE_INT16:
- ret = rte_ml_io_float32_to_int16(model->metadata.input[i].qscale,
+ ret = rte_ml_io_float32_to_int16(model->metadata.input1[i].qscale,
model->addr.input[i].nb_elements,
lcl_dbuffer, lcl_qbuffer);
break;
case RTE_ML_IO_TYPE_UINT16:
- ret = rte_ml_io_float32_to_uint16(model->metadata.input[i].qscale,
+ ret = rte_ml_io_float32_to_uint16(model->metadata.input1[i].qscale,
model->addr.input[i].nb_elements,
lcl_dbuffer, lcl_qbuffer);
break;
@@ -1913,7 +1913,7 @@ cn10k_ml_io_quantize(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_batc
break;
default:
plt_err("Unsupported model_input_type[%u] : %u", i,
- model->metadata.input[i].model_input_type);
+ model->metadata.input1[i].model_input_type);
ret = -ENOTSUP;
}
if (ret < 0)
@@ -1955,28 +1955,28 @@ cn10k_ml_io_dequantize(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_ba
next_batch:
for (i = 0; i < model->metadata.model.num_output; i++) {
- if (model->metadata.output[i].output_type ==
- model->metadata.output[i].model_output_type) {
+ if (model->metadata.output1[i].output_type ==
+ model->metadata.output1[i].model_output_type) {
rte_memcpy(lcl_dbuffer, lcl_qbuffer, model->addr.output[i].sz_q);
} else {
- switch (model->metadata.output[i].model_output_type) {
+ switch (model->metadata.output1[i].model_output_type) {
case RTE_ML_IO_TYPE_INT8:
- ret = rte_ml_io_int8_to_float32(model->metadata.output[i].dscale,
+ ret = rte_ml_io_int8_to_float32(model->metadata.output1[i].dscale,
model->addr.output[i].nb_elements,
lcl_qbuffer, lcl_dbuffer);
break;
case RTE_ML_IO_TYPE_UINT8:
- ret = rte_ml_io_uint8_to_float32(model->metadata.output[i].dscale,
+ ret = rte_ml_io_uint8_to_float32(model->metadata.output1[i].dscale,
model->addr.output[i].nb_elements,
lcl_qbuffer, lcl_dbuffer);
break;
case RTE_ML_IO_TYPE_INT16:
- ret = rte_ml_io_int16_to_float32(model->metadata.output[i].dscale,
+ ret = rte_ml_io_int16_to_float32(model->metadata.output1[i].dscale,
model->addr.output[i].nb_elements,
lcl_qbuffer, lcl_dbuffer);
break;
case RTE_ML_IO_TYPE_UINT16:
- ret = rte_ml_io_uint16_to_float32(model->metadata.output[i].dscale,
+ ret = rte_ml_io_uint16_to_float32(model->metadata.output1[i].dscale,
model->addr.output[i].nb_elements,
lcl_qbuffer, lcl_dbuffer);
break;
@@ -1987,7 +1987,7 @@ cn10k_ml_io_dequantize(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_ba
break;
default:
plt_err("Unsupported model_output_type[%u] : %u", i,
- model->metadata.output[i].model_output_type);
+ model->metadata.output1[i].model_output_type);
ret = -ENOTSUP;
}
if (ret < 0)
--
2.17.1
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH v1 3/3] ml/cnxk: add support for 32 I/O per model
2023-04-23 5:08 [PATCH v1 0/3] Add support for 32 I/O per model Srikanth Yalavarthi
2023-04-23 5:08 ` [PATCH v1 1/3] ml/cnxk: split metadata fields into sections Srikanth Yalavarthi
2023-04-23 5:08 ` [PATCH v1 2/3] ml/cnxk: update model metadata to v2301 Srikanth Yalavarthi
@ 2023-04-23 5:08 ` Srikanth Yalavarthi
2023-06-12 16:28 ` [PATCH v1 0/3] Add " Thomas Monjalon
3 siblings, 0 replies; 5+ messages in thread
From: Srikanth Yalavarthi @ 2023-04-23 5:08 UTC (permalink / raw)
To: Srikanth Yalavarthi; +Cc: dev, sshankarnara, aprabhu, ptakkar
Added support for 32 inputs and outputs per model.
Signed-off-by: Srikanth Yalavarthi <syalavarthi@marvell.com>
---
drivers/ml/cnxk/cn10k_ml_model.c | 374 ++++++++++++++++++++++---------
drivers/ml/cnxk/cn10k_ml_model.h | 5 +-
drivers/ml/cnxk/cn10k_ml_ops.c | 125 ++++++++---
3 files changed, 367 insertions(+), 137 deletions(-)
diff --git a/drivers/ml/cnxk/cn10k_ml_model.c b/drivers/ml/cnxk/cn10k_ml_model.c
index a15df700aa..92c47d39ba 100644
--- a/drivers/ml/cnxk/cn10k_ml_model.c
+++ b/drivers/ml/cnxk/cn10k_ml_model.c
@@ -41,8 +41,9 @@ cn10k_ml_model_metadata_check(uint8_t *buffer, uint64_t size)
struct cn10k_ml_model_metadata *metadata;
uint32_t payload_crc32c;
uint32_t header_crc32c;
- uint8_t version[4];
+ uint32_t version;
uint8_t i;
+ uint8_t j;
metadata = (struct cn10k_ml_model_metadata *)buffer;
@@ -82,10 +83,13 @@ cn10k_ml_model_metadata_check(uint8_t *buffer, uint64_t size)
}
/* Header version */
- rte_memcpy(version, metadata->header.version, 4 * sizeof(uint8_t));
- if (version[0] * 1000 + version[1] * 100 != MRVL_ML_MODEL_VERSION_MIN) {
- plt_err("Metadata version = %u.%u.%u.%u (< %u.%u.%u.%u) not supported", version[0],
- version[1], version[2], version[3], (MRVL_ML_MODEL_VERSION_MIN / 1000) % 10,
+ version = metadata->header.version[0] * 1000 + metadata->header.version[1] * 100 +
+ metadata->header.version[2] * 10 + metadata->header.version[3];
+ if (version < MRVL_ML_MODEL_VERSION_MIN) {
+ plt_err("Metadata version = %u.%u.%u.%u (< %u.%u.%u.%u) not supported",
+ metadata->header.version[0], metadata->header.version[1],
+ metadata->header.version[2], metadata->header.version[3],
+ (MRVL_ML_MODEL_VERSION_MIN / 1000) % 10,
(MRVL_ML_MODEL_VERSION_MIN / 100) % 10,
(MRVL_ML_MODEL_VERSION_MIN / 10) % 10, MRVL_ML_MODEL_VERSION_MIN % 10);
return -ENOTSUP;
@@ -125,60 +129,119 @@ cn10k_ml_model_metadata_check(uint8_t *buffer, uint64_t size)
}
/* Check input count */
- if (metadata->model.num_input > MRVL_ML_NUM_INPUT_OUTPUT_1) {
- plt_err("Invalid metadata, num_input = %u (> %u)", metadata->model.num_input,
- MRVL_ML_NUM_INPUT_OUTPUT_1);
- return -EINVAL;
- }
-
- /* Check output count */
- if (metadata->model.num_output > MRVL_ML_NUM_INPUT_OUTPUT_1) {
- plt_err("Invalid metadata, num_output = %u (> %u)", metadata->model.num_output,
- MRVL_ML_NUM_INPUT_OUTPUT_1);
- return -EINVAL;
- }
-
- /* Inputs */
- for (i = 0; i < metadata->model.num_input; i++) {
- if (rte_ml_io_type_size_get(cn10k_ml_io_type_map(metadata->input1[i].input_type)) <=
- 0) {
- plt_err("Invalid metadata, input[%u] : input_type = %u", i,
- metadata->input1[i].input_type);
+ if (version < 2301) {
+ if (metadata->model.num_input > MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ plt_err("Invalid metadata, num_input = %u (> %u)",
+ metadata->model.num_input, MRVL_ML_NUM_INPUT_OUTPUT_1);
return -EINVAL;
}
- if (rte_ml_io_type_size_get(
- cn10k_ml_io_type_map(metadata->input1[i].model_input_type)) <= 0) {
- plt_err("Invalid metadata, input[%u] : model_input_type = %u", i,
- metadata->input1[i].model_input_type);
+ /* Check output count */
+ if (metadata->model.num_output > MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ plt_err("Invalid metadata, num_output = %u (> %u)",
+ metadata->model.num_output, MRVL_ML_NUM_INPUT_OUTPUT_1);
return -EINVAL;
}
-
- if (metadata->input1[i].relocatable != 1) {
- plt_err("Model not supported, non-relocatable input: %u", i);
- return -ENOTSUP;
+ } else {
+ if (metadata->model.num_input > MRVL_ML_NUM_INPUT_OUTPUT) {
+ plt_err("Invalid metadata, num_input = %u (> %u)",
+ metadata->model.num_input, MRVL_ML_NUM_INPUT_OUTPUT);
+ return -EINVAL;
}
- }
- /* Outputs */
- for (i = 0; i < metadata->model.num_output; i++) {
- if (rte_ml_io_type_size_get(
- cn10k_ml_io_type_map(metadata->output1[i].output_type)) <= 0) {
- plt_err("Invalid metadata, output[%u] : output_type = %u", i,
- metadata->output1[i].output_type);
+ /* Check output count */
+ if (metadata->model.num_output > MRVL_ML_NUM_INPUT_OUTPUT) {
+ plt_err("Invalid metadata, num_output = %u (> %u)",
+ metadata->model.num_output, MRVL_ML_NUM_INPUT_OUTPUT);
return -EINVAL;
}
+ }
- if (rte_ml_io_type_size_get(
- cn10k_ml_io_type_map(metadata->output1[i].model_output_type)) <= 0) {
- plt_err("Invalid metadata, output[%u] : model_output_type = %u", i,
- metadata->output1[i].model_output_type);
- return -EINVAL;
+ /* Inputs */
+ for (i = 0; i < metadata->model.num_input; i++) {
+ if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ if (rte_ml_io_type_size_get(
+ cn10k_ml_io_type_map(metadata->input1[i].input_type)) <= 0) {
+ plt_err("Invalid metadata, input1[%u] : input_type = %u", i,
+ metadata->input1[i].input_type);
+ return -EINVAL;
+ }
+
+ if (rte_ml_io_type_size_get(cn10k_ml_io_type_map(
+ metadata->input1[i].model_input_type)) <= 0) {
+ plt_err("Invalid metadata, input1[%u] : model_input_type = %u", i,
+ metadata->input1[i].model_input_type);
+ return -EINVAL;
+ }
+
+ if (metadata->input1[i].relocatable != 1) {
+ plt_err("Model not supported, non-relocatable input1: %u", i);
+ return -ENOTSUP;
+ }
+ } else {
+ j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
+ if (rte_ml_io_type_size_get(
+ cn10k_ml_io_type_map(metadata->input2[j].input_type)) <= 0) {
+ plt_err("Invalid metadata, input2[%u] : input_type = %u", j,
+ metadata->input2[j].input_type);
+ return -EINVAL;
+ }
+
+ if (rte_ml_io_type_size_get(cn10k_ml_io_type_map(
+ metadata->input2[j].model_input_type)) <= 0) {
+ plt_err("Invalid metadata, input2[%u] : model_input_type = %u", j,
+ metadata->input2[j].model_input_type);
+ return -EINVAL;
+ }
+
+ if (metadata->input2[j].relocatable != 1) {
+ plt_err("Model not supported, non-relocatable input2: %u", j);
+ return -ENOTSUP;
+ }
}
+ }
- if (metadata->output1[i].relocatable != 1) {
- plt_err("Model not supported, non-relocatable output: %u", i);
- return -ENOTSUP;
+ /* Outputs */
+ for (i = 0; i < metadata->model.num_output; i++) {
+ if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ if (rte_ml_io_type_size_get(
+ cn10k_ml_io_type_map(metadata->output1[i].output_type)) <= 0) {
+ plt_err("Invalid metadata, output1[%u] : output_type = %u", i,
+ metadata->output1[i].output_type);
+ return -EINVAL;
+ }
+
+ if (rte_ml_io_type_size_get(cn10k_ml_io_type_map(
+ metadata->output1[i].model_output_type)) <= 0) {
+ plt_err("Invalid metadata, output1[%u] : model_output_type = %u", i,
+ metadata->output1[i].model_output_type);
+ return -EINVAL;
+ }
+
+ if (metadata->output1[i].relocatable != 1) {
+ plt_err("Model not supported, non-relocatable output1: %u", i);
+ return -ENOTSUP;
+ }
+ } else {
+ j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
+ if (rte_ml_io_type_size_get(
+ cn10k_ml_io_type_map(metadata->output2[j].output_type)) <= 0) {
+ plt_err("Invalid metadata, output2[%u] : output_type = %u", j,
+ metadata->output2[j].output_type);
+ return -EINVAL;
+ }
+
+ if (rte_ml_io_type_size_get(cn10k_ml_io_type_map(
+ metadata->output2[j].model_output_type)) <= 0) {
+ plt_err("Invalid metadata, output2[%u] : model_output_type = %u", j,
+ metadata->output2[j].model_output_type);
+ return -EINVAL;
+ }
+
+ if (metadata->output2[j].relocatable != 1) {
+ plt_err("Model not supported, non-relocatable output2: %u", j);
+ return -ENOTSUP;
+ }
}
}
@@ -189,31 +252,60 @@ void
cn10k_ml_model_metadata_update(struct cn10k_ml_model_metadata *metadata)
{
uint8_t i;
+ uint8_t j;
for (i = 0; i < metadata->model.num_input; i++) {
- metadata->input1[i].input_type =
- cn10k_ml_io_type_map(metadata->input1[i].input_type);
- metadata->input1[i].model_input_type =
- cn10k_ml_io_type_map(metadata->input1[i].model_input_type);
+ if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ metadata->input1[i].input_type =
+ cn10k_ml_io_type_map(metadata->input1[i].input_type);
+ metadata->input1[i].model_input_type =
+ cn10k_ml_io_type_map(metadata->input1[i].model_input_type);
+
+ if (metadata->input1[i].shape.w == 0)
+ metadata->input1[i].shape.w = 1;
+
+ if (metadata->input1[i].shape.x == 0)
+ metadata->input1[i].shape.x = 1;
+
+ if (metadata->input1[i].shape.y == 0)
+ metadata->input1[i].shape.y = 1;
- if (metadata->input1[i].shape.w == 0)
- metadata->input1[i].shape.w = 1;
+ if (metadata->input1[i].shape.z == 0)
+ metadata->input1[i].shape.z = 1;
+ } else {
+ j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
+ metadata->input2[j].input_type =
+ cn10k_ml_io_type_map(metadata->input2[j].input_type);
+ metadata->input2[j].model_input_type =
+ cn10k_ml_io_type_map(metadata->input2[j].model_input_type);
- if (metadata->input1[i].shape.x == 0)
- metadata->input1[i].shape.x = 1;
+ if (metadata->input2[j].shape.w == 0)
+ metadata->input2[j].shape.w = 1;
- if (metadata->input1[i].shape.y == 0)
- metadata->input1[i].shape.y = 1;
+ if (metadata->input2[j].shape.x == 0)
+ metadata->input2[j].shape.x = 1;
- if (metadata->input1[i].shape.z == 0)
- metadata->input1[i].shape.z = 1;
+ if (metadata->input2[j].shape.y == 0)
+ metadata->input2[j].shape.y = 1;
+
+ if (metadata->input2[j].shape.z == 0)
+ metadata->input2[j].shape.z = 1;
+ }
}
for (i = 0; i < metadata->model.num_output; i++) {
- metadata->output1[i].output_type =
- cn10k_ml_io_type_map(metadata->output1[i].output_type);
- metadata->output1[i].model_output_type =
- cn10k_ml_io_type_map(metadata->output1[i].model_output_type);
+ if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ metadata->output1[i].output_type =
+ cn10k_ml_io_type_map(metadata->output1[i].output_type);
+ metadata->output1[i].model_output_type =
+ cn10k_ml_io_type_map(metadata->output1[i].model_output_type);
+ } else {
+ j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
+ metadata->output2[j].output_type =
+ cn10k_ml_io_type_map(metadata->output2[j].output_type);
+ metadata->output2[j].model_output_type =
+ cn10k_ml_io_type_map(metadata->output2[j].model_output_type);
+ }
}
}
@@ -226,6 +318,7 @@ cn10k_ml_model_addr_update(struct cn10k_ml_model *model, uint8_t *buffer, uint8_
uint8_t *dma_addr_load;
uint8_t *dma_addr_run;
uint8_t i;
+ uint8_t j;
int fpos;
metadata = &model->metadata;
@@ -272,37 +365,80 @@ cn10k_ml_model_addr_update(struct cn10k_ml_model *model, uint8_t *buffer, uint8_
addr->total_input_sz_d = 0;
addr->total_input_sz_q = 0;
for (i = 0; i < metadata->model.num_input; i++) {
- addr->input[i].nb_elements =
- metadata->input1[i].shape.w * metadata->input1[i].shape.x *
- metadata->input1[i].shape.y * metadata->input1[i].shape.z;
- addr->input[i].sz_d = addr->input[i].nb_elements *
- rte_ml_io_type_size_get(metadata->input1[i].input_type);
- addr->input[i].sz_q = addr->input[i].nb_elements *
- rte_ml_io_type_size_get(metadata->input1[i].model_input_type);
- addr->total_input_sz_d += addr->input[i].sz_d;
- addr->total_input_sz_q += addr->input[i].sz_q;
-
- plt_ml_dbg("model_id = %u, input[%u] - w:%u x:%u y:%u z:%u, sz_d = %u sz_q = %u",
- model->model_id, i, metadata->input1[i].shape.w,
- metadata->input1[i].shape.x, metadata->input1[i].shape.y,
- metadata->input1[i].shape.z, addr->input[i].sz_d, addr->input[i].sz_q);
+ if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ addr->input[i].nb_elements =
+ metadata->input1[i].shape.w * metadata->input1[i].shape.x *
+ metadata->input1[i].shape.y * metadata->input1[i].shape.z;
+ addr->input[i].sz_d =
+ addr->input[i].nb_elements *
+ rte_ml_io_type_size_get(metadata->input1[i].input_type);
+ addr->input[i].sz_q =
+ addr->input[i].nb_elements *
+ rte_ml_io_type_size_get(metadata->input1[i].model_input_type);
+ addr->total_input_sz_d += addr->input[i].sz_d;
+ addr->total_input_sz_q += addr->input[i].sz_q;
+
+ plt_ml_dbg(
+ "model_id = %u, input[%u] - w:%u x:%u y:%u z:%u, sz_d = %u sz_q = %u",
+ model->model_id, i, metadata->input1[i].shape.w,
+ metadata->input1[i].shape.x, metadata->input1[i].shape.y,
+ metadata->input1[i].shape.z, addr->input[i].sz_d,
+ addr->input[i].sz_q);
+ } else {
+ j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
+ addr->input[i].nb_elements =
+ metadata->input2[j].shape.w * metadata->input2[j].shape.x *
+ metadata->input2[j].shape.y * metadata->input2[j].shape.z;
+ addr->input[i].sz_d =
+ addr->input[i].nb_elements *
+ rte_ml_io_type_size_get(metadata->input2[j].input_type);
+ addr->input[i].sz_q =
+ addr->input[i].nb_elements *
+ rte_ml_io_type_size_get(metadata->input2[j].model_input_type);
+ addr->total_input_sz_d += addr->input[i].sz_d;
+ addr->total_input_sz_q += addr->input[i].sz_q;
+
+ plt_ml_dbg(
+ "model_id = %u, input2[%u] - w:%u x:%u y:%u z:%u, sz_d = %u sz_q = %u",
+ model->model_id, j, metadata->input2[j].shape.w,
+ metadata->input2[j].shape.x, metadata->input2[j].shape.y,
+ metadata->input2[j].shape.z, addr->input[i].sz_d,
+ addr->input[i].sz_q);
+ }
}
/* Outputs */
addr->total_output_sz_q = 0;
addr->total_output_sz_d = 0;
for (i = 0; i < metadata->model.num_output; i++) {
- addr->output[i].nb_elements = metadata->output1[i].size;
- addr->output[i].sz_d = addr->output[i].nb_elements *
- rte_ml_io_type_size_get(metadata->output1[i].output_type);
- addr->output[i].sz_q =
- addr->output[i].nb_elements *
- rte_ml_io_type_size_get(metadata->output1[i].model_output_type);
- addr->total_output_sz_q += addr->output[i].sz_q;
- addr->total_output_sz_d += addr->output[i].sz_d;
-
- plt_ml_dbg("model_id = %u, output[%u] - sz_d = %u, sz_q = %u", model->model_id, i,
- addr->output[i].sz_d, addr->output[i].sz_q);
+ if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ addr->output[i].nb_elements = metadata->output1[i].size;
+ addr->output[i].sz_d =
+ addr->output[i].nb_elements *
+ rte_ml_io_type_size_get(metadata->output1[i].output_type);
+ addr->output[i].sz_q =
+ addr->output[i].nb_elements *
+ rte_ml_io_type_size_get(metadata->output1[i].model_output_type);
+ addr->total_output_sz_q += addr->output[i].sz_q;
+ addr->total_output_sz_d += addr->output[i].sz_d;
+
+ plt_ml_dbg("model_id = %u, output[%u] - sz_d = %u, sz_q = %u",
+ model->model_id, i, addr->output[i].sz_d, addr->output[i].sz_q);
+ } else {
+ j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
+ addr->output[i].nb_elements = metadata->output2[j].size;
+ addr->output[i].sz_d =
+ addr->output[i].nb_elements *
+ rte_ml_io_type_size_get(metadata->output2[j].output_type);
+ addr->output[i].sz_q =
+ addr->output[i].nb_elements *
+ rte_ml_io_type_size_get(metadata->output2[j].model_output_type);
+ addr->total_output_sz_q += addr->output[i].sz_q;
+ addr->total_output_sz_d += addr->output[i].sz_d;
+
+ plt_ml_dbg("model_id = %u, output2[%u] - sz_d = %u, sz_q = %u",
+ model->model_id, j, addr->output[i].sz_d, addr->output[i].sz_q);
+ }
}
}
@@ -366,6 +502,7 @@ cn10k_ml_model_info_set(struct rte_ml_dev *dev, struct cn10k_ml_model *model)
struct rte_ml_io_info *output;
struct rte_ml_io_info *input;
uint8_t i;
+ uint8_t j;
metadata = &model->metadata;
info = PLT_PTR_CAST(model->info);
@@ -389,26 +526,53 @@ cn10k_ml_model_info_set(struct rte_ml_dev *dev, struct cn10k_ml_model *model)
/* Set input info */
for (i = 0; i < info->nb_inputs; i++) {
- rte_memcpy(input[i].name, metadata->input1[i].input_name, MRVL_ML_INPUT_NAME_LEN);
- input[i].dtype = metadata->input1[i].input_type;
- input[i].qtype = metadata->input1[i].model_input_type;
- input[i].shape.format = metadata->input1[i].shape.format;
- input[i].shape.w = metadata->input1[i].shape.w;
- input[i].shape.x = metadata->input1[i].shape.x;
- input[i].shape.y = metadata->input1[i].shape.y;
- input[i].shape.z = metadata->input1[i].shape.z;
+ if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ rte_memcpy(input[i].name, metadata->input1[i].input_name,
+ MRVL_ML_INPUT_NAME_LEN);
+ input[i].dtype = metadata->input1[i].input_type;
+ input[i].qtype = metadata->input1[i].model_input_type;
+ input[i].shape.format = metadata->input1[i].shape.format;
+ input[i].shape.w = metadata->input1[i].shape.w;
+ input[i].shape.x = metadata->input1[i].shape.x;
+ input[i].shape.y = metadata->input1[i].shape.y;
+ input[i].shape.z = metadata->input1[i].shape.z;
+ } else {
+ j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
+ rte_memcpy(input[i].name, metadata->input2[j].input_name,
+ MRVL_ML_INPUT_NAME_LEN);
+ input[i].dtype = metadata->input2[j].input_type;
+ input[i].qtype = metadata->input2[j].model_input_type;
+ input[i].shape.format = metadata->input2[j].shape.format;
+ input[i].shape.w = metadata->input2[j].shape.w;
+ input[i].shape.x = metadata->input2[j].shape.x;
+ input[i].shape.y = metadata->input2[j].shape.y;
+ input[i].shape.z = metadata->input2[j].shape.z;
+ }
}
/* Set output info */
for (i = 0; i < info->nb_outputs; i++) {
- rte_memcpy(output[i].name, metadata->output1[i].output_name,
- MRVL_ML_OUTPUT_NAME_LEN);
- output[i].dtype = metadata->output1[i].output_type;
- output[i].qtype = metadata->output1[i].model_output_type;
- output[i].shape.format = RTE_ML_IO_FORMAT_1D;
- output[i].shape.w = metadata->output1[i].size;
- output[i].shape.x = 1;
- output[i].shape.y = 1;
- output[i].shape.z = 1;
+ if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ rte_memcpy(output[i].name, metadata->output1[i].output_name,
+ MRVL_ML_OUTPUT_NAME_LEN);
+ output[i].dtype = metadata->output1[i].output_type;
+ output[i].qtype = metadata->output1[i].model_output_type;
+ output[i].shape.format = RTE_ML_IO_FORMAT_1D;
+ output[i].shape.w = metadata->output1[i].size;
+ output[i].shape.x = 1;
+ output[i].shape.y = 1;
+ output[i].shape.z = 1;
+ } else {
+ j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
+ rte_memcpy(output[i].name, metadata->output2[j].output_name,
+ MRVL_ML_OUTPUT_NAME_LEN);
+ output[i].dtype = metadata->output2[j].output_type;
+ output[i].qtype = metadata->output2[j].model_output_type;
+ output[i].shape.format = RTE_ML_IO_FORMAT_1D;
+ output[i].shape.w = metadata->output2[j].size;
+ output[i].shape.x = 1;
+ output[i].shape.y = 1;
+ output[i].shape.z = 1;
+ }
}
}
diff --git a/drivers/ml/cnxk/cn10k_ml_model.h b/drivers/ml/cnxk/cn10k_ml_model.h
index bd863a8c12..5c34e4d747 100644
--- a/drivers/ml/cnxk/cn10k_ml_model.h
+++ b/drivers/ml/cnxk/cn10k_ml_model.h
@@ -30,6 +30,7 @@ enum cn10k_ml_model_state {
#define MRVL_ML_OUTPUT_NAME_LEN 16
#define MRVL_ML_NUM_INPUT_OUTPUT_1 8
#define MRVL_ML_NUM_INPUT_OUTPUT_2 24
+#define MRVL_ML_NUM_INPUT_OUTPUT (MRVL_ML_NUM_INPUT_OUTPUT_1 + MRVL_ML_NUM_INPUT_OUTPUT_2)
/* Header (256-byte) */
struct cn10k_ml_model_metadata_header {
@@ -413,7 +414,7 @@ struct cn10k_ml_model_addr {
/* Quantized input size */
uint32_t sz_q;
- } input[MRVL_ML_NUM_INPUT_OUTPUT_1];
+ } input[MRVL_ML_NUM_INPUT_OUTPUT];
/* Output address and size */
struct {
@@ -425,7 +426,7 @@ struct cn10k_ml_model_addr {
/* Quantized output size */
uint32_t sz_q;
- } output[MRVL_ML_NUM_INPUT_OUTPUT_1];
+ } output[MRVL_ML_NUM_INPUT_OUTPUT];
/* Total size of quantized input */
uint32_t total_input_sz_q;
diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c
index aecc6e74ad..1033afb1b0 100644
--- a/drivers/ml/cnxk/cn10k_ml_ops.c
+++ b/drivers/ml/cnxk/cn10k_ml_ops.c
@@ -269,6 +269,7 @@ cn10k_ml_model_print(struct rte_ml_dev *dev, uint16_t model_id, FILE *fp)
struct cn10k_ml_ocm *ocm;
char str[STR_LEN];
uint8_t i;
+ uint8_t j;
mldev = dev->data->dev_private;
ocm = &mldev->ocm;
@@ -324,16 +325,36 @@ cn10k_ml_model_print(struct rte_ml_dev *dev, uint16_t model_id, FILE *fp)
"model_input_type", "quantize", "format");
print_line(fp, LINE_LEN);
for (i = 0; i < model->metadata.model.num_input; i++) {
- fprintf(fp, "%8u ", i);
- fprintf(fp, "%*s ", 16, model->metadata.input1[i].input_name);
- rte_ml_io_type_to_str(model->metadata.input1[i].input_type, str, STR_LEN);
- fprintf(fp, "%*s ", 12, str);
- rte_ml_io_type_to_str(model->metadata.input1[i].model_input_type, str, STR_LEN);
- fprintf(fp, "%*s ", 18, str);
- fprintf(fp, "%*s", 12, (model->metadata.input1[i].quantize == 1 ? "Yes" : "No"));
- rte_ml_io_format_to_str(model->metadata.input1[i].shape.format, str, STR_LEN);
- fprintf(fp, "%*s", 16, str);
- fprintf(fp, "\n");
+ if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ fprintf(fp, "%8u ", i);
+ fprintf(fp, "%*s ", 16, model->metadata.input1[i].input_name);
+ rte_ml_io_type_to_str(model->metadata.input1[i].input_type, str, STR_LEN);
+ fprintf(fp, "%*s ", 12, str);
+ rte_ml_io_type_to_str(model->metadata.input1[i].model_input_type, str,
+ STR_LEN);
+ fprintf(fp, "%*s ", 18, str);
+ fprintf(fp, "%*s", 12,
+ (model->metadata.input1[i].quantize == 1 ? "Yes" : "No"));
+ rte_ml_io_format_to_str(model->metadata.input1[i].shape.format, str,
+ STR_LEN);
+ fprintf(fp, "%*s", 16, str);
+ fprintf(fp, "\n");
+ } else {
+ j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
+ fprintf(fp, "%8u ", i);
+ fprintf(fp, "%*s ", 16, model->metadata.input2[j].input_name);
+ rte_ml_io_type_to_str(model->metadata.input2[j].input_type, str, STR_LEN);
+ fprintf(fp, "%*s ", 12, str);
+ rte_ml_io_type_to_str(model->metadata.input2[j].model_input_type, str,
+ STR_LEN);
+ fprintf(fp, "%*s ", 18, str);
+ fprintf(fp, "%*s", 12,
+ (model->metadata.input2[j].quantize == 1 ? "Yes" : "No"));
+ rte_ml_io_format_to_str(model->metadata.input2[j].shape.format, str,
+ STR_LEN);
+ fprintf(fp, "%*s", 16, str);
+ fprintf(fp, "\n");
+ }
}
fprintf(fp, "\n");
@@ -342,14 +363,30 @@ cn10k_ml_model_print(struct rte_ml_dev *dev, uint16_t model_id, FILE *fp)
"model_output_type", "dequantize");
print_line(fp, LINE_LEN);
for (i = 0; i < model->metadata.model.num_output; i++) {
- fprintf(fp, "%8u ", i);
- fprintf(fp, "%*s ", 16, model->metadata.output1[i].output_name);
- rte_ml_io_type_to_str(model->metadata.output1[i].output_type, str, STR_LEN);
- fprintf(fp, "%*s ", 12, str);
- rte_ml_io_type_to_str(model->metadata.output1[i].model_output_type, str, STR_LEN);
- fprintf(fp, "%*s ", 18, str);
- fprintf(fp, "%*s", 12, (model->metadata.output1[i].dequantize == 1 ? "Yes" : "No"));
- fprintf(fp, "\n");
+ if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ fprintf(fp, "%8u ", i);
+ fprintf(fp, "%*s ", 16, model->metadata.output1[i].output_name);
+ rte_ml_io_type_to_str(model->metadata.output1[i].output_type, str, STR_LEN);
+ fprintf(fp, "%*s ", 12, str);
+ rte_ml_io_type_to_str(model->metadata.output1[i].model_output_type, str,
+ STR_LEN);
+ fprintf(fp, "%*s ", 18, str);
+ fprintf(fp, "%*s", 12,
+ (model->metadata.output1[i].dequantize == 1 ? "Yes" : "No"));
+ fprintf(fp, "\n");
+ } else {
+ j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
+ fprintf(fp, "%8u ", i);
+ fprintf(fp, "%*s ", 16, model->metadata.output2[j].output_name);
+ rte_ml_io_type_to_str(model->metadata.output2[j].output_type, str, STR_LEN);
+ fprintf(fp, "%*s ", 12, str);
+ rte_ml_io_type_to_str(model->metadata.output2[j].model_output_type, str,
+ STR_LEN);
+ fprintf(fp, "%*s ", 18, str);
+ fprintf(fp, "%*s", 12,
+ (model->metadata.output2[j].dequantize == 1 ? "Yes" : "No"));
+ fprintf(fp, "\n");
+ }
}
fprintf(fp, "\n");
print_line(fp, LINE_LEN);
@@ -1863,10 +1900,14 @@ cn10k_ml_io_quantize(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_batc
void *qbuffer)
{
struct cn10k_ml_model *model;
+ uint8_t model_input_type;
uint8_t *lcl_dbuffer;
uint8_t *lcl_qbuffer;
+ uint8_t input_type;
uint32_t batch_id;
+ float qscale;
uint32_t i;
+ uint32_t j;
int ret;
model = dev->data->models[model_id];
@@ -1882,28 +1923,38 @@ cn10k_ml_io_quantize(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_batc
next_batch:
for (i = 0; i < model->metadata.model.num_input; i++) {
- if (model->metadata.input1[i].input_type ==
- model->metadata.input1[i].model_input_type) {
+ if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ input_type = model->metadata.input1[i].input_type;
+ model_input_type = model->metadata.input1[i].model_input_type;
+ qscale = model->metadata.input1[i].qscale;
+ } else {
+ j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
+ input_type = model->metadata.input2[j].input_type;
+ model_input_type = model->metadata.input2[j].model_input_type;
+ qscale = model->metadata.input2[j].qscale;
+ }
+
+ if (input_type == model_input_type) {
rte_memcpy(lcl_qbuffer, lcl_dbuffer, model->addr.input[i].sz_d);
} else {
switch (model->metadata.input1[i].model_input_type) {
case RTE_ML_IO_TYPE_INT8:
- ret = rte_ml_io_float32_to_int8(model->metadata.input1[i].qscale,
+ ret = rte_ml_io_float32_to_int8(qscale,
model->addr.input[i].nb_elements,
lcl_dbuffer, lcl_qbuffer);
break;
case RTE_ML_IO_TYPE_UINT8:
- ret = rte_ml_io_float32_to_uint8(model->metadata.input1[i].qscale,
+ ret = rte_ml_io_float32_to_uint8(qscale,
model->addr.input[i].nb_elements,
lcl_dbuffer, lcl_qbuffer);
break;
case RTE_ML_IO_TYPE_INT16:
- ret = rte_ml_io_float32_to_int16(model->metadata.input1[i].qscale,
+ ret = rte_ml_io_float32_to_int16(qscale,
model->addr.input[i].nb_elements,
lcl_dbuffer, lcl_qbuffer);
break;
case RTE_ML_IO_TYPE_UINT16:
- ret = rte_ml_io_float32_to_uint16(model->metadata.input1[i].qscale,
+ ret = rte_ml_io_float32_to_uint16(qscale,
model->addr.input[i].nb_elements,
lcl_dbuffer, lcl_qbuffer);
break;
@@ -1936,10 +1987,14 @@ cn10k_ml_io_dequantize(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_ba
void *qbuffer, void *dbuffer)
{
struct cn10k_ml_model *model;
+ uint8_t model_output_type;
uint8_t *lcl_qbuffer;
uint8_t *lcl_dbuffer;
+ uint8_t output_type;
uint32_t batch_id;
+ float dscale;
uint32_t i;
+ uint32_t j;
int ret;
model = dev->data->models[model_id];
@@ -1955,28 +2010,38 @@ cn10k_ml_io_dequantize(struct rte_ml_dev *dev, uint16_t model_id, uint16_t nb_ba
next_batch:
for (i = 0; i < model->metadata.model.num_output; i++) {
- if (model->metadata.output1[i].output_type ==
- model->metadata.output1[i].model_output_type) {
+ if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) {
+ output_type = model->metadata.output1[i].output_type;
+ model_output_type = model->metadata.output1[i].model_output_type;
+ dscale = model->metadata.output1[i].dscale;
+ } else {
+ j = i - MRVL_ML_NUM_INPUT_OUTPUT_1;
+ output_type = model->metadata.output2[j].output_type;
+ model_output_type = model->metadata.output2[j].model_output_type;
+ dscale = model->metadata.output2[j].dscale;
+ }
+
+ if (output_type == model_output_type) {
rte_memcpy(lcl_dbuffer, lcl_qbuffer, model->addr.output[i].sz_q);
} else {
switch (model->metadata.output1[i].model_output_type) {
case RTE_ML_IO_TYPE_INT8:
- ret = rte_ml_io_int8_to_float32(model->metadata.output1[i].dscale,
+ ret = rte_ml_io_int8_to_float32(dscale,
model->addr.output[i].nb_elements,
lcl_qbuffer, lcl_dbuffer);
break;
case RTE_ML_IO_TYPE_UINT8:
- ret = rte_ml_io_uint8_to_float32(model->metadata.output1[i].dscale,
+ ret = rte_ml_io_uint8_to_float32(dscale,
model->addr.output[i].nb_elements,
lcl_qbuffer, lcl_dbuffer);
break;
case RTE_ML_IO_TYPE_INT16:
- ret = rte_ml_io_int16_to_float32(model->metadata.output1[i].dscale,
+ ret = rte_ml_io_int16_to_float32(dscale,
model->addr.output[i].nb_elements,
lcl_qbuffer, lcl_dbuffer);
break;
case RTE_ML_IO_TYPE_UINT16:
- ret = rte_ml_io_uint16_to_float32(model->metadata.output1[i].dscale,
+ ret = rte_ml_io_uint16_to_float32(dscale,
model->addr.output[i].nb_elements,
lcl_qbuffer, lcl_dbuffer);
break;
--
2.17.1
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v1 0/3] Add support for 32 I/O per model
2023-04-23 5:08 [PATCH v1 0/3] Add support for 32 I/O per model Srikanth Yalavarthi
` (2 preceding siblings ...)
2023-04-23 5:08 ` [PATCH v1 3/3] ml/cnxk: add support for 32 I/O per model Srikanth Yalavarthi
@ 2023-06-12 16:28 ` Thomas Monjalon
3 siblings, 0 replies; 5+ messages in thread
From: Thomas Monjalon @ 2023-06-12 16:28 UTC (permalink / raw)
To: Srikanth Yalavarthi; +Cc: dev, syalavarthi, sshankarnara, aprabhu, ptakkar
23/04/2023 07:08, Srikanth Yalavarthi:
> This patch series adds support for 32 inputs / outputs per each
> model. Changes required to enable the required support include:
>
> 1. Splitting model metadata fields into structures.
> 2. Update model metadata to v2301 which supports 32 I/O.
> 3. Update ML driver code to support metadata v2301.
>
>
> Srikanth Yalavarthi (3):
> ml/cnxk: split metadata fields into sections
> ml/cnxk: update model metadata to v2301
> ml/cnxk: add support for 32 I/O per model
Applied, thanks.
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2023-06-12 16:28 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-23 5:08 [PATCH v1 0/3] Add support for 32 I/O per model Srikanth Yalavarthi
2023-04-23 5:08 ` [PATCH v1 1/3] ml/cnxk: split metadata fields into sections Srikanth Yalavarthi
2023-04-23 5:08 ` [PATCH v1 2/3] ml/cnxk: update model metadata to v2301 Srikanth Yalavarthi
2023-04-23 5:08 ` [PATCH v1 3/3] ml/cnxk: add support for 32 I/O per model Srikanth Yalavarthi
2023-06-12 16:28 ` [PATCH v1 0/3] Add " Thomas Monjalon
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).