DPDK patches and discussions
* [PATCH v1 1/1] mldev: introduce data type conversion functions
@ 2024-07-31  6:32 Srikanth Yalavarthi
  2024-10-17 21:16 ` Thomas Monjalon
  0 siblings, 1 reply; 2+ messages in thread
From: Srikanth Yalavarthi @ 2024-07-31  6:32 UTC (permalink / raw)
  To: Srikanth Yalavarthi, Wathsala Vithanage
  Cc: dev, sshankarnara, aprabhu, ptakkar

Introduce data type conversion functions with support for a
user-defined scale factor and zero-point. Update the library
functions to support asymmetric (affine) conversion for integer
types.

Signed-off-by: Srikanth Yalavarthi <syalavarthi@marvell.com>
---
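Not part of the patch, but for reference: a minimal scalar sketch of the
affine conversion the reworked prototypes imply, assuming the new argument
order (input, output, nb_elements, scale, zero_point). The helper names are
illustrative only; the NEON paths in this patch additionally avoid the most
negative representable integer value.

#include <math.h>
#include <stdint.h>

/* quantize: q = round(x / scale + zero_point), saturated to int8 */
static void
sketch_float32_to_int8(const float *in, int8_t *out, uint64_t n,
		       float scale, int8_t zero_point)
{
	uint64_t i;

	for (i = 0; i < n; i++) {
		/* roundf() rounds to nearest, ties away from zero */
		float q = roundf(in[i] / scale + (float)zero_point);

		if (q > (float)INT8_MAX)
			q = (float)INT8_MAX;
		if (q < (float)INT8_MIN)
			q = (float)INT8_MIN;
		out[i] = (int8_t)q;
	}
}

/* dequantize: x = scale * (q - zero_point) */
static void
sketch_int8_to_float32(const int8_t *in, float *out, uint64_t n,
		       float scale, int8_t zero_point)
{
	uint64_t i;

	for (i = 0; i < n; i++)
		out[i] = scale * ((float)in[i] - (float)zero_point);
}

Note that the cnxk driver below passes 1.0 / qscale as the scale argument
when quantizing, which matches the division-based form used above.
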
 drivers/ml/cnxk/cnxk_ml_io.c            | 134 +++----
 lib/mldev/mldev_utils.h                 | 453 -----------------------
 lib/mldev/mldev_utils_neon.c            | 452 ++++++++++++++---------
 lib/mldev/mldev_utils_neon_bfloat16.c   |  20 +-
 lib/mldev/mldev_utils_scalar.c          | 156 ++++----
 lib/mldev/mldev_utils_scalar_bfloat16.c |  12 +-
 lib/mldev/rte_mldev.h                   | 462 ++++++++++++++++++++++++
 lib/mldev/version.map                   |  40 +-
 8 files changed, 936 insertions(+), 793 deletions(-)

diff --git a/drivers/ml/cnxk/cnxk_ml_io.c b/drivers/ml/cnxk/cnxk_ml_io.c
index 4b0adc2ae47..a418b7e684d 100644
--- a/drivers/ml/cnxk/cnxk_ml_io.c
+++ b/drivers/ml/cnxk/cnxk_ml_io.c
@@ -26,39 +26,40 @@ cnxk_ml_io_quantize_single(struct cnxk_ml_io *input, uint8_t *dbuffer, uint8_t *
 
 	if (dtype == qtype) {
 		rte_memcpy(qbuffer, dbuffer, input->sz_d);
-	} else {
-		switch (qtype) {
-		case RTE_ML_IO_TYPE_INT8:
-			ret = rte_ml_io_float32_to_int8(qscale, nb_elements, dbuffer, qbuffer);
-			break;
-		case RTE_ML_IO_TYPE_UINT8:
-			ret = rte_ml_io_float32_to_uint8(qscale, nb_elements, dbuffer, qbuffer);
-			break;
-		case RTE_ML_IO_TYPE_INT16:
-			ret = rte_ml_io_float32_to_int16(qscale, nb_elements, dbuffer, qbuffer);
-			break;
-		case RTE_ML_IO_TYPE_UINT16:
-			ret = rte_ml_io_float32_to_uint16(qscale, nb_elements, dbuffer, qbuffer);
-			break;
-		case RTE_ML_IO_TYPE_INT32:
-			ret = rte_ml_io_float32_to_int32(qscale, nb_elements, dbuffer, qbuffer);
-			break;
-		case RTE_ML_IO_TYPE_UINT32:
-			ret = rte_ml_io_float32_to_uint32(qscale, nb_elements, dbuffer, qbuffer);
-			break;
-		case RTE_ML_IO_TYPE_INT64:
-			ret = rte_ml_io_float32_to_int64(qscale, nb_elements, dbuffer, qbuffer);
-			break;
-		case RTE_ML_IO_TYPE_UINT64:
-			ret = rte_ml_io_float32_to_uint64(qscale, nb_elements, dbuffer, qbuffer);
-			break;
-		case RTE_ML_IO_TYPE_FP16:
-			ret = rte_ml_io_float32_to_float16(nb_elements, dbuffer, qbuffer);
-			break;
-		default:
-			plt_err("Unsupported qtype : %u", qtype);
-			ret = -ENOTSUP;
-		}
+		return ret;
+	}
+
+	switch (qtype) {
+	case RTE_ML_IO_TYPE_INT8:
+		ret = rte_ml_io_float32_to_int8(dbuffer, qbuffer, nb_elements, 1.0 / qscale, 0);
+		break;
+	case RTE_ML_IO_TYPE_UINT8:
+		ret = rte_ml_io_float32_to_uint8(dbuffer, qbuffer, nb_elements, 1.0 / qscale, 0);
+		break;
+	case RTE_ML_IO_TYPE_INT16:
+		ret = rte_ml_io_float32_to_int16(dbuffer, qbuffer, nb_elements, 1.0 / qscale, 0);
+		break;
+	case RTE_ML_IO_TYPE_UINT16:
+		ret = rte_ml_io_float32_to_uint16(dbuffer, qbuffer, nb_elements, 1.0 / qscale, 0);
+		break;
+	case RTE_ML_IO_TYPE_INT32:
+		ret = rte_ml_io_float32_to_int32(dbuffer, qbuffer, nb_elements, 1.0 / qscale, 0);
+		break;
+	case RTE_ML_IO_TYPE_UINT32:
+		ret = rte_ml_io_float32_to_uint32(dbuffer, qbuffer, nb_elements, 1.0 / qscale, 0);
+		break;
+	case RTE_ML_IO_TYPE_INT64:
+		ret = rte_ml_io_float32_to_int64(dbuffer, qbuffer, nb_elements, 1.0 / qscale, 0);
+		break;
+	case RTE_ML_IO_TYPE_UINT64:
+		ret = rte_ml_io_float32_to_uint64(dbuffer, qbuffer, nb_elements, 1.0 / qscale, 0);
+		break;
+	case RTE_ML_IO_TYPE_FP16:
+		ret = rte_ml_io_float32_to_float16(dbuffer, qbuffer, nb_elements);
+		break;
+	default:
+		plt_err("Unsupported qtype : %u", qtype);
+		ret = -ENOTSUP;
 	}
 
 	return ret;
@@ -80,39 +81,40 @@ cnxk_ml_io_dequantize_single(struct cnxk_ml_io *output, uint8_t *qbuffer, uint8_
 
 	if (dtype == qtype) {
 		rte_memcpy(dbuffer, qbuffer, output->sz_q);
-	} else {
-		switch (qtype) {
-		case RTE_ML_IO_TYPE_INT8:
-			ret = rte_ml_io_int8_to_float32(dscale, nb_elements, qbuffer, dbuffer);
-			break;
-		case RTE_ML_IO_TYPE_UINT8:
-			ret = rte_ml_io_uint8_to_float32(dscale, nb_elements, qbuffer, dbuffer);
-			break;
-		case RTE_ML_IO_TYPE_INT16:
-			ret = rte_ml_io_int16_to_float32(dscale, nb_elements, qbuffer, dbuffer);
-			break;
-		case RTE_ML_IO_TYPE_UINT16:
-			ret = rte_ml_io_uint16_to_float32(dscale, nb_elements, qbuffer, dbuffer);
-			break;
-		case RTE_ML_IO_TYPE_INT32:
-			ret = rte_ml_io_int32_to_float32(dscale, nb_elements, qbuffer, dbuffer);
-			break;
-		case RTE_ML_IO_TYPE_UINT32:
-			ret = rte_ml_io_uint32_to_float32(dscale, nb_elements, qbuffer, dbuffer);
-			break;
-		case RTE_ML_IO_TYPE_INT64:
-			ret = rte_ml_io_int64_to_float32(dscale, nb_elements, qbuffer, dbuffer);
-			break;
-		case RTE_ML_IO_TYPE_UINT64:
-			ret = rte_ml_io_uint64_to_float32(dscale, nb_elements, qbuffer, dbuffer);
-			break;
-		case RTE_ML_IO_TYPE_FP16:
-			ret = rte_ml_io_float16_to_float32(nb_elements, qbuffer, dbuffer);
-			break;
-		default:
-			plt_err("Unsupported qtype: %u", qtype);
-			ret = -ENOTSUP;
-		}
+		return 0;
+	}
+
+	switch (qtype) {
+	case RTE_ML_IO_TYPE_INT8:
+		ret = rte_ml_io_int8_to_float32(qbuffer, dbuffer, nb_elements, dscale, 0);
+		break;
+	case RTE_ML_IO_TYPE_UINT8:
+		ret = rte_ml_io_uint8_to_float32(qbuffer, dbuffer, nb_elements, dscale, 0);
+		break;
+	case RTE_ML_IO_TYPE_INT16:
+		ret = rte_ml_io_int16_to_float32(qbuffer, dbuffer, nb_elements, dscale, 0);
+		break;
+	case RTE_ML_IO_TYPE_UINT16:
+		ret = rte_ml_io_uint16_to_float32(qbuffer, dbuffer, nb_elements, dscale, 0);
+		break;
+	case RTE_ML_IO_TYPE_INT32:
+		ret = rte_ml_io_int32_to_float32(qbuffer, dbuffer, nb_elements, dscale, 0);
+		break;
+	case RTE_ML_IO_TYPE_UINT32:
+		ret = rte_ml_io_uint32_to_float32(qbuffer, dbuffer, nb_elements, dscale, 0);
+		break;
+	case RTE_ML_IO_TYPE_INT64:
+		ret = rte_ml_io_int64_to_float32(qbuffer, dbuffer, nb_elements, dscale, 0);
+		break;
+	case RTE_ML_IO_TYPE_UINT64:
+		ret = rte_ml_io_uint64_to_float32(qbuffer, dbuffer, nb_elements, dscale, 0);
+		break;
+	case RTE_ML_IO_TYPE_FP16:
+		ret = rte_ml_io_float16_to_float32(qbuffer, dbuffer, nb_elements);
+		break;
+	default:
+		plt_err("Unsupported qtype: %u", qtype);
+		ret = -ENOTSUP;
 	}
 
 	return ret;
diff --git a/lib/mldev/mldev_utils.h b/lib/mldev/mldev_utils.h
index 5e2a180adce..37c90b44a8e 100644
--- a/lib/mldev/mldev_utils.h
+++ b/lib/mldev/mldev_utils.h
@@ -52,459 +52,6 @@ __rte_internal
 void
 rte_ml_io_type_to_str(enum rte_ml_io_type type, char *str, int len);
 
-/**
- * @internal
- *
- * Convert a buffer containing numbers in single precision floating format (float32) to signed 8-bit
- * integer format (INT8).
- *
- * @param[in] scale
- *      Scale factor for conversion.
- * @param[in] nb_elements
- *	Number of elements in the buffer.
- * @param[in] input
- *	Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- * @param[out] output
- *	Output buffer to store INT8 numbers. Size of buffer is equal to (nb_elements * 1) bytes.
- *
- * @return
- *	- 0, Success.
- *	- < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_float32_to_int8(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in signed 8-bit integer format (INT8) to single precision
- * floating format (float32).
- *
- * @param[in] scale
- *      Scale factor for conversion.
- * @param[in] nb_elements
- *	Number of elements in the buffer.
- * @param[in] input
- *	Input buffer containing INT8 numbers. Size of buffer is equal to (nb_elements * 1) bytes.
- * @param[out] output
- *	Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- *
- * @return
- *	- 0, Success.
- *	- < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_int8_to_float32(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in single precision floating format (float32) to unsigned
- * 8-bit integer format (UINT8).
- *
- * @param[in] scale
- *      Scale factor for conversion.
- * @param[in] nb_elements
- *	Number of elements in the buffer.
- * @param[in] input
- *	Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- * @param[out] output
- *	Output buffer to store UINT8 numbers. Size of buffer is equal to (nb_elements * 1) bytes.
- *
- * @return
- *	- 0, Success.
- *	- < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_float32_to_uint8(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in unsigned 8-bit integer format (UINT8) to single precision
- * floating format (float32).
- *
- * @param[in] scale
- *      Scale factor for conversion.
- * @param[in] nb_elements
- *	Number of elements in the buffer.
- * @param[in] input
- *	Input buffer containing UINT8 numbers. Size of buffer is equal to (nb_elements * 1) bytes.
- * @param[out] output
- *	Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- *
- * @return
- *	- 0, Success.
- *	- < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_uint8_to_float32(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in single precision floating format (float32) to signed
- * 16-bit integer format (INT16).
- *
- * @param[in] scale
- *      Scale factor for conversion.
- * @param[in] nb_elements
- *	Number of elements in the buffer.
- * @param[in] input
- *	Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- * @param[out] output
- *	Output buffer to store INT16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
- *
- * @return
- *	- 0, Success.
- *	- < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_float32_to_int16(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in signed 16-bit integer format (INT16) to single precision
- * floating format (float32).
- *
- * @param[in] scale
- *      Scale factor for conversion.
- * @param[in] nb_elements
- *	Number of elements in the buffer.
- * @param[in] input
- *	Input buffer containing INT16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
- * @param[out] output
- *	Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- *
- * @return
- *	- 0, Success.
- *	- < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_int16_to_float32(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in single precision floating format (float32) to unsigned
- * 16-bit integer format (UINT16).
- *
- * @param[in] scale
- *      Scale factor for conversion.
- * @param[in] nb_elements
- *	Number of elements in the buffer.
- * @param[in] input
- *	Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- * @param[out] output
- *	Output buffer to store UINT16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
- *
- * @return
- *	- 0, Success.
- *	- < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_float32_to_uint16(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in unsigned 16-bit integer format (UINT16) to single
- * precision floating format (float32).
- *
- * @param[in] scale
- *      Scale factor for conversion.
- * @param[in] nb_elements
- *	Number of elements in the buffer.
- * @param[in] input
- *	Input buffer containing UINT16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
- * @param[out] output
- *	Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- *
- * @return
- *	- 0, Success.
- *	- < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_uint16_to_float32(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in single precision floating format (float32)
- * to signed 32-bit integer format (INT32).
- *
- * @param[in] scale
- *	Scale factor for conversion.
- * @param[in] nb_elements
- *	Number of elements in the buffer.
- * @param[in] input
- *	Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- * @param[out] output
- *	Output buffer to store INT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- *
- * @return
- *	- 0, Success.
- *	- < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_float32_to_int32(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in signed 32-bit integer format (INT32)
- * to single precision floating format (float32).
- *
- * @param[in] scale
- *	Scale factor for conversion.
- * @param[in] nb_elements
- *	Number of elements in the buffer.
- * @param[in] input
- *	Input buffer containing INT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- * @param[out] output
- *	Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- *
- * @return
- *	- 0, Success.
- *	- < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_int32_to_float32(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in single precision floating format (float32)
- * to unsigned 32-bit integer format (UINT32).
- *
- * @param[in] scale
- *	Scale factor for conversion.
- * @param[in] nb_elements
- *	Number of elements in the buffer.
- * @param[in] input
- *	Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- * @param[out] output
- *	Output buffer to store UINT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- *
- * @return
- *	- 0, Success.
- *	- < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_float32_to_uint32(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in unsigned 32-bit integer format (UINT32)
- * to single precision floating format (float32).
- *
- * @param[in] scale
- *	Scale factor for conversion.
- * @param[in] nb_elements
- *	Number of elements in the buffer.
- * @param[in] input
- *	Input buffer containing UINT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- * @param[out] output
- *	Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- *
- * @return
- *	- 0, Success.
- *	- < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_uint32_to_float32(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in single precision floating format (float32)
- * to signed 64-bit integer format (INT64).
- *
- * @param[in] scale
- *      Scale factor for conversion.
- * @param[in] nb_elements
- *	Number of elements in the buffer.
- * @param[in] input
- *	Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- * @param[out] output
- *	Output buffer to store INT64 numbers. Size of buffer is equal to (nb_elements * 8) bytes.
- *
- * @return
- *	- 0, Success.
- *	- < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_float32_to_int64(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in signed 64-bit integer format (INT64)
- * to single precision floating format (float32).
- *
- * @param[in] scale
- *      Scale factor for conversion.
- * @param[in] nb_elements
- *	Number of elements in the buffer.
- * @param[in] input
- *	Input buffer containing INT64 numbers. Size of buffer is equal to (nb_elements * 8) bytes.
- * @param[out] output
- *	Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- *
- * @return
- *	- 0, Success.
- *	- < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_int64_to_float32(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in single precision floating format (float32)
- * to unsigned 64-bit integer format (UINT64).
- *
- * @param[in] scale
- *      Scale factor for conversion.
- * @param[in] nb_elements
- *	Number of elements in the buffer.
- * @param[in] input
- *	Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- * @param[out] output
- *	Output buffer to store UINT64 numbers. Size of buffer is equal to (nb_elements * 8) bytes.
- *
- * @return
- *	- 0, Success.
- *	- < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_float32_to_uint64(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in unsigned 64-bit integer format (UINT64)
- * to single precision floating format (float32).
- *
- * @param[in] scale
- *      Scale factor for conversion.
- * @param[in] nb_elements
- *	Number of elements in the buffer.
- * @param[in] input
- *	Input buffer containing UINT64 numbers. Size of buffer is equal to (nb_elements * 8) bytes.
- * @param[out] output
- *	Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- *
- * @return
- *	- 0, Success.
- *	- < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_uint64_to_float32(float scale, uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in single precision floating format (float32) to half
- * precision floating point format (FP16).
- *
- * @param[in] nb_elements
- *	Number of elements in the buffer.
- * @param[in] input
- *	Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements *4) bytes.
- * @param[out] output
- *	Output buffer to store float16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
- *
- * @return
- *	- 0, Success.
- *	- < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_float32_to_float16(uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in half precision floating format (FP16) to single precision
- * floating point format (float32).
- *
- * @param[in] nb_elements
- *	Number of elements in the buffer.
- * @param[in] input
- *	Input buffer containing float16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
- * @param[out] output
- *	Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- *
- * @return
- *	- 0, Success.
- *	- < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_float16_to_float32(uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in single precision floating format (float32) to brain
- * floating point format (bfloat16).
- *
- * @param[in] nb_elements
- *	Number of elements in the buffer.
- * @param[in] input
- *	Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements *4) bytes.
- * @param[out] output
- *	Output buffer to store bfloat16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
- *
- * @return
- *	- 0, Success.
- *	- < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_float32_to_bfloat16(uint64_t nb_elements, void *input, void *output);
-
-/**
- * @internal
- *
- * Convert a buffer containing numbers in brain floating point format (bfloat16) to single precision
- * floating point format (float32).
- *
- * @param[in] nb_elements
- *	Number of elements in the buffer.
- * @param[in] input
- *	Input buffer containing bfloat16 numbers. Size of buffer is equal to (nb_elements * 2)
- * bytes.
- * @param[out] output
- *	Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
- *
- * @return
- *	- 0, Success.
- *	- < 0, Error code on failure.
- */
-__rte_internal
-int
-rte_ml_io_bfloat16_to_float32(uint64_t nb_elements, void *input, void *output);
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/mldev/mldev_utils_neon.c b/lib/mldev/mldev_utils_neon.c
index 4cde2ebabd3..8751a40863e 100644
--- a/lib/mldev/mldev_utils_neon.c
+++ b/lib/mldev/mldev_utils_neon.c
@@ -17,7 +17,7 @@
  */
 
 static inline void
-__float32_to_int8_neon_s8x8(float scale, float *input, int8_t *output)
+__float32_to_int8_neon_s8x8(const float *input, int8_t *output, float scale, int8_t zero_point)
 {
 	int16x4_t s16x4_l;
 	int16x4_t s16x4_h;
@@ -30,7 +30,8 @@ __float32_to_int8_neon_s8x8(float scale, float *input, int8_t *output)
 	 * Use round to nearest with ties away rounding mode.
 	 */
 	f32x4 = vld1q_f32(input);
-	f32x4 = vmulq_n_f32(f32x4, scale);
+	f32x4 = vdivq_f32(f32x4, vdupq_n_f32(scale));
+	f32x4 = vaddq_f32(f32x4, vdupq_n_f32((float)zero_point));
 	s32x4 = vcvtaq_s32_f32(f32x4);
 	s16x4_l = vqmovn_s32(s32x4);
 
@@ -38,7 +39,8 @@ __float32_to_int8_neon_s8x8(float scale, float *input, int8_t *output)
 	 * Use round to nearest with ties away rounding mode.
 	 */
 	f32x4 = vld1q_f32(input + 4);
-	f32x4 = vmulq_n_f32(f32x4, scale);
+	f32x4 = vdivq_f32(f32x4, vdupq_n_f32(scale));
+	f32x4 = vaddq_f32(f32x4, vdupq_n_f32((float)zero_point));
 	s32x4 = vcvtaq_s32_f32(f32x4);
 	s16x4_h = vqmovn_s32(s32x4);
 
@@ -47,31 +49,37 @@ __float32_to_int8_neon_s8x8(float scale, float *input, int8_t *output)
 
 	/* narrow to int8_t */
 	s8x8 = vqmovn_s16(s16x8);
+	s8x8 = vmax_s8(s8x8, vdup_n_s8(INT8_MIN + 1));
 
 	/* store 8 elements */
 	vst1_s8(output, s8x8);
 }
 
 static inline void
-__float32_to_int8_neon_s8x1(float scale, float *input, int8_t *output)
+__float32_to_int8_neon_s8x1(const float *input, int8_t *output, float scale, int8_t zero_point)
 {
-	int32_t s32;
+	float32x2_t f32x2;
+	int32x2_t s32x2;
 	int16_t s16;
 
 	/* scale and convert, round to nearest with ties away rounding mode */
-	s32 = vcvtas_s32_f32(scale * (*input));
+	f32x2 = vdiv_f32(vdup_n_f32(*input), vdup_n_f32(scale));
+	f32x2 = vadd_f32(f32x2, vdup_n_f32((float)zero_point));
+	s32x2 = vcvta_s32_f32(f32x2);
+	s32x2 = vmax_s32(s32x2, vdup_n_s32(INT8_MIN + 1));
 
 	/* saturate narrow */
-	s16 = vqmovns_s32(s32);
+	s16 = vqmovns_s32(vget_lane_s32(s32x2, 0));
 
 	/* convert to int8_t */
 	*output = vqmovnh_s16(s16);
 }
 
 int
-rte_ml_io_float32_to_int8(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_int8(const void *input, void *output, uint64_t nb_elements, float scale,
+			  int8_t zero_point)
 {
-	float *input_buffer;
+	const float *input_buffer;
 	int8_t *output_buffer;
 	uint64_t nb_iterations;
 	uint32_t vlen;
@@ -80,14 +88,14 @@ rte_ml_io_float32_to_int8(float scale, uint64_t nb_elements, void *input, void *
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (float *)input;
+	input_buffer = (const float *)input;
 	output_buffer = (int8_t *)output;
 	vlen = 2 * sizeof(float) / sizeof(int8_t);
 	nb_iterations = nb_elements / vlen;
 
 	/* convert vlen elements in each iteration */
 	for (i = 0; i < nb_iterations; i++) {
-		__float32_to_int8_neon_s8x8(scale, input_buffer, output_buffer);
+		__float32_to_int8_neon_s8x8(input_buffer, output_buffer, scale, zero_point);
 		input_buffer += vlen;
 		output_buffer += vlen;
 	}
@@ -95,7 +103,7 @@ rte_ml_io_float32_to_int8(float scale, uint64_t nb_elements, void *input, void *
 	/* convert leftover elements */
 	i = i * vlen;
 	for (; i < nb_elements; i++) {
-		__float32_to_int8_neon_s8x1(scale, input_buffer, output_buffer);
+		__float32_to_int8_neon_s8x1(input_buffer, output_buffer, scale, zero_point);
 		input_buffer++;
 		output_buffer++;
 	}
@@ -104,7 +112,7 @@ rte_ml_io_float32_to_int8(float scale, uint64_t nb_elements, void *input, void *
 }
 
 static inline void
-__int8_to_float32_neon_f32x8(float scale, int8_t *input, float *output)
+__int8_to_float32_neon_f32x8(const int8_t *input, float *output, float scale, int8_t zero_point)
 {
 	float32x4_t f32x4;
 	int16x8_t s16x8;
@@ -122,6 +130,7 @@ __int8_to_float32_neon_f32x8(float scale, int8_t *input, float *output)
 	s16x4 = vget_low_s16(s16x8);
 	s32x4 = vmovl_s16(s16x4);
 	f32x4 = vcvtq_f32_s32(s32x4);
+	f32x4 = vsubq_f32(f32x4, vdupq_n_f32((float)zero_point));
 	f32x4 = vmulq_n_f32(f32x4, scale);
 	vst1q_f32(output, f32x4);
 
@@ -129,20 +138,22 @@ __int8_to_float32_neon_f32x8(float scale, int8_t *input, float *output)
 	s16x4 = vget_high_s16(s16x8);
 	s32x4 = vmovl_s16(s16x4);
 	f32x4 = vcvtq_f32_s32(s32x4);
+	f32x4 = vsubq_f32(f32x4, vdupq_n_f32((float)zero_point));
 	f32x4 = vmulq_n_f32(f32x4, scale);
 	vst1q_f32(output + 4, f32x4);
 }
 
 static inline void
-__int8_to_float32_neon_f32x1(float scale, int8_t *input, float *output)
+__int8_to_float32_neon_f32x1(const int8_t *input, float *output, float scale, int8_t zero_point)
 {
-	*output = scale * vcvts_f32_s32((int32_t)*input);
+	*output = scale * (vcvts_f32_s32((int32_t)*input) - (float)zero_point);
 }
 
 int
-rte_ml_io_int8_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_int8_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+			  int8_t zero_point)
 {
-	int8_t *input_buffer;
+	const int8_t *input_buffer;
 	float *output_buffer;
 	uint64_t nb_iterations;
 	uint32_t vlen;
@@ -151,14 +162,14 @@ rte_ml_io_int8_to_float32(float scale, uint64_t nb_elements, void *input, void *
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (int8_t *)input;
+	input_buffer = (const int8_t *)input;
 	output_buffer = (float *)output;
 	vlen = 2 * sizeof(float) / sizeof(int8_t);
 	nb_iterations = nb_elements / vlen;
 
 	/* convert vlen elements in each iteration */
 	for (i = 0; i < nb_iterations; i++) {
-		__int8_to_float32_neon_f32x8(scale, input_buffer, output_buffer);
+		__int8_to_float32_neon_f32x8(input_buffer, output_buffer, scale, zero_point);
 		input_buffer += vlen;
 		output_buffer += vlen;
 	}
@@ -166,7 +177,7 @@ rte_ml_io_int8_to_float32(float scale, uint64_t nb_elements, void *input, void *
 	/* convert leftover elements */
 	i = i * vlen;
 	for (; i < nb_elements; i++) {
-		__int8_to_float32_neon_f32x1(scale, input_buffer, output_buffer);
+		__int8_to_float32_neon_f32x1(input_buffer, output_buffer, scale, zero_point);
 		input_buffer++;
 		output_buffer++;
 	}
@@ -175,7 +186,7 @@ rte_ml_io_int8_to_float32(float scale, uint64_t nb_elements, void *input, void *
 }
 
 static inline void
-__float32_to_uint8_neon_u8x8(float scale, float *input, uint8_t *output)
+__float32_to_uint8_neon_u8x8(const float *input, uint8_t *output, float scale, uint8_t zero_point)
 {
 	uint16x4_t u16x4_l;
 	uint16x4_t u16x4_h;
@@ -188,7 +199,8 @@ __float32_to_uint8_neon_u8x8(float scale, float *input, uint8_t *output)
 	 * use round to nearest with ties away rounding mode.
 	 */
 	f32x4 = vld1q_f32(input);
-	f32x4 = vmulq_n_f32(f32x4, scale);
+	f32x4 = vdivq_f32(f32x4, vdupq_n_f32(scale));
+	f32x4 = vaddq_f32(f32x4, vdupq_n_f32((float)zero_point));
 	u32x4 = vcvtaq_u32_f32(f32x4);
 	u16x4_l = vqmovn_u32(u32x4);
 
@@ -196,7 +208,8 @@ __float32_to_uint8_neon_u8x8(float scale, float *input, uint8_t *output)
 	 * use round to nearest with ties away rounding mode.
 	 */
 	f32x4 = vld1q_f32(input + 4);
-	f32x4 = vmulq_n_f32(f32x4, scale);
+	f32x4 = vdivq_f32(f32x4, vdupq_n_f32(scale));
+	f32x4 = vaddq_f32(f32x4, vdupq_n_f32((float)zero_point));
 	u32x4 = vcvtaq_u32_f32(f32x4);
 	u16x4_h = vqmovn_u32(u32x4);
 
@@ -211,25 +224,29 @@ __float32_to_uint8_neon_u8x8(float scale, float *input, uint8_t *output)
 }
 
 static inline void
-__float32_to_uint8_neon_u8x1(float scale, float *input, uint8_t *output)
+__float32_to_uint8_neon_u8x1(const float *input, uint8_t *output, float scale, uint8_t zero_point)
 {
-	uint32_t u32;
+	float32x2_t f32x2;
+	uint32x2_t u32x2;
 	uint16_t u16;
 
 	/* scale and convert, round to nearest with ties away rounding mode */
-	u32 = vcvtas_u32_f32(scale * (*input));
+	f32x2 = vdiv_f32(vdup_n_f32(*input), vdup_n_f32(scale));
+	f32x2 = vadd_f32(f32x2, vdup_n_f32((float)zero_point));
+	u32x2 = vcvta_u32_f32(f32x2);
 
 	/* saturate narrow */
-	u16 = vqmovns_u32(u32);
+	u16 = vqmovns_u32(vget_lane_u32(u32x2, 0));
 
 	/* convert to uint8_t */
 	*output = vqmovnh_u16(u16);
 }
 
 int
-rte_ml_io_float32_to_uint8(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_uint8(const void *input, void *output, uint64_t nb_elements, float scale,
+			   uint8_t zero_point)
 {
-	float *input_buffer;
+	const float *input_buffer;
 	uint8_t *output_buffer;
 	uint64_t nb_iterations;
 	uint32_t vlen;
@@ -238,14 +255,14 @@ rte_ml_io_float32_to_uint8(float scale, uint64_t nb_elements, void *input, void
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (float *)input;
+	input_buffer = (const float *)input;
 	output_buffer = (uint8_t *)output;
 	vlen = 2 * sizeof(float) / sizeof(uint8_t);
 	nb_iterations = nb_elements / vlen;
 
 	/* convert vlen elements in each iteration */
 	for (i = 0; i < nb_iterations; i++) {
-		__float32_to_uint8_neon_u8x8(scale, input_buffer, output_buffer);
+		__float32_to_uint8_neon_u8x8(input_buffer, output_buffer, scale, zero_point);
 		input_buffer += vlen;
 		output_buffer += vlen;
 	}
@@ -253,7 +270,7 @@ rte_ml_io_float32_to_uint8(float scale, uint64_t nb_elements, void *input, void
 	/* convert leftover elements */
 	i = i * vlen;
 	for (; i < nb_elements; i++) {
-		__float32_to_uint8_neon_u8x1(scale, input_buffer, output_buffer);
+		__float32_to_uint8_neon_u8x1(input_buffer, output_buffer, scale, zero_point);
 		input_buffer++;
 		output_buffer++;
 	}
@@ -262,45 +279,48 @@ rte_ml_io_float32_to_uint8(float scale, uint64_t nb_elements, void *input, void
 }
 
 static inline void
-__uint8_to_float32_neon_f32x8(float scale, uint8_t *input, float *output)
+__uint8_to_float32_neon_f32x8(const uint8_t *input, float *output, float scale, uint8_t zero_point)
 {
 	float32x4_t f32x4;
 	uint16x8_t u16x8;
-	uint16x4_t u16x4;
-	uint32x4_t u32x4;
+	int16x8_t s16x8;
+	int16x4_t s16x4;
+	int32x4_t s32x4;
 	uint8x8_t u8x8;
 
 	/* load 8 x uint8_t elements */
 	u8x8 = vld1_u8(input);
-
-	/* widen uint8_t to uint16_t */
 	u16x8 = vmovl_u8(u8x8);
+	s16x8 = vreinterpretq_s16_u16(u16x8);
 
 	/* convert lower 4 elements: widen to uint32_t, convert to float, scale and store */
-	u16x4 = vget_low_u16(u16x8);
-	u32x4 = vmovl_u16(u16x4);
-	f32x4 = vcvtq_f32_u32(u32x4);
+	s16x4 = vget_low_s16(s16x8);
+	s32x4 = vmovl_s16(s16x4);
+	f32x4 = vcvtq_f32_s32(s32x4);
+	f32x4 = vsubq_f32(f32x4, vdupq_n_f32((float)zero_point));
 	f32x4 = vmulq_n_f32(f32x4, scale);
 	vst1q_f32(output, f32x4);
 
 	/* convert higher 4 elements: widen to uint32_t, convert to float, scale and store */
-	u16x4 = vget_high_u16(u16x8);
-	u32x4 = vmovl_u16(u16x4);
-	f32x4 = vcvtq_f32_u32(u32x4);
+	s16x4 = vget_high_s16(s16x8);
+	s32x4 = vmovl_s16(s16x4);
+	f32x4 = vcvtq_f32_s32(s32x4);
+	f32x4 = vsubq_f32(f32x4, vdupq_n_f32((float)zero_point));
 	f32x4 = vmulq_n_f32(f32x4, scale);
 	vst1q_f32(output + 4, f32x4);
 }
 
 static inline void
-__uint8_to_float32_neon_f32x1(float scale, uint8_t *input, float *output)
+__uint8_to_float32_neon_f32x1(const uint8_t *input, float *output, float scale, uint8_t zero_point)
 {
-	*output = scale * vcvts_f32_u32((uint32_t)*input);
+	*output = scale * (vcvts_f32_u32((uint32_t)*input) - (float)zero_point);
 }
 
 int
-rte_ml_io_uint8_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_uint8_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+			   uint8_t zero_point)
 {
-	uint8_t *input_buffer;
+	const uint8_t *input_buffer;
 	float *output_buffer;
 	uint64_t nb_iterations;
 	uint64_t vlen;
@@ -309,14 +329,14 @@ rte_ml_io_uint8_to_float32(float scale, uint64_t nb_elements, void *input, void
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (uint8_t *)input;
+	input_buffer = (const uint8_t *)input;
 	output_buffer = (float *)output;
 	vlen = 2 * sizeof(float) / sizeof(uint8_t);
 	nb_iterations = nb_elements / vlen;
 
 	/* convert vlen elements in each iteration */
 	for (i = 0; i < nb_iterations; i++) {
-		__uint8_to_float32_neon_f32x8(scale, input_buffer, output_buffer);
+		__uint8_to_float32_neon_f32x8(input_buffer, output_buffer, scale, zero_point);
 		input_buffer += vlen;
 		output_buffer += vlen;
 	}
@@ -324,7 +344,7 @@ rte_ml_io_uint8_to_float32(float scale, uint64_t nb_elements, void *input, void
 	/* convert leftover elements */
 	i = i * vlen;
 	for (; i < nb_elements; i++) {
-		__uint8_to_float32_neon_f32x1(scale, input_buffer, output_buffer);
+		__uint8_to_float32_neon_f32x1(input_buffer, output_buffer, scale, zero_point);
 		input_buffer++;
 		output_buffer++;
 	}
@@ -333,7 +353,7 @@ rte_ml_io_uint8_to_float32(float scale, uint64_t nb_elements, void *input, void
 }
 
 static inline void
-__float32_to_int16_neon_s16x4(float scale, float *input, int16_t *output)
+__float32_to_int16_neon_s16x4(const float *input, int16_t *output, float scale, int16_t zero_point)
 {
 	float32x4_t f32x4;
 	int16x4_t s16x4;
@@ -343,34 +363,43 @@ __float32_to_int16_neon_s16x4(float scale, float *input, int16_t *output)
 	f32x4 = vld1q_f32(input);
 
 	/* scale */
-	f32x4 = vmulq_n_f32(f32x4, scale);
+	f32x4 = vdivq_f32(f32x4, vdupq_n_f32(scale));
+
+	/* add zero point */
+	f32x4 = vaddq_f32(f32x4, vdupq_n_f32((float)zero_point));
 
 	/* convert to int32x4_t using round to nearest with ties away rounding mode */
 	s32x4 = vcvtaq_s32_f32(f32x4);
 
 	/* saturate narrow to int16x4_t */
 	s16x4 = vqmovn_s32(s32x4);
+	s16x4 = vmax_s16(s16x4, vdup_n_s16(INT16_MIN + 1));
 
 	/* store 4 elements */
 	vst1_s16(output, s16x4);
 }
 
 static inline void
-__float32_to_int16_neon_s16x1(float scale, float *input, int16_t *output)
+__float32_to_int16_neon_s16x1(const float *input, int16_t *output, float scale, int16_t zero_point)
 {
-	int32_t s32;
+	float32x2_t f32x2;
+	int32x2_t s32x2;
 
 	/* scale and convert, round to nearest with ties away rounding mode */
-	s32 = vcvtas_s32_f32(scale * (*input));
+	f32x2 = vdiv_f32(vdup_n_f32(*input), vdup_n_f32(scale));
+	f32x2 = vadd_f32(f32x2, vdup_n_f32((float)zero_point));
+	s32x2 = vcvta_s32_f32(f32x2);
+	s32x2 = vmax_s32(s32x2, vdup_n_s32(INT16_MIN + 1));
 
 	/* saturate narrow */
-	*output = vqmovns_s32(s32);
+	*output = vqmovns_s32(vget_lane_s32(s32x2, 0));
 }
 
 int
-rte_ml_io_float32_to_int16(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_int16(const void *input, void *output, uint64_t nb_elements, float scale,
+			   int16_t zero_point)
 {
-	float *input_buffer;
+	const float *input_buffer;
 	int16_t *output_buffer;
 	uint64_t nb_iterations;
 	uint32_t vlen;
@@ -379,14 +408,14 @@ rte_ml_io_float32_to_int16(float scale, uint64_t nb_elements, void *input, void
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (float *)input;
+	input_buffer = (const float *)input;
 	output_buffer = (int16_t *)output;
 	vlen = 2 * sizeof(float) / sizeof(int16_t);
 	nb_iterations = nb_elements / vlen;
 
 	/* convert vlen elements in each iteration */
 	for (i = 0; i < nb_iterations; i++) {
-		__float32_to_int16_neon_s16x4(scale, input_buffer, output_buffer);
+		__float32_to_int16_neon_s16x4(input_buffer, output_buffer, scale, zero_point);
 		input_buffer += vlen;
 		output_buffer += vlen;
 	}
@@ -394,7 +423,7 @@ rte_ml_io_float32_to_int16(float scale, uint64_t nb_elements, void *input, void
 	/* convert leftover elements */
 	i = i * vlen;
 	for (; i < nb_elements; i++) {
-		__float32_to_int16_neon_s16x1(scale, input_buffer, output_buffer);
+		__float32_to_int16_neon_s16x1(input_buffer, output_buffer, scale, zero_point);
 		input_buffer++;
 		output_buffer++;
 	}
@@ -403,7 +432,7 @@ rte_ml_io_float32_to_int16(float scale, uint64_t nb_elements, void *input, void
 }
 
 static inline void
-__int16_to_float32_neon_f32x4(float scale, int16_t *input, float *output)
+__int16_to_float32_neon_f32x4(const int16_t *input, float *output, float scale, int16_t zero_point)
 {
 	float32x4_t f32x4;
 	int16x4_t s16x4;
@@ -418,6 +447,9 @@ __int16_to_float32_neon_f32x4(float scale, int16_t *input, float *output)
 	/* convert int32_t to float */
 	f32x4 = vcvtq_f32_s32(s32x4);
 
+	/* subtract zero point */
+	f32x4 = vsubq_f32(f32x4, vdupq_n_f32((float)zero_point));
+
 	/* scale */
 	f32x4 = vmulq_n_f32(f32x4, scale);
 
@@ -426,15 +458,16 @@ __int16_to_float32_neon_f32x4(float scale, int16_t *input, float *output)
 }
 
 static inline void
-__int16_to_float32_neon_f32x1(float scale, int16_t *input, float *output)
+__int16_to_float32_neon_f32x1(const int16_t *input, float *output, float scale, int16_t zero_point)
 {
-	*output = scale * vcvts_f32_s32((int32_t)*input);
+	*output = scale * (vcvts_f32_s32((int32_t)*input) - (float)zero_point);
 }
 
 int
-rte_ml_io_int16_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_int16_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+			   int16_t zero_point)
 {
-	int16_t *input_buffer;
+	const int16_t *input_buffer;
 	float *output_buffer;
 	uint64_t nb_iterations;
 	uint32_t vlen;
@@ -443,14 +476,14 @@ rte_ml_io_int16_to_float32(float scale, uint64_t nb_elements, void *input, void
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (int16_t *)input;
+	input_buffer = (const int16_t *)input;
 	output_buffer = (float *)output;
 	vlen = 2 * sizeof(float) / sizeof(int16_t);
 	nb_iterations = nb_elements / vlen;
 
 	/* convert vlen elements in each iteration */
 	for (i = 0; i < nb_iterations; i++) {
-		__int16_to_float32_neon_f32x4(scale, input_buffer, output_buffer);
+		__int16_to_float32_neon_f32x4(input_buffer, output_buffer, scale, zero_point);
 		input_buffer += vlen;
 		output_buffer += vlen;
 	}
@@ -458,7 +491,7 @@ rte_ml_io_int16_to_float32(float scale, uint64_t nb_elements, void *input, void
 	/* convert leftover elements */
 	i = i * vlen;
 	for (; i < nb_elements; i++) {
-		__int16_to_float32_neon_f32x1(scale, input_buffer, output_buffer);
+		__int16_to_float32_neon_f32x1(input_buffer, output_buffer, scale, zero_point);
 		input_buffer++;
 		output_buffer++;
 	}
@@ -467,7 +500,8 @@ rte_ml_io_int16_to_float32(float scale, uint64_t nb_elements, void *input, void
 }
 
 static inline void
-__float32_to_uint16_neon_u16x4(float scale, float *input, uint16_t *output)
+__float32_to_uint16_neon_u16x4(const float *input, uint16_t *output, float scale,
+			       uint16_t zero_point)
 {
 	float32x4_t f32x4;
 	uint16x4_t u16x4;
@@ -477,7 +511,10 @@ __float32_to_uint16_neon_u16x4(float scale, float *input, uint16_t *output)
 	f32x4 = vld1q_f32(input);
 
 	/* scale */
-	f32x4 = vmulq_n_f32(f32x4, scale);
+	f32x4 = vdivq_f32(f32x4, vdupq_n_f32(scale));
+
+	/* add zero point */
+	f32x4 = vaddq_f32(f32x4, vdupq_n_f32((float)zero_point));
 
 	/* convert using round to nearest with ties to away rounding mode */
 	u32x4 = vcvtaq_u32_f32(f32x4);
@@ -490,21 +527,23 @@ __float32_to_uint16_neon_u16x4(float scale, float *input, uint16_t *output)
 }
 
 static inline void
-__float32_to_uint16_neon_u16x1(float scale, float *input, uint16_t *output)
+__float32_to_uint16_neon_u16x1(const float *input, uint16_t *output, float scale,
+			       uint16_t zero_point)
 {
 	uint32_t u32;
 
 	/* scale and convert, round to nearest with ties away rounding mode */
-	u32 = vcvtas_u32_f32(scale * (*input));
+	u32 = vcvtas_u32_f32((*input) / scale + (float)zero_point);
 
 	/* saturate narrow */
-	*output = vqmovns_u32(u32);
+	*output = vqmovns_u32(u32) + zero_point;
 }
 
 int
-rte_ml_io_float32_to_uint16(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_uint16(const void *input, void *output, uint64_t nb_elements, float scale,
+			   uint16_t zero_point)
 {
-	float *input_buffer;
+	const float *input_buffer;
 	uint16_t *output_buffer;
 	uint64_t nb_iterations;
 	uint64_t vlen;
@@ -513,14 +552,14 @@ rte_ml_io_float32_to_uint16(float scale, uint64_t nb_elements, void *input, void
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (float *)input;
+	input_buffer = (const float *)input;
 	output_buffer = (uint16_t *)output;
 	vlen = 2 * sizeof(float) / sizeof(uint16_t);
 	nb_iterations = nb_elements / vlen;
 
 	/* convert vlen elements in each iteration */
 	for (i = 0; i < nb_iterations; i++) {
-		__float32_to_uint16_neon_u16x4(scale, input_buffer, output_buffer);
+		__float32_to_uint16_neon_u16x4(input_buffer, output_buffer, scale, zero_point);
 		input_buffer += vlen;
 		output_buffer += vlen;
 	}
@@ -528,7 +567,7 @@ rte_ml_io_float32_to_uint16(float scale, uint64_t nb_elements, void *input, void
 	/* convert leftover elements */
 	i = i * vlen;
 	for (; i < nb_elements; i++) {
-		__float32_to_uint16_neon_u16x1(scale, input_buffer, output_buffer);
+		__float32_to_uint16_neon_u16x1(input_buffer, output_buffer, scale, zero_point);
 		input_buffer++;
 		output_buffer++;
 	}
@@ -537,7 +576,8 @@ rte_ml_io_float32_to_uint16(float scale, uint64_t nb_elements, void *input, void
 }
 
 static inline void
-__uint16_to_float32_neon_f32x4(float scale, uint16_t *input, float *output)
+__uint16_to_float32_neon_f32x4(const uint16_t *input, float *output, float scale,
+			       uint16_t zero_point)
 {
 	float32x4_t f32x4;
 	uint16x4_t u16x4;
@@ -552,6 +592,9 @@ __uint16_to_float32_neon_f32x4(float scale, uint16_t *input, float *output)
 	/* convert uint32_t to float */
 	f32x4 = vcvtq_f32_u32(u32x4);
 
+	/* subtract zero point */
+	f32x4 = vsubq_f32(f32x4, vdupq_n_f32((float)zero_point));
+
 	/* scale */
 	f32x4 = vmulq_n_f32(f32x4, scale);
 
@@ -560,15 +603,17 @@ __uint16_to_float32_neon_f32x4(float scale, uint16_t *input, float *output)
 }
 
 static inline void
-__uint16_to_float32_neon_f32x1(float scale, uint16_t *input, float *output)
+__uint16_to_float32_neon_f32x1(const uint16_t *input, float *output, float scale,
+			       uint16_t zero_point)
 {
-	*output = scale * vcvts_f32_u32((uint32_t)*input);
+	*output = scale * (vcvts_f32_u32((uint32_t)*input) - (float)zero_point);
 }
 
 int
-rte_ml_io_uint16_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_uint16_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+			   uint16_t zero_point)
 {
-	uint16_t *input_buffer;
+	const uint16_t *input_buffer;
 	float *output_buffer;
 	uint64_t nb_iterations;
 	uint32_t vlen;
@@ -577,14 +622,14 @@ rte_ml_io_uint16_to_float32(float scale, uint64_t nb_elements, void *input, void
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (uint16_t *)input;
+	input_buffer = (const uint16_t *)input;
 	output_buffer = (float *)output;
 	vlen = 2 * sizeof(float) / sizeof(uint16_t);
 	nb_iterations = nb_elements / vlen;
 
 	/* convert vlen elements in each iteration */
 	for (i = 0; i < nb_iterations; i++) {
-		__uint16_to_float32_neon_f32x4(scale, input_buffer, output_buffer);
+		__uint16_to_float32_neon_f32x4(input_buffer, output_buffer, scale, zero_point);
 		input_buffer += vlen;
 		output_buffer += vlen;
 	}
@@ -592,7 +637,7 @@ rte_ml_io_uint16_to_float32(float scale, uint64_t nb_elements, void *input, void
 	/* convert leftover elements */
 	i = i * vlen;
 	for (; i < nb_elements; i++) {
-		__uint16_to_float32_neon_f32x1(scale, input_buffer, output_buffer);
+		__uint16_to_float32_neon_f32x1(input_buffer, output_buffer, scale, zero_point);
 		input_buffer++;
 		output_buffer++;
 	}
@@ -601,7 +646,7 @@ rte_ml_io_uint16_to_float32(float scale, uint64_t nb_elements, void *input, void
 }
 
 static inline void
-__float32_to_int32_neon_s32x4(float scale, float *input, int32_t *output)
+__float32_to_int32_neon_s32x4(const float *input, int32_t *output, float scale, int32_t zero_point)
 {
 	float32x4_t f32x4;
 	int32x4_t s32x4;
@@ -610,26 +655,43 @@ __float32_to_int32_neon_s32x4(float scale, float *input, int32_t *output)
 	f32x4 = vld1q_f32(input);
 
 	/* scale */
-	f32x4 = vmulq_n_f32(f32x4, scale);
+	f32x4 = vdivq_f32(f32x4, vdupq_n_f32(scale));
+
+	/* add zero point */
+	f32x4 = vaddq_f32(f32x4, vdupq_n_f32((float)zero_point));
 
 	/* convert to int32x4_t using round to nearest with ties away rounding mode */
 	s32x4 = vcvtaq_s32_f32(f32x4);
 
+	/* add zero_point */
+	s32x4 = vaddq_s32(s32x4, vdupq_n_s32(zero_point));
+	s32x4 = vmaxq_s32(s32x4, vdupq_n_s32(INT32_MIN + 1));
+
 	/* store 4 elements */
 	vst1q_s32(output, s32x4);
 }
 
 static inline void
-__float32_to_int32_neon_s32x1(float scale, float *input, int32_t *output)
+__float32_to_int32_neon_s32x1(const float *input, int32_t *output, float scale, int32_t zero_point)
 {
+	float32x2_t f32x2;
+	int32x2_t s32x2;
+
 	/* scale and convert, round to nearest with ties away rounding mode */
-	*output = vcvtas_s32_f32(scale * (*input));
+	f32x2 = vdiv_f32(vdup_n_f32(*input), vdup_n_f32(scale));
+	f32x2 = vadd_f32(f32x2, vdup_n_f32((float)zero_point));
+	s32x2 = vcvta_s32_f32(f32x2);
+	s32x2 = vmax_s32(s32x2, vdup_n_s32(INT16_MIN + 1));
+
+	/* saturate narrow */
+	vst1_lane_s32(output, s32x2, 0);
 }
 
 int
-rte_ml_io_float32_to_int32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_int32(const void *input, void *output, uint64_t nb_elements, float scale,
+			   int32_t zero_point)
 {
-	float *input_buffer;
+	const float *input_buffer;
 	int32_t *output_buffer;
 	uint64_t nb_iterations;
 	uint32_t vlen;
@@ -638,14 +700,14 @@ rte_ml_io_float32_to_int32(float scale, uint64_t nb_elements, void *input, void
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (float *)input;
+	input_buffer = (const float *)input;
 	output_buffer = (int32_t *)output;
 	vlen = 2 * sizeof(float) / sizeof(int32_t);
 	nb_iterations = nb_elements / vlen;
 
 	/* convert vlen elements in each iteration */
 	for (i = 0; i < nb_iterations; i++) {
-		__float32_to_int32_neon_s32x4(scale, input_buffer, output_buffer);
+		__float32_to_int32_neon_s32x4(input_buffer, output_buffer, scale, zero_point);
 		input_buffer += vlen;
 		output_buffer += vlen;
 	}
@@ -653,7 +715,7 @@ rte_ml_io_float32_to_int32(float scale, uint64_t nb_elements, void *input, void
 	/* convert leftover elements */
 	i = i * vlen;
 	for (; i < nb_elements; i++) {
-		__float32_to_int32_neon_s32x1(scale, input_buffer, output_buffer);
+		__float32_to_int32_neon_s32x1(input_buffer, output_buffer, scale, zero_point);
 		input_buffer++;
 		output_buffer++;
 	}
@@ -662,7 +724,7 @@ rte_ml_io_float32_to_int32(float scale, uint64_t nb_elements, void *input, void
 }
 
 static inline void
-__int32_to_float32_neon_f32x4(float scale, int32_t *input, float *output)
+__int32_to_float32_neon_f32x4(const int32_t *input, float *output, float scale, int32_t zero_point)
 {
 	float32x4_t f32x4;
 	int32x4_t s32x4;
@@ -673,6 +735,9 @@ __int32_to_float32_neon_f32x4(float scale, int32_t *input, float *output)
 	/* convert int32_t to float */
 	f32x4 = vcvtq_f32_s32(s32x4);
 
+	/* subtract zero point */
+	f32x4 = vsubq_f32(f32x4, vdupq_n_f32((float)zero_point));
+
 	/* scale */
 	f32x4 = vmulq_n_f32(f32x4, scale);
 
@@ -681,15 +746,16 @@ __int32_to_float32_neon_f32x4(float scale, int32_t *input, float *output)
 }
 
 static inline void
-__int32_to_float32_neon_f32x1(float scale, int32_t *input, float *output)
+__int32_to_float32_neon_f32x1(const int32_t *input, float *output, float scale, int32_t zero_point)
 {
-	*output = scale * vcvts_f32_s32(*input);
+	*output = scale * (vcvts_f32_s32(*input) - (float)zero_point);
 }
 
 int
-rte_ml_io_int32_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_int32_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+			   int32_t zero_point)
 {
-	int32_t *input_buffer;
+	const int32_t *input_buffer;
 	float *output_buffer;
 	uint64_t nb_iterations;
 	uint32_t vlen;
@@ -698,14 +764,14 @@ rte_ml_io_int32_to_float32(float scale, uint64_t nb_elements, void *input, void
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (int32_t *)input;
+	input_buffer = (const int32_t *)input;
 	output_buffer = (float *)output;
 	vlen = 2 * sizeof(float) / sizeof(int32_t);
 	nb_iterations = nb_elements / vlen;
 
 	/* convert vlen elements in each iteration */
 	for (i = 0; i < nb_iterations; i++) {
-		__int32_to_float32_neon_f32x4(scale, input_buffer, output_buffer);
+		__int32_to_float32_neon_f32x4(input_buffer, output_buffer, scale, zero_point);
 		input_buffer += vlen;
 		output_buffer += vlen;
 	}
@@ -713,7 +779,7 @@ rte_ml_io_int32_to_float32(float scale, uint64_t nb_elements, void *input, void
 	/* convert leftover elements */
 	i = i * vlen;
 	for (; i < nb_elements; i++) {
-		__int32_to_float32_neon_f32x1(scale, input_buffer, output_buffer);
+		__int32_to_float32_neon_f32x1(input_buffer, output_buffer, scale, zero_point);
 		input_buffer++;
 		output_buffer++;
 	}
@@ -722,7 +788,8 @@ rte_ml_io_int32_to_float32(float scale, uint64_t nb_elements, void *input, void
 }
 
 static inline void
-__float32_to_uint32_neon_u32x4(float scale, float *input, uint32_t *output)
+__float32_to_uint32_neon_u32x4(const float *input, uint32_t *output, float scale,
+			       uint32_t zero_point)
 {
 	float32x4_t f32x4;
 	uint32x4_t u32x4;
@@ -731,7 +798,10 @@ __float32_to_uint32_neon_u32x4(float scale, float *input, uint32_t *output)
 	f32x4 = vld1q_f32(input);
 
 	/* scale */
-	f32x4 = vmulq_n_f32(f32x4, scale);
+	f32x4 = vdivq_f32(f32x4, vdupq_n_f32(scale));
+
+	/* add zero point */
+	f32x4 = vaddq_f32(f32x4, vdupq_n_f32((float)zero_point));
 
 	/* convert using round to nearest with ties to away rounding mode */
 	u32x4 = vcvtaq_u32_f32(f32x4);
@@ -741,16 +811,18 @@ __float32_to_uint32_neon_u32x4(float scale, float *input, uint32_t *output)
 }
 
 static inline void
-__float32_to_uint32_neon_u32x1(float scale, float *input, uint32_t *output)
+__float32_to_uint32_neon_u32x1(const float *input, uint32_t *output, float scale,
+			       uint32_t zero_point)
 {
 	/* scale and convert, round to nearest with ties away rounding mode */
-	*output = vcvtas_u32_f32(scale * (*input));
+	*output = vcvtas_u32_f32((*input) / scale + (float)zero_point);
 }
 
 int
-rte_ml_io_float32_to_uint32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_uint32(const void *input, void *output, uint64_t nb_elements, float scale,
+			   uint32_t zero_point)
 {
-	float *input_buffer;
+	const float *input_buffer;
 	uint32_t *output_buffer;
 	uint64_t nb_iterations;
 	uint64_t vlen;
@@ -759,14 +831,14 @@ rte_ml_io_float32_to_uint32(float scale, uint64_t nb_elements, void *input, void
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (float *)input;
+	input_buffer = (const float *)input;
 	output_buffer = (uint32_t *)output;
 	vlen = 2 * sizeof(float) / sizeof(uint32_t);
 	nb_iterations = nb_elements / vlen;
 
 	/* convert vlen elements in each iteration */
 	for (i = 0; i < nb_iterations; i++) {
-		__float32_to_uint32_neon_u32x4(scale, input_buffer, output_buffer);
+		__float32_to_uint32_neon_u32x4(input_buffer, output_buffer, scale, zero_point);
 		input_buffer += vlen;
 		output_buffer += vlen;
 	}
@@ -774,7 +846,7 @@ rte_ml_io_float32_to_uint32(float scale, uint64_t nb_elements, void *input, void
 	/* convert leftover elements */
 	i = i * vlen;
 	for (; i < nb_elements; i++) {
-		__float32_to_uint32_neon_u32x1(scale, input_buffer, output_buffer);
+		__float32_to_uint32_neon_u32x1(input_buffer, output_buffer, scale, zero_point);
 		input_buffer++;
 		output_buffer++;
 	}
@@ -783,7 +855,8 @@ rte_ml_io_float32_to_uint32(float scale, uint64_t nb_elements, void *input, void
 }
 
 static inline void
-__uint32_to_float32_neon_f32x4(float scale, uint32_t *input, float *output)
+__uint32_to_float32_neon_f32x4(const uint32_t *input, float *output, float scale,
+			       uint32_t zero_point)
 {
 	float32x4_t f32x4;
 	uint32x4_t u32x4;
@@ -794,6 +867,9 @@ __uint32_to_float32_neon_f32x4(float scale, uint32_t *input, float *output)
 	/* convert uint32_t to float */
 	f32x4 = vcvtq_f32_u32(u32x4);
 
+	/* subtract zero point */
+	f32x4 = vsubq_f32(f32x4, vdupq_n_f32((float)zero_point));
+
 	/* scale */
 	f32x4 = vmulq_n_f32(f32x4, scale);
 
@@ -802,15 +878,17 @@ __uint32_to_float32_neon_f32x4(float scale, uint32_t *input, float *output)
 }
 
 static inline void
-__uint32_to_float32_neon_f32x1(float scale, uint32_t *input, float *output)
+__uint32_to_float32_neon_f32x1(const uint32_t *input, float *output, float scale,
+			       uint32_t zero_point)
 {
-	*output = scale * vcvts_f32_u32(*input);
+	*output = scale * (vcvts_f32_u32(*input) - (float)zero_point);
 }
 
 int
-rte_ml_io_uint32_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_uint32_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+			   uint32_t zero_point)
 {
-	uint32_t *input_buffer;
+	const uint32_t *input_buffer;
 	float *output_buffer;
 	uint64_t nb_iterations;
 	uint32_t vlen;
@@ -819,14 +897,14 @@ rte_ml_io_uint32_to_float32(float scale, uint64_t nb_elements, void *input, void
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (uint32_t *)input;
+	input_buffer = (const uint32_t *)input;
 	output_buffer = (float *)output;
 	vlen = 2 * sizeof(float) / sizeof(uint32_t);
 	nb_iterations = nb_elements / vlen;
 
 	/* convert vlen elements in each iteration */
 	for (i = 0; i < nb_iterations; i++) {
-		__uint32_to_float32_neon_f32x4(scale, input_buffer, output_buffer);
+		__uint32_to_float32_neon_f32x4(input_buffer, output_buffer, scale, zero_point);
 		input_buffer += vlen;
 		output_buffer += vlen;
 	}
@@ -834,7 +912,7 @@ rte_ml_io_uint32_to_float32(float scale, uint64_t nb_elements, void *input, void
 	/* convert leftover elements */
 	i = i * vlen;
 	for (; i < nb_elements; i++) {
-		__uint32_to_float32_neon_f32x1(scale, input_buffer, output_buffer);
+		__uint32_to_float32_neon_f32x1(input_buffer, output_buffer, scale, zero_point);
 		input_buffer++;
 		output_buffer++;
 	}
@@ -843,55 +921,68 @@ rte_ml_io_uint32_to_float32(float scale, uint64_t nb_elements, void *input, void
 }
 
 static inline void
-__float32_to_int64_neon_s64x2(float scale, float *input, int64_t *output)
+__float32_to_int64_neon_s64x2(const float *input, int64_t *output, float scale, int64_t zero_point)
 {
 	float32x2_t f32x2;
 	float64x2_t f64x2;
 	int64x2_t s64x2;
+	int64_t s64;
 
 	/* load 2 x float elements */
 	f32x2 = vld1_f32(input);
 
 	/* scale */
-	f32x2 = vmul_n_f32(f32x2, scale);
+	f32x2 = vdiv_f32(f32x2, vdup_n_f32(scale));
+
+	/* add zero point */
+	f32x2 = vadd_f32(f32x2, vdup_n_f32((float)zero_point));
 
 	/* convert to float64x2_t */
 	f64x2 = vcvt_f64_f32(f32x2);
 
 	/* convert to int64x2_t */
 	s64x2 = vcvtaq_s64_f64(f64x2);
+	s64 = vgetq_lane_s64(s64x2, 0);
+	s64 = (s64 == INT64_MIN) ? INT64_MIN + 1 : s64;
 
-	/* store 2 elements */
-	vst1q_s64(output, s64x2);
+	/* store lane 0 of int64x2_t */
+	*output = s64;
 }
 
 static inline void
-__float32_to_int64_neon_s64x1(float scale, float *input, int64_t *output)
+__float32_to_int64_neon_s64x1(const float *input, int64_t *output, float scale, int64_t zero_point)
 {
 	float32x2_t f32x2;
 	float64x2_t f64x2;
 	int64x2_t s64x2;
+	int64_t s64;
 
 	/* load 1 x float element */
 	f32x2 = vdup_n_f32(*input);
 
 	/* scale */
-	f32x2 = vmul_n_f32(f32x2, scale);
+	f32x2 = vdiv_f32(f32x2, vdup_n_f32(scale));
+
+	/* add zero point */
+	f32x2 = vadd_f32(f32x2, vdup_n_f32((float)zero_point));
 
 	/* convert to float64x2_t */
 	f64x2 = vcvt_f64_f32(f32x2);
 
 	/* convert to int64x2_t */
 	s64x2 = vcvtaq_s64_f64(f64x2);
+	s64 = vgetq_lane_s64(s64x2, 0);
+	s64 = (s64 == INT64_MIN) ? INT64_MIN + 1 : s64;
 
 	/* store lane 0 of int64x2_t */
-	vst1q_lane_s64(output, s64x2, 0);
+	*output = s64;
 }
 
 int
-rte_ml_io_float32_to_int64(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_int64(const void *input, void *output, uint64_t nb_elements, float scale,
+			   int64_t zero_point)
 {
-	float *input_buffer;
+	const float *input_buffer;
 	int64_t *output_buffer;
 	uint64_t nb_iterations;
 	uint32_t vlen;
@@ -900,14 +991,14 @@ rte_ml_io_float32_to_int64(float scale, uint64_t nb_elements, void *input, void
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (float *)input;
+	input_buffer = (const float *)input;
 	output_buffer = (int64_t *)output;
 	vlen = 4 * sizeof(float) / sizeof(int64_t);
 	nb_iterations = nb_elements / vlen;
 
 	/* convert vlen elements in each iteration */
 	for (i = 0; i < nb_iterations; i++) {
-		__float32_to_int64_neon_s64x2(scale, input_buffer, output_buffer);
+		__float32_to_int64_neon_s64x2(input_buffer, output_buffer, scale, zero_point);
 		input_buffer += vlen;
 		output_buffer += vlen;
 	}
@@ -915,7 +1006,7 @@ rte_ml_io_float32_to_int64(float scale, uint64_t nb_elements, void *input, void
 	/* convert leftover elements */
 	i = i * vlen;
 	for (; i < nb_elements; i++) {
-		__float32_to_int64_neon_s64x1(scale, input_buffer, output_buffer);
+		__float32_to_int64_neon_s64x1(input_buffer, output_buffer, scale, zero_point);
 		input_buffer++;
 		output_buffer++;
 	}
@@ -924,7 +1015,7 @@ rte_ml_io_float32_to_int64(float scale, uint64_t nb_elements, void *input, void
 }
 
 static inline void
-__int64_to_float32_neon_f32x2(float scale, int64_t *input, float *output)
+__int64_to_float32_neon_f32x2(const int64_t *input, float *output, float scale, int64_t zero_point)
 {
 	int64x2_t s64x2;
 	float64x2_t f64x2;
@@ -939,6 +1030,9 @@ __int64_to_float32_neon_f32x2(float scale, int64_t *input, float *output)
 	/* convert float64x2_t to float32x2_t */
 	f32x2 = vcvt_f32_f64(f64x2);
 
+	/* subtract zero_point */
+	f32x2 = vsub_f32(f32x2, vdup_n_f32(zero_point));
+
 	/* scale */
 	f32x2 = vmul_n_f32(f32x2, scale);
 
@@ -947,7 +1041,7 @@ __int64_to_float32_neon_f32x2(float scale, int64_t *input, float *output)
 }
 
 static inline void
-__int64_to_float32_neon_f32x1(float scale, int64_t *input, float *output)
+__int64_to_float32_neon_f32x1(const int64_t *input, float *output, float scale, int64_t zero_point)
 {
 	int64x2_t s64x2;
 	float64x2_t f64x2;
@@ -962,17 +1056,21 @@ __int64_to_float32_neon_f32x1(float scale, int64_t *input, float *output)
 	/* convert float64x2_t to float32x2_t */
 	f32x2 = vcvt_f32_f64(f64x2);
 
+	/* subtract zero_point */
+	f32x2 = vsub_f32(f32x2, vdup_n_f32(zero_point));
+
 	/* scale */
 	f32x2 = vmul_n_f32(f32x2, scale);
 
-	/* store float32x2_t */
+	/* store float32x2_t lane 0 */
 	vst1_lane_f32(output, f32x2, 0);
 }
 
 int
-rte_ml_io_int64_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_int64_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+			   int64_t zero_point)
 {
-	int64_t *input_buffer;
+	const int64_t *input_buffer;
 	float *output_buffer;
 	uint64_t nb_iterations;
 	uint32_t vlen;
@@ -981,14 +1079,14 @@ rte_ml_io_int64_to_float32(float scale, uint64_t nb_elements, void *input, void
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (int64_t *)input;
+	input_buffer = (const int64_t *)input;
 	output_buffer = (float *)output;
 	vlen = 4 * sizeof(float) / sizeof(int64_t);
 	nb_iterations = nb_elements / vlen;
 
 	/* convert vlen elements in each iteration */
 	for (i = 0; i < nb_iterations; i++) {
-		__int64_to_float32_neon_f32x2(scale, input_buffer, output_buffer);
+		__int64_to_float32_neon_f32x2(input_buffer, output_buffer, scale, zero_point);
 		input_buffer += vlen;
 		output_buffer += vlen;
 	}
@@ -996,7 +1094,7 @@ rte_ml_io_int64_to_float32(float scale, uint64_t nb_elements, void *input, void
 	/* convert leftover elements */
 	i = i * vlen;
 	for (; i < nb_elements; i++) {
-		__int64_to_float32_neon_f32x1(scale, input_buffer, output_buffer);
+		__int64_to_float32_neon_f32x1(input_buffer, output_buffer, scale, zero_point);
 		input_buffer++;
 		output_buffer++;
 	}
@@ -1005,7 +1103,8 @@ rte_ml_io_int64_to_float32(float scale, uint64_t nb_elements, void *input, void
 }
 
 static inline void
-__float32_to_uint64_neon_u64x2(float scale, float *input, uint64_t *output)
+__float32_to_uint64_neon_u64x2(const float *input, uint64_t *output, float scale,
+			       uint64_t zero_point)
 {
 	float32x2_t f32x2;
 	float64x2_t f64x2;
@@ -1015,7 +1114,10 @@ __float32_to_uint64_neon_u64x2(float scale, float *input, uint64_t *output)
 	f32x2 = vld1_f32(input);
 
 	/* scale */
-	f32x2 = vmul_n_f32(f32x2, scale);
+	f32x2 = vdiv_f32(f32x2, vdup_n_f32(scale));
+
+	/* add zero point */
+	f32x2 = vadd_f32(f32x2, vdup_n_f32((float)zero_point));
 
 	/* convert to float64x2_t */
 	f64x2 = vcvt_f64_f32(f32x2);
@@ -1028,7 +1130,8 @@ __float32_to_uint64_neon_u64x2(float scale, float *input, uint64_t *output)
 }
 
 static inline void
-__float32_to_uint64_neon_u64x1(float scale, float *input, uint64_t *output)
+__float32_to_uint64_neon_u64x1(const float *input, uint64_t *output, float scale,
+			       uint64_t zero_point)
 {
 	float32x2_t f32x2;
 	float64x2_t f64x2;
@@ -1038,7 +1141,10 @@ __float32_to_uint64_neon_u64x1(float scale, float *input, uint64_t *output)
 	f32x2 = vld1_lane_f32(input, vdup_n_f32(0), 0);
 
 	/* scale */
-	f32x2 = vmul_n_f32(f32x2, scale);
+	f32x2 = vdiv_f32(f32x2, vdup_n_f32(scale));
+
+	/* add zero_point */
+	f32x2 = vadd_f32(f32x2, vdup_n_f32((float)zero_point));
 
 	/* convert to float64x2_t */
 	f64x2 = vcvt_f64_f32(f32x2);
@@ -1051,9 +1157,10 @@ __float32_to_uint64_neon_u64x1(float scale, float *input, uint64_t *output)
 }
 
 int
-rte_ml_io_float32_to_uint64(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_uint64(const void *input, void *output, uint64_t nb_elements, float scale,
+			   uint64_t zero_point)
 {
-	float *input_buffer;
+	const float *input_buffer;
 	uint64_t *output_buffer;
 	uint64_t nb_iterations;
 	uint32_t vlen;
@@ -1062,14 +1169,14 @@ rte_ml_io_float32_to_uint64(float scale, uint64_t nb_elements, void *input, void
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (float *)input;
+	input_buffer = (const float *)input;
 	output_buffer = (uint64_t *)output;
 	vlen = 4 * sizeof(float) / sizeof(uint64_t);
 	nb_iterations = nb_elements / vlen;
 
 	/* convert vlen elements in each iteration */
 	for (i = 0; i < nb_iterations; i++) {
-		__float32_to_uint64_neon_u64x2(scale, input_buffer, output_buffer);
+		__float32_to_uint64_neon_u64x2(input_buffer, output_buffer, scale, zero_point);
 		input_buffer += vlen;
 		output_buffer += vlen;
 	}
@@ -1077,7 +1184,7 @@ rte_ml_io_float32_to_uint64(float scale, uint64_t nb_elements, void *input, void
 	/* convert leftover elements */
 	i = i * vlen;
 	for (; i < nb_elements; i++) {
-		__float32_to_uint64_neon_u64x1(scale, input_buffer, output_buffer);
+		__float32_to_uint64_neon_u64x1(input_buffer, output_buffer, scale, zero_point);
 		input_buffer++;
 		output_buffer++;
 	}
@@ -1086,7 +1193,8 @@ rte_ml_io_float32_to_uint64(float scale, uint64_t nb_elements, void *input, void
 }
 
 static inline void
-__uint64_to_float32_neon_f32x2(float scale, uint64_t *input, float *output)
+__uint64_to_float32_neon_f32x2(const uint64_t *input, float *output, float scale,
+			       uint64_t zero_point)
 {
 	uint64x2_t u64x2;
 	float64x2_t f64x2;
@@ -1101,6 +1209,9 @@ __uint64_to_float32_neon_f32x2(float scale, uint64_t *input, float *output)
 	/* convert float64x2_t to float32x2_t */
 	f32x2 = vcvt_f32_f64(f64x2);
 
+	/* subtract zero_point */
+	f32x2 = vsub_f32(f32x2, vdup_n_f32((float)zero_point));
+
 	/* scale */
 	f32x2 = vmul_n_f32(f32x2, scale);
 
@@ -1109,7 +1220,8 @@ __uint64_to_float32_neon_f32x2(float scale, uint64_t *input, float *output)
 }
 
 static inline void
-__uint64_to_float32_neon_f32x1(float scale, uint64_t *input, float *output)
+__uint64_to_float32_neon_f32x1(const uint64_t *input, float *output, float scale,
+			       uint64_t zero_point)
 {
 	uint64x2_t u64x2;
 	float64x2_t f64x2;
@@ -1124,17 +1236,21 @@ __uint64_to_float32_neon_f32x1(float scale, uint64_t *input, float *output)
 	/* convert float64x2_t to float32x2_t */
 	f32x2 = vcvt_f32_f64(f64x2);
 
+	/* subtract zero_point */
+	f32x2 = vsub_f32(f32x2, vdup_n_f32((float)zero_point));
+
 	/* scale */
 	f32x2 = vmul_n_f32(f32x2, scale);
 
-	/* store float32x2_t */
+	/* store float32x2_t lane 0 */
 	vst1_lane_f32(output, f32x2, 0);
 }
 
 int
-rte_ml_io_uint64_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_uint64_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+			   uint64_t zero_point)
 {
-	uint64_t *input_buffer;
+	const uint64_t *input_buffer;
 	float *output_buffer;
 	uint64_t nb_iterations;
 	uint32_t vlen;
@@ -1143,14 +1259,14 @@ rte_ml_io_uint64_to_float32(float scale, uint64_t nb_elements, void *input, void
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (uint64_t *)input;
+	input_buffer = (const uint64_t *)input;
 	output_buffer = (float *)output;
 	vlen = 4 * sizeof(float) / sizeof(uint64_t);
 	nb_iterations = nb_elements / vlen;
 
 	/* convert vlen elements in each iteration */
 	for (i = 0; i < nb_iterations; i++) {
-		__uint64_to_float32_neon_f32x2(scale, input_buffer, output_buffer);
+		__uint64_to_float32_neon_f32x2(input_buffer, output_buffer, scale, zero_point);
 		input_buffer += vlen;
 		output_buffer += vlen;
 	}
@@ -1158,7 +1274,7 @@ rte_ml_io_uint64_to_float32(float scale, uint64_t nb_elements, void *input, void
 	/* convert leftover elements */
 	i = i * vlen;
 	for (; i < nb_elements; i++) {
-		__uint64_to_float32_neon_f32x1(scale, input_buffer, output_buffer);
+		__uint64_to_float32_neon_f32x1(input_buffer, output_buffer, scale, zero_point);
 		input_buffer++;
 		output_buffer++;
 	}
@@ -1167,7 +1283,7 @@ rte_ml_io_uint64_to_float32(float scale, uint64_t nb_elements, void *input, void
 }
 
 static inline void
-__float32_to_float16_neon_f16x4(float32_t *input, float16_t *output)
+__float32_to_float16_neon_f16x4(const float32_t *input, float16_t *output)
 {
 	float32x4_t f32x4;
 	float16x4_t f16x4;
@@ -1183,7 +1299,7 @@ __float32_to_float16_neon_f16x4(float32_t *input, float16_t *output)
 }
 
 static inline void
-__float32_to_float16_neon_f16x1(float32_t *input, float16_t *output)
+__float32_to_float16_neon_f16x1(const float32_t *input, float16_t *output)
 {
 	float32x4_t f32x4;
 	float16x4_t f16x4;
@@ -1199,9 +1315,9 @@ __float32_to_float16_neon_f16x1(float32_t *input, float16_t *output)
 }
 
 int
-rte_ml_io_float32_to_float16(uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_float16(const void *input, void *output, uint64_t nb_elements)
 {
-	float32_t *input_buffer;
+	const float32_t *input_buffer;
 	float16_t *output_buffer;
 	uint64_t nb_iterations;
 	uint32_t vlen;
@@ -1210,7 +1326,7 @@ rte_ml_io_float32_to_float16(uint64_t nb_elements, void *input, void *output)
 	if ((nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (float32_t *)input;
+	input_buffer = (const float32_t *)input;
 	output_buffer = (float16_t *)output;
 	vlen = 2 * sizeof(float32_t) / sizeof(float16_t);
 	nb_iterations = nb_elements / vlen;
@@ -1234,7 +1350,7 @@ rte_ml_io_float32_to_float16(uint64_t nb_elements, void *input, void *output)
 }
 
 static inline void
-__float16_to_float32_neon_f32x4(float16_t *input, float32_t *output)
+__float16_to_float32_neon_f32x4(const float16_t *input, float32_t *output)
 {
 	float16x4_t f16x4;
 	float32x4_t f32x4;
@@ -1250,7 +1366,7 @@ __float16_to_float32_neon_f32x4(float16_t *input, float32_t *output)
 }
 
 static inline void
-__float16_to_float32_neon_f32x1(float16_t *input, float32_t *output)
+__float16_to_float32_neon_f32x1(const float16_t *input, float32_t *output)
 {
 	float16x4_t f16x4;
 	float32x4_t f32x4;
@@ -1266,9 +1382,9 @@ __float16_to_float32_neon_f32x1(float16_t *input, float32_t *output)
 }
 
 int
-rte_ml_io_float16_to_float32(uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float16_to_float32(const void *input, void *output, uint64_t nb_elements)
 {
-	float16_t *input_buffer;
+	const float16_t *input_buffer;
 	float32_t *output_buffer;
 	uint64_t nb_iterations;
 	uint32_t vlen;
@@ -1277,7 +1393,7 @@ rte_ml_io_float16_to_float32(uint64_t nb_elements, void *input, void *output)
 	if ((nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (float16_t *)input;
+	input_buffer = (const float16_t *)input;
 	output_buffer = (float32_t *)output;
 	vlen = 2 * sizeof(float32_t) / sizeof(float16_t);
 	nb_iterations = nb_elements / vlen;
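
As a reading aid for the NEON kernels above: every integer conversion now implements the same affine (asymmetric) mapping, quantizing as q = round(x / scale) + zero_point and dequantizing as x = scale * (q - zero_point). A minimal scalar sketch of that mapping is shown below; it is illustrative only, the helper names are not part of the patch, and the saturation handling of the real kernels is omitted.

#include <math.h>
#include <stdint.h>

/* Illustrative reference only: the affine mapping used by the kernels above. */
static inline int64_t
affine_quantize(float x, float scale, int64_t zero_point)
{
	/* q = round(x / scale) + zero_point */
	return (int64_t)(round(x / scale) + zero_point);
}

static inline float
affine_dequantize(int64_t q, float scale, int64_t zero_point)
{
	/* x = scale * (q - zero_point) */
	return scale * (float)(q - zero_point);
}

For example, with scale = 0.5 and zero_point = -1, x = 2.3 quantizes to round(4.6) - 1 = 4, and dequantizing 4 gives 0.5 * (4 + 1) = 2.5.
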
diff --git a/lib/mldev/mldev_utils_neon_bfloat16.c b/lib/mldev/mldev_utils_neon_bfloat16.c
index 8dec3fd8343..b8e68532669 100644
--- a/lib/mldev/mldev_utils_neon_bfloat16.c
+++ b/lib/mldev/mldev_utils_neon_bfloat16.c
@@ -18,7 +18,7 @@
 #ifdef __ARM_FEATURE_BF16
 
 static inline void
-__float32_to_bfloat16_neon_f16x4(float32_t *input, bfloat16_t *output)
+__float32_to_bfloat16_neon_f16x4(const float32_t *input, bfloat16_t *output)
 {
 	float32x4_t f32x4;
 	bfloat16x4_t bf16x4;
@@ -34,7 +34,7 @@ __float32_to_bfloat16_neon_f16x4(float32_t *input, bfloat16_t *output)
 }
 
 static inline void
-__float32_to_bfloat16_neon_f16x1(float32_t *input, bfloat16_t *output)
+__float32_to_bfloat16_neon_f16x1(const float32_t *input, bfloat16_t *output)
 {
 	float32x4_t f32x4;
 	bfloat16x4_t bf16x4;
@@ -50,9 +50,9 @@ __float32_to_bfloat16_neon_f16x1(float32_t *input, bfloat16_t *output)
 }
 
 int
-rte_ml_io_float32_to_bfloat16(uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_bfloat16(const void *input, void *output, uint64_t nb_elements)
 {
-	float32_t *input_buffer;
+	const float32_t *input_buffer;
 	bfloat16_t *output_buffer;
 	uint64_t nb_iterations;
 	uint32_t vlen;
@@ -61,7 +61,7 @@ rte_ml_io_float32_to_bfloat16(uint64_t nb_elements, void *input, void *output)
 	if ((nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (float32_t *)input;
+	input_buffer = (const float32_t *)input;
 	output_buffer = (bfloat16_t *)output;
 	vlen = 2 * sizeof(float32_t) / sizeof(bfloat16_t);
 	nb_iterations = nb_elements / vlen;
@@ -85,7 +85,7 @@ rte_ml_io_float32_to_bfloat16(uint64_t nb_elements, void *input, void *output)
 }
 
 static inline void
-__bfloat16_to_float32_neon_f32x4(bfloat16_t *input, float32_t *output)
+__bfloat16_to_float32_neon_f32x4(const bfloat16_t *input, float32_t *output)
 {
 	bfloat16x4_t bf16x4;
 	float32x4_t f32x4;
@@ -101,7 +101,7 @@ __bfloat16_to_float32_neon_f32x4(bfloat16_t *input, float32_t *output)
 }
 
 static inline void
-__bfloat16_to_float32_neon_f32x1(bfloat16_t *input, float32_t *output)
+__bfloat16_to_float32_neon_f32x1(const bfloat16_t *input, float32_t *output)
 {
 	bfloat16x4_t bf16x4;
 	float32x4_t f32x4;
@@ -117,9 +117,9 @@ __bfloat16_to_float32_neon_f32x1(bfloat16_t *input, float32_t *output)
 }
 
 int
-rte_ml_io_bfloat16_to_float32(uint64_t nb_elements, void *input, void *output)
+rte_ml_io_bfloat16_to_float32(const void *input, void *output, uint64_t nb_elements)
 {
-	bfloat16_t *input_buffer;
+	const bfloat16_t *input_buffer;
 	float32_t *output_buffer;
 	uint64_t nb_iterations;
 	uint32_t vlen;
@@ -128,7 +128,7 @@ rte_ml_io_bfloat16_to_float32(uint64_t nb_elements, void *input, void *output)
 	if ((nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (bfloat16_t *)input;
+	input_buffer = (const bfloat16_t *)input;
 	output_buffer = (float32_t *)output;
 	vlen = 2 * sizeof(float32_t) / sizeof(bfloat16_t);
 	nb_iterations = nb_elements / vlen;
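
Background for the bfloat16 NEON kernels in this file: bfloat16 keeps the sign bit, the 8 exponent bits and the top 7 mantissa bits of an IEEE-754 float32, so narrowing amounts to keeping the upper 16 bits of the word. A minimal truncating sketch follows; it is illustrative only (the helper name is not from the patch), and the library's scalar and NEON paths round to nearest instead of truncating.

#include <stdint.h>
#include <string.h>

/* Truncate a float32 to bfloat16 by keeping its upper 16 bits (no rounding). */
static inline uint16_t
f32_to_bf16_truncate(float x)
{
	uint32_t u32;

	memcpy(&u32, &x, sizeof(u32));
	return (uint16_t)(u32 >> 16);
}
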
diff --git a/lib/mldev/mldev_utils_scalar.c b/lib/mldev/mldev_utils_scalar.c
index 63a9900cc8c..e1fefdec3b4 100644
--- a/lib/mldev/mldev_utils_scalar.c
+++ b/lib/mldev/mldev_utils_scalar.c
@@ -10,9 +10,10 @@
  */
 
 int
-rte_ml_io_float32_to_int8(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_int8(const void *input, void *output, uint64_t nb_elements, float scale,
+			  int8_t zero_point)
 {
-	float *input_buffer;
+	const float *input_buffer;
 	int8_t *output_buffer;
 	uint64_t i;
 	int i32;
@@ -20,11 +21,11 @@ rte_ml_io_float32_to_int8(float scale, uint64_t nb_elements, void *input, void *
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (float *)input;
+	input_buffer = (const float *)input;
 	output_buffer = (int8_t *)output;
 
 	for (i = 0; i < nb_elements; i++) {
-		i32 = (int32_t)round((*input_buffer) * scale);
+		i32 = (int32_t)(round(*input_buffer / scale) + zero_point);
 
 		if (i32 < INT8_MIN)
 			i32 = INT8_MIN;
@@ -42,20 +43,21 @@ rte_ml_io_float32_to_int8(float scale, uint64_t nb_elements, void *input, void *
 }
 
 int
-rte_ml_io_int8_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_int8_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+			  int8_t zero_point)
 {
-	int8_t *input_buffer;
+	const int8_t *input_buffer;
 	float *output_buffer;
 	uint64_t i;
 
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (int8_t *)input;
+	input_buffer = (const int8_t *)input;
 	output_buffer = (float *)output;
 
 	for (i = 0; i < nb_elements; i++) {
-		*output_buffer = scale * (float)(*input_buffer);
+		*output_buffer = scale * (float)(*input_buffer - zero_point);
 
 		input_buffer++;
 		output_buffer++;
@@ -65,9 +67,10 @@ rte_ml_io_int8_to_float32(float scale, uint64_t nb_elements, void *input, void *
 }
 
 int
-rte_ml_io_float32_to_uint8(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_uint8(const void *input, void *output, uint64_t nb_elements, float scale,
+			   uint8_t zero_point)
 {
-	float *input_buffer;
+	const float *input_buffer;
 	uint8_t *output_buffer;
 	int32_t i32;
 	uint64_t i;
@@ -75,11 +78,11 @@ rte_ml_io_float32_to_uint8(float scale, uint64_t nb_elements, void *input, void
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (float *)input;
+	input_buffer = (const float *)input;
 	output_buffer = (uint8_t *)output;
 
 	for (i = 0; i < nb_elements; i++) {
-		i32 = (int32_t)round((*input_buffer) * scale);
+		i32 = (int32_t)(round(*input_buffer / scale) + zero_point);
 
 		if (i32 < 0)
 			i32 = 0;
@@ -97,20 +100,21 @@ rte_ml_io_float32_to_uint8(float scale, uint64_t nb_elements, void *input, void
 }
 
 int
-rte_ml_io_uint8_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_uint8_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+			   uint8_t zero_point)
 {
-	uint8_t *input_buffer;
+	const uint8_t *input_buffer;
 	float *output_buffer;
 	uint64_t i;
 
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (uint8_t *)input;
+	input_buffer = (const uint8_t *)input;
 	output_buffer = (float *)output;
 
 	for (i = 0; i < nb_elements; i++) {
-		*output_buffer = scale * (float)(*input_buffer);
+		*output_buffer = scale * (float)(*input_buffer - zero_point);
 
 		input_buffer++;
 		output_buffer++;
@@ -120,9 +124,10 @@ rte_ml_io_uint8_to_float32(float scale, uint64_t nb_elements, void *input, void
 }
 
 int
-rte_ml_io_float32_to_int16(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_int16(const void *input, void *output, uint64_t nb_elements, float scale,
+			   int16_t zero_point)
 {
-	float *input_buffer;
+	const float *input_buffer;
 	int16_t *output_buffer;
 	int32_t i32;
 	uint64_t i;
@@ -130,11 +135,11 @@ rte_ml_io_float32_to_int16(float scale, uint64_t nb_elements, void *input, void
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (float *)input;
+	input_buffer = (const float *)input;
 	output_buffer = (int16_t *)output;
 
 	for (i = 0; i < nb_elements; i++) {
-		i32 = (int32_t)round((*input_buffer) * scale);
+		i32 = (int32_t)(round(*input_buffer / scale) + zero_point);
 
 		if (i32 < INT16_MIN)
 			i32 = INT16_MIN;
@@ -152,20 +157,21 @@ rte_ml_io_float32_to_int16(float scale, uint64_t nb_elements, void *input, void
 }
 
 int
-rte_ml_io_int16_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_int16_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+			   int16_t zero_point)
 {
-	int16_t *input_buffer;
+	const int16_t *input_buffer;
 	float *output_buffer;
 	uint64_t i;
 
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (int16_t *)input;
+	input_buffer = (const int16_t *)input;
 	output_buffer = (float *)output;
 
 	for (i = 0; i < nb_elements; i++) {
-		*output_buffer = scale * (float)(*input_buffer);
+		*output_buffer = scale * (float)(*input_buffer - zero_point);
 
 		input_buffer++;
 		output_buffer++;
@@ -175,9 +181,10 @@ rte_ml_io_int16_to_float32(float scale, uint64_t nb_elements, void *input, void
 }
 
 int
-rte_ml_io_float32_to_uint16(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_uint16(const void *input, void *output, uint64_t nb_elements, float scale,
+			    uint16_t zero_point)
 {
-	float *input_buffer;
+	const float *input_buffer;
 	uint16_t *output_buffer;
 	int32_t i32;
 	uint64_t i;
@@ -185,11 +192,11 @@ rte_ml_io_float32_to_uint16(float scale, uint64_t nb_elements, void *input, void
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (float *)input;
+	input_buffer = (const float *)input;
 	output_buffer = (uint16_t *)output;
 
 	for (i = 0; i < nb_elements; i++) {
-		i32 = (int32_t)round((*input_buffer) * scale);
+		i32 = (int32_t)(round(*input_buffer / scale) + zero_point);
 
 		if (i32 < 0)
 			i32 = 0;
@@ -207,20 +214,21 @@ rte_ml_io_float32_to_uint16(float scale, uint64_t nb_elements, void *input, void
 }
 
 int
-rte_ml_io_uint16_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_uint16_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+			    uint16_t zero_point)
 {
-	uint16_t *input_buffer;
+	const uint16_t *input_buffer;
 	float *output_buffer;
 	uint64_t i;
 
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (uint16_t *)input;
+	input_buffer = (const uint16_t *)input;
 	output_buffer = (float *)output;
 
 	for (i = 0; i < nb_elements; i++) {
-		*output_buffer = scale * (float)(*input_buffer);
+		*output_buffer = scale * (float)(*input_buffer - zero_point);
 
 		input_buffer++;
 		output_buffer++;
@@ -230,20 +238,21 @@ rte_ml_io_uint16_to_float32(float scale, uint64_t nb_elements, void *input, void
 }
 
 int
-rte_ml_io_float32_to_int32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_int32(const void *input, void *output, uint64_t nb_elements, float scale,
+			   int32_t zero_point)
 {
-	float *input_buffer;
+	const float *input_buffer;
 	int32_t *output_buffer;
 	uint64_t i;
 
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (float *)input;
+	input_buffer = (const float *)input;
 	output_buffer = (int32_t *)output;
 
 	for (i = 0; i < nb_elements; i++) {
-		*output_buffer = (int32_t)round((*input_buffer) * scale);
+		*output_buffer = (int32_t)(round(*input_buffer / scale) + zero_point);
 
 		input_buffer++;
 		output_buffer++;
@@ -253,20 +262,21 @@ rte_ml_io_float32_to_int32(float scale, uint64_t nb_elements, void *input, void
 }
 
 int
-rte_ml_io_int32_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_int32_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+			   int32_t zero_point)
 {
-	int32_t *input_buffer;
+	const int32_t *input_buffer;
 	float *output_buffer;
 	uint64_t i;
 
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (int32_t *)input;
+	input_buffer = (const int32_t *)input;
 	output_buffer = (float *)output;
 
 	for (i = 0; i < nb_elements; i++) {
-		*output_buffer = scale * (float)(*input_buffer);
+		*output_buffer = scale * (float)(*input_buffer - zero_point);
 
 		input_buffer++;
 		output_buffer++;
@@ -276,9 +286,10 @@ rte_ml_io_int32_to_float32(float scale, uint64_t nb_elements, void *input, void
 }
 
 int
-rte_ml_io_float32_to_uint32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_uint32(const void *input, void *output, uint64_t nb_elements, float scale,
+			    uint32_t zero_point)
 {
-	float *input_buffer;
+	const float *input_buffer;
 	uint32_t *output_buffer;
 	int32_t i32;
 	uint64_t i;
@@ -286,11 +297,11 @@ rte_ml_io_float32_to_uint32(float scale, uint64_t nb_elements, void *input, void
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (float *)input;
+	input_buffer = (const float *)input;
 	output_buffer = (uint32_t *)output;
 
 	for (i = 0; i < nb_elements; i++) {
-		i32 = (int32_t)round((*input_buffer) * scale);
+		i32 = (int32_t)(round(*input_buffer / scale) + zero_point);
 
 		if (i32 < 0)
 			i32 = 0;
@@ -305,20 +316,21 @@ rte_ml_io_float32_to_uint32(float scale, uint64_t nb_elements, void *input, void
 }
 
 int
-rte_ml_io_uint32_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_uint32_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+			    uint32_t zero_point)
 {
-	uint32_t *input_buffer;
+	const uint32_t *input_buffer;
 	float *output_buffer;
 	uint64_t i;
 
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (uint32_t *)input;
+	input_buffer = (const uint32_t *)input;
 	output_buffer = (float *)output;
 
 	for (i = 0; i < nb_elements; i++) {
-		*output_buffer = scale * (float)(*input_buffer);
+		*output_buffer = scale * (float)(*input_buffer - zero_point);
 
 		input_buffer++;
 		output_buffer++;
@@ -328,20 +340,21 @@ rte_ml_io_uint32_to_float32(float scale, uint64_t nb_elements, void *input, void
 }
 
 int
-rte_ml_io_float32_to_int64(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_int64(const void *input, void *output, uint64_t nb_elements, float scale,
+			   int64_t zero_point)
 {
-	float *input_buffer;
+	const float *input_buffer;
 	int64_t *output_buffer;
 	uint64_t i;
 
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (float *)input;
+	input_buffer = (const float *)input;
 	output_buffer = (int64_t *)output;
 
 	for (i = 0; i < nb_elements; i++) {
-		*output_buffer = (int64_t)round((*input_buffer) * scale);
+		*output_buffer = (int64_t)(round(*input_buffer / scale) + zero_point);
 
 		input_buffer++;
 		output_buffer++;
@@ -351,20 +364,21 @@ rte_ml_io_float32_to_int64(float scale, uint64_t nb_elements, void *input, void
 }
 
 int
-rte_ml_io_int64_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_int64_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+			   int64_t zero_point)
 {
-	int64_t *input_buffer;
+	const int64_t *input_buffer;
 	float *output_buffer;
 	uint64_t i;
 
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (int64_t *)input;
+	input_buffer = (const int64_t *)input;
 	output_buffer = (float *)output;
 
 	for (i = 0; i < nb_elements; i++) {
-		*output_buffer = scale * (float)(*input_buffer);
+		*output_buffer = scale * (float)(*input_buffer - zero_point);
 
 		input_buffer++;
 		output_buffer++;
@@ -374,9 +388,10 @@ rte_ml_io_int64_to_float32(float scale, uint64_t nb_elements, void *input, void
 }
 
 int
-rte_ml_io_float32_to_uint64(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_uint64(const void *input, void *output, uint64_t nb_elements, float scale,
+			    uint64_t zero_point)
 {
-	float *input_buffer;
+	const float *input_buffer;
 	uint64_t *output_buffer;
 	int64_t i64;
 	uint64_t i;
@@ -384,11 +399,11 @@ rte_ml_io_float32_to_uint64(float scale, uint64_t nb_elements, void *input, void
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (float *)input;
+	input_buffer = (const float *)input;
 	output_buffer = (uint64_t *)output;
 
 	for (i = 0; i < nb_elements; i++) {
-		i64 = (int64_t)round((*input_buffer) * scale);
+		i64 = (int64_t)(round(*input_buffer / scale) + zero_point);
 
 		if (i64 < 0)
 			i64 = 0;
@@ -403,20 +418,21 @@ rte_ml_io_float32_to_uint64(float scale, uint64_t nb_elements, void *input, void
 }
 
 int
-rte_ml_io_uint64_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+rte_ml_io_uint64_to_float32(const void *input, void *output, uint64_t nb_elements, float scale,
+			    uint64_t zero_point)
 {
-	uint64_t *input_buffer;
+	const uint64_t *input_buffer;
 	float *output_buffer;
 	uint64_t i;
 
 	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (uint64_t *)input;
+	input_buffer = (const uint64_t *)input;
 	output_buffer = (float *)output;
 
 	for (i = 0; i < nb_elements; i++) {
-		*output_buffer = scale * (float)(*input_buffer);
+		*output_buffer = scale * (float)(*input_buffer - zero_point);
 
 		input_buffer++;
 		output_buffer++;
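
A short worked example of the saturating scalar paths above, with illustrative values: using scale = 0.1 and zero_point = 5, the float32-to-uint8 path computes round(x / scale) + zero_point, so -1.0 becomes -10 + 5 = -5 and is clamped to 0, 0.0 becomes 5, 1.0 becomes 15, and 30.0 becomes 305, which is clamped to UINT8_MAX (255). Dequantizing 15 with the same parameters returns scale * (15 - zero_point) = 1.0.
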
@@ -548,16 +564,16 @@ __float32_to_float16_scalar_rtn(float x)
 }
 
 int
-rte_ml_io_float32_to_float16(uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_float16(const void *input, void *output, uint64_t nb_elements)
 {
-	float *input_buffer;
+	const float *input_buffer;
 	uint16_t *output_buffer;
 	uint64_t i;
 
 	if ((nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (float *)input;
+	input_buffer = (const float *)input;
 	output_buffer = (uint16_t *)output;
 
 	for (i = 0; i < nb_elements; i++) {
@@ -632,16 +648,16 @@ __float16_to_float32_scalar_rtx(uint16_t f16)
 }
 
 int
-rte_ml_io_float16_to_float32(uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float16_to_float32(const void *input, void *output, uint64_t nb_elements)
 {
-	uint16_t *input_buffer;
+	const uint16_t *input_buffer;
 	float *output_buffer;
 	uint64_t i;
 
 	if ((nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (uint16_t *)input;
+	input_buffer = (const uint16_t *)input;
 	output_buffer = (float *)output;
 
 	for (i = 0; i < nb_elements; i++) {
diff --git a/lib/mldev/mldev_utils_scalar_bfloat16.c b/lib/mldev/mldev_utils_scalar_bfloat16.c
index 14374163131..3f93272518f 100644
--- a/lib/mldev/mldev_utils_scalar_bfloat16.c
+++ b/lib/mldev/mldev_utils_scalar_bfloat16.c
@@ -92,16 +92,16 @@ __float32_to_bfloat16_scalar_rtn(float x)
 }
 
 int
-rte_ml_io_float32_to_bfloat16(uint64_t nb_elements, void *input, void *output)
+rte_ml_io_float32_to_bfloat16(const void *input, void *output, uint64_t nb_elements)
 {
-	float *input_buffer;
+	const float *input_buffer;
 	uint16_t *output_buffer;
 	uint64_t i;
 
 	if ((nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (float *)input;
+	input_buffer = (const float *)input;
 	output_buffer = (uint16_t *)output;
 
 	for (i = 0; i < nb_elements; i++) {
@@ -174,16 +174,16 @@ __bfloat16_to_float32_scalar_rtx(uint16_t f16)
 }
 
 int
-rte_ml_io_bfloat16_to_float32(uint64_t nb_elements, void *input, void *output)
+rte_ml_io_bfloat16_to_float32(const void *input, void *output, uint64_t nb_elements)
 {
-	uint16_t *input_buffer;
+	const uint16_t *input_buffer;
 	float *output_buffer;
 	uint64_t i;
 
 	if ((nb_elements == 0) || (input == NULL) || (output == NULL))
 		return -EINVAL;
 
-	input_buffer = (uint16_t *)input;
+	input_buffer = (const uint16_t *)input;
 	output_buffer = (float *)output;
 
 	for (i = 0; i < nb_elements; i++) {
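
The widening direction handled above is exact: every bfloat16 value maps to a float32 by placing its 16 bits in the upper half of a 32-bit word. A minimal sketch, illustrative only (the helper name is not from the patch):

#include <stdint.h>
#include <string.h>

/* Widen a bfloat16 (carried as uint16_t) to float32; this conversion is lossless. */
static inline float
bf16_to_f32(uint16_t bf16)
{
	uint32_t u32 = (uint32_t)bf16 << 16;
	float x;

	memcpy(&x, &u32, sizeof(x));
	return x;
}
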
diff --git a/lib/mldev/rte_mldev.h b/lib/mldev/rte_mldev.h
index 634af3d5e1a..8b595839056 100644
--- a/lib/mldev/rte_mldev.h
+++ b/lib/mldev/rte_mldev.h
@@ -1013,6 +1013,468 @@ rte_ml_model_params_update(int16_t dev_id, uint16_t model_id, void *buffer);
 
 /* IO operations */
 
+/**
+ * Convert a buffer containing numbers in single precision floating format (float32) to signed 8-bit
+ * integer format (INT8).
+ *
+ * @param[in] fp32
+ *      Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] i8
+ *      Output buffer to store INT8 numbers. Size of buffer is equal to (nb_elements * 1) bytes.
+ * @param[in] nb_elements
+ *      Number of elements in the buffer.
+ * @param[in] scale
+ *      Scale factor for conversion.
+ * @param[in] zero_point
+ *      Zero point for conversion.
+ *
+ * @return
+ *      - 0, Success.
+ *      - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_float32_to_int8(const void *fp32, void *i8, uint64_t nb_elements, float scale,
+			  int8_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in signed 8-bit integer format (INT8) to single precision
+ * floating format (float32).
+ *
+ * @param[in] i8
+ *      Input buffer containing INT8 numbers. Size of buffer is equal to (nb_elements * 1) bytes.
+ * @param[out] fp32
+ *      Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[in] nb_elements
+ *      Number of elements in the buffer.
+ * @param[in] scale
+ *      Scale factor for conversion.
+ * @param[in] zero_point
+ *      Zero point for conversion.
+ *
+ * @return
+ *      - 0, Success.
+ *      - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_int8_to_float32(const void *i8, void *fp32, uint64_t nb_elements, float scale,
+			  int8_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in single precision floating format (float32) to unsigned
+ * 8-bit integer format (UINT8).
+ *
+ * @param[in] fp32
+ *      Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] ui8
+ *      Output buffer to store UINT8 numbers. Size of buffer is equal to (nb_elements * 1) bytes.
+ * @param[in] nb_elements
+ *      Number of elements in the buffer.
+ * @param[in] scale
+ *      Scale factor for conversion.
+ * @param[in] zero_point
+ *      Zero point for conversion.
+ *
+ * @return
+ *      - 0, Success.
+ *      - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_float32_to_uint8(const void *fp32, void *ui8, uint64_t nb_elements, float scale,
+			   uint8_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in unsigned 8-bit integer format (UINT8) to single precision
+ * floating format (float32).
+ *
+ * @param[in] ui8
+ *      Input buffer containing UINT8 numbers. Size of buffer is equal to (nb_elements * 1) bytes.
+ * @param[out] fp32
+ *      Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[in] nb_elements
+ *      Number of elements in the buffer.
+ * @param[in] scale
+ *      Scale factor for conversion.
+ * @param[in] zero_point
+ *      Zero point for conversion.
+ *
+ * @return
+ *      - 0, Success.
+ *      - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_uint8_to_float32(const void *ui8, void *fp32, uint64_t nb_elements, float scale,
+			   uint8_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in single precision floating format (float32) to signed
+ * 16-bit integer format (INT16).
+ *
+ * @param[in] fp32
+ *      Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] i16
+ *      Output buffer to store INT16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
+ * @param[in] nb_elements
+ *      Number of elements in the buffer.
+ * @param[in] scale
+ *      Scale factor for conversion.
+ * @param[in] zero_point
+ *      Zero point for conversion.
+ *
+ * @return
+ *      - 0, Success.
+ *      - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_float32_to_int16(const void *fp32, void *i16, uint64_t nb_elements, float scale,
+			   int16_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in signed 16-bit integer format (INT16) to single precision
+ * floating format (float32).
+ *
+ * @param[in] i16
+ *      Input buffer containing INT16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
+ * @param[out] fp32
+ *      Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[in] nb_elements
+ *      Number of elements in the buffer.
+ * @param[in] scale
+ *      Scale factor for conversion.
+ * @param[in] zero_point
+ *      Zero point for conversion.
+ *
+ * @return
+ *      - 0, Success.
+ *      - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_int16_to_float32(const void *i16, void *fp32, uint64_t nb_elements, float scale,
+			   int16_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in single precision floating format (float32) to unsigned
+ * 16-bit integer format (UINT16).
+ *
+ * @param[in] fp32
+ *      Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] ui16
+ *      Output buffer to store UINT16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
+ * @param[in] nb_elements
+ *      Number of elements in the buffer.
+ * @param[in] scale
+ *      Scale factor for conversion.
+ * @param[in] zero_point
+ *      Zero point for conversion.
+ *
+ * @return
+ *      - 0, Success.
+ *      - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_float32_to_uint16(const void *fp32, void *ui16, uint64_t nb_elements, float scale,
+			    uint16_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in unsigned 16-bit integer format (UINT16) to single
+ * precision floating format (float32).
+ *
+ * @param[in] ui16
+ *      Input buffer containing UINT16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
+ * @param[out] fp32
+ *      Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[in] nb_elements
+ *      Number of elements in the buffer.
+ * @param[in] scale
+ *      Scale factor for conversion.
+ * @param[in] zero_point
+ *      Zero point for conversion.
+ *
+ * @return
+ *      - 0, Success.
+ *      - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_uint16_to_float32(const void *ui16, void *fp32, uint64_t nb_elements, float scale,
+			    uint16_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in single precision floating format (float32) to signed
+ * 32-bit integer format (INT32).
+ *
+ * @param[in] fp32
+ *      Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] i32
+ *      Output buffer to store INT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[in] nb_elements
+ *      Number of elements in the buffer.
+ * @param[in] scale
+ *      Scale factor for conversion.
+ * @param[in] zero_point
+ *      Zero point for conversion.
+ *
+ * @return
+ *      - 0, Success.
+ *      - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_float32_to_int32(const void *fp32, void *i32, uint64_t nb_elements, float scale,
+			   int32_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in signed 32-bit integer format (INT32) to single precision
+ * floating format (float32).
+ *
+ * @param[in] i32
+ *      Input buffer containing INT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] fp32
+ *      Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[in] nb_elements
+ *      Number of elements in the buffer.
+ * @param[in] scale
+ *      Scale factor for conversion.
+ * @param[in] zero_point
+ *      Zero point for conversion.
+ *
+ * @return
+ *      - 0, Success.
+ *      - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_int32_to_float32(const void *i32, void *fp32, uint64_t nb_elements, float scale,
+			   int32_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in single precision floating format (float32) to unsigned
+ * 32-bit integer format (UINT32).
+ *
+ * @param[in] fp32
+ *      Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] ui32
+ *      Output buffer to store UINT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[in] nb_elements
+ *      Number of elements in the buffer.
+ * @param[in] scale
+ *      Scale factor for conversion.
+ * @param[in] zero_point
+ *      Zero point for conversion.
+ *
+ * @return
+ *      - 0, Success.
+ *      - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_float32_to_uint32(const void *fp32, void *ui32, uint64_t nb_elements, float scale,
+			    uint32_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in unsigned 32-bit integer format (UINT32) to single
+ * precision floating format (float32).
+ *
+ * @param[in] ui32
+ *      Input buffer containing UINT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] fp32
+ *      Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[in] nb_elements
+ *      Number of elements in the buffer.
+ * @param[in] scale
+ *      Scale factor for conversion.
+ * @param[in] zero_point
+ *      Zero point for conversion.
+ *
+ * @return
+ *      - 0, Success.
+ *      - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_uint32_to_float32(const void *ui32, void *fp32, uint64_t nb_elements, float scale,
+			    uint32_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in single precision floating format (float32) to signed
+ * 64-bit integer format (INT64).
+ *
+ * @param[in] fp32
+ *      Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] i64
+ *      Output buffer to store INT64 numbers. Size of buffer is equal to (nb_elements * 8) bytes.
+ * @param[in] nb_elements
+ *      Number of elements in the buffer.
+ * @param[in] scale
+ *      Scale factor for conversion.
+ * @param[in] zero_point
+ *      Zero point for conversion.
+ *
+ * @return
+ *      - 0, Success.
+ *      - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_float32_to_int64(const void *fp32, void *i64, uint64_t nb_elements, float scale,
+			   int64_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in signed 64-bit integer format (INT64) to single precision
+ * floating format (float32).
+ *
+ * @param[in] i64
+ *      Input buffer containing INT64 numbers. Size of buffer is equal to (nb_elements * 8) bytes.
+ * @param[out] fp32
+ *      Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[in] nb_elements
+ *      Number of elements in the buffer.
+ * @param[in] scale
+ *      Scale factor for conversion.
+ * @param[in] zero_point
+ *      Zero point for conversion.
+ *
+ * @return
+ *      - 0, Success.
+ *      - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_int64_to_float32(const void *i64, void *fp32, uint64_t nb_elements, float scale,
+			   int64_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in single precision floating format (float32) to unsigned
+ * 64-bit integer format (UINT64).
+ *
+ * @param[in] fp32
+ *      Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] ui64
+ *      Output buffer to store UINT64 numbers. Size of buffer is equal to (nb_elements * 8) bytes.
+ * @param[in] nb_elements
+ *      Number of elements in the buffer.
+ * @param[in] scale
+ *      Scale factor for conversion.
+ * @param[in] zero_point
+ *      Zero point for conversion.
+ *
+ * @return
+ *      - 0, Success.
+ *      - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_float32_to_uint64(const void *fp32, void *ui64, uint64_t nb_elements, float scale,
+			    uint64_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in unsigned 64-bit integer format (UINT64) to single
+ * precision floating format (float32).
+ *
+ * @param[in] ui64
+ *      Input buffer containing UINT64 numbers. Size of buffer is equal to (nb_elements * 8) bytes.
+ * @param[out] fp32
+ *      Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[in] nb_elements
+ *      Number of elements in the buffer.
+ * @param[in] scale
+ *      Scale factor for conversion.
+ * @param[in] zero_point
+ *      Zero point for conversion.
+ *
+ * @return
+ *      - 0, Success.
+ *      - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_uint64_to_float32(const void *ui64, void *fp32, uint64_t nb_elements, float scale,
+			    uint64_t zero_point);
+
+/**
+ * Convert a buffer containing numbers in single precision floating format (float32) to half
+ * precision floating point format (FP16).
+ *
+ * @param[in] fp32
+ *      Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] fp16
+ *      Output buffer to store float16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
+ * @param[in] nb_elements
+ *      Number of elements in the buffer.
+ *
+ * @return
+ *      - 0, Success.
+ *      - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_float32_to_float16(const void *fp32, void *fp16, uint64_t nb_elements);
+
+/**
+ * Convert a buffer containing numbers in half precision floating format (FP16) to single precision
+ * floating point format (float32).
+ *
+ * @param[in] fp16
+ *      Input buffer containing float16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
+ * @param[out] fp32
+ *      Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[in] nb_elements
+ *      Number of elements in the buffer.
+ *
+ * @return
+ *      - 0, Success.
+ *      - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_float16_to_float32(const void *fp16, void *fp32, uint64_t nb_elements);
+
+/**
+ * Convert a buffer containing numbers in single precision floating format (float32) to brain
+ * floating point format (bfloat16).
+ *
+ * @param[in] fp32
+ *      Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] bf16
+ *      Output buffer to store bfloat16 numbers. Size of buffer is equal to (nb_elements * 2) bytes.
+ * @param[in] nb_elements
+ *      Number of elements in the buffer.
+ *
+ * @return
+ *      - 0, Success.
+ *      - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_float32_to_bfloat16(const void *fp32, void *bf16, uint64_t nb_elements);
+
+/**
+ * Convert a buffer containing numbers in brain floating point format (bfloat16) to single precision
+ * floating point format (float32).
+ *
+ * @param[in] bf16
+ *      Input buffer containing bfloat16 numbers. Size of buffer is equal to (nb_elements * 2)
+ * bytes.
+ * @param[out] fp32
+ *      Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[in] nb_elements
+ *      Number of elements in the buffer.
+ *
+ * @return
+ *      - 0, Success.
+ *      - < 0, Error code on failure.
+ */
+__rte_experimental
+int
+rte_ml_io_bfloat16_to_float32(const void *bf16, void *fp32, uint64_t nb_elements);
+
 /**
  * Quantize input data.
  *
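
To show how an application is expected to call the conversion routines declared above, here is a minimal usage sketch. It is illustrative only: the helper name, buffer contents, scale and zero_point are examples (in practice scale and zero_point come from the model's quantization parameters), and these symbols are experimental.

#include <stdint.h>

#include <rte_mldev.h>

/* Quantize a small float32 vector to INT8 and dequantize it back. */
static int
int8_round_trip_example(void)
{
	const float in[4] = {-1.5f, -0.25f, 0.5f, 1.0f};
	int8_t q[4];
	float out[4];
	int ret;

	ret = rte_ml_io_float32_to_int8(in, q, 4, 0.05f, 10);
	if (ret != 0)
		return ret;

	return rte_ml_io_int8_to_float32(q, out, 4, 0.05f, 10);
}
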
diff --git a/lib/mldev/version.map b/lib/mldev/version.map
index 84bdd6c3004..6f0a70b4bd1 100644
--- a/lib/mldev/version.map
+++ b/lib/mldev/version.map
@@ -23,6 +23,26 @@ EXPERIMENTAL {
 	rte_ml_dev_xstats_names_get;
 	rte_ml_dev_xstats_reset;
 	rte_ml_enqueue_burst;
+	rte_ml_io_float32_to_int8;
+	rte_ml_io_int8_to_float32;
+	rte_ml_io_float32_to_uint8;
+	rte_ml_io_uint8_to_float32;
+	rte_ml_io_float32_to_int16;
+	rte_ml_io_int16_to_float32;
+	rte_ml_io_float32_to_uint16;
+	rte_ml_io_uint16_to_float32;
+	rte_ml_io_float32_to_int32;
+	rte_ml_io_int32_to_float32;
+	rte_ml_io_float32_to_uint32;
+	rte_ml_io_uint32_to_float32;
+	rte_ml_io_float32_to_int64;
+	rte_ml_io_int64_to_float32;
+	rte_ml_io_float32_to_uint64;
+	rte_ml_io_uint64_to_float32;
+	rte_ml_io_float32_to_float16;
+	rte_ml_io_float16_to_float32;
+	rte_ml_io_float32_to_bfloat16;
+	rte_ml_io_bfloat16_to_float32;
 	rte_ml_io_dequantize;
 	rte_ml_io_quantize;
 	rte_ml_model_info_get;
@@ -50,24 +70,4 @@ INTERNAL {
 
 	rte_ml_io_type_size_get;
 	rte_ml_io_type_to_str;
-	rte_ml_io_float32_to_int8;
-	rte_ml_io_int8_to_float32;
-	rte_ml_io_float32_to_uint8;
-	rte_ml_io_uint8_to_float32;
-	rte_ml_io_float32_to_int16;
-	rte_ml_io_int16_to_float32;
-	rte_ml_io_float32_to_uint16;
-	rte_ml_io_uint16_to_float32;
-	rte_ml_io_float32_to_int32;
-	rte_ml_io_int32_to_float32;
-	rte_ml_io_float32_to_uint32;
-	rte_ml_io_uint32_to_float32;
-	rte_ml_io_float32_to_int64;
-	rte_ml_io_int64_to_float32;
-	rte_ml_io_float32_to_uint64;
-	rte_ml_io_uint64_to_float32;
-	rte_ml_io_float32_to_float16;
-	rte_ml_io_float16_to_float32;
-	rte_ml_io_float32_to_bfloat16;
-	rte_ml_io_bfloat16_to_float32;
 };
-- 
2.45.1

