DPDK patches and discussions
 help / color / mirror / Atom feed
From: Srikanth Yalavarthi <syalavarthi@marvell.com>
To: Srikanth Yalavarthi <syalavarthi@marvell.com>,
	Ruifeng Wang <ruifeng.wang@arm.com>
Cc: <dev@dpdk.org>, <aprabhu@marvell.com>, <sshankarnara@marvell.com>,
	<ptakkar@marvell.com>
Subject: [PATCH 1/3] mldev: add conversion routines for 32-bit integers
Date: Sun, 7 Jan 2024 07:28:10 -0800	[thread overview]
Message-ID: <20240107152813.2668-2-syalavarthi@marvell.com> (raw)
In-Reply-To: <20240107152813.2668-1-syalavarthi@marvell.com>

Added routines to convert data from 32-bit integer type to
float32_t and vice-versa.

Signed-off-by: Srikanth Yalavarthi <syalavarthi@marvell.com>
---
 lib/mldev/mldev_utils.h        |  92 +++++++++++++
 lib/mldev/mldev_utils_neon.c   | 242 +++++++++++++++++++++++++++++++++
 lib/mldev/mldev_utils_scalar.c |  98 +++++++++++++
 lib/mldev/version.map          |   4 +
 4 files changed, 436 insertions(+)

diff --git a/lib/mldev/mldev_utils.h b/lib/mldev/mldev_utils.h
index 220afb42f0d..1d041531b43 100644
--- a/lib/mldev/mldev_utils.h
+++ b/lib/mldev/mldev_utils.h
@@ -236,6 +236,98 @@ __rte_internal
 int
 rte_ml_io_uint16_to_float32(float scale, uint64_t nb_elements, void *input, void *output);
 
+/**
+ * @internal
+ *
+ * Convert a buffer containing numbers in single precision floating format (float32) to signed
+ * 32-bit integer format (INT32).
+ *
+ * @param[in] scale
+ *      Scale factor for conversion.
+ * @param[in] nb_elements
+ *	Number of elements in the buffer.
+ * @param[in] input
+ *	Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] output
+ *	Output buffer to store INT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ *
+ * @return
+ *	- 0, Success.
+ *	- < 0, Error code on failure.
+ */
+__rte_internal
+int
+rte_ml_io_float32_to_int32(float scale, uint64_t nb_elements, void *input, void *output);
+
+/**
+ * @internal
+ *
+ * Convert a buffer containing numbers in signed 32-bit integer format (INT32) to single precision
+ * floating format (float32).
+ *
+ * @param[in] scale
+ *      Scale factor for conversion.
+ * @param[in] nb_elements
+ *	Number of elements in the buffer.
+ * @param[in] input
+ *	Input buffer containing INT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] output
+ *	Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ *
+ * @return
+ *	- 0, Success.
+ *	- < 0, Error code on failure.
+ */
+__rte_internal
+int
+rte_ml_io_int32_to_float32(float scale, uint64_t nb_elements, void *input, void *output);
+
+/**
+ * @internal
+ *
+ * Convert a buffer containing numbers in single precision floating format (float32) to unsigned
+ * 32-bit integer format (UINT32).
+ *
+ * @param[in] scale
+ *      Scale factor for conversion.
+ * @param[in] nb_elements
+ *	Number of elements in the buffer.
+ * @param[in] input
+ *	Input buffer containing float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] output
+ *	Output buffer to store UINT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ *
+ * @return
+ *	- 0, Success.
+ *	- < 0, Error code on failure.
+ */
+__rte_internal
+int
+rte_ml_io_float32_to_uint32(float scale, uint64_t nb_elements, void *input, void *output);
+
+/**
+ * @internal
+ *
+ * Convert a buffer containing numbers in unsigned 32-bit integer format (UINT32) to single
+ * precision floating format (float32).
+ *
+ * @param[in] scale
+ *      Scale factor for conversion.
+ * @param[in] nb_elements
+ *	Number of elements in the buffer.
+ * @param[in] input
+ *	Input buffer containing UINT32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ * @param[out] output
+ *	Output buffer to store float32 numbers. Size of buffer is equal to (nb_elements * 4) bytes.
+ *
+ * @return
+ *	- 0, Success.
+ *	- < 0, Error code on failure.
+ */
+__rte_internal
+int
+rte_ml_io_uint32_to_float32(float scale, uint64_t nb_elements, void *input, void *output);
+
 /**
  * @internal
  *
diff --git a/lib/mldev/mldev_utils_neon.c b/lib/mldev/mldev_utils_neon.c
index c7baec012b8..250fa43fa73 100644
--- a/lib/mldev/mldev_utils_neon.c
+++ b/lib/mldev/mldev_utils_neon.c
@@ -600,6 +600,248 @@ rte_ml_io_uint16_to_float32(float scale, uint64_t nb_elements, void *input, void
 	return 0;
 }
 
+static inline void
+__float32_to_int32_neon_s32x4(float scale, float *input, int32_t *output)
+{
+	float32x4_t f32x4;
+	int32x4_t s32x4;
+
+	/* load 4 x float elements */
+	f32x4 = vld1q_f32(input);
+
+	/* scale */
+	f32x4 = vmulq_n_f32(f32x4, scale);
+
+	/* convert to int32x4_t using round to nearest with ties away rounding mode */
+	s32x4 = vcvtaq_s32_f32(f32x4);
+
+	/* store 4 elements */
+	vst1q_s32(output, s32x4);
+}
+
+static inline void
+__float32_to_int32_neon_s32x1(float scale, float *input, int32_t *output)
+{
+	/* scale and convert, round to nearest with ties away rounding mode */
+	*output = vcvtas_s32_f32(scale * (*input));
+}
+
+int
+rte_ml_io_float32_to_int32(float scale, uint64_t nb_elements, void *input, void *output)
+{
+	float *input_buffer;
+	int32_t *output_buffer;
+	uint64_t nb_iterations;
+	uint32_t vlen;
+	uint64_t i;
+
+	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
+		return -EINVAL;
+
+	input_buffer = (float *)input;
+	output_buffer = (int32_t *)output;
+	vlen = 2 * sizeof(float) / sizeof(int32_t);
+	nb_iterations = nb_elements / vlen;
+
+	/* convert vlen elements in each iteration */
+	for (i = 0; i < nb_iterations; i++) {
+		__float32_to_int32_neon_s32x4(scale, input_buffer, output_buffer);
+		input_buffer += vlen;
+		output_buffer += vlen;
+	}
+
+	/* convert leftover elements */
+	i = i * vlen;
+	for (; i < nb_elements; i++) {
+		__float32_to_int32_neon_s32x1(scale, input_buffer, output_buffer);
+		input_buffer++;
+		output_buffer++;
+	}
+
+	return 0;
+}
+
+static inline void
+__int32_to_float32_neon_f32x4(float scale, int32_t *input, float *output)
+{
+	float32x4_t f32x4;
+	int32x4_t s32x4;
+
+	/* load 4 x int32_t elements */
+	s32x4 = vld1q_s32(input);
+
+	/* convert int32_t to float */
+	f32x4 = vcvtq_f32_s32(s32x4);
+
+	/* scale */
+	f32x4 = vmulq_n_f32(f32x4, scale);
+
+	/* store float32x4_t */
+	vst1q_f32(output, f32x4);
+}
+
+static inline void
+__int32_to_float32_neon_f32x1(float scale, int32_t *input, float *output)
+{
+	*output = scale * vcvts_f32_s32(*input);
+}
+
+int
+rte_ml_io_int32_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+{
+	int32_t *input_buffer;
+	float *output_buffer;
+	uint64_t nb_iterations;
+	uint32_t vlen;
+	uint64_t i;
+
+	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
+		return -EINVAL;
+
+	input_buffer = (int32_t *)input;
+	output_buffer = (float *)output;
+	vlen = 2 * sizeof(float) / sizeof(int32_t);
+	nb_iterations = nb_elements / vlen;
+
+	/* convert vlen elements in each iteration */
+	for (i = 0; i < nb_iterations; i++) {
+		__int32_to_float32_neon_f32x4(scale, input_buffer, output_buffer);
+		input_buffer += vlen;
+		output_buffer += vlen;
+	}
+
+	/* convert leftover elements */
+	i = i * vlen;
+	for (; i < nb_elements; i++) {
+		__int32_to_float32_neon_f32x1(scale, input_buffer, output_buffer);
+		input_buffer++;
+		output_buffer++;
+	}
+
+	return 0;
+}
+
+static inline void
+__float32_to_uint32_neon_u32x4(float scale, float *input, uint32_t *output)
+{
+	float32x4_t f32x4;
+	uint32x4_t u32x4;
+
+	/* load 4 float elements */
+	f32x4 = vld1q_f32(input);
+
+	/* scale */
+	f32x4 = vmulq_n_f32(f32x4, scale);
+
+	/* convert using round to nearest with ties to away rounding mode */
+	u32x4 = vcvtaq_u32_f32(f32x4);
+
+	/* store 4 elements */
+	vst1q_u32(output, u32x4);
+}
+
+static inline void
+__float32_to_uint32_neon_u32x1(float scale, float *input, uint32_t *output)
+{
+	/* scale and convert, round to nearest with ties away rounding mode */
+	*output = vcvtas_u32_f32(scale * (*input));
+}
+
+int
+rte_ml_io_float32_to_uint32(float scale, uint64_t nb_elements, void *input, void *output)
+{
+	float *input_buffer;
+	uint32_t *output_buffer;
+	uint64_t nb_iterations;
+	uint64_t vlen;
+	uint64_t i;
+
+	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
+		return -EINVAL;
+
+	input_buffer = (float *)input;
+	output_buffer = (uint32_t *)output;
+	vlen = 2 * sizeof(float) / sizeof(uint32_t);
+	nb_iterations = nb_elements / vlen;
+
+	/* convert vlen elements in each iteration */
+	for (i = 0; i < nb_iterations; i++) {
+		__float32_to_uint32_neon_u32x4(scale, input_buffer, output_buffer);
+		input_buffer += vlen;
+		output_buffer += vlen;
+	}
+
+	/* convert leftover elements */
+	i = i * vlen;
+	for (; i < nb_elements; i++) {
+		__float32_to_uint32_neon_u32x1(scale, input_buffer, output_buffer);
+		input_buffer++;
+		output_buffer++;
+	}
+
+	return 0;
+}
+
+static inline void
+__uint32_to_float32_neon_f32x4(float scale, uint32_t *input, float *output)
+{
+	float32x4_t f32x4;
+	uint32x4_t u32x4;
+
+	/* load 4 x uint32_t elements */
+	u32x4 = vld1q_u32(input);
+
+	/* convert uint32_t to float */
+	f32x4 = vcvtq_f32_u32(u32x4);
+
+	/* scale */
+	f32x4 = vmulq_n_f32(f32x4, scale);
+
+	/* store float32x4_t */
+	vst1q_f32(output, f32x4);
+}
+
+static inline void
+__uint32_to_float32_neon_f32x1(float scale, uint32_t *input, float *output)
+{
+	*output = scale * vcvts_f32_u32(*input);
+}
+
+int
+rte_ml_io_uint32_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+{
+	uint32_t *input_buffer;
+	float *output_buffer;
+	uint64_t nb_iterations;
+	uint32_t vlen;
+	uint64_t i;
+
+	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
+		return -EINVAL;
+
+	input_buffer = (uint32_t *)input;
+	output_buffer = (float *)output;
+	vlen = 2 * sizeof(float) / sizeof(uint32_t);
+	nb_iterations = nb_elements / vlen;
+
+	/* convert vlen elements in each iteration */
+	for (i = 0; i < nb_iterations; i++) {
+		__uint32_to_float32_neon_f32x4(scale, input_buffer, output_buffer);
+		input_buffer += vlen;
+		output_buffer += vlen;
+	}
+
+	/* convert leftover elements */
+	i = i * vlen;
+	for (; i < nb_elements; i++) {
+		__uint32_to_float32_neon_f32x1(scale, input_buffer, output_buffer);
+		input_buffer++;
+		output_buffer++;
+	}
+
+	return 0;
+}
+
 static inline void
 __float32_to_float16_neon_f16x4(float32_t *input, float16_t *output)
 {
diff --git a/lib/mldev/mldev_utils_scalar.c b/lib/mldev/mldev_utils_scalar.c
index 4d6cb880240..af1a3a103b2 100644
--- a/lib/mldev/mldev_utils_scalar.c
+++ b/lib/mldev/mldev_utils_scalar.c
@@ -229,6 +229,104 @@ rte_ml_io_uint16_to_float32(float scale, uint64_t nb_elements, void *input, void
 	return 0;
 }
 
+int
+rte_ml_io_float32_to_int32(float scale, uint64_t nb_elements, void *input, void *output)
+{
+	float *input_buffer;
+	int32_t *output_buffer;
+	uint64_t i;
+
+	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
+		return -EINVAL;
+
+	input_buffer = (float *)input;
+	output_buffer = (int32_t *)output;
+
+	for (i = 0; i < nb_elements; i++) {
+		*output_buffer = (int32_t)round((*input_buffer) * scale);
+
+		input_buffer++;
+		output_buffer++;
+	}
+
+	return 0;
+}
+
+int
+rte_ml_io_int32_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+{
+	int32_t *input_buffer;
+	float *output_buffer;
+	uint64_t i;
+
+	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
+		return -EINVAL;
+
+	input_buffer = (int32_t *)input;
+	output_buffer = (float *)output;
+
+	for (i = 0; i < nb_elements; i++) {
+		*output_buffer = scale * (float)(*input_buffer);
+
+		input_buffer++;
+		output_buffer++;
+	}
+
+	return 0;
+}
+
+int
+rte_ml_io_float32_to_uint32(float scale, uint64_t nb_elements, void *input, void *output)
+{
+	float *input_buffer;
+	uint32_t *output_buffer;
+	int32_t i32;
+	uint64_t i;
+
+	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
+		return -EINVAL;
+
+	input_buffer = (float *)input;
+	output_buffer = (uint32_t *)output;
+
+	for (i = 0; i < nb_elements; i++) {
+		i32 = (int32_t)round((*input_buffer) * scale);
+
+		if (i32 < 0)
+			i32 = 0;
+
+		*output_buffer = (uint32_t)i32;
+
+		input_buffer++;
+		output_buffer++;
+	}
+
+	return 0;
+}
+
+int
+rte_ml_io_uint32_to_float32(float scale, uint64_t nb_elements, void *input, void *output)
+{
+	uint32_t *input_buffer;
+	float *output_buffer;
+	uint64_t i;
+
+	if ((scale == 0) || (nb_elements == 0) || (input == NULL) || (output == NULL))
+		return -EINVAL;
+
+	input_buffer = (uint32_t *)input;
+	output_buffer = (float *)output;
+
+	for (i = 0; i < nb_elements; i++) {
+		*output_buffer = scale * (float)(*input_buffer);
+
+		input_buffer++;
+		output_buffer++;
+	}
+
+	return 0;
+}
+
 /* Convert a single precision floating point number (float32) into a half precision
  * floating point number (float16) using round to nearest rounding mode.
  */
diff --git a/lib/mldev/version.map b/lib/mldev/version.map
index 99841db6aa9..2e8f1555225 100644
--- a/lib/mldev/version.map
+++ b/lib/mldev/version.map
@@ -57,6 +57,10 @@ INTERNAL {
 	rte_ml_io_int16_to_float32;
 	rte_ml_io_float32_to_uint16;
 	rte_ml_io_uint16_to_float32;
+	rte_ml_io_float32_to_int32;
+	rte_ml_io_int32_to_float32;
+	rte_ml_io_float32_to_uint32;
+	rte_ml_io_uint32_to_float32;
 	rte_ml_io_float32_to_float16;
 	rte_ml_io_float16_to_float32;
 	rte_ml_io_float32_to_bfloat16;
-- 
2.42.0


  reply	other threads:[~2024-01-07 15:28 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-07 15:28 [PATCH 0/3] add support for additional data types Srikanth Yalavarthi
2024-01-07 15:28 ` Srikanth Yalavarthi [this message]
2024-01-07 15:28 ` [PATCH 2/3] mldev: add support for 64-integer data type Srikanth Yalavarthi
2024-01-07 15:28 ` [PATCH 3/3] ml/cnxk: add support for additional integer types Srikanth Yalavarthi
2024-02-18 22:14 ` [PATCH 0/3] add support for additional data types Thomas Monjalon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240107152813.2668-2-syalavarthi@marvell.com \
    --to=syalavarthi@marvell.com \
    --cc=aprabhu@marvell.com \
    --cc=dev@dpdk.org \
    --cc=ptakkar@marvell.com \
    --cc=ruifeng.wang@arm.com \
    --cc=sshankarnara@marvell.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).