* |WARNING| pw125066 [PATCH v2 1/1] mldev: split bfloat16 routines to separate files
[not found] <20230313120306.28911-1-syalavarthi@marvell.com>
@ 2023-03-13 11:50 ` qemudev
2023-03-13 12:03 ` |SUCCESS| " checkpatch
1 sibling, 0 replies; 3+ messages in thread
From: qemudev @ 2023-03-13 11:50 UTC (permalink / raw)
To: test-report; +Cc: Srikanth Yalavarthi, zhoumin
Test-Label: loongarch-compilation
Test-Status: WARNING
http://dpdk.org/patch/125066
_apply patch failure_
Submitter: Srikanth Yalavarthi <syalavarthi@marvell.com>
Date: Mon, 13 Mar 2023 05:03:06 -0700
DPDK git baseline: Repo:dpdk
Branch: main
CommitID: baf13c3135d0c5998fff7edc23fb89412dc89246
Apply patch set 125066 failed:
Checking patch lib/mldev/meson.build...
error: while searching for:
        'mldev_utils.c',
)

if dpdk_conf.has('RTE_ARCH_ARM64')
    sources += files('mldev_utils_neon.c')
else
    sources += files('mldev_utils_scalar.c')
endif

headers = files(
        'rte_mldev.h',
)
error: patch failed: lib/mldev/meson.build:7
error: lib/mldev/meson.build: patch does not apply
Checking patch lib/mldev/mldev_utils_neon.c...
Checking patch lib/mldev/mldev_utils_neon_bfloat16.c...
Checking patch lib/mldev/mldev_utils_scalar.c...
error: while searching for:
* Copyright (c) 2022 Marvell.
*/
#include <errno.h>
#include <math.h>
#include <stdint.h>
#include "mldev_utils.h"
/* Description:
* This file implements scalar versions of Machine Learning utility functions used to convert data
* types from higher precision to lower precision and vice-versa.
*/
#ifndef BIT
#define BIT(nr) (1UL << (nr))
#endif
#ifndef BITS_PER_LONG
#define BITS_PER_LONG (__SIZEOF_LONG__ * 8)
#endif
#ifndef GENMASK_U32
#define GENMASK_U32(h, l) (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
#endif
/* float32: bit index of MSB & LSB of sign, exponent and mantissa */
#define FP32_LSB_M 0
#define FP32_MSB_M 22
#define FP32_LSB_E 23
#define FP32_MSB_E 30
#define FP32_LSB_S 31
#define FP32_MSB_S 31
/* float32: bitmask for sign, exponent and mantissa */
#define FP32_MASK_S GENMASK_U32(FP32_MSB_S, FP32_LSB_S)
#define FP32_MASK_E GENMASK_U32(FP32_MSB_E, FP32_LSB_E)
#define FP32_MASK_M GENMASK_U32(FP32_MSB_M, FP32_LSB_M)
/* float16: bit index of MSB & LSB of sign, exponent and mantissa */
#define FP16_LSB_M 0
#define FP16_MSB_M 9
#define FP16_LSB_E 10
#define FP16_MSB_E 14
#define FP16_LSB_S 15
#define FP16_MSB_S 15
/* float16: bitmask for sign, exponent and mantissa */
#define FP16_MASK_S GENMASK_U32(FP16_MSB_S, FP16_LSB_S)
#define FP16_MASK_E GENMASK_U32(FP16_MSB_E, FP16_LSB_E)
#define FP16_MASK_M GENMASK_U32(FP16_MSB_M, FP16_LSB_M)
/* bfloat16: bit index of MSB & LSB of sign, exponent and mantissa */
#define BF16_LSB_M 0
#define BF16_MSB_M 6
#define BF16_LSB_E 7
#define BF16_MSB_E 14
#define BF16_LSB_S 15
#define BF16_MSB_S 15
/* bfloat16: bitmask for sign, exponent and mantissa */
#define BF16_MASK_S GENMASK_U32(BF16_MSB_S, BF16_LSB_S)
#define BF16_MASK_E GENMASK_U32(BF16_MSB_E, BF16_LSB_E)
#define BF16_MASK_M GENMASK_U32(BF16_MSB_M, BF16_LSB_M)
/* Exponent bias */
#define FP32_BIAS_E 127
#define FP16_BIAS_E 15
#define BF16_BIAS_E 127
#define FP32_PACK(sign, exponent, mantissa) \
(((sign) << FP32_LSB_S) | ((exponent) << FP32_LSB_E) | (mantissa))
#define FP16_PACK(sign, exponent, mantissa) \
(((sign) << FP16_LSB_S) | ((exponent) << FP16_LSB_E) | (mantissa))
#define BF16_PACK(sign, exponent, mantissa) \
(((sign) << BF16_LSB_S) | ((exponent) << BF16_LSB_E) | (mantissa))
/* Represent float32 as float and uint32_t */
union float32 {
float f;
uint32_t u;
};
int
rte_ml_io_float32_to_int8(float scale, uint64_t nb_elements, void *input, void *output)
{
error: patch failed: lib/mldev/mldev_utils_scalar.c:2
error: lib/mldev/mldev_utils_scalar.c: patch does not apply
Checking patch lib/mldev/mldev_utils_scalar.h...
Checking patch lib/mldev/mldev_utils_scalar_bfloat16.c...
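
For reference, the BIT()/GENMASK_U32() helpers quoted in the failing context above are plain bit-field macros. The following standalone C snippet is illustrative only (it is not part of the patch or of the DPDK tree; the main() and the sample value are added here for demonstration) and shows how those macros carve a float32 into the sign, exponent and mantissa fields that the conversion routines operate on:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BIT(nr)           (1UL << (nr))
#define BITS_PER_LONG     (__SIZEOF_LONG__ * 8)
#define GENMASK_U32(h, l) (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h))))

/* float32 field positions, as in the quoted context */
#define FP32_LSB_M 0
#define FP32_MSB_M 22
#define FP32_LSB_E 23
#define FP32_MSB_E 30
#define FP32_LSB_S 31
#define FP32_MSB_S 31

#define FP32_MASK_S GENMASK_U32(FP32_MSB_S, FP32_LSB_S) /* 0x80000000 */
#define FP32_MASK_E GENMASK_U32(FP32_MSB_E, FP32_LSB_E) /* 0x7f800000 */
#define FP32_MASK_M GENMASK_U32(FP32_MSB_M, FP32_LSB_M) /* 0x007fffff */

int
main(void)
{
        float f = -1.5f;
        uint32_t u;

        memcpy(&u, &f, sizeof(u));      /* reinterpret the float32 bit pattern */
        printf("sign=%u exp=%u mant=0x%06x\n",
               (unsigned int)((u & FP32_MASK_S) >> FP32_LSB_S),
               (unsigned int)((u & FP32_MASK_E) >> FP32_LSB_E),
               (unsigned int)((u & FP32_MASK_M) >> FP32_LSB_M));
        /* prints: sign=1 exp=127 mant=0x400000 */
        return 0;
}
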
* |SUCCESS| pw125066 [PATCH v2 1/1] mldev: split bfloat16 routines to separate files
[not found] <20230313120306.28911-1-syalavarthi@marvell.com>
2023-03-13 11:50 ` |WARNING| pw125066 [PATCH v2 1/1] mldev: split bfloat16 routines to separate files qemudev
@ 2023-03-13 12:03 ` checkpatch
1 sibling, 0 replies; 3+ messages in thread
From: checkpatch @ 2023-03-13 12:03 UTC (permalink / raw)
To: test-report
Test-Label: checkpatch
Test-Status: SUCCESS
http://dpdk.org/patch/125066
_coding style OK_
* |WARNING| pw125066 [PATCH] [v2, 1/1] mldev: split bfloat16 routines to separate files
@ 2023-03-13 14:58 dpdklab
0 siblings, 0 replies; 3+ messages in thread
From: dpdklab @ 2023-03-13 14:58 UTC (permalink / raw)
To: test-report; +Cc: dpdk-test-reports
Test-Label: iol-testing
Test-Status: WARNING
http://dpdk.org/patch/125066
_apply patch failure_
Submitter: Srikanth Yalavarthi <syalavarthi@marvell.com>
Date: Monday, March 13 2023 12:03:06
Applied on: CommitID:baf13c3135d0c5998fff7edc23fb89412dc89246
Apply patch set 125066 failed:
Checking patch lib/mldev/meson.build...
error: while searching for:
        'mldev_utils.c',
)

if dpdk_conf.has('RTE_ARCH_ARM64')
    sources += files('mldev_utils_neon.c')
else
    sources += files('mldev_utils_scalar.c')
endif

headers = files(
        'rte_mldev.h',
)
error: patch failed: lib/mldev/meson.build:7
Checking patch lib/mldev/mldev_utils_neon.c...
Checking patch lib/mldev/mldev_utils_neon_bfloat16.c...
Checking patch lib/mldev/mldev_utils_scalar.c...
error: while searching for:
* Copyright (c) 2022 Marvell.
*/
#include <errno.h>
#include <math.h>
#include <stdint.h>
#include "mldev_utils.h"
/* Description:
* This file implements scalar versions of Machine Learning utility functions used to convert data
* types from higher precision to lower precision and vice-versa.
*/
#ifndef BIT
#define BIT(nr) (1UL << (nr))
#endif
#ifndef BITS_PER_LONG
#define BITS_PER_LONG (__SIZEOF_LONG__ * 8)
#endif
#ifndef GENMASK_U32
#define GENMASK_U32(h, l) (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
#endif
/* float32: bit index of MSB & LSB of sign, exponent and mantissa */
#define FP32_LSB_M 0
#define FP32_MSB_M 22
#define FP32_LSB_E 23
#define FP32_MSB_E 30
#define FP32_LSB_S 31
#define FP32_MSB_S 31
/* float32: bitmask for sign, exponent and mantissa */
#define FP32_MASK_S GENMASK_U32(FP32_MSB_S, FP32_LSB_S)
#define FP32_MASK_E GENMASK_U32(FP32_MSB_E, FP32_LSB_E)
#define FP32_MASK_M GENMASK_U32(FP32_MSB_M, FP32_LSB_M)
/* float16: bit index of MSB & LSB of sign, exponent and mantissa */
#define FP16_LSB_M 0
#define FP16_MSB_M 9
#define FP16_LSB_E 10
#define FP16_MSB_E 14
#define FP16_LSB_S 15
#define FP16_MSB_S 15
/* float16: bitmask for sign, exponent and mantissa */
#define FP16_MASK_S GENMASK_U32(FP16_MSB_S, FP16_LSB_S)
#define FP16_MASK_E GENMASK_U32(FP16_MSB_E, FP16_LSB_E)
#define FP16_MASK_M GENMASK_U32(FP16_MSB_M, FP16_LSB_M)
/* bfloat16: bit index of MSB & LSB of sign, exponent and mantissa */
#define BF16_LSB_M 0
#define BF16_MSB_M 6
#define BF16_LSB_E 7
#define BF16_MSB_E 14
#define BF16_LSB_S 15
#define BF16_MSB_S 15
/* bfloat16: bitmask for sign, exponent and mantissa */
#define BF16_MASK_S GENMASK_U32(BF16_MSB_S, BF16_LSB_S)
#define BF16_MASK_E GENMASK_U32(BF16_MSB_E, BF16_LSB_E)
#define BF16_MASK_M GENMASK_U32(BF16_MSB_M, BF16_LSB_M)
/* Exponent bias */
#define FP32_BIAS_E 127
#define FP16_BIAS_E 15
#define BF16_BIAS_E 127
#define FP32_PACK(sign, exponent, mantissa) \
(((sign) << FP32_LSB_S) | ((exponent) << FP32_LSB_E) | (mantissa))
#define FP16_PACK(sign, exponent, mantissa) \
(((sign) << FP16_LSB_S) | ((exponent) << FP16_LSB_E) | (mantissa))
#define BF16_PACK(sign, exponent, mantissa) \
(((sign) << BF16_LSB_S) | ((exponent) << BF16_LSB_E) | (mantissa))
/* Represent float32 as float and uint32_t */
union float32 {
float f;
uint32_t u;
};
int
rte_ml_io_float32_to_int8(float scale, uint64_t nb_elements, void *input, void *output)
{
error: patch failed: lib/mldev/mldev_utils_scalar.c:2
error: while searching for:
return 0;
}
/* Convert a single precision floating point number (float32) into a
* brain float number (bfloat16) using round to nearest rounding mode.
*/
static uint16_t
__float32_to_bfloat16_scalar_rtn(float x)
{
union float32 f32; /* float32 input */
uint32_t f32_s; /* float32 sign */
uint32_t f32_e; /* float32 exponent */
uint32_t f32_m; /* float32 mantissa */
uint16_t b16_s; /* float16 sign */
uint16_t b16_e; /* float16 exponent */
uint16_t b16_m; /* float16 mantissa */
uint32_t tbits; /* number of truncated bits */
uint16_t u16; /* float16 output */
f32.f = x;
f32_s = (f32.u & FP32_MASK_S) >> FP32_LSB_S;
f32_e = (f32.u & FP32_MASK_E) >> FP32_LSB_E;
f32_m = (f32.u & FP32_MASK_M) >> FP32_LSB_M;
b16_s = f32_s;
b16_e = 0;
b16_m = 0;
switch (f32_e) {
case (0): /* float32: zero or subnormal number */
b16_e = 0;
if (f32_m == 0) /* zero */
b16_m = 0;
else /* subnormal float32 number, normal bfloat16 */
goto bf16_normal;
break;
case (FP32_MASK_E >> FP32_LSB_E): /* float32: infinity or nan */
b16_e = BF16_MASK_E >> BF16_LSB_E;
if (f32_m == 0) { /* infinity */
b16_m = 0;
} else { /* nan, propagate mantissa and set MSB of mantissa to 1 */
b16_m = f32_m >> (FP32_MSB_M - BF16_MSB_M);
b16_m |= BIT(BF16_MSB_M);
}
break;
default: /* float32: normal number, normal bfloat16 */
goto bf16_normal;
}
goto bf16_pack;
bf16_normal:
b16_e = f32_e;
tbits = FP32_MSB_M - BF16_MSB_M;
b16_m = f32_m >> tbits;
/* if non-leading truncated bits are set */
if ((f32_m & GENMASK_U32(tbits - 1, 0)) > BIT(tbits - 1)) {
b16_m++;
/* if overflow into exponent */
if (((b16_m & BF16_MASK_E) >> BF16_LSB_E) == 0x1)
b16_e++;
} else if ((f32_m & GENMASK_U32(tbits - 1, 0)) == BIT(tbits - 1)) {
/* if only leading truncated bit is set */
if ((b16_m & 0x1) == 0x1) {
b16_m++;
/* if overflow into exponent */
if (((b16_m & BF16_MASK_E) >> BF16_LSB_E) == 0x1)
b16_e++;
}
}
b16_m = b16_m & BF16_MASK_M;
bf16_pack:
u16 = BF16_PACK(b16_s, b16_e, b16_m);
return u16;
}
int
rte_ml_io_float32_to_bfloat16(uint64_t nb_elements, void *input, void *output)
{
float *input_buffer;
uint16_t *output_buffer;
uint64_t i;
if ((nb_elements == 0) || (input == NULL) || (output == NULL))
return -EINVAL;
input_buffer = (float *)input;
output_buffer = (uint16_t *)output;
for (i = 0; i < nb_elements; i++) {
*output_buffer = __float32_to_bfloat16_scalar_rtn(*input_buffer);
input_buffer = input_buffer + 1;
output_buffer = output_buffer + 1;
}
return 0;
}
/* Convert a brain float number (bfloat16) into a
* single precision floating point number (float32).
*/
static float
__bfloat16_to_float32_scalar_rtx(uint16_t f16)
{
union float32 f32; /* float32 output */
uint16_t b16_s; /* float16 sign */
uint16_t b16_e; /* float16 exponent */
uint16_t b16_m; /* float16 mantissa */
uint32_t f32_s; /* float32 sign */
uint32_t f32_e; /* float32 exponent */
uint32_t f32_m; /* float32 mantissa*/
uint8_t shift; /* number of bits to be shifted */
b16_s = (f16 & BF16_MASK_S) >> BF16_LSB_S;
b16_e = (f16 & BF16_MASK_E) >> BF16_LSB_E;
b16_m = (f16 & BF16_MASK_M) >> BF16_LSB_M;
f32_s = b16_s;
switch (b16_e) {
case (BF16_MASK_E >> BF16_LSB_E): /* bfloat16: infinity or nan */
f32_e = FP32_MASK_E >> FP32_LSB_E;
if (b16_m == 0x0) { /* infinity */
f32_m = 0;
} else { /* nan, propagate mantissa, set MSB of mantissa to 1 */
f32_m = b16_m;
shift = FP32_MSB_M - BF16_MSB_M;
f32_m = (f32_m << shift) & FP32_MASK_M;
f32_m |= BIT(FP32_MSB_M);
}
break;
case 0: /* bfloat16: zero or subnormal */
f32_m = b16_m;
if (b16_m == 0) { /* zero signed */
f32_e = 0;
} else { /* subnormal numbers */
goto fp32_normal;
}
break;
default: /* bfloat16: normal number */
goto fp32_normal;
}
goto fp32_pack;
fp32_normal:
f32_m = b16_m;
f32_e = FP32_BIAS_E + b16_e - BF16_BIAS_E;
shift = (FP32_MSB_M - BF16_MSB_M);
f32_m = (f32_m << shift) & FP32_MASK_M;
fp32_pack:
f32.u = FP32_PACK(f32_s
error: patch failed: lib/mldev/mldev_utils_scalar.c:532
Checking patch lib/mldev/mldev_utils_scalar.h...
Checking patch lib/mldev/mldev_utils_scalar_bfloat16.c...
Applying patch lib/mldev/meson.build with 1 reject...
Rejected hunk #1.
Applied patch lib/mldev/mldev_utils_neon.c cleanly.
Applied patch lib/mldev/mldev_utils_neon_bfloat16.c cleanly.
Applying patch lib/mldev/mldev_utils_scalar.c with 2 rejects...
Rejected hunk #1.
Rejected hunk #2.
Applied patch lib/mldev/mldev_utils_scalar.h cleanly.
Applied patch lib/mldev/mldev_utils_scalar_bfloat16.c cleanly.
hint: Use 'git am --show-current-patch' to see the failed patch
diff a/lib/mldev/meson.build b/lib/mldev/meson.build (rejected hunks)
@@ -7,12 +7,21 @@ sources = files(
         'mldev_utils.c',
 )
 
-if dpdk_conf.has('RTE_ARCH_ARM64')
+if (dpdk_conf.has('RTE_ARCH_ARM64') and
+        cc.get_define('__ARM_NEON', args: machine_args) != '')
     sources += files('mldev_utils_neon.c')
 else
     sources += files('mldev_utils_scalar.c')
 endif
 
+if (dpdk_conf.has('RTE_ARCH_ARM64') and
+        cc.get_define('__ARM_NEON', args: machine_args) != '' and
+        cc.get_define('__ARM_FEATURE_BF16', args: machine_args) != '')
+    sources += files('mldev_utils_neon_bfloat16.c')
+else
+    sources += files('mldev_utils_scalar_bfloat16.c')
+endif
+
 headers = files(
         'rte_mldev.h',
 )
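
The rejected meson.build hunk above gates the new NEON bfloat16 file on both __ARM_NEON and __ARM_FEATURE_BF16 being predefined by the compiler (probed via cc.get_define()), falling back to the scalar bfloat16 file otherwise. As a rough illustration only (the contents of the new source files are not shown in this log), the compiler-side half of that condition corresponds to ordinary preprocessor checks in C; the RTE_ARCH_ARM64 part is a DPDK build-configuration flag rather than a compiler macro, so it has no direct counterpart here:

#include <stdio.h>

static const char *
mldev_utils_flavour(void)
{
#if defined(__ARM_NEON) && defined(__ARM_FEATURE_BF16)
        return "neon + bf16";           /* mldev_utils_neon_bfloat16.c would be built */
#elif defined(__ARM_NEON)
        return "neon, scalar bf16";     /* mldev_utils_neon.c + mldev_utils_scalar_bfloat16.c */
#else
        return "scalar";                /* mldev_utils_scalar*.c */
#endif
}

int
main(void)
{
        printf("compiled flavour: %s\n", mldev_utils_flavour());
        return 0;
}
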
diff a/lib/mldev/mldev_utils_scalar.c b/lib/mldev/mldev_utils_scalar.c (rejected hunks)
@@ -2,88 +2,13 @@
* Copyright (c) 2022 Marvell.
*/
-#include <errno.h>
-#include <math.h>
-#include <stdint.h>
-
-#include "mldev_utils.h"
+#include "mldev_utils_scalar.h"
/* Description:
* This file implements scalar versions of Machine Learning utility functions used to convert data
- * types from higher precision to lower precision and vice-versa.
+ * types from higher precision to lower precision and vice-versa, except bfloat16.
*/
-#ifndef BIT
-#define BIT(nr) (1UL << (nr))
-#endif
-
-#ifndef BITS_PER_LONG
-#define BITS_PER_LONG (__SIZEOF_LONG__ * 8)
-#endif
-
-#ifndef GENMASK_U32
-#define GENMASK_U32(h, l) (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
-#endif
-
-/* float32: bit index of MSB & LSB of sign, exponent and mantissa */
-#define FP32_LSB_M 0
-#define FP32_MSB_M 22
-#define FP32_LSB_E 23
-#define FP32_MSB_E 30
-#define FP32_LSB_S 31
-#define FP32_MSB_S 31
-
-/* float32: bitmask for sign, exponent and mantissa */
-#define FP32_MASK_S GENMASK_U32(FP32_MSB_S, FP32_LSB_S)
-#define FP32_MASK_E GENMASK_U32(FP32_MSB_E, FP32_LSB_E)
-#define FP32_MASK_M GENMASK_U32(FP32_MSB_M, FP32_LSB_M)
-
-/* float16: bit index of MSB & LSB of sign, exponent and mantissa */
-#define FP16_LSB_M 0
-#define FP16_MSB_M 9
-#define FP16_LSB_E 10
-#define FP16_MSB_E 14
-#define FP16_LSB_S 15
-#define FP16_MSB_S 15
-
-/* float16: bitmask for sign, exponent and mantissa */
-#define FP16_MASK_S GENMASK_U32(FP16_MSB_S, FP16_LSB_S)
-#define FP16_MASK_E GENMASK_U32(FP16_MSB_E, FP16_LSB_E)
-#define FP16_MASK_M GENMASK_U32(FP16_MSB_M, FP16_LSB_M)
-
-/* bfloat16: bit index of MSB & LSB of sign, exponent and mantissa */
-#define BF16_LSB_M 0
-#define BF16_MSB_M 6
-#define BF16_LSB_E 7
-#define BF16_MSB_E 14
-#define BF16_LSB_S 15
-#define BF16_MSB_S 15
-
-/* bfloat16: bitmask for sign, exponent and mantissa */
-#define BF16_MASK_S GENMASK_U32(BF16_MSB_S, BF16_LSB_S)
-#define BF16_MASK_E GENMASK_U32(BF16_MSB_E, BF16_LSB_E)
-#define BF16_MASK_M GENMASK_U32(BF16_MSB_M, BF16_LSB_M)
-
-/* Exponent bias */
-#define FP32_BIAS_E 127
-#define FP16_BIAS_E 15
-#define BF16_BIAS_E 127
-
-#define FP32_PACK(sign, exponent, mantissa) \
- (((sign) << FP32_LSB_S) | ((exponent) << FP32_LSB_E) | (mantissa))
-
-#define FP16_PACK(sign, exponent, mantissa) \
- (((sign) << FP16_LSB_S) | ((exponent) << FP16_LSB_E) | (mantissa))
-
-#define BF16_PACK(sign, exponent, mantissa) \
- (((sign) << BF16_LSB_S) | ((exponent) << BF16_LSB_E) | (mantissa))
-
-/* Represent float32 as float and uint32_t */
-union float32 {
- float f;
- uint32_t u;
-};
-
int
rte_ml_io_float32_to_int8(float scale, uint64_t nb_elements, void *input, void *output)
{
@@ -532,186 +457,3 @@ rte_ml_io_float16_to_float32(uint64_t nb_elements, void *input, void *output)
return 0;
}
-
-/* Convert a single precision floating point number (float32) into a
- * brain float number (bfloat16) using round to nearest rounding mode.
- */
-static uint16_t
-__float32_to_bfloat16_scalar_rtn(float x)
-{
- union float32 f32; /* float32 input */
- uint32_t f32_s; /* float32 sign */
- uint32_t f32_e; /* float32 exponent */
- uint32_t f32_m; /* float32 mantissa */
- uint16_t b16_s; /* float16 sign */
- uint16_t b16_e; /* float16 exponent */
- uint16_t b16_m; /* float16 mantissa */
- uint32_t tbits; /* number of truncated bits */
- uint16_t u16; /* float16 output */
-
- f32.f = x;
- f32_s = (f32.u & FP32_MASK_S) >> FP32_LSB_S;
- f32_e = (f32.u & FP32_MASK_E) >> FP32_LSB_E;
- f32_m = (f32.u & FP32_MASK_M) >> FP32_LSB_M;
-
- b16_s = f32_s;
- b16_e = 0;
- b16_m = 0;
-
- switch (f32_e) {
- case (0): /* float32: zero or subnormal number */
- b16_e = 0;
- if (f32_m == 0) /* zero */
- b16_m = 0;
- else /* subnormal float32 number, normal bfloat16 */
- goto bf16_normal;
- break;
- case (FP32_MASK_E >> FP32_LSB_E): /* float32: infinity or nan */
- b16_e = BF16_MASK_E >> BF16_LSB_E;
- if (f32_m == 0) { /* infinity */
- b16_m = 0;
- } else { /* nan, propagate mantissa and set MSB of mantissa to 1 */
- b16_m = f32_m >> (FP32_MSB_M - BF16_MSB_M);
- b16_m |= BIT(BF16_MSB_M);
- }
- break;
- default: /* float32: normal number, normal bfloat16 */
- goto bf16_normal;
- }
-
- goto bf16_pack;
-
-bf16_normal:
- b16_e = f32_e;
- tbits = FP32_MSB_M - BF16_MSB_M;
- b16_m = f32_m >> tbits;
-
- /* if non-leading truncated bits are set */
- if ((f32_m & GENMASK_U32(tbits - 1, 0)) > BIT(tbits - 1)) {
- b16_m++;
-
- /* if overflow into exponent */
- if (((b16_m & BF16_MASK_E) >> BF16_LSB_E) == 0x1)
- b16_e++;
- } else if ((f32_m & GENMASK_U32(tbits - 1, 0)) == BIT(tbits - 1)) {
- /* if only leading truncated bit is set */
- if ((b16_m & 0x1) == 0x1) {
- b16_m++;
-
- /* if overflow into exponent */
- if (((b16_m & BF16_MASK_E) >> BF16_LSB_E) == 0x1)
- b16_e++;
- }
- }
- b16_m = b16_m & BF16_MASK_M;
-
-bf16_pack:
- u16 = BF16_PACK(b16_s, b16_e, b16_m);
-
- return u16;
-}
-
-int
-rte_ml_io_float32_to_bfloat16(uint64_t nb_elements, void *input, void *output)
-{
- float *input_buffer;
- uint16_t *output_buffer;
- uint64_t i;
-
- if ((nb_elements == 0) || (input == NULL) || (output == NULL))
- return -EINVAL;
-
- input_buffer = (float *)input;
- output_buffer = (uint16_t *)output;
-
- for (i = 0; i < nb_elements; i++) {
- *output_buffer = __float32_to_bfloat16_scalar_rtn(*input_buffer);
-
- input_buffer = input_buffer + 1;
- output_buffer = output_buffer + 1;
- }
-
- return 0;
-}
-
-/* Convert a brain float number (bfloat16) into a
- * single precision floating point number (float32).
- */
-static float
-__bfloat16_to_float32_scalar_rtx(uint16_t f16)
-{
- union float32 f32; /* float32 output */
- uint16_t b16_s; /* float16 sign */
- uint16_t b16_e; /* float16 exponent */
- uint16_t b16_m; /* float16 mantissa */
- uint32_t f32_s; /* float32 sign */
- uint32_t f32_e; /* float32 exponent */
- uint32_t f32_m; /* float32 mantissa*/
- uint8_t shift; /* number of bits to be shifted */
-
- b16_s = (f16 & BF16_MASK_S) >> BF16_LSB_S;
- b16_e = (f16 & BF16_MASK_E) >> BF16_LSB_E;
- b16_m = (f16 & BF16_MASK_M) >> BF16_LSB_M;
-
- f32_s = b16_s;
- switch (b16_e) {
- case (BF16_MASK_E >> BF16_LSB_E): /* bfloat16: infinity or nan */
- f32_e = FP32_MASK_E >> FP32_LSB_E;
- if (b16_m == 0x0) { /* infinity */
- f32_m = 0;
- } else { /* nan, propagate mantissa, set MSB of mantissa to 1 */
- f32_m = b16_m;
- shift = FP32_MSB_M - BF16_MSB_M;
- f32_m = (f32_m << shift) & FP32_MASK_M;
- f32_m |= BIT(FP32_MSB_M);
- }
- break;
- case 0: /* bfloat16: zero or subnormal */
- f32_m = b16_m;
- if (b16_m == 0) { /* zero signed */
- f32_e = 0;
- } else { /* subnormal numbers */
- goto fp32_normal;
- }
- break;
- default: /* bfloat16: normal number */
- goto fp32_normal;
- }
-
- goto fp32_pack;
-
-fp32_normal:
- f32_m = b16_m;
- f32_e = FP32_BIAS_E + b16_e - BF16_BIAS_E;
-
- shift = (FP32_MSB_M - BF16_MSB_M);
- f32_m = (f32_m << shift) & FP32_MASK_M;
-
-fp32_pack:
- f32.u = FP32_PACK(f32_s, f32_e, f32_m);
-
- return f32.f;
-}
-
-int
-rte_ml_io_bfloat16_to_float32(uint64_t nb_elements, void *input, void *output)
-{
- uint16_t *input_buffer;
- float *output_buffer;
- uint64_t i;
-
- if ((nb_elements == 0) || (input == NULL) || (output == NULL))
- return -EINVAL;
-
- input_buffer = (uint16_t *)input;
- output_buffer = (float *)output;
-
- for (i = 0; i < nb_elements; i++) {
- *output_buffer = __bfloat16_to_float32_scalar_rtx(*input_buffer);
-
- input_buffer = input_buffer + 1;
- output_buffer = output_buffer + 1;
- }
-
- return 0;
-}
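
For context, the two routines removed by this hunk implement round-to-nearest-even float32 to bfloat16 truncation and the bfloat16 to float32 widening, which is essentially a 16-bit left shift. A standalone sketch of the same rounding behaviour follows; it is not DPDK code and is deliberately simplified: unlike __float32_to_bfloat16_scalar_rtn() above, it does not force the mantissa MSB for NaN inputs.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Round-to-nearest-even float32 -> bfloat16: add 0x7fff plus the lowest
 * kept bit, then keep the top 16 bits; a carry naturally bumps the
 * exponent, matching the b16_e++ overflow handling in the quoted routine.
 */
static uint16_t
f32_to_bf16_rne(float x)
{
        uint32_t u;

        memcpy(&u, &x, sizeof(u));
        u += 0x7fffu + ((u >> 16) & 1);
        return (uint16_t)(u >> 16);
}

/* bfloat16 -> float32: the bfloat16 value is the upper half of the
 * float32 bit pattern.
 */
static float
bf16_to_f32(uint16_t b)
{
        uint32_t u = (uint32_t)b << 16;
        float f;

        memcpy(&f, &u, sizeof(f));
        return f;
}

int
main(void)
{
        float in = 3.14159265f;
        uint16_t b = f32_to_bf16_rne(in);

        printf("%f -> 0x%04x -> %f\n", in, (unsigned int)b, bf16_to_f32(b));
        /* prints: 3.141593 -> 0x4049 -> 3.140625 */
        return 0;
}
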
https://lab.dpdk.org/results/dashboard/patchsets/25708/
UNH-IOL DPDK Community Lab