From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id DAA5241E0C for ; Mon, 13 Mar 2023 15:09:10 +0100 (CET) Received: from mails.dpdk.org (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id B03BF40151; Mon, 13 Mar 2023 15:09:10 +0100 (CET) Received: from mail-pj1-f100.google.com (mail-pj1-f100.google.com [209.85.216.100]) by mails.dpdk.org (Postfix) with ESMTP id AE5AC40151 for ; Mon, 13 Mar 2023 15:09:08 +0100 (CET) Received: by mail-pj1-f100.google.com with SMTP id 6-20020a17090a190600b00237c5b6ecd7so16985951pjg.4 for ; Mon, 13 Mar 2023 07:09:08 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=iol.unh.edu; s=unh-iol; t=1678716548; h=date:message-id:cc:to:from:subject:from:to:cc:subject:date :message-id:reply-to; bh=nSOKIv+AmJBwSlq4e29uRTTkifSrglgjDOY8/jJVq7c=; b=ablixWcuvao0YXdc6IPK/oemdF+j3Kf0iOykgHE9SQg+y5llO+CuOFHOx/WtZZc7Gu WSHbewTwz0zNggb+Zj/8LKJNUA7axSTqSYia3SFC2AYLcTEZT3glHj3/JOfTM4wEu271 RyNe+rJMm7FufLl7bD2+ZtmMGnWWbMwyZDoHk= X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; t=1678716548; h=date:message-id:cc:to:from:subject:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=nSOKIv+AmJBwSlq4e29uRTTkifSrglgjDOY8/jJVq7c=; b=7nJRzajFBkvPZPgIc5lNu3nHv6yUck7Q/UliMNvBRXWM1hIwNECEEh/f1xRNTsL4AC bbA/IC/pzkdo5KWe6p2+X9XKpDHv7IOrQYGHOBeudM1de2mVkX5O4ty6YqO5PpYyGF5s RYpVKhkGQ5yAhH/62Shb7pbknQ0HfLnuaKO6lovwGoWLOl5iBqsovyGs7Mh42ipH+RvV +oPh0aBmeq9aB8jMtiGugsoFh4O99CdhO8KrMnEOMWbDiXywZEQJHS37HIi3DnMn3L21 /XPvJin2YEv9RFzCy0dNHVM9LZVM2IK2Irc4uFKxyn4/VunCXUiByKjh4+DWwCXH4nge E6wA== X-Gm-Message-State: AO0yUKUuvt2+HEbIBk+B6NbeVNrv4gRgM91/mfB9rS8JRR7E6B5s/KTY yynXHFVjJvtsGHpsv/6dsVrMz6RjFg+T5tGubvxOEZZHJmg8Mn5ubX4kfA== X-Google-Smtp-Source: AK7set8V8TqmkfPSDXzoSFfc42T7/8yt8BreYXaKBm7uDkVw+E1MivIfUY+lgnQkG34bxHq/c3Ecj5O2S76J X-Received: by 2002:a17:902:d2cd:b0:19a:96f0:b0f with SMTP id n13-20020a170902d2cd00b0019a96f00b0fmr11781311plc.28.1678716547814; Mon, 13 Mar 2023 07:09:07 -0700 (PDT) Received: from postal.iol.unh.edu (postal.iol.unh.edu. [132.177.123.84]) by smtp-relay.gmail.com with ESMTPS id m3-20020a1709026bc300b0019ab79d7565sm210629plt.142.2023.03.13.07.09.07 (version=TLS1_2 cipher=ECDHE-ECDSA-AES128-GCM-SHA256 bits=128/128); Mon, 13 Mar 2023 07:09:07 -0700 (PDT) X-Relaying-Domain: iol.unh.edu Received: from dpdk-ubuntu.dpdklab.iol.unh.edu (unknown [172.18.0.210]) by postal.iol.unh.edu (Postfix) with ESMTP id 14B36605246B; Mon, 13 Mar 2023 10:09:07 -0400 (EDT) Received: from dpdk-ubuntu-2.dpdklab.iol.unh.edu (localhost [IPv6:::1]) by dpdk-ubuntu.dpdklab.iol.unh.edu (Postfix) with ESMTP id 129496011D; Mon, 13 Mar 2023 14:09:07 +0000 (UTC) Subject: |WARNING| pw125064 [PATCH] [1/1] mldev: split bfloat16 routines to separate files From: dpdklab@iol.unh.edu To: test-report@dpdk.org Cc: dpdk-test-reports@iol.unh.edu Content-Type: text/plain Message-Id: <20230313140907.129496011D@dpdk-ubuntu.dpdklab.iol.unh.edu> Date: Mon, 13 Mar 2023 14:09:07 +0000 (UTC) X-BeenThere: test-report@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: automatic DPDK test reports List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: test-report-bounces@dpdk.org Test-Label: iol-testing Test-Status: WARNING http://dpdk.org/patch/125064 _apply patch failure_ Submitter: Srikanth Yalavarthi Date: Monday, March 13 2023 11:43:42 Applied on: CommitID:baf13c3135d0c5998fff7edc23fb89412dc89246 Apply patch set 125064 failed: Checking patch lib/mldev/meson.build... error: while searching for: 'mldev_utils.c', ) if dpdk_conf.has('RTE_ARCH_ARM64') sources += files('mldev_utils_neon.c') else sources += files('mldev_utils_scalar.c') endif headers = files( 'rte_mldev.h', ) error: patch failed: lib/mldev/meson.build:7 Checking patch lib/mldev/mldev_utils_neon.c... Checking patch lib/mldev/mldev_utils_neon_bfloat16.c... Checking patch lib/mldev/mldev_utils_scalar.c... error: while searching for: * Copyright (c) 2022 Marvell. */ #include #include #include #include "mldev_utils.h" /* Description: * This file implements scalar versions of Machine Learning utility functions used to convert data * types from higher precision to lower precision and vice-versa. */ #ifndef BIT #define BIT(nr) (1UL << (nr)) #endif #ifndef BITS_PER_LONG #define BITS_PER_LONG (__SIZEOF_LONG__ * 8) #endif #ifndef GENMASK_U32 #define GENMASK_U32(h, l) (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) #endif /* float32: bit index of MSB & LSB of sign, exponent and mantissa */ #define FP32_LSB_M 0 #define FP32_MSB_M 22 #define FP32_LSB_E 23 #define FP32_MSB_E 30 #define FP32_LSB_S 31 #define FP32_MSB_S 31 /* float32: bitmask for sign, exponent and mantissa */ #define FP32_MASK_S GENMASK_U32(FP32_MSB_S, FP32_LSB_S) #define FP32_MASK_E GENMASK_U32(FP32_MSB_E, FP32_LSB_E) #define FP32_MASK_M GENMASK_U32(FP32_MSB_M, FP32_LSB_M) /* float16: bit index of MSB & LSB of sign, exponent and mantissa */ #define FP16_LSB_M 0 #define FP16_MSB_M 9 #define FP16_LSB_E 10 #define FP16_MSB_E 14 #define FP16_LSB_S 15 #define FP16_MSB_S 15 /* float16: bitmask for sign, exponent and mantissa */ #define FP16_MASK_S GENMASK_U32(FP16_MSB_S, FP16_LSB_S) #define FP16_MASK_E GENMASK_U32(FP16_MSB_E, FP16_LSB_E) #define FP16_MASK_M GENMASK_U32(FP16_MSB_M, FP16_LSB_M) /* bfloat16: bit index of MSB & LSB of sign, exponent and mantissa */ #define BF16_LSB_M 0 #define BF16_MSB_M 6 #define BF16_LSB_E 7 #define BF16_MSB_E 14 #define BF16_LSB_S 15 #define BF16_MSB_S 15 /* bfloat16: bitmask for sign, exponent and mantissa */ #define BF16_MASK_S GENMASK_U32(BF16_MSB_S, BF16_LSB_S) #define BF16_MASK_E GENMASK_U32(BF16_MSB_E, BF16_LSB_E) #define BF16_MASK_M GENMASK_U32(BF16_MSB_M, BF16_LSB_M) /* Exponent bias */ #define FP32_BIAS_E 127 #define FP16_BIAS_E 15 #define BF16_BIAS_E 127 #define FP32_PACK(sign, exponent, mantissa) \ (((sign) << FP32_LSB_S) | ((exponent) << FP32_LSB_E) | (mantissa)) #define FP16_PACK(sign, exponent, mantissa) \ (((sign) << FP16_LSB_S) | ((exponent) << FP16_LSB_E) | (mantissa)) #define BF16_PACK(sign, exponent, mantissa) \ (((sign) << BF16_LSB_S) | ((exponent) << BF16_LSB_E) | (mantissa)) /* Represent float32 as float and uint32_t */ union float32 { float f; uint32_t u; }; int rte_ml_io_float32_to_int8(float scale, uint64_t nb_elements, void *input, void *output) { error: patch failed: lib/mldev/mldev_utils_scalar.c:2 Hunk #2 succeeded at 334 (offset 75 lines). Hunk #3 succeeded at 477 (offset 75 lines). error: while searching for: return 0; } /* Convert a single precision floating point number (float32) into a * brain float number (bfloat16) using round to nearest rounding mode. */ static uint16_t __float32_to_bfloat16_scalar_rtn(float x) { union float32 f32; /* float32 input */ uint32_t f32_s; /* float32 sign */ uint32_t f32_e; /* float32 exponent */ uint32_t f32_m; /* float32 mantissa */ uint16_t b16_s; /* float16 sign */ uint16_t b16_e; /* float16 exponent */ uint16_t b16_m; /* float16 mantissa */ uint32_t tbits; /* number of truncated bits */ uint16_t u16; /* float16 output */ f32.f = x; f32_s = (f32.u & FP32_MASK_S) >> FP32_LSB_S; f32_e = (f32.u & FP32_MASK_E) >> FP32_LSB_E; f32_m = (f32.u & FP32_MASK_M) >> FP32_LSB_M; b16_s = f32_s; b16_e = 0; b16_m = 0; switch (f32_e) { case (0): /* float32: zero or subnormal number */ b16_e = 0; if (f32_m == 0) /* zero */ b16_m = 0; else /* subnormal float32 number, normal bfloat16 */ goto bf16_normal; break; case (FP32_MASK_E >> FP32_LSB_E): /* float32: infinity or nan */ b16_e = BF16_MASK_E >> BF16_LSB_E; if (f32_m == 0) { /* infinity */ b16_m = 0; } else { /* nan, propagate mantissa and set MSB of mantissa to 1 */ b16_m = f32_m >> (FP32_MSB_M - BF16_MSB_M); b16_m |= BIT(BF16_MSB_M); } break; default: /* float32: normal number, normal bfloat16 */ goto bf16_normal; } goto bf16_pack; bf16_normal: b16_e = f32_e; tbits = FP32_MSB_M - BF16_MSB_M; b16_m = f32_m >> tbits; /* if non-leading truncated bits are set */ if ((f32_m & GENMASK_U32(tbits - 1, 0)) > BIT(tbits - 1)) { b16_m++; /* if overflow into exponent */ if (((b16_m & BF16_MASK_E) >> BF16_LSB_E) == 0x1) b16_e++; } else if ((f32_m & GENMASK_U32(tbits - 1, 0)) == BIT(tbits - 1)) { /* if only leading truncated bit is set */ if ((b16_m & 0x1) == 0x1) { b16_m++; /* if overflow into exponent */ if (((b16_m & BF16_MASK_E) >> BF16_LSB_E) == 0x1) b16_e++; } } b16_m = b16_m & BF16_MASK_M; bf16_pack: u16 = BF16_PACK(b16_s, b16_e, b16_m); return u16; } int rte_ml_io_float32_to_bfloat16(uint64_t nb_elements, void *input, void *output) { float *input_buffer; uint16_t *output_buffer; uint64_t i; if ((nb_elements == 0) || (input == NULL) || (output == NULL)) return -EINVAL; input_buffer = (float *)input; output_buffer = (uint16_t *)output; for (i = 0; i < nb_elements; i++) { *output_buffer = __float32_to_bfloat16_scalar_rtn(*input_buffer); input_buffer = input_buffer + 1; output_buffer = output_buffer + 1; } return 0; } /* Convert a brain float number (bfloat16) into a * single precision floating point number (float32). */ static float __bfloat16_to_float32_scalar_rtx(uint16_t f16) { union float32 f32; /* float32 output */ uint16_t b16_s; /* float16 sign */ uint16_t b16_e; /* float16 exponent */ uint16_t b16_m; /* float16 mantissa */ uint32_t f32_s; /* float32 sign */ uint32_t f32_e; /* float32 exponent */ uint32_t f32_m; /* float32 mantissa*/ uint8_t shift; /* number of bits to be shifted */ b16_s = (f16 & BF16_MASK_S) >> BF16_LSB_S; b16_e = (f16 & BF16_MASK_E) >> BF16_LSB_E; b16_m = (f16 & BF16_MASK_M) >> BF16_LSB_M; f32_s = b16_s; switch (b16_e) { case (BF16_MASK_E >> BF16_LSB_E): /* bfloat16: infinity or nan */ f32_e = FP32_MASK_E >> FP32_LSB_E; if (b16_m == 0x0) { /* infinity */ f32_m = 0; } else { /* nan, propagate mantissa, set MSB of mantissa to 1 */ f32_m = b16_m; shift = FP32_MSB_M - BF16_MSB_M; f32_m = (f32_m << shift) & FP32_MASK_M; f32_m |= BIT(FP32_MSB_M); } break; case 0: /* bfloat16: zero or subnormal */ f32_m = b16_m; if (b16_m == 0) { /* zero signed */ f32_e = 0; } else { /* subnormal numbers */ goto fp32_normal; } break; default: /* bfloat16: normal number */ goto fp32_normal; } goto fp32_pack; fp32_normal: f32_m = b16_m; f32_e = FP32_BIAS_E + b16_e - BF16_BIAS_E; shift = (FP32_MSB_M - BF16_MSB_M); f32_m = (f32_m << shift) & FP32_MASK_M; fp32_pack: f32.u = FP32_PACK(f32_s error: patch failed: lib/mldev/mldev_utils_scalar.c:535 Checking patch lib/mldev/mldev_utils_scalar.h... Checking patch lib/mldev/mldev_utils_scalar_bfloat16.c... Applying patch lib/mldev/meson.build with 1 reject... Rejected hunk #1. Applied patch lib/mldev/mldev_utils_neon.c cleanly. Applied patch lib/mldev/mldev_utils_neon_bfloat16.c cleanly. Applying patch lib/mldev/mldev_utils_scalar.c with 2 rejects... Rejected hunk #1. Hunk #2 applied cleanly. Hunk #3 applied cleanly. Rejected hunk #4. Applied patch lib/mldev/mldev_utils_scalar.h cleanly. Applied patch lib/mldev/mldev_utils_scalar_bfloat16.c cleanly. hint: Use 'git am --show-current-patch' to see the failed patch diff a/lib/mldev/meson.build b/lib/mldev/meson.build (rejected hunks) @@ -7,12 +7,21 @@ sources = files( 'mldev_utils.c', ) -if dpdk_conf.has('RTE_ARCH_ARM64') +if (dpdk_conf.has('RTE_ARCH_ARM64') and + cc.get_define('__ARM_NEON', args: machine_args) != '') sources += files('mldev_utils_neon.c') else sources += files('mldev_utils_scalar.c') endif +if (dpdk_conf.has('RTE_ARCH_ARM64') and + cc.get_define('__ARM_NEON', args: machine_args) != '' and + cc.get_define('__ARM_FEATURE_BF16', args: machine_args) != '') + sources += files('mldev_utils_neon_bfloat16.c') +else + sources += files('mldev_utils_scalar_bfloat16.c') +endif + headers = files( 'rte_mldev.h', ) diff a/lib/mldev/mldev_utils_scalar.c b/lib/mldev/mldev_utils_scalar.c (rejected hunks) @@ -2,88 +2,13 @@ * Copyright (c) 2022 Marvell. */ -#include -#include -#include - -#include "mldev_utils.h" +#include "mldev_utils_scalar.h" /* Description: * This file implements scalar versions of Machine Learning utility functions used to convert data - * types from higher precision to lower precision and vice-versa. + * types from higher precision to lower precision and vice-versa, except bfloat16. */ -#ifndef BIT -#define BIT(nr) (1UL << (nr)) -#endif - -#ifndef BITS_PER_LONG -#define BITS_PER_LONG (__SIZEOF_LONG__ * 8) -#endif - -#ifndef GENMASK_U32 -#define GENMASK_U32(h, l) (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) -#endif - -/* float32: bit index of MSB & LSB of sign, exponent and mantissa */ -#define FP32_LSB_M 0 -#define FP32_MSB_M 22 -#define FP32_LSB_E 23 -#define FP32_MSB_E 30 -#define FP32_LSB_S 31 -#define FP32_MSB_S 31 - -/* float32: bitmask for sign, exponent and mantissa */ -#define FP32_MASK_S GENMASK_U32(FP32_MSB_S, FP32_LSB_S) -#define FP32_MASK_E GENMASK_U32(FP32_MSB_E, FP32_LSB_E) -#define FP32_MASK_M GENMASK_U32(FP32_MSB_M, FP32_LSB_M) - -/* float16: bit index of MSB & LSB of sign, exponent and mantissa */ -#define FP16_LSB_M 0 -#define FP16_MSB_M 9 -#define FP16_LSB_E 10 -#define FP16_MSB_E 14 -#define FP16_LSB_S 15 -#define FP16_MSB_S 15 - -/* float16: bitmask for sign, exponent and mantissa */ -#define FP16_MASK_S GENMASK_U32(FP16_MSB_S, FP16_LSB_S) -#define FP16_MASK_E GENMASK_U32(FP16_MSB_E, FP16_LSB_E) -#define FP16_MASK_M GENMASK_U32(FP16_MSB_M, FP16_LSB_M) - -/* bfloat16: bit index of MSB & LSB of sign, exponent and mantissa */ -#define BF16_LSB_M 0 -#define BF16_MSB_M 6 -#define BF16_LSB_E 7 -#define BF16_MSB_E 14 -#define BF16_LSB_S 15 -#define BF16_MSB_S 15 - -/* bfloat16: bitmask for sign, exponent and mantissa */ -#define BF16_MASK_S GENMASK_U32(BF16_MSB_S, BF16_LSB_S) -#define BF16_MASK_E GENMASK_U32(BF16_MSB_E, BF16_LSB_E) -#define BF16_MASK_M GENMASK_U32(BF16_MSB_M, BF16_LSB_M) - -/* Exponent bias */ -#define FP32_BIAS_E 127 -#define FP16_BIAS_E 15 -#define BF16_BIAS_E 127 - -#define FP32_PACK(sign, exponent, mantissa) \ - (((sign) << FP32_LSB_S) | ((exponent) << FP32_LSB_E) | (mantissa)) - -#define FP16_PACK(sign, exponent, mantissa) \ - (((sign) << FP16_LSB_S) | ((exponent) << FP16_LSB_E) | (mantissa)) - -#define BF16_PACK(sign, exponent, mantissa) \ - (((sign) << BF16_LSB_S) | ((exponent) << BF16_LSB_E) | (mantissa)) - -/* Represent float32 as float and uint32_t */ -union float32 { - float f; - uint32_t u; -}; - int rte_ml_io_float32_to_int8(float scale, uint64_t nb_elements, void *input, void *output) { @@ -535,186 +460,3 @@ rte_ml_io_float16_to_float32(uint64_t nb_elements, void *input, void *output) return 0; } - -/* Convert a single precision floating point number (float32) into a - * brain float number (bfloat16) using round to nearest rounding mode. - */ -static uint16_t -__float32_to_bfloat16_scalar_rtn(float x) -{ - union float32 f32; /* float32 input */ - uint32_t f32_s; /* float32 sign */ - uint32_t f32_e; /* float32 exponent */ - uint32_t f32_m; /* float32 mantissa */ - uint16_t b16_s; /* float16 sign */ - uint16_t b16_e; /* float16 exponent */ - uint16_t b16_m; /* float16 mantissa */ - uint32_t tbits; /* number of truncated bits */ - uint16_t u16; /* float16 output */ - - f32.f = x; - f32_s = (f32.u & FP32_MASK_S) >> FP32_LSB_S; - f32_e = (f32.u & FP32_MASK_E) >> FP32_LSB_E; - f32_m = (f32.u & FP32_MASK_M) >> FP32_LSB_M; - - b16_s = f32_s; - b16_e = 0; - b16_m = 0; - - switch (f32_e) { - case (0): /* float32: zero or subnormal number */ - b16_e = 0; - if (f32_m == 0) /* zero */ - b16_m = 0; - else /* subnormal float32 number, normal bfloat16 */ - goto bf16_normal; - break; - case (FP32_MASK_E >> FP32_LSB_E): /* float32: infinity or nan */ - b16_e = BF16_MASK_E >> BF16_LSB_E; - if (f32_m == 0) { /* infinity */ - b16_m = 0; - } else { /* nan, propagate mantissa and set MSB of mantissa to 1 */ - b16_m = f32_m >> (FP32_MSB_M - BF16_MSB_M); - b16_m |= BIT(BF16_MSB_M); - } - break; - default: /* float32: normal number, normal bfloat16 */ - goto bf16_normal; - } - - goto bf16_pack; - -bf16_normal: - b16_e = f32_e; - tbits = FP32_MSB_M - BF16_MSB_M; - b16_m = f32_m >> tbits; - - /* if non-leading truncated bits are set */ - if ((f32_m & GENMASK_U32(tbits - 1, 0)) > BIT(tbits - 1)) { - b16_m++; - - /* if overflow into exponent */ - if (((b16_m & BF16_MASK_E) >> BF16_LSB_E) == 0x1) - b16_e++; - } else if ((f32_m & GENMASK_U32(tbits - 1, 0)) == BIT(tbits - 1)) { - /* if only leading truncated bit is set */ - if ((b16_m & 0x1) == 0x1) { - b16_m++; - - /* if overflow into exponent */ - if (((b16_m & BF16_MASK_E) >> BF16_LSB_E) == 0x1) - b16_e++; - } - } - b16_m = b16_m & BF16_MASK_M; - -bf16_pack: - u16 = BF16_PACK(b16_s, b16_e, b16_m); - - return u16; -} - -int -rte_ml_io_float32_to_bfloat16(uint64_t nb_elements, void *input, void *output) -{ - float *input_buffer; - uint16_t *output_buffer; - uint64_t i; - - if ((nb_elements == 0) || (input == NULL) || (output == NULL)) - return -EINVAL; - - input_buffer = (float *)input; - output_buffer = (uint16_t *)output; - - for (i = 0; i < nb_elements; i++) { - *output_buffer = __float32_to_bfloat16_scalar_rtn(*input_buffer); - - input_buffer = input_buffer + 1; - output_buffer = output_buffer + 1; - } - - return 0; -} - -/* Convert a brain float number (bfloat16) into a - * single precision floating point number (float32). - */ -static float -__bfloat16_to_float32_scalar_rtx(uint16_t f16) -{ - union float32 f32; /* float32 output */ - uint16_t b16_s; /* float16 sign */ - uint16_t b16_e; /* float16 exponent */ - uint16_t b16_m; /* float16 mantissa */ - uint32_t f32_s; /* float32 sign */ - uint32_t f32_e; /* float32 exponent */ - uint32_t f32_m; /* float32 mantissa*/ - uint8_t shift; /* number of bits to be shifted */ - - b16_s = (f16 & BF16_MASK_S) >> BF16_LSB_S; - b16_e = (f16 & BF16_MASK_E) >> BF16_LSB_E; - b16_m = (f16 & BF16_MASK_M) >> BF16_LSB_M; - - f32_s = b16_s; - switch (b16_e) { - case (BF16_MASK_E >> BF16_LSB_E): /* bfloat16: infinity or nan */ - f32_e = FP32_MASK_E >> FP32_LSB_E; - if (b16_m == 0x0) { /* infinity */ - f32_m = 0; - } else { /* nan, propagate mantissa, set MSB of mantissa to 1 */ - f32_m = b16_m; - shift = FP32_MSB_M - BF16_MSB_M; - f32_m = (f32_m << shift) & FP32_MASK_M; - f32_m |= BIT(FP32_MSB_M); - } - break; - case 0: /* bfloat16: zero or subnormal */ - f32_m = b16_m; - if (b16_m == 0) { /* zero signed */ - f32_e = 0; - } else { /* subnormal numbers */ - goto fp32_normal; - } - break; - default: /* bfloat16: normal number */ - goto fp32_normal; - } - - goto fp32_pack; - -fp32_normal: - f32_m = b16_m; - f32_e = FP32_BIAS_E + b16_e - BF16_BIAS_E; - - shift = (FP32_MSB_M - BF16_MSB_M); - f32_m = (f32_m << shift) & FP32_MASK_M; - -fp32_pack: - f32.u = FP32_PACK(f32_s, f32_e, f32_m); - - return f32.f; -} - -int -rte_ml_io_bfloat16_to_float32(uint64_t nb_elements, void *input, void *output) -{ - uint16_t *input_buffer; - float *output_buffer; - uint64_t i; - - if ((nb_elements == 0) || (input == NULL) || (output == NULL)) - return -EINVAL; - - input_buffer = (uint16_t *)input; - output_buffer = (float *)output; - - for (i = 0; i < nb_elements; i++) { - *output_buffer = __bfloat16_to_float32_scalar_rtx(*input_buffer); - - input_buffer = input_buffer + 1; - output_buffer = output_buffer + 1; - } - - return 0; -} https://lab.dpdk.org/results/dashboard/patchsets/25706/ UNH-IOL DPDK Community Lab