* [PATCH v2 0/3] net: add thread-safe crc api
@ 2024-10-01 18:11 Arkadiusz Kusztal
2024-10-01 18:11 ` [PATCH v2 1/3] " Arkadiusz Kusztal
` (2 more replies)
0 siblings, 3 replies; 17+ messages in thread
From: Arkadiusz Kusztal @ 2024-10-01 18:11 UTC (permalink / raw)
To: dev; +Cc: ferruh.yigit, kai.ji, brian.dooley, Arkadiusz Kusztal
The current net CRC API is not thread-safe, this patch
solves this by adding another, thread-safe API functions.
This API is also safe to use across multiple processes,
yet with limitations on max-simd-bitwidth, which will be checked only by
the process that created the CRC context; all other processes will use
the same CRC function when used with the same CRC context.
It is an undefined behavior when process binaries are compiled
with different SIMD capabilities when the same CRC context is used
v2:
- removed old API
- added multi-process support
- replaced test cases
- marked internal functions as __rte_internal
Arkadiusz Kusztal (3):
net: add thread-safe crc api
crypto/qat: use process safe crc api
test/crc: replace thread-unsafe api functions
app/test/test_crc.c | 168 ++++++++-----------
doc/guides/cryptodevs/qat.rst | 6 +
drivers/crypto/qat/qat_sym.h | 6 +-
drivers/crypto/qat/qat_sym_session.c | 3 +
drivers/crypto/qat/qat_sym_session.h | 2 +
lib/net/net_crc.h | 19 ++-
lib/net/rte_net_crc.c | 309 ++++++++++-------------------------
lib/net/rte_net_crc.h | 40 +----
lib/net/version.map | 18 +-
9 files changed, 204 insertions(+), 367 deletions(-)
--
2.13.6
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH v2 1/3] net: add thread-safe crc api
2024-10-01 18:11 [PATCH v2 0/3] net: add thread-safe crc api Arkadiusz Kusztal
@ 2024-10-01 18:11 ` Arkadiusz Kusztal
2024-10-01 21:44 ` Stephen Hemminger
` (3 more replies)
2024-10-01 18:11 ` [PATCH v2 2/3] crypto/qat: use process safe " Arkadiusz Kusztal
2024-10-01 18:11 ` [PATCH v2 3/3] test/crc: replace thread-unsafe api functions Arkadiusz Kusztal
2 siblings, 4 replies; 17+ messages in thread
From: Arkadiusz Kusztal @ 2024-10-01 18:11 UTC (permalink / raw)
To: dev; +Cc: ferruh.yigit, kai.ji, brian.dooley, Arkadiusz Kusztal
The current net CRC API is not thread-safe, this patch
solves this by adding another, thread-safe API functions.
This API is also safe to use across multiple processes,
yet with limitations on max-simd-bitwidth, which will be checked only by
the process that created the CRC context; all other processes will use
the same CRC function when used with the same CRC context.
It is an undefined behavior when process binaries are compiled
with different SIMD capabilities when the same CRC context is used.
Signed-off-by: Arkadiusz Kusztal <arkadiuszx.kusztal@intel.com>
---
lib/net/net_crc.h | 19 ++--
lib/net/rte_net_crc.c | 309 +++++++++++++++-----------------------------------
lib/net/rte_net_crc.h | 40 ++-----
lib/net/version.map | 18 ++-
4 files changed, 124 insertions(+), 262 deletions(-)
diff --git a/lib/net/net_crc.h b/lib/net/net_crc.h
index 7a74d5406c..d220200685 100644
--- a/lib/net/net_crc.h
+++ b/lib/net/net_crc.h
@@ -5,40 +5,41 @@
#ifndef _NET_CRC_H_
#define _NET_CRC_H_
-/*
- * Different implementations of CRC
- */
-
-/* SSE4.2 */
+#include <rte_compat.h>
+__rte_internal
void
rte_net_crc_sse42_init(void);
+__rte_internal
uint32_t
rte_crc16_ccitt_sse42_handler(const uint8_t *data, uint32_t data_len);
+__rte_internal
uint32_t
rte_crc32_eth_sse42_handler(const uint8_t *data, uint32_t data_len);
-/* AVX512 */
-
+__rte_internal
void
rte_net_crc_avx512_init(void);
+__rte_internal
uint32_t
rte_crc16_ccitt_avx512_handler(const uint8_t *data, uint32_t data_len);
+__rte_internal
uint32_t
rte_crc32_eth_avx512_handler(const uint8_t *data, uint32_t data_len);
-/* NEON */
-
+__rte_internal
void
rte_net_crc_neon_init(void);
+__rte_internal
uint32_t
rte_crc16_ccitt_neon_handler(const uint8_t *data, uint32_t data_len);
+__rte_internal
uint32_t
rte_crc32_eth_neon_handler(const uint8_t *data, uint32_t data_len);
diff --git a/lib/net/rte_net_crc.c b/lib/net/rte_net_crc.c
index 346c285c15..da2f014d26 100644
--- a/lib/net/rte_net_crc.c
+++ b/lib/net/rte_net_crc.c
@@ -2,7 +2,6 @@
* Copyright(c) 2017-2020 Intel Corporation
*/
-#include <stddef.h>
#include <stdint.h>
#include <rte_cpuflags.h>
@@ -13,81 +12,26 @@
#include "net_crc.h"
-/** CRC polynomials */
+RTE_LOG_REGISTER_DEFAULT(libnet_logtype, INFO);
+#define RTE_LOGTYPE_NET libnet_logtype
+
+#define NET_LOG(level, ...) \
+ RTE_LOG_LINE_PREFIX(level, NET, "%s(): ", __func__, __VA_ARGS__)
+
#define CRC32_ETH_POLYNOMIAL 0x04c11db7UL
#define CRC16_CCITT_POLYNOMIAL 0x1021U
-
#define CRC_LUT_SIZE 256
-/* crc tables */
static uint32_t crc32_eth_lut[CRC_LUT_SIZE];
static uint32_t crc16_ccitt_lut[CRC_LUT_SIZE];
-static uint32_t
-rte_crc16_ccitt_default_handler(const uint8_t *data, uint32_t data_len);
-
-static uint32_t
-rte_crc32_eth_default_handler(const uint8_t *data, uint32_t data_len);
-
-static uint32_t
-rte_crc16_ccitt_handler(const uint8_t *data, uint32_t data_len);
-
-static uint32_t
-rte_crc32_eth_handler(const uint8_t *data, uint32_t data_len);
-
-typedef uint32_t
-(*rte_net_crc_handler)(const uint8_t *data, uint32_t data_len);
-
-static rte_net_crc_handler handlers_default[] = {
- [RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_default_handler,
- [RTE_NET_CRC32_ETH] = rte_crc32_eth_default_handler,
-};
-
-static const rte_net_crc_handler *handlers = handlers_default;
-
-static const rte_net_crc_handler handlers_scalar[] = {
- [RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_handler,
- [RTE_NET_CRC32_ETH] = rte_crc32_eth_handler,
-};
-#ifdef CC_X86_64_AVX512_VPCLMULQDQ_SUPPORT
-static const rte_net_crc_handler handlers_avx512[] = {
- [RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_avx512_handler,
- [RTE_NET_CRC32_ETH] = rte_crc32_eth_avx512_handler,
-};
-#endif
-#ifdef CC_X86_64_SSE42_PCLMULQDQ_SUPPORT
-static const rte_net_crc_handler handlers_sse42[] = {
- [RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_sse42_handler,
- [RTE_NET_CRC32_ETH] = rte_crc32_eth_sse42_handler,
-};
-#endif
-#ifdef CC_ARM64_NEON_PMULL_SUPPORT
-static const rte_net_crc_handler handlers_neon[] = {
- [RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_neon_handler,
- [RTE_NET_CRC32_ETH] = rte_crc32_eth_neon_handler,
-};
-#endif
-
-static uint16_t max_simd_bitwidth;
-
-RTE_LOG_REGISTER_DEFAULT(libnet_logtype, INFO);
-#define RTE_LOGTYPE_NET libnet_logtype
-
-#define NET_LOG(level, ...) \
- RTE_LOG_LINE_PREFIX(level, NET, "%s(): ", __func__, __VA_ARGS__)
-
-/* Scalar handling */
+static struct
+{
+ uint32_t (*f[RTE_NET_CRC_REQS])
+ (const uint8_t *data, uint32_t data_len);
+} handlers[RTE_NET_CRC_AVX512 + 1];
-/**
- * Reflect the bits about the middle
- *
- * @param val
- * value to be reflected
- *
- * @return
- * reflected value
- */
-static uint32_t
+static inline uint32_t
reflect_32bits(uint32_t val)
{
uint32_t i, res = 0;
@@ -99,26 +43,7 @@ reflect_32bits(uint32_t val)
return res;
}
-static void
-crc32_eth_init_lut(uint32_t poly,
- uint32_t *lut)
-{
- uint32_t i, j;
-
- for (i = 0; i < CRC_LUT_SIZE; i++) {
- uint32_t crc = reflect_32bits(i);
-
- for (j = 0; j < 8; j++) {
- if (crc & 0x80000000L)
- crc = (crc << 1) ^ poly;
- else
- crc <<= 1;
- }
- lut[i] = reflect_32bits(crc);
- }
-}
-
-static __rte_always_inline uint32_t
+static inline uint32_t
crc32_eth_calc_lut(const uint8_t *data,
uint32_t data_len,
uint32_t crc,
@@ -130,20 +55,9 @@ crc32_eth_calc_lut(const uint8_t *data,
return crc;
}
-static void
-rte_net_crc_scalar_init(void)
-{
- /* 32-bit crc init */
- crc32_eth_init_lut(CRC32_ETH_POLYNOMIAL, crc32_eth_lut);
-
- /* 16-bit CRC init */
- crc32_eth_init_lut(CRC16_CCITT_POLYNOMIAL << 16, crc16_ccitt_lut);
-}
-
static inline uint32_t
-rte_crc16_ccitt_handler(const uint8_t *data, uint32_t data_len)
+crc16_ccitt(const uint8_t *data, uint32_t data_len)
{
- /* return 16-bit CRC value */
return (uint16_t)~crc32_eth_calc_lut(data,
data_len,
0xffff,
@@ -151,16 +65,42 @@ rte_crc16_ccitt_handler(const uint8_t *data, uint32_t data_len)
}
static inline uint32_t
-rte_crc32_eth_handler(const uint8_t *data, uint32_t data_len)
+crc32_eth(const uint8_t *data, uint32_t data_len)
{
- /* return 32-bit CRC value */
return ~crc32_eth_calc_lut(data,
data_len,
0xffffffffUL,
crc32_eth_lut);
}
-/* AVX512/VPCLMULQDQ handling */
+static void
+crc32_eth_init_lut(uint32_t poly,
+ uint32_t *lut)
+{
+ uint32_t i, j;
+
+ for (i = 0; i < CRC_LUT_SIZE; i++) {
+ uint32_t crc = reflect_32bits(i);
+
+ for (j = 0; j < 8; j++) {
+ if (crc & 0x80000000L)
+ crc = (crc << 1) ^ poly;
+ else
+ crc <<= 1;
+ }
+ lut[i] = reflect_32bits(crc);
+ }
+}
+
+static void
+crc_scalar_init(void)
+{
+ crc32_eth_init_lut(CRC32_ETH_POLYNOMIAL, crc32_eth_lut);
+ crc32_eth_init_lut(CRC16_CCITT_POLYNOMIAL << 16, crc16_ccitt_lut);
+
+ handlers[RTE_NET_CRC_SCALAR].f[RTE_NET_CRC16_CCITT] = crc16_ccitt;
+ handlers[RTE_NET_CRC_SCALAR].f[RTE_NET_CRC32_ETH] = crc32_eth;
+}
#define AVX512_VPCLMULQDQ_CPU_SUPPORTED ( \
rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) && \
@@ -171,170 +111,101 @@ rte_crc32_eth_handler(const uint8_t *data, uint32_t data_len)
rte_cpu_get_flag_enabled(RTE_CPUFLAG_VPCLMULQDQ) \
)
-static const rte_net_crc_handler *
-avx512_vpclmulqdq_get_handlers(void)
-{
-#ifdef CC_X86_64_AVX512_VPCLMULQDQ_SUPPORT
- if (AVX512_VPCLMULQDQ_CPU_SUPPORTED &&
- max_simd_bitwidth >= RTE_VECT_SIMD_512)
- return handlers_avx512;
-#endif
- NET_LOG(INFO, "Requirements not met, can't use AVX512");
- return NULL;
-}
-
static void
avx512_vpclmulqdq_init(void)
{
#ifdef CC_X86_64_AVX512_VPCLMULQDQ_SUPPORT
- if (AVX512_VPCLMULQDQ_CPU_SUPPORTED)
+ if (AVX512_VPCLMULQDQ_CPU_SUPPORTED) {
rte_net_crc_avx512_init();
+ handlers[RTE_NET_CRC_AVX512].f[RTE_NET_CRC16_CCITT] =
+ rte_crc16_ccitt_avx512_handler;
+ handlers[RTE_NET_CRC_AVX512].f[RTE_NET_CRC32_ETH] =
+ rte_crc32_eth_avx512_handler;
+ }
#endif
}
-/* SSE4.2/PCLMULQDQ handling */
-
#define SSE42_PCLMULQDQ_CPU_SUPPORTED \
rte_cpu_get_flag_enabled(RTE_CPUFLAG_PCLMULQDQ)
-static const rte_net_crc_handler *
-sse42_pclmulqdq_get_handlers(void)
-{
-#ifdef CC_X86_64_SSE42_PCLMULQDQ_SUPPORT
- if (SSE42_PCLMULQDQ_CPU_SUPPORTED &&
- max_simd_bitwidth >= RTE_VECT_SIMD_128)
- return handlers_sse42;
-#endif
- NET_LOG(INFO, "Requirements not met, can't use SSE");
- return NULL;
-}
-
static void
sse42_pclmulqdq_init(void)
{
#ifdef CC_X86_64_SSE42_PCLMULQDQ_SUPPORT
- if (SSE42_PCLMULQDQ_CPU_SUPPORTED)
+ if (SSE42_PCLMULQDQ_CPU_SUPPORTED) {
rte_net_crc_sse42_init();
+ handlers[RTE_NET_CRC_SSE42].f[RTE_NET_CRC16_CCITT] =
+ rte_crc16_ccitt_sse42_handler;
+ handlers[RTE_NET_CRC_SSE42].f[RTE_NET_CRC32_ETH] =
+ rte_crc32_eth_sse42_handler;
+ }
#endif
}
-/* NEON/PMULL handling */
-
#define NEON_PMULL_CPU_SUPPORTED \
rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL)
-static const rte_net_crc_handler *
-neon_pmull_get_handlers(void)
-{
-#ifdef CC_ARM64_NEON_PMULL_SUPPORT
- if (NEON_PMULL_CPU_SUPPORTED &&
- max_simd_bitwidth >= RTE_VECT_SIMD_128)
- return handlers_neon;
-#endif
- NET_LOG(INFO, "Requirements not met, can't use NEON");
- return NULL;
-}
-
static void
neon_pmull_init(void)
{
#ifdef CC_ARM64_NEON_PMULL_SUPPORT
- if (NEON_PMULL_CPU_SUPPORTED)
+ if (NEON_PMULL_CPU_SUPPORTED) {
rte_net_crc_neon_init();
+ handlers[RTE_NET_CRC_NEON].f[RTE_NET_CRC16_CCITT] =
+ rte_crc16_ccitt_neon_handler;
+ handlers[RTE_NET_CRC_NEON].f[RTE_NET_CRC32_ETH] =
+ rte_crc32_eth_neon_handler;
+ }
#endif
}
-/* Default handling */
-
-static uint32_t
-rte_crc16_ccitt_default_handler(const uint8_t *data, uint32_t data_len)
-{
- handlers = NULL;
- if (max_simd_bitwidth == 0)
- max_simd_bitwidth = rte_vect_get_max_simd_bitwidth();
-
- handlers = avx512_vpclmulqdq_get_handlers();
- if (handlers != NULL)
- return handlers[RTE_NET_CRC16_CCITT](data, data_len);
- handlers = sse42_pclmulqdq_get_handlers();
- if (handlers != NULL)
- return handlers[RTE_NET_CRC16_CCITT](data, data_len);
- handlers = neon_pmull_get_handlers();
- if (handlers != NULL)
- return handlers[RTE_NET_CRC16_CCITT](data, data_len);
- handlers = handlers_scalar;
- return handlers[RTE_NET_CRC16_CCITT](data, data_len);
-}
-
-static uint32_t
-rte_crc32_eth_default_handler(const uint8_t *data, uint32_t data_len)
+struct rte_net_crc rte_net_crc_set(enum rte_net_crc_alg alg,
+ enum rte_net_crc_type type)
{
- handlers = NULL;
- if (max_simd_bitwidth == 0)
- max_simd_bitwidth = rte_vect_get_max_simd_bitwidth();
-
- handlers = avx512_vpclmulqdq_get_handlers();
- if (handlers != NULL)
- return handlers[RTE_NET_CRC32_ETH](data, data_len);
- handlers = sse42_pclmulqdq_get_handlers();
- if (handlers != NULL)
- return handlers[RTE_NET_CRC32_ETH](data, data_len);
- handlers = neon_pmull_get_handlers();
- if (handlers != NULL)
- return handlers[RTE_NET_CRC32_ETH](data, data_len);
- handlers = handlers_scalar;
- return handlers[RTE_NET_CRC32_ETH](data, data_len);
-}
+ uint16_t max_simd_bitwidth;
-/* Public API */
-
-void
-rte_net_crc_set_alg(enum rte_net_crc_alg alg)
-{
- handlers = NULL;
- if (max_simd_bitwidth == 0)
- max_simd_bitwidth = rte_vect_get_max_simd_bitwidth();
+ max_simd_bitwidth = rte_vect_get_max_simd_bitwidth();
switch (alg) {
case RTE_NET_CRC_AVX512:
- handlers = avx512_vpclmulqdq_get_handlers();
- if (handlers != NULL)
- break;
+#ifdef CC_X86_64_AVX512_VPCLMULQDQ_SUPPORT
+ if (AVX512_VPCLMULQDQ_CPU_SUPPORTED &&
+ max_simd_bitwidth >= RTE_VECT_SIMD_512) {
+ return (struct rte_net_crc){ RTE_NET_CRC_AVX512, type };
+ }
+#endif
/* fall-through */
case RTE_NET_CRC_SSE42:
- handlers = sse42_pclmulqdq_get_handlers();
- break; /* for x86, always break here */
+#ifdef CC_X86_64_SSE42_PCLMULQDQ_SUPPORT
+ if (SSE42_PCLMULQDQ_CPU_SUPPORTED &&
+ max_simd_bitwidth >= RTE_VECT_SIMD_128) {
+ return (struct rte_net_crc){ RTE_NET_CRC_SSE42, type };
+ }
+#endif
+ break;
case RTE_NET_CRC_NEON:
- handlers = neon_pmull_get_handlers();
- /* fall-through */
- case RTE_NET_CRC_SCALAR:
- /* fall-through */
+#ifdef CC_ARM64_NEON_PMULL_SUPPORT
+ if (NEON_PMULL_CPU_SUPPORTED &&
+ max_simd_bitwidth >= RTE_VECT_SIMD_128) {
+ return (struct rte_net_crc){ RTE_NET_CRC_NEON, type };
+ }
+#endif
+ break;
default:
break;
}
-
- if (handlers == NULL)
- handlers = handlers_scalar;
+ return (struct rte_net_crc){ RTE_NET_CRC_SCALAR, type };
}
-uint32_t
-rte_net_crc_calc(const void *data,
- uint32_t data_len,
- enum rte_net_crc_type type)
+uint32_t rte_net_crc(const struct rte_net_crc *ctx,
+ const void *data, const uint32_t data_len)
{
- uint32_t ret;
- rte_net_crc_handler f_handle;
-
- f_handle = handlers[type];
- ret = f_handle(data, data_len);
-
- return ret;
+ return handlers[ctx->alg].f[ctx->type](data, data_len);
}
-/* Call initialisation helpers for all crc algorithm handlers */
RTE_INIT(rte_net_crc_init)
{
- rte_net_crc_scalar_init();
+ crc_scalar_init();
sse42_pclmulqdq_init();
avx512_vpclmulqdq_init();
neon_pmull_init();
diff --git a/lib/net/rte_net_crc.h b/lib/net/rte_net_crc.h
index 72d3e10ff6..2ec8ebbb91 100644
--- a/lib/net/rte_net_crc.h
+++ b/lib/net/rte_net_crc.h
@@ -5,8 +5,6 @@
#ifndef _RTE_NET_CRC_H_
#define _RTE_NET_CRC_H_
-#include <stdint.h>
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -26,39 +24,17 @@ enum rte_net_crc_alg {
RTE_NET_CRC_AVX512,
};
-/**
- * This API set the CRC computation algorithm (i.e. scalar version,
- * x86 64-bit sse4.2 intrinsic version, etc.) and internal data
- * structure.
- *
- * @param alg
- * This parameter is used to select the CRC implementation version.
- * - RTE_NET_CRC_SCALAR
- * - RTE_NET_CRC_SSE42 (Use 64-bit SSE4.2 intrinsic)
- * - RTE_NET_CRC_NEON (Use ARM Neon intrinsic)
- * - RTE_NET_CRC_AVX512 (Use 512-bit AVX intrinsic)
- */
-void
-rte_net_crc_set_alg(enum rte_net_crc_alg alg);
+struct rte_net_crc {
+ enum rte_net_crc_alg alg;
+ enum rte_net_crc_type type;
+};
-/**
- * CRC compute API
- *
- * @param data
- * Pointer to the packet data for CRC computation
- * @param data_len
- * Data length for CRC computation
- * @param type
- * CRC type (enum rte_net_crc_type)
- *
- * @return
- * CRC value
- */
-uint32_t
-rte_net_crc_calc(const void *data,
- uint32_t data_len,
+struct rte_net_crc rte_net_crc_set(enum rte_net_crc_alg alg,
enum rte_net_crc_type type);
+uint32_t rte_net_crc(const struct rte_net_crc *ctx,
+ const void *data, const uint32_t data_len);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/net/version.map b/lib/net/version.map
index bec4ce23ea..47daf1464a 100644
--- a/lib/net/version.map
+++ b/lib/net/version.map
@@ -4,11 +4,25 @@ DPDK_25 {
rte_eth_random_addr;
rte_ether_format_addr;
rte_ether_unformat_addr;
- rte_net_crc_calc;
- rte_net_crc_set_alg;
rte_net_get_ptype;
rte_net_make_rarp_packet;
rte_net_skip_ip6_ext;
+ rte_net_crc;
+ rte_net_crc_set;
local: *;
};
+
+INTERNAL {
+ global:
+
+ rte_net_crc_sse42_init;
+ rte_crc16_ccitt_sse42_handler;
+ rte_crc32_eth_sse42_handler;
+ rte_net_crc_avx512_init;
+ rte_crc16_ccitt_avx512_handler;
+ rte_crc32_eth_avx512_handler;
+ rte_net_crc_neon_init;
+ rte_crc16_ccitt_neon_handler;
+ rte_crc32_eth_neon_handler;
+};
--
2.13.6
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH v2 2/3] crypto/qat: use process safe crc api
2024-10-01 18:11 [PATCH v2 0/3] net: add thread-safe crc api Arkadiusz Kusztal
2024-10-01 18:11 ` [PATCH v2 1/3] " Arkadiusz Kusztal
@ 2024-10-01 18:11 ` Arkadiusz Kusztal
2024-10-01 18:11 ` [PATCH v2 3/3] test/crc: replace thread-unsafe api functions Arkadiusz Kusztal
2 siblings, 0 replies; 17+ messages in thread
From: Arkadiusz Kusztal @ 2024-10-01 18:11 UTC (permalink / raw)
To: dev; +Cc: ferruh.yigit, kai.ji, brian.dooley, Arkadiusz Kusztal
This patch replaces thread-unsafe CRC functions
with the safe ones.
Signed-off-by: Arkadiusz Kusztal <arkadiuszx.kusztal@intel.com>
---
doc/guides/cryptodevs/qat.rst | 6 ++++++
drivers/crypto/qat/qat_sym.h | 6 ++----
drivers/crypto/qat/qat_sym_session.c | 3 +++
drivers/crypto/qat/qat_sym_session.h | 2 ++
4 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/doc/guides/cryptodevs/qat.rst b/doc/guides/cryptodevs/qat.rst
index 68d792e4cc..d98195300b 100644
--- a/doc/guides/cryptodevs/qat.rst
+++ b/doc/guides/cryptodevs/qat.rst
@@ -148,6 +148,12 @@ Limitations
DOCSIS security protocol.
* Multi-segment buffers are not supported for combined Crypto-CRC DOCSIS
security protocol.
+* Max SIMD bitwidth (--force-max-simd-bitwidth) in multi-process may be ignored
+ by QAT in certain scenarios. In use cases where CRC computation is needed and
+ the device does not support it internally, the CRC context is created by the session;
+ therefore, the SIMD config used in the process that created the session will be persisant
+ across all processes. It is an undefined behavior when different process binaries are
+ compiled with different SIMD capabilities when using software CRC.
Extra notes on KASUMI F9
~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/drivers/crypto/qat/qat_sym.h b/drivers/crypto/qat/qat_sym.h
index eedf5de755..d0fda48e01 100644
--- a/drivers/crypto/qat/qat_sym.h
+++ b/drivers/crypto/qat/qat_sym.h
@@ -267,8 +267,7 @@ qat_crc_verify(struct qat_sym_session *ctx, struct rte_crypto_op *op)
crc_data = rte_pktmbuf_mtod_offset(sym_op->m_src, uint8_t *,
crc_data_ofs);
- crc = rte_net_crc_calc(crc_data, crc_data_len,
- RTE_NET_CRC32_ETH);
+ crc = rte_net_crc(&ctx->crc, crc_data, crc_data_len);
if (crc != *(uint32_t *)(crc_data + crc_data_len))
op->status = RTE_CRYPTO_OP_STATUS_AUTH_FAILED;
@@ -291,8 +290,7 @@ qat_crc_generate(struct qat_sym_session *ctx,
crc_data = rte_pktmbuf_mtod_offset(sym_op->m_src, uint8_t *,
sym_op->auth.data.offset);
crc = (uint32_t *)(crc_data + crc_data_len);
- *crc = rte_net_crc_calc(crc_data, crc_data_len,
- RTE_NET_CRC32_ETH);
+ *crc = rte_net_crc(&ctx->crc, crc_data, crc_data_len);
}
}
diff --git a/drivers/crypto/qat/qat_sym_session.c b/drivers/crypto/qat/qat_sym_session.c
index eb267db424..4f281de9c5 100644
--- a/drivers/crypto/qat/qat_sym_session.c
+++ b/drivers/crypto/qat/qat_sym_session.c
@@ -3176,6 +3176,9 @@ qat_sec_session_set_docsis_parameters(struct rte_cryptodev *dev,
ret = qat_sym_session_configure_crc(dev, xform, session);
if (ret < 0)
return ret;
+ } else {
+ /* Initialize crc algorithm */
+ session->crc = rte_net_crc_set(RTE_NET_CRC_AVX512, RTE_NET_CRC32_ETH);
}
qat_sym_session_finalize(session);
diff --git a/drivers/crypto/qat/qat_sym_session.h b/drivers/crypto/qat/qat_sym_session.h
index f2634774ec..fb84d0f494 100644
--- a/drivers/crypto/qat/qat_sym_session.h
+++ b/drivers/crypto/qat/qat_sym_session.h
@@ -7,6 +7,7 @@
#include <rte_crypto.h>
#include <cryptodev_pmd.h>
#include <rte_security.h>
+#include <rte_net_crc.h>
#include "qat_common.h"
#include "icp_qat_hw.h"
@@ -151,6 +152,7 @@ struct qat_sym_session {
uint32_t slice_types;
enum qat_sym_proto_flag qat_proto_flag;
qat_sym_build_request_t build_request[2];
+ struct rte_net_crc crc;
#ifndef RTE_QAT_OPENSSL
IMB_MGR *mb_mgr;
alignas(16) uint64_t expkey[4 * 15];
--
2.13.6
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH v2 3/3] test/crc: replace thread-unsafe api functions
2024-10-01 18:11 [PATCH v2 0/3] net: add thread-safe crc api Arkadiusz Kusztal
2024-10-01 18:11 ` [PATCH v2 1/3] " Arkadiusz Kusztal
2024-10-01 18:11 ` [PATCH v2 2/3] crypto/qat: use process safe " Arkadiusz Kusztal
@ 2024-10-01 18:11 ` Arkadiusz Kusztal
2024-12-02 22:33 ` Stephen Hemminger
2 siblings, 1 reply; 17+ messages in thread
From: Arkadiusz Kusztal @ 2024-10-01 18:11 UTC (permalink / raw)
To: dev; +Cc: ferruh.yigit, kai.ji, brian.dooley, Arkadiusz Kusztal
This patch replaces thread-unsafe CRC functions with
the safe ones.
Signed-off-by: Arkadiusz Kusztal <arkadiuszx.kusztal@intel.com>
---
app/test/test_crc.c | 168 +++++++++++++++++++++-------------------------------
1 file changed, 67 insertions(+), 101 deletions(-)
diff --git a/app/test/test_crc.c b/app/test/test_crc.c
index b85fca35fe..d7afec20b3 100644
--- a/app/test/test_crc.c
+++ b/app/test/test_crc.c
@@ -2,13 +2,13 @@
* Copyright(c) 2017-2020 Intel Corporation
*/
-#include "test.h"
-
#include <rte_hexdump.h>
#include <rte_malloc.h>
#include <rte_memcpy.h>
#include <rte_net_crc.h>
+#include "test.h"
+
#define CRC_VEC_LEN 32
#define CRC32_VEC_LEN1 1512
#define CRC32_VEC_LEN2 348
@@ -44,131 +44,97 @@ static const uint32_t crc32_vec_res = 0xb491aab4;
static const uint32_t crc32_vec1_res = 0xac54d294;
static const uint32_t crc32_vec2_res = 0xefaae02f;
static const uint32_t crc16_vec_res = 0x6bec;
-static const uint16_t crc16_vec1_res = 0x8cdd;
-static const uint16_t crc16_vec2_res = 0xec5b;
+static const uint32_t crc16_vec1_res = 0x8cdd;
+static const uint32_t crc16_vec2_res = 0xec5b;
static int
-crc_calc(const uint8_t *vec,
- uint32_t vec_len,
- enum rte_net_crc_type type)
+crc_all_algs(const char *desc, enum rte_net_crc_type type,
+ const uint8_t *data, int data_len, uint32_t res)
{
- /* compute CRC */
- uint32_t ret = rte_net_crc_calc(vec, vec_len, type);
+ struct rte_net_crc ctx;
+ uint32_t crc;
+ int ret = TEST_SUCCESS;
+
+ ctx = rte_net_crc_set(RTE_NET_CRC_SCALAR, type);
+ crc = rte_net_crc(&ctx, data, data_len);
+ if (crc != res) {
+ RTE_LOG(ERR, USER1, "TEST FAILED: %s SCALAR\n", desc);
+ debug_hexdump(stdout, "SCALAR", &crc, 4);
+ ret = TEST_FAILED;
+ }
- /* dump data on console */
- debug_hexdump(stdout, NULL, vec, vec_len);
+ ctx = rte_net_crc_set(RTE_NET_CRC_SSE42, type);
+ if (ctx.alg == RTE_NET_CRC_SSE42) {
+ crc = rte_net_crc(&ctx, data, data_len);
+ if (crc != res) {
+ RTE_LOG(ERR, USER1, "TEST FAILED: %s SSE42\n", desc);
+ debug_hexdump(stdout, "SSE", &crc, 4);
+ ret = TEST_FAILED;
+ }
+ }
+
+ ctx = rte_net_crc_set(RTE_NET_CRC_AVX512, type);
+ if (ctx.alg == RTE_NET_CRC_AVX512) {
+ crc = rte_net_crc(&ctx, data, data_len);
+ if (crc != res) {
+ RTE_LOG(ERR, USER1, "TEST FAILED: %s AVX512\n", desc);
+ debug_hexdump(stdout, "AVX512", &crc, 4);
+ ret = TEST_FAILED;
+ }
+ }
+
+ ctx = rte_net_crc_set(RTE_NET_CRC_NEON, type);
+ if (ctx.alg == RTE_NET_CRC_NEON) {
+ crc = rte_net_crc(&ctx, data, data_len);
+ if (crc != res) {
+ RTE_LOG(ERR, USER1, "TEST FAILED: %s NEON\n", desc);
+ debug_hexdump(stdout, "NEON", &crc, 4);
+ ret = TEST_FAILED;
+ }
+ }
- return ret;
+ return ret;
}
static int
-test_crc_calc(void)
-{
+crc_autotest(void)
+{ uint8_t *test_data;
uint32_t i;
- enum rte_net_crc_type type;
- uint8_t *test_data;
- uint32_t result;
- int error;
+ int ret = TEST_SUCCESS;
/* 32-bit ethernet CRC: Test 1 */
- type = RTE_NET_CRC32_ETH;
-
- result = crc_calc(crc_vec, CRC_VEC_LEN, type);
- if (result != crc32_vec_res)
- return -1;
+ ret = crc_all_algs("32-bit ethernet CRC: Test 1", RTE_NET_CRC32_ETH, crc_vec,
+ sizeof(crc_vec), crc32_vec_res);
/* 32-bit ethernet CRC: Test 2 */
test_data = rte_zmalloc(NULL, CRC32_VEC_LEN1, 0);
if (test_data == NULL)
return -7;
-
for (i = 0; i < CRC32_VEC_LEN1; i += 12)
rte_memcpy(&test_data[i], crc32_vec1, 12);
-
- result = crc_calc(test_data, CRC32_VEC_LEN1, type);
- if (result != crc32_vec1_res) {
- error = -2;
- goto fail;
- }
+ ret |= crc_all_algs("32-bit ethernet CRC: Test 2", RTE_NET_CRC32_ETH, test_data,
+ CRC32_VEC_LEN1, crc32_vec1_res);
/* 32-bit ethernet CRC: Test 3 */
+ memset(test_data, 0, CRC32_VEC_LEN1);
for (i = 0; i < CRC32_VEC_LEN2; i += 12)
rte_memcpy(&test_data[i], crc32_vec1, 12);
-
- result = crc_calc(test_data, CRC32_VEC_LEN2, type);
- if (result != crc32_vec2_res) {
- error = -3;
- goto fail;
- }
+ ret |= crc_all_algs("32-bit ethernet CRC: Test 3", RTE_NET_CRC32_ETH, test_data,
+ CRC32_VEC_LEN2, crc32_vec2_res);
/* 16-bit CCITT CRC: Test 4 */
- type = RTE_NET_CRC16_CCITT;
- result = crc_calc(crc_vec, CRC_VEC_LEN, type);
- if (result != crc16_vec_res) {
- error = -4;
- goto fail;
- }
- /* 16-bit CCITT CRC: Test 5 */
- result = crc_calc(crc16_vec1, CRC16_VEC_LEN1, type);
- if (result != crc16_vec1_res) {
- error = -5;
- goto fail;
- }
- /* 16-bit CCITT CRC: Test 6 */
- result = crc_calc(crc16_vec2, CRC16_VEC_LEN2, type);
- if (result != crc16_vec2_res) {
- error = -6;
- goto fail;
- }
-
- rte_free(test_data);
- return 0;
-
-fail:
- rte_free(test_data);
- return error;
-}
-
-static int
-test_crc(void)
-{
- int ret;
- /* set CRC scalar mode */
- rte_net_crc_set_alg(RTE_NET_CRC_SCALAR);
-
- ret = test_crc_calc();
- if (ret < 0) {
- printf("test_crc (scalar): failed (%d)\n", ret);
- return ret;
- }
- /* set CRC sse4.2 mode */
- rte_net_crc_set_alg(RTE_NET_CRC_SSE42);
-
- ret = test_crc_calc();
- if (ret < 0) {
- printf("test_crc (x86_64_SSE4.2): failed (%d)\n", ret);
- return ret;
- }
+ crc_all_algs("16-bit CCITT CRC: Test 4", RTE_NET_CRC16_CCITT, crc_vec,
+ sizeof(crc_vec), crc16_vec_res);
- /* set CRC avx512 mode */
- rte_net_crc_set_alg(RTE_NET_CRC_AVX512);
-
- ret = test_crc_calc();
- if (ret < 0) {
- printf("test crc (x86_64 AVX512): failed (%d)\n", ret);
- return ret;
- }
-
- /* set CRC neon mode */
- rte_net_crc_set_alg(RTE_NET_CRC_NEON);
+ /* 16-bit CCITT CRC: Test 5 */
+ ret |= crc_all_algs("16-bit CCITT CRC: Test 5", RTE_NET_CRC16_CCITT, crc16_vec1,
+ CRC16_VEC_LEN1, crc16_vec1_res);
- ret = test_crc_calc();
- if (ret < 0) {
- printf("test crc (arm64 neon pmull): failed (%d)\n", ret);
- return ret;
- }
+ /* 16-bit CCITT CRC: Test 6 */
+ ret |= crc_all_algs("16-bit CCITT CRC: Test 6", RTE_NET_CRC16_CCITT, crc16_vec2,
+ CRC16_VEC_LEN2, crc16_vec2_res);
- return 0;
+ return ret;
}
-REGISTER_FAST_TEST(crc_autotest, true, true, test_crc);
+REGISTER_FAST_TEST(crc_autotest, true, true, crc_autotest);
--
2.13.6
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v2 1/3] net: add thread-safe crc api
2024-10-01 18:11 ` [PATCH v2 1/3] " Arkadiusz Kusztal
@ 2024-10-01 21:44 ` Stephen Hemminger
2024-10-02 8:28 ` Kusztal, ArkadiuszX
2024-10-02 7:42 ` David Marchand
` (2 subsequent siblings)
3 siblings, 1 reply; 17+ messages in thread
From: Stephen Hemminger @ 2024-10-01 21:44 UTC (permalink / raw)
To: Arkadiusz Kusztal; +Cc: dev, ferruh.yigit, kai.ji, brian.dooley
On Tue, 1 Oct 2024 19:11:48 +0100
Arkadiusz Kusztal <arkadiuszx.kusztal@intel.com> wrote:
> diff --git a/lib/net/version.map b/lib/net/version.map
> index bec4ce23ea..47daf1464a 100644
> --- a/lib/net/version.map
> +++ b/lib/net/version.map
> @@ -4,11 +4,25 @@ DPDK_25 {
> rte_eth_random_addr;
> rte_ether_format_addr;
> rte_ether_unformat_addr;
> - rte_net_crc_calc;
> - rte_net_crc_set_alg;
> rte_net_get_ptype;
> rte_net_make_rarp_packet;
> rte_net_skip_ip6_ext;
> + rte_net_crc;
> + rte_net_crc_set;
>
> local: *;
> };
Please keep the symbols in version.map in alphabetic order.
And this would be an unannounced change to API/ABI which could cause
pain for some users.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v2 1/3] net: add thread-safe crc api
2024-10-01 18:11 ` [PATCH v2 1/3] " Arkadiusz Kusztal
2024-10-01 21:44 ` Stephen Hemminger
@ 2024-10-02 7:42 ` David Marchand
2024-10-02 8:41 ` Kusztal, ArkadiuszX
2024-10-08 3:42 ` Ferruh Yigit
2024-12-02 22:36 ` Stephen Hemminger
3 siblings, 1 reply; 17+ messages in thread
From: David Marchand @ 2024-10-02 7:42 UTC (permalink / raw)
To: Arkadiusz Kusztal; +Cc: dev, ferruh.yigit, kai.ji, brian.dooley
On Tue, Oct 1, 2024 at 9:27 PM Arkadiusz Kusztal
<arkadiuszx.kusztal@intel.com> wrote:
>
> The current net CRC API is not thread-safe, this patch
> solves this by adding another, thread-safe API functions.
> This API is also safe to use across multiple processes,
> yet with limitations on max-simd-bitwidth, which will be checked only by
> the process that created the CRC context; all other processes will use
> the same CRC function when used with the same CRC context.
> It is an undefined behavior when process binaries are compiled
> with different SIMD capabilities when the same CRC context is used.
>
> Signed-off-by: Arkadiusz Kusztal <arkadiuszx.kusztal@intel.com>
I am lost... do you mean thread-safe ? or DPDK multi process safe?
For now, I don't see why we need a new API (and especially why we
should break the existing one..).
> ---
> lib/net/net_crc.h | 19 ++--
> lib/net/rte_net_crc.c | 309 +++++++++++++++-----------------------------------
> lib/net/rte_net_crc.h | 40 ++-----
> lib/net/version.map | 18 ++-
> 4 files changed, 124 insertions(+), 262 deletions(-)
>
> diff --git a/lib/net/net_crc.h b/lib/net/net_crc.h
> index 7a74d5406c..d220200685 100644
> --- a/lib/net/net_crc.h
> +++ b/lib/net/net_crc.h
> @@ -5,40 +5,41 @@
> #ifndef _NET_CRC_H_
> #define _NET_CRC_H_
>
> -/*
> - * Different implementations of CRC
> - */
> -
> -/* SSE4.2 */
> +#include <rte_compat.h>
>
> +__rte_internal
> void
> rte_net_crc_sse42_init(void);
>
> +__rte_internal
> uint32_t
> rte_crc16_ccitt_sse42_handler(const uint8_t *data, uint32_t data_len);
>
> +__rte_internal
> uint32_t
> rte_crc32_eth_sse42_handler(const uint8_t *data, uint32_t data_len);
>
> -/* AVX512 */
> -
> +__rte_internal
> void
> rte_net_crc_avx512_init(void);
>
> +__rte_internal
> uint32_t
> rte_crc16_ccitt_avx512_handler(const uint8_t *data, uint32_t data_len);
>
> +__rte_internal
> uint32_t
> rte_crc32_eth_avx512_handler(const uint8_t *data, uint32_t data_len);
>
> -/* NEON */
> -
> +__rte_internal
> void
> rte_net_crc_neon_init(void);
>
> +__rte_internal
> uint32_t
> rte_crc16_ccitt_neon_handler(const uint8_t *data, uint32_t data_len);
>
> +__rte_internal
> uint32_t
> rte_crc32_eth_neon_handler(const uint8_t *data, uint32_t data_len);
Exporting internals but not using them out of the library makes no sense.
--
David Marchand
^ permalink raw reply [flat|nested] 17+ messages in thread
* RE: [PATCH v2 1/3] net: add thread-safe crc api
2024-10-01 21:44 ` Stephen Hemminger
@ 2024-10-02 8:28 ` Kusztal, ArkadiuszX
0 siblings, 0 replies; 17+ messages in thread
From: Kusztal, ArkadiuszX @ 2024-10-02 8:28 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: dev, ferruh.yigit, Ji, Kai, Dooley, Brian
> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Tuesday, October 1, 2024 11:44 PM
> To: Kusztal, ArkadiuszX <arkadiuszx.kusztal@intel.com>
> Cc: dev@dpdk.org; ferruh.yigit@amd.com; Ji, Kai <kai.ji@intel.com>; Dooley,
> Brian <brian.dooley@intel.com>
> Subject: Re: [PATCH v2 1/3] net: add thread-safe crc api
>
> On Tue, 1 Oct 2024 19:11:48 +0100
> Arkadiusz Kusztal <arkadiuszx.kusztal@intel.com> wrote:
>
> > diff --git a/lib/net/version.map b/lib/net/version.map index
> > bec4ce23ea..47daf1464a 100644
> > --- a/lib/net/version.map
> > +++ b/lib/net/version.map
> > @@ -4,11 +4,25 @@ DPDK_25 {
> > rte_eth_random_addr;
> > rte_ether_format_addr;
> > rte_ether_unformat_addr;
> > - rte_net_crc_calc;
> > - rte_net_crc_set_alg;
> > rte_net_get_ptype;
> > rte_net_make_rarp_packet;
> > rte_net_skip_ip6_ext;
> > + rte_net_crc;
> > + rte_net_crc_set;
> >
> > local: *;
> > };
>
> Please keep the symbols in version.map in alphabetic order.
My apologies, I will keep it in mind for the future
>
> And this would be an unannounced change to API/ABI which could cause pain
> for some users.
I know it is late for such a change, I may defer it then.
^ permalink raw reply [flat|nested] 17+ messages in thread
* RE: [PATCH v2 1/3] net: add thread-safe crc api
2024-10-02 7:42 ` David Marchand
@ 2024-10-02 8:41 ` Kusztal, ArkadiuszX
2024-10-02 9:01 ` David Marchand
0 siblings, 1 reply; 17+ messages in thread
From: Kusztal, ArkadiuszX @ 2024-10-02 8:41 UTC (permalink / raw)
To: Marchand, David; +Cc: dev, ferruh.yigit, Ji, Kai, Dooley, Brian
> -----Original Message-----
> From: David Marchand <david.marchand@redhat.com>
> Sent: Wednesday, October 2, 2024 9:42 AM
> To: Kusztal, ArkadiuszX <arkadiuszx.kusztal@intel.com>
> Cc: dev@dpdk.org; ferruh.yigit@amd.com; Ji, Kai <kai.ji@intel.com>; Dooley,
> Brian <brian.dooley@intel.com>
> Subject: Re: [PATCH v2 1/3] net: add thread-safe crc api
>
> On Tue, Oct 1, 2024 at 9:27 PM Arkadiusz Kusztal
> <arkadiuszx.kusztal@intel.com> wrote:
> >
> > The current net CRC API is not thread-safe, this patch solves this by
> > adding another, thread-safe API functions.
> > This API is also safe to use across multiple processes, yet with
> > limitations on max-simd-bitwidth, which will be checked only by the
> > process that created the CRC context; all other processes will use the
> > same CRC function when used with the same CRC context.
> > It is an undefined behavior when process binaries are compiled with
> > different SIMD capabilities when the same CRC context is used.
> >
> > Signed-off-by: Arkadiusz Kusztal <arkadiuszx.kusztal@intel.com>
>
> I am lost... do you mean thread-safe ? or DPDK multi process safe?
In this case both.
>
> For now, I don't see why we need a new API (and especially why we should
> break the existing one..).
In the current one, there exists a race condition that causes multi-threaded applications to crash; currently only QAT PMD is using it.
But I know it is late for such a change, so I probably should defer it. We can manage to fix it in a different way.
>
>
> > ---
> > lib/net/net_crc.h | 19 ++--
> > lib/net/rte_net_crc.c | 309
> > +++++++++++++++-----------------------------------
> > lib/net/rte_net_crc.h | 40 ++-----
> > lib/net/version.map | 18 ++-
> > 4 files changed, 124 insertions(+), 262 deletions(-)
> >
> > diff --git a/lib/net/net_crc.h b/lib/net/net_crc.h index
> > 7a74d5406c..d220200685 100644
> > --- a/lib/net/net_crc.h
> > +++ b/lib/net/net_crc.h
> > @@ -5,40 +5,41 @@
> > #ifndef _NET_CRC_H_
> > #define _NET_CRC_H_
> >
> > -/*
> > - * Different implementations of CRC
> > - */
> > -
> > -/* SSE4.2 */
> > +#include <rte_compat.h>
> >
> > +__rte_internal
> > void
> > rte_net_crc_sse42_init(void);
> >
> > +__rte_internal
> > uint32_t
> > rte_crc16_ccitt_sse42_handler(const uint8_t *data, uint32_t
> > data_len);
> >
> > +__rte_internal
> > uint32_t
> > rte_crc32_eth_sse42_handler(const uint8_t *data, uint32_t data_len);
> >
> > -/* AVX512 */
> > -
> > +__rte_internal
> > void
> > rte_net_crc_avx512_init(void);
> >
> > +__rte_internal
> > uint32_t
> > rte_crc16_ccitt_avx512_handler(const uint8_t *data, uint32_t
> > data_len);
> >
> > +__rte_internal
> > uint32_t
> > rte_crc32_eth_avx512_handler(const uint8_t *data, uint32_t data_len);
> >
> > -/* NEON */
> > -
> > +__rte_internal
> > void
> > rte_net_crc_neon_init(void);
> >
> > +__rte_internal
> > uint32_t
> > rte_crc16_ccitt_neon_handler(const uint8_t *data, uint32_t data_len);
> >
> > +__rte_internal
> > uint32_t
> > rte_crc32_eth_neon_handler(const uint8_t *data, uint32_t data_len);
>
> Exporting internals but not using them out of the library makes no sense.
>
So there must have been a misunderstanding on my part, the initial idea was to prevent the user from calling these functions.
>
> --
> David Marchand
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v2 1/3] net: add thread-safe crc api
2024-10-02 8:41 ` Kusztal, ArkadiuszX
@ 2024-10-02 9:01 ` David Marchand
2024-10-02 9:16 ` Kusztal, ArkadiuszX
0 siblings, 1 reply; 17+ messages in thread
From: David Marchand @ 2024-10-02 9:01 UTC (permalink / raw)
To: Kusztal, ArkadiuszX; +Cc: dev, ferruh.yigit, Ji, Kai, Dooley, Brian
On Wed, Oct 2, 2024 at 10:42 AM Kusztal, ArkadiuszX
<arkadiuszx.kusztal@intel.com> wrote:
> > Exporting internals but not using them out of the library makes no sense.
> >
> So there must have been a misunderstanding on my part, the initial idea was to prevent the user from calling these functions.
net_crc.h is an internal header that is not delivered in a DPDK
installation consumed by users.
--
David Marchand
^ permalink raw reply [flat|nested] 17+ messages in thread
* RE: [PATCH v2 1/3] net: add thread-safe crc api
2024-10-02 9:01 ` David Marchand
@ 2024-10-02 9:16 ` Kusztal, ArkadiuszX
0 siblings, 0 replies; 17+ messages in thread
From: Kusztal, ArkadiuszX @ 2024-10-02 9:16 UTC (permalink / raw)
To: Marchand, David; +Cc: dev, ferruh.yigit, Ji, Kai, Dooley, Brian
> -----Original Message-----
> From: David Marchand <david.marchand@redhat.com>
> Sent: Wednesday, October 2, 2024 11:02 AM
> To: Kusztal, ArkadiuszX <arkadiuszx.kusztal@intel.com>
> Cc: dev@dpdk.org; ferruh.yigit@amd.com; Ji, Kai <kai.ji@intel.com>; Dooley,
> Brian <brian.dooley@intel.com>
> Subject: Re: [PATCH v2 1/3] net: add thread-safe crc api
>
> On Wed, Oct 2, 2024 at 10:42 AM Kusztal, ArkadiuszX
> <arkadiuszx.kusztal@intel.com> wrote:
> > > Exporting internals but not using them out of the library makes no sense.
> > >
> > So there must have been a misunderstanding on my part, the initial idea was to
> prevent the user from calling these functions.
>
> net_crc.h is an internal header that is not delivered in a DPDK installation
> consumed by users.
Got it, thanks for the clarification.
>
>
> --
> David Marchand
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v2 1/3] net: add thread-safe crc api
2024-10-01 18:11 ` [PATCH v2 1/3] " Arkadiusz Kusztal
2024-10-01 21:44 ` Stephen Hemminger
2024-10-02 7:42 ` David Marchand
@ 2024-10-08 3:42 ` Ferruh Yigit
2024-10-08 20:51 ` Kusztal, ArkadiuszX
2024-12-02 22:36 ` Stephen Hemminger
3 siblings, 1 reply; 17+ messages in thread
From: Ferruh Yigit @ 2024-10-08 3:42 UTC (permalink / raw)
To: Kusztal, ArkadiuszX, Marchand, David; +Cc: dev, Ji, Kai, Dooley, Brian
On 10/1/2024 7:11 PM, Arkadiusz Kusztal wrote:
> The current net CRC API is not thread-safe, this patch
> solves this by adding another, thread-safe API functions.
> This API is also safe to use across multiple processes,
> yet with limitations on max-simd-bitwidth, which will be checked only by
> the process that created the CRC context; all other processes will use
> the same CRC function when used with the same CRC context.
> It is an undefined behavior when process binaries are compiled
> with different SIMD capabilities when the same CRC context is used.
>
> Signed-off-by: Arkadiusz Kusztal <arkadiuszx.kusztal@intel.com>
<...>
> +static struct
> +{
> + uint32_t (*f[RTE_NET_CRC_REQS])
> + (const uint8_t *data, uint32_t data_len);
>
It increases readability to typedef function pointers.
> +} handlers[RTE_NET_CRC_AVX512 + 1];
>
> -/**
> - * Reflect the bits about the middle
> - *
> - * @param val
> - * value to be reflected
> - *
> - * @return
> - * reflected value
> - */
> -static uint32_t
> +static inline uint32_t
>
Does changing to 'inline' required, as function is static compiler can
do the same.
> reflect_32bits(uint32_t val)
> {
> uint32_t i, res = 0;
> @@ -99,26 +43,7 @@ reflect_32bits(uint32_t val)
> return res;
> }
>
> -static void
> -crc32_eth_init_lut(uint32_t poly,
> - uint32_t *lut)
> -{
> - uint32_t i, j;
> -
> - for (i = 0; i < CRC_LUT_SIZE; i++) {
> - uint32_t crc = reflect_32bits(i);
> -
> - for (j = 0; j < 8; j++) {
> - if (crc & 0x80000000L)
> - crc = (crc << 1) ^ poly;
> - else
> - crc <<= 1;
> - }
> - lut[i] = reflect_32bits(crc);
> - }
> -}
> -
> -static __rte_always_inline uint32_t
> +static inline uint32_t
>
Why not forcing inline anymore?
Are these inline changes related to the thread-safety?
> crc32_eth_calc_lut(const uint8_t *data,
> uint32_t data_len,
> uint32_t crc,
> @@ -130,20 +55,9 @@ crc32_eth_calc_lut(const uint8_t *data,
> return crc;
> }
>
> -static void
> -rte_net_crc_scalar_init(void)
> -{
> - /* 32-bit crc init */
> - crc32_eth_init_lut(CRC32_ETH_POLYNOMIAL, crc32_eth_lut);
> -
> - /* 16-bit CRC init */
> - crc32_eth_init_lut(CRC16_CCITT_POLYNOMIAL << 16, crc16_ccitt_lut);
> -}
> -
> static inline uint32_t
> -rte_crc16_ccitt_handler(const uint8_t *data, uint32_t data_len)
> +crc16_ccitt(const uint8_t *data, uint32_t data_len)
> {
> - /* return 16-bit CRC value */
>
Why not keep comments? Are they wrong?
<...>
> +static void
> +crc_scalar_init(void)
> +{
> + crc32_eth_init_lut(CRC32_ETH_POLYNOMIAL, crc32_eth_lut);
> + crc32_eth_init_lut(CRC16_CCITT_POLYNOMIAL << 16, crc16_ccitt_lut);
> +
> + handlers[RTE_NET_CRC_SCALAR].f[RTE_NET_CRC16_CCITT] = crc16_ccitt;
> + handlers[RTE_NET_CRC_SCALAR].f[RTE_NET_CRC32_ETH] = crc32_eth;
>
+1 to remove global handlers pointer and add context,
But current handlers array content is static, it can be set when
defined, instead of functions.
<...>
> -static uint32_t
> -rte_crc32_eth_default_handler(const uint8_t *data, uint32_t data_len)
> +struct rte_net_crc rte_net_crc_set(enum rte_net_crc_alg alg,
> + enum rte_net_crc_type type)
> {
> - handlers = NULL;
> - if (max_simd_bitwidth == 0)
> - max_simd_bitwidth = rte_vect_get_max_simd_bitwidth();
> -
> - handlers = avx512_vpclmulqdq_get_handlers();
> - if (handlers != NULL)
> - return handlers[RTE_NET_CRC32_ETH](data, data_len);
> - handlers = sse42_pclmulqdq_get_handlers();
> - if (handlers != NULL)
> - return handlers[RTE_NET_CRC32_ETH](data, data_len);
> - handlers = neon_pmull_get_handlers();
> - if (handlers != NULL)
> - return handlers[RTE_NET_CRC32_ETH](data, data_len);
> - handlers = handlers_scalar;
> - return handlers[RTE_NET_CRC32_ETH](data, data_len);
> -}
> + uint16_t max_simd_bitwidth;
>
> -/* Public API */
> -
> -void
> -rte_net_crc_set_alg(enum rte_net_crc_alg alg)
> -{
> - handlers = NULL;
> - if (max_simd_bitwidth == 0)
> - max_simd_bitwidth = rte_vect_get_max_simd_bitwidth();
> + max_simd_bitwidth = rte_vect_get_max_simd_bitwidth();
>
> switch (alg) {
> case RTE_NET_CRC_AVX512:
> - handlers = avx512_vpclmulqdq_get_handlers();
> - if (handlers != NULL)
> - break;
> +#ifdef CC_X86_64_AVX512_VPCLMULQDQ_SUPPORT
> + if (AVX512_VPCLMULQDQ_CPU_SUPPORTED &&
> + max_simd_bitwidth >= RTE_VECT_SIMD_512) {
> + return (struct rte_net_crc){ RTE_NET_CRC_AVX512, type };
> + }
> +#endif
> /* fall-through */
> case RTE_NET_CRC_SSE42:
> - handlers = sse42_pclmulqdq_get_handlers();
> - break; /* for x86, always break here */
> +#ifdef CC_X86_64_SSE42_PCLMULQDQ_SUPPORT
> + if (SSE42_PCLMULQDQ_CPU_SUPPORTED &&
> + max_simd_bitwidth >= RTE_VECT_SIMD_128) {
> + return (struct rte_net_crc){ RTE_NET_CRC_SSE42, type };
> + }
> +#endif
> + break;
> case RTE_NET_CRC_NEON:
> - handlers = neon_pmull_get_handlers();
> - /* fall-through */
> - case RTE_NET_CRC_SCALAR:
> - /* fall-through */
> +#ifdef CC_ARM64_NEON_PMULL_SUPPORT
> + if (NEON_PMULL_CPU_SUPPORTED &&
> + max_simd_bitwidth >= RTE_VECT_SIMD_128) {
> + return (struct rte_net_crc){ RTE_NET_CRC_NEON, type };
> + }
> +#endif
>
Is it more readable as following, up to you:
```
rte_net_crc_set(alg, type) {
enum rte_net_crc_alg new_alg = RTE_NET_CRC_SCALAR;
switch (alg) {
case AVX512:
new_alg = ..
case NEON:
new_alg = ..
}
return struct rte_net_crc){ new_alg, type };
```
> + break;
> default:
> break;
> }
> -
> - if (handlers == NULL)
> - handlers = handlers_scalar;
> + return (struct rte_net_crc){ RTE_NET_CRC_SCALAR, type };
> }
>
> -uint32_t
> -rte_net_crc_calc(const void *data,
> - uint32_t data_len,
> - enum rte_net_crc_type type)
> +uint32_t rte_net_crc(const struct rte_net_crc *ctx,
> + const void *data, const uint32_t data_len)
> {
> - uint32_t ret;
> - rte_net_crc_handler f_handle;
> -
> - f_handle = handlers[type];
> - ret = f_handle(data, data_len);
> -
> - return ret;
> + return handlers[ctx->alg].f[ctx->type](data, data_len);
>
'rte_net_crc()' gets input from user and "struct rte_net_crc" is not
opaque, so user can provide invalid input, ctx->alg & ctx->type.
To protect against it input values should be checked before using.
Or I think user not need to know the details of the "struct
rte_net_crc", so it can be an opaque variable for user.
<...>
> -/**
> - * CRC compute API
> - *
> - * @param data
> - * Pointer to the packet data for CRC computation
> - * @param data_len
> - * Data length for CRC computation
> - * @param type
> - * CRC type (enum rte_net_crc_type)
> - *
> - * @return
> - * CRC value
> - */
> -uint32_t
> -rte_net_crc_calc(const void *data,
> - uint32_t data_len,
> +struct rte_net_crc rte_net_crc_set(enum rte_net_crc_alg alg,
> enum rte_net_crc_type type);
>
> +uint32_t rte_net_crc(const struct rte_net_crc *ctx,
> + const void *data, const uint32_t data_len);
> +
>
As these are APIs, can you please add doxygen comments to them?
> #ifdef __cplusplus
> }
> #endif
> diff --git a/lib/net/version.map b/lib/net/version.map
> index bec4ce23ea..47daf1464a 100644
> --- a/lib/net/version.map
> +++ b/lib/net/version.map
> @@ -4,11 +4,25 @@ DPDK_25 {
> rte_eth_random_addr;
> rte_ether_format_addr;
> rte_ether_unformat_addr;
> - rte_net_crc_calc;
> - rte_net_crc_set_alg;
> rte_net_get_ptype;
> rte_net_make_rarp_packet;
> rte_net_skip_ip6_ext;
> + rte_net_crc;
> + rte_net_crc_set;
>
> local: *;
> };
> +
> +INTERNAL {
> + global:
> +
> + rte_net_crc_sse42_init;
> + rte_crc16_ccitt_sse42_handler;
> + rte_crc32_eth_sse42_handler;
> + rte_net_crc_avx512_init;
> + rte_crc16_ccitt_avx512_handler;
> + rte_crc32_eth_avx512_handler;
> + rte_net_crc_neon_init;
> + rte_crc16_ccitt_neon_handler;
> + rte_crc32_eth_neon_handler;
> +};
>
+1 to David's comment, these are used only within component, no need to
export.
^ permalink raw reply [flat|nested] 17+ messages in thread
* RE: [PATCH v2 1/3] net: add thread-safe crc api
2024-10-08 3:42 ` Ferruh Yigit
@ 2024-10-08 20:51 ` Kusztal, ArkadiuszX
2024-10-09 1:03 ` Ferruh Yigit
0 siblings, 1 reply; 17+ messages in thread
From: Kusztal, ArkadiuszX @ 2024-10-08 20:51 UTC (permalink / raw)
To: Ferruh Yigit, Marchand, David; +Cc: dev, Ji, Kai, Dooley, Brian
Hi Ferruh,
Thanks for the review, comments inline,
> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@amd.com>
> Sent: Tuesday, October 8, 2024 5:43 AM
> To: Kusztal, ArkadiuszX <arkadiuszx.kusztal@intel.com>; Marchand, David
> <david.marchand@redhat.com>
> Cc: dev@dpdk.org; Ji, Kai <kai.ji@intel.com>; Dooley, Brian
> <brian.dooley@intel.com>
> Subject: Re: [PATCH v2 1/3] net: add thread-safe crc api
>
> On 10/1/2024 7:11 PM, Arkadiusz Kusztal wrote:
> > The current net CRC API is not thread-safe, this patch solves this by
> > adding another, thread-safe API functions.
> > This API is also safe to use across multiple processes, yet with
> > limitations on max-simd-bitwidth, which will be checked only by the
> > process that created the CRC context; all other processes will use the
> > same CRC function when used with the same CRC context.
> > It is an undefined behavior when process binaries are compiled with
> > different SIMD capabilities when the same CRC context is used.
> >
> > Signed-off-by: Arkadiusz Kusztal <arkadiuszx.kusztal@intel.com>
>
> <...>
>
> > +static struct
> > +{
> > + uint32_t (*f[RTE_NET_CRC_REQS])
> > + (const uint8_t *data, uint32_t data_len);
> >
>
> It increases readability to typedef function pointers.
Agree, though this typedef would be used here only, that’s why I left it out.
But I can add it then.
>
>
> > +} handlers[RTE_NET_CRC_AVX512 + 1];
> >
> > -/**
> > - * Reflect the bits about the middle
> > - *
> > - * @param val
> > - * value to be reflected
> > - *
> > - * @return
> > - * reflected value
> > - */
> > -static uint32_t
> > +static inline uint32_t
> >
>
> Does changing to 'inline' required, as function is static compiler can do the
> same.
True, though it may be more readable sometimes.
Of course there is no way that in O3 these functions would not be inlined by the compiler, regardless if the inline hint is present or not.
>
> > reflect_32bits(uint32_t val)
> > {
> > uint32_t i, res = 0;
> > @@ -99,26 +43,7 @@ reflect_32bits(uint32_t val)
> > return res;
> > }
> >
> > -static void
> > -crc32_eth_init_lut(uint32_t poly,
> > - uint32_t *lut)
> > -{
> > - uint32_t i, j;
> > -
> > - for (i = 0; i < CRC_LUT_SIZE; i++) {
> > - uint32_t crc = reflect_32bits(i);
> > -
> > - for (j = 0; j < 8; j++) {
> > - if (crc & 0x80000000L)
> > - crc = (crc << 1) ^ poly;
> > - else
> > - crc <<= 1;
> > - }
> > - lut[i] = reflect_32bits(crc);
> > - }
> > -}
> > -
> > -static __rte_always_inline uint32_t
> > +static inline uint32_t
> >
>
> Why not forcing inline anymore?
> Are these inline changes related to the thread-safety?
O3 will inline it anyway, and with always_inline it will be inline even in debug mode. I just see no reason forcing it upon the compiler.
>
> > crc32_eth_calc_lut(const uint8_t *data,
> > uint32_t data_len,
> > uint32_t crc,
> > @@ -130,20 +55,9 @@ crc32_eth_calc_lut(const uint8_t *data,
> > return crc;
> > }
> >
> > -static void
> > -rte_net_crc_scalar_init(void)
> > -{
> > - /* 32-bit crc init */
> > - crc32_eth_init_lut(CRC32_ETH_POLYNOMIAL, crc32_eth_lut);
> > -
> > - /* 16-bit CRC init */
> > - crc32_eth_init_lut(CRC16_CCITT_POLYNOMIAL << 16, crc16_ccitt_lut);
> > -}
> > -
> > static inline uint32_t
> > -rte_crc16_ccitt_handler(const uint8_t *data, uint32_t data_len)
> > +crc16_ccitt(const uint8_t *data, uint32_t data_len)
> > {
> > - /* return 16-bit CRC value */
> >
>
> Why not keep comments? Are they wrong?
Functions names are very self-explanatory, that’s why I dropped comments. I can add comments if needed.
>
> <...>
>
> > +static void
> > +crc_scalar_init(void)
> > +{
> > + crc32_eth_init_lut(CRC32_ETH_POLYNOMIAL, crc32_eth_lut);
> > + crc32_eth_init_lut(CRC16_CCITT_POLYNOMIAL << 16, crc16_ccitt_lut);
> > +
> > + handlers[RTE_NET_CRC_SCALAR].f[RTE_NET_CRC16_CCITT] =
> crc16_ccitt;
> > + handlers[RTE_NET_CRC_SCALAR].f[RTE_NET_CRC32_ETH] = crc32_eth;
> >
>
> +1 to remove global handlers pointer and add context,
>
> But current handlers array content is static, it can be set when defined, instead
> of functions.
Can do it for scalars, but for SIMD there is this runtime check like this:
if (AVX512_VPCLMULQDQ_CPU_SUPPORTED) {
So compiled binary on AVX512 machine could filter it out on the machine which does not support it.
There is no NULL check in crc function so it would not change much when called -> Invalid address vs Invalid instruction.
There are not many checks there, as this is CRC after all, it should be a small as possible, yet probably NULL check could be advisable in crc function then.
>
> <...>
>
> > -static uint32_t
> > -rte_crc32_eth_default_handler(const uint8_t *data, uint32_t data_len)
> > +struct rte_net_crc rte_net_crc_set(enum rte_net_crc_alg alg,
> > + enum rte_net_crc_type type)
> > {
> > - handlers = NULL;
> > - if (max_simd_bitwidth == 0)
> > - max_simd_bitwidth = rte_vect_get_max_simd_bitwidth();
> > -
> > - handlers = avx512_vpclmulqdq_get_handlers();
> > - if (handlers != NULL)
> > - return handlers[RTE_NET_CRC32_ETH](data, data_len);
> > - handlers = sse42_pclmulqdq_get_handlers();
> > - if (handlers != NULL)
> > - return handlers[RTE_NET_CRC32_ETH](data, data_len);
> > - handlers = neon_pmull_get_handlers();
> > - if (handlers != NULL)
> > - return handlers[RTE_NET_CRC32_ETH](data, data_len);
> > - handlers = handlers_scalar;
> > - return handlers[RTE_NET_CRC32_ETH](data, data_len);
> > -}
> > + uint16_t max_simd_bitwidth;
> >
> > -/* Public API */
> > -
> > -void
> > -rte_net_crc_set_alg(enum rte_net_crc_alg alg) -{
> > - handlers = NULL;
> > - if (max_simd_bitwidth == 0)
> > - max_simd_bitwidth = rte_vect_get_max_simd_bitwidth();
> > + max_simd_bitwidth = rte_vect_get_max_simd_bitwidth();
> >
> > switch (alg) {
> > case RTE_NET_CRC_AVX512:
> > - handlers = avx512_vpclmulqdq_get_handlers();
> > - if (handlers != NULL)
> > - break;
> > +#ifdef CC_X86_64_AVX512_VPCLMULQDQ_SUPPORT
> > + if (AVX512_VPCLMULQDQ_CPU_SUPPORTED &&
> > + max_simd_bitwidth >= RTE_VECT_SIMD_512) {
> > + return (struct rte_net_crc){ RTE_NET_CRC_AVX512,
> type };
> > + }
> > +#endif
> > /* fall-through */
> > case RTE_NET_CRC_SSE42:
> > - handlers = sse42_pclmulqdq_get_handlers();
> > - break; /* for x86, always break here */
> > +#ifdef CC_X86_64_SSE42_PCLMULQDQ_SUPPORT
> > + if (SSE42_PCLMULQDQ_CPU_SUPPORTED &&
> > + max_simd_bitwidth >= RTE_VECT_SIMD_128) {
> > + return (struct rte_net_crc){ RTE_NET_CRC_SSE42, type
> };
> > + }
> > +#endif
> > + break;
> > case RTE_NET_CRC_NEON:
> > - handlers = neon_pmull_get_handlers();
> > - /* fall-through */
> > - case RTE_NET_CRC_SCALAR:
> > - /* fall-through */
> > +#ifdef CC_ARM64_NEON_PMULL_SUPPORT
> > + if (NEON_PMULL_CPU_SUPPORTED &&
> > + max_simd_bitwidth >= RTE_VECT_SIMD_128) {
> > + return (struct rte_net_crc){ RTE_NET_CRC_NEON, type
> };
> > + }
> > +#endif
> >
>
> Is it more readable as following, up to you:
Agree, I will change it.
>
> ```
> rte_net_crc_set(alg, type) {
> enum rte_net_crc_alg new_alg = RTE_NET_CRC_SCALAR;
> switch (alg) {
> case AVX512:
> new_alg = ..
> case NEON:
> new_alg = ..
> }
> return struct rte_net_crc){ new_alg, type }; ```
>
>
>
>
> > + break;
> > default:
> > break;
> > }
> > -
> > - if (handlers == NULL)
> > - handlers = handlers_scalar;
> > + return (struct rte_net_crc){ RTE_NET_CRC_SCALAR, type };
> > }
> >
> > -uint32_t
> > -rte_net_crc_calc(const void *data,
> > - uint32_t data_len,
> > - enum rte_net_crc_type type)
> > +uint32_t rte_net_crc(const struct rte_net_crc *ctx,
> > + const void *data, const uint32_t data_len)
> > {
> > - uint32_t ret;
> > - rte_net_crc_handler f_handle;
> > -
> > - f_handle = handlers[type];
> > - ret = f_handle(data, data_len);
> > -
> > - return ret;
> > + return handlers[ctx->alg].f[ctx->type](data, data_len);
> >
>
> 'rte_net_crc()' gets input from user and "struct rte_net_crc" is not opaque, so
> user can provide invalid input, ctx->alg & ctx->type.
> To protect against it input values should be checked before using.
>
> Or I think user not need to know the details of the "struct rte_net_crc", so it can
> be an opaque variable for user.
I would love it to be opaque, but then I would have to return a pointer, which then would involve some allocations/deallocations and I wanted to keep is as simple as possible.
So probably the checks would be a way to go.
>
> <...>
>
> > -/**
> > - * CRC compute API
> > - *
> > - * @param data
> > - * Pointer to the packet data for CRC computation
> > - * @param data_len
> > - * Data length for CRC computation
> > - * @param type
> > - * CRC type (enum rte_net_crc_type)
> > - *
> > - * @return
> > - * CRC value
> > - */
> > -uint32_t
> > -rte_net_crc_calc(const void *data,
> > - uint32_t data_len,
> > +struct rte_net_crc rte_net_crc_set(enum rte_net_crc_alg alg,
> > enum rte_net_crc_type type);
> >
> > +uint32_t rte_net_crc(const struct rte_net_crc *ctx,
> > + const void *data, const uint32_t data_len);
> > +
> >
>
> As these are APIs, can you please add doxygen comments to them?
+1
I think this change could be deferred to 25.03.
Adding this API without removing the old one should be possible without any unwanted consequences?
I still have some second thoughts about this max-simd-width. DPDK does not impose any restrictions on this parameter in the multi-process usage,
there may be some room to alter some things there.
>
> > #ifdef __cplusplus
> > }
> > #endif
> > diff --git a/lib/net/version.map b/lib/net/version.map index
> > bec4ce23ea..47daf1464a 100644
> > --- a/lib/net/version.map
> > +++ b/lib/net/version.map
> > @@ -4,11 +4,25 @@ DPDK_25 {
> > rte_eth_random_addr;
> > rte_ether_format_addr;
> > rte_ether_unformat_addr;
> > - rte_net_crc_calc;
> > - rte_net_crc_set_alg;
> > rte_net_get_ptype;
> > rte_net_make_rarp_packet;
> > rte_net_skip_ip6_ext;
> > + rte_net_crc;
> > + rte_net_crc_set;
> >
> > local: *;
> > };
> > +
> > +INTERNAL {
> > + global:
> > +
> > + rte_net_crc_sse42_init;
> > + rte_crc16_ccitt_sse42_handler;
> > + rte_crc32_eth_sse42_handler;
> > + rte_net_crc_avx512_init;
> > + rte_crc16_ccitt_avx512_handler;
> > + rte_crc32_eth_avx512_handler;
> > + rte_net_crc_neon_init;
> > + rte_crc16_ccitt_neon_handler;
> > + rte_crc32_eth_neon_handler;
> > +};
> >
>
> +1 to David's comment, these are used only within component, no need to
> export.
>
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v2 1/3] net: add thread-safe crc api
2024-10-08 20:51 ` Kusztal, ArkadiuszX
@ 2024-10-09 1:03 ` Ferruh Yigit
2024-10-09 7:48 ` Kusztal, ArkadiuszX
0 siblings, 1 reply; 17+ messages in thread
From: Ferruh Yigit @ 2024-10-09 1:03 UTC (permalink / raw)
To: Kusztal, ArkadiuszX, Marchand, David; +Cc: dev, Ji, Kai, Dooley, Brian
On 10/8/2024 9:51 PM, Kusztal, ArkadiuszX wrote:
> Hi Ferruh,
> Thanks for the review, comments inline,
>
>> -----Original Message-----
>> From: Ferruh Yigit <ferruh.yigit@amd.com>
>> Sent: Tuesday, October 8, 2024 5:43 AM
>> To: Kusztal, ArkadiuszX <arkadiuszx.kusztal@intel.com>; Marchand, David
>> <david.marchand@redhat.com>
>> Cc: dev@dpdk.org; Ji, Kai <kai.ji@intel.com>; Dooley, Brian
>> <brian.dooley@intel.com>
>> Subject: Re: [PATCH v2 1/3] net: add thread-safe crc api
>>
>> On 10/1/2024 7:11 PM, Arkadiusz Kusztal wrote:
>>> The current net CRC API is not thread-safe, this patch solves this by
>>> adding another, thread-safe API functions.
>>> This API is also safe to use across multiple processes, yet with
>>> limitations on max-simd-bitwidth, which will be checked only by the
>>> process that created the CRC context; all other processes will use the
>>> same CRC function when used with the same CRC context.
>>> It is an undefined behavior when process binaries are compiled with
>>> different SIMD capabilities when the same CRC context is used.
>>>
>>> Signed-off-by: Arkadiusz Kusztal <arkadiuszx.kusztal@intel.com>
>>
>> <...>
>>
>>> +static struct
>>> +{
>>> + uint32_t (*f[RTE_NET_CRC_REQS])
>>> + (const uint8_t *data, uint32_t data_len);
>>>
>>
>> It increases readability to typedef function pointers.
>
> Agree, though this typedef would be used here only, that’s why I left it out.
> But I can add it then.
>
>>
>>
>>> +} handlers[RTE_NET_CRC_AVX512 + 1];
>>>
>>> -/**
>>> - * Reflect the bits about the middle
>>> - *
>>> - * @param val
>>> - * value to be reflected
>>> - *
>>> - * @return
>>> - * reflected value
>>> - */
>>> -static uint32_t
>>> +static inline uint32_t
>>>
>>
>> Does changing to 'inline' required, as function is static compiler can do the
>> same.
>
> True, though it may be more readable sometimes.
> Of course there is no way that in O3 these functions would not be inlined by the compiler, regardless if the inline hint is present or not.
>
>>
>>> reflect_32bits(uint32_t val)
>>> {
>>> uint32_t i, res = 0;
>>> @@ -99,26 +43,7 @@ reflect_32bits(uint32_t val)
>>> return res;
>>> }
>>>
>>> -static void
>>> -crc32_eth_init_lut(uint32_t poly,
>>> - uint32_t *lut)
>>> -{
>>> - uint32_t i, j;
>>> -
>>> - for (i = 0; i < CRC_LUT_SIZE; i++) {
>>> - uint32_t crc = reflect_32bits(i);
>>> -
>>> - for (j = 0; j < 8; j++) {
>>> - if (crc & 0x80000000L)
>>> - crc = (crc << 1) ^ poly;
>>> - else
>>> - crc <<= 1;
>>> - }
>>> - lut[i] = reflect_32bits(crc);
>>> - }
>>> -}
>>> -
>>> -static __rte_always_inline uint32_t
>>> +static inline uint32_t
>>>
>>
>> Why not forcing inline anymore?
>> Are these inline changes related to the thread-safety?
>
> O3 will inline it anyway, and with always_inline it will be inline even in debug mode. I just see no reason forcing it upon the compiler.
>
>>
>>> crc32_eth_calc_lut(const uint8_t *data,
>>> uint32_t data_len,
>>> uint32_t crc,
>>> @@ -130,20 +55,9 @@ crc32_eth_calc_lut(const uint8_t *data,
>>> return crc;
>>> }
>>>
>>> -static void
>>> -rte_net_crc_scalar_init(void)
>>> -{
>>> - /* 32-bit crc init */
>>> - crc32_eth_init_lut(CRC32_ETH_POLYNOMIAL, crc32_eth_lut);
>>> -
>>> - /* 16-bit CRC init */
>>> - crc32_eth_init_lut(CRC16_CCITT_POLYNOMIAL << 16, crc16_ccitt_lut);
>>> -}
>>> -
>>> static inline uint32_t
>>> -rte_crc16_ccitt_handler(const uint8_t *data, uint32_t data_len)
>>> +crc16_ccitt(const uint8_t *data, uint32_t data_len)
>>> {
>>> - /* return 16-bit CRC value */
>>>
>>
>> Why not keep comments? Are they wrong?
>
> Functions names are very self-explanatory, that’s why I dropped comments. I can add comments if needed.
>
I am for restricting changes to the target of the patch which is making
CRC calculation thread safe, unless code changes invalidates the
comments, lets keep them. Same goes with inline related modifications.
>>
>> <...>
>>
>>> +static void
>>> +crc_scalar_init(void)
>>> +{
>>> + crc32_eth_init_lut(CRC32_ETH_POLYNOMIAL, crc32_eth_lut);
>>> + crc32_eth_init_lut(CRC16_CCITT_POLYNOMIAL << 16, crc16_ccitt_lut);
>>> +
>>> + handlers[RTE_NET_CRC_SCALAR].f[RTE_NET_CRC16_CCITT] =
>> crc16_ccitt;
>>> + handlers[RTE_NET_CRC_SCALAR].f[RTE_NET_CRC32_ETH] = crc32_eth;
>>>
>>
>> +1 to remove global handlers pointer and add context,
>>
>> But current handlers array content is static, it can be set when defined, instead
>> of functions.
>
> Can do it for scalars, but for SIMD there is this runtime check like this:
> if (AVX512_VPCLMULQDQ_CPU_SUPPORTED) {
> So compiled binary on AVX512 machine could filter it out on the machine which does not support it.
> There is no NULL check in crc function so it would not change much when called -> Invalid address vs Invalid instruction.
> There are not many checks there, as this is CRC after all, it should be a small as possible, yet probably NULL check could be advisable in crc function then.
>
There is already AVX512_VPCLMULQDQ_CPU_SUPPORTED etc checks in
'rte_net_crc_set()'.
So does it work to update 'handlers' statically, without condition, but
have conditions when use them.
>
>>
>> <...>
>>
>>> -static uint32_t
>>> -rte_crc32_eth_default_handler(const uint8_t *data, uint32_t data_len)
>>> +struct rte_net_crc rte_net_crc_set(enum rte_net_crc_alg alg,
>>> + enum rte_net_crc_type type)
>>> {
>>> - handlers = NULL;
>>> - if (max_simd_bitwidth == 0)
>>> - max_simd_bitwidth = rte_vect_get_max_simd_bitwidth();
>>> -
>>> - handlers = avx512_vpclmulqdq_get_handlers();
>>> - if (handlers != NULL)
>>> - return handlers[RTE_NET_CRC32_ETH](data, data_len);
>>> - handlers = sse42_pclmulqdq_get_handlers();
>>> - if (handlers != NULL)
>>> - return handlers[RTE_NET_CRC32_ETH](data, data_len);
>>> - handlers = neon_pmull_get_handlers();
>>> - if (handlers != NULL)
>>> - return handlers[RTE_NET_CRC32_ETH](data, data_len);
>>> - handlers = handlers_scalar;
>>> - return handlers[RTE_NET_CRC32_ETH](data, data_len);
>>> -}
>>> + uint16_t max_simd_bitwidth;
>>>
>>> -/* Public API */
>>> -
>>> -void
>>> -rte_net_crc_set_alg(enum rte_net_crc_alg alg) -{
>>> - handlers = NULL;
>>> - if (max_simd_bitwidth == 0)
>>> - max_simd_bitwidth = rte_vect_get_max_simd_bitwidth();
>>> + max_simd_bitwidth = rte_vect_get_max_simd_bitwidth();
>>>
>>> switch (alg) {
>>> case RTE_NET_CRC_AVX512:
>>> - handlers = avx512_vpclmulqdq_get_handlers();
>>> - if (handlers != NULL)
>>> - break;
>>> +#ifdef CC_X86_64_AVX512_VPCLMULQDQ_SUPPORT
>>> + if (AVX512_VPCLMULQDQ_CPU_SUPPORTED &&
>>> + max_simd_bitwidth >= RTE_VECT_SIMD_512) {
>>> + return (struct rte_net_crc){ RTE_NET_CRC_AVX512,
>> type };
>>> + }
>>> +#endif
>>> /* fall-through */
>>> case RTE_NET_CRC_SSE42:
>>> - handlers = sse42_pclmulqdq_get_handlers();
>>> - break; /* for x86, always break here */
>>> +#ifdef CC_X86_64_SSE42_PCLMULQDQ_SUPPORT
>>> + if (SSE42_PCLMULQDQ_CPU_SUPPORTED &&
>>> + max_simd_bitwidth >= RTE_VECT_SIMD_128) {
>>> + return (struct rte_net_crc){ RTE_NET_CRC_SSE42, type
>> };
>>> + }
>>> +#endif
>>> + break;
>>> case RTE_NET_CRC_NEON:
>>> - handlers = neon_pmull_get_handlers();
>>> - /* fall-through */
>>> - case RTE_NET_CRC_SCALAR:
>>> - /* fall-through */
>>> +#ifdef CC_ARM64_NEON_PMULL_SUPPORT
>>> + if (NEON_PMULL_CPU_SUPPORTED &&
>>> + max_simd_bitwidth >= RTE_VECT_SIMD_128) {
>>> + return (struct rte_net_crc){ RTE_NET_CRC_NEON, type
>> };
>>> + }
>>> +#endif
>>>
>>
>> Is it more readable as following, up to you:
>
> Agree, I will change it.
>
>>
>> ```
>> rte_net_crc_set(alg, type) {
>> enum rte_net_crc_alg new_alg = RTE_NET_CRC_SCALAR;
>> switch (alg) {
>> case AVX512:
>> new_alg = ..
>> case NEON:
>> new_alg = ..
>> }
>> return struct rte_net_crc){ new_alg, type }; ```
>>
>>
>>
>>
>>> + break;
>>> default:
>>> break;
>>> }
>>> -
>>> - if (handlers == NULL)
>>> - handlers = handlers_scalar;
>>> + return (struct rte_net_crc){ RTE_NET_CRC_SCALAR, type };
>>> }
>>>
>>> -uint32_t
>>> -rte_net_crc_calc(const void *data,
>>> - uint32_t data_len,
>>> - enum rte_net_crc_type type)
>>> +uint32_t rte_net_crc(const struct rte_net_crc *ctx,
>>> + const void *data, const uint32_t data_len)
>>> {
>>> - uint32_t ret;
>>> - rte_net_crc_handler f_handle;
>>> -
>>> - f_handle = handlers[type];
>>> - ret = f_handle(data, data_len);
>>> -
>>> - return ret;
>>> + return handlers[ctx->alg].f[ctx->type](data, data_len);
>>>
>>
>> 'rte_net_crc()' gets input from user and "struct rte_net_crc" is not opaque, so
>> user can provide invalid input, ctx->alg & ctx->type.
>> To protect against it input values should be checked before using.
>>
>> Or I think user not need to know the details of the "struct rte_net_crc", so it can
>> be an opaque variable for user.
>
> I would love it to be opaque, but then I would have to return a pointer, which then would involve some allocations/deallocations and I wanted to keep is as simple as possible.
> So probably the checks would be a way to go.
>
True, +1 for simplicity.
>>
>> <...>
>>
>>> -/**
>>> - * CRC compute API
>>> - *
>>> - * @param data
>>> - * Pointer to the packet data for CRC computation
>>> - * @param data_len
>>> - * Data length for CRC computation
>>> - * @param type
>>> - * CRC type (enum rte_net_crc_type)
>>> - *
>>> - * @return
>>> - * CRC value
>>> - */
>>> -uint32_t
>>> -rte_net_crc_calc(const void *data,
>>> - uint32_t data_len,
>>> +struct rte_net_crc rte_net_crc_set(enum rte_net_crc_alg alg,
>>> enum rte_net_crc_type type);
>>>
>>> +uint32_t rte_net_crc(const struct rte_net_crc *ctx,
>>> + const void *data, const uint32_t data_len);
>>> +
>>>
>>
>> As these are APIs, can you please add doxygen comments to them?
> +1
> I think this change could be deferred to 25.03.
> Adding this API without removing the old one should be possible without any unwanted consequences?
>
This is not a new functionality but replacement of existing one, so it
will be confusing to have two set of APIs for same functionality with
similar names:
rte_net_crc_calc() and rte_net_crc()
rte_net_crc_set_alg() and rte_net_crc_set()
Also there are some internal functions used by these APIs and supporting
both new and old may force to have two version of these internal
functions and it will create complexity/noise.
As this release is ABI break release, it is easier to update APIs
(although deprecation notice is missing for this change).
As an alternative option, do you think applying ABI versioning in 25.03
works for these APIs?
If so, old version can be cleaned in v25.11.
> I still have some second thoughts about this max-simd-width. DPDK does not impose any restrictions on this parameter in the multi-process usage,
> there may be some room to alter some things there.
>
>>
>>> #ifdef __cplusplus
>>> }
>>> #endif
>>> diff --git a/lib/net/version.map b/lib/net/version.map index
>>> bec4ce23ea..47daf1464a 100644
>>> --- a/lib/net/version.map
>>> +++ b/lib/net/version.map
>>> @@ -4,11 +4,25 @@ DPDK_25 {
>>> rte_eth_random_addr;
>>> rte_ether_format_addr;
>>> rte_ether_unformat_addr;
>>> - rte_net_crc_calc;
>>> - rte_net_crc_set_alg;
>>> rte_net_get_ptype;
>>> rte_net_make_rarp_packet;
>>> rte_net_skip_ip6_ext;
>>> + rte_net_crc;
>>> + rte_net_crc_set;
>>>
>>> local: *;
>>> };
>>> +
>>> +INTERNAL {
>>> + global:
>>> +
>>> + rte_net_crc_sse42_init;
>>> + rte_crc16_ccitt_sse42_handler;
>>> + rte_crc32_eth_sse42_handler;
>>> + rte_net_crc_avx512_init;
>>> + rte_crc16_ccitt_avx512_handler;
>>> + rte_crc32_eth_avx512_handler;
>>> + rte_net_crc_neon_init;
>>> + rte_crc16_ccitt_neon_handler;
>>> + rte_crc32_eth_neon_handler;
>>> +};
>>>
>>
>> +1 to David's comment, these are used only within component, no need to
>> export.
>>
>
^ permalink raw reply [flat|nested] 17+ messages in thread
* RE: [PATCH v2 1/3] net: add thread-safe crc api
2024-10-09 1:03 ` Ferruh Yigit
@ 2024-10-09 7:48 ` Kusztal, ArkadiuszX
2024-10-09 9:11 ` Ferruh Yigit
0 siblings, 1 reply; 17+ messages in thread
From: Kusztal, ArkadiuszX @ 2024-10-09 7:48 UTC (permalink / raw)
To: Ferruh Yigit, Marchand, David; +Cc: dev, Ji, Kai, Dooley, Brian
> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@amd.com>
> Sent: Wednesday, October 9, 2024 3:03 AM
> To: Kusztal, ArkadiuszX <arkadiuszx.kusztal@intel.com>; Marchand, David
> <david.marchand@redhat.com>
> Cc: dev@dpdk.org; Ji, Kai <kai.ji@intel.com>; Dooley, Brian
> <brian.dooley@intel.com>
> Subject: Re: [PATCH v2 1/3] net: add thread-safe crc api
>
> On 10/8/2024 9:51 PM, Kusztal, ArkadiuszX wrote:
> > Hi Ferruh,
> > Thanks for the review, comments inline,
> >
> >> -----Original Message-----
> >> From: Ferruh Yigit <ferruh.yigit@amd.com>
> >> Sent: Tuesday, October 8, 2024 5:43 AM
> >> To: Kusztal, ArkadiuszX <arkadiuszx.kusztal@intel.com>; Marchand,
> >> David <david.marchand@redhat.com>
> >> Cc: dev@dpdk.org; Ji, Kai <kai.ji@intel.com>; Dooley, Brian
> >> <brian.dooley@intel.com>
> >> Subject: Re: [PATCH v2 1/3] net: add thread-safe crc api
> >>
> >> On 10/1/2024 7:11 PM, Arkadiusz Kusztal wrote:
> >>> The current net CRC API is not thread-safe, this patch solves this
> >>> by adding another, thread-safe API functions.
> >>> This API is also safe to use across multiple processes, yet with
> >>> limitations on max-simd-bitwidth, which will be checked only by the
> >>> process that created the CRC context; all other processes will use
> >>> the same CRC function when used with the same CRC context.
> >>> It is an undefined behavior when process binaries are compiled with
> >>> different SIMD capabilities when the same CRC context is used.
> >>>
> >>> Signed-off-by: Arkadiusz Kusztal <arkadiuszx.kusztal@intel.com>
> >>
> >> <...>
> >>
> >>> +static struct
> >>> +{
> >>> + uint32_t (*f[RTE_NET_CRC_REQS])
> >>> + (const uint8_t *data, uint32_t data_len);
> >>>
> >>
> >> It increases readability to typedef function pointers.
> >
> > Agree, though this typedef would be used here only, that’s why I left it out.
> > But I can add it then.
> >
> >>
> >>
> >>> +} handlers[RTE_NET_CRC_AVX512 + 1];
> >>>
> >>> -/**
> >>> - * Reflect the bits about the middle
> >>> - *
> >>> - * @param val
> >>> - * value to be reflected
> >>> - *
> >>> - * @return
> >>> - * reflected value
> >>> - */
> >>> -static uint32_t
> >>> +static inline uint32_t
> >>>
> >>
> >> Does changing to 'inline' required, as function is static compiler
> >> can do the same.
> >
> > True, though it may be more readable sometimes.
> > Of course there is no way that in O3 these functions would not be inlined by
> the compiler, regardless if the inline hint is present or not.
> >
> >>
> >>> reflect_32bits(uint32_t val)
> >>> {
> >>> uint32_t i, res = 0;
> >>> @@ -99,26 +43,7 @@ reflect_32bits(uint32_t val)
> >>> return res;
> >>> }
> >>>
> >>> -static void
> >>> -crc32_eth_init_lut(uint32_t poly,
> >>> - uint32_t *lut)
> >>> -{
> >>> - uint32_t i, j;
> >>> -
> >>> - for (i = 0; i < CRC_LUT_SIZE; i++) {
> >>> - uint32_t crc = reflect_32bits(i);
> >>> -
> >>> - for (j = 0; j < 8; j++) {
> >>> - if (crc & 0x80000000L)
> >>> - crc = (crc << 1) ^ poly;
> >>> - else
> >>> - crc <<= 1;
> >>> - }
> >>> - lut[i] = reflect_32bits(crc);
> >>> - }
> >>> -}
> >>> -
> >>> -static __rte_always_inline uint32_t
> >>> +static inline uint32_t
> >>>
> >>
> >> Why not forcing inline anymore?
> >> Are these inline changes related to the thread-safety?
> >
> > O3 will inline it anyway, and with always_inline it will be inline even in debug
> mode. I just see no reason forcing it upon the compiler.
> >
> >>
> >>> crc32_eth_calc_lut(const uint8_t *data,
> >>> uint32_t data_len,
> >>> uint32_t crc,
> >>> @@ -130,20 +55,9 @@ crc32_eth_calc_lut(const uint8_t *data,
> >>> return crc;
> >>> }
> >>>
> >>> -static void
> >>> -rte_net_crc_scalar_init(void)
> >>> -{
> >>> - /* 32-bit crc init */
> >>> - crc32_eth_init_lut(CRC32_ETH_POLYNOMIAL, crc32_eth_lut);
> >>> -
> >>> - /* 16-bit CRC init */
> >>> - crc32_eth_init_lut(CRC16_CCITT_POLYNOMIAL << 16, crc16_ccitt_lut);
> >>> -}
> >>> -
> >>> static inline uint32_t
> >>> -rte_crc16_ccitt_handler(const uint8_t *data, uint32_t data_len)
> >>> +crc16_ccitt(const uint8_t *data, uint32_t data_len)
> >>> {
> >>> - /* return 16-bit CRC value */
> >>>
> >>
> >> Why not keep comments? Are they wrong?
> >
> > Functions names are very self-explanatory, that’s why I dropped comments. I
> can add comments if needed.
> >
>
> I am for restricting changes to the target of the patch which is making CRC
> calculation thread safe, unless code changes invalidates the comments, lets
> keep them. Same goes with inline related modifications.
>
> >>
> >> <...>
> >>
> >>> +static void
> >>> +crc_scalar_init(void)
> >>> +{
> >>> + crc32_eth_init_lut(CRC32_ETH_POLYNOMIAL, crc32_eth_lut);
> >>> + crc32_eth_init_lut(CRC16_CCITT_POLYNOMIAL << 16, crc16_ccitt_lut);
> >>> +
> >>> + handlers[RTE_NET_CRC_SCALAR].f[RTE_NET_CRC16_CCITT] =
> >> crc16_ccitt;
> >>> + handlers[RTE_NET_CRC_SCALAR].f[RTE_NET_CRC32_ETH] = crc32_eth;
> >>>
> >>
> >> +1 to remove global handlers pointer and add context,
> >>
> >> But current handlers array content is static, it can be set when
> >> defined, instead of functions.
> >
> > Can do it for scalars, but for SIMD there is this runtime check like this:
> > if (AVX512_VPCLMULQDQ_CPU_SUPPORTED) { So compiled binary on
> AVX512
> > machine could filter it out on the machine which does not support it.
> > There is no NULL check in crc function so it would not change much when
> called -> Invalid address vs Invalid instruction.
> > There are not many checks there, as this is CRC after all, it should be a small as
> possible, yet probably NULL check could be advisable in crc function then.
> >
>
> There is already AVX512_VPCLMULQDQ_CPU_SUPPORTED etc checks in
> 'rte_net_crc_set()'.
> So does it work to update 'handlers' statically, without condition, but have
> conditions when use them.
>
> >
> >>
> >> <...>
> >>
> >>> -static uint32_t
> >>> -rte_crc32_eth_default_handler(const uint8_t *data, uint32_t
> >>> data_len)
> >>> +struct rte_net_crc rte_net_crc_set(enum rte_net_crc_alg alg,
> >>> + enum rte_net_crc_type type)
> >>> {
> >>> - handlers = NULL;
> >>> - if (max_simd_bitwidth == 0)
> >>> - max_simd_bitwidth = rte_vect_get_max_simd_bitwidth();
> >>> -
> >>> - handlers = avx512_vpclmulqdq_get_handlers();
> >>> - if (handlers != NULL)
> >>> - return handlers[RTE_NET_CRC32_ETH](data, data_len);
> >>> - handlers = sse42_pclmulqdq_get_handlers();
> >>> - if (handlers != NULL)
> >>> - return handlers[RTE_NET_CRC32_ETH](data, data_len);
> >>> - handlers = neon_pmull_get_handlers();
> >>> - if (handlers != NULL)
> >>> - return handlers[RTE_NET_CRC32_ETH](data, data_len);
> >>> - handlers = handlers_scalar;
> >>> - return handlers[RTE_NET_CRC32_ETH](data, data_len);
> >>> -}
> >>> + uint16_t max_simd_bitwidth;
> >>>
> >>> -/* Public API */
> >>> -
> >>> -void
> >>> -rte_net_crc_set_alg(enum rte_net_crc_alg alg) -{
> >>> - handlers = NULL;
> >>> - if (max_simd_bitwidth == 0)
> >>> - max_simd_bitwidth = rte_vect_get_max_simd_bitwidth();
> >>> + max_simd_bitwidth = rte_vect_get_max_simd_bitwidth();
> >>>
> >>> switch (alg) {
> >>> case RTE_NET_CRC_AVX512:
> >>> - handlers = avx512_vpclmulqdq_get_handlers();
> >>> - if (handlers != NULL)
> >>> - break;
> >>> +#ifdef CC_X86_64_AVX512_VPCLMULQDQ_SUPPORT
> >>> + if (AVX512_VPCLMULQDQ_CPU_SUPPORTED &&
> >>> + max_simd_bitwidth >= RTE_VECT_SIMD_512) {
> >>> + return (struct rte_net_crc){ RTE_NET_CRC_AVX512,
> >> type };
> >>> + }
> >>> +#endif
> >>> /* fall-through */
> >>> case RTE_NET_CRC_SSE42:
> >>> - handlers = sse42_pclmulqdq_get_handlers();
> >>> - break; /* for x86, always break here */
> >>> +#ifdef CC_X86_64_SSE42_PCLMULQDQ_SUPPORT
> >>> + if (SSE42_PCLMULQDQ_CPU_SUPPORTED &&
> >>> + max_simd_bitwidth >= RTE_VECT_SIMD_128) {
> >>> + return (struct rte_net_crc){ RTE_NET_CRC_SSE42, type
> >> };
> >>> + }
> >>> +#endif
> >>> + break;
> >>> case RTE_NET_CRC_NEON:
> >>> - handlers = neon_pmull_get_handlers();
> >>> - /* fall-through */
> >>> - case RTE_NET_CRC_SCALAR:
> >>> - /* fall-through */
> >>> +#ifdef CC_ARM64_NEON_PMULL_SUPPORT
> >>> + if (NEON_PMULL_CPU_SUPPORTED &&
> >>> + max_simd_bitwidth >= RTE_VECT_SIMD_128) {
> >>> + return (struct rte_net_crc){ RTE_NET_CRC_NEON, type
> >> };
> >>> + }
> >>> +#endif
> >>>
> >>
> >> Is it more readable as following, up to you:
> >
> > Agree, I will change it.
> >
> >>
> >> ```
> >> rte_net_crc_set(alg, type) {
> >> enum rte_net_crc_alg new_alg = RTE_NET_CRC_SCALAR;
> >> switch (alg) {
> >> case AVX512:
> >> new_alg = ..
> >> case NEON:
> >> new_alg = ..
> >> }
> >> return struct rte_net_crc){ new_alg, type }; ```
> >>
> >>
> >>
> >>
> >>> + break;
> >>> default:
> >>> break;
> >>> }
> >>> -
> >>> - if (handlers == NULL)
> >>> - handlers = handlers_scalar;
> >>> + return (struct rte_net_crc){ RTE_NET_CRC_SCALAR, type };
> >>> }
> >>>
> >>> -uint32_t
> >>> -rte_net_crc_calc(const void *data,
> >>> - uint32_t data_len,
> >>> - enum rte_net_crc_type type)
> >>> +uint32_t rte_net_crc(const struct rte_net_crc *ctx,
> >>> + const void *data, const uint32_t data_len)
> >>> {
> >>> - uint32_t ret;
> >>> - rte_net_crc_handler f_handle;
> >>> -
> >>> - f_handle = handlers[type];
> >>> - ret = f_handle(data, data_len);
> >>> -
> >>> - return ret;
> >>> + return handlers[ctx->alg].f[ctx->type](data, data_len);
> >>>
> >>
> >> 'rte_net_crc()' gets input from user and "struct rte_net_crc" is not
> >> opaque, so user can provide invalid input, ctx->alg & ctx->type.
> >> To protect against it input values should be checked before using.
> >>
> >> Or I think user not need to know the details of the "struct
> >> rte_net_crc", so it can be an opaque variable for user.
> >
> > I would love it to be opaque, but then I would have to return a pointer, which
> then would involve some allocations/deallocations and I wanted to keep is as
> simple as possible.
> > So probably the checks would be a way to go.
> >
>
> True, +1 for simplicity.
>
> >>
> >> <...>
> >>
> >>> -/**
> >>> - * CRC compute API
> >>> - *
> >>> - * @param data
> >>> - * Pointer to the packet data for CRC computation
> >>> - * @param data_len
> >>> - * Data length for CRC computation
> >>> - * @param type
> >>> - * CRC type (enum rte_net_crc_type)
> >>> - *
> >>> - * @return
> >>> - * CRC value
> >>> - */
> >>> -uint32_t
> >>> -rte_net_crc_calc(const void *data,
> >>> - uint32_t data_len,
> >>> +struct rte_net_crc rte_net_crc_set(enum rte_net_crc_alg alg,
> >>> enum rte_net_crc_type type);
> >>>
> >>> +uint32_t rte_net_crc(const struct rte_net_crc *ctx,
> >>> + const void *data, const uint32_t data_len);
> >>> +
> >>>
> >>
> >> As these are APIs, can you please add doxygen comments to them?
> > +1
> > I think this change could be deferred to 25.03.
> > Adding this API without removing the old one should be possible without any
> unwanted consequences?
> >
>
> This is not a new functionality but replacement of existing one, so it will be
> confusing to have two set of APIs for same functionality with similar names:
> rte_net_crc_calc() and rte_net_crc()
> rte_net_crc_set_alg() and rte_net_crc_set()
>
> Also there are some internal functions used by these APIs and supporting both
> new and old may force to have two version of these internal functions and it will
> create complexity/noise.
>
> As this release is ABI break release, it is easier to update APIs (although
> deprecation notice is missing for this change).
>
>
> As an alternative option, do you think applying ABI versioning in 25.03 works for
> these APIs?
> If so, old version can be cleaned in v25.11.
I would go with versioning. I can preserve old function names as it's basically the same functionality.
>
>
> > I still have some second thoughts about this max-simd-width. DPDK does
> > not impose any restrictions on this parameter in the multi-process usage, there
> may be some room to alter some things there.
> >
> >>
> >>> #ifdef __cplusplus
> >>> }
> >>> #endif
> >>> diff --git a/lib/net/version.map b/lib/net/version.map index
> >>> bec4ce23ea..47daf1464a 100644
> >>> --- a/lib/net/version.map
> >>> +++ b/lib/net/version.map
> >>> @@ -4,11 +4,25 @@ DPDK_25 {
> >>> rte_eth_random_addr;
> >>> rte_ether_format_addr;
> >>> rte_ether_unformat_addr;
> >>> - rte_net_crc_calc;
> >>> - rte_net_crc_set_alg;
> >>> rte_net_get_ptype;
> >>> rte_net_make_rarp_packet;
> >>> rte_net_skip_ip6_ext;
> >>> + rte_net_crc;
> >>> + rte_net_crc_set;
> >>>
> >>> local: *;
> >>> };
> >>> +
> >>> +INTERNAL {
> >>> + global:
> >>> +
> >>> + rte_net_crc_sse42_init;
> >>> + rte_crc16_ccitt_sse42_handler;
> >>> + rte_crc32_eth_sse42_handler;
> >>> + rte_net_crc_avx512_init;
> >>> + rte_crc16_ccitt_avx512_handler;
> >>> + rte_crc32_eth_avx512_handler;
> >>> + rte_net_crc_neon_init;
> >>> + rte_crc16_ccitt_neon_handler;
> >>> + rte_crc32_eth_neon_handler;
> >>> +};
> >>>
> >>
> >> +1 to David's comment, these are used only within component, no need
> >> +to
> >> export.
> >>
> >
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v2 1/3] net: add thread-safe crc api
2024-10-09 7:48 ` Kusztal, ArkadiuszX
@ 2024-10-09 9:11 ` Ferruh Yigit
0 siblings, 0 replies; 17+ messages in thread
From: Ferruh Yigit @ 2024-10-09 9:11 UTC (permalink / raw)
To: Kusztal, ArkadiuszX, Marchand, David; +Cc: dev, Ji, Kai, Dooley, Brian
On 10/9/2024 8:48 AM, Kusztal, ArkadiuszX wrote:
>
>
>> -----Original Message-----
>> From: Ferruh Yigit <ferruh.yigit@amd.com>
>> Sent: Wednesday, October 9, 2024 3:03 AM
>> To: Kusztal, ArkadiuszX <arkadiuszx.kusztal@intel.com>; Marchand, David
>> <david.marchand@redhat.com>
>> Cc: dev@dpdk.org; Ji, Kai <kai.ji@intel.com>; Dooley, Brian
>> <brian.dooley@intel.com>
>> Subject: Re: [PATCH v2 1/3] net: add thread-safe crc api
>>
>> On 10/8/2024 9:51 PM, Kusztal, ArkadiuszX wrote:
>>> Hi Ferruh,
>>> Thanks for the review, comments inline,
>>>
>>>> -----Original Message-----
>>>> From: Ferruh Yigit <ferruh.yigit@amd.com>
>>>> Sent: Tuesday, October 8, 2024 5:43 AM
>>>> To: Kusztal, ArkadiuszX <arkadiuszx.kusztal@intel.com>; Marchand,
>>>> David <david.marchand@redhat.com>
>>>> Cc: dev@dpdk.org; Ji, Kai <kai.ji@intel.com>; Dooley, Brian
>>>> <brian.dooley@intel.com>
>>>> Subject: Re: [PATCH v2 1/3] net: add thread-safe crc api
>>>>
>>>> On 10/1/2024 7:11 PM, Arkadiusz Kusztal wrote:
>>>>> The current net CRC API is not thread-safe, this patch solves this
>>>>> by adding another, thread-safe API functions.
>>>>> This API is also safe to use across multiple processes, yet with
>>>>> limitations on max-simd-bitwidth, which will be checked only by the
>>>>> process that created the CRC context; all other processes will use
>>>>> the same CRC function when used with the same CRC context.
>>>>> It is an undefined behavior when process binaries are compiled with
>>>>> different SIMD capabilities when the same CRC context is used.
>>>>>
>>>>> Signed-off-by: Arkadiusz Kusztal <arkadiuszx.kusztal@intel.com>
>>>>
>>>> <...>
>>>>
>>>>> +static struct
>>>>> +{
>>>>> + uint32_t (*f[RTE_NET_CRC_REQS])
>>>>> + (const uint8_t *data, uint32_t data_len);
>>>>>
>>>>
>>>> It increases readability to typedef function pointers.
>>>
>>> Agree, though this typedef would be used here only, that’s why I left it out.
>>> But I can add it then.
>>>
>>>>
>>>>
>>>>> +} handlers[RTE_NET_CRC_AVX512 + 1];
>>>>>
>>>>> -/**
>>>>> - * Reflect the bits about the middle
>>>>> - *
>>>>> - * @param val
>>>>> - * value to be reflected
>>>>> - *
>>>>> - * @return
>>>>> - * reflected value
>>>>> - */
>>>>> -static uint32_t
>>>>> +static inline uint32_t
>>>>>
>>>>
>>>> Does changing to 'inline' required, as function is static compiler
>>>> can do the same.
>>>
>>> True, though it may be more readable sometimes.
>>> Of course there is no way that in O3 these functions would not be inlined by
>> the compiler, regardless if the inline hint is present or not.
>>>
>>>>
>>>>> reflect_32bits(uint32_t val)
>>>>> {
>>>>> uint32_t i, res = 0;
>>>>> @@ -99,26 +43,7 @@ reflect_32bits(uint32_t val)
>>>>> return res;
>>>>> }
>>>>>
>>>>> -static void
>>>>> -crc32_eth_init_lut(uint32_t poly,
>>>>> - uint32_t *lut)
>>>>> -{
>>>>> - uint32_t i, j;
>>>>> -
>>>>> - for (i = 0; i < CRC_LUT_SIZE; i++) {
>>>>> - uint32_t crc = reflect_32bits(i);
>>>>> -
>>>>> - for (j = 0; j < 8; j++) {
>>>>> - if (crc & 0x80000000L)
>>>>> - crc = (crc << 1) ^ poly;
>>>>> - else
>>>>> - crc <<= 1;
>>>>> - }
>>>>> - lut[i] = reflect_32bits(crc);
>>>>> - }
>>>>> -}
>>>>> -
>>>>> -static __rte_always_inline uint32_t
>>>>> +static inline uint32_t
>>>>>
>>>>
>>>> Why not forcing inline anymore?
>>>> Are these inline changes related to the thread-safety?
>>>
>>> O3 will inline it anyway, and with always_inline it will be inline even in debug
>> mode. I just see no reason forcing it upon the compiler.
>>>
>>>>
>>>>> crc32_eth_calc_lut(const uint8_t *data,
>>>>> uint32_t data_len,
>>>>> uint32_t crc,
>>>>> @@ -130,20 +55,9 @@ crc32_eth_calc_lut(const uint8_t *data,
>>>>> return crc;
>>>>> }
>>>>>
>>>>> -static void
>>>>> -rte_net_crc_scalar_init(void)
>>>>> -{
>>>>> - /* 32-bit crc init */
>>>>> - crc32_eth_init_lut(CRC32_ETH_POLYNOMIAL, crc32_eth_lut);
>>>>> -
>>>>> - /* 16-bit CRC init */
>>>>> - crc32_eth_init_lut(CRC16_CCITT_POLYNOMIAL << 16, crc16_ccitt_lut);
>>>>> -}
>>>>> -
>>>>> static inline uint32_t
>>>>> -rte_crc16_ccitt_handler(const uint8_t *data, uint32_t data_len)
>>>>> +crc16_ccitt(const uint8_t *data, uint32_t data_len)
>>>>> {
>>>>> - /* return 16-bit CRC value */
>>>>>
>>>>
>>>> Why not keep comments? Are they wrong?
>>>
>>> Functions names are very self-explanatory, that’s why I dropped comments. I
>> can add comments if needed.
>>>
>>
>> I am for restricting changes to the target of the patch which is making CRC
>> calculation thread safe, unless code changes invalidates the comments, lets
>> keep them. Same goes with inline related modifications.
>>
>>>>
>>>> <...>
>>>>
>>>>> +static void
>>>>> +crc_scalar_init(void)
>>>>> +{
>>>>> + crc32_eth_init_lut(CRC32_ETH_POLYNOMIAL, crc32_eth_lut);
>>>>> + crc32_eth_init_lut(CRC16_CCITT_POLYNOMIAL << 16, crc16_ccitt_lut);
>>>>> +
>>>>> + handlers[RTE_NET_CRC_SCALAR].f[RTE_NET_CRC16_CCITT] =
>>>> crc16_ccitt;
>>>>> + handlers[RTE_NET_CRC_SCALAR].f[RTE_NET_CRC32_ETH] = crc32_eth;
>>>>>
>>>>
>>>> +1 to remove global handlers pointer and add context,
>>>>
>>>> But current handlers array content is static, it can be set when
>>>> defined, instead of functions.
>>>
>>> Can do it for scalars, but for SIMD there is this runtime check like this:
>>> if (AVX512_VPCLMULQDQ_CPU_SUPPORTED) { So compiled binary on
>> AVX512
>>> machine could filter it out on the machine which does not support it.
>>> There is no NULL check in crc function so it would not change much when
>> called -> Invalid address vs Invalid instruction.
>>> There are not many checks there, as this is CRC after all, it should be a small as
>> possible, yet probably NULL check could be advisable in crc function then.
>>>
>>
>> There is already AVX512_VPCLMULQDQ_CPU_SUPPORTED etc checks in
>> 'rte_net_crc_set()'.
>> So does it work to update 'handlers' statically, without condition, but have
>> conditions when use them.
>>
>>>
>>>>
>>>> <...>
>>>>
>>>>> -static uint32_t
>>>>> -rte_crc32_eth_default_handler(const uint8_t *data, uint32_t
>>>>> data_len)
>>>>> +struct rte_net_crc rte_net_crc_set(enum rte_net_crc_alg alg,
>>>>> + enum rte_net_crc_type type)
>>>>> {
>>>>> - handlers = NULL;
>>>>> - if (max_simd_bitwidth == 0)
>>>>> - max_simd_bitwidth = rte_vect_get_max_simd_bitwidth();
>>>>> -
>>>>> - handlers = avx512_vpclmulqdq_get_handlers();
>>>>> - if (handlers != NULL)
>>>>> - return handlers[RTE_NET_CRC32_ETH](data, data_len);
>>>>> - handlers = sse42_pclmulqdq_get_handlers();
>>>>> - if (handlers != NULL)
>>>>> - return handlers[RTE_NET_CRC32_ETH](data, data_len);
>>>>> - handlers = neon_pmull_get_handlers();
>>>>> - if (handlers != NULL)
>>>>> - return handlers[RTE_NET_CRC32_ETH](data, data_len);
>>>>> - handlers = handlers_scalar;
>>>>> - return handlers[RTE_NET_CRC32_ETH](data, data_len);
>>>>> -}
>>>>> + uint16_t max_simd_bitwidth;
>>>>>
>>>>> -/* Public API */
>>>>> -
>>>>> -void
>>>>> -rte_net_crc_set_alg(enum rte_net_crc_alg alg) -{
>>>>> - handlers = NULL;
>>>>> - if (max_simd_bitwidth == 0)
>>>>> - max_simd_bitwidth = rte_vect_get_max_simd_bitwidth();
>>>>> + max_simd_bitwidth = rte_vect_get_max_simd_bitwidth();
>>>>>
>>>>> switch (alg) {
>>>>> case RTE_NET_CRC_AVX512:
>>>>> - handlers = avx512_vpclmulqdq_get_handlers();
>>>>> - if (handlers != NULL)
>>>>> - break;
>>>>> +#ifdef CC_X86_64_AVX512_VPCLMULQDQ_SUPPORT
>>>>> + if (AVX512_VPCLMULQDQ_CPU_SUPPORTED &&
>>>>> + max_simd_bitwidth >= RTE_VECT_SIMD_512) {
>>>>> + return (struct rte_net_crc){ RTE_NET_CRC_AVX512,
>>>> type };
>>>>> + }
>>>>> +#endif
>>>>> /* fall-through */
>>>>> case RTE_NET_CRC_SSE42:
>>>>> - handlers = sse42_pclmulqdq_get_handlers();
>>>>> - break; /* for x86, always break here */
>>>>> +#ifdef CC_X86_64_SSE42_PCLMULQDQ_SUPPORT
>>>>> + if (SSE42_PCLMULQDQ_CPU_SUPPORTED &&
>>>>> + max_simd_bitwidth >= RTE_VECT_SIMD_128) {
>>>>> + return (struct rte_net_crc){ RTE_NET_CRC_SSE42, type
>>>> };
>>>>> + }
>>>>> +#endif
>>>>> + break;
>>>>> case RTE_NET_CRC_NEON:
>>>>> - handlers = neon_pmull_get_handlers();
>>>>> - /* fall-through */
>>>>> - case RTE_NET_CRC_SCALAR:
>>>>> - /* fall-through */
>>>>> +#ifdef CC_ARM64_NEON_PMULL_SUPPORT
>>>>> + if (NEON_PMULL_CPU_SUPPORTED &&
>>>>> + max_simd_bitwidth >= RTE_VECT_SIMD_128) {
>>>>> + return (struct rte_net_crc){ RTE_NET_CRC_NEON, type
>>>> };
>>>>> + }
>>>>> +#endif
>>>>>
>>>>
>>>> Is it more readable as following, up to you:
>>>
>>> Agree, I will change it.
>>>
>>>>
>>>> ```
>>>> rte_net_crc_set(alg, type) {
>>>> enum rte_net_crc_alg new_alg = RTE_NET_CRC_SCALAR;
>>>> switch (alg) {
>>>> case AVX512:
>>>> new_alg = ..
>>>> case NEON:
>>>> new_alg = ..
>>>> }
>>>> return struct rte_net_crc){ new_alg, type }; ```
>>>>
>>>>
>>>>
>>>>
>>>>> + break;
>>>>> default:
>>>>> break;
>>>>> }
>>>>> -
>>>>> - if (handlers == NULL)
>>>>> - handlers = handlers_scalar;
>>>>> + return (struct rte_net_crc){ RTE_NET_CRC_SCALAR, type };
>>>>> }
>>>>>
>>>>> -uint32_t
>>>>> -rte_net_crc_calc(const void *data,
>>>>> - uint32_t data_len,
>>>>> - enum rte_net_crc_type type)
>>>>> +uint32_t rte_net_crc(const struct rte_net_crc *ctx,
>>>>> + const void *data, const uint32_t data_len)
>>>>> {
>>>>> - uint32_t ret;
>>>>> - rte_net_crc_handler f_handle;
>>>>> -
>>>>> - f_handle = handlers[type];
>>>>> - ret = f_handle(data, data_len);
>>>>> -
>>>>> - return ret;
>>>>> + return handlers[ctx->alg].f[ctx->type](data, data_len);
>>>>>
>>>>
>>>> 'rte_net_crc()' gets input from user and "struct rte_net_crc" is not
>>>> opaque, so user can provide invalid input, ctx->alg & ctx->type.
>>>> To protect against it input values should be checked before using.
>>>>
>>>> Or I think user not need to know the details of the "struct
>>>> rte_net_crc", so it can be an opaque variable for user.
>>>
>>> I would love it to be opaque, but then I would have to return a pointer, which
>> then would involve some allocations/deallocations and I wanted to keep is as
>> simple as possible.
>>> So probably the checks would be a way to go.
>>>
>>
>> True, +1 for simplicity.
>>
>>>>
>>>> <...>
>>>>
>>>>> -/**
>>>>> - * CRC compute API
>>>>> - *
>>>>> - * @param data
>>>>> - * Pointer to the packet data for CRC computation
>>>>> - * @param data_len
>>>>> - * Data length for CRC computation
>>>>> - * @param type
>>>>> - * CRC type (enum rte_net_crc_type)
>>>>> - *
>>>>> - * @return
>>>>> - * CRC value
>>>>> - */
>>>>> -uint32_t
>>>>> -rte_net_crc_calc(const void *data,
>>>>> - uint32_t data_len,
>>>>> +struct rte_net_crc rte_net_crc_set(enum rte_net_crc_alg alg,
>>>>> enum rte_net_crc_type type);
>>>>>
>>>>> +uint32_t rte_net_crc(const struct rte_net_crc *ctx,
>>>>> + const void *data, const uint32_t data_len);
>>>>> +
>>>>>
>>>>
>>>> As these are APIs, can you please add doxygen comments to them?
>>> +1
>>> I think this change could be deferred to 25.03.
>>> Adding this API without removing the old one should be possible without any
>> unwanted consequences?
>>>
>>
>> This is not a new functionality but replacement of existing one, so it will be
>> confusing to have two set of APIs for same functionality with similar names:
>> rte_net_crc_calc() and rte_net_crc()
>> rte_net_crc_set_alg() and rte_net_crc_set()
>>
>> Also there are some internal functions used by these APIs and supporting both
>> new and old may force to have two version of these internal functions and it will
>> create complexity/noise.
>>
>> As this release is ABI break release, it is easier to update APIs (although
>> deprecation notice is missing for this change).
>>
>>
>> As an alternative option, do you think applying ABI versioning in 25.03 works for
>> these APIs?
>> If so, old version can be cleaned in v25.11.
>
> I would go with versioning. I can preserve old function names as it's basically the same functionality.
>
ack
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v2 3/3] test/crc: replace thread-unsafe api functions
2024-10-01 18:11 ` [PATCH v2 3/3] test/crc: replace thread-unsafe api functions Arkadiusz Kusztal
@ 2024-12-02 22:33 ` Stephen Hemminger
0 siblings, 0 replies; 17+ messages in thread
From: Stephen Hemminger @ 2024-12-02 22:33 UTC (permalink / raw)
To: Arkadiusz Kusztal; +Cc: dev, ferruh.yigit, kai.ji, brian.dooley
On Tue, 1 Oct 2024 19:11:50 +0100
Arkadiusz Kusztal <arkadiuszx.kusztal@intel.com> wrote:
> This patch replaces thread-unsafe CRC functions with
> the safe ones.
>
> Signed-off-by: Arkadiusz Kusztal <arkadiuszx.kusztal@intel.com>
Testing the new API is good, but as long as the old API exists
the tests for it should be kept as well. Could you just add new autotest
instead of ditching the old one?
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v2 1/3] net: add thread-safe crc api
2024-10-01 18:11 ` [PATCH v2 1/3] " Arkadiusz Kusztal
` (2 preceding siblings ...)
2024-10-08 3:42 ` Ferruh Yigit
@ 2024-12-02 22:36 ` Stephen Hemminger
3 siblings, 0 replies; 17+ messages in thread
From: Stephen Hemminger @ 2024-12-02 22:36 UTC (permalink / raw)
To: Arkadiusz Kusztal; +Cc: dev, ferruh.yigit, kai.ji, brian.dooley
On Tue, 1 Oct 2024 19:11:48 +0100
Arkadiusz Kusztal <arkadiuszx.kusztal@intel.com> wrote:
> The current net CRC API is not thread-safe, this patch
> solves this by adding another, thread-safe API functions.
Couldn't the old API be made threadsafe with TLS?
> This API is also safe to use across multiple processes,
> yet with limitations on max-simd-bitwidth, which will be checked only by
> the process that created the CRC context; all other processes will use
> the same CRC function when used with the same CRC context.
> It is an undefined behavior when process binaries are compiled
> with different SIMD capabilities when the same CRC context is used.
>
> Signed-off-by: Arkadiusz Kusztal <arkadiuszx.kusztal@intel.com>
The API/ABI can't change for 25.03, do you want to support both?
Or wait until 25.11?
^ permalink raw reply [flat|nested] 17+ messages in thread
end of thread, other threads:[~2024-12-02 22:36 UTC | newest]
Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-10-01 18:11 [PATCH v2 0/3] net: add thread-safe crc api Arkadiusz Kusztal
2024-10-01 18:11 ` [PATCH v2 1/3] " Arkadiusz Kusztal
2024-10-01 21:44 ` Stephen Hemminger
2024-10-02 8:28 ` Kusztal, ArkadiuszX
2024-10-02 7:42 ` David Marchand
2024-10-02 8:41 ` Kusztal, ArkadiuszX
2024-10-02 9:01 ` David Marchand
2024-10-02 9:16 ` Kusztal, ArkadiuszX
2024-10-08 3:42 ` Ferruh Yigit
2024-10-08 20:51 ` Kusztal, ArkadiuszX
2024-10-09 1:03 ` Ferruh Yigit
2024-10-09 7:48 ` Kusztal, ArkadiuszX
2024-10-09 9:11 ` Ferruh Yigit
2024-12-02 22:36 ` Stephen Hemminger
2024-10-01 18:11 ` [PATCH v2 2/3] crypto/qat: use process safe " Arkadiusz Kusztal
2024-10-01 18:11 ` [PATCH v2 3/3] test/crc: replace thread-unsafe api functions Arkadiusz Kusztal
2024-12-02 22:33 ` Stephen Hemminger
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).