From: Konstantin Ananyev <konstantin.ananyev@intel.com>
To: dev@dpdk.org
Cc: jerinj@marvell.com, ruifeng.wang@arm.com,
vladimir.medvedkin@intel.com,
Konstantin Ananyev <konstantin.ananyev@intel.com>
Subject: [dpdk-dev] [PATCH v4 07/14] acl: add infrastructure to support AVX512 classify
Date: Tue, 6 Oct 2020 16:03:09 +0100 [thread overview]
Message-ID: <20201006150316.5776-8-konstantin.ananyev@intel.com> (raw)
In-Reply-To: <20201006150316.5776-1-konstantin.ananyev@intel.com>
Add necessary changes to support new AVX512 specific ACL classify
algorithm:
- changes in meson.build to check that build tools
(compiler, assembler, etc.) do properly support AVX512.
- run-time checks to make sure target platform does support AVX512.
- dummy rte_acl_classify_avx512x16() and rte_acl_classify_avx512x32()
for targets where AVX512 implementation couldn't be properly supported.
Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
Acked-by: Bruce Richardson <bruce.richardson@intel.com>
---
config/x86/meson.build | 3 ++-
lib/librte_acl/acl.h | 8 ++++++
lib/librte_acl/acl_run_avx512.c | 29 ++++++++++++++++++++
lib/librte_acl/meson.build | 48 +++++++++++++++++++++++++++++++++
lib/librte_acl/rte_acl.c | 42 +++++++++++++++++++++++++++++
lib/librte_acl/rte_acl.h | 2 ++
6 files changed, 131 insertions(+), 1 deletion(-)
create mode 100644 lib/librte_acl/acl_run_avx512.c
diff --git a/config/x86/meson.build b/config/x86/meson.build
index fea4d54035..724e69f4c4 100644
--- a/config/x86/meson.build
+++ b/config/x86/meson.build
@@ -22,7 +22,8 @@ foreach f:base_flags
endforeach
optional_flags = ['AES', 'PCLMUL',
- 'AVX', 'AVX2', 'AVX512F',
+ 'AVX', 'AVX2',
+ 'AVX512F', 'AVX512VL', 'AVX512CD', 'AVX512BW',
'RDRND', 'RDSEED']
foreach f:optional_flags
if cc.get_define('__@0@__'.format(f), args: machine_args) == '1'
diff --git a/lib/librte_acl/acl.h b/lib/librte_acl/acl.h
index 39d45a0c2b..543ce55659 100644
--- a/lib/librte_acl/acl.h
+++ b/lib/librte_acl/acl.h
@@ -201,6 +201,14 @@ int
rte_acl_classify_avx2(const struct rte_acl_ctx *ctx, const uint8_t **data,
uint32_t *results, uint32_t num, uint32_t categories);
+int
+rte_acl_classify_avx512x16(const struct rte_acl_ctx *ctx, const uint8_t **data,
+ uint32_t *results, uint32_t num, uint32_t categories);
+
+int
+rte_acl_classify_avx512x32(const struct rte_acl_ctx *ctx, const uint8_t **data,
+ uint32_t *results, uint32_t num, uint32_t categories);
+
int
rte_acl_classify_neon(const struct rte_acl_ctx *ctx, const uint8_t **data,
uint32_t *results, uint32_t num, uint32_t categories);
diff --git a/lib/librte_acl/acl_run_avx512.c b/lib/librte_acl/acl_run_avx512.c
new file mode 100644
index 0000000000..1817f88b29
--- /dev/null
+++ b/lib/librte_acl/acl_run_avx512.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include "acl_run_sse.h"
+/*
+ * Classify routine installed in classify_fns[] for
+ * RTE_ACL_CLASSIFY_AVX512X16.
+ *
+ * As written in this patch it only dispatches on 'num':
+ *   num >= MAX_SEARCHES_SSE8  -> search_sse_8()
+ *   num >= MAX_SEARCHES_SSE4  -> search_sse_4()
+ *   otherwise                 -> rte_acl_classify_scalar()
+ * All arguments are passed through unchanged and the value returned by
+ * the selected routine is returned to the caller.
+ *
+ * NOTE(review): no AVX512-specific code path is visible here; presumably
+ * it is introduced by a later patch of the series - confirm.
+ */
+int
+rte_acl_classify_avx512x16(const struct rte_acl_ctx *ctx, const uint8_t **data,
+ uint32_t *results, uint32_t num, uint32_t categories)
+{
+ if (num >= MAX_SEARCHES_SSE8)
+ return search_sse_8(ctx, data, results, num, categories);
+ if (num >= MAX_SEARCHES_SSE4)
+ return search_sse_4(ctx, data, results, num, categories);
+
+ return rte_acl_classify_scalar(ctx, data, results, num, categories);
+}
+/*
+ * Classify routine installed in classify_fns[] for
+ * RTE_ACL_CLASSIFY_AVX512X32.
+ *
+ * In this patch the body is the same as rte_acl_classify_avx512x16():
+ *   num >= MAX_SEARCHES_SSE8  -> search_sse_8()
+ *   num >= MAX_SEARCHES_SSE4  -> search_sse_4()
+ *   otherwise                 -> rte_acl_classify_scalar()
+ * All arguments are passed through unchanged and the value returned by
+ * the selected routine is returned to the caller.
+ *
+ * NOTE(review): no AVX512-specific code path is visible here; presumably
+ * it is introduced by a later patch of the series - confirm.
+ */
+int
+rte_acl_classify_avx512x32(const struct rte_acl_ctx *ctx, const uint8_t **data,
+ uint32_t *results, uint32_t num, uint32_t categories)
+{
+ if (num >= MAX_SEARCHES_SSE8)
+ return search_sse_8(ctx, data, results, num, categories);
+ if (num >= MAX_SEARCHES_SSE4)
+ return search_sse_4(ctx, data, results, num, categories);
+
+ return rte_acl_classify_scalar(ctx, data, results, num, categories);
+}
diff --git a/lib/librte_acl/meson.build b/lib/librte_acl/meson.build
index b31a3f798e..a3c7c398d0 100644
--- a/lib/librte_acl/meson.build
+++ b/lib/librte_acl/meson.build
@@ -27,6 +27,54 @@ if dpdk_conf.has('RTE_ARCH_X86')
cflags += '-DCC_AVX2_SUPPORT'
endif
+ # compile AVX512 version only if:
+ # we are building a 64-bit binary AND binutils can generate proper code
+
+ if dpdk_conf.has('RTE_ARCH_X86_64') and binutils_ok.returncode() == 0
+
+ # compile AVX512 version if either:
+ # a. all required AVX512 flags are already defined with machine_args
+ # (i.e. part of the minimum instruction set baseline), or
+ # b. they are not in the baseline, but the compiler accepts
+ # the corresponding -mavx512* options
+ #
+ # in the former case, just add the AVX512 C file to the sources list;
+ # in the latter case, compile the C file into a static lib using the
+ # -mavx512* compiler flags, and then have the .o file from that
+ # static lib linked into the main lib.
+
+ # check if all required flags are already enabled (variant a).
+ acl_avx512_flags = ['__AVX512F__', '__AVX512VL__',
+ '__AVX512CD__', '__AVX512BW__']
+
+ acl_avx512_on = true
+ foreach f:acl_avx512_flags
+
+ if cc.get_define(f, args: machine_args) == ''
+ acl_avx512_on = false
+ endif
+ endforeach
+
+ if acl_avx512_on == true
+
+ sources += files('acl_run_avx512.c')
+ cflags += '-DCC_AVX512_SUPPORT'
+
+ elif cc.has_multi_arguments('-mavx512f', '-mavx512vl',
+ '-mavx512cd', '-mavx512bw')
+
+ avx512_tmplib = static_library('avx512_tmp',
+ 'acl_run_avx512.c',
+ dependencies: static_rte_eal,
+ c_args: cflags +
+ ['-mavx512f', '-mavx512vl',
+ '-mavx512cd', '-mavx512bw'])
+ objs += avx512_tmplib.extract_objects(
+ 'acl_run_avx512.c')
+ cflags += '-DCC_AVX512_SUPPORT'
+ endif
+ endif
+
elif dpdk_conf.has('RTE_ARCH_ARM') or dpdk_conf.has('RTE_ARCH_ARM64')
cflags += '-flax-vector-conversions'
sources += files('acl_run_neon.c')
diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c
index 863549a38b..1154f35107 100644
--- a/lib/librte_acl/rte_acl.c
+++ b/lib/librte_acl/rte_acl.c
@@ -16,6 +16,32 @@ static struct rte_tailq_elem rte_acl_tailq = {
};
EAL_REGISTER_TAILQ(rte_acl_tailq)
+#ifndef CC_AVX512_SUPPORT
+/*
+ * If CC_AVX512_SUPPORT is not defined by the build (e.g. the compiler
+ * doesn't support AVX512 instructions), the dummy functions below are
+ * used for the AVX512 classify methods instead.
+ * They ignore all of their arguments and always return -ENOTSUP.
+ */
+int
+rte_acl_classify_avx512x16(__rte_unused const struct rte_acl_ctx *ctx,
+ __rte_unused const uint8_t **data,
+ __rte_unused uint32_t *results,
+ __rte_unused uint32_t num,
+ __rte_unused uint32_t categories)
+{
+ return -ENOTSUP;
+}
+
+int
+rte_acl_classify_avx512x32(__rte_unused const struct rte_acl_ctx *ctx,
+ __rte_unused const uint8_t **data,
+ __rte_unused uint32_t *results,
+ __rte_unused uint32_t num,
+ __rte_unused uint32_t categories)
+{
+ return -ENOTSUP;
+}
+#endif
+
#ifndef CC_AVX2_SUPPORT
/*
* If the compiler doesn't support AVX2 instructions,
@@ -77,6 +103,8 @@ static const rte_acl_classify_t classify_fns[] = {
[RTE_ACL_CLASSIFY_AVX2] = rte_acl_classify_avx2,
[RTE_ACL_CLASSIFY_NEON] = rte_acl_classify_neon,
[RTE_ACL_CLASSIFY_ALTIVEC] = rte_acl_classify_altivec,
+ [RTE_ACL_CLASSIFY_AVX512X16] = rte_acl_classify_avx512x16,
+ [RTE_ACL_CLASSIFY_AVX512X32] = rte_acl_classify_avx512x32,
};
/*
@@ -126,6 +154,18 @@ acl_check_alg_ppc(enum rte_acl_classify_alg alg)
static int
acl_check_alg_x86(enum rte_acl_classify_alg alg)
{
+ if (alg == RTE_ACL_CLASSIFY_AVX512X16 ||
+ alg == RTE_ACL_CLASSIFY_AVX512X32) {
+#ifdef CC_AVX512_SUPPORT
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) &&
+ rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512VL) &&
+ rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512CD) &&
+ rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW))
+ return 0;
+#endif
+ return -ENOTSUP;
+ }
+
if (alg == RTE_ACL_CLASSIFY_AVX2) {
#ifdef CC_AVX2_SUPPORT
if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
@@ -159,6 +199,8 @@ acl_check_alg(enum rte_acl_classify_alg alg)
return acl_check_alg_arm(alg);
case RTE_ACL_CLASSIFY_ALTIVEC:
return acl_check_alg_ppc(alg);
+ case RTE_ACL_CLASSIFY_AVX512X32:
+ case RTE_ACL_CLASSIFY_AVX512X16:
case RTE_ACL_CLASSIFY_AVX2:
case RTE_ACL_CLASSIFY_SSE:
return acl_check_alg_x86(alg);
diff --git a/lib/librte_acl/rte_acl.h b/lib/librte_acl/rte_acl.h
index 3999f15ded..1bfed00743 100644
--- a/lib/librte_acl/rte_acl.h
+++ b/lib/librte_acl/rte_acl.h
@@ -241,6 +241,8 @@ enum rte_acl_classify_alg {
RTE_ACL_CLASSIFY_AVX2 = 3, /**< requires AVX2 support. */
RTE_ACL_CLASSIFY_NEON = 4, /**< requires NEON support. */
RTE_ACL_CLASSIFY_ALTIVEC = 5, /**< requires ALTIVEC support. */
+ RTE_ACL_CLASSIFY_AVX512X16 = 6, /**< requires AVX512 support. */
+ RTE_ACL_CLASSIFY_AVX512X32 = 7, /**< requires AVX512 support. */
};
/**
--
2.17.1
next prev parent reply other threads:[~2020-10-06 15:10 UTC|newest]
Thread overview: 70+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-08-07 16:28 [dpdk-dev] [PATCH 20.11 0/7] acl: introduce AVX512 classify method Konstantin Ananyev
2020-08-07 16:28 ` [dpdk-dev] [PATCH 20.11 1/7] acl: fix x86 build when compiler doesn't support AVX2 Konstantin Ananyev
2020-08-07 16:28 ` [dpdk-dev] [PATCH 20.11 2/7] app/acl: few small improvements Konstantin Ananyev
2020-08-07 16:28 ` [dpdk-dev] [PATCH 20.11 3/7] acl: remove of unused enum value Konstantin Ananyev
2020-08-07 16:28 ` [dpdk-dev] [PATCH 20.11 4/7] acl: add infrastructure to support AVX512 classify Konstantin Ananyev
2020-08-07 16:28 ` [dpdk-dev] [PATCH 20.11 5/7] app/acl: add AVX512 classify support Konstantin Ananyev
2020-08-07 16:28 ` [dpdk-dev] [PATCH 20.11 6/7] acl: introduce AVX512 classify implementation Konstantin Ananyev
2020-08-07 16:28 ` [dpdk-dev] [PATCH 20.11 7/7] acl: enhance " Konstantin Ananyev
2020-09-15 16:50 ` [dpdk-dev] [PATCH v2 00/12] acl: introduce AVX512 classify method Konstantin Ananyev
2020-09-15 16:50 ` [dpdk-dev] [PATCH v2 01/12] acl: fix x86 build when compiler doesn't support AVX2 Konstantin Ananyev
2020-09-15 16:50 ` [dpdk-dev] [PATCH v2 02/12] doc: fix mixing classify methods in ACL guide Konstantin Ananyev
2020-09-15 16:50 ` [dpdk-dev] [PATCH v2 03/12] acl: remove of unused enum value Konstantin Ananyev
2020-09-27 3:27 ` Ruifeng Wang
2020-09-15 16:50 ` [dpdk-dev] [PATCH v2 04/12] acl: remove library constructor Konstantin Ananyev
2020-09-15 16:50 ` [dpdk-dev] [PATCH v2 05/12] app/acl: few small improvements Konstantin Ananyev
2020-09-15 16:50 ` [dpdk-dev] [PATCH v2 06/12] test/acl: expand classify test coverage Konstantin Ananyev
2020-09-15 16:50 ` [dpdk-dev] [PATCH v2 07/12] acl: add infrastructure to support AVX512 classify Konstantin Ananyev
2020-09-16 9:11 ` Bruce Richardson
2020-09-16 9:36 ` Medvedkin, Vladimir
2020-09-16 9:49 ` Bruce Richardson
2020-09-16 10:06 ` Ananyev, Konstantin
2020-09-15 16:50 ` [dpdk-dev] [PATCH v2 08/12] acl: introduce AVX512 classify implementation Konstantin Ananyev
2020-09-15 16:50 ` [dpdk-dev] [PATCH v2 09/12] acl: enhance " Konstantin Ananyev
2020-09-15 16:50 ` [dpdk-dev] [PATCH v2 10/12] acl: for AVX512 classify use 4B load whenever possible Konstantin Ananyev
2020-09-15 16:50 ` [dpdk-dev] [PATCH v2 11/12] test/acl: add AVX512 classify support Konstantin Ananyev
2020-09-15 16:50 ` [dpdk-dev] [PATCH v2 12/12] app/acl: " Konstantin Ananyev
2020-10-05 18:45 ` [dpdk-dev] [PATCH v3 00/14] acl: introduce AVX512 classify methods Konstantin Ananyev
2020-10-05 18:45 ` [dpdk-dev] [PATCH v3 01/14] acl: fix x86 build when compiler doesn't support AVX2 Konstantin Ananyev
2020-10-05 18:45 ` [dpdk-dev] [PATCH v3 02/14] doc: fix missing classify methods in ACL guide Konstantin Ananyev
2020-10-05 18:45 ` [dpdk-dev] [PATCH v3 03/14] acl: remove of unused enum value Konstantin Ananyev
2020-10-05 18:45 ` [dpdk-dev] [PATCH v3 04/14] acl: remove library constructor Konstantin Ananyev
2020-10-05 18:45 ` [dpdk-dev] [PATCH v3 05/14] app/acl: few small improvements Konstantin Ananyev
2020-10-05 18:45 ` [dpdk-dev] [PATCH v3 06/14] test/acl: expand classify test coverage Konstantin Ananyev
2020-10-05 18:45 ` [dpdk-dev] [PATCH v3 07/14] acl: add infrastructure to support AVX512 classify Konstantin Ananyev
2020-10-05 18:45 ` [dpdk-dev] [PATCH v3 08/14] acl: introduce 256-bit width AVX512 classify implementation Konstantin Ananyev
2020-10-05 18:45 ` [dpdk-dev] [PATCH v3 09/14] acl: update default classify algorithm selection Konstantin Ananyev
2020-10-05 18:45 ` [dpdk-dev] [PATCH v3 10/14] acl: introduce 512-bit width AVX512 classify implementation Konstantin Ananyev
2020-10-05 18:45 ` [dpdk-dev] [PATCH v3 11/14] acl: for AVX512 classify use 4B load whenever possible Konstantin Ananyev
2020-10-05 18:45 ` [dpdk-dev] [PATCH v3 12/14] acl: deduplicate AVX512 code paths Konstantin Ananyev
2020-10-05 18:45 ` [dpdk-dev] [PATCH v3 13/14] test/acl: add AVX512 classify support Konstantin Ananyev
2020-10-05 18:45 ` [dpdk-dev] [PATCH v3 14/14] app/acl: " Konstantin Ananyev
2020-10-06 15:03 ` [dpdk-dev] [PATCH v4 00/14] acl: introduce AVX512 classify methods Konstantin Ananyev
2020-10-06 15:03 ` [dpdk-dev] [PATCH v4 01/14] acl: fix x86 build when compiler doesn't support AVX2 Konstantin Ananyev
2020-10-08 13:42 ` [dpdk-dev] [dpdk-stable] " David Marchand
2020-10-06 15:03 ` [dpdk-dev] [PATCH v4 02/14] doc: fix missing classify methods in ACL guide Konstantin Ananyev
2020-10-08 13:42 ` David Marchand
2020-10-06 15:03 ` [dpdk-dev] [PATCH v4 03/14] acl: remove of unused enum value Konstantin Ananyev
2020-10-06 15:03 ` [dpdk-dev] [PATCH v4 04/14] acl: remove library constructor Konstantin Ananyev
2020-10-06 15:03 ` [dpdk-dev] [PATCH v4 05/14] app/acl: few small improvements Konstantin Ananyev
2020-10-06 15:03 ` [dpdk-dev] [PATCH v4 06/14] test/acl: expand classify test coverage Konstantin Ananyev
2020-10-06 15:03 ` Konstantin Ananyev [this message]
2020-10-13 19:17 ` [dpdk-dev] [PATCH v4 07/14] acl: add infrastructure to support AVX512 classify David Marchand
2020-10-13 22:26 ` Ananyev, Konstantin
2020-10-06 15:03 ` [dpdk-dev] [PATCH v4 08/14] acl: introduce 256-bit width AVX512 classify implementation Konstantin Ananyev
2020-10-06 15:03 ` [dpdk-dev] [PATCH v4 09/14] acl: update default classify algorithm selection Konstantin Ananyev
2020-10-06 15:03 ` [dpdk-dev] [PATCH v4 10/14] acl: introduce 512-bit width AVX512 classify implementation Konstantin Ananyev
2020-10-06 15:03 ` [dpdk-dev] [PATCH v4 11/14] acl: for AVX512 classify use 4B load whenever possible Konstantin Ananyev
2020-10-06 15:03 ` [dpdk-dev] [PATCH v4 12/14] acl: deduplicate AVX512 code paths Konstantin Ananyev
2020-10-16 15:56 ` Ferruh Yigit
2020-10-16 16:20 ` Thomas Monjalon
2020-10-06 15:03 ` [dpdk-dev] [PATCH v4 13/14] test/acl: add AVX512 classify support Konstantin Ananyev
2020-10-14 10:26 ` David Marchand
2020-10-14 10:32 ` Ananyev, Konstantin
2020-10-14 10:35 ` David Marchand
2020-10-06 15:03 ` [dpdk-dev] [PATCH v4 14/14] app/acl: " Konstantin Ananyev
2020-10-14 12:40 ` [dpdk-dev] [PATCH v4 00/14] acl: introduce AVX512 classify methods David Marchand
2020-10-06 15:05 ` [dpdk-dev] [PATCH v3 " David Marchand
2020-10-06 16:07 ` Ananyev, Konstantin
2020-10-08 10:49 ` David Marchand
2020-10-14 9:23 ` Kinsella, Ray
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20201006150316.5776-8-konstantin.ananyev@intel.com \
--to=konstantin.ananyev@intel.com \
--cc=dev@dpdk.org \
--cc=jerinj@marvell.com \
--cc=ruifeng.wang@arm.com \
--cc=vladimir.medvedkin@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).