From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mx1.redhat.com (mx1.redhat.com [209.132.183.28]) by dpdk.org (Postfix) with ESMTP id 72B521B10E for ; Wed, 10 Apr 2019 15:54:04 +0200 (CEST) Received: from smtp.corp.redhat.com (int-mx04.intmail.prod.int.phx2.redhat.com [10.5.11.14]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id C7AD63154853; Wed, 10 Apr 2019 13:54:03 +0000 (UTC) Received: from dhcp-25.97.bos.redhat.com (unknown [10.18.25.61]) by smtp.corp.redhat.com (Postfix) with ESMTPS id 3B0D15D961; Wed, 10 Apr 2019 13:54:03 +0000 (UTC) From: Aaron Conole To: Bruce Richardson Cc: konstantin.ananyev@intel.com, dev@dpdk.org References: <20190410134517.63896-1-bruce.richardson@intel.com> <20190410134517.63896-2-bruce.richardson@intel.com> Date: Wed, 10 Apr 2019 09:54:02 -0400 In-Reply-To: <20190410134517.63896-2-bruce.richardson@intel.com> (Bruce Richardson's message of "Wed, 10 Apr 2019 14:45:16 +0100") Message-ID: User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/26.1 (gnu/linux) MIME-Version: 1.0 Content-Type: text/plain X-Scanned-By: MIMEDefang 2.79 on 10.5.11.14 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.41]); Wed, 10 Apr 2019 13:54:03 +0000 (UTC) Subject: Re: [dpdk-dev] [PATCH 1/2] acl: remove use of weak functions X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 10 Apr 2019 13:54:04 -0000 Bruce Richardson writes: > Weak functions don't work well with static libraries and require the use of > "whole-archive" flag to ensure that the correct function is used when > linking. Since the weak functions are only used as placeholders within > this library alone, we can replace them with non-weak functions using > preprocessor ifdefs. > > Signed-off-by: Bruce Richardson > --- > lib/librte_acl/meson.build | 7 ++++++- > lib/librte_acl/rte_acl.c | 18 ++++++++++++++---- > mk/rte.app.mk | 3 --- > 3 files changed, 20 insertions(+), 8 deletions(-) > > diff --git a/lib/librte_acl/meson.build b/lib/librte_acl/meson.build > index 2207dbafe..98ece7d85 100644 > --- a/lib/librte_acl/meson.build > +++ b/lib/librte_acl/meson.build > @@ -6,7 +6,7 @@ sources = files('acl_bld.c', 'acl_gen.c', 'acl_run_scalar.c', > 'rte_acl.c', 'tb_mem.c') > headers = files('rte_acl.h', 'rte_acl_osdep.h') > > -if arch_subdir == 'x86' > +if dpdk_conf.has('RTE_ARCH_X86') > sources += files('acl_run_sse.c') > > # compile AVX2 version if either: > @@ -28,4 +28,9 @@ if arch_subdir == 'x86' > cflags += '-DCC_AVX2_SUPPORT' > endif > > +elif dpdk_conf.has('RTE_ARCH_ARM') or dpdk_conf.has('RTE_ARCH_ARM64') > + cflags += '-flax-vector-conversions' > + sources += files('acl_run_neon.c') This will also need -Wno-uninitialized (otherwise it will generate warnings about the search_neon_4 and search_neon_8 functions). But I don't like papering over these conversions. I'd prefer instead the patches I posted at: http://mails.dpdk.org/archives/dev/2019-April/129540.html and http://mails.dpdk.org/archives/dev/2019-April/129541.html Are you opposed to merging those? > +elif dpdk_conf.has('RTE_ARCH_PPC_64') > + sources += files('acl_run_altivec.c') > endif > diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c > index c436a9bfd..fd5bd5e4e 100644 > --- a/lib/librte_acl/rte_acl.c > +++ b/lib/librte_acl/rte_acl.c > @@ -13,11 +13,13 @@ static struct rte_tailq_elem rte_acl_tailq = { > }; > EAL_REGISTER_TAILQ(rte_acl_tailq) > > +#ifndef RTE_ARCH_X86 > +#ifndef CC_AVX2_SUPPORT > /* > * If the compiler doesn't support AVX2 instructions, > * then the dummy one would be used instead for AVX2 classify method. > */ > -__rte_weak int > +int > rte_acl_classify_avx2(__rte_unused const struct rte_acl_ctx *ctx, > __rte_unused const uint8_t **data, > __rte_unused uint32_t *results, > @@ -26,8 +28,9 @@ rte_acl_classify_avx2(__rte_unused const struct rte_acl_ctx *ctx, > { > return -ENOTSUP; > } > +#endif > > -__rte_weak int > +int > rte_acl_classify_sse(__rte_unused const struct rte_acl_ctx *ctx, > __rte_unused const uint8_t **data, > __rte_unused uint32_t *results, > @@ -36,8 +39,11 @@ rte_acl_classify_sse(__rte_unused const struct rte_acl_ctx *ctx, > { > return -ENOTSUP; > } > +#endif > > -__rte_weak int > +#ifndef RTE_ARCH_ARM > +#ifndef RTE_ARCH_ARM64 > +int > rte_acl_classify_neon(__rte_unused const struct rte_acl_ctx *ctx, > __rte_unused const uint8_t **data, > __rte_unused uint32_t *results, > @@ -46,8 +52,11 @@ rte_acl_classify_neon(__rte_unused const struct rte_acl_ctx *ctx, > { > return -ENOTSUP; > } > +#endif > +#endif > > -__rte_weak int > +#ifndef RTE_ARCH_PPC_64 > +int > rte_acl_classify_altivec(__rte_unused const struct rte_acl_ctx *ctx, > __rte_unused const uint8_t **data, > __rte_unused uint32_t *results, > @@ -56,6 +65,7 @@ rte_acl_classify_altivec(__rte_unused const struct rte_acl_ctx *ctx, > { > return -ENOTSUP; > } > +#endif > > static const rte_acl_classify_t classify_fns[] = { > [RTE_ACL_CLASSIFY_DEFAULT] = rte_acl_classify_scalar, > diff --git a/mk/rte.app.mk b/mk/rte.app.mk > index 7d994bece..fdec636b4 100644 > --- a/mk/rte.app.mk > +++ b/mk/rte.app.mk > @@ -46,10 +46,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += -lrte_distributor > _LDLIBS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += -lrte_ip_frag > _LDLIBS-$(CONFIG_RTE_LIBRTE_METER) += -lrte_meter > _LDLIBS-$(CONFIG_RTE_LIBRTE_LPM) += -lrte_lpm > -# librte_acl needs --whole-archive because of weak functions > -_LDLIBS-$(CONFIG_RTE_LIBRTE_ACL) += --whole-archive > _LDLIBS-$(CONFIG_RTE_LIBRTE_ACL) += -lrte_acl > -_LDLIBS-$(CONFIG_RTE_LIBRTE_ACL) += --no-whole-archive > _LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += --no-as-needed > _LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += --whole-archive > _LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += -lrte_telemetry -ljansson I think I have a solution for this that can use the weak aliasing and not require the use of the whole-archive flag. Would you prefer that? From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by dpdk.space (Postfix) with ESMTP id D2B01A0096 for ; Wed, 10 Apr 2019 15:54:07 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 856571B111; Wed, 10 Apr 2019 15:54:06 +0200 (CEST) Received: from mx1.redhat.com (mx1.redhat.com [209.132.183.28]) by dpdk.org (Postfix) with ESMTP id 72B521B10E for ; Wed, 10 Apr 2019 15:54:04 +0200 (CEST) Received: from smtp.corp.redhat.com (int-mx04.intmail.prod.int.phx2.redhat.com [10.5.11.14]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id C7AD63154853; Wed, 10 Apr 2019 13:54:03 +0000 (UTC) Received: from dhcp-25.97.bos.redhat.com (unknown [10.18.25.61]) by smtp.corp.redhat.com (Postfix) with ESMTPS id 3B0D15D961; Wed, 10 Apr 2019 13:54:03 +0000 (UTC) From: Aaron Conole To: Bruce Richardson Cc: konstantin.ananyev@intel.com, dev@dpdk.org References: <20190410134517.63896-1-bruce.richardson@intel.com> <20190410134517.63896-2-bruce.richardson@intel.com> Date: Wed, 10 Apr 2019 09:54:02 -0400 In-Reply-To: <20190410134517.63896-2-bruce.richardson@intel.com> (Bruce Richardson's message of "Wed, 10 Apr 2019 14:45:16 +0100") Message-ID: User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/26.1 (gnu/linux) MIME-Version: 1.0 Content-Type: text/plain; charset="UTF-8" X-Scanned-By: MIMEDefang 2.79 on 10.5.11.14 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.41]); Wed, 10 Apr 2019 13:54:03 +0000 (UTC) Subject: Re: [dpdk-dev] [PATCH 1/2] acl: remove use of weak functions X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Message-ID: <20190410135402.ilT0mptkaHotuFKRoI6Y0g6sxOBewDIHbSs7qmQeZYU@z> Bruce Richardson writes: > Weak functions don't work well with static libraries and require the use of > "whole-archive" flag to ensure that the correct function is used when > linking. Since the weak functions are only used as placeholders within > this library alone, we can replace them with non-weak functions using > preprocessor ifdefs. > > Signed-off-by: Bruce Richardson > --- > lib/librte_acl/meson.build | 7 ++++++- > lib/librte_acl/rte_acl.c | 18 ++++++++++++++---- > mk/rte.app.mk | 3 --- > 3 files changed, 20 insertions(+), 8 deletions(-) > > diff --git a/lib/librte_acl/meson.build b/lib/librte_acl/meson.build > index 2207dbafe..98ece7d85 100644 > --- a/lib/librte_acl/meson.build > +++ b/lib/librte_acl/meson.build > @@ -6,7 +6,7 @@ sources = files('acl_bld.c', 'acl_gen.c', 'acl_run_scalar.c', > 'rte_acl.c', 'tb_mem.c') > headers = files('rte_acl.h', 'rte_acl_osdep.h') > > -if arch_subdir == 'x86' > +if dpdk_conf.has('RTE_ARCH_X86') > sources += files('acl_run_sse.c') > > # compile AVX2 version if either: > @@ -28,4 +28,9 @@ if arch_subdir == 'x86' > cflags += '-DCC_AVX2_SUPPORT' > endif > > +elif dpdk_conf.has('RTE_ARCH_ARM') or dpdk_conf.has('RTE_ARCH_ARM64') > + cflags += '-flax-vector-conversions' > + sources += files('acl_run_neon.c') This will also need -Wno-uninitialized (otherwise it will generate warnings about the search_neon_4 and search_neon_8 functions). But I don't like papering over these conversions. I'd prefer instead the patches I posted at: http://mails.dpdk.org/archives/dev/2019-April/129540.html and http://mails.dpdk.org/archives/dev/2019-April/129541.html Are you opposed to merging those? > +elif dpdk_conf.has('RTE_ARCH_PPC_64') > + sources += files('acl_run_altivec.c') > endif > diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c > index c436a9bfd..fd5bd5e4e 100644 > --- a/lib/librte_acl/rte_acl.c > +++ b/lib/librte_acl/rte_acl.c > @@ -13,11 +13,13 @@ static struct rte_tailq_elem rte_acl_tailq = { > }; > EAL_REGISTER_TAILQ(rte_acl_tailq) > > +#ifndef RTE_ARCH_X86 > +#ifndef CC_AVX2_SUPPORT > /* > * If the compiler doesn't support AVX2 instructions, > * then the dummy one would be used instead for AVX2 classify method. > */ > -__rte_weak int > +int > rte_acl_classify_avx2(__rte_unused const struct rte_acl_ctx *ctx, > __rte_unused const uint8_t **data, > __rte_unused uint32_t *results, > @@ -26,8 +28,9 @@ rte_acl_classify_avx2(__rte_unused const struct rte_acl_ctx *ctx, > { > return -ENOTSUP; > } > +#endif > > -__rte_weak int > +int > rte_acl_classify_sse(__rte_unused const struct rte_acl_ctx *ctx, > __rte_unused const uint8_t **data, > __rte_unused uint32_t *results, > @@ -36,8 +39,11 @@ rte_acl_classify_sse(__rte_unused const struct rte_acl_ctx *ctx, > { > return -ENOTSUP; > } > +#endif > > -__rte_weak int > +#ifndef RTE_ARCH_ARM > +#ifndef RTE_ARCH_ARM64 > +int > rte_acl_classify_neon(__rte_unused const struct rte_acl_ctx *ctx, > __rte_unused const uint8_t **data, > __rte_unused uint32_t *results, > @@ -46,8 +52,11 @@ rte_acl_classify_neon(__rte_unused const struct rte_acl_ctx *ctx, > { > return -ENOTSUP; > } > +#endif > +#endif > > -__rte_weak int > +#ifndef RTE_ARCH_PPC_64 > +int > rte_acl_classify_altivec(__rte_unused const struct rte_acl_ctx *ctx, > __rte_unused const uint8_t **data, > __rte_unused uint32_t *results, > @@ -56,6 +65,7 @@ rte_acl_classify_altivec(__rte_unused const struct rte_acl_ctx *ctx, > { > return -ENOTSUP; > } > +#endif > > static const rte_acl_classify_t classify_fns[] = { > [RTE_ACL_CLASSIFY_DEFAULT] = rte_acl_classify_scalar, > diff --git a/mk/rte.app.mk b/mk/rte.app.mk > index 7d994bece..fdec636b4 100644 > --- a/mk/rte.app.mk > +++ b/mk/rte.app.mk > @@ -46,10 +46,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += -lrte_distributor > _LDLIBS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += -lrte_ip_frag > _LDLIBS-$(CONFIG_RTE_LIBRTE_METER) += -lrte_meter > _LDLIBS-$(CONFIG_RTE_LIBRTE_LPM) += -lrte_lpm > -# librte_acl needs --whole-archive because of weak functions > -_LDLIBS-$(CONFIG_RTE_LIBRTE_ACL) += --whole-archive > _LDLIBS-$(CONFIG_RTE_LIBRTE_ACL) += -lrte_acl > -_LDLIBS-$(CONFIG_RTE_LIBRTE_ACL) += --no-whole-archive > _LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += --no-as-needed > _LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += --whole-archive > _LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += -lrte_telemetry -ljansson I think I have a solution for this that can use the weak aliasing and not require the use of the whole-archive flag. Would you prefer that?