From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp.tuxdriver.com (charlotte.tuxdriver.com [70.61.120.58]) by dpdk.org (Postfix) with ESMTP id 205D5959 for ; Mon, 15 Dec 2014 16:56:41 +0100 (CET) Received: from hmsreliant.think-freely.org ([2001:470:8:a08:7aac:c0ff:fec2:933b] helo=localhost) by smtp.tuxdriver.com with esmtpsa (TLSv1:AES128-SHA:128) (Exim 4.63) (envelope-from ) id 1Y0Y0i-0002wB-HY; Mon, 15 Dec 2014 10:56:39 -0500 Date: Mon, 15 Dec 2014 10:56:30 -0500 From: Neil Horman To: Konstantin Ananyev Message-ID: <20141215155630.GB3803@hmsreliant.think-freely.org> References: <1418580659-12595-1-git-send-email-konstantin.ananyev@intel.com> <1418580659-12595-10-git-send-email-konstantin.ananyev@intel.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <1418580659-12595-10-git-send-email-konstantin.ananyev@intel.com> User-Agent: Mutt/1.5.23 (2014-03-12) X-Spam-Score: -2.9 (--) X-Spam-Status: No Cc: dev@dpdk.org Subject: Re: [dpdk-dev] [PATCH 09/17] EAL: introduce rte_ymm and relatives in rte_common_vect.h. X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 15 Dec 2014 15:56:41 -0000 On Sun, Dec 14, 2014 at 06:10:51PM +0000, Konstantin Ananyev wrote: > New data type to manipulate 256 bit AVX values. > Rename field in the rte_xmm to keep common naming accross SSE/AVX fields. 
> > Signed-off-by: Konstantin Ananyev > --- > examples/l3fwd/main.c | 2 +- > lib/librte_acl/acl_run_sse.c | 88 ++++++++++++------------- > lib/librte_eal/common/include/rte_common_vect.h | 27 +++++++- > lib/librte_lpm/rte_lpm.h | 2 +- > 4 files changed, 71 insertions(+), 48 deletions(-) > > diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c > index bf0fcdb..dc6cae2 100644 > --- a/examples/l3fwd/main.c > +++ b/examples/l3fwd/main.c > @@ -1168,7 +1168,7 @@ processx4_step2(const struct lcore_conf *qconf, __m128i dip, uint32_t flag, > if (likely(flag != 0)) { > rte_lpm_lookupx4(qconf->ipv4_lookup_struct, dip, dprt, portid); > } else { > - dst.m = dip; > + dst.x = dip; > dprt[0] = get_dst_port(qconf, pkt[0], dst.u32[0], portid); > dprt[1] = get_dst_port(qconf, pkt[1], dst.u32[1], portid); > dprt[2] = get_dst_port(qconf, pkt[2], dst.u32[2], portid); > diff --git a/lib/librte_acl/acl_run_sse.c b/lib/librte_acl/acl_run_sse.c > index 09e32be..4605b58 100644 > --- a/lib/librte_acl/acl_run_sse.c > +++ b/lib/librte_acl/acl_run_sse.c > @@ -359,16 +359,16 @@ search_sse_8(const struct rte_acl_ctx *ctx, const uint8_t **data, > > /* Check for any matches. */ > acl_match_check_x4(0, ctx, parms, &flows, > - &indices1, &indices2, mm_match_mask.m); > + &indices1, &indices2, mm_match_mask.x); > acl_match_check_x4(4, ctx, parms, &flows, > - &indices3, &indices4, mm_match_mask.m); > + &indices3, &indices4, mm_match_mask.x); > > while (flows.started > 0) { > > /* Gather 4 bytes of input data for each stream. 
*/ > - input0 = MM_INSERT32(mm_ones_16.m, GET_NEXT_4BYTES(parms, 0), > + input0 = MM_INSERT32(mm_ones_16.x, GET_NEXT_4BYTES(parms, 0), > 0); > - input1 = MM_INSERT32(mm_ones_16.m, GET_NEXT_4BYTES(parms, 4), > + input1 = MM_INSERT32(mm_ones_16.x, GET_NEXT_4BYTES(parms, 4), > 0); > > input0 = MM_INSERT32(input0, GET_NEXT_4BYTES(parms, 1), 1); > @@ -382,43 +382,43 @@ search_sse_8(const struct rte_acl_ctx *ctx, const uint8_t **data, > > /* Process the 4 bytes of input on each stream. */ > > - input0 = transition4(mm_index_mask.m, input0, > - mm_shuffle_input.m, mm_ones_16.m, > + input0 = transition4(mm_index_mask.x, input0, > + mm_shuffle_input.x, mm_ones_16.x, > flows.trans, &indices1, &indices2); > > - input1 = transition4(mm_index_mask.m, input1, > - mm_shuffle_input.m, mm_ones_16.m, > + input1 = transition4(mm_index_mask.x, input1, > + mm_shuffle_input.x, mm_ones_16.x, > flows.trans, &indices3, &indices4); > > - input0 = transition4(mm_index_mask.m, input0, > - mm_shuffle_input.m, mm_ones_16.m, > + input0 = transition4(mm_index_mask.x, input0, > + mm_shuffle_input.x, mm_ones_16.x, > flows.trans, &indices1, &indices2); > > - input1 = transition4(mm_index_mask.m, input1, > - mm_shuffle_input.m, mm_ones_16.m, > + input1 = transition4(mm_index_mask.x, input1, > + mm_shuffle_input.x, mm_ones_16.x, > flows.trans, &indices3, &indices4); > > - input0 = transition4(mm_index_mask.m, input0, > - mm_shuffle_input.m, mm_ones_16.m, > + input0 = transition4(mm_index_mask.x, input0, > + mm_shuffle_input.x, mm_ones_16.x, > flows.trans, &indices1, &indices2); > > - input1 = transition4(mm_index_mask.m, input1, > - mm_shuffle_input.m, mm_ones_16.m, > + input1 = transition4(mm_index_mask.x, input1, > + mm_shuffle_input.x, mm_ones_16.x, > flows.trans, &indices3, &indices4); > > - input0 = transition4(mm_index_mask.m, input0, > - mm_shuffle_input.m, mm_ones_16.m, > + input0 = transition4(mm_index_mask.x, input0, > + mm_shuffle_input.x, mm_ones_16.x, > flows.trans, &indices1, &indices2); 
> > - input1 = transition4(mm_index_mask.m, input1, > - mm_shuffle_input.m, mm_ones_16.m, > + input1 = transition4(mm_index_mask.x, input1, > + mm_shuffle_input.x, mm_ones_16.x, > flows.trans, &indices3, &indices4); > > /* Check for any matches. */ > acl_match_check_x4(0, ctx, parms, &flows, > - &indices1, &indices2, mm_match_mask.m); > + &indices1, &indices2, mm_match_mask.x); > acl_match_check_x4(4, ctx, parms, &flows, > - &indices3, &indices4, mm_match_mask.m); > + &indices3, &indices4, mm_match_mask.x); > } > > return 0; > @@ -451,36 +451,36 @@ search_sse_4(const struct rte_acl_ctx *ctx, const uint8_t **data, > > /* Check for any matches. */ > acl_match_check_x4(0, ctx, parms, &flows, > - &indices1, &indices2, mm_match_mask.m); > + &indices1, &indices2, mm_match_mask.x); > > while (flows.started > 0) { > > /* Gather 4 bytes of input data for each stream. */ > - input = MM_INSERT32(mm_ones_16.m, GET_NEXT_4BYTES(parms, 0), 0); > + input = MM_INSERT32(mm_ones_16.x, GET_NEXT_4BYTES(parms, 0), 0); > input = MM_INSERT32(input, GET_NEXT_4BYTES(parms, 1), 1); > input = MM_INSERT32(input, GET_NEXT_4BYTES(parms, 2), 2); > input = MM_INSERT32(input, GET_NEXT_4BYTES(parms, 3), 3); > > /* Process the 4 bytes of input on each stream. 
*/ > - input = transition4(mm_index_mask.m, input, > - mm_shuffle_input.m, mm_ones_16.m, > + input = transition4(mm_index_mask.x, input, > + mm_shuffle_input.x, mm_ones_16.x, > flows.trans, &indices1, &indices2); > > - input = transition4(mm_index_mask.m, input, > - mm_shuffle_input.m, mm_ones_16.m, > + input = transition4(mm_index_mask.x, input, > + mm_shuffle_input.x, mm_ones_16.x, > flows.trans, &indices1, &indices2); > > - input = transition4(mm_index_mask.m, input, > - mm_shuffle_input.m, mm_ones_16.m, > + input = transition4(mm_index_mask.x, input, > + mm_shuffle_input.x, mm_ones_16.x, > flows.trans, &indices1, &indices2); > > - input = transition4(mm_index_mask.m, input, > - mm_shuffle_input.m, mm_ones_16.m, > + input = transition4(mm_index_mask.x, input, > + mm_shuffle_input.x, mm_ones_16.x, > flows.trans, &indices1, &indices2); > > /* Check for any matches. */ > acl_match_check_x4(0, ctx, parms, &flows, > - &indices1, &indices2, mm_match_mask.m); > + &indices1, &indices2, mm_match_mask.x); > } > > return 0; > @@ -534,35 +534,35 @@ search_sse_2(const struct rte_acl_ctx *ctx, const uint8_t **data, > indices = MM_LOADU((xmm_t *) &index_array[0]); > > /* Check for any matches. */ > - acl_match_check_x2(0, ctx, parms, &flows, &indices, mm_match_mask64.m); > + acl_match_check_x2(0, ctx, parms, &flows, &indices, mm_match_mask64.x); > > while (flows.started > 0) { > > /* Gather 4 bytes of input data for each stream. */ > - input = MM_INSERT32(mm_ones_16.m, GET_NEXT_4BYTES(parms, 0), 0); > + input = MM_INSERT32(mm_ones_16.x, GET_NEXT_4BYTES(parms, 0), 0); > input = MM_INSERT32(input, GET_NEXT_4BYTES(parms, 1), 1); > > /* Process the 4 bytes of input on each stream. 
*/ > > - input = transition2(mm_index_mask64.m, input, > - mm_shuffle_input64.m, mm_ones_16.m, > + input = transition2(mm_index_mask64.x, input, > + mm_shuffle_input64.x, mm_ones_16.x, > flows.trans, &indices); > > - input = transition2(mm_index_mask64.m, input, > - mm_shuffle_input64.m, mm_ones_16.m, > + input = transition2(mm_index_mask64.x, input, > + mm_shuffle_input64.x, mm_ones_16.x, > flows.trans, &indices); > > - input = transition2(mm_index_mask64.m, input, > - mm_shuffle_input64.m, mm_ones_16.m, > + input = transition2(mm_index_mask64.x, input, > + mm_shuffle_input64.x, mm_ones_16.x, > flows.trans, &indices); > > - input = transition2(mm_index_mask64.m, input, > - mm_shuffle_input64.m, mm_ones_16.m, > + input = transition2(mm_index_mask64.x, input, > + mm_shuffle_input64.x, mm_ones_16.x, > flows.trans, &indices); > > /* Check for any matches. */ > acl_match_check_x2(0, ctx, parms, &flows, &indices, > - mm_match_mask64.m); > + mm_match_mask64.x); > } > > return 0; > diff --git a/lib/librte_eal/common/include/rte_common_vect.h b/lib/librte_eal/common/include/rte_common_vect.h > index 95bf4b1..617470b 100644 > --- a/lib/librte_eal/common/include/rte_common_vect.h > +++ b/lib/librte_eal/common/include/rte_common_vect.h > @@ -54,6 +54,10 @@ > #include > #endif > > +#if defined(__AVX__) > +#include > +#endif > + > #else > > #include > @@ -70,7 +74,7 @@ typedef __m128i xmm_t; > #define XMM_MASK (XMM_SIZE - 1) > > typedef union rte_xmm { > - xmm_t m; > + xmm_t x; > uint8_t u8[XMM_SIZE / sizeof(uint8_t)]; > uint16_t u16[XMM_SIZE / sizeof(uint16_t)]; > uint32_t u32[XMM_SIZE / sizeof(uint32_t)]; > @@ -78,10 +82,29 @@ typedef union rte_xmm { > double pd[XMM_SIZE / sizeof(double)]; > } rte_xmm_t; > > +#ifdef __AVX__ > + Why are you excluding this type based on instruction availability? I don't see anything in the definition that makes any of the types included dependent on AVX availability. 
> +typedef __m256i ymm_t; > + > +#define YMM_SIZE (sizeof(ymm_t)) > +#define YMM_MASK (YMM_SIZE - 1) > + > +typedef union rte_ymm { > + ymm_t y; > + xmm_t x[YMM_SIZE / sizeof(xmm_t)]; > + uint8_t u8[YMM_SIZE / sizeof(uint8_t)]; > + uint16_t u16[YMM_SIZE / sizeof(uint16_t)]; > + uint32_t u32[YMM_SIZE / sizeof(uint32_t)]; > + uint64_t u64[YMM_SIZE / sizeof(uint64_t)]; > + double pd[YMM_SIZE / sizeof(double)]; > +} rte_ymm_t; > + > +#endif /* __AVX__ */