From: Aaron Conole
To: dev@dpdk.org
Cc: Konstantin Ananyev, Jerin Jacob, Gavin Hu
Date: Mon, 8 Apr 2019 14:24:18 -0400
Message-Id: <20190408182420.4398-2-aconole@redhat.com>
In-Reply-To: <20190408182420.4398-1-aconole@redhat.com>
References: <20190408182420.4398-1-aconole@redhat.com>
Subject: [dpdk-dev] [PATCH 1/3] acl: fix arm argument types

The compiler complains of an argument type mismatch, like:

../lib/librte_acl/acl_run_neon.h: In function ‘transition4’:
../lib/librte_acl/acl_run_neon.h:115:2: note: use -flax-vector-conversions
to permit conversions between vectors with differing element types or
numbers of subparts
  node_type = vbicq_s32(tr_hi_lo.val[0], index_msk);
  ^
../lib/librte_acl/acl_run_neon.h:115:41: error: incompatible type for
argument 2 of ‘vbicq_s32’

Signed-off-by: Aaron Conole
---
 lib/librte_acl/acl_run_neon.h | 46 ++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 19 deletions(-)

diff --git a/lib/librte_acl/acl_run_neon.h b/lib/librte_acl/acl_run_neon.h
index 01b9766d8..4a8e4b681 100644
--- a/lib/librte_acl/acl_run_neon.h
+++ b/lib/librte_acl/acl_run_neon.h
@@ -112,37 +112,41 @@ transition4(int32x4_t next_input, const uint64_t *trans, uint64_t transitions[])
 	index_msk = vld1q_u32((const uint32_t *)&neon_acl_const.xmm_index_mask);
 
 	/* Calc node type and node addr */
-	node_type = vbicq_s32(tr_hi_lo.val[0], index_msk);
-	addr = vandq_s32(tr_hi_lo.val[0], index_msk);
+	node_type = (uint32x4_t) vbicq_s32(tr_hi_lo.val[0],
+					   (int32x4_t)index_msk);
+	addr = (uint32x4_t) vandq_s32(tr_hi_lo.val[0], (int32x4_t) index_msk);
 
 	/* t = 0 */
-	t = veorq_s32(node_type, node_type);
+	t = veorq_s32((int32x4_t)node_type, (int32x4_t)node_type);
 
 	/* mask for DFA type(0) nodes */
-	dfa_msk = vceqq_u32(node_type, t);
+	dfa_msk = vceqq_u32(node_type, (uint32x4_t)t);
 
-	mask = vld1q_s32((const int32_t *)&neon_acl_const.xmm_shuffle_input);
-	in = vqtbl1q_u8((uint8x16_t)next_input, (uint8x16_t)mask);
+	mask = (uint32x4_t)
+		vld1q_s32((const int32_t *)&neon_acl_const.xmm_shuffle_input);
+	in = (int32x4_t) vqtbl1q_u8((uint8x16_t)next_input, (uint8x16_t)mask);
 
 	/* DFA calculations. */
-	r = vshrq_n_u32(in, 30); /* div by 64 */
-	mask = vld1q_s32((const int32_t *)&neon_acl_const.range_base);
-	r = vaddq_u8(r, mask);
-	t = vshrq_n_u32(in, 24);
-	r = vqtbl1q_u8((uint8x16_t)tr_hi_lo.val[1], (uint8x16_t)r);
-	dfa_ofs = vsubq_s32(t, r);
+	r = (int32x4_t) vshrq_n_u32((uint32x4_t) in, 30); /* div by 64 */
+	mask = (uint32x4_t)
+		vld1q_s32((const int32_t *)&neon_acl_const.range_base);
+	r = (int32x4_t) vaddq_u8((uint8x16_t)r, (uint8x16_t)mask);
+	t = (int32x4_t) vshrq_n_u32((uint32x4_t)in, 24);
+	r = (int32x4_t) vqtbl1q_u8((uint8x16_t)tr_hi_lo.val[1], (uint8x16_t)r);
+	dfa_ofs = (uint32x4_t) vsubq_s32(t, r);
 
 	/* QUAD/SINGLE calculations. */
-	t = vcgtq_s8(in, tr_hi_lo.val[1]);
-	t = vabsq_s8(t);
-	t = vpaddlq_u8(t);
-	quad_ofs = vpaddlq_u16(t);
+	t = (int32x4_t) vcgtq_s8((int8x16_t)in, (int8x16_t)tr_hi_lo.val[1]);
+	t = (int32x4_t) vabsq_s8((int8x16_t)t);
+	t = (int32x4_t) vpaddlq_u8((uint8x16_t)t);
+	quad_ofs = vpaddlq_u16((uint16x8_t)t);
 
 	/* blend DFA and QUAD/SINGLE. */
-	t = vbslq_u8(dfa_msk, dfa_ofs, quad_ofs);
+	t = (int32x4_t) vbslq_u8((uint8x16_t)dfa_msk, (uint8x16_t)dfa_ofs,
+				 (uint8x16_t)quad_ofs);
 
 	/* calculate address for next transitions */
-	addr = vaddq_u32(addr, t);
+	addr = vaddq_u32(addr, (uint32x4_t)t);
 
 	/* Fill next transitions */
 	transitions[0] = trans[vgetq_lane_u32(addr, 0)];
@@ -150,7 +154,7 @@ transition4(int32x4_t next_input, const uint64_t *trans, uint64_t transitions[])
 	transitions[2] = trans[vgetq_lane_u32(addr, 2)];
 	transitions[3] = trans[vgetq_lane_u32(addr, 3)];
 
-	return vshrq_n_u32(next_input, CHAR_BIT);
+	return (int32x4_t) vshrq_n_u32((uint32x4_t)next_input, CHAR_BIT);
 }
 
 /*
@@ -179,6 +183,9 @@ search_neon_8(const struct rte_acl_ctx *ctx, const uint8_t **data,
 	acl_match_check_x4(0, ctx, parms, &flows, &index_array[0]);
 	acl_match_check_x4(4, ctx, parms, &flows, &index_array[4]);
 
+	memset(&input0, 0, sizeof(input0));
+	memset(&input1, 0, sizeof(input1));
+
 	while (flows.started > 0) {
 		/* Gather 4 bytes of input data for each stream. */
 		input0 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 0), input0, 0);
@@ -240,6 +247,7 @@ search_neon_4(const struct rte_acl_ctx *ctx, const uint8_t **data,
 	/* Check for any matches. */
 	acl_match_check_x4(0, ctx, parms, &flows, index_array);
 
+	memset(&input, 0, sizeof(input));
 	while (flows.started > 0) {
 		/* Gather 4 bytes of input data for each stream. */
 		input = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 0), input, 0);
-- 
2.19.1
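
The casts in the patch follow from the strict vector typing of the NEON
intrinsics: each intrinsic accepts exactly one element type, and GCC rejects
implicit conversions between vector types unless -flax-vector-conversions is
passed. The sketch below is illustrative only; it is not part of the patch and
the helper names are invented. It shows the same two fixes in isolation: an
explicit cast where a uint32x4_t value is fed to a signed intrinsic, and
zero-initialisation of a vector before vsetq_lane_s32() writes a single lane
(presumably the reason the patch adds memset() before the gather loops).

#include <string.h>
#include <arm_neon.h>

/* Hypothetical helper: clear the bits of 'msk' in 'val'.  Without the
 * explicit casts, passing a uint32x4_t to vbicq_s32() (which takes two
 * int32x4_t operands) is rejected under GCC's default strict vector
 * typing. */
static inline uint32x4_t
bic_u32_by_s32(int32x4_t val, uint32x4_t msk)
{
	return (uint32x4_t) vbicq_s32(val, (int32x4_t) msk);
}

/* Hypothetical helper: build a vector whose lane 0 holds 4 input bytes.
 * vsetq_lane_s32() reads the remaining lanes of its vector argument, so
 * the vector is zeroed first to avoid use of an uninitialized value. */
static inline int32x4_t
gather_lane0(const uint8_t *p)
{
	int32x4_t input;
	int32_t v;

	memset(&input, 0, sizeof(input));
	memcpy(&v, p, sizeof(v));
	return vsetq_lane_s32(v, input, 0);
}

An alternative to the C-style casts would be the vreinterpretq_*() intrinsics,
which express the same bit-for-bit reinterpretation explicitly.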