From: Neil Horman <nhorman@tuxdriver.com>
To: dev@dpdk.org
Subject: [dpdk-dev] [PATCH] acl: If build does not support sse4.2, emulate missing instructions with C code
Date: Mon, 4 Aug 2014 11:35:58 -0400 [thread overview]
Message-ID: <1407166558-9532-1-git-send-email-nhorman@tuxdriver.com> (raw)
The ACL library makes extensive use of some SSE4.2 instructions, which means the
default build can't compile this library. Work around the problem by testing
the __SSE4_1__ definition in the acl_vect.h file and defining the macros there
as intrinsics or C-level equivalents. Note this is a minimal patch, adjusting
only the definitions that are currently used in the ACL library.
Only compile tested so far, but I wanted to post it for early review so that
others could aid in unit testing.
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: Thomas Monjalon <thomas.monjalon@6wind.com>
CC: "Konstantin Ananyev" <konstantin.ananyev@intel.com>
CC: Bruce Richardson <bruce.richardson@intel.com>
---
lib/librte_acl/acl_bld.c | 3 +-
lib/librte_acl/acl_vect.h | 102 ++++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 100 insertions(+), 5 deletions(-)
diff --git a/lib/librte_acl/acl_bld.c b/lib/librte_acl/acl_bld.c
index 873447b..de974a4 100644
--- a/lib/librte_acl/acl_bld.c
+++ b/lib/librte_acl/acl_bld.c
@@ -31,7 +31,6 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include <nmmintrin.h>
#include <rte_acl.h>
#include "tb_mem.h"
#include "acl.h"
@@ -1481,7 +1480,7 @@ acl_calc_wildness(struct rte_acl_build_rule *head,
switch (rule->config->defs[n].type) {
case RTE_ACL_FIELD_TYPE_BITMASK:
wild = (size -
- _mm_popcnt_u32(fld->mask_range.u8)) /
+ __builtin_popcountl(fld->mask_range.u8)) /
size;
break;
diff --git a/lib/librte_acl/acl_vect.h b/lib/librte_acl/acl_vect.h
index d813600..e5f391b 100644
--- a/lib/librte_acl/acl_vect.h
+++ b/lib/librte_acl/acl_vect.h
@@ -34,6 +34,10 @@
#ifndef _RTE_ACL_VECT_H_
#define _RTE_ACL_VECT_H_
+#ifdef __SSE4_1__
+#include <smmintrin.h>
+#endif
+
/**
* @file
*
@@ -44,12 +48,12 @@
extern "C" {
#endif
+
#define MM_ADD16(a, b) _mm_add_epi16(a, b)
#define MM_ADD32(a, b) _mm_add_epi32(a, b)
#define MM_ALIGNR8(a, b, c) _mm_alignr_epi8(a, b, c)
#define MM_AND(a, b) _mm_and_si128(a, b)
#define MM_ANDNOT(a, b) _mm_andnot_si128(a, b)
-#define MM_BLENDV8(a, b, c) _mm_blendv_epi8(a, b, c)
#define MM_CMPEQ16(a, b) _mm_cmpeq_epi16(a, b)
#define MM_CMPEQ32(a, b) _mm_cmpeq_epi32(a, b)
#define MM_CMPEQ8(a, b) _mm_cmpeq_epi8(a, b)
@@ -59,7 +63,6 @@ extern "C" {
#define MM_CVT32(a) _mm_cvtsi128_si32(a)
#define MM_CVTU32(a) _mm_cvtsi32_si128(a)
#define MM_INSERT16(a, c, b) _mm_insert_epi16(a, c, b)
-#define MM_INSERT32(a, c, b) _mm_insert_epi32(a, c, b)
#define MM_LOAD(a) _mm_load_si128(a)
#define MM_LOADH_PI(a, b) _mm_loadh_pi(a, b)
#define MM_LOADU(a) _mm_loadu_si128(a)
@@ -82,7 +85,6 @@ extern "C" {
#define MM_SRL32(a, b) _mm_srli_epi32(a, b)
#define MM_STORE(a, b) _mm_store_si128(a, b)
#define MM_STOREU(a, b) _mm_storeu_si128(a, b)
-#define MM_TESTZ(a, b) _mm_testz_si128(a, b)
#define MM_XOR(a, b) _mm_xor_si128(a, b)
#define MM_SET16(a, b, c, d, e, f, g, h) \
@@ -93,6 +95,100 @@ extern "C" {
_mm_set_epi8(c0, c1, c2, c3, c4, c5, c6, c7, \
c8, c9, cA, cB, cC, cD, cE, cF)
+
+#ifndef __SSE4_1__
+/**
+ * Software fallback for the SSE4.1 PBLENDVB instruction (_mm_blendv_epi8),
+ * compiled only when __SSE4_1__ is not defined.
+ *
+ * @param dst
+ *   Supplies each result byte whose mask byte has its top bit clear.
+ * @param src
+ *   Supplies each result byte whose mask byte has its top bit set.
+ * @param mask
+ *   Per-byte selector; only bit 7 of every byte is examined.
+ * @return
+ *   The byte-wise blend of dst and src.
+ */
+static inline xmm_t pblendvb(xmm_t dst, xmm_t src, xmm_t mask)
+{
+	unsigned char tmpd[16], tmps[16], tmpm[16];
+	int i;
+
+	/* Spill the vectors to memory so individual bytes can be addressed. */
+	MM_STOREU((xmm_t *)tmpd, dst);
+	MM_STOREU((xmm_t *)tmps, src);
+	MM_STOREU((xmm_t *)tmpm, mask);
+
+	for (i = 0; i < 16; i++) {
+		/* PBLENDVB keys on the most significant bit of each mask byte. */
+		if (tmpm[i] & 0x80)
+			tmpd[i] = tmps[i];
+	}
+
+	/* Reload the blended bytes; tmpd now holds the merged result. */
+	return MM_LOADU((xmm_t *)tmpd);
+}
+
+#define MM_BLENDV8(a, b, c)	pblendvb(a, b, c)
+
+
+/**
+ * Software fallback for the ZF result of the SSE4.1 PTEST instruction
+ * (_mm_testz_si128), compiled only when __SSE4_1__ is not defined.
+ *
+ * @param a
+ *   First 128-bit operand.
+ * @param b
+ *   Second 128-bit operand.
+ * @return
+ *   1 if the bitwise AND of a and b is all zeroes, 0 otherwise.
+ */
+static inline int ptestz(xmm_t a, xmm_t b)
+{
+	unsigned long long tmpa[2], tmpb[2];
+
+	/* Spill both operands so they can be compared as two 64-bit halves. */
+	MM_STOREU((xmm_t *)tmpa, a);
+	MM_STOREU((xmm_t *)tmpb, b);
+
+	/* Any common bit in either half means the AND is non-zero. */
+	if ((tmpa[0] & tmpb[0]) != 0 || (tmpa[1] & tmpb[1]) != 0)
+		return 0;
+
+	return 1;
+}
+
+#define MM_TESTZ(a, b)	ptestz(a, b)
+
+/**
+ * Software fallback for the SSE4.1 PINSRD instruction (_mm_insert_epi32),
+ * compiled only when __SSE4_1__ is not defined.
+ *
+ * @param dst
+ *   Vector whose other three 32-bit lanes are carried over unchanged.
+ * @param val
+ *   32-bit value to place into the selected lane.
+ * @param off
+ *   Lane index; as with the instruction's immediate, only the two low bits
+ *   are used, selecting lane 0..3 (lane 0 is the least significant dword).
+ * @return
+ *   A copy of dst with the selected 32-bit lane replaced by val.
+ */
+static inline xmm_t pinsrd(xmm_t dst, int32_t val, char off)
+{
+	int32_t lanes[4];
+
+	/* Spill the vector so a single 32-bit lane can be overwritten. */
+	MM_STOREU((xmm_t *)lanes, dst);
+
+	/*
+	 * The selector is a lane number, not a bit offset, so the inserted
+	 * dword always sits on a 32-bit boundary and never straddles lanes.
+	 */
+	lanes[off & 0x3] = val;
+
+	return MM_LOADU((xmm_t *)lanes);
+}
+
+#define MM_INSERT32(a, c, b)	pinsrd(a, c, b)
+
+#else
+#define MM_BLENDV8(a, b, c) _mm_blendv_epi8(a, b, c)
+#define MM_TESTZ(a, b) _mm_testz_si128(a, b)
+#define MM_INSERT32(a, c, b) _mm_insert_epi32(a, c, b)
+#endif
+
#ifdef RTE_ARCH_X86_64
#define MM_CVT64(a) _mm_cvtsi128_si64(a)
--
1.8.3.1
next reply other threads:[~2014-08-04 15:33 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-08-04 15:35 Neil Horman [this message]
2014-08-05 15:26 ` Ananyev, Konstantin
2014-08-05 18:20 ` Neil Horman
2014-08-06 10:52 ` Ananyev, Konstantin
2014-08-06 12:12 ` Neil Horman
2014-08-06 12:23 ` Ananyev, Konstantin
2014-08-06 13:35 ` Neil Horman
2014-08-06 11:39 ` Ananyev, Konstantin
2014-08-06 12:18 ` Neil Horman
2014-08-06 12:26 ` Ananyev, Konstantin
2014-08-06 16:59 ` Richardson, Bruce
2014-08-06 17:27 ` Neil Horman
2014-08-12 23:19 ` Thomas Monjalon
2014-08-13 12:33 ` Neil Horman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1407166558-9532-1-git-send-email-nhorman@tuxdriver.com \
--to=nhorman@tuxdriver.com \
--cc=dev@dpdk.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).