DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [RFC UNTESTED PATCH] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
@ 2014-03-20 16:44 H. Peter Anvin
  2014-03-20 16:55 ` Neil Horman
  2014-03-20 17:03 ` H. Peter Anvin
  0 siblings, 2 replies; 8+ messages in thread
From: H. Peter Anvin @ 2014-03-20 16:44 UTC (permalink / raw)
  To: dev; +Cc: H. Peter Anvin

Neil Horman reported that on x86-64 the upper half of %rbx would get
clobbered when the code was compiled PIC or PIE, because the
i386-specific code to preserve %ebx was incorrectly compiled.

However, the code is really way more complex than it needs to be.  For
one thing, the CPUID instruction only needs %eax (leaf) and %ecx
(subleaf) as parameters, and since we are testing for bits, we might
as well list the bits explicitly.  Furthermore, we can use an array
rather than doing a switch statement inside a structure.

Reported-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 lib/librte_eal/common/eal_common_cpuflags.c | 272 +++++++++++++---------------
 1 file changed, 121 insertions(+), 151 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c
index 1ebf78cc2a48..bf66ad9d94ec 100644
--- a/lib/librte_eal/common/eal_common_cpuflags.c
+++ b/lib/librte_eal/common/eal_common_cpuflags.c
@@ -54,21 +54,12 @@
  */
 enum cpu_register_t {
 	REG_EAX = 0,
-	REG_EBX,
 	REG_ECX,
 	REG_EDX,
+	REG_EBX,
 };
 
-/**
- * Parameters for CPUID instruction
- */
-struct cpuid_parameters_t {
-	uint32_t eax;
-	uint32_t ebx;
-	uint32_t ecx;
-	uint32_t edx;
-	enum cpu_register_t return_register;
-};
+typedef uint32_t cpuid_registers_t[4];
 
 #define CPU_FLAG_NAME_MAX_LEN 64
 
@@ -78,8 +69,10 @@ struct cpuid_parameters_t {
 struct feature_entry {
 	enum rte_cpu_flag_t feature;            /**< feature name */
 	char name[CPU_FLAG_NAME_MAX_LEN];       /**< String for printing */
-	struct cpuid_parameters_t params;       /**< cpuid parameters */
-	uint32_t feature_mask;                  /**< bitmask for feature */
+	uint32_t leaf;				/**< cpuid leaf */
+	uint32_t subleaf;			/**< cpuid subleaf */
+	uint32_t reg;				/**< cpuid register */
+	uint32_t bit;				/**< cpuid register bit */
 };
 
 #define FEAT_DEF(f) RTE_CPUFLAG_##f, #f
@@ -88,97 +81,97 @@ struct feature_entry {
  * An array that holds feature entries
  */
 static const struct feature_entry cpu_feature_table[] = {
-	{FEAT_DEF(SSE3),              {0x1, 0, 0, 0, REG_ECX}, 0x00000001},
-	{FEAT_DEF(PCLMULQDQ),         {0x1, 0, 0, 0, REG_ECX}, 0x00000002},
-	{FEAT_DEF(DTES64),            {0x1, 0, 0, 0, REG_ECX}, 0x00000004},
-	{FEAT_DEF(MONITOR),           {0x1, 0, 0, 0, REG_ECX}, 0x00000008},
-	{FEAT_DEF(DS_CPL),            {0x1, 0, 0, 0, REG_ECX}, 0x00000010},
-	{FEAT_DEF(VMX),               {0x1, 0, 0, 0, REG_ECX}, 0x00000020},
-	{FEAT_DEF(SMX),               {0x1, 0, 0, 0, REG_ECX}, 0x00000040},
-	{FEAT_DEF(EIST),              {0x1, 0, 0, 0, REG_ECX}, 0x00000080},
-	{FEAT_DEF(TM2),               {0x1, 0, 0, 0, REG_ECX}, 0x00000100},
-	{FEAT_DEF(SSSE3),             {0x1, 0, 0, 0, REG_ECX}, 0x00000200},
-	{FEAT_DEF(CNXT_ID),           {0x1, 0, 0, 0, REG_ECX}, 0x00000400},
-	{FEAT_DEF(FMA),               {0x1, 0, 0, 0, REG_ECX}, 0x00001000},
-	{FEAT_DEF(CMPXCHG16B),        {0x1, 0, 0, 0, REG_ECX}, 0x00002000},
-	{FEAT_DEF(XTPR),              {0x1, 0, 0, 0, REG_ECX}, 0x00004000},
-	{FEAT_DEF(PDCM),              {0x1, 0, 0, 0, REG_ECX}, 0x00008000},
-	{FEAT_DEF(PCID),              {0x1, 0, 0, 0, REG_ECX}, 0x00020000},
-	{FEAT_DEF(DCA),               {0x1, 0, 0, 0, REG_ECX}, 0x00040000},
-	{FEAT_DEF(SSE4_1),            {0x1, 0, 0, 0, REG_ECX}, 0x00080000},
-	{FEAT_DEF(SSE4_2),            {0x1, 0, 0, 0, REG_ECX}, 0x00100000},
-	{FEAT_DEF(X2APIC),            {0x1, 0, 0, 0, REG_ECX}, 0x00200000},
-	{FEAT_DEF(MOVBE),             {0x1, 0, 0, 0, REG_ECX}, 0x00400000},
-	{FEAT_DEF(POPCNT),            {0x1, 0, 0, 0, REG_ECX}, 0x00800000},
-	{FEAT_DEF(TSC_DEADLINE),      {0x1, 0, 0, 0, REG_ECX}, 0x01000000},
-	{FEAT_DEF(AES),               {0x1, 0, 0, 0, REG_ECX}, 0x02000000},
-	{FEAT_DEF(XSAVE),             {0x1, 0, 0, 0, REG_ECX}, 0x04000000},
-	{FEAT_DEF(OSXSAVE),           {0x1, 0, 0, 0, REG_ECX}, 0x08000000},
-	{FEAT_DEF(AVX),               {0x1, 0, 0, 0, REG_ECX}, 0x10000000},
-	{FEAT_DEF(F16C),              {0x1, 0, 0, 0, REG_ECX}, 0x20000000},
-	{FEAT_DEF(RDRAND),            {0x1, 0, 0, 0, REG_ECX}, 0x40000000},
-
-	{FEAT_DEF(FPU),               {0x1, 0, 0, 0, REG_EDX}, 0x00000001},
-	{FEAT_DEF(VME),               {0x1, 0, 0, 0, REG_EDX}, 0x00000002},
-	{FEAT_DEF(DE),                {0x1, 0, 0, 0, REG_EDX}, 0x00000004},
-	{FEAT_DEF(PSE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000008},
-	{FEAT_DEF(TSC),               {0x1, 0, 0, 0, REG_EDX}, 0x00000010},
-	{FEAT_DEF(MSR),               {0x1, 0, 0, 0, REG_EDX}, 0x00000020},
-	{FEAT_DEF(PAE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000040},
-	{FEAT_DEF(MCE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000080},
-	{FEAT_DEF(CX8),               {0x1, 0, 0, 0, REG_EDX}, 0x00000100},
-	{FEAT_DEF(APIC),              {0x1, 0, 0, 0, REG_EDX}, 0x00000200},
-	{FEAT_DEF(SEP),               {0x1, 0, 0, 0, REG_EDX}, 0x00000800},
-	{FEAT_DEF(MTRR),              {0x1, 0, 0, 0, REG_EDX}, 0x00001000},
-	{FEAT_DEF(PGE),               {0x1, 0, 0, 0, REG_EDX}, 0x00002000},
-	{FEAT_DEF(MCA),               {0x1, 0, 0, 0, REG_EDX}, 0x00004000},
-	{FEAT_DEF(CMOV),              {0x1, 0, 0, 0, REG_EDX}, 0x00008000},
-	{FEAT_DEF(PAT),               {0x1, 0, 0, 0, REG_EDX}, 0x00010000},
-	{FEAT_DEF(PSE36),             {0x1, 0, 0, 0, REG_EDX}, 0x00020000},
-	{FEAT_DEF(PSN),               {0x1, 0, 0, 0, REG_EDX}, 0x00040000},
-	{FEAT_DEF(CLFSH),             {0x1, 0, 0, 0, REG_EDX}, 0x00080000},
-	{FEAT_DEF(DS),                {0x1, 0, 0, 0, REG_EDX}, 0x00200000},
-	{FEAT_DEF(ACPI),              {0x1, 0, 0, 0, REG_EDX}, 0x00400000},
-	{FEAT_DEF(MMX),               {0x1, 0, 0, 0, REG_EDX}, 0x00800000},
-	{FEAT_DEF(FXSR),              {0x1, 0, 0, 0, REG_EDX}, 0x01000000},
-	{FEAT_DEF(SSE),               {0x1, 0, 0, 0, REG_EDX}, 0x02000000},
-	{FEAT_DEF(SSE2),              {0x1, 0, 0, 0, REG_EDX}, 0x04000000},
-	{FEAT_DEF(SS),                {0x1, 0, 0, 0, REG_EDX}, 0x08000000},
-	{FEAT_DEF(HTT),               {0x1, 0, 0, 0, REG_EDX}, 0x10000000},
-	{FEAT_DEF(TM),                {0x1, 0, 0, 0, REG_EDX}, 0x20000000},
-	{FEAT_DEF(PBE),               {0x1, 0, 0, 0, REG_EDX}, 0x80000000},
-
-	{FEAT_DEF(DIGTEMP),           {0x6, 0, 0, 0, REG_EAX}, 0x00000001},
-	{FEAT_DEF(TRBOBST),           {0x6, 0, 0, 0, REG_EAX}, 0x00000002},
-	{FEAT_DEF(ARAT),              {0x6, 0, 0, 0, REG_EAX}, 0x00000004},
-	{FEAT_DEF(PLN),               {0x6, 0, 0, 0, REG_EAX}, 0x00000010},
-	{FEAT_DEF(ECMD),              {0x6, 0, 0, 0, REG_EAX}, 0x00000020},
-	{FEAT_DEF(PTM),               {0x6, 0, 0, 0, REG_EAX}, 0x00000040},
-
-	{FEAT_DEF(MPERF_APERF_MSR),   {0x6, 0, 0, 0, REG_ECX}, 0x00000001},
-	{FEAT_DEF(ACNT2),             {0x6, 0, 0, 0, REG_ECX}, 0x00000002},
-	{FEAT_DEF(ENERGY_EFF),        {0x6, 0, 0, 0, REG_ECX}, 0x00000008},
-
-	{FEAT_DEF(FSGSBASE),          {0x7, 0, 0, 0, REG_EBX}, 0x00000001},
-	{FEAT_DEF(BMI1),              {0x7, 0, 0, 0, REG_EBX}, 0x00000004},
-	{FEAT_DEF(HLE),               {0x7, 0, 0, 0, REG_EBX}, 0x00000010},
-	{FEAT_DEF(AVX2),              {0x7, 0, 0, 0, REG_EBX}, 0x00000020},
-	{FEAT_DEF(SMEP),              {0x7, 0, 0, 0, REG_EBX}, 0x00000040},
-	{FEAT_DEF(BMI2),              {0x7, 0, 0, 0, REG_EBX}, 0x00000080},
-	{FEAT_DEF(ERMS),              {0x7, 0, 0, 0, REG_EBX}, 0x00000100},
-	{FEAT_DEF(INVPCID),           {0x7, 0, 0, 0, REG_EBX}, 0x00000400},
-	{FEAT_DEF(RTM),               {0x7, 0, 0, 0, REG_EBX}, 0x00000800},
-
-	{FEAT_DEF(LAHF_SAHF),  {0x80000001, 0, 0, 0, REG_ECX}, 0x00000001},
-	{FEAT_DEF(LZCNT),      {0x80000001, 0, 0, 0, REG_ECX}, 0x00000010},
-
-	{FEAT_DEF(SYSCALL),    {0x80000001, 0, 0, 0, REG_EDX}, 0x00000800},
-	{FEAT_DEF(XD),         {0x80000001, 0, 0, 0, REG_EDX}, 0x00100000},
-	{FEAT_DEF(1GB_PG),     {0x80000001, 0, 0, 0, REG_EDX}, 0x04000000},
-	{FEAT_DEF(RDTSCP),     {0x80000001, 0, 0, 0, REG_EDX}, 0x08000000},
-	{FEAT_DEF(EM64T),      {0x80000001, 0, 0, 0, REG_EDX}, 0x20000000},
-
-	{FEAT_DEF(INVTSC),     {0x80000007, 0, 0, 0, REG_EDX}, 0x00000100},
+	{FEAT_DEF(SSE3),		0x00000001, 0, REG_ECX,  0},
+	{FEAT_DEF(PCLMULQDQ),		0x00000001, 0, REG_ECX,  1},
+	{FEAT_DEF(DTES64),		0x00000001, 0, REG_ECX,  2},
+	{FEAT_DEF(MONITOR),		0x00000001, 0, REG_ECX,  3},
+	{FEAT_DEF(DS_CPL),		0x00000001, 0, REG_ECX,  4},
+	{FEAT_DEF(VMX),			0x00000001, 0, REG_ECX,  5},
+	{FEAT_DEF(SMX),			0x00000001, 0, REG_ECX,  6},
+	{FEAT_DEF(EIST),		0x00000001, 0, REG_ECX,  7},
+	{FEAT_DEF(TM2),			0x00000001, 0, REG_ECX,  8},
+	{FEAT_DEF(SSSE3),		0x00000001, 0, REG_ECX,  9},
+	{FEAT_DEF(CNXT_ID),		0x00000001, 0, REG_ECX, 10},
+	{FEAT_DEF(FMA),			0x00000001, 0, REG_ECX, 12},
+	{FEAT_DEF(CMPXCHG16B),		0x00000001, 0, REG_ECX, 13},
+	{FEAT_DEF(XTPR),		0x00000001, 0, REG_ECX, 14},
+	{FEAT_DEF(PDCM),		0x00000001, 0, REG_ECX, 15},
+	{FEAT_DEF(PCID),		0x00000001, 0, REG_ECX, 17},
+	{FEAT_DEF(DCA),			0x00000001, 0, REG_ECX, 18},
+	{FEAT_DEF(SSE4_1),		0x00000001, 0, REG_ECX, 19},
+	{FEAT_DEF(SSE4_2),		0x00000001, 0, REG_ECX, 20},
+	{FEAT_DEF(X2APIC),		0x00000001, 0, REG_ECX, 21},
+	{FEAT_DEF(MOVBE),		0x00000001, 0, REG_ECX, 22},
+	{FEAT_DEF(POPCNT),		0x00000001, 0, REG_ECX, 23},
+	{FEAT_DEF(TSC_DEADLINE),	0x00000001, 0, REG_ECX, 24},
+	{FEAT_DEF(AES),			0x00000001, 0, REG_ECX, 25},
+	{FEAT_DEF(XSAVE),		0x00000001, 0, REG_ECX, 26},
+	{FEAT_DEF(OSXSAVE),		0x00000001, 0, REG_ECX, 27},
+	{FEAT_DEF(AVX),			0x00000001, 0, REG_ECX, 28},
+	{FEAT_DEF(F16C),		0x00000001, 0, REG_ECX, 29},
+	{FEAT_DEF(RDRAND),		0x00000001, 0, REG_ECX, 30},
+
+	{FEAT_DEF(FPU),			0x00000001, 0, REG_EDX,  0},
+	{FEAT_DEF(VME),			0x00000001, 0, REG_EDX,  1},
+	{FEAT_DEF(DE),			0x00000001, 0, REG_EDX,  2},
+	{FEAT_DEF(PSE),			0x00000001, 0, REG_EDX,  3},
+	{FEAT_DEF(TSC),			0x00000001, 0, REG_EDX,  4},
+	{FEAT_DEF(MSR),			0x00000001, 0, REG_EDX,  5},
+	{FEAT_DEF(PAE),			0x00000001, 0, REG_EDX,  6},
+	{FEAT_DEF(MCE),			0x00000001, 0, REG_EDX,  7},
+	{FEAT_DEF(CX8),			0x00000001, 0, REG_EDX,  8},
+	{FEAT_DEF(APIC),		0x00000001, 0, REG_EDX,  9},
+	{FEAT_DEF(SEP),			0x00000001, 0, REG_EDX, 11},
+	{FEAT_DEF(MTRR),		0x00000001, 0, REG_EDX, 12},
+	{FEAT_DEF(PGE),			0x00000001, 0, REG_EDX, 13},
+	{FEAT_DEF(MCA),			0x00000001, 0, REG_EDX, 14},
+	{FEAT_DEF(CMOV),		0x00000001, 0, REG_EDX, 15},
+	{FEAT_DEF(PAT),			0x00000001, 0, REG_EDX, 16},
+	{FEAT_DEF(PSE36),		0x00000001, 0, REG_EDX, 17},
+	{FEAT_DEF(PSN),			0x00000001, 0, REG_EDX, 18},
+	{FEAT_DEF(CLFSH),		0x00000001, 0, REG_EDX, 19},
+	{FEAT_DEF(DS),			0x00000001, 0, REG_EDX, 21},
+	{FEAT_DEF(ACPI),		0x00000001, 0, REG_EDX, 22},
+	{FEAT_DEF(MMX),			0x00000001, 0, REG_EDX, 23},
+	{FEAT_DEF(FXSR),		0x00000001, 0, REG_EDX, 24},
+	{FEAT_DEF(SSE),			0x00000001, 0, REG_EDX, 25},
+	{FEAT_DEF(SSE2),		0x00000001, 0, REG_EDX, 26},
+	{FEAT_DEF(SS),			0x00000001, 0, REG_EDX, 27},
+	{FEAT_DEF(HTT),			0x00000001, 0, REG_EDX, 28},
+	{FEAT_DEF(TM),			0x00000001, 0, REG_EDX, 29},
+	{FEAT_DEF(PBE),			0x00000001, 0, REG_EDX, 31},
+
+	{FEAT_DEF(DIGTEMP),		0x00000006, 0, REG_EAX,  0},
+	{FEAT_DEF(TRBOBST),		0x00000006, 0, REG_EAX,  1},
+	{FEAT_DEF(ARAT),		0x00000006, 0, REG_EAX,  2},
+	{FEAT_DEF(PLN),			0x00000006, 0, REG_EAX,  4},
+	{FEAT_DEF(ECMD),		0x00000006, 0, REG_EAX,  5},
+	{FEAT_DEF(PTM),			0x00000006, 0, REG_EAX,  6},
+
+	{FEAT_DEF(MPERF_APERF_MSR),	0x00000006, 0, REG_ECX,  0},
+	{FEAT_DEF(ACNT2),		0x00000006, 0, REG_ECX,  1},
+	{FEAT_DEF(ENERGY_EFF),		0x00000006, 0, REG_ECX,  3},
+
+	{FEAT_DEF(FSGSBASE),		0x00000007, 0, REG_EBX,  0},
+	{FEAT_DEF(BMI1),		0x00000007, 0, REG_EBX,  2},
+	{FEAT_DEF(HLE),			0x00000007, 0, REG_EBX,  4},
+	{FEAT_DEF(AVX2),		0x00000007, 0, REG_EBX,  5},
+	{FEAT_DEF(SMEP),		0x00000007, 0, REG_EBX,  6},
+	{FEAT_DEF(BMI2),		0x00000007, 0, REG_EBX,  7},
+	{FEAT_DEF(ERMS),		0x00000007, 0, REG_EBX,  8},
+	{FEAT_DEF(INVPCID),		0x00000007, 0, REG_EBX, 10},
+	{FEAT_DEF(RTM),			0x00000007, 0, REG_EBX, 11},
+
+	{FEAT_DEF(LAHF_SAHF),		0x80000001, 0, REG_ECX,  0},
+	{FEAT_DEF(LZCNT),		0x80000001, 0, REG_ECX,  4},
+
+	{FEAT_DEF(SYSCALL),		0x80000001, 0, REG_EDX, 11},
+	{FEAT_DEF(XD),			0x80000001, 0, REG_EDX, 20},
+	{FEAT_DEF(1GB_PG),		0x80000001, 0, REG_EDX, 26},
+	{FEAT_DEF(RDTSCP),		0x80000001, 0, REG_EDX, 27},
+	{FEAT_DEF(EM64T),		0x80000001, 0, REG_EDX, 29},
+
+	{FEAT_DEF(INVTSC),		0x80000007, 0, REG_EDX,  8},
 };
 
 /*
@@ -187,51 +180,25 @@ static const struct feature_entry cpu_feature_table[] = {
  * This function, when compiled with GCC, will generate architecture-neutral
  * code, as per GCC manual.
  */
-static inline int
-rte_cpu_get_features(struct cpuid_parameters_t params)
+static inline void
+rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t *out)
 {
-	int eax, ebx, ecx, edx;            /* registers */
-
-#ifndef __PIC__
-   asm volatile ("cpuid"
-                 /* output */
-                 : "=a" (eax),
-                   "=b" (ebx),
-                   "=c" (ecx),
-                   "=d" (edx)
-                 /* input */
-                 : "a" (params.eax),
-                   "b" (params.ebx),
-                   "c" (params.ecx),
-                   "d" (params.edx));
+#if defined(__i386__) && defined(__PIC__)
+    /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */
+    asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0"
+		 : "=r" (out[REG_EBX]),
+		   "=a" (out[REG_EAX]),
+		   "=c" (out[REG_ECX]),
+		   "=d" (out[REG_EDX])
+		 : "a" (leaf), "c" (subleaf));
 #else
-	asm volatile ( 
-            "mov %%ebx, %%edi\n"
-            "cpuid\n"
-            "xchgl %%ebx, %%edi;\n"
-            : "=a" (eax),
-              "=D" (ebx),
-              "=c" (ecx),
-              "=d" (edx)
-            /* input */
-            : "a" (params.eax),
-              "D" (params.ebx),
-              "c" (params.ecx),
-              "d" (params.edx));
+    asm volatile("cpuid"
+		 : "=b" (out[REG_EBX]),
+		   "=a" (out[REG_EAX]),
+		   "=c" (out[REG_ECX]),
+		   "=d" (out[REG_EDX])
+		 : "a" (leaf), "c" (subleaf));
 #endif
-
-	switch (params.return_register) {
-	case REG_EAX:
-		return eax;
-	case REG_EBX:
-		return ebx;
-	case REG_ECX:
-		return ecx;
-	case REG_EDX:
-		return edx;
-	default:
-		return 0;
-	}
 }
 
 /*
@@ -240,17 +207,20 @@ rte_cpu_get_features(struct cpuid_parameters_t params)
 int
 rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
 {
-	int value;
+	const struct feature_entry *feat;
+	cpu_registers_t regs;
 
 	if (feature >= RTE_CPUFLAG_NUMFLAGS)
 		/* Flag does not match anything in the feature tables */
 		return -ENOENT;
 
-	/* get value of the register containing the desired feature */
-	value = rte_cpu_get_features(cpu_feature_table[feature].params);
+	feat = &cpu_feature_table[feature];
+
+	/* get the cpuid leaf containing the desired feature */
+	rte_cpu_get_features(feat->leaf, feat->subleaf, &regs);
 
 	/* check if the feature is enabled */
-	return (cpu_feature_table[feature].feature_mask & value) > 0;
+	return (regs[feat->reg] >> feat->bit) & 1;
 }
 
 /**
@@ -273,7 +243,7 @@ rte_cpu_check_supported(void)
 	unsigned i;
 
 	for (i = 0; i < sizeof(compile_time_flags)/sizeof(compile_time_flags[0]); i++)
-		if (rte_cpu_get_flag_enabled(compile_time_flags[i]) < 1) {
+		if (!rte_cpu_get_flag_enabled(compile_time_flags[i])) {
 			fprintf(stderr,
 			        "ERROR: This system does not support \"%s\".\n"
 			        "Please check that RTE_MACHINE is set correctly.\n",
-- 
1.8.5.3

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [dpdk-dev] [RFC UNTESTED PATCH] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
  2014-03-20 16:44 [dpdk-dev] [RFC UNTESTED PATCH] eal_common_cpuflags: Fix %rbx corruption, and simplify the code H. Peter Anvin
@ 2014-03-20 16:55 ` Neil Horman
  2014-03-20 17:03 ` H. Peter Anvin
  1 sibling, 0 replies; 8+ messages in thread
From: Neil Horman @ 2014-03-20 16:55 UTC (permalink / raw)
  To: H. Peter Anvin; +Cc: dev, H. Peter Anvin

On Thu, Mar 20, 2014 at 09:44:28AM -0700, H. Peter Anvin wrote:
> Neil Horman reported that on x86-64 the upper half of %rbx would get
> clobbered when the code was compiled PIC or PIE, because the
> i386-specific code to preserve %ebx was incorrectly compiled.
> 
> However, the code is really way more complex than it needs to be.  For
> one thing, the CPUID instruction only needs %eax (leaf) and %ecx
> (subleaf) as parameters, and since we are testing for bits, we might
> as well list the bits explicitly.  Furthermore, we can use an array
> rather than doing a switch statement inside a structure.
> 
> Reported-by: Neil Horman <nhorman@tuxdriver.com>
> Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>

> ---
>  lib/librte_eal/common/eal_common_cpuflags.c | 272 +++++++++++++---------------
>  1 file changed, 121 insertions(+), 151 deletions(-)
> 
> diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c
> index 1ebf78cc2a48..bf66ad9d94ec 100644
> --- a/lib/librte_eal/common/eal_common_cpuflags.c
> +++ b/lib/librte_eal/common/eal_common_cpuflags.c
> @@ -54,21 +54,12 @@
>   */
>  enum cpu_register_t {
>  	REG_EAX = 0,
> -	REG_EBX,
>  	REG_ECX,
>  	REG_EDX,
> +	REG_EBX,
>  };
>  
> -/**
> - * Parameters for CPUID instruction
> - */
> -struct cpuid_parameters_t {
> -	uint32_t eax;
> -	uint32_t ebx;
> -	uint32_t ecx;
> -	uint32_t edx;
> -	enum cpu_register_t return_register;
> -};
> +typedef uint32_t cpuid_registers_t[4];
>  
>  #define CPU_FLAG_NAME_MAX_LEN 64
>  
> @@ -78,8 +69,10 @@ struct cpuid_parameters_t {
>  struct feature_entry {
>  	enum rte_cpu_flag_t feature;            /**< feature name */
>  	char name[CPU_FLAG_NAME_MAX_LEN];       /**< String for printing */
> -	struct cpuid_parameters_t params;       /**< cpuid parameters */
> -	uint32_t feature_mask;                  /**< bitmask for feature */
> +	uint32_t leaf;				/**< cpuid leaf */
> +	uint32_t subleaf;			/**< cpuid subleaf */
> +	uint32_t reg;				/**< cpuid register */
> +	uint32_t bit;				/**< cpuid register bit */
>  };
>  
>  #define FEAT_DEF(f) RTE_CPUFLAG_##f, #f
> @@ -88,97 +81,97 @@ struct feature_entry {
>   * An array that holds feature entries
>   */
>  static const struct feature_entry cpu_feature_table[] = {
> -	{FEAT_DEF(SSE3),              {0x1, 0, 0, 0, REG_ECX}, 0x00000001},
> -	{FEAT_DEF(PCLMULQDQ),         {0x1, 0, 0, 0, REG_ECX}, 0x00000002},
> -	{FEAT_DEF(DTES64),            {0x1, 0, 0, 0, REG_ECX}, 0x00000004},
> -	{FEAT_DEF(MONITOR),           {0x1, 0, 0, 0, REG_ECX}, 0x00000008},
> -	{FEAT_DEF(DS_CPL),            {0x1, 0, 0, 0, REG_ECX}, 0x00000010},
> -	{FEAT_DEF(VMX),               {0x1, 0, 0, 0, REG_ECX}, 0x00000020},
> -	{FEAT_DEF(SMX),               {0x1, 0, 0, 0, REG_ECX}, 0x00000040},
> -	{FEAT_DEF(EIST),              {0x1, 0, 0, 0, REG_ECX}, 0x00000080},
> -	{FEAT_DEF(TM2),               {0x1, 0, 0, 0, REG_ECX}, 0x00000100},
> -	{FEAT_DEF(SSSE3),             {0x1, 0, 0, 0, REG_ECX}, 0x00000200},
> -	{FEAT_DEF(CNXT_ID),           {0x1, 0, 0, 0, REG_ECX}, 0x00000400},
> -	{FEAT_DEF(FMA),               {0x1, 0, 0, 0, REG_ECX}, 0x00001000},
> -	{FEAT_DEF(CMPXCHG16B),        {0x1, 0, 0, 0, REG_ECX}, 0x00002000},
> -	{FEAT_DEF(XTPR),              {0x1, 0, 0, 0, REG_ECX}, 0x00004000},
> -	{FEAT_DEF(PDCM),              {0x1, 0, 0, 0, REG_ECX}, 0x00008000},
> -	{FEAT_DEF(PCID),              {0x1, 0, 0, 0, REG_ECX}, 0x00020000},
> -	{FEAT_DEF(DCA),               {0x1, 0, 0, 0, REG_ECX}, 0x00040000},
> -	{FEAT_DEF(SSE4_1),            {0x1, 0, 0, 0, REG_ECX}, 0x00080000},
> -	{FEAT_DEF(SSE4_2),            {0x1, 0, 0, 0, REG_ECX}, 0x00100000},
> -	{FEAT_DEF(X2APIC),            {0x1, 0, 0, 0, REG_ECX}, 0x00200000},
> -	{FEAT_DEF(MOVBE),             {0x1, 0, 0, 0, REG_ECX}, 0x00400000},
> -	{FEAT_DEF(POPCNT),            {0x1, 0, 0, 0, REG_ECX}, 0x00800000},
> -	{FEAT_DEF(TSC_DEADLINE),      {0x1, 0, 0, 0, REG_ECX}, 0x01000000},
> -	{FEAT_DEF(AES),               {0x1, 0, 0, 0, REG_ECX}, 0x02000000},
> -	{FEAT_DEF(XSAVE),             {0x1, 0, 0, 0, REG_ECX}, 0x04000000},
> -	{FEAT_DEF(OSXSAVE),           {0x1, 0, 0, 0, REG_ECX}, 0x08000000},
> -	{FEAT_DEF(AVX),               {0x1, 0, 0, 0, REG_ECX}, 0x10000000},
> -	{FEAT_DEF(F16C),              {0x1, 0, 0, 0, REG_ECX}, 0x20000000},
> -	{FEAT_DEF(RDRAND),            {0x1, 0, 0, 0, REG_ECX}, 0x40000000},
> -
> -	{FEAT_DEF(FPU),               {0x1, 0, 0, 0, REG_EDX}, 0x00000001},
> -	{FEAT_DEF(VME),               {0x1, 0, 0, 0, REG_EDX}, 0x00000002},
> -	{FEAT_DEF(DE),                {0x1, 0, 0, 0, REG_EDX}, 0x00000004},
> -	{FEAT_DEF(PSE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000008},
> -	{FEAT_DEF(TSC),               {0x1, 0, 0, 0, REG_EDX}, 0x00000010},
> -	{FEAT_DEF(MSR),               {0x1, 0, 0, 0, REG_EDX}, 0x00000020},
> -	{FEAT_DEF(PAE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000040},
> -	{FEAT_DEF(MCE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000080},
> -	{FEAT_DEF(CX8),               {0x1, 0, 0, 0, REG_EDX}, 0x00000100},
> -	{FEAT_DEF(APIC),              {0x1, 0, 0, 0, REG_EDX}, 0x00000200},
> -	{FEAT_DEF(SEP),               {0x1, 0, 0, 0, REG_EDX}, 0x00000800},
> -	{FEAT_DEF(MTRR),              {0x1, 0, 0, 0, REG_EDX}, 0x00001000},
> -	{FEAT_DEF(PGE),               {0x1, 0, 0, 0, REG_EDX}, 0x00002000},
> -	{FEAT_DEF(MCA),               {0x1, 0, 0, 0, REG_EDX}, 0x00004000},
> -	{FEAT_DEF(CMOV),              {0x1, 0, 0, 0, REG_EDX}, 0x00008000},
> -	{FEAT_DEF(PAT),               {0x1, 0, 0, 0, REG_EDX}, 0x00010000},
> -	{FEAT_DEF(PSE36),             {0x1, 0, 0, 0, REG_EDX}, 0x00020000},
> -	{FEAT_DEF(PSN),               {0x1, 0, 0, 0, REG_EDX}, 0x00040000},
> -	{FEAT_DEF(CLFSH),             {0x1, 0, 0, 0, REG_EDX}, 0x00080000},
> -	{FEAT_DEF(DS),                {0x1, 0, 0, 0, REG_EDX}, 0x00200000},
> -	{FEAT_DEF(ACPI),              {0x1, 0, 0, 0, REG_EDX}, 0x00400000},
> -	{FEAT_DEF(MMX),               {0x1, 0, 0, 0, REG_EDX}, 0x00800000},
> -	{FEAT_DEF(FXSR),              {0x1, 0, 0, 0, REG_EDX}, 0x01000000},
> -	{FEAT_DEF(SSE),               {0x1, 0, 0, 0, REG_EDX}, 0x02000000},
> -	{FEAT_DEF(SSE2),              {0x1, 0, 0, 0, REG_EDX}, 0x04000000},
> -	{FEAT_DEF(SS),                {0x1, 0, 0, 0, REG_EDX}, 0x08000000},
> -	{FEAT_DEF(HTT),               {0x1, 0, 0, 0, REG_EDX}, 0x10000000},
> -	{FEAT_DEF(TM),                {0x1, 0, 0, 0, REG_EDX}, 0x20000000},
> -	{FEAT_DEF(PBE),               {0x1, 0, 0, 0, REG_EDX}, 0x80000000},
> -
> -	{FEAT_DEF(DIGTEMP),           {0x6, 0, 0, 0, REG_EAX}, 0x00000001},
> -	{FEAT_DEF(TRBOBST),           {0x6, 0, 0, 0, REG_EAX}, 0x00000002},
> -	{FEAT_DEF(ARAT),              {0x6, 0, 0, 0, REG_EAX}, 0x00000004},
> -	{FEAT_DEF(PLN),               {0x6, 0, 0, 0, REG_EAX}, 0x00000010},
> -	{FEAT_DEF(ECMD),              {0x6, 0, 0, 0, REG_EAX}, 0x00000020},
> -	{FEAT_DEF(PTM),               {0x6, 0, 0, 0, REG_EAX}, 0x00000040},
> -
> -	{FEAT_DEF(MPERF_APERF_MSR),   {0x6, 0, 0, 0, REG_ECX}, 0x00000001},
> -	{FEAT_DEF(ACNT2),             {0x6, 0, 0, 0, REG_ECX}, 0x00000002},
> -	{FEAT_DEF(ENERGY_EFF),        {0x6, 0, 0, 0, REG_ECX}, 0x00000008},
> -
> -	{FEAT_DEF(FSGSBASE),          {0x7, 0, 0, 0, REG_EBX}, 0x00000001},
> -	{FEAT_DEF(BMI1),              {0x7, 0, 0, 0, REG_EBX}, 0x00000004},
> -	{FEAT_DEF(HLE),               {0x7, 0, 0, 0, REG_EBX}, 0x00000010},
> -	{FEAT_DEF(AVX2),              {0x7, 0, 0, 0, REG_EBX}, 0x00000020},
> -	{FEAT_DEF(SMEP),              {0x7, 0, 0, 0, REG_EBX}, 0x00000040},
> -	{FEAT_DEF(BMI2),              {0x7, 0, 0, 0, REG_EBX}, 0x00000080},
> -	{FEAT_DEF(ERMS),              {0x7, 0, 0, 0, REG_EBX}, 0x00000100},
> -	{FEAT_DEF(INVPCID),           {0x7, 0, 0, 0, REG_EBX}, 0x00000400},
> -	{FEAT_DEF(RTM),               {0x7, 0, 0, 0, REG_EBX}, 0x00000800},
> -
> -	{FEAT_DEF(LAHF_SAHF),  {0x80000001, 0, 0, 0, REG_ECX}, 0x00000001},
> -	{FEAT_DEF(LZCNT),      {0x80000001, 0, 0, 0, REG_ECX}, 0x00000010},
> -
> -	{FEAT_DEF(SYSCALL),    {0x80000001, 0, 0, 0, REG_EDX}, 0x00000800},
> -	{FEAT_DEF(XD),         {0x80000001, 0, 0, 0, REG_EDX}, 0x00100000},
> -	{FEAT_DEF(1GB_PG),     {0x80000001, 0, 0, 0, REG_EDX}, 0x04000000},
> -	{FEAT_DEF(RDTSCP),     {0x80000001, 0, 0, 0, REG_EDX}, 0x08000000},
> -	{FEAT_DEF(EM64T),      {0x80000001, 0, 0, 0, REG_EDX}, 0x20000000},
> -
> -	{FEAT_DEF(INVTSC),     {0x80000007, 0, 0, 0, REG_EDX}, 0x00000100},
> +	{FEAT_DEF(SSE3),		0x00000001, 0, REG_ECX,  0},
> +	{FEAT_DEF(PCLMULQDQ),		0x00000001, 0, REG_ECX,  1},
> +	{FEAT_DEF(DTES64),		0x00000001, 0, REG_ECX,  2},
> +	{FEAT_DEF(MONITOR),		0x00000001, 0, REG_ECX,  3},
> +	{FEAT_DEF(DS_CPL),		0x00000001, 0, REG_ECX,  4},
> +	{FEAT_DEF(VMX),			0x00000001, 0, REG_ECX,  5},
> +	{FEAT_DEF(SMX),			0x00000001, 0, REG_ECX,  6},
> +	{FEAT_DEF(EIST),		0x00000001, 0, REG_ECX,  7},
> +	{FEAT_DEF(TM2),			0x00000001, 0, REG_ECX,  8},
> +	{FEAT_DEF(SSSE3),		0x00000001, 0, REG_ECX,  9},
> +	{FEAT_DEF(CNXT_ID),		0x00000001, 0, REG_ECX, 10},
> +	{FEAT_DEF(FMA),			0x00000001, 0, REG_ECX, 12},
> +	{FEAT_DEF(CMPXCHG16B),		0x00000001, 0, REG_ECX, 13},
> +	{FEAT_DEF(XTPR),		0x00000001, 0, REG_ECX, 14},
> +	{FEAT_DEF(PDCM),		0x00000001, 0, REG_ECX, 15},
> +	{FEAT_DEF(PCID),		0x00000001, 0, REG_ECX, 17},
> +	{FEAT_DEF(DCA),			0x00000001, 0, REG_ECX, 18},
> +	{FEAT_DEF(SSE4_1),		0x00000001, 0, REG_ECX, 19},
> +	{FEAT_DEF(SSE4_2),		0x00000001, 0, REG_ECX, 20},
> +	{FEAT_DEF(X2APIC),		0x00000001, 0, REG_ECX, 21},
> +	{FEAT_DEF(MOVBE),		0x00000001, 0, REG_ECX, 22},
> +	{FEAT_DEF(POPCNT),		0x00000001, 0, REG_ECX, 23},
> +	{FEAT_DEF(TSC_DEADLINE),	0x00000001, 0, REG_ECX, 24},
> +	{FEAT_DEF(AES),			0x00000001, 0, REG_ECX, 25},
> +	{FEAT_DEF(XSAVE),		0x00000001, 0, REG_ECX, 26},
> +	{FEAT_DEF(OSXSAVE),		0x00000001, 0, REG_ECX, 27},
> +	{FEAT_DEF(AVX),			0x00000001, 0, REG_ECX, 28},
> +	{FEAT_DEF(F16C),		0x00000001, 0, REG_ECX, 29},
> +	{FEAT_DEF(RDRAND),		0x00000001, 0, REG_ECX, 30},
> +
> +	{FEAT_DEF(FPU),			0x00000001, 0, REG_EDX,  0},
> +	{FEAT_DEF(VME),			0x00000001, 0, REG_EDX,  1},
> +	{FEAT_DEF(DE),			0x00000001, 0, REG_EDX,  2},
> +	{FEAT_DEF(PSE),			0x00000001, 0, REG_EDX,  3},
> +	{FEAT_DEF(TSC),			0x00000001, 0, REG_EDX,  4},
> +	{FEAT_DEF(MSR),			0x00000001, 0, REG_EDX,  5},
> +	{FEAT_DEF(PAE),			0x00000001, 0, REG_EDX,  6},
> +	{FEAT_DEF(MCE),			0x00000001, 0, REG_EDX,  7},
> +	{FEAT_DEF(CX8),			0x00000001, 0, REG_EDX,  8},
> +	{FEAT_DEF(APIC),		0x00000001, 0, REG_EDX,  9},
> +	{FEAT_DEF(SEP),			0x00000001, 0, REG_EDX, 11},
> +	{FEAT_DEF(MTRR),		0x00000001, 0, REG_EDX, 12},
> +	{FEAT_DEF(PGE),			0x00000001, 0, REG_EDX, 13},
> +	{FEAT_DEF(MCA),			0x00000001, 0, REG_EDX, 14},
> +	{FEAT_DEF(CMOV),		0x00000001, 0, REG_EDX, 15},
> +	{FEAT_DEF(PAT),			0x00000001, 0, REG_EDX, 16},
> +	{FEAT_DEF(PSE36),		0x00000001, 0, REG_EDX, 17},
> +	{FEAT_DEF(PSN),			0x00000001, 0, REG_EDX, 18},
> +	{FEAT_DEF(CLFSH),		0x00000001, 0, REG_EDX, 19},
> +	{FEAT_DEF(DS),			0x00000001, 0, REG_EDX, 21},
> +	{FEAT_DEF(ACPI),		0x00000001, 0, REG_EDX, 22},
> +	{FEAT_DEF(MMX),			0x00000001, 0, REG_EDX, 23},
> +	{FEAT_DEF(FXSR),		0x00000001, 0, REG_EDX, 24},
> +	{FEAT_DEF(SSE),			0x00000001, 0, REG_EDX, 25},
> +	{FEAT_DEF(SSE2),		0x00000001, 0, REG_EDX, 26},
> +	{FEAT_DEF(SS),			0x00000001, 0, REG_EDX, 27},
> +	{FEAT_DEF(HTT),			0x00000001, 0, REG_EDX, 28},
> +	{FEAT_DEF(TM),			0x00000001, 0, REG_EDX, 29},
> +	{FEAT_DEF(PBE),			0x00000001, 0, REG_EDX, 31},
> +
> +	{FEAT_DEF(DIGTEMP),		0x00000006, 0, REG_EAX,  0},
> +	{FEAT_DEF(TRBOBST),		0x00000006, 0, REG_EAX,  1},
> +	{FEAT_DEF(ARAT),		0x00000006, 0, REG_EAX,  2},
> +	{FEAT_DEF(PLN),			0x00000006, 0, REG_EAX,  4},
> +	{FEAT_DEF(ECMD),		0x00000006, 0, REG_EAX,  5},
> +	{FEAT_DEF(PTM),			0x00000006, 0, REG_EAX,  6},
> +
> +	{FEAT_DEF(MPERF_APERF_MSR),	0x00000006, 0, REG_ECX,  0},
> +	{FEAT_DEF(ACNT2),		0x00000006, 0, REG_ECX,  1},
> +	{FEAT_DEF(ENERGY_EFF),		0x00000006, 0, REG_ECX,  3},
> +
> +	{FEAT_DEF(FSGSBASE),		0x00000007, 0, REG_EBX,  0},
> +	{FEAT_DEF(BMI1),		0x00000007, 0, REG_EBX,  2},
> +	{FEAT_DEF(HLE),			0x00000007, 0, REG_EBX,  4},
> +	{FEAT_DEF(AVX2),		0x00000007, 0, REG_EBX,  5},
> +	{FEAT_DEF(SMEP),		0x00000007, 0, REG_EBX,  6},
> +	{FEAT_DEF(BMI2),		0x00000007, 0, REG_EBX,  7},
> +	{FEAT_DEF(ERMS),		0x00000007, 0, REG_EBX,  8},
> +	{FEAT_DEF(INVPCID),		0x00000007, 0, REG_EBX, 10},
> +	{FEAT_DEF(RTM),			0x00000007, 0, REG_EBX, 11},
> +
> +	{FEAT_DEF(LAHF_SAHF),		0x80000001, 0, REG_ECX,  0},
> +	{FEAT_DEF(LZCNT),		0x80000001, 0, REG_ECX,  4},
> +
> +	{FEAT_DEF(SYSCALL),		0x80000001, 0, REG_EDX, 11},
> +	{FEAT_DEF(XD),			0x80000001, 0, REG_EDX, 20},
> +	{FEAT_DEF(1GB_PG),		0x80000001, 0, REG_EDX, 26},
> +	{FEAT_DEF(RDTSCP),		0x80000001, 0, REG_EDX, 27},
> +	{FEAT_DEF(EM64T),		0x80000001, 0, REG_EDX, 29},
> +
> +	{FEAT_DEF(INVTSC),		0x80000007, 0, REG_EDX,  8},
>  };
>  
>  /*
> @@ -187,51 +180,25 @@ static const struct feature_entry cpu_feature_table[] = {
>   * This function, when compiled with GCC, will generate architecture-neutral
>   * code, as per GCC manual.
>   */
> -static inline int
> -rte_cpu_get_features(struct cpuid_parameters_t params)
> +static inline void
> +rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t *out)
>  {
> -	int eax, ebx, ecx, edx;            /* registers */
> -
> -#ifndef __PIC__
> -   asm volatile ("cpuid"
> -                 /* output */
> -                 : "=a" (eax),
> -                   "=b" (ebx),
> -                   "=c" (ecx),
> -                   "=d" (edx)
> -                 /* input */
> -                 : "a" (params.eax),
> -                   "b" (params.ebx),
> -                   "c" (params.ecx),
> -                   "d" (params.edx));
> +#if defined(__i386__) && defined(__PIC__)
> +    /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */
> +    asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0"
> +		 : "=r" (out[REG_EBX]),
> +		   "=a" (out[REG_EAX]),
> +		   "=c" (out[REG_ECX]),
> +		   "=d" (out[REG_EDX])
> +		 : "a" (leaf), "c" (subleaf));
>  #else
> -	asm volatile ( 
> -            "mov %%ebx, %%edi\n"
> -            "cpuid\n"
> -            "xchgl %%ebx, %%edi;\n"
> -            : "=a" (eax),
> -              "=D" (ebx),
> -              "=c" (ecx),
> -              "=d" (edx)
> -            /* input */
> -            : "a" (params.eax),
> -              "D" (params.ebx),
> -              "c" (params.ecx),
> -              "d" (params.edx));
> +    asm volatile("cpuid"
> +		 : "=b" (out[REG_EBX]),
> +		   "=a" (out[REG_EAX]),
> +		   "=c" (out[REG_ECX]),
> +		   "=d" (out[REG_EDX])
> +		 : "a" (leaf), "c" (subleaf));
>  #endif
> -
> -	switch (params.return_register) {
> -	case REG_EAX:
> -		return eax;
> -	case REG_EBX:
> -		return ebx;
> -	case REG_ECX:
> -		return ecx;
> -	case REG_EDX:
> -		return edx;
> -	default:
> -		return 0;
> -	}
>  }
>  
>  /*
> @@ -240,17 +207,20 @@ rte_cpu_get_features(struct cpuid_parameters_t params)
>  int
>  rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
>  {
> -	int value;
> +	const struct feature_entry *feat;
> +	cpu_registers_t regs;
>  
>  	if (feature >= RTE_CPUFLAG_NUMFLAGS)
>  		/* Flag does not match anything in the feature tables */
>  		return -ENOENT;
>  
> -	/* get value of the register containing the desired feature */
> -	value = rte_cpu_get_features(cpu_feature_table[feature].params);
> +	feat = &cpu_feature_table[feature];
> +
> +	/* get the cpuid leaf containing the desired feature */
> +	rte_cpu_get_features(feat->leaf, feat->subleaf, &regs);
>  
>  	/* check if the feature is enabled */
> -	return (cpu_feature_table[feature].feature_mask & value) > 0;
> +	return (regs[feat->reg] >> feat->bit) & 1;
>  }
>  
>  /**
> @@ -273,7 +243,7 @@ rte_cpu_check_supported(void)
>  	unsigned i;
>  
>  	for (i = 0; i < sizeof(compile_time_flags)/sizeof(compile_time_flags[0]); i++)
> -		if (rte_cpu_get_flag_enabled(compile_time_flags[i]) < 1) {
> +		if (!rte_cpu_get_flag_enabled(compile_time_flags[i])) {
>  			fprintf(stderr,
>  			        "ERROR: This system does not support \"%s\".\n"
>  			        "Please check that RTE_MACHINE is set correctly.\n",
> -- 
> 1.8.5.3
> 
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [dpdk-dev] [RFC UNTESTED PATCH] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
  2014-03-20 16:44 [dpdk-dev] [RFC UNTESTED PATCH] eal_common_cpuflags: Fix %rbx corruption, and simplify the code H. Peter Anvin
  2014-03-20 16:55 ` Neil Horman
@ 2014-03-20 17:03 ` H. Peter Anvin
  2014-03-24 16:06   ` Neil Horman
  1 sibling, 1 reply; 8+ messages in thread
From: H. Peter Anvin @ 2014-03-20 17:03 UTC (permalink / raw)
  To: H. Peter Anvin, dev

I just realized there is yet another oddity in this code:

> @@ -78,8 +69,10 @@ struct cpuid_parameters_t {
>  struct feature_entry {
>  	enum rte_cpu_flag_t feature;            /**< feature name */

The structure contains a field with an enum value...

>  	char name[CPU_FLAG_NAME_MAX_LEN];       /**< String for printing */
> -	struct cpuid_parameters_t params;       /**< cpuid parameters */
> -	uint32_t feature_mask;                  /**< bitmask for feature */
> +	uint32_t leaf;				/**< cpuid leaf */
> +	uint32_t subleaf;			/**< cpuid subleaf */
> +	uint32_t reg;				/**< cpuid register */
> +	uint32_t bit;				/**< cpuid register bit */
>  };
>  
>  
>  /*
> @@ -240,17 +207,20 @@ rte_cpu_get_features(struct cpuid_parameters_t params)
>  int
>  rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
>  {
> -	int value;
> +	const struct feature_entry *feat;
> +	cpu_registers_t regs;
>  
>  	if (feature >= RTE_CPUFLAG_NUMFLAGS)
>  		/* Flag does not match anything in the feature tables */
>  		return -ENOENT;
>  
> -	/* get value of the register containing the desired feature */
> -	value = rte_cpu_get_features(cpu_feature_table[feature].params);
> +	feat = &cpu_feature_table[feature];
> +
> +	/* get the cpuid leaf containing the desired feature */
> +	rte_cpu_get_features(feat->leaf, feat->subleaf, &regs);
>  
>  	/* check if the feature is enabled */
> -	return (cpu_feature_table[feature].feature_mask & value) > 0;
> +	return (regs[feat->reg] >> feat->bit) & 1;
>  }
>  
>  /**

... however, this field is never actually accessed *anywhere* in the
code; the code instead uses the enum value as the table index. There is
absolutely no enforcement that the table contents are aligned with the enum.

If C99-style initializers are permitted in this codebase, I would
strongly recommend using them, and then drop the enum field in struct
feature_entry and use a macro such as:

#define FEAT(f,leaf,subleaf,reg,bit) \
	[RTE_CPUFLAG_##f] = { leaf, subleaf, reg, bit, #f },

(I'd move the string to the end, but that is just a microoptimization.
I'm kind of OCD that way.)

	-hpa

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [dpdk-dev] [RFC UNTESTED PATCH] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
  2014-03-20 17:03 ` H. Peter Anvin
@ 2014-03-24 16:06   ` Neil Horman
  2014-03-24 16:11     ` H. Peter Anvin
  0 siblings, 1 reply; 8+ messages in thread
From: Neil Horman @ 2014-03-24 16:06 UTC (permalink / raw)
  To: H. Peter Anvin; +Cc: dev, H. Peter Anvin

On Thu, Mar 20, 2014 at 10:03:53AM -0700, H. Peter Anvin wrote:
> I just realized there is yet another oddity in this code:
> 
> > @@ -78,8 +69,10 @@ struct cpuid_parameters_t {
> >  struct feature_entry {
> >  	enum rte_cpu_flag_t feature;            /**< feature name */
> 
> The structure contains a field with an enum value...
> 
> >  	char name[CPU_FLAG_NAME_MAX_LEN];       /**< String for printing */
> > -	struct cpuid_parameters_t params;       /**< cpuid parameters */
> > -	uint32_t feature_mask;                  /**< bitmask for feature */
> > +	uint32_t leaf;				/**< cpuid leaf */
> > +	uint32_t subleaf;			/**< cpuid subleaf */
> > +	uint32_t reg;				/**< cpuid register */
> > +	uint32_t bit;				/**< cpuid register bit */
> >  };
> >  
> >  
> >  /*
> > @@ -240,17 +207,20 @@ rte_cpu_get_features(struct cpuid_parameters_t params)
> >  int
> >  rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
> >  {
> > -	int value;
> > +	const struct feature_entry *feat;
> > +	cpu_registers_t regs;
> >  
> >  	if (feature >= RTE_CPUFLAG_NUMFLAGS)
> >  		/* Flag does not match anything in the feature tables */
> >  		return -ENOENT;
> >  
> > -	/* get value of the register containing the desired feature */
> > -	value = rte_cpu_get_features(cpu_feature_table[feature].params);
> > +	feat = &cpu_feature_table[feature];
> > +
> > +	/* get the cpuid leaf containing the desired feature */
> > +	rte_cpu_get_features(feat->leaf, feat->subleaf, &regs);
> >  
> >  	/* check if the feature is enabled */
> > -	return (cpu_feature_table[feature].feature_mask & value) > 0;
> > +	return (regs[feat->reg] >> feat->bit) & 1;
> >  }
> >  
> >  /**
> 
> ... however, this field is never actually accessed *anywhere* in the
> code; the code instead uses the enum value as the table index. There is
> absolutely no enforcement that the table contents is aligned with the enum.
> 
> If C99-style initializers are permitted in this codebase, I would
> strongly recommend using them, and then drop the enum field in struct
> feature_entry and use a macro such as:
> 
Actually, it's a bit simpler than that: the enum field is completely unused,
and so can be removed entirely.  The FEAT_DEF macro already does what you
suggest below, but only for the feature and name fields.

I'll remove the enum and its definition, and augment the macro to cover the rest
of the fields.

Neil

> #define FEAT(name,leaf,subleaf,reg,bit) \
> 	[RTE_CPUFLAG_##f] = { leaf, subleaf, reg, bit, #f },
> 
> (I'd move the string to the end, but that is just a microoptimization.
> I'm kind of OCD that way.)
> 
> 	-hpa
> 
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [dpdk-dev] [RFC UNTESTED PATCH] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
  2014-03-24 16:06   ` Neil Horman
@ 2014-03-24 16:11     ` H. Peter Anvin
  0 siblings, 0 replies; 8+ messages in thread
From: H. Peter Anvin @ 2014-03-24 16:11 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev, H. Peter Anvin

On 03/24/2014 09:06 AM, Neil Horman wrote:
>>
>> If C99-style initializers are permitted in this codebase, I would
>> strongly recommend using them, and then drop the enum field in struct
>> feature_entry and use a macro such as:
>>
> Actually, its a bit simpler than that, the enum parameter is actually completely
> unused, and so can be removed entirely.  The FEAT_DEF macro does what you
> suggest below already, but only for the feature and name fields.
> 
> I'll remove the enum and its definition, and augment the macro to cover the rest
> of the fields.
> 
> Neil
> 
>> #define FEAT(name,leaf,subleaf,reg,bit) \
>> 	[RTE_CPUFLAG_##f] = { leaf, subleaf, reg, bit, #f },
>>
>> (I'd move the string to the end, but that is just a microoptimization.
>> I'm kind of OCD that way.)
>>

The nice thing with the C99 initializer is that even if the order is
mismatched between the .c file and the .h file which defines the enum,
things still work correctly.

	-hpa

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [dpdk-dev] [RFC UNTESTED PATCH] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
  2014-03-20 16:39 ` Neil Horman
  2014-03-20 17:02   ` Thomas Monjalon
@ 2014-03-20 18:04   ` Neil Horman
  1 sibling, 0 replies; 8+ messages in thread
From: Neil Horman @ 2014-03-20 18:04 UTC (permalink / raw)
  To: H. Peter Anvin; +Cc: dev, H. Peter Anvin

On Thu, Mar 20, 2014 at 12:39:21PM -0400, Neil Horman wrote:
> On Thu, Mar 20, 2014 at 08:53:50AM -0700, H. Peter Anvin wrote:
> > Neil Horman reported that on x86-64 the upper half of %rbx would get
> > clobbered when the code was compiled PIC or PIE, because the
> > i386-specific code to preserve %ebx was incorrectly compiled.
> > 
> > However, the code is really way more complex than it needs to be.  For
> > one thing, the CPUID instruction only needs %eax (leaf) and %ecx
> > (subleaf) as parameters, and since we are testing for bits, we might
> > as well list the bits explicitly.  Furthermore, we can use an array
> > rather than doing a switch statement inside a structure.
> > 
> > Reported-by: Neil Horman <nhorman@tuxdriver.com>
> > Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
> Acked-by: Neil Horman <nhorman@tuxdriver.com>
> 
Sorry, I'm just acking the proposed change; I've not tested it yet, though
based on our conversation this is the right thing to do.  I'll have test
reports shortly.
Neil

> > ---
> >  lib/librte_eal/common/eal_common_cpuflags.c | 272 +++++++++++++---------------
> >  1 file changed, 121 insertions(+), 151 deletions(-)
> > 
> > diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c
> > index 1ebf78cc2a48..bf66ad9d94ec 100644
> > --- a/lib/librte_eal/common/eal_common_cpuflags.c
> > +++ b/lib/librte_eal/common/eal_common_cpuflags.c
> > @@ -54,21 +54,12 @@
> >   */
> >  enum cpu_register_t {
> >  	REG_EAX = 0,
> > -	REG_EBX,
> >  	REG_ECX,
> >  	REG_EDX,
> > +	REG_EBX,
> >  };
> >  
> > -/**
> > - * Parameters for CPUID instruction
> > - */
> > -struct cpuid_parameters_t {
> > -	uint32_t eax;
> > -	uint32_t ebx;
> > -	uint32_t ecx;
> > -	uint32_t edx;
> > -	enum cpu_register_t return_register;
> > -};
> > +typedef uint32_t cpuid_registers_t[4];
> >  
> >  #define CPU_FLAG_NAME_MAX_LEN 64
> >  
> > @@ -78,8 +69,10 @@ struct cpuid_parameters_t {
> >  struct feature_entry {
> >  	enum rte_cpu_flag_t feature;            /**< feature name */
> >  	char name[CPU_FLAG_NAME_MAX_LEN];       /**< String for printing */
> > -	struct cpuid_parameters_t params;       /**< cpuid parameters */
> > -	uint32_t feature_mask;                  /**< bitmask for feature */
> > +	uint32_t leaf;				/**< cpuid leaf */
> > +	uint32_t subleaf;			/**< cpuid subleaf */
> > +	uint32_t reg;				/**< cpuid register */
> > +	uint32_t bit;				/**< cpuid register bit */
> >  };
> >  
> >  #define FEAT_DEF(f) RTE_CPUFLAG_##f, #f
> > @@ -88,97 +81,97 @@ struct feature_entry {
> >   * An array that holds feature entries
> >   */
> >  static const struct feature_entry cpu_feature_table[] = {
> > -	{FEAT_DEF(SSE3),              {0x1, 0, 0, 0, REG_ECX}, 0x00000001},
> > -	{FEAT_DEF(PCLMULQDQ),         {0x1, 0, 0, 0, REG_ECX}, 0x00000002},
> > -	{FEAT_DEF(DTES64),            {0x1, 0, 0, 0, REG_ECX}, 0x00000004},
> > -	{FEAT_DEF(MONITOR),           {0x1, 0, 0, 0, REG_ECX}, 0x00000008},
> > -	{FEAT_DEF(DS_CPL),            {0x1, 0, 0, 0, REG_ECX}, 0x00000010},
> > -	{FEAT_DEF(VMX),               {0x1, 0, 0, 0, REG_ECX}, 0x00000020},
> > -	{FEAT_DEF(SMX),               {0x1, 0, 0, 0, REG_ECX}, 0x00000040},
> > -	{FEAT_DEF(EIST),              {0x1, 0, 0, 0, REG_ECX}, 0x00000080},
> > -	{FEAT_DEF(TM2),               {0x1, 0, 0, 0, REG_ECX}, 0x00000100},
> > -	{FEAT_DEF(SSSE3),             {0x1, 0, 0, 0, REG_ECX}, 0x00000200},
> > -	{FEAT_DEF(CNXT_ID),           {0x1, 0, 0, 0, REG_ECX}, 0x00000400},
> > -	{FEAT_DEF(FMA),               {0x1, 0, 0, 0, REG_ECX}, 0x00001000},
> > -	{FEAT_DEF(CMPXCHG16B),        {0x1, 0, 0, 0, REG_ECX}, 0x00002000},
> > -	{FEAT_DEF(XTPR),              {0x1, 0, 0, 0, REG_ECX}, 0x00004000},
> > -	{FEAT_DEF(PDCM),              {0x1, 0, 0, 0, REG_ECX}, 0x00008000},
> > -	{FEAT_DEF(PCID),              {0x1, 0, 0, 0, REG_ECX}, 0x00020000},
> > -	{FEAT_DEF(DCA),               {0x1, 0, 0, 0, REG_ECX}, 0x00040000},
> > -	{FEAT_DEF(SSE4_1),            {0x1, 0, 0, 0, REG_ECX}, 0x00080000},
> > -	{FEAT_DEF(SSE4_2),            {0x1, 0, 0, 0, REG_ECX}, 0x00100000},
> > -	{FEAT_DEF(X2APIC),            {0x1, 0, 0, 0, REG_ECX}, 0x00200000},
> > -	{FEAT_DEF(MOVBE),             {0x1, 0, 0, 0, REG_ECX}, 0x00400000},
> > -	{FEAT_DEF(POPCNT),            {0x1, 0, 0, 0, REG_ECX}, 0x00800000},
> > -	{FEAT_DEF(TSC_DEADLINE),      {0x1, 0, 0, 0, REG_ECX}, 0x01000000},
> > -	{FEAT_DEF(AES),               {0x1, 0, 0, 0, REG_ECX}, 0x02000000},
> > -	{FEAT_DEF(XSAVE),             {0x1, 0, 0, 0, REG_ECX}, 0x04000000},
> > -	{FEAT_DEF(OSXSAVE),           {0x1, 0, 0, 0, REG_ECX}, 0x08000000},
> > -	{FEAT_DEF(AVX),               {0x1, 0, 0, 0, REG_ECX}, 0x10000000},
> > -	{FEAT_DEF(F16C),              {0x1, 0, 0, 0, REG_ECX}, 0x20000000},
> > -	{FEAT_DEF(RDRAND),            {0x1, 0, 0, 0, REG_ECX}, 0x40000000},
> > -
> > -	{FEAT_DEF(FPU),               {0x1, 0, 0, 0, REG_EDX}, 0x00000001},
> > -	{FEAT_DEF(VME),               {0x1, 0, 0, 0, REG_EDX}, 0x00000002},
> > -	{FEAT_DEF(DE),                {0x1, 0, 0, 0, REG_EDX}, 0x00000004},
> > -	{FEAT_DEF(PSE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000008},
> > -	{FEAT_DEF(TSC),               {0x1, 0, 0, 0, REG_EDX}, 0x00000010},
> > -	{FEAT_DEF(MSR),               {0x1, 0, 0, 0, REG_EDX}, 0x00000020},
> > -	{FEAT_DEF(PAE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000040},
> > -	{FEAT_DEF(MCE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000080},
> > -	{FEAT_DEF(CX8),               {0x1, 0, 0, 0, REG_EDX}, 0x00000100},
> > -	{FEAT_DEF(APIC),              {0x1, 0, 0, 0, REG_EDX}, 0x00000200},
> > -	{FEAT_DEF(SEP),               {0x1, 0, 0, 0, REG_EDX}, 0x00000800},
> > -	{FEAT_DEF(MTRR),              {0x1, 0, 0, 0, REG_EDX}, 0x00001000},
> > -	{FEAT_DEF(PGE),               {0x1, 0, 0, 0, REG_EDX}, 0x00002000},
> > -	{FEAT_DEF(MCA),               {0x1, 0, 0, 0, REG_EDX}, 0x00004000},
> > -	{FEAT_DEF(CMOV),              {0x1, 0, 0, 0, REG_EDX}, 0x00008000},
> > -	{FEAT_DEF(PAT),               {0x1, 0, 0, 0, REG_EDX}, 0x00010000},
> > -	{FEAT_DEF(PSE36),             {0x1, 0, 0, 0, REG_EDX}, 0x00020000},
> > -	{FEAT_DEF(PSN),               {0x1, 0, 0, 0, REG_EDX}, 0x00040000},
> > -	{FEAT_DEF(CLFSH),             {0x1, 0, 0, 0, REG_EDX}, 0x00080000},
> > -	{FEAT_DEF(DS),                {0x1, 0, 0, 0, REG_EDX}, 0x00200000},
> > -	{FEAT_DEF(ACPI),              {0x1, 0, 0, 0, REG_EDX}, 0x00400000},
> > -	{FEAT_DEF(MMX),               {0x1, 0, 0, 0, REG_EDX}, 0x00800000},
> > -	{FEAT_DEF(FXSR),              {0x1, 0, 0, 0, REG_EDX}, 0x01000000},
> > -	{FEAT_DEF(SSE),               {0x1, 0, 0, 0, REG_EDX}, 0x02000000},
> > -	{FEAT_DEF(SSE2),              {0x1, 0, 0, 0, REG_EDX}, 0x04000000},
> > -	{FEAT_DEF(SS),                {0x1, 0, 0, 0, REG_EDX}, 0x08000000},
> > -	{FEAT_DEF(HTT),               {0x1, 0, 0, 0, REG_EDX}, 0x10000000},
> > -	{FEAT_DEF(TM),                {0x1, 0, 0, 0, REG_EDX}, 0x20000000},
> > -	{FEAT_DEF(PBE),               {0x1, 0, 0, 0, REG_EDX}, 0x80000000},
> > -
> > -	{FEAT_DEF(DIGTEMP),           {0x6, 0, 0, 0, REG_EAX}, 0x00000001},
> > -	{FEAT_DEF(TRBOBST),           {0x6, 0, 0, 0, REG_EAX}, 0x00000002},
> > -	{FEAT_DEF(ARAT),              {0x6, 0, 0, 0, REG_EAX}, 0x00000004},
> > -	{FEAT_DEF(PLN),               {0x6, 0, 0, 0, REG_EAX}, 0x00000010},
> > -	{FEAT_DEF(ECMD),              {0x6, 0, 0, 0, REG_EAX}, 0x00000020},
> > -	{FEAT_DEF(PTM),               {0x6, 0, 0, 0, REG_EAX}, 0x00000040},
> > -
> > -	{FEAT_DEF(MPERF_APERF_MSR),   {0x6, 0, 0, 0, REG_ECX}, 0x00000001},
> > -	{FEAT_DEF(ACNT2),             {0x6, 0, 0, 0, REG_ECX}, 0x00000002},
> > -	{FEAT_DEF(ENERGY_EFF),        {0x6, 0, 0, 0, REG_ECX}, 0x00000008},
> > -
> > -	{FEAT_DEF(FSGSBASE),          {0x7, 0, 0, 0, REG_EBX}, 0x00000001},
> > -	{FEAT_DEF(BMI1),              {0x7, 0, 0, 0, REG_EBX}, 0x00000004},
> > -	{FEAT_DEF(HLE),               {0x7, 0, 0, 0, REG_EBX}, 0x00000010},
> > -	{FEAT_DEF(AVX2),              {0x7, 0, 0, 0, REG_EBX}, 0x00000020},
> > -	{FEAT_DEF(SMEP),              {0x7, 0, 0, 0, REG_EBX}, 0x00000040},
> > -	{FEAT_DEF(BMI2),              {0x7, 0, 0, 0, REG_EBX}, 0x00000080},
> > -	{FEAT_DEF(ERMS),              {0x7, 0, 0, 0, REG_EBX}, 0x00000100},
> > -	{FEAT_DEF(INVPCID),           {0x7, 0, 0, 0, REG_EBX}, 0x00000400},
> > -	{FEAT_DEF(RTM),               {0x7, 0, 0, 0, REG_EBX}, 0x00000800},
> > -
> > -	{FEAT_DEF(LAHF_SAHF),  {0x80000001, 0, 0, 0, REG_ECX}, 0x00000001},
> > -	{FEAT_DEF(LZCNT),      {0x80000001, 0, 0, 0, REG_ECX}, 0x00000010},
> > -
> > -	{FEAT_DEF(SYSCALL),    {0x80000001, 0, 0, 0, REG_EDX}, 0x00000800},
> > -	{FEAT_DEF(XD),         {0x80000001, 0, 0, 0, REG_EDX}, 0x00100000},
> > -	{FEAT_DEF(1GB_PG),     {0x80000001, 0, 0, 0, REG_EDX}, 0x04000000},
> > -	{FEAT_DEF(RDTSCP),     {0x80000001, 0, 0, 0, REG_EDX}, 0x08000000},
> > -	{FEAT_DEF(EM64T),      {0x80000001, 0, 0, 0, REG_EDX}, 0x20000000},
> > -
> > -	{FEAT_DEF(INVTSC),     {0x80000007, 0, 0, 0, REG_EDX}, 0x00000100},
> > +	{FEAT_DEF(SSE3),		0x00000001, 0, REG_ECX,  0},
> > +	{FEAT_DEF(PCLMULQDQ),		0x00000001, 0, REG_ECX,  1},
> > +	{FEAT_DEF(DTES64),		0x00000001, 0, REG_ECX,  2},
> > +	{FEAT_DEF(MONITOR),		0x00000001, 0, REG_ECX,  3},
> > +	{FEAT_DEF(DS_CPL),		0x00000001, 0, REG_ECX,  4},
> > +	{FEAT_DEF(VMX),			0x00000001, 0, REG_ECX,  5},
> > +	{FEAT_DEF(SMX),			0x00000001, 0, REG_ECX,  6},
> > +	{FEAT_DEF(EIST),		0x00000001, 0, REG_ECX,  7},
> > +	{FEAT_DEF(TM2),			0x00000001, 0, REG_ECX,  8},
> > +	{FEAT_DEF(SSSE3),		0x00000001, 0, REG_ECX,  9},
> > +	{FEAT_DEF(CNXT_ID),		0x00000001, 0, REG_ECX, 10},
> > +	{FEAT_DEF(FMA),			0x00000001, 0, REG_ECX, 12},
> > +	{FEAT_DEF(CMPXCHG16B),		0x00000001, 0, REG_ECX, 13},
> > +	{FEAT_DEF(XTPR),		0x00000001, 0, REG_ECX, 14},
> > +	{FEAT_DEF(PDCM),		0x00000001, 0, REG_ECX, 15},
> > +	{FEAT_DEF(PCID),		0x00000001, 0, REG_ECX, 17},
> > +	{FEAT_DEF(DCA),			0x00000001, 0, REG_ECX, 18},
> > +	{FEAT_DEF(SSE4_1),		0x00000001, 0, REG_ECX, 19},
> > +	{FEAT_DEF(SSE4_2),		0x00000001, 0, REG_ECX, 20},
> > +	{FEAT_DEF(X2APIC),		0x00000001, 0, REG_ECX, 21},
> > +	{FEAT_DEF(MOVBE),		0x00000001, 0, REG_ECX, 22},
> > +	{FEAT_DEF(POPCNT),		0x00000001, 0, REG_ECX, 23},
> > +	{FEAT_DEF(TSC_DEADLINE),	0x00000001, 0, REG_ECX, 24},
> > +	{FEAT_DEF(AES),			0x00000001, 0, REG_ECX, 25},
> > +	{FEAT_DEF(XSAVE),		0x00000001, 0, REG_ECX, 26},
> > +	{FEAT_DEF(OSXSAVE),		0x00000001, 0, REG_ECX, 27},
> > +	{FEAT_DEF(AVX),			0x00000001, 0, REG_ECX, 28},
> > +	{FEAT_DEF(F16C),		0x00000001, 0, REG_ECX, 29},
> > +	{FEAT_DEF(RDRAND),		0x00000001, 0, REG_ECX, 30},
> > +
> > +	{FEAT_DEF(FPU),			0x00000001, 0, REG_EDX,  0},
> > +	{FEAT_DEF(VME),			0x00000001, 0, REG_EDX,  1},
> > +	{FEAT_DEF(DE),			0x00000001, 0, REG_EDX,  2},
> > +	{FEAT_DEF(PSE),			0x00000001, 0, REG_EDX,  3},
> > +	{FEAT_DEF(TSC),			0x00000001, 0, REG_EDX,  4},
> > +	{FEAT_DEF(MSR),			0x00000001, 0, REG_EDX,  5},
> > +	{FEAT_DEF(PAE),			0x00000001, 0, REG_EDX,  6},
> > +	{FEAT_DEF(MCE),			0x00000001, 0, REG_EDX,  7},
> > +	{FEAT_DEF(CX8),			0x00000001, 0, REG_EDX,  8},
> > +	{FEAT_DEF(APIC),		0x00000001, 0, REG_EDX,  9},
> > +	{FEAT_DEF(SEP),			0x00000001, 0, REG_EDX, 11},
> > +	{FEAT_DEF(MTRR),		0x00000001, 0, REG_EDX, 12},
> > +	{FEAT_DEF(PGE),			0x00000001, 0, REG_EDX, 13},
> > +	{FEAT_DEF(MCA),			0x00000001, 0, REG_EDX, 14},
> > +	{FEAT_DEF(CMOV),		0x00000001, 0, REG_EDX, 15},
> > +	{FEAT_DEF(PAT),			0x00000001, 0, REG_EDX, 16},
> > +	{FEAT_DEF(PSE36),		0x00000001, 0, REG_EDX, 17},
> > +	{FEAT_DEF(PSN),			0x00000001, 0, REG_EDX, 18},
> > +	{FEAT_DEF(CLFSH),		0x00000001, 0, REG_EDX, 19},
> > +	{FEAT_DEF(DS),			0x00000001, 0, REG_EDX, 21},
> > +	{FEAT_DEF(ACPI),		0x00000001, 0, REG_EDX, 22},
> > +	{FEAT_DEF(MMX),			0x00000001, 0, REG_EDX, 23},
> > +	{FEAT_DEF(FXSR),		0x00000001, 0, REG_EDX, 24},
> > +	{FEAT_DEF(SSE),			0x00000001, 0, REG_EDX, 25},
> > +	{FEAT_DEF(SSE2),		0x00000001, 0, REG_EDX, 26},
> > +	{FEAT_DEF(SS),			0x00000001, 0, REG_EDX, 27},
> > +	{FEAT_DEF(HTT),			0x00000001, 0, REG_EDX, 28},
> > +	{FEAT_DEF(TM),			0x00000001, 0, REG_EDX, 29},
> > +	{FEAT_DEF(PBE),			0x00000001, 0, REG_EDX, 31},
> > +
> > +	{FEAT_DEF(DIGTEMP),		0x00000006, 0, REG_EAX,  0},
> > +	{FEAT_DEF(TRBOBST),		0x00000006, 0, REG_EAX,  1},
> > +	{FEAT_DEF(ARAT),		0x00000006, 0, REG_EAX,  2},
> > +	{FEAT_DEF(PLN),			0x00000006, 0, REG_EAX,  4},
> > +	{FEAT_DEF(ECMD),		0x00000006, 0, REG_EAX,  5},
> > +	{FEAT_DEF(PTM),			0x00000006, 0, REG_EAX,  6},
> > +
> > +	{FEAT_DEF(MPERF_APERF_MSR),	0x00000006, 0, REG_ECX,  0},
> > +	{FEAT_DEF(ACNT2),		0x00000006, 0, REG_ECX,  1},
> > +	{FEAT_DEF(ENERGY_EFF),		0x00000006, 0, REG_ECX,  3},
> > +
> > +	{FEAT_DEF(FSGSBASE),		0x00000007, 0, REG_EBX,  0},
> > +	{FEAT_DEF(BMI1),		0x00000007, 0, REG_EBX,  2},
> > +	{FEAT_DEF(HLE),			0x00000007, 0, REG_EBX,  4},
> > +	{FEAT_DEF(AVX2),		0x00000007, 0, REG_EBX,  5},
> > +	{FEAT_DEF(SMEP),		0x00000007, 0, REG_EBX,  6},
> > +	{FEAT_DEF(BMI2),		0x00000007, 0, REG_EBX,  7},
> > +	{FEAT_DEF(ERMS),		0x00000007, 0, REG_EBX,  8},
> > +	{FEAT_DEF(INVPCID),		0x00000007, 0, REG_EBX, 10},
> > +	{FEAT_DEF(RTM),			0x00000007, 0, REG_EBX, 11},
> > +
> > +	{FEAT_DEF(LAHF_SAHF),		0x80000001, 0, REG_ECX,  0},
> > +	{FEAT_DEF(LZCNT),		0x80000001, 0, REG_ECX,  4},
> > +
> > +	{FEAT_DEF(SYSCALL),		0x80000001, 0, REG_EDX, 11},
> > +	{FEAT_DEF(XD),			0x80000001, 0, REG_EDX, 20},
> > +	{FEAT_DEF(1GB_PG),		0x80000001, 0, REG_EDX, 26},
> > +	{FEAT_DEF(RDTSCP),		0x80000001, 0, REG_EDX, 27},
> > +	{FEAT_DEF(EM64T),		0x80000001, 0, REG_EDX, 29},
> > +
> > +	{FEAT_DEF(INVTSC),		0x80000007, 0, REG_EDX,  8},
> >  };
> >  
> >  /*
> > @@ -187,51 +180,25 @@ static const struct feature_entry cpu_feature_table[] = {
> >   * This function, when compiled with GCC, will generate architecture-neutral
> >   * code, as per GCC manual.
> >   */
> > -static inline int
> > -rte_cpu_get_features(struct cpuid_parameters_t params)
> > +static inline void
> > +rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t *out)
> >  {
> > -	int eax, ebx, ecx, edx;            /* registers */
> > -
> > -#ifndef __PIC__
> > -   asm volatile ("cpuid"
> > -                 /* output */
> > -                 : "=a" (eax),
> > -                   "=b" (ebx),
> > -                   "=c" (ecx),
> > -                   "=d" (edx)
> > -                 /* input */
> > -                 : "a" (params.eax),
> > -                   "b" (params.ebx),
> > -                   "c" (params.ecx),
> > -                   "d" (params.edx));
> > +#if defined(__i386__) && defined(__PIC__)
> > +    /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */
> > +    asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0"
> > +		 : "=r" (out[REG_EBX]),
> > +		   "=a" (out[REG_EAX]),
> > +		   "=c" (out[REG_ECX]),
> > +		   "=d" (out[REG_EDX])
> > +		 : "a" (leaf), "c" (subleaf));
> >  #else
> > -	asm volatile ( 
> > -            "mov %%ebx, %%edi\n"
> > -            "cpuid\n"
> > -            "xchgl %%ebx, %%edi;\n"
> > -            : "=a" (eax),
> > -              "=D" (ebx),
> > -              "=c" (ecx),
> > -              "=d" (edx)
> > -            /* input */
> > -            : "a" (params.eax),
> > -              "D" (params.ebx),
> > -              "c" (params.ecx),
> > -              "d" (params.edx));
> > +    asm volatile("cpuid"
> > +		 : "=b" (out[REG_EBX]),
> > +		   "=a" (out[REG_EAX]),
> > +		   "=c" (out[REG_ECX]),
> > +		   "=d" (out[REG_EDX])
> > +		 : "a" (leaf), "c" (subleaf));
> >  #endif
> > -
> > -	switch (params.return_register) {
> > -	case REG_EAX:
> > -		return eax;
> > -	case REG_EBX:
> > -		return ebx;
> > -	case REG_ECX:
> > -		return ecx;
> > -	case REG_EDX:
> > -		return edx;
> > -	default:
> > -		return 0;
> > -	}
> >  }
> >  
> >  /*
> > @@ -240,17 +207,20 @@ rte_cpu_get_features(struct cpuid_parameters_t params)
> >  int
> >  rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
> >  {
> > -	int value;
> > +	const struct feature_entry *feat;
> > +	cpu_registers_t regs;
> >  
> >  	if (feature >= RTE_CPUFLAG_NUMFLAGS)
> >  		/* Flag does not match anything in the feature tables */
> >  		return -ENOENT;
> >  
> > -	/* get value of the register containing the desired feature */
> > -	value = rte_cpu_get_features(cpu_feature_table[feature].params);
> > +	feat = &cpu_feature_table[feature];
> > +
> > +	/* get the cpuid leaf containing the desired feature */
> > +	rte_cpu_get_features(feat->leaf, feat->subleaf, &regs);
> >  
> >  	/* check if the feature is enabled */
> > -	return (cpu_feature_table[feature].feature_mask & value) > 0;
> > +	return (regs[feat->reg] >> feat->bit) & 1;
> >  }
> >  
> >  /**
> > @@ -273,7 +243,7 @@ rte_cpu_check_supported(void)
> >  	unsigned i;
> >  
> >  	for (i = 0; i < sizeof(compile_time_flags)/sizeof(compile_time_flags[0]); i++)
> > -		if (rte_cpu_get_flag_enabled(compile_time_flags[i]) < 1) {
> > +		if (!rte_cpu_get_flag_enabled(compile_time_flags[i])) {
> >  			fprintf(stderr,
> >  			        "ERROR: This system does not support \"%s\".\n"
> >  			        "Please check that RTE_MACHINE is set correctly.\n",
> > -- 
> > 1.8.5.3
> > 
> > 
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [dpdk-dev] [RFC UNTESTED PATCH] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
  2014-03-20 16:39 ` Neil Horman
@ 2014-03-20 17:02   ` Thomas Monjalon
  2014-03-20 18:04   ` Neil Horman
  1 sibling, 0 replies; 8+ messages in thread
From: Thomas Monjalon @ 2014-03-20 17:02 UTC (permalink / raw)
  To: Neil Horman, H. Peter Anvin; +Cc: dev, H. Peter Anvin

Hi,

20/03/2014 12:39, Neil Horman :
> On Thu, Mar 20, 2014 at 08:53:50AM -0700, H. Peter Anvin wrote:
> > Neil Horman reported that on x86-64 the upper half of %rbx would get
> > clobbered when the code was compiled PIC or PIE, because the
> > i386-specific code to preserve %ebx was incorrectly compiled.
> > 
> > However, the code is really way more complex than it needs to be.  For
> > one thing, the CPUID instruction only needs %eax (leaf) and %ecx
> > (subleaf) as parameters, and since we are testing for bits, we might
> > as well list the bits explicitly.  Furthermore, we can use an array
> > rather than doing a switch statement inside a structure.
> > 
> > Reported-by: Neil Horman <nhorman@tuxdriver.com>
> > Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
> 
> Acked-by: Neil Horman <nhorman@tuxdriver.com>

This is an RFC UNTESTED patch.
So should I understand with this acknowledgement that you have tested it?
As a shared library? in 32-bit mode?

-- 
Thomas

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [dpdk-dev] [RFC UNTESTED PATCH] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
       [not found] <1395330830-1310-1-git-send-email-hpa@linux.intel.com>
@ 2014-03-20 16:39 ` Neil Horman
  2014-03-20 17:02   ` Thomas Monjalon
  2014-03-20 18:04   ` Neil Horman
  0 siblings, 2 replies; 8+ messages in thread
From: Neil Horman @ 2014-03-20 16:39 UTC (permalink / raw)
  To: H. Peter Anvin; +Cc: dev, H. Peter Anvin

On Thu, Mar 20, 2014 at 08:53:50AM -0700, H. Peter Anvin wrote:
> Neil Horman reported that on x86-64 the upper half of %rbx would get
> clobbered when the code was compiled PIC or PIE, because the
> i386-specific code to preserve %ebx was incorrectly compiled.
> 
> However, the code is really way more complex than it needs to be.  For
> one thing, the CPUID instruction only needs %eax (leaf) and %ecx
> (subleaf) as parameters, and since we are testing for bits, we might
> as well list the bits explicitly.  Furthermore, we can use an array
> rather than doing a switch statement inside a structure.
> 
> Reported-by: Neil Horman <nhorman@tuxdriver.com>
> Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>

> ---
>  lib/librte_eal/common/eal_common_cpuflags.c | 272 +++++++++++++---------------
>  1 file changed, 121 insertions(+), 151 deletions(-)
> 
> diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c
> index 1ebf78cc2a48..bf66ad9d94ec 100644
> --- a/lib/librte_eal/common/eal_common_cpuflags.c
> +++ b/lib/librte_eal/common/eal_common_cpuflags.c
> @@ -54,21 +54,12 @@
>   */
>  enum cpu_register_t {
>  	REG_EAX = 0,
> -	REG_EBX,
>  	REG_ECX,
>  	REG_EDX,
> +	REG_EBX,
>  };
>  
> -/**
> - * Parameters for CPUID instruction
> - */
> -struct cpuid_parameters_t {
> -	uint32_t eax;
> -	uint32_t ebx;
> -	uint32_t ecx;
> -	uint32_t edx;
> -	enum cpu_register_t return_register;
> -};
> +typedef uint32_t cpuid_registers_t[4];
>  
>  #define CPU_FLAG_NAME_MAX_LEN 64
>  
> @@ -78,8 +69,10 @@ struct cpuid_parameters_t {
>  struct feature_entry {
>  	enum rte_cpu_flag_t feature;            /**< feature name */
>  	char name[CPU_FLAG_NAME_MAX_LEN];       /**< String for printing */
> -	struct cpuid_parameters_t params;       /**< cpuid parameters */
> -	uint32_t feature_mask;                  /**< bitmask for feature */
> +	uint32_t leaf;				/**< cpuid leaf */
> +	uint32_t subleaf;			/**< cpuid subleaf */
> +	uint32_t reg;				/**< cpuid register */
> +	uint32_t bit;				/**< cpuid register bit */
>  };
>  
>  #define FEAT_DEF(f) RTE_CPUFLAG_##f, #f
> @@ -88,97 +81,97 @@ struct feature_entry {
>   * An array that holds feature entries
>   */
>  static const struct feature_entry cpu_feature_table[] = {
> -	{FEAT_DEF(SSE3),              {0x1, 0, 0, 0, REG_ECX}, 0x00000001},
> -	{FEAT_DEF(PCLMULQDQ),         {0x1, 0, 0, 0, REG_ECX}, 0x00000002},
> -	{FEAT_DEF(DTES64),            {0x1, 0, 0, 0, REG_ECX}, 0x00000004},
> -	{FEAT_DEF(MONITOR),           {0x1, 0, 0, 0, REG_ECX}, 0x00000008},
> -	{FEAT_DEF(DS_CPL),            {0x1, 0, 0, 0, REG_ECX}, 0x00000010},
> -	{FEAT_DEF(VMX),               {0x1, 0, 0, 0, REG_ECX}, 0x00000020},
> -	{FEAT_DEF(SMX),               {0x1, 0, 0, 0, REG_ECX}, 0x00000040},
> -	{FEAT_DEF(EIST),              {0x1, 0, 0, 0, REG_ECX}, 0x00000080},
> -	{FEAT_DEF(TM2),               {0x1, 0, 0, 0, REG_ECX}, 0x00000100},
> -	{FEAT_DEF(SSSE3),             {0x1, 0, 0, 0, REG_ECX}, 0x00000200},
> -	{FEAT_DEF(CNXT_ID),           {0x1, 0, 0, 0, REG_ECX}, 0x00000400},
> -	{FEAT_DEF(FMA),               {0x1, 0, 0, 0, REG_ECX}, 0x00001000},
> -	{FEAT_DEF(CMPXCHG16B),        {0x1, 0, 0, 0, REG_ECX}, 0x00002000},
> -	{FEAT_DEF(XTPR),              {0x1, 0, 0, 0, REG_ECX}, 0x00004000},
> -	{FEAT_DEF(PDCM),              {0x1, 0, 0, 0, REG_ECX}, 0x00008000},
> -	{FEAT_DEF(PCID),              {0x1, 0, 0, 0, REG_ECX}, 0x00020000},
> -	{FEAT_DEF(DCA),               {0x1, 0, 0, 0, REG_ECX}, 0x00040000},
> -	{FEAT_DEF(SSE4_1),            {0x1, 0, 0, 0, REG_ECX}, 0x00080000},
> -	{FEAT_DEF(SSE4_2),            {0x1, 0, 0, 0, REG_ECX}, 0x00100000},
> -	{FEAT_DEF(X2APIC),            {0x1, 0, 0, 0, REG_ECX}, 0x00200000},
> -	{FEAT_DEF(MOVBE),             {0x1, 0, 0, 0, REG_ECX}, 0x00400000},
> -	{FEAT_DEF(POPCNT),            {0x1, 0, 0, 0, REG_ECX}, 0x00800000},
> -	{FEAT_DEF(TSC_DEADLINE),      {0x1, 0, 0, 0, REG_ECX}, 0x01000000},
> -	{FEAT_DEF(AES),               {0x1, 0, 0, 0, REG_ECX}, 0x02000000},
> -	{FEAT_DEF(XSAVE),             {0x1, 0, 0, 0, REG_ECX}, 0x04000000},
> -	{FEAT_DEF(OSXSAVE),           {0x1, 0, 0, 0, REG_ECX}, 0x08000000},
> -	{FEAT_DEF(AVX),               {0x1, 0, 0, 0, REG_ECX}, 0x10000000},
> -	{FEAT_DEF(F16C),              {0x1, 0, 0, 0, REG_ECX}, 0x20000000},
> -	{FEAT_DEF(RDRAND),            {0x1, 0, 0, 0, REG_ECX}, 0x40000000},
> -
> -	{FEAT_DEF(FPU),               {0x1, 0, 0, 0, REG_EDX}, 0x00000001},
> -	{FEAT_DEF(VME),               {0x1, 0, 0, 0, REG_EDX}, 0x00000002},
> -	{FEAT_DEF(DE),                {0x1, 0, 0, 0, REG_EDX}, 0x00000004},
> -	{FEAT_DEF(PSE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000008},
> -	{FEAT_DEF(TSC),               {0x1, 0, 0, 0, REG_EDX}, 0x00000010},
> -	{FEAT_DEF(MSR),               {0x1, 0, 0, 0, REG_EDX}, 0x00000020},
> -	{FEAT_DEF(PAE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000040},
> -	{FEAT_DEF(MCE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000080},
> -	{FEAT_DEF(CX8),               {0x1, 0, 0, 0, REG_EDX}, 0x00000100},
> -	{FEAT_DEF(APIC),              {0x1, 0, 0, 0, REG_EDX}, 0x00000200},
> -	{FEAT_DEF(SEP),               {0x1, 0, 0, 0, REG_EDX}, 0x00000800},
> -	{FEAT_DEF(MTRR),              {0x1, 0, 0, 0, REG_EDX}, 0x00001000},
> -	{FEAT_DEF(PGE),               {0x1, 0, 0, 0, REG_EDX}, 0x00002000},
> -	{FEAT_DEF(MCA),               {0x1, 0, 0, 0, REG_EDX}, 0x00004000},
> -	{FEAT_DEF(CMOV),              {0x1, 0, 0, 0, REG_EDX}, 0x00008000},
> -	{FEAT_DEF(PAT),               {0x1, 0, 0, 0, REG_EDX}, 0x00010000},
> -	{FEAT_DEF(PSE36),             {0x1, 0, 0, 0, REG_EDX}, 0x00020000},
> -	{FEAT_DEF(PSN),               {0x1, 0, 0, 0, REG_EDX}, 0x00040000},
> -	{FEAT_DEF(CLFSH),             {0x1, 0, 0, 0, REG_EDX}, 0x00080000},
> -	{FEAT_DEF(DS),                {0x1, 0, 0, 0, REG_EDX}, 0x00200000},
> -	{FEAT_DEF(ACPI),              {0x1, 0, 0, 0, REG_EDX}, 0x00400000},
> -	{FEAT_DEF(MMX),               {0x1, 0, 0, 0, REG_EDX}, 0x00800000},
> -	{FEAT_DEF(FXSR),              {0x1, 0, 0, 0, REG_EDX}, 0x01000000},
> -	{FEAT_DEF(SSE),               {0x1, 0, 0, 0, REG_EDX}, 0x02000000},
> -	{FEAT_DEF(SSE2),              {0x1, 0, 0, 0, REG_EDX}, 0x04000000},
> -	{FEAT_DEF(SS),                {0x1, 0, 0, 0, REG_EDX}, 0x08000000},
> -	{FEAT_DEF(HTT),               {0x1, 0, 0, 0, REG_EDX}, 0x10000000},
> -	{FEAT_DEF(TM),                {0x1, 0, 0, 0, REG_EDX}, 0x20000000},
> -	{FEAT_DEF(PBE),               {0x1, 0, 0, 0, REG_EDX}, 0x80000000},
> -
> -	{FEAT_DEF(DIGTEMP),           {0x6, 0, 0, 0, REG_EAX}, 0x00000001},
> -	{FEAT_DEF(TRBOBST),           {0x6, 0, 0, 0, REG_EAX}, 0x00000002},
> -	{FEAT_DEF(ARAT),              {0x6, 0, 0, 0, REG_EAX}, 0x00000004},
> -	{FEAT_DEF(PLN),               {0x6, 0, 0, 0, REG_EAX}, 0x00000010},
> -	{FEAT_DEF(ECMD),              {0x6, 0, 0, 0, REG_EAX}, 0x00000020},
> -	{FEAT_DEF(PTM),               {0x6, 0, 0, 0, REG_EAX}, 0x00000040},
> -
> -	{FEAT_DEF(MPERF_APERF_MSR),   {0x6, 0, 0, 0, REG_ECX}, 0x00000001},
> -	{FEAT_DEF(ACNT2),             {0x6, 0, 0, 0, REG_ECX}, 0x00000002},
> -	{FEAT_DEF(ENERGY_EFF),        {0x6, 0, 0, 0, REG_ECX}, 0x00000008},
> -
> -	{FEAT_DEF(FSGSBASE),          {0x7, 0, 0, 0, REG_EBX}, 0x00000001},
> -	{FEAT_DEF(BMI1),              {0x7, 0, 0, 0, REG_EBX}, 0x00000004},
> -	{FEAT_DEF(HLE),               {0x7, 0, 0, 0, REG_EBX}, 0x00000010},
> -	{FEAT_DEF(AVX2),              {0x7, 0, 0, 0, REG_EBX}, 0x00000020},
> -	{FEAT_DEF(SMEP),              {0x7, 0, 0, 0, REG_EBX}, 0x00000040},
> -	{FEAT_DEF(BMI2),              {0x7, 0, 0, 0, REG_EBX}, 0x00000080},
> -	{FEAT_DEF(ERMS),              {0x7, 0, 0, 0, REG_EBX}, 0x00000100},
> -	{FEAT_DEF(INVPCID),           {0x7, 0, 0, 0, REG_EBX}, 0x00000400},
> -	{FEAT_DEF(RTM),               {0x7, 0, 0, 0, REG_EBX}, 0x00000800},
> -
> -	{FEAT_DEF(LAHF_SAHF),  {0x80000001, 0, 0, 0, REG_ECX}, 0x00000001},
> -	{FEAT_DEF(LZCNT),      {0x80000001, 0, 0, 0, REG_ECX}, 0x00000010},
> -
> -	{FEAT_DEF(SYSCALL),    {0x80000001, 0, 0, 0, REG_EDX}, 0x00000800},
> -	{FEAT_DEF(XD),         {0x80000001, 0, 0, 0, REG_EDX}, 0x00100000},
> -	{FEAT_DEF(1GB_PG),     {0x80000001, 0, 0, 0, REG_EDX}, 0x04000000},
> -	{FEAT_DEF(RDTSCP),     {0x80000001, 0, 0, 0, REG_EDX}, 0x08000000},
> -	{FEAT_DEF(EM64T),      {0x80000001, 0, 0, 0, REG_EDX}, 0x20000000},
> -
> -	{FEAT_DEF(INVTSC),     {0x80000007, 0, 0, 0, REG_EDX}, 0x00000100},
> +	{FEAT_DEF(SSE3),		0x00000001, 0, REG_ECX,  0},
> +	{FEAT_DEF(PCLMULQDQ),		0x00000001, 0, REG_ECX,  1},
> +	{FEAT_DEF(DTES64),		0x00000001, 0, REG_ECX,  2},
> +	{FEAT_DEF(MONITOR),		0x00000001, 0, REG_ECX,  3},
> +	{FEAT_DEF(DS_CPL),		0x00000001, 0, REG_ECX,  4},
> +	{FEAT_DEF(VMX),			0x00000001, 0, REG_ECX,  5},
> +	{FEAT_DEF(SMX),			0x00000001, 0, REG_ECX,  6},
> +	{FEAT_DEF(EIST),		0x00000001, 0, REG_ECX,  7},
> +	{FEAT_DEF(TM2),			0x00000001, 0, REG_ECX,  8},
> +	{FEAT_DEF(SSSE3),		0x00000001, 0, REG_ECX,  9},
> +	{FEAT_DEF(CNXT_ID),		0x00000001, 0, REG_ECX, 10},
> +	{FEAT_DEF(FMA),			0x00000001, 0, REG_ECX, 12},
> +	{FEAT_DEF(CMPXCHG16B),		0x00000001, 0, REG_ECX, 13},
> +	{FEAT_DEF(XTPR),		0x00000001, 0, REG_ECX, 14},
> +	{FEAT_DEF(PDCM),		0x00000001, 0, REG_ECX, 15},
> +	{FEAT_DEF(PCID),		0x00000001, 0, REG_ECX, 17},
> +	{FEAT_DEF(DCA),			0x00000001, 0, REG_ECX, 18},
> +	{FEAT_DEF(SSE4_1),		0x00000001, 0, REG_ECX, 19},
> +	{FEAT_DEF(SSE4_2),		0x00000001, 0, REG_ECX, 20},
> +	{FEAT_DEF(X2APIC),		0x00000001, 0, REG_ECX, 21},
> +	{FEAT_DEF(MOVBE),		0x00000001, 0, REG_ECX, 22},
> +	{FEAT_DEF(POPCNT),		0x00000001, 0, REG_ECX, 23},
> +	{FEAT_DEF(TSC_DEADLINE),	0x00000001, 0, REG_ECX, 24},
> +	{FEAT_DEF(AES),			0x00000001, 0, REG_ECX, 25},
> +	{FEAT_DEF(XSAVE),		0x00000001, 0, REG_ECX, 26},
> +	{FEAT_DEF(OSXSAVE),		0x00000001, 0, REG_ECX, 27},
> +	{FEAT_DEF(AVX),			0x00000001, 0, REG_ECX, 28},
> +	{FEAT_DEF(F16C),		0x00000001, 0, REG_ECX, 29},
> +	{FEAT_DEF(RDRAND),		0x00000001, 0, REG_ECX, 30},
> +
> +	{FEAT_DEF(FPU),			0x00000001, 0, REG_EDX,  0},
> +	{FEAT_DEF(VME),			0x00000001, 0, REG_EDX,  1},
> +	{FEAT_DEF(DE),			0x00000001, 0, REG_EDX,  2},
> +	{FEAT_DEF(PSE),			0x00000001, 0, REG_EDX,  3},
> +	{FEAT_DEF(TSC),			0x00000001, 0, REG_EDX,  4},
> +	{FEAT_DEF(MSR),			0x00000001, 0, REG_EDX,  5},
> +	{FEAT_DEF(PAE),			0x00000001, 0, REG_EDX,  6},
> +	{FEAT_DEF(MCE),			0x00000001, 0, REG_EDX,  7},
> +	{FEAT_DEF(CX8),			0x00000001, 0, REG_EDX,  8},
> +	{FEAT_DEF(APIC),		0x00000001, 0, REG_EDX,  9},
> +	{FEAT_DEF(SEP),			0x00000001, 0, REG_EDX, 11},
> +	{FEAT_DEF(MTRR),		0x00000001, 0, REG_EDX, 12},
> +	{FEAT_DEF(PGE),			0x00000001, 0, REG_EDX, 13},
> +	{FEAT_DEF(MCA),			0x00000001, 0, REG_EDX, 14},
> +	{FEAT_DEF(CMOV),		0x00000001, 0, REG_EDX, 15},
> +	{FEAT_DEF(PAT),			0x00000001, 0, REG_EDX, 16},
> +	{FEAT_DEF(PSE36),		0x00000001, 0, REG_EDX, 17},
> +	{FEAT_DEF(PSN),			0x00000001, 0, REG_EDX, 18},
> +	{FEAT_DEF(CLFSH),		0x00000001, 0, REG_EDX, 19},
> +	{FEAT_DEF(DS),			0x00000001, 0, REG_EDX, 21},
> +	{FEAT_DEF(ACPI),		0x00000001, 0, REG_EDX, 22},
> +	{FEAT_DEF(MMX),			0x00000001, 0, REG_EDX, 23},
> +	{FEAT_DEF(FXSR),		0x00000001, 0, REG_EDX, 24},
> +	{FEAT_DEF(SSE),			0x00000001, 0, REG_EDX, 25},
> +	{FEAT_DEF(SSE2),		0x00000001, 0, REG_EDX, 26},
> +	{FEAT_DEF(SS),			0x00000001, 0, REG_EDX, 27},
> +	{FEAT_DEF(HTT),			0x00000001, 0, REG_EDX, 28},
> +	{FEAT_DEF(TM),			0x00000001, 0, REG_EDX, 29},
> +	{FEAT_DEF(PBE),			0x00000001, 0, REG_EDX, 31},
> +
> +	{FEAT_DEF(DIGTEMP),		0x00000006, 0, REG_EAX,  0},
> +	{FEAT_DEF(TRBOBST),		0x00000006, 0, REG_EAX,  1},
> +	{FEAT_DEF(ARAT),		0x00000006, 0, REG_EAX,  2},
> +	{FEAT_DEF(PLN),			0x00000006, 0, REG_EAX,  4},
> +	{FEAT_DEF(ECMD),		0x00000006, 0, REG_EAX,  5},
> +	{FEAT_DEF(PTM),			0x00000006, 0, REG_EAX,  6},
> +
> +	{FEAT_DEF(MPERF_APERF_MSR),	0x00000006, 0, REG_ECX,  0},
> +	{FEAT_DEF(ACNT2),		0x00000006, 0, REG_ECX,  1},
> +	{FEAT_DEF(ENERGY_EFF),		0x00000006, 0, REG_ECX,  3},
> +
> +	{FEAT_DEF(FSGSBASE),		0x00000007, 0, REG_EBX,  0},
> +	{FEAT_DEF(BMI1),		0x00000007, 0, REG_EBX,  2},
> +	{FEAT_DEF(HLE),			0x00000007, 0, REG_EBX,  4},
> +	{FEAT_DEF(AVX2),		0x00000007, 0, REG_EBX,  5},
> +	{FEAT_DEF(SMEP),		0x00000007, 0, REG_EBX,  6},
> +	{FEAT_DEF(BMI2),		0x00000007, 0, REG_EBX,  7},
> +	{FEAT_DEF(ERMS),		0x00000007, 0, REG_EBX,  8},
> +	{FEAT_DEF(INVPCID),		0x00000007, 0, REG_EBX, 10},
> +	{FEAT_DEF(RTM),			0x00000007, 0, REG_EBX, 11},
> +
> +	{FEAT_DEF(LAHF_SAHF),		0x80000001, 0, REG_ECX,  0},
> +	{FEAT_DEF(LZCNT),		0x80000001, 0, REG_ECX,  4},
> +
> +	{FEAT_DEF(SYSCALL),		0x80000001, 0, REG_EDX, 11},
> +	{FEAT_DEF(XD),			0x80000001, 0, REG_EDX, 20},
> +	{FEAT_DEF(1GB_PG),		0x80000001, 0, REG_EDX, 26},
> +	{FEAT_DEF(RDTSCP),		0x80000001, 0, REG_EDX, 27},
> +	{FEAT_DEF(EM64T),		0x80000001, 0, REG_EDX, 29},
> +
> +	{FEAT_DEF(INVTSC),		0x80000007, 0, REG_EDX,  8},
>  };
>  
>  /*
> @@ -187,51 +180,25 @@ static const struct feature_entry cpu_feature_table[] = {
>   * This function, when compiled with GCC, will generate architecture-neutral
>   * code, as per GCC manual.
>   */
> -static inline int
> -rte_cpu_get_features(struct cpuid_parameters_t params)
> +static inline void
> +rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t *out)
>  {
> -	int eax, ebx, ecx, edx;            /* registers */
> -
> -#ifndef __PIC__
> -   asm volatile ("cpuid"
> -                 /* output */
> -                 : "=a" (eax),
> -                   "=b" (ebx),
> -                   "=c" (ecx),
> -                   "=d" (edx)
> -                 /* input */
> -                 : "a" (params.eax),
> -                   "b" (params.ebx),
> -                   "c" (params.ecx),
> -                   "d" (params.edx));
> +#if defined(__i386__) && defined(__PIC__)
> +    /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */
> +    asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0"
> +		 : "=r" (out[REG_EBX]),
> +		   "=a" (out[REG_EAX]),
> +		   "=c" (out[REG_ECX]),
> +		   "=d" (out[REG_EDX])
> +		 : "a" (leaf), "c" (subleaf));
>  #else
> -	asm volatile ( 
> -            "mov %%ebx, %%edi\n"
> -            "cpuid\n"
> -            "xchgl %%ebx, %%edi;\n"
> -            : "=a" (eax),
> -              "=D" (ebx),
> -              "=c" (ecx),
> -              "=d" (edx)
> -            /* input */
> -            : "a" (params.eax),
> -              "D" (params.ebx),
> -              "c" (params.ecx),
> -              "d" (params.edx));
> +    asm volatile("cpuid"
> +		 : "=b" (out[REG_EBX]),
> +		   "=a" (out[REG_EAX]),
> +		   "=c" (out[REG_ECX]),
> +		   "=d" (out[REG_EDX])
> +		 : "a" (leaf), "c" (subleaf));
>  #endif
> -
> -	switch (params.return_register) {
> -	case REG_EAX:
> -		return eax;
> -	case REG_EBX:
> -		return ebx;
> -	case REG_ECX:
> -		return ecx;
> -	case REG_EDX:
> -		return edx;
> -	default:
> -		return 0;
> -	}
>  }
>  
>  /*
> @@ -240,17 +207,20 @@ rte_cpu_get_features(struct cpuid_parameters_t params)
>  int
>  rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
>  {
> -	int value;
> +	const struct feature_entry *feat;
> +	cpu_registers_t regs;
>  
>  	if (feature >= RTE_CPUFLAG_NUMFLAGS)
>  		/* Flag does not match anything in the feature tables */
>  		return -ENOENT;
>  
> -	/* get value of the register containing the desired feature */
> -	value = rte_cpu_get_features(cpu_feature_table[feature].params);
> +	feat = &cpu_feature_table[feature];
> +
> +	/* get the cpuid leaf containing the desired feature */
> +	rte_cpu_get_features(feat->leaf, feat->subleaf, &regs);
>  
>  	/* check if the feature is enabled */
> -	return (cpu_feature_table[feature].feature_mask & value) > 0;
> +	return (regs[feat->reg] >> feat->bit) & 1;
>  }
>  
>  /**
> @@ -273,7 +243,7 @@ rte_cpu_check_supported(void)
>  	unsigned i;
>  
>  	for (i = 0; i < sizeof(compile_time_flags)/sizeof(compile_time_flags[0]); i++)
> -		if (rte_cpu_get_flag_enabled(compile_time_flags[i]) < 1) {
> +		if (!rte_cpu_get_flag_enabled(compile_time_flags[i])) {
>  			fprintf(stderr,
>  			        "ERROR: This system does not support \"%s\".\n"
>  			        "Please check that RTE_MACHINE is set correctly.\n",
> -- 
> 1.8.5.3
> 
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2014-03-24 16:09 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-03-20 16:44 [dpdk-dev] [RFC UNTESTED PATCH] eal_common_cpuflags: Fix %rbx corruption, and simplify the code H. Peter Anvin
2014-03-20 16:55 ` Neil Horman
2014-03-20 17:03 ` H. Peter Anvin
2014-03-24 16:06   ` Neil Horman
2014-03-24 16:11     ` H. Peter Anvin
     [not found] <1395330830-1310-1-git-send-email-hpa@linux.intel.com>
2014-03-20 16:39 ` Neil Horman
2014-03-20 17:02   ` Thomas Monjalon
2014-03-20 18:04   ` Neil Horman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).