From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail.zytor.com (terminus.zytor.com [198.137.202.10]) by dpdk.org (Postfix) with ESMTP id 5C3BE590F for ; Thu, 20 Mar 2014 17:43:07 +0100 (CET) Received: from tazenda.hos.anvin.org ([IPv6:2601:9:7280:8f0:cc79:79ff:fead:f559]) (authenticated bits=0) by mail.zytor.com (8.14.7/8.14.5) with ESMTP id s2KGia2N011556 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-GCM-SHA384 bits=256 verify=NO); Thu, 20 Mar 2014 09:44:37 -0700 Received: from tazenda.hos.anvin.org (localhost [127.0.0.1]) by tazenda.hos.anvin.org (8.14.8/8.14.5) with ESMTP id s2KGiU9C002845; Thu, 20 Mar 2014 09:44:30 -0700 Received: (from hpa@localhost) by tazenda.hos.anvin.org (8.14.8/8.14.8/Submit) id s2KGiUm9002844; Thu, 20 Mar 2014 09:44:30 -0700 From: "H. Peter Anvin" To: dev@dpdk.org Date: Thu, 20 Mar 2014 09:44:28 -0700 Message-Id: <1395333868-2808-1-git-send-email-hpa@linux.intel.com> X-Mailer: git-send-email 1.8.5.3 Cc: "H. Peter Anvin" Subject: [dpdk-dev] [RFC UNTESTED PATCH] eal_common_cpuflags: Fix %rbx corruption, and simplify the code X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 20 Mar 2014 16:43:07 -0000 Neil Horman reported that on x86-64 the upper half of %rbx would get clobbered when the code was compiled PIC or PIE, because the i386-specific code to preserve %ebx was incorrectly compiled. However, the code is really way more complex than it needs to be. For one thing, the CPUID instruction only needs %eax (leaf) and %ecx (subleaf) as parameters, and since we are testing for bits, we might as well list the bits explicitly. Furthermore, we can use an array rather than doing a switch statement inside a structure. Reported-by: Neil Horman Signed-off-by: H. 
Peter Anvin --- lib/librte_eal/common/eal_common_cpuflags.c | 272 +++++++++++++--------------- 1 file changed, 121 insertions(+), 151 deletions(-) diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c index 1ebf78cc2a48..bf66ad9d94ec 100644 --- a/lib/librte_eal/common/eal_common_cpuflags.c +++ b/lib/librte_eal/common/eal_common_cpuflags.c @@ -54,21 +54,12 @@ */ enum cpu_register_t { REG_EAX = 0, - REG_EBX, REG_ECX, REG_EDX, + REG_EBX, }; -/** - * Parameters for CPUID instruction - */ -struct cpuid_parameters_t { - uint32_t eax; - uint32_t ebx; - uint32_t ecx; - uint32_t edx; - enum cpu_register_t return_register; -}; +typedef uint32_t cpuid_registers_t[4]; #define CPU_FLAG_NAME_MAX_LEN 64 @@ -78,8 +69,10 @@ struct cpuid_parameters_t { struct feature_entry { enum rte_cpu_flag_t feature; /**< feature name */ char name[CPU_FLAG_NAME_MAX_LEN]; /**< String for printing */ - struct cpuid_parameters_t params; /**< cpuid parameters */ - uint32_t feature_mask; /**< bitmask for feature */ + uint32_t leaf; /**< cpuid leaf */ + uint32_t subleaf; /**< cpuid subleaf */ + uint32_t reg; /**< cpuid register */ + uint32_t bit; /**< cpuid register bit */ }; #define FEAT_DEF(f) RTE_CPUFLAG_##f, #f @@ -88,97 +81,97 @@ struct feature_entry { * An array that holds feature entries */ static const struct feature_entry cpu_feature_table[] = { - {FEAT_DEF(SSE3), {0x1, 0, 0, 0, REG_ECX}, 0x00000001}, - {FEAT_DEF(PCLMULQDQ), {0x1, 0, 0, 0, REG_ECX}, 0x00000002}, - {FEAT_DEF(DTES64), {0x1, 0, 0, 0, REG_ECX}, 0x00000004}, - {FEAT_DEF(MONITOR), {0x1, 0, 0, 0, REG_ECX}, 0x00000008}, - {FEAT_DEF(DS_CPL), {0x1, 0, 0, 0, REG_ECX}, 0x00000010}, - {FEAT_DEF(VMX), {0x1, 0, 0, 0, REG_ECX}, 0x00000020}, - {FEAT_DEF(SMX), {0x1, 0, 0, 0, REG_ECX}, 0x00000040}, - {FEAT_DEF(EIST), {0x1, 0, 0, 0, REG_ECX}, 0x00000080}, - {FEAT_DEF(TM2), {0x1, 0, 0, 0, REG_ECX}, 0x00000100}, - {FEAT_DEF(SSSE3), {0x1, 0, 0, 0, REG_ECX}, 0x00000200}, - {FEAT_DEF(CNXT_ID), {0x1, 0, 
0, 0, REG_ECX}, 0x00000400}, - {FEAT_DEF(FMA), {0x1, 0, 0, 0, REG_ECX}, 0x00001000}, - {FEAT_DEF(CMPXCHG16B), {0x1, 0, 0, 0, REG_ECX}, 0x00002000}, - {FEAT_DEF(XTPR), {0x1, 0, 0, 0, REG_ECX}, 0x00004000}, - {FEAT_DEF(PDCM), {0x1, 0, 0, 0, REG_ECX}, 0x00008000}, - {FEAT_DEF(PCID), {0x1, 0, 0, 0, REG_ECX}, 0x00020000}, - {FEAT_DEF(DCA), {0x1, 0, 0, 0, REG_ECX}, 0x00040000}, - {FEAT_DEF(SSE4_1), {0x1, 0, 0, 0, REG_ECX}, 0x00080000}, - {FEAT_DEF(SSE4_2), {0x1, 0, 0, 0, REG_ECX}, 0x00100000}, - {FEAT_DEF(X2APIC), {0x1, 0, 0, 0, REG_ECX}, 0x00200000}, - {FEAT_DEF(MOVBE), {0x1, 0, 0, 0, REG_ECX}, 0x00400000}, - {FEAT_DEF(POPCNT), {0x1, 0, 0, 0, REG_ECX}, 0x00800000}, - {FEAT_DEF(TSC_DEADLINE), {0x1, 0, 0, 0, REG_ECX}, 0x01000000}, - {FEAT_DEF(AES), {0x1, 0, 0, 0, REG_ECX}, 0x02000000}, - {FEAT_DEF(XSAVE), {0x1, 0, 0, 0, REG_ECX}, 0x04000000}, - {FEAT_DEF(OSXSAVE), {0x1, 0, 0, 0, REG_ECX}, 0x08000000}, - {FEAT_DEF(AVX), {0x1, 0, 0, 0, REG_ECX}, 0x10000000}, - {FEAT_DEF(F16C), {0x1, 0, 0, 0, REG_ECX}, 0x20000000}, - {FEAT_DEF(RDRAND), {0x1, 0, 0, 0, REG_ECX}, 0x40000000}, - - {FEAT_DEF(FPU), {0x1, 0, 0, 0, REG_EDX}, 0x00000001}, - {FEAT_DEF(VME), {0x1, 0, 0, 0, REG_EDX}, 0x00000002}, - {FEAT_DEF(DE), {0x1, 0, 0, 0, REG_EDX}, 0x00000004}, - {FEAT_DEF(PSE), {0x1, 0, 0, 0, REG_EDX}, 0x00000008}, - {FEAT_DEF(TSC), {0x1, 0, 0, 0, REG_EDX}, 0x00000010}, - {FEAT_DEF(MSR), {0x1, 0, 0, 0, REG_EDX}, 0x00000020}, - {FEAT_DEF(PAE), {0x1, 0, 0, 0, REG_EDX}, 0x00000040}, - {FEAT_DEF(MCE), {0x1, 0, 0, 0, REG_EDX}, 0x00000080}, - {FEAT_DEF(CX8), {0x1, 0, 0, 0, REG_EDX}, 0x00000100}, - {FEAT_DEF(APIC), {0x1, 0, 0, 0, REG_EDX}, 0x00000200}, - {FEAT_DEF(SEP), {0x1, 0, 0, 0, REG_EDX}, 0x00000800}, - {FEAT_DEF(MTRR), {0x1, 0, 0, 0, REG_EDX}, 0x00001000}, - {FEAT_DEF(PGE), {0x1, 0, 0, 0, REG_EDX}, 0x00002000}, - {FEAT_DEF(MCA), {0x1, 0, 0, 0, REG_EDX}, 0x00004000}, - {FEAT_DEF(CMOV), {0x1, 0, 0, 0, REG_EDX}, 0x00008000}, - {FEAT_DEF(PAT), {0x1, 0, 0, 0, REG_EDX}, 0x00010000}, - {FEAT_DEF(PSE36), 
{0x1, 0, 0, 0, REG_EDX}, 0x00020000}, - {FEAT_DEF(PSN), {0x1, 0, 0, 0, REG_EDX}, 0x00040000}, - {FEAT_DEF(CLFSH), {0x1, 0, 0, 0, REG_EDX}, 0x00080000}, - {FEAT_DEF(DS), {0x1, 0, 0, 0, REG_EDX}, 0x00200000}, - {FEAT_DEF(ACPI), {0x1, 0, 0, 0, REG_EDX}, 0x00400000}, - {FEAT_DEF(MMX), {0x1, 0, 0, 0, REG_EDX}, 0x00800000}, - {FEAT_DEF(FXSR), {0x1, 0, 0, 0, REG_EDX}, 0x01000000}, - {FEAT_DEF(SSE), {0x1, 0, 0, 0, REG_EDX}, 0x02000000}, - {FEAT_DEF(SSE2), {0x1, 0, 0, 0, REG_EDX}, 0x04000000}, - {FEAT_DEF(SS), {0x1, 0, 0, 0, REG_EDX}, 0x08000000}, - {FEAT_DEF(HTT), {0x1, 0, 0, 0, REG_EDX}, 0x10000000}, - {FEAT_DEF(TM), {0x1, 0, 0, 0, REG_EDX}, 0x20000000}, - {FEAT_DEF(PBE), {0x1, 0, 0, 0, REG_EDX}, 0x80000000}, - - {FEAT_DEF(DIGTEMP), {0x6, 0, 0, 0, REG_EAX}, 0x00000001}, - {FEAT_DEF(TRBOBST), {0x6, 0, 0, 0, REG_EAX}, 0x00000002}, - {FEAT_DEF(ARAT), {0x6, 0, 0, 0, REG_EAX}, 0x00000004}, - {FEAT_DEF(PLN), {0x6, 0, 0, 0, REG_EAX}, 0x00000010}, - {FEAT_DEF(ECMD), {0x6, 0, 0, 0, REG_EAX}, 0x00000020}, - {FEAT_DEF(PTM), {0x6, 0, 0, 0, REG_EAX}, 0x00000040}, - - {FEAT_DEF(MPERF_APERF_MSR), {0x6, 0, 0, 0, REG_ECX}, 0x00000001}, - {FEAT_DEF(ACNT2), {0x6, 0, 0, 0, REG_ECX}, 0x00000002}, - {FEAT_DEF(ENERGY_EFF), {0x6, 0, 0, 0, REG_ECX}, 0x00000008}, - - {FEAT_DEF(FSGSBASE), {0x7, 0, 0, 0, REG_EBX}, 0x00000001}, - {FEAT_DEF(BMI1), {0x7, 0, 0, 0, REG_EBX}, 0x00000004}, - {FEAT_DEF(HLE), {0x7, 0, 0, 0, REG_EBX}, 0x00000010}, - {FEAT_DEF(AVX2), {0x7, 0, 0, 0, REG_EBX}, 0x00000020}, - {FEAT_DEF(SMEP), {0x7, 0, 0, 0, REG_EBX}, 0x00000040}, - {FEAT_DEF(BMI2), {0x7, 0, 0, 0, REG_EBX}, 0x00000080}, - {FEAT_DEF(ERMS), {0x7, 0, 0, 0, REG_EBX}, 0x00000100}, - {FEAT_DEF(INVPCID), {0x7, 0, 0, 0, REG_EBX}, 0x00000400}, - {FEAT_DEF(RTM), {0x7, 0, 0, 0, REG_EBX}, 0x00000800}, - - {FEAT_DEF(LAHF_SAHF), {0x80000001, 0, 0, 0, REG_ECX}, 0x00000001}, - {FEAT_DEF(LZCNT), {0x80000001, 0, 0, 0, REG_ECX}, 0x00000010}, - - {FEAT_DEF(SYSCALL), {0x80000001, 0, 0, 0, REG_EDX}, 0x00000800}, - {FEAT_DEF(XD), 
{0x80000001, 0, 0, 0, REG_EDX}, 0x00100000}, - {FEAT_DEF(1GB_PG), {0x80000001, 0, 0, 0, REG_EDX}, 0x04000000}, - {FEAT_DEF(RDTSCP), {0x80000001, 0, 0, 0, REG_EDX}, 0x08000000}, - {FEAT_DEF(EM64T), {0x80000001, 0, 0, 0, REG_EDX}, 0x20000000}, - - {FEAT_DEF(INVTSC), {0x80000007, 0, 0, 0, REG_EDX}, 0x00000100}, + {FEAT_DEF(SSE3), 0x00000001, 0, REG_ECX, 0}, + {FEAT_DEF(PCLMULQDQ), 0x00000001, 0, REG_ECX, 1}, + {FEAT_DEF(DTES64), 0x00000001, 0, REG_ECX, 2}, + {FEAT_DEF(MONITOR), 0x00000001, 0, REG_ECX, 3}, + {FEAT_DEF(DS_CPL), 0x00000001, 0, REG_ECX, 4}, + {FEAT_DEF(VMX), 0x00000001, 0, REG_ECX, 5}, + {FEAT_DEF(SMX), 0x00000001, 0, REG_ECX, 6}, + {FEAT_DEF(EIST), 0x00000001, 0, REG_ECX, 7}, + {FEAT_DEF(TM2), 0x00000001, 0, REG_ECX, 8}, + {FEAT_DEF(SSSE3), 0x00000001, 0, REG_ECX, 9}, + {FEAT_DEF(CNXT_ID), 0x00000001, 0, REG_ECX, 10}, + {FEAT_DEF(FMA), 0x00000001, 0, REG_ECX, 12}, + {FEAT_DEF(CMPXCHG16B), 0x00000001, 0, REG_ECX, 13}, + {FEAT_DEF(XTPR), 0x00000001, 0, REG_ECX, 14}, + {FEAT_DEF(PDCM), 0x00000001, 0, REG_ECX, 15}, + {FEAT_DEF(PCID), 0x00000001, 0, REG_ECX, 17}, + {FEAT_DEF(DCA), 0x00000001, 0, REG_ECX, 18}, + {FEAT_DEF(SSE4_1), 0x00000001, 0, REG_ECX, 19}, + {FEAT_DEF(SSE4_2), 0x00000001, 0, REG_ECX, 20}, + {FEAT_DEF(X2APIC), 0x00000001, 0, REG_ECX, 21}, + {FEAT_DEF(MOVBE), 0x00000001, 0, REG_ECX, 22}, + {FEAT_DEF(POPCNT), 0x00000001, 0, REG_ECX, 23}, + {FEAT_DEF(TSC_DEADLINE), 0x00000001, 0, REG_ECX, 24}, + {FEAT_DEF(AES), 0x00000001, 0, REG_ECX, 25}, + {FEAT_DEF(XSAVE), 0x00000001, 0, REG_ECX, 26}, + {FEAT_DEF(OSXSAVE), 0x00000001, 0, REG_ECX, 27}, + {FEAT_DEF(AVX), 0x00000001, 0, REG_ECX, 28}, + {FEAT_DEF(F16C), 0x00000001, 0, REG_ECX, 29}, + {FEAT_DEF(RDRAND), 0x00000001, 0, REG_ECX, 30}, + + {FEAT_DEF(FPU), 0x00000001, 0, REG_EDX, 0}, + {FEAT_DEF(VME), 0x00000001, 0, REG_EDX, 1}, + {FEAT_DEF(DE), 0x00000001, 0, REG_EDX, 2}, + {FEAT_DEF(PSE), 0x00000001, 0, REG_EDX, 3}, + {FEAT_DEF(TSC), 0x00000001, 0, REG_EDX, 4}, + {FEAT_DEF(MSR), 0x00000001, 0, 
REG_EDX, 5}, + {FEAT_DEF(PAE), 0x00000001, 0, REG_EDX, 6}, + {FEAT_DEF(MCE), 0x00000001, 0, REG_EDX, 7}, + {FEAT_DEF(CX8), 0x00000001, 0, REG_EDX, 8}, + {FEAT_DEF(APIC), 0x00000001, 0, REG_EDX, 9}, + {FEAT_DEF(SEP), 0x00000001, 0, REG_EDX, 11}, + {FEAT_DEF(MTRR), 0x00000001, 0, REG_EDX, 12}, + {FEAT_DEF(PGE), 0x00000001, 0, REG_EDX, 13}, + {FEAT_DEF(MCA), 0x00000001, 0, REG_EDX, 14}, + {FEAT_DEF(CMOV), 0x00000001, 0, REG_EDX, 15}, + {FEAT_DEF(PAT), 0x00000001, 0, REG_EDX, 16}, + {FEAT_DEF(PSE36), 0x00000001, 0, REG_EDX, 17}, + {FEAT_DEF(PSN), 0x00000001, 0, REG_EDX, 18}, + {FEAT_DEF(CLFSH), 0x00000001, 0, REG_EDX, 19}, + {FEAT_DEF(DS), 0x00000001, 0, REG_EDX, 21}, + {FEAT_DEF(ACPI), 0x00000001, 0, REG_EDX, 22}, + {FEAT_DEF(MMX), 0x00000001, 0, REG_EDX, 23}, + {FEAT_DEF(FXSR), 0x00000001, 0, REG_EDX, 24}, + {FEAT_DEF(SSE), 0x00000001, 0, REG_EDX, 25}, + {FEAT_DEF(SSE2), 0x00000001, 0, REG_EDX, 26}, + {FEAT_DEF(SS), 0x00000001, 0, REG_EDX, 27}, + {FEAT_DEF(HTT), 0x00000001, 0, REG_EDX, 28}, + {FEAT_DEF(TM), 0x00000001, 0, REG_EDX, 29}, + {FEAT_DEF(PBE), 0x00000001, 0, REG_EDX, 31}, + + {FEAT_DEF(DIGTEMP), 0x00000006, 0, REG_EAX, 0}, + {FEAT_DEF(TRBOBST), 0x00000006, 0, REG_EAX, 1}, + {FEAT_DEF(ARAT), 0x00000006, 0, REG_EAX, 2}, + {FEAT_DEF(PLN), 0x00000006, 0, REG_EAX, 4}, + {FEAT_DEF(ECMD), 0x00000006, 0, REG_EAX, 5}, + {FEAT_DEF(PTM), 0x00000006, 0, REG_EAX, 6}, + + {FEAT_DEF(MPERF_APERF_MSR), 0x00000006, 0, REG_ECX, 0}, + {FEAT_DEF(ACNT2), 0x00000006, 0, REG_ECX, 1}, + {FEAT_DEF(ENERGY_EFF), 0x00000006, 0, REG_ECX, 3}, + + {FEAT_DEF(FSGSBASE), 0x00000007, 0, REG_EBX, 0}, + {FEAT_DEF(BMI1), 0x00000007, 0, REG_EBX, 2}, + {FEAT_DEF(HLE), 0x00000007, 0, REG_EBX, 4}, + {FEAT_DEF(AVX2), 0x00000007, 0, REG_EBX, 5}, + {FEAT_DEF(SMEP), 0x00000007, 0, REG_EBX, 6}, + {FEAT_DEF(BMI2), 0x00000007, 0, REG_EBX, 7}, + {FEAT_DEF(ERMS), 0x00000007, 0, REG_EBX, 8}, + {FEAT_DEF(INVPCID), 0x00000007, 0, REG_EBX, 10}, + {FEAT_DEF(RTM), 0x00000007, 0, REG_EBX, 11}, + + 
{FEAT_DEF(LAHF_SAHF), 0x80000001, 0, REG_ECX, 0}, + {FEAT_DEF(LZCNT), 0x80000001, 0, REG_ECX, 4}, + + {FEAT_DEF(SYSCALL), 0x80000001, 0, REG_EDX, 11}, + {FEAT_DEF(XD), 0x80000001, 0, REG_EDX, 20}, + {FEAT_DEF(1GB_PG), 0x80000001, 0, REG_EDX, 26}, + {FEAT_DEF(RDTSCP), 0x80000001, 0, REG_EDX, 27}, + {FEAT_DEF(EM64T), 0x80000001, 0, REG_EDX, 29}, + + {FEAT_DEF(INVTSC), 0x80000007, 0, REG_EDX, 8}, }; /* @@ -187,51 +180,25 @@ static const struct feature_entry cpu_feature_table[] = { * This function, when compiled with GCC, will generate architecture-neutral * code, as per GCC manual. */ -static inline int -rte_cpu_get_features(struct cpuid_parameters_t params) +static inline void +rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t *out) { - int eax, ebx, ecx, edx; /* registers */ - -#ifndef __PIC__ - asm volatile ("cpuid" - /* output */ - : "=a" (eax), - "=b" (ebx), - "=c" (ecx), - "=d" (edx) - /* input */ - : "a" (params.eax), - "b" (params.ebx), - "c" (params.ecx), - "d" (params.edx)); +#if defined(__i386__) && defined(__PIC__) + /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */ + asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0" + : "=r" (out[REG_EBX]), + "=a" (out[REG_EAX]), + "=c" (out[REG_ECX]), + "=d" (out[REG_EDX]) + : "a" (leaf), "c" (subleaf)); #else - asm volatile ( - "mov %%ebx, %%edi\n" - "cpuid\n" - "xchgl %%ebx, %%edi;\n" - : "=a" (eax), - "=D" (ebx), - "=c" (ecx), - "=d" (edx) - /* input */ - : "a" (params.eax), - "D" (params.ebx), - "c" (params.ecx), - "d" (params.edx)); + asm volatile("cpuid" + : "=b" (out[REG_EBX]), + "=a" (out[REG_EAX]), + "=c" (out[REG_ECX]), + "=d" (out[REG_EDX]) + : "a" (leaf), "c" (subleaf)); #endif - - switch (params.return_register) { - case REG_EAX: - return eax; - case REG_EBX: - return ebx; - case REG_ECX: - return ecx; - case REG_EDX: - return edx; - default: - return 0; - } } /* @@ -240,17 +207,20 @@ rte_cpu_get_features(struct cpuid_parameters_t params) int 
rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature) { - int value; + const struct feature_entry *feat; + cpu_registers_t regs; if (feature >= RTE_CPUFLAG_NUMFLAGS) /* Flag does not match anything in the feature tables */ return -ENOENT; - /* get value of the register containing the desired feature */ - value = rte_cpu_get_features(cpu_feature_table[feature].params); + feat = &cpu_feature_table[feature]; + + /* get the cpuid leaf containing the desired feature */ + rte_cpu_get_features(feat->leaf, feat->subleaf, &regs); /* check if the feature is enabled */ - return (cpu_feature_table[feature].feature_mask & value) > 0; + return (regs[feat->reg] >> feat->bit) & 1; } /** @@ -273,7 +243,7 @@ rte_cpu_check_supported(void) unsigned i; for (i = 0; i < sizeof(compile_time_flags)/sizeof(compile_time_flags[0]); i++) - if (rte_cpu_get_flag_enabled(compile_time_flags[i]) < 1) { + if (!rte_cpu_get_flag_enabled(compile_time_flags[i])) { fprintf(stderr, "ERROR: This system does not support \"%s\".\n" "Please check that RTE_MACHINE is set correctly.\n", -- 1.8.5.3