From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp.tuxdriver.com (charlotte.tuxdriver.com [70.61.120.58]) by dpdk.org (Postfix) with ESMTP id 32D032E88 for ; Fri, 21 Mar 2014 15:47:48 +0100 (CET) Received: from hmsreliant.think-freely.org ([2001:470:8:a08:7aac:c0ff:fec2:933b] helo=localhost) by smtp.tuxdriver.com with esmtpsa (TLSv1:AES128-SHA:128) (Exim 4.63) (envelope-from ) id 1WR0kz-0007Ls-Vp; Fri, 21 Mar 2014 10:49:19 -0400 From: Neil Horman To: dev@dpdk.org Date: Fri, 21 Mar 2014 10:49:00 -0400 Message-Id: <1395413340-27392-1-git-send-email-nhorman@tuxdriver.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <20140320163921.GC7721@hmsreliant.think-freely.org> References: <20140320163921.GC7721@hmsreliant.think-freely.org> X-Spam-Score: -2.9 (--) X-Spam-Status: No Cc: "H. Peter Anvin" Subject: [dpdk-dev] [PATCH v2] eal_common_cpuflags: Fix %rbx corruption, and simplify the code X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 21 Mar 2014 14:47:48 -0000 From: "H. Peter Anvin" Neil Horman reported that on x86-64 the upper half of %rbx would get clobbered when the code was compiled PIC or PIE, because the i386-specific code to preserve %ebx was incorrectly compiled in on x86-64 as well. Beyond that bug, the code is far more complex than it needs to be. For one thing, the CPUID instruction only needs %eax (leaf) and %ecx (subleaf) as parameters, and since we are testing for bits, we might as well list the bits explicitly. Furthermore, we can return the output registers in an array indexed by register number rather than using a switch statement on a field stored inside a structure. Reported-by: Neil Horman Signed-off-by: H. 
Peter Anvin Tested-by: Neil Horman --- Change notes: v2) Corrected build errors Fixed cpuid_register_t reference passing Fixed typedef name typo --- lib/librte_eal/common/eal_common_cpuflags.c | 274 +++++++++++++--------------- 1 file changed, 123 insertions(+), 151 deletions(-) diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c index 1ebf78c..438d9c5 100644 --- a/lib/librte_eal/common/eal_common_cpuflags.c +++ b/lib/librte_eal/common/eal_common_cpuflags.c @@ -54,21 +54,12 @@ */ enum cpu_register_t { REG_EAX = 0, - REG_EBX, REG_ECX, REG_EDX, + REG_EBX, }; -/** - * Parameters for CPUID instruction - */ -struct cpuid_parameters_t { - uint32_t eax; - uint32_t ebx; - uint32_t ecx; - uint32_t edx; - enum cpu_register_t return_register; -}; +typedef uint32_t cpuid_registers_t[4]; #define CPU_FLAG_NAME_MAX_LEN 64 @@ -78,8 +69,10 @@ struct cpuid_parameters_t { struct feature_entry { enum rte_cpu_flag_t feature; /**< feature name */ char name[CPU_FLAG_NAME_MAX_LEN]; /**< String for printing */ - struct cpuid_parameters_t params; /**< cpuid parameters */ - uint32_t feature_mask; /**< bitmask for feature */ + uint32_t leaf; /**< cpuid leaf */ + uint32_t subleaf; /**< cpuid subleaf */ + uint32_t reg; /**< cpuid register */ + uint32_t bit; /**< cpuid register bit */ }; #define FEAT_DEF(f) RTE_CPUFLAG_##f, #f @@ -88,97 +81,97 @@ struct feature_entry { * An array that holds feature entries */ static const struct feature_entry cpu_feature_table[] = { - {FEAT_DEF(SSE3), {0x1, 0, 0, 0, REG_ECX}, 0x00000001}, - {FEAT_DEF(PCLMULQDQ), {0x1, 0, 0, 0, REG_ECX}, 0x00000002}, - {FEAT_DEF(DTES64), {0x1, 0, 0, 0, REG_ECX}, 0x00000004}, - {FEAT_DEF(MONITOR), {0x1, 0, 0, 0, REG_ECX}, 0x00000008}, - {FEAT_DEF(DS_CPL), {0x1, 0, 0, 0, REG_ECX}, 0x00000010}, - {FEAT_DEF(VMX), {0x1, 0, 0, 0, REG_ECX}, 0x00000020}, - {FEAT_DEF(SMX), {0x1, 0, 0, 0, REG_ECX}, 0x00000040}, - {FEAT_DEF(EIST), {0x1, 0, 0, 0, REG_ECX}, 0x00000080}, - {FEAT_DEF(TM2), 
{0x1, 0, 0, 0, REG_ECX}, 0x00000100}, - {FEAT_DEF(SSSE3), {0x1, 0, 0, 0, REG_ECX}, 0x00000200}, - {FEAT_DEF(CNXT_ID), {0x1, 0, 0, 0, REG_ECX}, 0x00000400}, - {FEAT_DEF(FMA), {0x1, 0, 0, 0, REG_ECX}, 0x00001000}, - {FEAT_DEF(CMPXCHG16B), {0x1, 0, 0, 0, REG_ECX}, 0x00002000}, - {FEAT_DEF(XTPR), {0x1, 0, 0, 0, REG_ECX}, 0x00004000}, - {FEAT_DEF(PDCM), {0x1, 0, 0, 0, REG_ECX}, 0x00008000}, - {FEAT_DEF(PCID), {0x1, 0, 0, 0, REG_ECX}, 0x00020000}, - {FEAT_DEF(DCA), {0x1, 0, 0, 0, REG_ECX}, 0x00040000}, - {FEAT_DEF(SSE4_1), {0x1, 0, 0, 0, REG_ECX}, 0x00080000}, - {FEAT_DEF(SSE4_2), {0x1, 0, 0, 0, REG_ECX}, 0x00100000}, - {FEAT_DEF(X2APIC), {0x1, 0, 0, 0, REG_ECX}, 0x00200000}, - {FEAT_DEF(MOVBE), {0x1, 0, 0, 0, REG_ECX}, 0x00400000}, - {FEAT_DEF(POPCNT), {0x1, 0, 0, 0, REG_ECX}, 0x00800000}, - {FEAT_DEF(TSC_DEADLINE), {0x1, 0, 0, 0, REG_ECX}, 0x01000000}, - {FEAT_DEF(AES), {0x1, 0, 0, 0, REG_ECX}, 0x02000000}, - {FEAT_DEF(XSAVE), {0x1, 0, 0, 0, REG_ECX}, 0x04000000}, - {FEAT_DEF(OSXSAVE), {0x1, 0, 0, 0, REG_ECX}, 0x08000000}, - {FEAT_DEF(AVX), {0x1, 0, 0, 0, REG_ECX}, 0x10000000}, - {FEAT_DEF(F16C), {0x1, 0, 0, 0, REG_ECX}, 0x20000000}, - {FEAT_DEF(RDRAND), {0x1, 0, 0, 0, REG_ECX}, 0x40000000}, - - {FEAT_DEF(FPU), {0x1, 0, 0, 0, REG_EDX}, 0x00000001}, - {FEAT_DEF(VME), {0x1, 0, 0, 0, REG_EDX}, 0x00000002}, - {FEAT_DEF(DE), {0x1, 0, 0, 0, REG_EDX}, 0x00000004}, - {FEAT_DEF(PSE), {0x1, 0, 0, 0, REG_EDX}, 0x00000008}, - {FEAT_DEF(TSC), {0x1, 0, 0, 0, REG_EDX}, 0x00000010}, - {FEAT_DEF(MSR), {0x1, 0, 0, 0, REG_EDX}, 0x00000020}, - {FEAT_DEF(PAE), {0x1, 0, 0, 0, REG_EDX}, 0x00000040}, - {FEAT_DEF(MCE), {0x1, 0, 0, 0, REG_EDX}, 0x00000080}, - {FEAT_DEF(CX8), {0x1, 0, 0, 0, REG_EDX}, 0x00000100}, - {FEAT_DEF(APIC), {0x1, 0, 0, 0, REG_EDX}, 0x00000200}, - {FEAT_DEF(SEP), {0x1, 0, 0, 0, REG_EDX}, 0x00000800}, - {FEAT_DEF(MTRR), {0x1, 0, 0, 0, REG_EDX}, 0x00001000}, - {FEAT_DEF(PGE), {0x1, 0, 0, 0, REG_EDX}, 0x00002000}, - {FEAT_DEF(MCA), {0x1, 0, 0, 0, REG_EDX}, 0x00004000}, - 
{FEAT_DEF(CMOV), {0x1, 0, 0, 0, REG_EDX}, 0x00008000}, - {FEAT_DEF(PAT), {0x1, 0, 0, 0, REG_EDX}, 0x00010000}, - {FEAT_DEF(PSE36), {0x1, 0, 0, 0, REG_EDX}, 0x00020000}, - {FEAT_DEF(PSN), {0x1, 0, 0, 0, REG_EDX}, 0x00040000}, - {FEAT_DEF(CLFSH), {0x1, 0, 0, 0, REG_EDX}, 0x00080000}, - {FEAT_DEF(DS), {0x1, 0, 0, 0, REG_EDX}, 0x00200000}, - {FEAT_DEF(ACPI), {0x1, 0, 0, 0, REG_EDX}, 0x00400000}, - {FEAT_DEF(MMX), {0x1, 0, 0, 0, REG_EDX}, 0x00800000}, - {FEAT_DEF(FXSR), {0x1, 0, 0, 0, REG_EDX}, 0x01000000}, - {FEAT_DEF(SSE), {0x1, 0, 0, 0, REG_EDX}, 0x02000000}, - {FEAT_DEF(SSE2), {0x1, 0, 0, 0, REG_EDX}, 0x04000000}, - {FEAT_DEF(SS), {0x1, 0, 0, 0, REG_EDX}, 0x08000000}, - {FEAT_DEF(HTT), {0x1, 0, 0, 0, REG_EDX}, 0x10000000}, - {FEAT_DEF(TM), {0x1, 0, 0, 0, REG_EDX}, 0x20000000}, - {FEAT_DEF(PBE), {0x1, 0, 0, 0, REG_EDX}, 0x80000000}, - - {FEAT_DEF(DIGTEMP), {0x6, 0, 0, 0, REG_EAX}, 0x00000001}, - {FEAT_DEF(TRBOBST), {0x6, 0, 0, 0, REG_EAX}, 0x00000002}, - {FEAT_DEF(ARAT), {0x6, 0, 0, 0, REG_EAX}, 0x00000004}, - {FEAT_DEF(PLN), {0x6, 0, 0, 0, REG_EAX}, 0x00000010}, - {FEAT_DEF(ECMD), {0x6, 0, 0, 0, REG_EAX}, 0x00000020}, - {FEAT_DEF(PTM), {0x6, 0, 0, 0, REG_EAX}, 0x00000040}, - - {FEAT_DEF(MPERF_APERF_MSR), {0x6, 0, 0, 0, REG_ECX}, 0x00000001}, - {FEAT_DEF(ACNT2), {0x6, 0, 0, 0, REG_ECX}, 0x00000002}, - {FEAT_DEF(ENERGY_EFF), {0x6, 0, 0, 0, REG_ECX}, 0x00000008}, - - {FEAT_DEF(FSGSBASE), {0x7, 0, 0, 0, REG_EBX}, 0x00000001}, - {FEAT_DEF(BMI1), {0x7, 0, 0, 0, REG_EBX}, 0x00000004}, - {FEAT_DEF(HLE), {0x7, 0, 0, 0, REG_EBX}, 0x00000010}, - {FEAT_DEF(AVX2), {0x7, 0, 0, 0, REG_EBX}, 0x00000020}, - {FEAT_DEF(SMEP), {0x7, 0, 0, 0, REG_EBX}, 0x00000040}, - {FEAT_DEF(BMI2), {0x7, 0, 0, 0, REG_EBX}, 0x00000080}, - {FEAT_DEF(ERMS), {0x7, 0, 0, 0, REG_EBX}, 0x00000100}, - {FEAT_DEF(INVPCID), {0x7, 0, 0, 0, REG_EBX}, 0x00000400}, - {FEAT_DEF(RTM), {0x7, 0, 0, 0, REG_EBX}, 0x00000800}, - - {FEAT_DEF(LAHF_SAHF), {0x80000001, 0, 0, 0, REG_ECX}, 0x00000001}, - {FEAT_DEF(LZCNT), 
{0x80000001, 0, 0, 0, REG_ECX}, 0x00000010}, - - {FEAT_DEF(SYSCALL), {0x80000001, 0, 0, 0, REG_EDX}, 0x00000800}, - {FEAT_DEF(XD), {0x80000001, 0, 0, 0, REG_EDX}, 0x00100000}, - {FEAT_DEF(1GB_PG), {0x80000001, 0, 0, 0, REG_EDX}, 0x04000000}, - {FEAT_DEF(RDTSCP), {0x80000001, 0, 0, 0, REG_EDX}, 0x08000000}, - {FEAT_DEF(EM64T), {0x80000001, 0, 0, 0, REG_EDX}, 0x20000000}, - - {FEAT_DEF(INVTSC), {0x80000007, 0, 0, 0, REG_EDX}, 0x00000100}, + {FEAT_DEF(SSE3), 0x00000001, 0, REG_ECX, 0}, + {FEAT_DEF(PCLMULQDQ), 0x00000001, 0, REG_ECX, 1}, + {FEAT_DEF(DTES64), 0x00000001, 0, REG_ECX, 2}, + {FEAT_DEF(MONITOR), 0x00000001, 0, REG_ECX, 3}, + {FEAT_DEF(DS_CPL), 0x00000001, 0, REG_ECX, 4}, + {FEAT_DEF(VMX), 0x00000001, 0, REG_ECX, 5}, + {FEAT_DEF(SMX), 0x00000001, 0, REG_ECX, 6}, + {FEAT_DEF(EIST), 0x00000001, 0, REG_ECX, 7}, + {FEAT_DEF(TM2), 0x00000001, 0, REG_ECX, 8}, + {FEAT_DEF(SSSE3), 0x00000001, 0, REG_ECX, 9}, + {FEAT_DEF(CNXT_ID), 0x00000001, 0, REG_ECX, 10}, + {FEAT_DEF(FMA), 0x00000001, 0, REG_ECX, 12}, + {FEAT_DEF(CMPXCHG16B), 0x00000001, 0, REG_ECX, 13}, + {FEAT_DEF(XTPR), 0x00000001, 0, REG_ECX, 14}, + {FEAT_DEF(PDCM), 0x00000001, 0, REG_ECX, 15}, + {FEAT_DEF(PCID), 0x00000001, 0, REG_ECX, 17}, + {FEAT_DEF(DCA), 0x00000001, 0, REG_ECX, 18}, + {FEAT_DEF(SSE4_1), 0x00000001, 0, REG_ECX, 19}, + {FEAT_DEF(SSE4_2), 0x00000001, 0, REG_ECX, 20}, + {FEAT_DEF(X2APIC), 0x00000001, 0, REG_ECX, 21}, + {FEAT_DEF(MOVBE), 0x00000001, 0, REG_ECX, 22}, + {FEAT_DEF(POPCNT), 0x00000001, 0, REG_ECX, 23}, + {FEAT_DEF(TSC_DEADLINE), 0x00000001, 0, REG_ECX, 24}, + {FEAT_DEF(AES), 0x00000001, 0, REG_ECX, 25}, + {FEAT_DEF(XSAVE), 0x00000001, 0, REG_ECX, 26}, + {FEAT_DEF(OSXSAVE), 0x00000001, 0, REG_ECX, 27}, + {FEAT_DEF(AVX), 0x00000001, 0, REG_ECX, 28}, + {FEAT_DEF(F16C), 0x00000001, 0, REG_ECX, 29}, + {FEAT_DEF(RDRAND), 0x00000001, 0, REG_ECX, 30}, + + {FEAT_DEF(FPU), 0x00000001, 0, REG_EDX, 0}, + {FEAT_DEF(VME), 0x00000001, 0, REG_EDX, 1}, + {FEAT_DEF(DE), 0x00000001, 0, REG_EDX, 2}, 
+ {FEAT_DEF(PSE), 0x00000001, 0, REG_EDX, 3}, + {FEAT_DEF(TSC), 0x00000001, 0, REG_EDX, 4}, + {FEAT_DEF(MSR), 0x00000001, 0, REG_EDX, 5}, + {FEAT_DEF(PAE), 0x00000001, 0, REG_EDX, 6}, + {FEAT_DEF(MCE), 0x00000001, 0, REG_EDX, 7}, + {FEAT_DEF(CX8), 0x00000001, 0, REG_EDX, 8}, + {FEAT_DEF(APIC), 0x00000001, 0, REG_EDX, 9}, + {FEAT_DEF(SEP), 0x00000001, 0, REG_EDX, 11}, + {FEAT_DEF(MTRR), 0x00000001, 0, REG_EDX, 12}, + {FEAT_DEF(PGE), 0x00000001, 0, REG_EDX, 13}, + {FEAT_DEF(MCA), 0x00000001, 0, REG_EDX, 14}, + {FEAT_DEF(CMOV), 0x00000001, 0, REG_EDX, 15}, + {FEAT_DEF(PAT), 0x00000001, 0, REG_EDX, 16}, + {FEAT_DEF(PSE36), 0x00000001, 0, REG_EDX, 17}, + {FEAT_DEF(PSN), 0x00000001, 0, REG_EDX, 18}, + {FEAT_DEF(CLFSH), 0x00000001, 0, REG_EDX, 19}, + {FEAT_DEF(DS), 0x00000001, 0, REG_EDX, 21}, + {FEAT_DEF(ACPI), 0x00000001, 0, REG_EDX, 22}, + {FEAT_DEF(MMX), 0x00000001, 0, REG_EDX, 23}, + {FEAT_DEF(FXSR), 0x00000001, 0, REG_EDX, 24}, + {FEAT_DEF(SSE), 0x00000001, 0, REG_EDX, 25}, + {FEAT_DEF(SSE2), 0x00000001, 0, REG_EDX, 26}, + {FEAT_DEF(SS), 0x00000001, 0, REG_EDX, 27}, + {FEAT_DEF(HTT), 0x00000001, 0, REG_EDX, 28}, + {FEAT_DEF(TM), 0x00000001, 0, REG_EDX, 29}, + {FEAT_DEF(PBE), 0x00000001, 0, REG_EDX, 31}, + + {FEAT_DEF(DIGTEMP), 0x00000006, 0, REG_EAX, 0}, + {FEAT_DEF(TRBOBST), 0x00000006, 0, REG_EAX, 1}, + {FEAT_DEF(ARAT), 0x00000006, 0, REG_EAX, 2}, + {FEAT_DEF(PLN), 0x00000006, 0, REG_EAX, 4}, + {FEAT_DEF(ECMD), 0x00000006, 0, REG_EAX, 5}, + {FEAT_DEF(PTM), 0x00000006, 0, REG_EAX, 6}, + + {FEAT_DEF(MPERF_APERF_MSR), 0x00000006, 0, REG_ECX, 0}, + {FEAT_DEF(ACNT2), 0x00000006, 0, REG_ECX, 1}, + {FEAT_DEF(ENERGY_EFF), 0x00000006, 0, REG_ECX, 3}, + + {FEAT_DEF(FSGSBASE), 0x00000007, 0, REG_EBX, 0}, + {FEAT_DEF(BMI1), 0x00000007, 0, REG_EBX, 2}, + {FEAT_DEF(HLE), 0x00000007, 0, REG_EBX, 4}, + {FEAT_DEF(AVX2), 0x00000007, 0, REG_EBX, 5}, + {FEAT_DEF(SMEP), 0x00000007, 0, REG_EBX, 6}, + {FEAT_DEF(BMI2), 0x00000007, 0, REG_EBX, 7}, + {FEAT_DEF(ERMS), 0x00000007, 0, 
REG_EBX, 8}, + {FEAT_DEF(INVPCID), 0x00000007, 0, REG_EBX, 10}, + {FEAT_DEF(RTM), 0x00000007, 0, REG_EBX, 11}, + + {FEAT_DEF(LAHF_SAHF), 0x80000001, 0, REG_ECX, 0}, + {FEAT_DEF(LZCNT), 0x80000001, 0, REG_ECX, 4}, + + {FEAT_DEF(SYSCALL), 0x80000001, 0, REG_EDX, 11}, + {FEAT_DEF(XD), 0x80000001, 0, REG_EDX, 20}, + {FEAT_DEF(1GB_PG), 0x80000001, 0, REG_EDX, 26}, + {FEAT_DEF(RDTSCP), 0x80000001, 0, REG_EDX, 27}, + {FEAT_DEF(EM64T), 0x80000001, 0, REG_EDX, 29}, + + {FEAT_DEF(INVTSC), 0x80000007, 0, REG_EDX, 8}, }; /* @@ -187,51 +180,27 @@ static const struct feature_entry cpu_feature_table[] = { * This function, when compiled with GCC, will generate architecture-neutral * code, as per GCC manual. */ -static inline int -rte_cpu_get_features(struct cpuid_parameters_t params) +static inline void +rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t out) { - int eax, ebx, ecx, edx; /* registers */ - -#ifndef __PIC__ - asm volatile ("cpuid" - /* output */ - : "=a" (eax), - "=b" (ebx), - "=c" (ecx), - "=d" (edx) - /* input */ - : "a" (params.eax), - "b" (params.ebx), - "c" (params.ecx), - "d" (params.edx)); +#if defined(__i386__) && defined(__PIC__) + /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */ + asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0" + : "=r" (out[REG_EBX]), + "=a" (out[REG_EAX]), + "=c" (out[REG_ECX]), + "=d" (out[REG_EDX]) + : "a" (leaf), "c" (subleaf)); #else - asm volatile ( - "mov %%ebx, %%edi\n" - "cpuid\n" - "xchgl %%ebx, %%edi;\n" - : "=a" (eax), - "=D" (ebx), - "=c" (ecx), - "=d" (edx) - /* input */ - : "a" (params.eax), - "D" (params.ebx), - "c" (params.ecx), - "d" (params.edx)); -#endif - switch (params.return_register) { - case REG_EAX: - return eax; - case REG_EBX: - return ebx; - case REG_ECX: - return ecx; - case REG_EDX: - return edx; - default: - return 0; - } + asm volatile("cpuid" + : "=a" (out[REG_EAX]), + "=b" (out[REG_EBX]), + "=c" (out[REG_ECX]), + "=d" (out[REG_EDX]) + : "a" (leaf), "c" 
(subleaf)); + +#endif } /* @@ -240,17 +209,20 @@ rte_cpu_get_features(struct cpuid_parameters_t params) int rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature) { - int value; + const struct feature_entry *feat; + cpuid_registers_t regs; if (feature >= RTE_CPUFLAG_NUMFLAGS) /* Flag does not match anything in the feature tables */ return -ENOENT; - /* get value of the register containing the desired feature */ - value = rte_cpu_get_features(cpu_feature_table[feature].params); + feat = &cpu_feature_table[feature]; + + /* get the cpuid leaf containing the desired feature */ + rte_cpu_get_features(feat->leaf, feat->subleaf, regs); /* check if the feature is enabled */ - return (cpu_feature_table[feature].feature_mask & value) > 0; + return (regs[feat->reg] >> feat->bit) & 1; } /** @@ -273,7 +245,7 @@ rte_cpu_check_supported(void) unsigned i; for (i = 0; i < sizeof(compile_time_flags)/sizeof(compile_time_flags[0]); i++) - if (rte_cpu_get_flag_enabled(compile_time_flags[i]) < 1) { + if (!rte_cpu_get_flag_enabled(compile_time_flags[i])) { fprintf(stderr, "ERROR: This system does not support \"%s\".\n" "Please check that RTE_MACHINE is set correctly.\n", -- 1.8.3.1