DPDK patches and discussions
 help / color / mirror / Atom feed
* Re: [dpdk-dev] [RFC UNTESTED PATCH] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
       [not found] <1395330830-1310-1-git-send-email-hpa@linux.intel.com>
@ 2014-03-20 16:39 ` Neil Horman
  2014-03-20 17:02   ` Thomas Monjalon
                     ` (5 more replies)
  0 siblings, 6 replies; 20+ messages in thread
From: Neil Horman @ 2014-03-20 16:39 UTC (permalink / raw)
  To: H. Peter Anvin; +Cc: dev, H. Peter Anvin

On Thu, Mar 20, 2014 at 08:53:50AM -0700, H. Peter Anvin wrote:
> Neil Horman reported that on x86-64 the upper half of %rbx would get
> clobbered when the code was compiled PIC or PIE, because the
> i386-specific code to preserve %ebx was incorrectly compiled.
> 
> However, the code is really way more complex than it needs to be.  For
> one thing, the CPUID instruction only needs %eax (leaf) and %ecx
> (subleaf) as parameters, and since we are testing for bits, we might
> as well list the bits explicitly.  Furthermore, we can use an array
> rather than doing a switch statement inside a structure.
> 
> Reported-by: Neil Horman <nhorman@tuxdriver.com>
> Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>

> ---
>  lib/librte_eal/common/eal_common_cpuflags.c | 272 +++++++++++++---------------
>  1 file changed, 121 insertions(+), 151 deletions(-)
> 
> diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c
> index 1ebf78cc2a48..bf66ad9d94ec 100644
> --- a/lib/librte_eal/common/eal_common_cpuflags.c
> +++ b/lib/librte_eal/common/eal_common_cpuflags.c
> @@ -54,21 +54,12 @@
>   */
>  enum cpu_register_t {
>  	REG_EAX = 0,
> -	REG_EBX,
>  	REG_ECX,
>  	REG_EDX,
> +	REG_EBX,
>  };
>  
> -/**
> - * Parameters for CPUID instruction
> - */
> -struct cpuid_parameters_t {
> -	uint32_t eax;
> -	uint32_t ebx;
> -	uint32_t ecx;
> -	uint32_t edx;
> -	enum cpu_register_t return_register;
> -};
> +typedef uint32_t cpuid_registers_t[4];
>  
>  #define CPU_FLAG_NAME_MAX_LEN 64
>  
> @@ -78,8 +69,10 @@ struct cpuid_parameters_t {
>  struct feature_entry {
>  	enum rte_cpu_flag_t feature;            /**< feature name */
>  	char name[CPU_FLAG_NAME_MAX_LEN];       /**< String for printing */
> -	struct cpuid_parameters_t params;       /**< cpuid parameters */
> -	uint32_t feature_mask;                  /**< bitmask for feature */
> +	uint32_t leaf;				/**< cpuid leaf */
> +	uint32_t subleaf;			/**< cpuid subleaf */
> +	uint32_t reg;				/**< cpuid register */
> +	uint32_t bit;				/**< cpuid register bit */
>  };
>  
>  #define FEAT_DEF(f) RTE_CPUFLAG_##f, #f
> @@ -88,97 +81,97 @@ struct feature_entry {
>   * An array that holds feature entries
>   */
>  static const struct feature_entry cpu_feature_table[] = {
> -	{FEAT_DEF(SSE3),              {0x1, 0, 0, 0, REG_ECX}, 0x00000001},
> -	{FEAT_DEF(PCLMULQDQ),         {0x1, 0, 0, 0, REG_ECX}, 0x00000002},
> -	{FEAT_DEF(DTES64),            {0x1, 0, 0, 0, REG_ECX}, 0x00000004},
> -	{FEAT_DEF(MONITOR),           {0x1, 0, 0, 0, REG_ECX}, 0x00000008},
> -	{FEAT_DEF(DS_CPL),            {0x1, 0, 0, 0, REG_ECX}, 0x00000010},
> -	{FEAT_DEF(VMX),               {0x1, 0, 0, 0, REG_ECX}, 0x00000020},
> -	{FEAT_DEF(SMX),               {0x1, 0, 0, 0, REG_ECX}, 0x00000040},
> -	{FEAT_DEF(EIST),              {0x1, 0, 0, 0, REG_ECX}, 0x00000080},
> -	{FEAT_DEF(TM2),               {0x1, 0, 0, 0, REG_ECX}, 0x00000100},
> -	{FEAT_DEF(SSSE3),             {0x1, 0, 0, 0, REG_ECX}, 0x00000200},
> -	{FEAT_DEF(CNXT_ID),           {0x1, 0, 0, 0, REG_ECX}, 0x00000400},
> -	{FEAT_DEF(FMA),               {0x1, 0, 0, 0, REG_ECX}, 0x00001000},
> -	{FEAT_DEF(CMPXCHG16B),        {0x1, 0, 0, 0, REG_ECX}, 0x00002000},
> -	{FEAT_DEF(XTPR),              {0x1, 0, 0, 0, REG_ECX}, 0x00004000},
> -	{FEAT_DEF(PDCM),              {0x1, 0, 0, 0, REG_ECX}, 0x00008000},
> -	{FEAT_DEF(PCID),              {0x1, 0, 0, 0, REG_ECX}, 0x00020000},
> -	{FEAT_DEF(DCA),               {0x1, 0, 0, 0, REG_ECX}, 0x00040000},
> -	{FEAT_DEF(SSE4_1),            {0x1, 0, 0, 0, REG_ECX}, 0x00080000},
> -	{FEAT_DEF(SSE4_2),            {0x1, 0, 0, 0, REG_ECX}, 0x00100000},
> -	{FEAT_DEF(X2APIC),            {0x1, 0, 0, 0, REG_ECX}, 0x00200000},
> -	{FEAT_DEF(MOVBE),             {0x1, 0, 0, 0, REG_ECX}, 0x00400000},
> -	{FEAT_DEF(POPCNT),            {0x1, 0, 0, 0, REG_ECX}, 0x00800000},
> -	{FEAT_DEF(TSC_DEADLINE),      {0x1, 0, 0, 0, REG_ECX}, 0x01000000},
> -	{FEAT_DEF(AES),               {0x1, 0, 0, 0, REG_ECX}, 0x02000000},
> -	{FEAT_DEF(XSAVE),             {0x1, 0, 0, 0, REG_ECX}, 0x04000000},
> -	{FEAT_DEF(OSXSAVE),           {0x1, 0, 0, 0, REG_ECX}, 0x08000000},
> -	{FEAT_DEF(AVX),               {0x1, 0, 0, 0, REG_ECX}, 0x10000000},
> -	{FEAT_DEF(F16C),              {0x1, 0, 0, 0, REG_ECX}, 0x20000000},
> -	{FEAT_DEF(RDRAND),            {0x1, 0, 0, 0, REG_ECX}, 0x40000000},
> -
> -	{FEAT_DEF(FPU),               {0x1, 0, 0, 0, REG_EDX}, 0x00000001},
> -	{FEAT_DEF(VME),               {0x1, 0, 0, 0, REG_EDX}, 0x00000002},
> -	{FEAT_DEF(DE),                {0x1, 0, 0, 0, REG_EDX}, 0x00000004},
> -	{FEAT_DEF(PSE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000008},
> -	{FEAT_DEF(TSC),               {0x1, 0, 0, 0, REG_EDX}, 0x00000010},
> -	{FEAT_DEF(MSR),               {0x1, 0, 0, 0, REG_EDX}, 0x00000020},
> -	{FEAT_DEF(PAE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000040},
> -	{FEAT_DEF(MCE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000080},
> -	{FEAT_DEF(CX8),               {0x1, 0, 0, 0, REG_EDX}, 0x00000100},
> -	{FEAT_DEF(APIC),              {0x1, 0, 0, 0, REG_EDX}, 0x00000200},
> -	{FEAT_DEF(SEP),               {0x1, 0, 0, 0, REG_EDX}, 0x00000800},
> -	{FEAT_DEF(MTRR),              {0x1, 0, 0, 0, REG_EDX}, 0x00001000},
> -	{FEAT_DEF(PGE),               {0x1, 0, 0, 0, REG_EDX}, 0x00002000},
> -	{FEAT_DEF(MCA),               {0x1, 0, 0, 0, REG_EDX}, 0x00004000},
> -	{FEAT_DEF(CMOV),              {0x1, 0, 0, 0, REG_EDX}, 0x00008000},
> -	{FEAT_DEF(PAT),               {0x1, 0, 0, 0, REG_EDX}, 0x00010000},
> -	{FEAT_DEF(PSE36),             {0x1, 0, 0, 0, REG_EDX}, 0x00020000},
> -	{FEAT_DEF(PSN),               {0x1, 0, 0, 0, REG_EDX}, 0x00040000},
> -	{FEAT_DEF(CLFSH),             {0x1, 0, 0, 0, REG_EDX}, 0x00080000},
> -	{FEAT_DEF(DS),                {0x1, 0, 0, 0, REG_EDX}, 0x00200000},
> -	{FEAT_DEF(ACPI),              {0x1, 0, 0, 0, REG_EDX}, 0x00400000},
> -	{FEAT_DEF(MMX),               {0x1, 0, 0, 0, REG_EDX}, 0x00800000},
> -	{FEAT_DEF(FXSR),              {0x1, 0, 0, 0, REG_EDX}, 0x01000000},
> -	{FEAT_DEF(SSE),               {0x1, 0, 0, 0, REG_EDX}, 0x02000000},
> -	{FEAT_DEF(SSE2),              {0x1, 0, 0, 0, REG_EDX}, 0x04000000},
> -	{FEAT_DEF(SS),                {0x1, 0, 0, 0, REG_EDX}, 0x08000000},
> -	{FEAT_DEF(HTT),               {0x1, 0, 0, 0, REG_EDX}, 0x10000000},
> -	{FEAT_DEF(TM),                {0x1, 0, 0, 0, REG_EDX}, 0x20000000},
> -	{FEAT_DEF(PBE),               {0x1, 0, 0, 0, REG_EDX}, 0x80000000},
> -
> -	{FEAT_DEF(DIGTEMP),           {0x6, 0, 0, 0, REG_EAX}, 0x00000001},
> -	{FEAT_DEF(TRBOBST),           {0x6, 0, 0, 0, REG_EAX}, 0x00000002},
> -	{FEAT_DEF(ARAT),              {0x6, 0, 0, 0, REG_EAX}, 0x00000004},
> -	{FEAT_DEF(PLN),               {0x6, 0, 0, 0, REG_EAX}, 0x00000010},
> -	{FEAT_DEF(ECMD),              {0x6, 0, 0, 0, REG_EAX}, 0x00000020},
> -	{FEAT_DEF(PTM),               {0x6, 0, 0, 0, REG_EAX}, 0x00000040},
> -
> -	{FEAT_DEF(MPERF_APERF_MSR),   {0x6, 0, 0, 0, REG_ECX}, 0x00000001},
> -	{FEAT_DEF(ACNT2),             {0x6, 0, 0, 0, REG_ECX}, 0x00000002},
> -	{FEAT_DEF(ENERGY_EFF),        {0x6, 0, 0, 0, REG_ECX}, 0x00000008},
> -
> -	{FEAT_DEF(FSGSBASE),          {0x7, 0, 0, 0, REG_EBX}, 0x00000001},
> -	{FEAT_DEF(BMI1),              {0x7, 0, 0, 0, REG_EBX}, 0x00000004},
> -	{FEAT_DEF(HLE),               {0x7, 0, 0, 0, REG_EBX}, 0x00000010},
> -	{FEAT_DEF(AVX2),              {0x7, 0, 0, 0, REG_EBX}, 0x00000020},
> -	{FEAT_DEF(SMEP),              {0x7, 0, 0, 0, REG_EBX}, 0x00000040},
> -	{FEAT_DEF(BMI2),              {0x7, 0, 0, 0, REG_EBX}, 0x00000080},
> -	{FEAT_DEF(ERMS),              {0x7, 0, 0, 0, REG_EBX}, 0x00000100},
> -	{FEAT_DEF(INVPCID),           {0x7, 0, 0, 0, REG_EBX}, 0x00000400},
> -	{FEAT_DEF(RTM),               {0x7, 0, 0, 0, REG_EBX}, 0x00000800},
> -
> -	{FEAT_DEF(LAHF_SAHF),  {0x80000001, 0, 0, 0, REG_ECX}, 0x00000001},
> -	{FEAT_DEF(LZCNT),      {0x80000001, 0, 0, 0, REG_ECX}, 0x00000010},
> -
> -	{FEAT_DEF(SYSCALL),    {0x80000001, 0, 0, 0, REG_EDX}, 0x00000800},
> -	{FEAT_DEF(XD),         {0x80000001, 0, 0, 0, REG_EDX}, 0x00100000},
> -	{FEAT_DEF(1GB_PG),     {0x80000001, 0, 0, 0, REG_EDX}, 0x04000000},
> -	{FEAT_DEF(RDTSCP),     {0x80000001, 0, 0, 0, REG_EDX}, 0x08000000},
> -	{FEAT_DEF(EM64T),      {0x80000001, 0, 0, 0, REG_EDX}, 0x20000000},
> -
> -	{FEAT_DEF(INVTSC),     {0x80000007, 0, 0, 0, REG_EDX}, 0x00000100},
> +	{FEAT_DEF(SSE3),		0x00000001, 0, REG_ECX,  0},
> +	{FEAT_DEF(PCLMULQDQ),		0x00000001, 0, REG_ECX,  1},
> +	{FEAT_DEF(DTES64),		0x00000001, 0, REG_ECX,  2},
> +	{FEAT_DEF(MONITOR),		0x00000001, 0, REG_ECX,  3},
> +	{FEAT_DEF(DS_CPL),		0x00000001, 0, REG_ECX,  4},
> +	{FEAT_DEF(VMX),			0x00000001, 0, REG_ECX,  5},
> +	{FEAT_DEF(SMX),			0x00000001, 0, REG_ECX,  6},
> +	{FEAT_DEF(EIST),		0x00000001, 0, REG_ECX,  7},
> +	{FEAT_DEF(TM2),			0x00000001, 0, REG_ECX,  8},
> +	{FEAT_DEF(SSSE3),		0x00000001, 0, REG_ECX,  9},
> +	{FEAT_DEF(CNXT_ID),		0x00000001, 0, REG_ECX, 10},
> +	{FEAT_DEF(FMA),			0x00000001, 0, REG_ECX, 12},
> +	{FEAT_DEF(CMPXCHG16B),		0x00000001, 0, REG_ECX, 13},
> +	{FEAT_DEF(XTPR),		0x00000001, 0, REG_ECX, 14},
> +	{FEAT_DEF(PDCM),		0x00000001, 0, REG_ECX, 15},
> +	{FEAT_DEF(PCID),		0x00000001, 0, REG_ECX, 17},
> +	{FEAT_DEF(DCA),			0x00000001, 0, REG_ECX, 18},
> +	{FEAT_DEF(SSE4_1),		0x00000001, 0, REG_ECX, 19},
> +	{FEAT_DEF(SSE4_2),		0x00000001, 0, REG_ECX, 20},
> +	{FEAT_DEF(X2APIC),		0x00000001, 0, REG_ECX, 21},
> +	{FEAT_DEF(MOVBE),		0x00000001, 0, REG_ECX, 22},
> +	{FEAT_DEF(POPCNT),		0x00000001, 0, REG_ECX, 23},
> +	{FEAT_DEF(TSC_DEADLINE),	0x00000001, 0, REG_ECX, 24},
> +	{FEAT_DEF(AES),			0x00000001, 0, REG_ECX, 25},
> +	{FEAT_DEF(XSAVE),		0x00000001, 0, REG_ECX, 26},
> +	{FEAT_DEF(OSXSAVE),		0x00000001, 0, REG_ECX, 27},
> +	{FEAT_DEF(AVX),			0x00000001, 0, REG_ECX, 28},
> +	{FEAT_DEF(F16C),		0x00000001, 0, REG_ECX, 29},
> +	{FEAT_DEF(RDRAND),		0x00000001, 0, REG_ECX, 30},
> +
> +	{FEAT_DEF(FPU),			0x00000001, 0, REG_EDX,  0},
> +	{FEAT_DEF(VME),			0x00000001, 0, REG_EDX,  1},
> +	{FEAT_DEF(DE),			0x00000001, 0, REG_EDX,  2},
> +	{FEAT_DEF(PSE),			0x00000001, 0, REG_EDX,  3},
> +	{FEAT_DEF(TSC),			0x00000001, 0, REG_EDX,  4},
> +	{FEAT_DEF(MSR),			0x00000001, 0, REG_EDX,  5},
> +	{FEAT_DEF(PAE),			0x00000001, 0, REG_EDX,  6},
> +	{FEAT_DEF(MCE),			0x00000001, 0, REG_EDX,  7},
> +	{FEAT_DEF(CX8),			0x00000001, 0, REG_EDX,  8},
> +	{FEAT_DEF(APIC),		0x00000001, 0, REG_EDX,  9},
> +	{FEAT_DEF(SEP),			0x00000001, 0, REG_EDX, 11},
> +	{FEAT_DEF(MTRR),		0x00000001, 0, REG_EDX, 12},
> +	{FEAT_DEF(PGE),			0x00000001, 0, REG_EDX, 13},
> +	{FEAT_DEF(MCA),			0x00000001, 0, REG_EDX, 14},
> +	{FEAT_DEF(CMOV),		0x00000001, 0, REG_EDX, 15},
> +	{FEAT_DEF(PAT),			0x00000001, 0, REG_EDX, 16},
> +	{FEAT_DEF(PSE36),		0x00000001, 0, REG_EDX, 17},
> +	{FEAT_DEF(PSN),			0x00000001, 0, REG_EDX, 18},
> +	{FEAT_DEF(CLFSH),		0x00000001, 0, REG_EDX, 19},
> +	{FEAT_DEF(DS),			0x00000001, 0, REG_EDX, 21},
> +	{FEAT_DEF(ACPI),		0x00000001, 0, REG_EDX, 22},
> +	{FEAT_DEF(MMX),			0x00000001, 0, REG_EDX, 23},
> +	{FEAT_DEF(FXSR),		0x00000001, 0, REG_EDX, 24},
> +	{FEAT_DEF(SSE),			0x00000001, 0, REG_EDX, 25},
> +	{FEAT_DEF(SSE2),		0x00000001, 0, REG_EDX, 26},
> +	{FEAT_DEF(SS),			0x00000001, 0, REG_EDX, 27},
> +	{FEAT_DEF(HTT),			0x00000001, 0, REG_EDX, 28},
> +	{FEAT_DEF(TM),			0x00000001, 0, REG_EDX, 29},
> +	{FEAT_DEF(PBE),			0x00000001, 0, REG_EDX, 31},
> +
> +	{FEAT_DEF(DIGTEMP),		0x00000006, 0, REG_EAX,  0},
> +	{FEAT_DEF(TRBOBST),		0x00000006, 0, REG_EAX,  1},
> +	{FEAT_DEF(ARAT),		0x00000006, 0, REG_EAX,  2},
> +	{FEAT_DEF(PLN),			0x00000006, 0, REG_EAX,  4},
> +	{FEAT_DEF(ECMD),		0x00000006, 0, REG_EAX,  5},
> +	{FEAT_DEF(PTM),			0x00000006, 0, REG_EAX,  6},
> +
> +	{FEAT_DEF(MPERF_APERF_MSR),	0x00000006, 0, REG_ECX,  0},
> +	{FEAT_DEF(ACNT2),		0x00000006, 0, REG_ECX,  1},
> +	{FEAT_DEF(ENERGY_EFF),		0x00000006, 0, REG_ECX,  3},
> +
> +	{FEAT_DEF(FSGSBASE),		0x00000007, 0, REG_EBX,  0},
> +	{FEAT_DEF(BMI1),		0x00000007, 0, REG_EBX,  2},
> +	{FEAT_DEF(HLE),			0x00000007, 0, REG_EBX,  4},
> +	{FEAT_DEF(AVX2),		0x00000007, 0, REG_EBX,  5},
> +	{FEAT_DEF(SMEP),		0x00000007, 0, REG_EBX,  6},
> +	{FEAT_DEF(BMI2),		0x00000007, 0, REG_EBX,  7},
> +	{FEAT_DEF(ERMS),		0x00000007, 0, REG_EBX,  8},
> +	{FEAT_DEF(INVPCID),		0x00000007, 0, REG_EBX, 10},
> +	{FEAT_DEF(RTM),			0x00000007, 0, REG_EBX, 11},
> +
> +	{FEAT_DEF(LAHF_SAHF),		0x80000001, 0, REG_ECX,  0},
> +	{FEAT_DEF(LZCNT),		0x80000001, 0, REG_ECX,  4},
> +
> +	{FEAT_DEF(SYSCALL),		0x80000001, 0, REG_EDX, 11},
> +	{FEAT_DEF(XD),			0x80000001, 0, REG_EDX, 20},
> +	{FEAT_DEF(1GB_PG),		0x80000001, 0, REG_EDX, 26},
> +	{FEAT_DEF(RDTSCP),		0x80000001, 0, REG_EDX, 27},
> +	{FEAT_DEF(EM64T),		0x80000001, 0, REG_EDX, 29},
> +
> +	{FEAT_DEF(INVTSC),		0x80000007, 0, REG_EDX,  8},
>  };
>  
>  /*
> @@ -187,51 +180,25 @@ static const struct feature_entry cpu_feature_table[] = {
>   * This function, when compiled with GCC, will generate architecture-neutral
>   * code, as per GCC manual.
>   */
> -static inline int
> -rte_cpu_get_features(struct cpuid_parameters_t params)
> +static inline void
> +rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t *out)
>  {
> -	int eax, ebx, ecx, edx;            /* registers */
> -
> -#ifndef __PIC__
> -   asm volatile ("cpuid"
> -                 /* output */
> -                 : "=a" (eax),
> -                   "=b" (ebx),
> -                   "=c" (ecx),
> -                   "=d" (edx)
> -                 /* input */
> -                 : "a" (params.eax),
> -                   "b" (params.ebx),
> -                   "c" (params.ecx),
> -                   "d" (params.edx));
> +#if defined(__i386__) && defined(__PIC__)
> +    /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */
> +    asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0"
> +		 : "=r" (out[REG_EBX]),
> +		   "=a" (out[REG_EAX]),
> +		   "=c" (out[REG_ECX]),
> +		   "=d" (out[REG_EDX])
> +		 : "a" (leaf), "c" (subleaf));
>  #else
> -	asm volatile ( 
> -            "mov %%ebx, %%edi\n"
> -            "cpuid\n"
> -            "xchgl %%ebx, %%edi;\n"
> -            : "=a" (eax),
> -              "=D" (ebx),
> -              "=c" (ecx),
> -              "=d" (edx)
> -            /* input */
> -            : "a" (params.eax),
> -              "D" (params.ebx),
> -              "c" (params.ecx),
> -              "d" (params.edx));
> +    asm volatile("cpuid"
> +		 : "=b" (out[REG_EBX]),
> +		   "=a" (out[REG_EAX]),
> +		   "=c" (out[REG_ECX]),
> +		   "=d" (out[REG_EDX])
> +		 : "a" (leaf), "c" (subleaf));
>  #endif
> -
> -	switch (params.return_register) {
> -	case REG_EAX:
> -		return eax;
> -	case REG_EBX:
> -		return ebx;
> -	case REG_ECX:
> -		return ecx;
> -	case REG_EDX:
> -		return edx;
> -	default:
> -		return 0;
> -	}
>  }
>  
>  /*
> @@ -240,17 +207,20 @@ rte_cpu_get_features(struct cpuid_parameters_t params)
>  int
>  rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
>  {
> -	int value;
> +	const struct feature_entry *feat;
> +	cpu_registers_t regs;
>  
>  	if (feature >= RTE_CPUFLAG_NUMFLAGS)
>  		/* Flag does not match anything in the feature tables */
>  		return -ENOENT;
>  
> -	/* get value of the register containing the desired feature */
> -	value = rte_cpu_get_features(cpu_feature_table[feature].params);
> +	feat = &cpu_feature_table[feature];
> +
> +	/* get the cpuid leaf containing the desired feature */
> +	rte_cpu_get_features(feat->leaf, feat->subleaf, &regs);
>  
>  	/* check if the feature is enabled */
> -	return (cpu_feature_table[feature].feature_mask & value) > 0;
> +	return (regs[feat->reg] >> feat->bit) & 1;
>  }
>  
>  /**
> @@ -273,7 +243,7 @@ rte_cpu_check_supported(void)
>  	unsigned i;
>  
>  	for (i = 0; i < sizeof(compile_time_flags)/sizeof(compile_time_flags[0]); i++)
> -		if (rte_cpu_get_flag_enabled(compile_time_flags[i]) < 1) {
> +		if (!rte_cpu_get_flag_enabled(compile_time_flags[i])) {
>  			fprintf(stderr,
>  			        "ERROR: This system does not support \"%s\".\n"
>  			        "Please check that RTE_MACHINE is set correctly.\n",
> -- 
> 1.8.5.3
> 
> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [RFC UNTESTED PATCH] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
  2014-03-20 16:39 ` [dpdk-dev] [RFC UNTESTED PATCH] eal_common_cpuflags: Fix %rbx corruption, and simplify the code Neil Horman
@ 2014-03-20 17:02   ` Thomas Monjalon
  2014-03-20 18:04   ` Neil Horman
                     ` (4 subsequent siblings)
  5 siblings, 0 replies; 20+ messages in thread
From: Thomas Monjalon @ 2014-03-20 17:02 UTC (permalink / raw)
  To: Neil Horman, H. Peter Anvin; +Cc: dev, H. Peter Anvin

Hi,

20/03/2014 12:39, Neil Horman :
> On Thu, Mar 20, 2014 at 08:53:50AM -0700, H. Peter Anvin wrote:
> > Neil Horman reported that on x86-64 the upper half of %rbx would get
> > clobbered when the code was compiled PIC or PIE, because the
> > i386-specific code to preserve %ebx was incorrectly compiled.
> > 
> > However, the code is really way more complex than it needs to be.  For
> > one thing, the CPUID instruction only needs %eax (leaf) and %ecx
> > (subleaf) as parameters, and since we are testing for bits, we might
> > as well list the bits explicitly.  Furthermore, we can use an array
> > rather than doing a switch statement inside a structure.
> > 
> > Reported-by: Neil Horman <nhorman@tuxdriver.com>
> > Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
> 
> Acked-by: Neil Horman <nhorman@tuxdriver.com>

This is an RFC UNTESTED patch.
So should I understand from this acknowledgement that you have tested it?
As a shared library? In 32-bit mode?

-- 
Thomas

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [RFC UNTESTED PATCH] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
  2014-03-20 16:39 ` [dpdk-dev] [RFC UNTESTED PATCH] eal_common_cpuflags: Fix %rbx corruption, and simplify the code Neil Horman
  2014-03-20 17:02   ` Thomas Monjalon
@ 2014-03-20 18:04   ` Neil Horman
  2014-03-21 14:49   ` [dpdk-dev] [PATCH v2] " Neil Horman
                     ` (3 subsequent siblings)
  5 siblings, 0 replies; 20+ messages in thread
From: Neil Horman @ 2014-03-20 18:04 UTC (permalink / raw)
  To: H. Peter Anvin; +Cc: dev, H. Peter Anvin

On Thu, Mar 20, 2014 at 12:39:21PM -0400, Neil Horman wrote:
> On Thu, Mar 20, 2014 at 08:53:50AM -0700, H. Peter Anvin wrote:
> > Neil Horman reported that on x86-64 the upper half of %rbx would get
> > clobbered when the code was compiled PIC or PIE, because the
> > i386-specific code to preserve %ebx was incorrectly compiled.
> > 
> > However, the code is really way more complex than it needs to be.  For
> > one thing, the CPUID instruction only needs %eax (leaf) and %ecx
> > (subleaf) as parameters, and since we are testing for bits, we might
> > as well list the bits explicitly.  Furthermore, we can use an array
> > rather than doing a switch statement inside a structure.
> > 
> > Reported-by: Neil Horman <nhorman@tuxdriver.com>
> > Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
> Acked-by: Neil Horman <nhorman@tuxdriver.com>
> 
Sorry, I'm just acking the proposed change; I've not tested it yet, though based
on our conversation, this is the right thing to do.  I'll have test reports
shortly.
Neil

> > ---
> >  lib/librte_eal/common/eal_common_cpuflags.c | 272 +++++++++++++---------------
> >  1 file changed, 121 insertions(+), 151 deletions(-)
> > 
> > diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c
> > index 1ebf78cc2a48..bf66ad9d94ec 100644
> > --- a/lib/librte_eal/common/eal_common_cpuflags.c
> > +++ b/lib/librte_eal/common/eal_common_cpuflags.c
> > @@ -54,21 +54,12 @@
> >   */
> >  enum cpu_register_t {
> >  	REG_EAX = 0,
> > -	REG_EBX,
> >  	REG_ECX,
> >  	REG_EDX,
> > +	REG_EBX,
> >  };
> >  
> > -/**
> > - * Parameters for CPUID instruction
> > - */
> > -struct cpuid_parameters_t {
> > -	uint32_t eax;
> > -	uint32_t ebx;
> > -	uint32_t ecx;
> > -	uint32_t edx;
> > -	enum cpu_register_t return_register;
> > -};
> > +typedef uint32_t cpuid_registers_t[4];
> >  
> >  #define CPU_FLAG_NAME_MAX_LEN 64
> >  
> > @@ -78,8 +69,10 @@ struct cpuid_parameters_t {
> >  struct feature_entry {
> >  	enum rte_cpu_flag_t feature;            /**< feature name */
> >  	char name[CPU_FLAG_NAME_MAX_LEN];       /**< String for printing */
> > -	struct cpuid_parameters_t params;       /**< cpuid parameters */
> > -	uint32_t feature_mask;                  /**< bitmask for feature */
> > +	uint32_t leaf;				/**< cpuid leaf */
> > +	uint32_t subleaf;			/**< cpuid subleaf */
> > +	uint32_t reg;				/**< cpuid register */
> > +	uint32_t bit;				/**< cpuid register bit */
> >  };
> >  
> >  #define FEAT_DEF(f) RTE_CPUFLAG_##f, #f
> > @@ -88,97 +81,97 @@ struct feature_entry {
> >   * An array that holds feature entries
> >   */
> >  static const struct feature_entry cpu_feature_table[] = {
> > -	{FEAT_DEF(SSE3),              {0x1, 0, 0, 0, REG_ECX}, 0x00000001},
> > -	{FEAT_DEF(PCLMULQDQ),         {0x1, 0, 0, 0, REG_ECX}, 0x00000002},
> > -	{FEAT_DEF(DTES64),            {0x1, 0, 0, 0, REG_ECX}, 0x00000004},
> > -	{FEAT_DEF(MONITOR),           {0x1, 0, 0, 0, REG_ECX}, 0x00000008},
> > -	{FEAT_DEF(DS_CPL),            {0x1, 0, 0, 0, REG_ECX}, 0x00000010},
> > -	{FEAT_DEF(VMX),               {0x1, 0, 0, 0, REG_ECX}, 0x00000020},
> > -	{FEAT_DEF(SMX),               {0x1, 0, 0, 0, REG_ECX}, 0x00000040},
> > -	{FEAT_DEF(EIST),              {0x1, 0, 0, 0, REG_ECX}, 0x00000080},
> > -	{FEAT_DEF(TM2),               {0x1, 0, 0, 0, REG_ECX}, 0x00000100},
> > -	{FEAT_DEF(SSSE3),             {0x1, 0, 0, 0, REG_ECX}, 0x00000200},
> > -	{FEAT_DEF(CNXT_ID),           {0x1, 0, 0, 0, REG_ECX}, 0x00000400},
> > -	{FEAT_DEF(FMA),               {0x1, 0, 0, 0, REG_ECX}, 0x00001000},
> > -	{FEAT_DEF(CMPXCHG16B),        {0x1, 0, 0, 0, REG_ECX}, 0x00002000},
> > -	{FEAT_DEF(XTPR),              {0x1, 0, 0, 0, REG_ECX}, 0x00004000},
> > -	{FEAT_DEF(PDCM),              {0x1, 0, 0, 0, REG_ECX}, 0x00008000},
> > -	{FEAT_DEF(PCID),              {0x1, 0, 0, 0, REG_ECX}, 0x00020000},
> > -	{FEAT_DEF(DCA),               {0x1, 0, 0, 0, REG_ECX}, 0x00040000},
> > -	{FEAT_DEF(SSE4_1),            {0x1, 0, 0, 0, REG_ECX}, 0x00080000},
> > -	{FEAT_DEF(SSE4_2),            {0x1, 0, 0, 0, REG_ECX}, 0x00100000},
> > -	{FEAT_DEF(X2APIC),            {0x1, 0, 0, 0, REG_ECX}, 0x00200000},
> > -	{FEAT_DEF(MOVBE),             {0x1, 0, 0, 0, REG_ECX}, 0x00400000},
> > -	{FEAT_DEF(POPCNT),            {0x1, 0, 0, 0, REG_ECX}, 0x00800000},
> > -	{FEAT_DEF(TSC_DEADLINE),      {0x1, 0, 0, 0, REG_ECX}, 0x01000000},
> > -	{FEAT_DEF(AES),               {0x1, 0, 0, 0, REG_ECX}, 0x02000000},
> > -	{FEAT_DEF(XSAVE),             {0x1, 0, 0, 0, REG_ECX}, 0x04000000},
> > -	{FEAT_DEF(OSXSAVE),           {0x1, 0, 0, 0, REG_ECX}, 0x08000000},
> > -	{FEAT_DEF(AVX),               {0x1, 0, 0, 0, REG_ECX}, 0x10000000},
> > -	{FEAT_DEF(F16C),              {0x1, 0, 0, 0, REG_ECX}, 0x20000000},
> > -	{FEAT_DEF(RDRAND),            {0x1, 0, 0, 0, REG_ECX}, 0x40000000},
> > -
> > -	{FEAT_DEF(FPU),               {0x1, 0, 0, 0, REG_EDX}, 0x00000001},
> > -	{FEAT_DEF(VME),               {0x1, 0, 0, 0, REG_EDX}, 0x00000002},
> > -	{FEAT_DEF(DE),                {0x1, 0, 0, 0, REG_EDX}, 0x00000004},
> > -	{FEAT_DEF(PSE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000008},
> > -	{FEAT_DEF(TSC),               {0x1, 0, 0, 0, REG_EDX}, 0x00000010},
> > -	{FEAT_DEF(MSR),               {0x1, 0, 0, 0, REG_EDX}, 0x00000020},
> > -	{FEAT_DEF(PAE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000040},
> > -	{FEAT_DEF(MCE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000080},
> > -	{FEAT_DEF(CX8),               {0x1, 0, 0, 0, REG_EDX}, 0x00000100},
> > -	{FEAT_DEF(APIC),              {0x1, 0, 0, 0, REG_EDX}, 0x00000200},
> > -	{FEAT_DEF(SEP),               {0x1, 0, 0, 0, REG_EDX}, 0x00000800},
> > -	{FEAT_DEF(MTRR),              {0x1, 0, 0, 0, REG_EDX}, 0x00001000},
> > -	{FEAT_DEF(PGE),               {0x1, 0, 0, 0, REG_EDX}, 0x00002000},
> > -	{FEAT_DEF(MCA),               {0x1, 0, 0, 0, REG_EDX}, 0x00004000},
> > -	{FEAT_DEF(CMOV),              {0x1, 0, 0, 0, REG_EDX}, 0x00008000},
> > -	{FEAT_DEF(PAT),               {0x1, 0, 0, 0, REG_EDX}, 0x00010000},
> > -	{FEAT_DEF(PSE36),             {0x1, 0, 0, 0, REG_EDX}, 0x00020000},
> > -	{FEAT_DEF(PSN),               {0x1, 0, 0, 0, REG_EDX}, 0x00040000},
> > -	{FEAT_DEF(CLFSH),             {0x1, 0, 0, 0, REG_EDX}, 0x00080000},
> > -	{FEAT_DEF(DS),                {0x1, 0, 0, 0, REG_EDX}, 0x00200000},
> > -	{FEAT_DEF(ACPI),              {0x1, 0, 0, 0, REG_EDX}, 0x00400000},
> > -	{FEAT_DEF(MMX),               {0x1, 0, 0, 0, REG_EDX}, 0x00800000},
> > -	{FEAT_DEF(FXSR),              {0x1, 0, 0, 0, REG_EDX}, 0x01000000},
> > -	{FEAT_DEF(SSE),               {0x1, 0, 0, 0, REG_EDX}, 0x02000000},
> > -	{FEAT_DEF(SSE2),              {0x1, 0, 0, 0, REG_EDX}, 0x04000000},
> > -	{FEAT_DEF(SS),                {0x1, 0, 0, 0, REG_EDX}, 0x08000000},
> > -	{FEAT_DEF(HTT),               {0x1, 0, 0, 0, REG_EDX}, 0x10000000},
> > -	{FEAT_DEF(TM),                {0x1, 0, 0, 0, REG_EDX}, 0x20000000},
> > -	{FEAT_DEF(PBE),               {0x1, 0, 0, 0, REG_EDX}, 0x80000000},
> > -
> > -	{FEAT_DEF(DIGTEMP),           {0x6, 0, 0, 0, REG_EAX}, 0x00000001},
> > -	{FEAT_DEF(TRBOBST),           {0x6, 0, 0, 0, REG_EAX}, 0x00000002},
> > -	{FEAT_DEF(ARAT),              {0x6, 0, 0, 0, REG_EAX}, 0x00000004},
> > -	{FEAT_DEF(PLN),               {0x6, 0, 0, 0, REG_EAX}, 0x00000010},
> > -	{FEAT_DEF(ECMD),              {0x6, 0, 0, 0, REG_EAX}, 0x00000020},
> > -	{FEAT_DEF(PTM),               {0x6, 0, 0, 0, REG_EAX}, 0x00000040},
> > -
> > -	{FEAT_DEF(MPERF_APERF_MSR),   {0x6, 0, 0, 0, REG_ECX}, 0x00000001},
> > -	{FEAT_DEF(ACNT2),             {0x6, 0, 0, 0, REG_ECX}, 0x00000002},
> > -	{FEAT_DEF(ENERGY_EFF),        {0x6, 0, 0, 0, REG_ECX}, 0x00000008},
> > -
> > -	{FEAT_DEF(FSGSBASE),          {0x7, 0, 0, 0, REG_EBX}, 0x00000001},
> > -	{FEAT_DEF(BMI1),              {0x7, 0, 0, 0, REG_EBX}, 0x00000004},
> > -	{FEAT_DEF(HLE),               {0x7, 0, 0, 0, REG_EBX}, 0x00000010},
> > -	{FEAT_DEF(AVX2),              {0x7, 0, 0, 0, REG_EBX}, 0x00000020},
> > -	{FEAT_DEF(SMEP),              {0x7, 0, 0, 0, REG_EBX}, 0x00000040},
> > -	{FEAT_DEF(BMI2),              {0x7, 0, 0, 0, REG_EBX}, 0x00000080},
> > -	{FEAT_DEF(ERMS),              {0x7, 0, 0, 0, REG_EBX}, 0x00000100},
> > -	{FEAT_DEF(INVPCID),           {0x7, 0, 0, 0, REG_EBX}, 0x00000400},
> > -	{FEAT_DEF(RTM),               {0x7, 0, 0, 0, REG_EBX}, 0x00000800},
> > -
> > -	{FEAT_DEF(LAHF_SAHF),  {0x80000001, 0, 0, 0, REG_ECX}, 0x00000001},
> > -	{FEAT_DEF(LZCNT),      {0x80000001, 0, 0, 0, REG_ECX}, 0x00000010},
> > -
> > -	{FEAT_DEF(SYSCALL),    {0x80000001, 0, 0, 0, REG_EDX}, 0x00000800},
> > -	{FEAT_DEF(XD),         {0x80000001, 0, 0, 0, REG_EDX}, 0x00100000},
> > -	{FEAT_DEF(1GB_PG),     {0x80000001, 0, 0, 0, REG_EDX}, 0x04000000},
> > -	{FEAT_DEF(RDTSCP),     {0x80000001, 0, 0, 0, REG_EDX}, 0x08000000},
> > -	{FEAT_DEF(EM64T),      {0x80000001, 0, 0, 0, REG_EDX}, 0x20000000},
> > -
> > -	{FEAT_DEF(INVTSC),     {0x80000007, 0, 0, 0, REG_EDX}, 0x00000100},
> > +	{FEAT_DEF(SSE3),		0x00000001, 0, REG_ECX,  0},
> > +	{FEAT_DEF(PCLMULQDQ),		0x00000001, 0, REG_ECX,  1},
> > +	{FEAT_DEF(DTES64),		0x00000001, 0, REG_ECX,  2},
> > +	{FEAT_DEF(MONITOR),		0x00000001, 0, REG_ECX,  3},
> > +	{FEAT_DEF(DS_CPL),		0x00000001, 0, REG_ECX,  4},
> > +	{FEAT_DEF(VMX),			0x00000001, 0, REG_ECX,  5},
> > +	{FEAT_DEF(SMX),			0x00000001, 0, REG_ECX,  6},
> > +	{FEAT_DEF(EIST),		0x00000001, 0, REG_ECX,  7},
> > +	{FEAT_DEF(TM2),			0x00000001, 0, REG_ECX,  8},
> > +	{FEAT_DEF(SSSE3),		0x00000001, 0, REG_ECX,  9},
> > +	{FEAT_DEF(CNXT_ID),		0x00000001, 0, REG_ECX, 10},
> > +	{FEAT_DEF(FMA),			0x00000001, 0, REG_ECX, 12},
> > +	{FEAT_DEF(CMPXCHG16B),		0x00000001, 0, REG_ECX, 13},
> > +	{FEAT_DEF(XTPR),		0x00000001, 0, REG_ECX, 14},
> > +	{FEAT_DEF(PDCM),		0x00000001, 0, REG_ECX, 15},
> > +	{FEAT_DEF(PCID),		0x00000001, 0, REG_ECX, 17},
> > +	{FEAT_DEF(DCA),			0x00000001, 0, REG_ECX, 18},
> > +	{FEAT_DEF(SSE4_1),		0x00000001, 0, REG_ECX, 19},
> > +	{FEAT_DEF(SSE4_2),		0x00000001, 0, REG_ECX, 20},
> > +	{FEAT_DEF(X2APIC),		0x00000001, 0, REG_ECX, 21},
> > +	{FEAT_DEF(MOVBE),		0x00000001, 0, REG_ECX, 22},
> > +	{FEAT_DEF(POPCNT),		0x00000001, 0, REG_ECX, 23},
> > +	{FEAT_DEF(TSC_DEADLINE),	0x00000001, 0, REG_ECX, 24},
> > +	{FEAT_DEF(AES),			0x00000001, 0, REG_ECX, 25},
> > +	{FEAT_DEF(XSAVE),		0x00000001, 0, REG_ECX, 26},
> > +	{FEAT_DEF(OSXSAVE),		0x00000001, 0, REG_ECX, 27},
> > +	{FEAT_DEF(AVX),			0x00000001, 0, REG_ECX, 28},
> > +	{FEAT_DEF(F16C),		0x00000001, 0, REG_ECX, 29},
> > +	{FEAT_DEF(RDRAND),		0x00000001, 0, REG_ECX, 30},
> > +
> > +	{FEAT_DEF(FPU),			0x00000001, 0, REG_EDX,  0},
> > +	{FEAT_DEF(VME),			0x00000001, 0, REG_EDX,  1},
> > +	{FEAT_DEF(DE),			0x00000001, 0, REG_EDX,  2},
> > +	{FEAT_DEF(PSE),			0x00000001, 0, REG_EDX,  3},
> > +	{FEAT_DEF(TSC),			0x00000001, 0, REG_EDX,  4},
> > +	{FEAT_DEF(MSR),			0x00000001, 0, REG_EDX,  5},
> > +	{FEAT_DEF(PAE),			0x00000001, 0, REG_EDX,  6},
> > +	{FEAT_DEF(MCE),			0x00000001, 0, REG_EDX,  7},
> > +	{FEAT_DEF(CX8),			0x00000001, 0, REG_EDX,  8},
> > +	{FEAT_DEF(APIC),		0x00000001, 0, REG_EDX,  9},
> > +	{FEAT_DEF(SEP),			0x00000001, 0, REG_EDX, 11},
> > +	{FEAT_DEF(MTRR),		0x00000001, 0, REG_EDX, 12},
> > +	{FEAT_DEF(PGE),			0x00000001, 0, REG_EDX, 13},
> > +	{FEAT_DEF(MCA),			0x00000001, 0, REG_EDX, 14},
> > +	{FEAT_DEF(CMOV),		0x00000001, 0, REG_EDX, 15},
> > +	{FEAT_DEF(PAT),			0x00000001, 0, REG_EDX, 16},
> > +	{FEAT_DEF(PSE36),		0x00000001, 0, REG_EDX, 17},
> > +	{FEAT_DEF(PSN),			0x00000001, 0, REG_EDX, 18},
> > +	{FEAT_DEF(CLFSH),		0x00000001, 0, REG_EDX, 19},
> > +	{FEAT_DEF(DS),			0x00000001, 0, REG_EDX, 21},
> > +	{FEAT_DEF(ACPI),		0x00000001, 0, REG_EDX, 22},
> > +	{FEAT_DEF(MMX),			0x00000001, 0, REG_EDX, 23},
> > +	{FEAT_DEF(FXSR),		0x00000001, 0, REG_EDX, 24},
> > +	{FEAT_DEF(SSE),			0x00000001, 0, REG_EDX, 25},
> > +	{FEAT_DEF(SSE2),		0x00000001, 0, REG_EDX, 26},
> > +	{FEAT_DEF(SS),			0x00000001, 0, REG_EDX, 27},
> > +	{FEAT_DEF(HTT),			0x00000001, 0, REG_EDX, 28},
> > +	{FEAT_DEF(TM),			0x00000001, 0, REG_EDX, 29},
> > +	{FEAT_DEF(PBE),			0x00000001, 0, REG_EDX, 31},
> > +
> > +	{FEAT_DEF(DIGTEMP),		0x00000006, 0, REG_EAX,  0},
> > +	{FEAT_DEF(TRBOBST),		0x00000006, 0, REG_EAX,  1},
> > +	{FEAT_DEF(ARAT),		0x00000006, 0, REG_EAX,  2},
> > +	{FEAT_DEF(PLN),			0x00000006, 0, REG_EAX,  4},
> > +	{FEAT_DEF(ECMD),		0x00000006, 0, REG_EAX,  5},
> > +	{FEAT_DEF(PTM),			0x00000006, 0, REG_EAX,  6},
> > +
> > +	{FEAT_DEF(MPERF_APERF_MSR),	0x00000006, 0, REG_ECX,  0},
> > +	{FEAT_DEF(ACNT2),		0x00000006, 0, REG_ECX,  1},
> > +	{FEAT_DEF(ENERGY_EFF),		0x00000006, 0, REG_ECX,  3},
> > +
> > +	{FEAT_DEF(FSGSBASE),		0x00000007, 0, REG_EBX,  0},
> > +	{FEAT_DEF(BMI1),		0x00000007, 0, REG_EBX,  2},
> > +	{FEAT_DEF(HLE),			0x00000007, 0, REG_EBX,  4},
> > +	{FEAT_DEF(AVX2),		0x00000007, 0, REG_EBX,  5},
> > +	{FEAT_DEF(SMEP),		0x00000007, 0, REG_EBX,  6},
> > +	{FEAT_DEF(BMI2),		0x00000007, 0, REG_EBX,  7},
> > +	{FEAT_DEF(ERMS),		0x00000007, 0, REG_EBX,  8},
> > +	{FEAT_DEF(INVPCID),		0x00000007, 0, REG_EBX, 10},
> > +	{FEAT_DEF(RTM),			0x00000007, 0, REG_EBX, 11},
> > +
> > +	{FEAT_DEF(LAHF_SAHF),		0x80000001, 0, REG_ECX,  0},
> > +	{FEAT_DEF(LZCNT),		0x80000001, 0, REG_ECX,  4},
> > +
> > +	{FEAT_DEF(SYSCALL),		0x80000001, 0, REG_EDX, 11},
> > +	{FEAT_DEF(XD),			0x80000001, 0, REG_EDX, 20},
> > +	{FEAT_DEF(1GB_PG),		0x80000001, 0, REG_EDX, 26},
> > +	{FEAT_DEF(RDTSCP),		0x80000001, 0, REG_EDX, 27},
> > +	{FEAT_DEF(EM64T),		0x80000001, 0, REG_EDX, 29},
> > +
> > +	{FEAT_DEF(INVTSC),		0x80000007, 0, REG_EDX,  8},
> >  };
> >  
> >  /*
> > @@ -187,51 +180,25 @@ static const struct feature_entry cpu_feature_table[] = {
> >   * This function, when compiled with GCC, will generate architecture-neutral
> >   * code, as per GCC manual.
> >   */
> > -static inline int
> > -rte_cpu_get_features(struct cpuid_parameters_t params)
> > +static inline void
> > +rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t *out)
> >  {
> > -	int eax, ebx, ecx, edx;            /* registers */
> > -
> > -#ifndef __PIC__
> > -   asm volatile ("cpuid"
> > -                 /* output */
> > -                 : "=a" (eax),
> > -                   "=b" (ebx),
> > -                   "=c" (ecx),
> > -                   "=d" (edx)
> > -                 /* input */
> > -                 : "a" (params.eax),
> > -                   "b" (params.ebx),
> > -                   "c" (params.ecx),
> > -                   "d" (params.edx));
> > +#if defined(__i386__) && defined(__PIC__)
> > +    /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */
> > +    asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0"
> > +		 : "=r" (out[REG_EBX]),
> > +		   "=a" (out[REG_EAX]),
> > +		   "=c" (out[REG_ECX]),
> > +		   "=d" (out[REG_EDX])
> > +		 : "a" (leaf), "c" (subleaf));
> >  #else
> > -	asm volatile ( 
> > -            "mov %%ebx, %%edi\n"
> > -            "cpuid\n"
> > -            "xchgl %%ebx, %%edi;\n"
> > -            : "=a" (eax),
> > -              "=D" (ebx),
> > -              "=c" (ecx),
> > -              "=d" (edx)
> > -            /* input */
> > -            : "a" (params.eax),
> > -              "D" (params.ebx),
> > -              "c" (params.ecx),
> > -              "d" (params.edx));
> > +    asm volatile("cpuid"
> > +		 : "=b" (out[REG_EBX]),
> > +		   "=a" (out[REG_EAX]),
> > +		   "=c" (out[REG_ECX]),
> > +		   "=d" (out[REG_EDX])
> > +		 : "a" (leaf), "c" (subleaf));
> >  #endif
> > -
> > -	switch (params.return_register) {
> > -	case REG_EAX:
> > -		return eax;
> > -	case REG_EBX:
> > -		return ebx;
> > -	case REG_ECX:
> > -		return ecx;
> > -	case REG_EDX:
> > -		return edx;
> > -	default:
> > -		return 0;
> > -	}
> >  }
> >  
> >  /*
> > @@ -240,17 +207,20 @@ rte_cpu_get_features(struct cpuid_parameters_t params)
> >  int
> >  rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
> >  {
> > -	int value;
> > +	const struct feature_entry *feat;
> > +	cpu_registers_t regs;
> >  
> >  	if (feature >= RTE_CPUFLAG_NUMFLAGS)
> >  		/* Flag does not match anything in the feature tables */
> >  		return -ENOENT;
> >  
> > -	/* get value of the register containing the desired feature */
> > -	value = rte_cpu_get_features(cpu_feature_table[feature].params);
> > +	feat = &cpu_feature_table[feature];
> > +
> > +	/* get the cpuid leaf containing the desired feature */
> > +	rte_cpu_get_features(feat->leaf, feat->subleaf, &regs);
> >  
> >  	/* check if the feature is enabled */
> > -	return (cpu_feature_table[feature].feature_mask & value) > 0;
> > +	return (regs[feat->reg] >> feat->bit) & 1;
> >  }
> >  
> >  /**
> > @@ -273,7 +243,7 @@ rte_cpu_check_supported(void)
> >  	unsigned i;
> >  
> >  	for (i = 0; i < sizeof(compile_time_flags)/sizeof(compile_time_flags[0]); i++)
> > -		if (rte_cpu_get_flag_enabled(compile_time_flags[i]) < 1) {
> > +		if (!rte_cpu_get_flag_enabled(compile_time_flags[i])) {
> >  			fprintf(stderr,
> >  			        "ERROR: This system does not support \"%s\".\n"
> >  			        "Please check that RTE_MACHINE is set correctly.\n",
> > -- 
> > 1.8.5.3
> > 
> > 
> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [dpdk-dev] [PATCH v2] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
  2014-03-20 16:39 ` [dpdk-dev] [RFC UNTESTED PATCH] eal_common_cpuflags: Fix %rbx corruption, and simplify the code Neil Horman
  2014-03-20 17:02   ` Thomas Monjalon
  2014-03-20 18:04   ` Neil Horman
@ 2014-03-21 14:49   ` Neil Horman
  2014-03-21 15:03     ` H. Peter Anvin
  2014-03-24 17:44   ` [dpdk-dev] [PATCH v3] " Neil Horman
                     ` (2 subsequent siblings)
  5 siblings, 1 reply; 20+ messages in thread
From: Neil Horman @ 2014-03-21 14:49 UTC (permalink / raw)
  To: dev; +Cc: H. Peter Anvin

From: "H. Peter Anvin" <hpa@linux.intel.com>

Neil Horman reported that on x86-64 the upper half of %rbx would get
clobbered when the code was compiled PIC or PIE, because the
i386-specific code to preserve %ebx was incorrectly compiled.

However, the code is really way more complex than it needs to be.  For
one thing, the CPUID instruction only needs %eax (leaf) and %ecx
(subleaf) as parameters, and since we are testing for bits, we might
as well list the bits explicitly.  Furthermore, we can use an array
rather than doing a switch statement inside a structure.

Reported-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Tested-by: Neil Horman <nhorman@tuxdriver.com>

---
Change notes:
v2) Corrected build errors
    Fixed cpuid_register_t reference passing
    Fixed typedef name typo
---
 lib/librte_eal/common/eal_common_cpuflags.c | 274 +++++++++++++---------------
 1 file changed, 123 insertions(+), 151 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c
index 1ebf78c..438d9c5 100644
--- a/lib/librte_eal/common/eal_common_cpuflags.c
+++ b/lib/librte_eal/common/eal_common_cpuflags.c
@@ -54,21 +54,12 @@
  */
 enum cpu_register_t {
 	REG_EAX = 0,
-	REG_EBX,
 	REG_ECX,
 	REG_EDX,
+	REG_EBX,
 };
 
-/**
- * Parameters for CPUID instruction
- */
-struct cpuid_parameters_t {
-	uint32_t eax;
-	uint32_t ebx;
-	uint32_t ecx;
-	uint32_t edx;
-	enum cpu_register_t return_register;
-};
+typedef uint32_t cpuid_registers_t[4];
 
 #define CPU_FLAG_NAME_MAX_LEN 64
 
@@ -78,8 +69,10 @@ struct cpuid_parameters_t {
 struct feature_entry {
 	enum rte_cpu_flag_t feature;            /**< feature name */
 	char name[CPU_FLAG_NAME_MAX_LEN];       /**< String for printing */
-	struct cpuid_parameters_t params;       /**< cpuid parameters */
-	uint32_t feature_mask;                  /**< bitmask for feature */
+	uint32_t leaf;				/**< cpuid leaf */
+	uint32_t subleaf;			/**< cpuid subleaf */
+	uint32_t reg;				/**< cpuid register */
+	uint32_t bit;				/**< cpuid register bit */
 };
 
 #define FEAT_DEF(f) RTE_CPUFLAG_##f, #f
@@ -88,97 +81,97 @@ struct feature_entry {
  * An array that holds feature entries
  */
 static const struct feature_entry cpu_feature_table[] = {
-	{FEAT_DEF(SSE3),              {0x1, 0, 0, 0, REG_ECX}, 0x00000001},
-	{FEAT_DEF(PCLMULQDQ),         {0x1, 0, 0, 0, REG_ECX}, 0x00000002},
-	{FEAT_DEF(DTES64),            {0x1, 0, 0, 0, REG_ECX}, 0x00000004},
-	{FEAT_DEF(MONITOR),           {0x1, 0, 0, 0, REG_ECX}, 0x00000008},
-	{FEAT_DEF(DS_CPL),            {0x1, 0, 0, 0, REG_ECX}, 0x00000010},
-	{FEAT_DEF(VMX),               {0x1, 0, 0, 0, REG_ECX}, 0x00000020},
-	{FEAT_DEF(SMX),               {0x1, 0, 0, 0, REG_ECX}, 0x00000040},
-	{FEAT_DEF(EIST),              {0x1, 0, 0, 0, REG_ECX}, 0x00000080},
-	{FEAT_DEF(TM2),               {0x1, 0, 0, 0, REG_ECX}, 0x00000100},
-	{FEAT_DEF(SSSE3),             {0x1, 0, 0, 0, REG_ECX}, 0x00000200},
-	{FEAT_DEF(CNXT_ID),           {0x1, 0, 0, 0, REG_ECX}, 0x00000400},
-	{FEAT_DEF(FMA),               {0x1, 0, 0, 0, REG_ECX}, 0x00001000},
-	{FEAT_DEF(CMPXCHG16B),        {0x1, 0, 0, 0, REG_ECX}, 0x00002000},
-	{FEAT_DEF(XTPR),              {0x1, 0, 0, 0, REG_ECX}, 0x00004000},
-	{FEAT_DEF(PDCM),              {0x1, 0, 0, 0, REG_ECX}, 0x00008000},
-	{FEAT_DEF(PCID),              {0x1, 0, 0, 0, REG_ECX}, 0x00020000},
-	{FEAT_DEF(DCA),               {0x1, 0, 0, 0, REG_ECX}, 0x00040000},
-	{FEAT_DEF(SSE4_1),            {0x1, 0, 0, 0, REG_ECX}, 0x00080000},
-	{FEAT_DEF(SSE4_2),            {0x1, 0, 0, 0, REG_ECX}, 0x00100000},
-	{FEAT_DEF(X2APIC),            {0x1, 0, 0, 0, REG_ECX}, 0x00200000},
-	{FEAT_DEF(MOVBE),             {0x1, 0, 0, 0, REG_ECX}, 0x00400000},
-	{FEAT_DEF(POPCNT),            {0x1, 0, 0, 0, REG_ECX}, 0x00800000},
-	{FEAT_DEF(TSC_DEADLINE),      {0x1, 0, 0, 0, REG_ECX}, 0x01000000},
-	{FEAT_DEF(AES),               {0x1, 0, 0, 0, REG_ECX}, 0x02000000},
-	{FEAT_DEF(XSAVE),             {0x1, 0, 0, 0, REG_ECX}, 0x04000000},
-	{FEAT_DEF(OSXSAVE),           {0x1, 0, 0, 0, REG_ECX}, 0x08000000},
-	{FEAT_DEF(AVX),               {0x1, 0, 0, 0, REG_ECX}, 0x10000000},
-	{FEAT_DEF(F16C),              {0x1, 0, 0, 0, REG_ECX}, 0x20000000},
-	{FEAT_DEF(RDRAND),            {0x1, 0, 0, 0, REG_ECX}, 0x40000000},
-
-	{FEAT_DEF(FPU),               {0x1, 0, 0, 0, REG_EDX}, 0x00000001},
-	{FEAT_DEF(VME),               {0x1, 0, 0, 0, REG_EDX}, 0x00000002},
-	{FEAT_DEF(DE),                {0x1, 0, 0, 0, REG_EDX}, 0x00000004},
-	{FEAT_DEF(PSE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000008},
-	{FEAT_DEF(TSC),               {0x1, 0, 0, 0, REG_EDX}, 0x00000010},
-	{FEAT_DEF(MSR),               {0x1, 0, 0, 0, REG_EDX}, 0x00000020},
-	{FEAT_DEF(PAE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000040},
-	{FEAT_DEF(MCE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000080},
-	{FEAT_DEF(CX8),               {0x1, 0, 0, 0, REG_EDX}, 0x00000100},
-	{FEAT_DEF(APIC),              {0x1, 0, 0, 0, REG_EDX}, 0x00000200},
-	{FEAT_DEF(SEP),               {0x1, 0, 0, 0, REG_EDX}, 0x00000800},
-	{FEAT_DEF(MTRR),              {0x1, 0, 0, 0, REG_EDX}, 0x00001000},
-	{FEAT_DEF(PGE),               {0x1, 0, 0, 0, REG_EDX}, 0x00002000},
-	{FEAT_DEF(MCA),               {0x1, 0, 0, 0, REG_EDX}, 0x00004000},
-	{FEAT_DEF(CMOV),              {0x1, 0, 0, 0, REG_EDX}, 0x00008000},
-	{FEAT_DEF(PAT),               {0x1, 0, 0, 0, REG_EDX}, 0x00010000},
-	{FEAT_DEF(PSE36),             {0x1, 0, 0, 0, REG_EDX}, 0x00020000},
-	{FEAT_DEF(PSN),               {0x1, 0, 0, 0, REG_EDX}, 0x00040000},
-	{FEAT_DEF(CLFSH),             {0x1, 0, 0, 0, REG_EDX}, 0x00080000},
-	{FEAT_DEF(DS),                {0x1, 0, 0, 0, REG_EDX}, 0x00200000},
-	{FEAT_DEF(ACPI),              {0x1, 0, 0, 0, REG_EDX}, 0x00400000},
-	{FEAT_DEF(MMX),               {0x1, 0, 0, 0, REG_EDX}, 0x00800000},
-	{FEAT_DEF(FXSR),              {0x1, 0, 0, 0, REG_EDX}, 0x01000000},
-	{FEAT_DEF(SSE),               {0x1, 0, 0, 0, REG_EDX}, 0x02000000},
-	{FEAT_DEF(SSE2),              {0x1, 0, 0, 0, REG_EDX}, 0x04000000},
-	{FEAT_DEF(SS),                {0x1, 0, 0, 0, REG_EDX}, 0x08000000},
-	{FEAT_DEF(HTT),               {0x1, 0, 0, 0, REG_EDX}, 0x10000000},
-	{FEAT_DEF(TM),                {0x1, 0, 0, 0, REG_EDX}, 0x20000000},
-	{FEAT_DEF(PBE),               {0x1, 0, 0, 0, REG_EDX}, 0x80000000},
-
-	{FEAT_DEF(DIGTEMP),           {0x6, 0, 0, 0, REG_EAX}, 0x00000001},
-	{FEAT_DEF(TRBOBST),           {0x6, 0, 0, 0, REG_EAX}, 0x00000002},
-	{FEAT_DEF(ARAT),              {0x6, 0, 0, 0, REG_EAX}, 0x00000004},
-	{FEAT_DEF(PLN),               {0x6, 0, 0, 0, REG_EAX}, 0x00000010},
-	{FEAT_DEF(ECMD),              {0x6, 0, 0, 0, REG_EAX}, 0x00000020},
-	{FEAT_DEF(PTM),               {0x6, 0, 0, 0, REG_EAX}, 0x00000040},
-
-	{FEAT_DEF(MPERF_APERF_MSR),   {0x6, 0, 0, 0, REG_ECX}, 0x00000001},
-	{FEAT_DEF(ACNT2),             {0x6, 0, 0, 0, REG_ECX}, 0x00000002},
-	{FEAT_DEF(ENERGY_EFF),        {0x6, 0, 0, 0, REG_ECX}, 0x00000008},
-
-	{FEAT_DEF(FSGSBASE),          {0x7, 0, 0, 0, REG_EBX}, 0x00000001},
-	{FEAT_DEF(BMI1),              {0x7, 0, 0, 0, REG_EBX}, 0x00000004},
-	{FEAT_DEF(HLE),               {0x7, 0, 0, 0, REG_EBX}, 0x00000010},
-	{FEAT_DEF(AVX2),              {0x7, 0, 0, 0, REG_EBX}, 0x00000020},
-	{FEAT_DEF(SMEP),              {0x7, 0, 0, 0, REG_EBX}, 0x00000040},
-	{FEAT_DEF(BMI2),              {0x7, 0, 0, 0, REG_EBX}, 0x00000080},
-	{FEAT_DEF(ERMS),              {0x7, 0, 0, 0, REG_EBX}, 0x00000100},
-	{FEAT_DEF(INVPCID),           {0x7, 0, 0, 0, REG_EBX}, 0x00000400},
-	{FEAT_DEF(RTM),               {0x7, 0, 0, 0, REG_EBX}, 0x00000800},
-
-	{FEAT_DEF(LAHF_SAHF),  {0x80000001, 0, 0, 0, REG_ECX}, 0x00000001},
-	{FEAT_DEF(LZCNT),      {0x80000001, 0, 0, 0, REG_ECX}, 0x00000010},
-
-	{FEAT_DEF(SYSCALL),    {0x80000001, 0, 0, 0, REG_EDX}, 0x00000800},
-	{FEAT_DEF(XD),         {0x80000001, 0, 0, 0, REG_EDX}, 0x00100000},
-	{FEAT_DEF(1GB_PG),     {0x80000001, 0, 0, 0, REG_EDX}, 0x04000000},
-	{FEAT_DEF(RDTSCP),     {0x80000001, 0, 0, 0, REG_EDX}, 0x08000000},
-	{FEAT_DEF(EM64T),      {0x80000001, 0, 0, 0, REG_EDX}, 0x20000000},
-
-	{FEAT_DEF(INVTSC),     {0x80000007, 0, 0, 0, REG_EDX}, 0x00000100},
+	{FEAT_DEF(SSE3),		0x00000001, 0, REG_ECX,  0},
+	{FEAT_DEF(PCLMULQDQ),		0x00000001, 0, REG_ECX,  1},
+	{FEAT_DEF(DTES64),		0x00000001, 0, REG_ECX,  2},
+	{FEAT_DEF(MONITOR),		0x00000001, 0, REG_ECX,  3},
+	{FEAT_DEF(DS_CPL),		0x00000001, 0, REG_ECX,  4},
+	{FEAT_DEF(VMX),			0x00000001, 0, REG_ECX,  5},
+	{FEAT_DEF(SMX),			0x00000001, 0, REG_ECX,  6},
+	{FEAT_DEF(EIST),		0x00000001, 0, REG_ECX,  7},
+	{FEAT_DEF(TM2),			0x00000001, 0, REG_ECX,  8},
+	{FEAT_DEF(SSSE3),		0x00000001, 0, REG_ECX,  9},
+	{FEAT_DEF(CNXT_ID),		0x00000001, 0, REG_ECX, 10},
+	{FEAT_DEF(FMA),			0x00000001, 0, REG_ECX, 12},
+	{FEAT_DEF(CMPXCHG16B),		0x00000001, 0, REG_ECX, 13},
+	{FEAT_DEF(XTPR),		0x00000001, 0, REG_ECX, 14},
+	{FEAT_DEF(PDCM),		0x00000001, 0, REG_ECX, 15},
+	{FEAT_DEF(PCID),		0x00000001, 0, REG_ECX, 17},
+	{FEAT_DEF(DCA),			0x00000001, 0, REG_ECX, 18},
+	{FEAT_DEF(SSE4_1),		0x00000001, 0, REG_ECX, 19},
+	{FEAT_DEF(SSE4_2),		0x00000001, 0, REG_ECX, 20},
+	{FEAT_DEF(X2APIC),		0x00000001, 0, REG_ECX, 21},
+	{FEAT_DEF(MOVBE),		0x00000001, 0, REG_ECX, 22},
+	{FEAT_DEF(POPCNT),		0x00000001, 0, REG_ECX, 23},
+	{FEAT_DEF(TSC_DEADLINE),	0x00000001, 0, REG_ECX, 24},
+	{FEAT_DEF(AES),			0x00000001, 0, REG_ECX, 25},
+	{FEAT_DEF(XSAVE),		0x00000001, 0, REG_ECX, 26},
+	{FEAT_DEF(OSXSAVE),		0x00000001, 0, REG_ECX, 27},
+	{FEAT_DEF(AVX),			0x00000001, 0, REG_ECX, 28},
+	{FEAT_DEF(F16C),		0x00000001, 0, REG_ECX, 29},
+	{FEAT_DEF(RDRAND),		0x00000001, 0, REG_ECX, 30},
+
+	{FEAT_DEF(FPU),			0x00000001, 0, REG_EDX,  0},
+	{FEAT_DEF(VME),			0x00000001, 0, REG_EDX,  1},
+	{FEAT_DEF(DE),			0x00000001, 0, REG_EDX,  2},
+	{FEAT_DEF(PSE),			0x00000001, 0, REG_EDX,  3},
+	{FEAT_DEF(TSC),			0x00000001, 0, REG_EDX,  4},
+	{FEAT_DEF(MSR),			0x00000001, 0, REG_EDX,  5},
+	{FEAT_DEF(PAE),			0x00000001, 0, REG_EDX,  6},
+	{FEAT_DEF(MCE),			0x00000001, 0, REG_EDX,  7},
+	{FEAT_DEF(CX8),			0x00000001, 0, REG_EDX,  8},
+	{FEAT_DEF(APIC),		0x00000001, 0, REG_EDX,  9},
+	{FEAT_DEF(SEP),			0x00000001, 0, REG_EDX, 11},
+	{FEAT_DEF(MTRR),		0x00000001, 0, REG_EDX, 12},
+	{FEAT_DEF(PGE),			0x00000001, 0, REG_EDX, 13},
+	{FEAT_DEF(MCA),			0x00000001, 0, REG_EDX, 14},
+	{FEAT_DEF(CMOV),		0x00000001, 0, REG_EDX, 15},
+	{FEAT_DEF(PAT),			0x00000001, 0, REG_EDX, 16},
+	{FEAT_DEF(PSE36),		0x00000001, 0, REG_EDX, 17},
+	{FEAT_DEF(PSN),			0x00000001, 0, REG_EDX, 18},
+	{FEAT_DEF(CLFSH),		0x00000001, 0, REG_EDX, 19},
+	{FEAT_DEF(DS),			0x00000001, 0, REG_EDX, 21},
+	{FEAT_DEF(ACPI),		0x00000001, 0, REG_EDX, 22},
+	{FEAT_DEF(MMX),			0x00000001, 0, REG_EDX, 23},
+	{FEAT_DEF(FXSR),		0x00000001, 0, REG_EDX, 24},
+	{FEAT_DEF(SSE),			0x00000001, 0, REG_EDX, 25},
+	{FEAT_DEF(SSE2),		0x00000001, 0, REG_EDX, 26},
+	{FEAT_DEF(SS),			0x00000001, 0, REG_EDX, 27},
+	{FEAT_DEF(HTT),			0x00000001, 0, REG_EDX, 28},
+	{FEAT_DEF(TM),			0x00000001, 0, REG_EDX, 29},
+	{FEAT_DEF(PBE),			0x00000001, 0, REG_EDX, 31},
+
+	{FEAT_DEF(DIGTEMP),		0x00000006, 0, REG_EAX,  0},
+	{FEAT_DEF(TRBOBST),		0x00000006, 0, REG_EAX,  1},
+	{FEAT_DEF(ARAT),		0x00000006, 0, REG_EAX,  2},
+	{FEAT_DEF(PLN),			0x00000006, 0, REG_EAX,  4},
+	{FEAT_DEF(ECMD),		0x00000006, 0, REG_EAX,  5},
+	{FEAT_DEF(PTM),			0x00000006, 0, REG_EAX,  6},
+
+	{FEAT_DEF(MPERF_APERF_MSR),	0x00000006, 0, REG_ECX,  0},
+	{FEAT_DEF(ACNT2),		0x00000006, 0, REG_ECX,  1},
+	{FEAT_DEF(ENERGY_EFF),		0x00000006, 0, REG_ECX,  3},
+
+	{FEAT_DEF(FSGSBASE),		0x00000007, 0, REG_EBX,  0},
+	{FEAT_DEF(BMI1),		0x00000007, 0, REG_EBX,  2},
+	{FEAT_DEF(HLE),			0x00000007, 0, REG_EBX,  4},
+	{FEAT_DEF(AVX2),		0x00000007, 0, REG_EBX,  5},
+	{FEAT_DEF(SMEP),		0x00000007, 0, REG_EBX,  6},
+	{FEAT_DEF(BMI2),		0x00000007, 0, REG_EBX,  7},
+	{FEAT_DEF(ERMS),		0x00000007, 0, REG_EBX,  8},
+	{FEAT_DEF(INVPCID),		0x00000007, 0, REG_EBX, 10},
+	{FEAT_DEF(RTM),			0x00000007, 0, REG_EBX, 11},
+
+	{FEAT_DEF(LAHF_SAHF),		0x80000001, 0, REG_ECX,  0},
+	{FEAT_DEF(LZCNT),		0x80000001, 0, REG_ECX,  4},
+
+	{FEAT_DEF(SYSCALL),		0x80000001, 0, REG_EDX, 11},
+	{FEAT_DEF(XD),			0x80000001, 0, REG_EDX, 20},
+	{FEAT_DEF(1GB_PG),		0x80000001, 0, REG_EDX, 26},
+	{FEAT_DEF(RDTSCP),		0x80000001, 0, REG_EDX, 27},
+	{FEAT_DEF(EM64T),		0x80000001, 0, REG_EDX, 29},
+
+	{FEAT_DEF(INVTSC),		0x80000007, 0, REG_EDX,  8},
 };
 
 /*
@@ -187,51 +180,27 @@ static const struct feature_entry cpu_feature_table[] = {
  * This function, when compiled with GCC, will generate architecture-neutral
  * code, as per GCC manual.
  */
-static inline int
-rte_cpu_get_features(struct cpuid_parameters_t params)
+static inline void
+rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t out)
 {
-	int eax, ebx, ecx, edx;            /* registers */
-
-#ifndef __PIC__
-   asm volatile ("cpuid"
-                 /* output */
-                 : "=a" (eax),
-                   "=b" (ebx),
-                   "=c" (ecx),
-                   "=d" (edx)
-                 /* input */
-                 : "a" (params.eax),
-                   "b" (params.ebx),
-                   "c" (params.ecx),
-                   "d" (params.edx));
+#if defined(__i386__) && defined(__PIC__)
+    /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */
+    asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0"
+		 : "=r" (out[REG_EBX]),
+		   "=a" (out[REG_EAX]),
+		   "=c" (out[REG_ECX]),
+		   "=d" (out[REG_EDX])
+		 : "a" (leaf), "c" (subleaf));
 #else
-	asm volatile ( 
-            "mov %%ebx, %%edi\n"
-            "cpuid\n"
-            "xchgl %%ebx, %%edi;\n"
-            : "=a" (eax),
-              "=D" (ebx),
-              "=c" (ecx),
-              "=d" (edx)
-            /* input */
-            : "a" (params.eax),
-              "D" (params.ebx),
-              "c" (params.ecx),
-              "d" (params.edx));
-#endif
 
-	switch (params.return_register) {
-	case REG_EAX:
-		return eax;
-	case REG_EBX:
-		return ebx;
-	case REG_ECX:
-		return ecx;
-	case REG_EDX:
-		return edx;
-	default:
-		return 0;
-	}
+    asm volatile("cpuid"
+		 : "=a" (out[REG_EAX]),
+		   "=b" (out[REG_EBX]),
+		   "=c" (out[REG_ECX]),
+		   "=d" (out[REG_EDX])
+		 : "a" (leaf), "c" (subleaf));
+
+#endif
 }
 
 /*
@@ -240,17 +209,20 @@ rte_cpu_get_features(struct cpuid_parameters_t params)
 int
 rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
 {
-	int value;
+	const struct feature_entry *feat;
+	cpuid_registers_t regs;
 
 	if (feature >= RTE_CPUFLAG_NUMFLAGS)
 		/* Flag does not match anything in the feature tables */
 		return -ENOENT;
 
-	/* get value of the register containing the desired feature */
-	value = rte_cpu_get_features(cpu_feature_table[feature].params);
+	feat = &cpu_feature_table[feature];
+
+	/* get the cpuid leaf containing the desired feature */
+	rte_cpu_get_features(feat->leaf, feat->subleaf, regs);
 
 	/* check if the feature is enabled */
-	return (cpu_feature_table[feature].feature_mask & value) > 0;
+	return (regs[feat->reg] >> feat->bit) & 1;
 }
 
 /**
@@ -273,7 +245,7 @@ rte_cpu_check_supported(void)
 	unsigned i;
 
 	for (i = 0; i < sizeof(compile_time_flags)/sizeof(compile_time_flags[0]); i++)
-		if (rte_cpu_get_flag_enabled(compile_time_flags[i]) < 1) {
+		if (!rte_cpu_get_flag_enabled(compile_time_flags[i])) {
 			fprintf(stderr,
 			        "ERROR: This system does not support \"%s\".\n"
 			        "Please check that RTE_MACHINE is set correctly.\n",
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH v2] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
  2014-03-21 14:49   ` [dpdk-dev] [PATCH v2] " Neil Horman
@ 2014-03-21 15:03     ` H. Peter Anvin
  2014-03-21 17:48       ` Neil Horman
  0 siblings, 1 reply; 20+ messages in thread
From: H. Peter Anvin @ 2014-03-21 15:03 UTC (permalink / raw)
  To: Neil Horman, dev

On 03/21/2014 07:49 AM, Neil Horman wrote:
> From: "H. Peter Anvin" <hpa@linux.intel.com>
> 
> Neil Horman reported that on x86-64 the upper half of %rbx would get
> clobbered when the code was compiled PIC or PIE, because the
> i386-specific code to preserve %ebx was incorrectly compiled.
> 
> However, the code is really way more complex than it needs to be.  For
> one thing, the CPUID instruction only needs %eax (leaf) and %ecx
> (subleaf) as parameters, and since we are testing for bits, we might
> as well list the bits explicitly.  Furthermore, we can use an array
> rather than doing a switch statement inside a structure.
> 
> Reported-by: Neil Horman <nhorman@tuxdriver.com>
> Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
> Tested-by: Neil Horman <nhorman@tuxdriver.com>
> 

Thank you for dealing with this!

On the subject of my other email... are C99 initializers acceptable in
dpdk?  If so, I think making that change, too, would be a good idea.

	-hpa

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH v2] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
  2014-03-21 15:03     ` H. Peter Anvin
@ 2014-03-21 17:48       ` Neil Horman
  2014-03-24 11:18         ` Thomas Monjalon
  0 siblings, 1 reply; 20+ messages in thread
From: Neil Horman @ 2014-03-21 17:48 UTC (permalink / raw)
  To: H. Peter Anvin; +Cc: dev

On Fri, Mar 21, 2014 at 08:03:34AM -0700, H. Peter Anvin wrote:
> On 03/21/2014 07:49 AM, Neil Horman wrote:
> > From: "H. Peter Anvin" <hpa@linux.intel.com>
> > 
> > Neil Horman reported that on x86-64 the upper half of %rbx would get
> > clobbered when the code was compiled PIC or PIE, because the
> > i386-specific code to preserve %ebx was incorrectly compiled.
> > 
> > However, the code is really way more complex than it needs to be.  For
> > one thing, the CPUID instruction only needs %eax (leaf) and %ecx
> > (subleaf) as parameters, and since we are testing for bits, we might
> > as well list the bits explicitly.  Furthermore, we can use an array
> > rather than doing a switch statement inside a structure.
> > 
> > Reported-by: Neil Horman <nhorman@tuxdriver.com>
> > Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
> > Tested-by: Neil Horman <nhorman@tuxdriver.com>
> > 
> 
> Thank you for dealing with this!
> 
> On the subject of my other email... are C99 initializers acceptable in
> dpdk?  If so, I think making that change, too, would be a good idea.
> 
I'll have to defer this to others; I'm not sure what the accepted initialization
method is.  I'm guessing they're fine, as both icc and gcc allow them and those
are the supported compilers for dpdk, but I'd like to hear someone in the
maintainership comment.

Best
Neil

> 	-hpa
> 
> 
> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH v2] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
  2014-03-21 17:48       ` Neil Horman
@ 2014-03-24 11:18         ` Thomas Monjalon
  0 siblings, 0 replies; 20+ messages in thread
From: Thomas Monjalon @ 2014-03-24 11:18 UTC (permalink / raw)
  To: Neil Horman, H. Peter Anvin; +Cc: dev

21/03/2014 13:48, Neil Horman:
> On Fri, Mar 21, 2014 at 08:03:34AM -0700, H. Peter Anvin wrote:
> > On 03/21/2014 07:49 AM, Neil Horman wrote:
> > > From: "H. Peter Anvin" <hpa@linux.intel.com>
> > > 
> > > Neil Horman reported that on x86-64 the upper half of %rbx would get
> > > clobbered when the code was compiled PIC or PIE, because the
> > > i386-specific code to preserve %ebx was incorrectly compiled.
> > > 
> > > However, the code is really way more complex than it needs to be.  For
> > > one thing, the CPUID instruction only needs %eax (leaf) and %ecx
> > > (subleaf) as parameters, and since we are testing for bits, we might
> > > as well list the bits explicitly.  Furthermore, we can use an array
> > > rather than doing a switch statement inside a structure.
> > > 
> > > Reported-by: Neil Horman <nhorman@tuxdriver.com>
> > > Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
> > > Tested-by: Neil Horman <nhorman@tuxdriver.com>
> > 
> > Thank you for dealing with this!
> > 
> > On the subject of my other email... are C99 initializers acceptable in
> > dpdk?  If so, I think making that change, too, would be a good idea.
> 
> I'll have to defer this to others; I'm not sure what the accepted
> initialization method is.  I'm guessing they're fine, as both icc and gcc
> allow them and those are the supported compilers for dpdk, but I'd like to
> hear someone in the maintainership comment.

Yes, DPDK uses C99 syntax.
Feel free to use it in a v3 :)

-- 
Thomas

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [dpdk-dev] [PATCH v3] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
  2014-03-20 16:39 ` [dpdk-dev] [RFC UNTESTED PATCH] eal_common_cpuflags: Fix %rbx corruption, and simplify the code Neil Horman
                     ` (2 preceding siblings ...)
  2014-03-21 14:49   ` [dpdk-dev] [PATCH v2] " Neil Horman
@ 2014-03-24 17:44   ` Neil Horman
  2014-03-24 18:09     ` H. Peter Anvin
  2014-03-25 17:03   ` [dpdk-dev] [PATCH v4] " Neil Horman
  2014-03-25 19:52   ` [dpdk-dev] [PATCH v5] " Neil Horman
  5 siblings, 1 reply; 20+ messages in thread
From: Neil Horman @ 2014-03-24 17:44 UTC (permalink / raw)
  To: dev; +Cc: H. Peter Anvin

Neil Horman reported that on x86-64 the upper half of %rbx would get
clobbered when the code was compiled PIC or PIE, because the
i386-specific code to preserve %ebx was incorrectly compiled.

However, the code is really way more complex than it needs to be.  For
one thing, the CPUID instruction only needs %eax (leaf) and %ecx
(subleaf) as parameters, and since we are testing for bits, we might
as well list the bits explicitly.  Furthermore, we can use an array
rather than doing a switch statement inside a structure.

Reported-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>

---
Change notes:
v2) Corrected build errors
Fixed cpuid_register_t reference passing
Fixed typedef name typo

v3)
* Modified feature_entry struct to drop the name field, as it's unused
* Modified cpu_feature_table to use C99 initializers
* Updated FEAT_DEF macro to include all feature_entry fields
* Modified cpuid_reg enum to start at 1 rather than zero
* Added CPUID_REG macro to drop enum value by 1 during access
* Added check on feat->reg use to detect missing entries
* Fixed a bug in rte_cpu_check_supported in which negative errors are ignored
---
 lib/librte_eal/common/eal_common_cpuflags.c | 281 +++++++++++++---------------
 1 file changed, 134 insertions(+), 147 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c
index 1ebf78c..9ee0490 100644
--- a/lib/librte_eal/common/eal_common_cpuflags.c
+++ b/lib/librte_eal/common/eal_common_cpuflags.c
@@ -53,22 +53,15 @@
  * Enumeration of CPU registers
  */
 enum cpu_register_t {
-	REG_EAX = 0,
-	REG_EBX,
+	REG_EAX = 1,
 	REG_ECX,
 	REG_EDX,
+	REG_EBX,
 };
 
-/**
- * Parameters for CPUID instruction
- */
-struct cpuid_parameters_t {
-	uint32_t eax;
-	uint32_t ebx;
-	uint32_t ecx;
-	uint32_t edx;
-	enum cpu_register_t return_register;
-};
+#define CPUID_REG(reg) (reg - 1)
+
+typedef uint32_t cpuid_registers_t[4];
 
 #define CPU_FLAG_NAME_MAX_LEN 64
 
@@ -76,109 +69,111 @@ struct cpuid_parameters_t {
  * Struct to hold a processor feature entry
  */
 struct feature_entry {
-	enum rte_cpu_flag_t feature;            /**< feature name */
+	uint32_t leaf;				/**< cpuid leaf */
+	uint32_t subleaf;			/**< cpuid subleaf */
+	uint32_t reg;				/**< cpuid register */
+	uint32_t bit;				/**< cpuid register bit */
 	char name[CPU_FLAG_NAME_MAX_LEN];       /**< String for printing */
-	struct cpuid_parameters_t params;       /**< cpuid parameters */
-	uint32_t feature_mask;                  /**< bitmask for feature */
 };
 
-#define FEAT_DEF(f) RTE_CPUFLAG_##f, #f
+#define FEAT_DEF(name, leaf, subleaf, reg, bit) \
+	[RTE_CPUFLAG_##name] = {leaf, subleaf, reg, bit, #name },
 
 /**
  * An array that holds feature entries
  */
 static const struct feature_entry cpu_feature_table[] = {
-	{FEAT_DEF(SSE3),              {0x1, 0, 0, 0, REG_ECX}, 0x00000001},
-	{FEAT_DEF(PCLMULQDQ),         {0x1, 0, 0, 0, REG_ECX}, 0x00000002},
-	{FEAT_DEF(DTES64),            {0x1, 0, 0, 0, REG_ECX}, 0x00000004},
-	{FEAT_DEF(MONITOR),           {0x1, 0, 0, 0, REG_ECX}, 0x00000008},
-	{FEAT_DEF(DS_CPL),            {0x1, 0, 0, 0, REG_ECX}, 0x00000010},
-	{FEAT_DEF(VMX),               {0x1, 0, 0, 0, REG_ECX}, 0x00000020},
-	{FEAT_DEF(SMX),               {0x1, 0, 0, 0, REG_ECX}, 0x00000040},
-	{FEAT_DEF(EIST),              {0x1, 0, 0, 0, REG_ECX}, 0x00000080},
-	{FEAT_DEF(TM2),               {0x1, 0, 0, 0, REG_ECX}, 0x00000100},
-	{FEAT_DEF(SSSE3),             {0x1, 0, 0, 0, REG_ECX}, 0x00000200},
-	{FEAT_DEF(CNXT_ID),           {0x1, 0, 0, 0, REG_ECX}, 0x00000400},
-	{FEAT_DEF(FMA),               {0x1, 0, 0, 0, REG_ECX}, 0x00001000},
-	{FEAT_DEF(CMPXCHG16B),        {0x1, 0, 0, 0, REG_ECX}, 0x00002000},
-	{FEAT_DEF(XTPR),              {0x1, 0, 0, 0, REG_ECX}, 0x00004000},
-	{FEAT_DEF(PDCM),              {0x1, 0, 0, 0, REG_ECX}, 0x00008000},
-	{FEAT_DEF(PCID),              {0x1, 0, 0, 0, REG_ECX}, 0x00020000},
-	{FEAT_DEF(DCA),               {0x1, 0, 0, 0, REG_ECX}, 0x00040000},
-	{FEAT_DEF(SSE4_1),            {0x1, 0, 0, 0, REG_ECX}, 0x00080000},
-	{FEAT_DEF(SSE4_2),            {0x1, 0, 0, 0, REG_ECX}, 0x00100000},
-	{FEAT_DEF(X2APIC),            {0x1, 0, 0, 0, REG_ECX}, 0x00200000},
-	{FEAT_DEF(MOVBE),             {0x1, 0, 0, 0, REG_ECX}, 0x00400000},
-	{FEAT_DEF(POPCNT),            {0x1, 0, 0, 0, REG_ECX}, 0x00800000},
-	{FEAT_DEF(TSC_DEADLINE),      {0x1, 0, 0, 0, REG_ECX}, 0x01000000},
-	{FEAT_DEF(AES),               {0x1, 0, 0, 0, REG_ECX}, 0x02000000},
-	{FEAT_DEF(XSAVE),             {0x1, 0, 0, 0, REG_ECX}, 0x04000000},
-	{FEAT_DEF(OSXSAVE),           {0x1, 0, 0, 0, REG_ECX}, 0x08000000},
-	{FEAT_DEF(AVX),               {0x1, 0, 0, 0, REG_ECX}, 0x10000000},
-	{FEAT_DEF(F16C),              {0x1, 0, 0, 0, REG_ECX}, 0x20000000},
-	{FEAT_DEF(RDRAND),            {0x1, 0, 0, 0, REG_ECX}, 0x40000000},
+	FEAT_DEF(SSE3, 0x00000001, 0, REG_ECX,  0)
+	FEAT_DEF(PCLMULQDQ, 0x00000001, 0, REG_ECX,  1)
+	FEAT_DEF(DTES64, 0x00000001, 0, REG_ECX,  2)
+	FEAT_DEF(MONITOR, 0x00000001, 0, REG_ECX,  3)
+	FEAT_DEF(DS_CPL, 0x00000001, 0, REG_ECX,  4)
+	FEAT_DEF(VMX, 0x00000001, 0, REG_ECX,  5)
+	FEAT_DEF(SMX, 0x00000001, 0, REG_ECX,  6)
+	FEAT_DEF(EIST, 0x00000001, 0, REG_ECX,  7)
+	FEAT_DEF(TM2, 0x00000001, 0, REG_ECX,  8)
+	FEAT_DEF(SSSE3, 0x00000001, 0, REG_ECX,  9)
+	FEAT_DEF(CNXT_ID, 0x00000001, 0, REG_ECX, 10)
+	FEAT_DEF(FMA, 0x00000001, 0, REG_ECX, 12)
+	FEAT_DEF(CMPXCHG16B, 0x00000001, 0, REG_ECX, 13)
+	FEAT_DEF(XTPR, 0x00000001, 0, REG_ECX, 14)
+	FEAT_DEF(PDCM, 0x00000001, 0, REG_ECX, 15)
+	FEAT_DEF(PCID, 0x00000001, 0, REG_ECX, 17)
+	FEAT_DEF(DCA, 0x00000001, 0, REG_ECX, 18)
+	FEAT_DEF(SSE4_1, 0x00000001, 0, REG_ECX, 19)
+	FEAT_DEF(SSE4_2, 0x00000001, 0, REG_ECX, 20)
+	FEAT_DEF(X2APIC, 0x00000001, 0, REG_ECX, 21)
+	FEAT_DEF(MOVBE, 0x00000001, 0, REG_ECX, 22)
+	FEAT_DEF(POPCNT, 0x00000001, 0, REG_ECX, 23)
+	FEAT_DEF(TSC_DEADLINE, 0x00000001, 0, REG_ECX, 24)
+	FEAT_DEF(AES, 0x00000001, 0, REG_ECX, 25)
+	FEAT_DEF(XSAVE, 0x00000001, 0, REG_ECX, 26)
+	FEAT_DEF(OSXSAVE, 0x00000001, 0, REG_ECX, 27)
+	FEAT_DEF(AVX, 0x00000001, 0, REG_ECX, 28)
+	FEAT_DEF(F16C, 0x00000001, 0, REG_ECX, 29)
+	FEAT_DEF(RDRAND, 0x00000001, 0, REG_ECX, 30)
 
-	{FEAT_DEF(FPU),               {0x1, 0, 0, 0, REG_EDX}, 0x00000001},
-	{FEAT_DEF(VME),               {0x1, 0, 0, 0, REG_EDX}, 0x00000002},
-	{FEAT_DEF(DE),                {0x1, 0, 0, 0, REG_EDX}, 0x00000004},
-	{FEAT_DEF(PSE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000008},
-	{FEAT_DEF(TSC),               {0x1, 0, 0, 0, REG_EDX}, 0x00000010},
-	{FEAT_DEF(MSR),               {0x1, 0, 0, 0, REG_EDX}, 0x00000020},
-	{FEAT_DEF(PAE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000040},
-	{FEAT_DEF(MCE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000080},
-	{FEAT_DEF(CX8),               {0x1, 0, 0, 0, REG_EDX}, 0x00000100},
-	{FEAT_DEF(APIC),              {0x1, 0, 0, 0, REG_EDX}, 0x00000200},
-	{FEAT_DEF(SEP),               {0x1, 0, 0, 0, REG_EDX}, 0x00000800},
-	{FEAT_DEF(MTRR),              {0x1, 0, 0, 0, REG_EDX}, 0x00001000},
-	{FEAT_DEF(PGE),               {0x1, 0, 0, 0, REG_EDX}, 0x00002000},
-	{FEAT_DEF(MCA),               {0x1, 0, 0, 0, REG_EDX}, 0x00004000},
-	{FEAT_DEF(CMOV),              {0x1, 0, 0, 0, REG_EDX}, 0x00008000},
-	{FEAT_DEF(PAT),               {0x1, 0, 0, 0, REG_EDX}, 0x00010000},
-	{FEAT_DEF(PSE36),             {0x1, 0, 0, 0, REG_EDX}, 0x00020000},
-	{FEAT_DEF(PSN),               {0x1, 0, 0, 0, REG_EDX}, 0x00040000},
-	{FEAT_DEF(CLFSH),             {0x1, 0, 0, 0, REG_EDX}, 0x00080000},
-	{FEAT_DEF(DS),                {0x1, 0, 0, 0, REG_EDX}, 0x00200000},
-	{FEAT_DEF(ACPI),              {0x1, 0, 0, 0, REG_EDX}, 0x00400000},
-	{FEAT_DEF(MMX),               {0x1, 0, 0, 0, REG_EDX}, 0x00800000},
-	{FEAT_DEF(FXSR),              {0x1, 0, 0, 0, REG_EDX}, 0x01000000},
-	{FEAT_DEF(SSE),               {0x1, 0, 0, 0, REG_EDX}, 0x02000000},
-	{FEAT_DEF(SSE2),              {0x1, 0, 0, 0, REG_EDX}, 0x04000000},
-	{FEAT_DEF(SS),                {0x1, 0, 0, 0, REG_EDX}, 0x08000000},
-	{FEAT_DEF(HTT),               {0x1, 0, 0, 0, REG_EDX}, 0x10000000},
-	{FEAT_DEF(TM),                {0x1, 0, 0, 0, REG_EDX}, 0x20000000},
-	{FEAT_DEF(PBE),               {0x1, 0, 0, 0, REG_EDX}, 0x80000000},
+	FEAT_DEF(FPU, 0x00000001, 0, REG_EDX,  0)
+	FEAT_DEF(VME, 0x00000001, 0, REG_EDX,  1)
+	FEAT_DEF(DE, 0x00000001, 0, REG_EDX,  2)
+	FEAT_DEF(PSE, 0x00000001, 0, REG_EDX,  3)
+	FEAT_DEF(TSC, 0x00000001, 0, REG_EDX,  4)
+	FEAT_DEF(MSR, 0x00000001, 0, REG_EDX,  5)
+	FEAT_DEF(PAE, 0x00000001, 0, REG_EDX,  6)
+	FEAT_DEF(MCE, 0x00000001, 0, REG_EDX,  7)
+	FEAT_DEF(CX8, 0x00000001, 0, REG_EDX,  8)
+	FEAT_DEF(APIC, 0x00000001, 0, REG_EDX,  9)
+	FEAT_DEF(SEP, 0x00000001, 0, REG_EDX, 11)
+	FEAT_DEF(MTRR, 0x00000001, 0, REG_EDX, 12)
+	FEAT_DEF(PGE, 0x00000001, 0, REG_EDX, 13)
+	FEAT_DEF(MCA, 0x00000001, 0, REG_EDX, 14)
+	FEAT_DEF(CMOV, 0x00000001, 0, REG_EDX, 15)
+	FEAT_DEF(PAT, 0x00000001, 0, REG_EDX, 16)
+	FEAT_DEF(PSE36, 0x00000001, 0, REG_EDX, 17)
+	FEAT_DEF(PSN, 0x00000001, 0, REG_EDX, 18)
+	FEAT_DEF(CLFSH, 0x00000001, 0, REG_EDX, 19)
+	FEAT_DEF(DS, 0x00000001, 0, REG_EDX, 21)
+	FEAT_DEF(ACPI, 0x00000001, 0, REG_EDX, 22)
+	FEAT_DEF(MMX, 0x00000001, 0, REG_EDX, 23)
+	FEAT_DEF(FXSR, 0x00000001, 0, REG_EDX, 24)
+	FEAT_DEF(SSE, 0x00000001, 0, REG_EDX, 25)
+	FEAT_DEF(SSE2, 0x00000001, 0, REG_EDX, 26)
+	FEAT_DEF(SS, 0x00000001, 0, REG_EDX, 27)
+	FEAT_DEF(HTT, 0x00000001, 0, REG_EDX, 28)
+	FEAT_DEF(TM, 0x00000001, 0, REG_EDX, 29)
+	FEAT_DEF(PBE, 0x00000001, 0, REG_EDX, 31)
 
-	{FEAT_DEF(DIGTEMP),           {0x6, 0, 0, 0, REG_EAX}, 0x00000001},
-	{FEAT_DEF(TRBOBST),           {0x6, 0, 0, 0, REG_EAX}, 0x00000002},
-	{FEAT_DEF(ARAT),              {0x6, 0, 0, 0, REG_EAX}, 0x00000004},
-	{FEAT_DEF(PLN),               {0x6, 0, 0, 0, REG_EAX}, 0x00000010},
-	{FEAT_DEF(ECMD),              {0x6, 0, 0, 0, REG_EAX}, 0x00000020},
-	{FEAT_DEF(PTM),               {0x6, 0, 0, 0, REG_EAX}, 0x00000040},
+	FEAT_DEF(DIGTEMP, 0x00000006, 0, REG_EAX,  0)
+	FEAT_DEF(TRBOBST, 0x00000006, 0, REG_EAX,  1)
+	FEAT_DEF(ARAT, 0x00000006, 0, REG_EAX,  2)
+	FEAT_DEF(PLN, 0x00000006, 0, REG_EAX,  4)
+	FEAT_DEF(ECMD, 0x00000006, 0, REG_EAX,  5)
+	FEAT_DEF(PTM, 0x00000006, 0, REG_EAX,  6)
 
-	{FEAT_DEF(MPERF_APERF_MSR),   {0x6, 0, 0, 0, REG_ECX}, 0x00000001},
-	{FEAT_DEF(ACNT2),             {0x6, 0, 0, 0, REG_ECX}, 0x00000002},
-	{FEAT_DEF(ENERGY_EFF),        {0x6, 0, 0, 0, REG_ECX}, 0x00000008},
+	FEAT_DEF(MPERF_APERF_MSR, 0x00000006, 0, REG_ECX,  0)
+	FEAT_DEF(ACNT2, 0x00000006, 0, REG_ECX,  1)
+	FEAT_DEF(ENERGY_EFF, 0x00000006, 0, REG_ECX,  3)
 
-	{FEAT_DEF(FSGSBASE),          {0x7, 0, 0, 0, REG_EBX}, 0x00000001},
-	{FEAT_DEF(BMI1),              {0x7, 0, 0, 0, REG_EBX}, 0x00000004},
-	{FEAT_DEF(HLE),               {0x7, 0, 0, 0, REG_EBX}, 0x00000010},
-	{FEAT_DEF(AVX2),              {0x7, 0, 0, 0, REG_EBX}, 0x00000020},
-	{FEAT_DEF(SMEP),              {0x7, 0, 0, 0, REG_EBX}, 0x00000040},
-	{FEAT_DEF(BMI2),              {0x7, 0, 0, 0, REG_EBX}, 0x00000080},
-	{FEAT_DEF(ERMS),              {0x7, 0, 0, 0, REG_EBX}, 0x00000100},
-	{FEAT_DEF(INVPCID),           {0x7, 0, 0, 0, REG_EBX}, 0x00000400},
-	{FEAT_DEF(RTM),               {0x7, 0, 0, 0, REG_EBX}, 0x00000800},
+	FEAT_DEF(FSGSBASE, 0x00000007, 0, REG_EBX,  0)
+	FEAT_DEF(BMI1, 0x00000007, 0, REG_EBX,  2)
+	FEAT_DEF(HLE, 0x00000007, 0, REG_EBX,  4)
+	FEAT_DEF(AVX2, 0x00000007, 0, REG_EBX,  5)
+	FEAT_DEF(SMEP, 0x00000007, 0, REG_EBX,  6)
+	FEAT_DEF(BMI2, 0x00000007, 0, REG_EBX,  7)
+	FEAT_DEF(ERMS, 0x00000007, 0, REG_EBX,  8)
+	FEAT_DEF(INVPCID, 0x00000007, 0, REG_EBX, 10)
+	FEAT_DEF(RTM, 0x00000007, 0, REG_EBX, 11)
 
-	{FEAT_DEF(LAHF_SAHF),  {0x80000001, 0, 0, 0, REG_ECX}, 0x00000001},
-	{FEAT_DEF(LZCNT),      {0x80000001, 0, 0, 0, REG_ECX}, 0x00000010},
+	FEAT_DEF(LAHF_SAHF, 0x80000001, 0, REG_ECX,  0)
+	FEAT_DEF(LZCNT, 0x80000001, 0, REG_ECX,  4)
 
-	{FEAT_DEF(SYSCALL),    {0x80000001, 0, 0, 0, REG_EDX}, 0x00000800},
-	{FEAT_DEF(XD),         {0x80000001, 0, 0, 0, REG_EDX}, 0x00100000},
-	{FEAT_DEF(1GB_PG),     {0x80000001, 0, 0, 0, REG_EDX}, 0x04000000},
-	{FEAT_DEF(RDTSCP),     {0x80000001, 0, 0, 0, REG_EDX}, 0x08000000},
-	{FEAT_DEF(EM64T),      {0x80000001, 0, 0, 0, REG_EDX}, 0x20000000},
+	FEAT_DEF(SYSCALL, 0x80000001, 0, REG_EDX, 11)
+	FEAT_DEF(XD, 0x80000001, 0, REG_EDX, 20)
+	FEAT_DEF(1GB_PG, 0x80000001, 0, REG_EDX, 26)
+	FEAT_DEF(RDTSCP, 0x80000001, 0, REG_EDX, 27)
+	FEAT_DEF(EM64T, 0x80000001, 0, REG_EDX, 29)
 
-	{FEAT_DEF(INVTSC),     {0x80000007, 0, 0, 0, REG_EDX}, 0x00000100},
+	FEAT_DEF(INVTSC, 0x80000007, 0, REG_EDX,  8)
 };
 
 /*
@@ -187,51 +182,27 @@ static const struct feature_entry cpu_feature_table[] = {
  * This function, when compiled with GCC, will generate architecture-neutral
  * code, as per GCC manual.
  */
-static inline int
-rte_cpu_get_features(struct cpuid_parameters_t params)
+static inline void
+rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t out)
 {
-	int eax, ebx, ecx, edx;            /* registers */
-
-#ifndef __PIC__
-   asm volatile ("cpuid"
-                 /* output */
-                 : "=a" (eax),
-                   "=b" (ebx),
-                   "=c" (ecx),
-                   "=d" (edx)
-                 /* input */
-                 : "a" (params.eax),
-                   "b" (params.ebx),
-                   "c" (params.ecx),
-                   "d" (params.edx));
+#if defined(__i386__) && defined(__PIC__)
+    /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */
+    asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0"
+		 : "=r" (out[CPUID_REG(REG_EBX)]),
+		   "=a" (out[CPUID_REG(REG_EAX)]),
+		   "=c" (out[CPUID_REG(REG_ECX)]),
+		   "=d" (out[CPUID_REG(REG_EDX)])
+		 : "a" (leaf), "c" (subleaf));
 #else
-	asm volatile ( 
-            "mov %%ebx, %%edi\n"
-            "cpuid\n"
-            "xchgl %%ebx, %%edi;\n"
-            : "=a" (eax),
-              "=D" (ebx),
-              "=c" (ecx),
-              "=d" (edx)
-            /* input */
-            : "a" (params.eax),
-              "D" (params.ebx),
-              "c" (params.ecx),
-              "d" (params.edx));
-#endif
 
-	switch (params.return_register) {
-	case REG_EAX:
-		return eax;
-	case REG_EBX:
-		return ebx;
-	case REG_ECX:
-		return ecx;
-	case REG_EDX:
-		return edx;
-	default:
-		return 0;
-	}
+    asm volatile("cpuid"
+		 : "=a" (out[CPUID_REG(REG_EAX)]),
+		   "=b" (out[CPUID_REG(REG_EBX)]),
+		   "=c" (out[CPUID_REG(REG_ECX)]),
+		   "=d" (out[CPUID_REG(REG_EDX)])
+		 : "a" (leaf), "c" (subleaf));
+
+#endif
 }
 
 /*
@@ -240,17 +211,24 @@ rte_cpu_get_features(struct cpuid_parameters_t params)
 int
 rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
 {
-	int value;
+	const struct feature_entry *feat;
+	cpuid_registers_t regs;
 
 	if (feature >= RTE_CPUFLAG_NUMFLAGS)
 		/* Flag does not match anything in the feature tables */
 		return -ENOENT;
 
-	/* get value of the register containing the desired feature */
-	value = rte_cpu_get_features(cpu_feature_table[feature].params);
+	feat = &cpu_feature_table[feature];
+
+	if (!feat->reg)
+		/* This entry in the table wasn't filled out! */
+		return -EFAULT;
+
+	/* get the cpuid leaf containing the desired feature */
+	rte_cpu_get_features(feat->leaf, feat->subleaf, regs);
 
 	/* check if the feature is enabled */
-	return (cpu_feature_table[feature].feature_mask & value) > 0;
+	return (regs[CPUID_REG(feat->reg)] >> feat->bit) & 1;
 }
 
 /**
@@ -271,9 +249,18 @@ rte_cpu_check_supported(void)
 			RTE_COMPILE_TIME_CPUFLAGS
 	};
 	unsigned i;
+	int ret;
 
 	for (i = 0; i < sizeof(compile_time_flags)/sizeof(compile_time_flags[0]); i++)
-		if (rte_cpu_get_flag_enabled(compile_time_flags[i]) < 1) {
+		ret = rte_cpu_get_flag_enabled(compile_time_flags[i]);
+
+		if (ret < 0) {
+			fprintf(stderr,
+				"ERROR: CPU feature flag lookup failed with error %d\n",
+				ret);
+			exit(1);
+		}
+		if (!ret) {
 			fprintf(stderr,
 			        "ERROR: This system does not support \"%s\".\n"
 			        "Please check that RTE_MACHINE is set correctly.\n",
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH v3] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
  2014-03-24 17:44   ` [dpdk-dev] [PATCH v3] " Neil Horman
@ 2014-03-24 18:09     ` H. Peter Anvin
  2014-03-24 19:52       ` Neil Horman
  0 siblings, 1 reply; 20+ messages in thread
From: H. Peter Anvin @ 2014-03-24 18:09 UTC (permalink / raw)
  To: Neil Horman, dev

On 03/24/2014 10:44 AM, Neil Horman wrote:
> * Modified cpuid_reg enum to start at 1 rather than zero
> * Added CPUID_REG macro to drop enum value by 1 during access

I guess I don't get it... why?

	-hpa

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH v3] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
  2014-03-24 18:09     ` H. Peter Anvin
@ 2014-03-24 19:52       ` Neil Horman
  2014-03-24 20:47         ` H. Peter Anvin
  0 siblings, 1 reply; 20+ messages in thread
From: Neil Horman @ 2014-03-24 19:52 UTC (permalink / raw)
  To: H. Peter Anvin; +Cc: dev

On Mon, Mar 24, 2014 at 11:09:52AM -0700, H. Peter Anvin wrote:
> On 03/24/2014 10:44 AM, Neil Horman wrote:
> > * Modified cpuid_reg enum to start at 1 rather than zero
> > * Added CPUID_REG macro to drop enum value by 1 during access
> 
> I guess I don't get it... why?
> 
To add an extra sanity check in rte_cpu_get_flag_enabled.  If we were moving to
the use of C99 initializers, I wanted something to catch the possibility that we
skip a flag by accident (i.e. leave a zero-initialized hole in the array).
Except 0, from my read, is a valid value for all the fields of the array.  So I
bumped up the cpuid register enum by one and wrapped it in a macro.  That way we
can test for !feat->reg as an indicator that we're requesting feature support
for a flag that's not listed in the array.
Neil

> 	-hpa
> 
> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH v3] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
  2014-03-24 19:52       ` Neil Horman
@ 2014-03-24 20:47         ` H. Peter Anvin
  2014-03-25 10:41           ` Neil Horman
  0 siblings, 1 reply; 20+ messages in thread
From: H. Peter Anvin @ 2014-03-24 20:47 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

On 03/24/2014 12:52 PM, Neil Horman wrote:
>>
> To add an extra sanity check in rte_get_flag_enabled.  If we were moving to the
> use of C99 initalizers, I wanted something to catch the possibility that we skip
> a flag by accident (i.e. leave a zero initalized hole in the array).  Except 0
> from my read is a valid value for all the fields of the array.  So I bumped up
> the cpuid register enum by one and wrapped it in a macro.  That way we can test
> for !feat->reg as an indicator that we're requesting feature support for a flag
> thats not listed in the array.

It actually isn't: there aren't any flags in CPUID leaf 0, so since the
code only looks for bits it'd be perfectly okay to reject leaf 0.

Another thing that I noted is that the code doesn't actually check that
any particular leaf is valid (by checking the maximum leaf number in
CPUID leaf 0xXXXX0000:EAX).  Especially for the leaf 7 features this
could result in false positives, which obviously would be disastrous.

	-hpa

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH v3] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
  2014-03-24 20:47         ` H. Peter Anvin
@ 2014-03-25 10:41           ` Neil Horman
  0 siblings, 0 replies; 20+ messages in thread
From: Neil Horman @ 2014-03-25 10:41 UTC (permalink / raw)
  To: H. Peter Anvin; +Cc: dev

On Mon, Mar 24, 2014 at 01:47:55PM -0700, H. Peter Anvin wrote:
> On 03/24/2014 12:52 PM, Neil Horman wrote:
> >>
> > To add an extra sanity check in rte_get_flag_enabled.  If we were moving to the
> > use of C99 initalizers, I wanted something to catch the possibility that we skip
> > a flag by accident (i.e. leave a zero initalized hole in the array).  Except 0
> > from my read is a valid value for all the fields of the array.  So I bumped up
> > the cpuid register enum by one and wrapped it in a macro.  That way we can test
> > for !feat->reg as an indicator that we're requesting feature support for a flag
> > thats not listed in the array.
> 
> It actually isn't: there aren't any flags in CPUID leaf 0, so since the
> code only looks for bits it'd be perfectly okay to reject leaf 0.
> 
> Another thing that I noted is that the code doesn't actually check that
> any particular leaf is valid (by checking the maximum leaf number in
> CPUID leaf 0xXXXX0000:EAX).  Especially for the leaf 7 features this
> could result in false positives, which obviously would be disastrous.
> 
Thanks, I'll improve this checking today.

> 	-hpa
> 
> 
> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [dpdk-dev] [PATCH v4] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
  2014-03-20 16:39 ` [dpdk-dev] [RFC UNTESTED PATCH] eal_common_cpuflags: Fix %rbx corruption, and simplify the code Neil Horman
                     ` (3 preceding siblings ...)
  2014-03-24 17:44   ` [dpdk-dev] [PATCH v3] " Neil Horman
@ 2014-03-25 17:03   ` Neil Horman
  2014-03-25 17:06     ` Chris Wright
  2014-03-25 17:37     ` H. Peter Anvin
  2014-03-25 19:52   ` [dpdk-dev] [PATCH v5] " Neil Horman
  5 siblings, 2 replies; 20+ messages in thread
From: Neil Horman @ 2014-03-25 17:03 UTC (permalink / raw)
  To: dev; +Cc: H. Peter Anvin

Neil Horman reported that on x86-64 the upper half of %rbx would get
clobbered when the code was compiled PIC or PIE, because the
i386-specific code to preserve %ebx was incorrectly compiled.

However, the code is really way more complex than it needs to be.  For
one thing, the CPUID instruction only needs %eax (leaf) and %ecx
(subleaf) as parameters, and since we are testing for bits, we might
as well list the bits explicitly.  Furthermore, we can use an array
rather than doing a switch statement inside a structure.

Reported-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>

---
Change notes:
v2) Corrected build errors
Fixed cpuid_register_t reference passing
Fixed typedef name typo

v3)
* Modified feature_entry struct to drop the name field, as it's unused
* Modified cpu_feature_table to use C99 initializers
* Updated FEAT_DEF macro to include all feature_entry fields
* Modified cpuid_reg enum to start at 1 rather than zero
* Added CPUID_REG macro to drop enum value by 1 during access
* Added check on feat->reg use to detect missing entries
* Fixed a bug in rte_cpu_check_supported in which negative errors are ignored

v4)
* Fixed sanity checks to not offset feat->reg and just check !feat->reg
* Added a check for the sanity of the leaf node
---
 lib/librte_eal/common/eal_common_cpuflags.c | 287 ++++++++++++++--------------
 1 file changed, 141 insertions(+), 146 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c
index 1ebf78c..b61e271 100644
--- a/lib/librte_eal/common/eal_common_cpuflags.c
+++ b/lib/librte_eal/common/eal_common_cpuflags.c
@@ -54,21 +54,12 @@
  */
 enum cpu_register_t {
 	REG_EAX = 0,
-	REG_EBX,
 	REG_ECX,
 	REG_EDX,
+	REG_EBX,
 };
 
-/**
- * Parameters for CPUID instruction
- */
-struct cpuid_parameters_t {
-	uint32_t eax;
-	uint32_t ebx;
-	uint32_t ecx;
-	uint32_t edx;
-	enum cpu_register_t return_register;
-};
+typedef uint32_t cpuid_registers_t[4];
 
 #define CPU_FLAG_NAME_MAX_LEN 64
 
@@ -76,109 +67,111 @@ struct cpuid_parameters_t {
  * Struct to hold a processor feature entry
  */
 struct feature_entry {
-	enum rte_cpu_flag_t feature;            /**< feature name */
+	uint32_t leaf;				/**< cpuid leaf */
+	uint32_t subleaf;			/**< cpuid subleaf */
+	uint32_t reg;				/**< cpuid register */
+	uint32_t bit;				/**< cpuid register bit */
 	char name[CPU_FLAG_NAME_MAX_LEN];       /**< String for printing */
-	struct cpuid_parameters_t params;       /**< cpuid parameters */
-	uint32_t feature_mask;                  /**< bitmask for feature */
 };
 
-#define FEAT_DEF(f) RTE_CPUFLAG_##f, #f
+#define FEAT_DEF(name, leaf, subleaf, reg, bit) \
+	[RTE_CPUFLAG_##name] = {leaf, subleaf, reg, bit, #name },
 
 /**
  * An array that holds feature entries
  */
 static const struct feature_entry cpu_feature_table[] = {
-	{FEAT_DEF(SSE3),              {0x1, 0, 0, 0, REG_ECX}, 0x00000001},
-	{FEAT_DEF(PCLMULQDQ),         {0x1, 0, 0, 0, REG_ECX}, 0x00000002},
-	{FEAT_DEF(DTES64),            {0x1, 0, 0, 0, REG_ECX}, 0x00000004},
-	{FEAT_DEF(MONITOR),           {0x1, 0, 0, 0, REG_ECX}, 0x00000008},
-	{FEAT_DEF(DS_CPL),            {0x1, 0, 0, 0, REG_ECX}, 0x00000010},
-	{FEAT_DEF(VMX),               {0x1, 0, 0, 0, REG_ECX}, 0x00000020},
-	{FEAT_DEF(SMX),               {0x1, 0, 0, 0, REG_ECX}, 0x00000040},
-	{FEAT_DEF(EIST),              {0x1, 0, 0, 0, REG_ECX}, 0x00000080},
-	{FEAT_DEF(TM2),               {0x1, 0, 0, 0, REG_ECX}, 0x00000100},
-	{FEAT_DEF(SSSE3),             {0x1, 0, 0, 0, REG_ECX}, 0x00000200},
-	{FEAT_DEF(CNXT_ID),           {0x1, 0, 0, 0, REG_ECX}, 0x00000400},
-	{FEAT_DEF(FMA),               {0x1, 0, 0, 0, REG_ECX}, 0x00001000},
-	{FEAT_DEF(CMPXCHG16B),        {0x1, 0, 0, 0, REG_ECX}, 0x00002000},
-	{FEAT_DEF(XTPR),              {0x1, 0, 0, 0, REG_ECX}, 0x00004000},
-	{FEAT_DEF(PDCM),              {0x1, 0, 0, 0, REG_ECX}, 0x00008000},
-	{FEAT_DEF(PCID),              {0x1, 0, 0, 0, REG_ECX}, 0x00020000},
-	{FEAT_DEF(DCA),               {0x1, 0, 0, 0, REG_ECX}, 0x00040000},
-	{FEAT_DEF(SSE4_1),            {0x1, 0, 0, 0, REG_ECX}, 0x00080000},
-	{FEAT_DEF(SSE4_2),            {0x1, 0, 0, 0, REG_ECX}, 0x00100000},
-	{FEAT_DEF(X2APIC),            {0x1, 0, 0, 0, REG_ECX}, 0x00200000},
-	{FEAT_DEF(MOVBE),             {0x1, 0, 0, 0, REG_ECX}, 0x00400000},
-	{FEAT_DEF(POPCNT),            {0x1, 0, 0, 0, REG_ECX}, 0x00800000},
-	{FEAT_DEF(TSC_DEADLINE),      {0x1, 0, 0, 0, REG_ECX}, 0x01000000},
-	{FEAT_DEF(AES),               {0x1, 0, 0, 0, REG_ECX}, 0x02000000},
-	{FEAT_DEF(XSAVE),             {0x1, 0, 0, 0, REG_ECX}, 0x04000000},
-	{FEAT_DEF(OSXSAVE),           {0x1, 0, 0, 0, REG_ECX}, 0x08000000},
-	{FEAT_DEF(AVX),               {0x1, 0, 0, 0, REG_ECX}, 0x10000000},
-	{FEAT_DEF(F16C),              {0x1, 0, 0, 0, REG_ECX}, 0x20000000},
-	{FEAT_DEF(RDRAND),            {0x1, 0, 0, 0, REG_ECX}, 0x40000000},
+	FEAT_DEF(SSE3, 0x00000001, 0, REG_ECX,  0)
+	FEAT_DEF(PCLMULQDQ, 0x00000001, 0, REG_ECX,  1)
+	FEAT_DEF(DTES64, 0x00000001, 0, REG_ECX,  2)
+	FEAT_DEF(MONITOR, 0x00000001, 0, REG_ECX,  3)
+	FEAT_DEF(DS_CPL, 0x00000001, 0, REG_ECX,  4)
+	FEAT_DEF(VMX, 0x00000001, 0, REG_ECX,  5)
+	FEAT_DEF(SMX, 0x00000001, 0, REG_ECX,  6)
+	FEAT_DEF(EIST, 0x00000001, 0, REG_ECX,  7)
+	FEAT_DEF(TM2, 0x00000001, 0, REG_ECX,  8)
+	FEAT_DEF(SSSE3, 0x00000001, 0, REG_ECX,  9)
+	FEAT_DEF(CNXT_ID, 0x00000001, 0, REG_ECX, 10)
+	FEAT_DEF(FMA, 0x00000001, 0, REG_ECX, 12)
+	FEAT_DEF(CMPXCHG16B, 0x00000001, 0, REG_ECX, 13)
+	FEAT_DEF(XTPR, 0x00000001, 0, REG_ECX, 14)
+	FEAT_DEF(PDCM, 0x00000001, 0, REG_ECX, 15)
+	FEAT_DEF(PCID, 0x00000001, 0, REG_ECX, 17)
+	FEAT_DEF(DCA, 0x00000001, 0, REG_ECX, 18)
+	FEAT_DEF(SSE4_1, 0x00000001, 0, REG_ECX, 19)
+	FEAT_DEF(SSE4_2, 0x00000001, 0, REG_ECX, 20)
+	FEAT_DEF(X2APIC, 0x00000001, 0, REG_ECX, 21)
+	FEAT_DEF(MOVBE, 0x00000001, 0, REG_ECX, 22)
+	FEAT_DEF(POPCNT, 0x00000001, 0, REG_ECX, 23)
+	FEAT_DEF(TSC_DEADLINE, 0x00000001, 0, REG_ECX, 24)
+	FEAT_DEF(AES, 0x00000001, 0, REG_ECX, 25)
+	FEAT_DEF(XSAVE, 0x00000001, 0, REG_ECX, 26)
+	FEAT_DEF(OSXSAVE, 0x00000001, 0, REG_ECX, 27)
+	FEAT_DEF(AVX, 0x00000001, 0, REG_ECX, 28)
+	FEAT_DEF(F16C, 0x00000001, 0, REG_ECX, 29)
+	FEAT_DEF(RDRAND, 0x00000001, 0, REG_ECX, 30)
 
-	{FEAT_DEF(FPU),               {0x1, 0, 0, 0, REG_EDX}, 0x00000001},
-	{FEAT_DEF(VME),               {0x1, 0, 0, 0, REG_EDX}, 0x00000002},
-	{FEAT_DEF(DE),                {0x1, 0, 0, 0, REG_EDX}, 0x00000004},
-	{FEAT_DEF(PSE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000008},
-	{FEAT_DEF(TSC),               {0x1, 0, 0, 0, REG_EDX}, 0x00000010},
-	{FEAT_DEF(MSR),               {0x1, 0, 0, 0, REG_EDX}, 0x00000020},
-	{FEAT_DEF(PAE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000040},
-	{FEAT_DEF(MCE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000080},
-	{FEAT_DEF(CX8),               {0x1, 0, 0, 0, REG_EDX}, 0x00000100},
-	{FEAT_DEF(APIC),              {0x1, 0, 0, 0, REG_EDX}, 0x00000200},
-	{FEAT_DEF(SEP),               {0x1, 0, 0, 0, REG_EDX}, 0x00000800},
-	{FEAT_DEF(MTRR),              {0x1, 0, 0, 0, REG_EDX}, 0x00001000},
-	{FEAT_DEF(PGE),               {0x1, 0, 0, 0, REG_EDX}, 0x00002000},
-	{FEAT_DEF(MCA),               {0x1, 0, 0, 0, REG_EDX}, 0x00004000},
-	{FEAT_DEF(CMOV),              {0x1, 0, 0, 0, REG_EDX}, 0x00008000},
-	{FEAT_DEF(PAT),               {0x1, 0, 0, 0, REG_EDX}, 0x00010000},
-	{FEAT_DEF(PSE36),             {0x1, 0, 0, 0, REG_EDX}, 0x00020000},
-	{FEAT_DEF(PSN),               {0x1, 0, 0, 0, REG_EDX}, 0x00040000},
-	{FEAT_DEF(CLFSH),             {0x1, 0, 0, 0, REG_EDX}, 0x00080000},
-	{FEAT_DEF(DS),                {0x1, 0, 0, 0, REG_EDX}, 0x00200000},
-	{FEAT_DEF(ACPI),              {0x1, 0, 0, 0, REG_EDX}, 0x00400000},
-	{FEAT_DEF(MMX),               {0x1, 0, 0, 0, REG_EDX}, 0x00800000},
-	{FEAT_DEF(FXSR),              {0x1, 0, 0, 0, REG_EDX}, 0x01000000},
-	{FEAT_DEF(SSE),               {0x1, 0, 0, 0, REG_EDX}, 0x02000000},
-	{FEAT_DEF(SSE2),              {0x1, 0, 0, 0, REG_EDX}, 0x04000000},
-	{FEAT_DEF(SS),                {0x1, 0, 0, 0, REG_EDX}, 0x08000000},
-	{FEAT_DEF(HTT),               {0x1, 0, 0, 0, REG_EDX}, 0x10000000},
-	{FEAT_DEF(TM),                {0x1, 0, 0, 0, REG_EDX}, 0x20000000},
-	{FEAT_DEF(PBE),               {0x1, 0, 0, 0, REG_EDX}, 0x80000000},
+	FEAT_DEF(FPU, 0x00000001, 0, REG_EDX,  0)
+	FEAT_DEF(VME, 0x00000001, 0, REG_EDX,  1)
+	FEAT_DEF(DE, 0x00000001, 0, REG_EDX,  2)
+	FEAT_DEF(PSE, 0x00000001, 0, REG_EDX,  3)
+	FEAT_DEF(TSC, 0x00000001, 0, REG_EDX,  4)
+	FEAT_DEF(MSR, 0x00000001, 0, REG_EDX,  5)
+	FEAT_DEF(PAE, 0x00000001, 0, REG_EDX,  6)
+	FEAT_DEF(MCE, 0x00000001, 0, REG_EDX,  7)
+	FEAT_DEF(CX8, 0x00000001, 0, REG_EDX,  8)
+	FEAT_DEF(APIC, 0x00000001, 0, REG_EDX,  9)
+	FEAT_DEF(SEP, 0x00000001, 0, REG_EDX, 11)
+	FEAT_DEF(MTRR, 0x00000001, 0, REG_EDX, 12)
+	FEAT_DEF(PGE, 0x00000001, 0, REG_EDX, 13)
+	FEAT_DEF(MCA, 0x00000001, 0, REG_EDX, 14)
+	FEAT_DEF(CMOV, 0x00000001, 0, REG_EDX, 15)
+	FEAT_DEF(PAT, 0x00000001, 0, REG_EDX, 16)
+	FEAT_DEF(PSE36, 0x00000001, 0, REG_EDX, 17)
+	FEAT_DEF(PSN, 0x00000001, 0, REG_EDX, 18)
+	FEAT_DEF(CLFSH, 0x00000001, 0, REG_EDX, 19)
+	FEAT_DEF(DS, 0x00000001, 0, REG_EDX, 21)
+	FEAT_DEF(ACPI, 0x00000001, 0, REG_EDX, 22)
+	FEAT_DEF(MMX, 0x00000001, 0, REG_EDX, 23)
+	FEAT_DEF(FXSR, 0x00000001, 0, REG_EDX, 24)
+	FEAT_DEF(SSE, 0x00000001, 0, REG_EDX, 25)
+	FEAT_DEF(SSE2, 0x00000001, 0, REG_EDX, 26)
+	FEAT_DEF(SS, 0x00000001, 0, REG_EDX, 27)
+	FEAT_DEF(HTT, 0x00000001, 0, REG_EDX, 28)
+	FEAT_DEF(TM, 0x00000001, 0, REG_EDX, 29)
+	FEAT_DEF(PBE, 0x00000001, 0, REG_EDX, 31)
 
-	{FEAT_DEF(DIGTEMP),           {0x6, 0, 0, 0, REG_EAX}, 0x00000001},
-	{FEAT_DEF(TRBOBST),           {0x6, 0, 0, 0, REG_EAX}, 0x00000002},
-	{FEAT_DEF(ARAT),              {0x6, 0, 0, 0, REG_EAX}, 0x00000004},
-	{FEAT_DEF(PLN),               {0x6, 0, 0, 0, REG_EAX}, 0x00000010},
-	{FEAT_DEF(ECMD),              {0x6, 0, 0, 0, REG_EAX}, 0x00000020},
-	{FEAT_DEF(PTM),               {0x6, 0, 0, 0, REG_EAX}, 0x00000040},
+	FEAT_DEF(DIGTEMP, 0x00000006, 0, REG_EAX,  0)
+	FEAT_DEF(TRBOBST, 0x00000006, 0, REG_EAX,  1)
+	FEAT_DEF(ARAT, 0x00000006, 0, REG_EAX,  2)
+	FEAT_DEF(PLN, 0x00000006, 0, REG_EAX,  4)
+	FEAT_DEF(ECMD, 0x00000006, 0, REG_EAX,  5)
+	FEAT_DEF(PTM, 0x00000006, 0, REG_EAX,  6)
 
-	{FEAT_DEF(MPERF_APERF_MSR),   {0x6, 0, 0, 0, REG_ECX}, 0x00000001},
-	{FEAT_DEF(ACNT2),             {0x6, 0, 0, 0, REG_ECX}, 0x00000002},
-	{FEAT_DEF(ENERGY_EFF),        {0x6, 0, 0, 0, REG_ECX}, 0x00000008},
+	FEAT_DEF(MPERF_APERF_MSR, 0x00000006, 0, REG_ECX,  0)
+	FEAT_DEF(ACNT2, 0x00000006, 0, REG_ECX,  1)
+	FEAT_DEF(ENERGY_EFF, 0x00000006, 0, REG_ECX,  3)
 
-	{FEAT_DEF(FSGSBASE),          {0x7, 0, 0, 0, REG_EBX}, 0x00000001},
-	{FEAT_DEF(BMI1),              {0x7, 0, 0, 0, REG_EBX}, 0x00000004},
-	{FEAT_DEF(HLE),               {0x7, 0, 0, 0, REG_EBX}, 0x00000010},
-	{FEAT_DEF(AVX2),              {0x7, 0, 0, 0, REG_EBX}, 0x00000020},
-	{FEAT_DEF(SMEP),              {0x7, 0, 0, 0, REG_EBX}, 0x00000040},
-	{FEAT_DEF(BMI2),              {0x7, 0, 0, 0, REG_EBX}, 0x00000080},
-	{FEAT_DEF(ERMS),              {0x7, 0, 0, 0, REG_EBX}, 0x00000100},
-	{FEAT_DEF(INVPCID),           {0x7, 0, 0, 0, REG_EBX}, 0x00000400},
-	{FEAT_DEF(RTM),               {0x7, 0, 0, 0, REG_EBX}, 0x00000800},
+	FEAT_DEF(FSGSBASE, 0x00000007, 0, REG_EBX,  0)
+	FEAT_DEF(BMI1, 0x00000007, 0, REG_EBX,  2)
+	FEAT_DEF(HLE, 0x00000007, 0, REG_EBX,  4)
+	FEAT_DEF(AVX2, 0x00000007, 0, REG_EBX,  5)
+	FEAT_DEF(SMEP, 0x00000007, 0, REG_EBX,  6)
+	FEAT_DEF(BMI2, 0x00000007, 0, REG_EBX,  7)
+	FEAT_DEF(ERMS, 0x00000007, 0, REG_EBX,  8)
+	FEAT_DEF(INVPCID, 0x00000007, 0, REG_EBX, 10)
+	FEAT_DEF(RTM, 0x00000007, 0, REG_EBX, 11)
 
-	{FEAT_DEF(LAHF_SAHF),  {0x80000001, 0, 0, 0, REG_ECX}, 0x00000001},
-	{FEAT_DEF(LZCNT),      {0x80000001, 0, 0, 0, REG_ECX}, 0x00000010},
+	FEAT_DEF(LAHF_SAHF, 0x80000001, 0, REG_ECX,  0)
+	FEAT_DEF(LZCNT, 0x80000001, 0, REG_ECX,  4)
 
-	{FEAT_DEF(SYSCALL),    {0x80000001, 0, 0, 0, REG_EDX}, 0x00000800},
-	{FEAT_DEF(XD),         {0x80000001, 0, 0, 0, REG_EDX}, 0x00100000},
-	{FEAT_DEF(1GB_PG),     {0x80000001, 0, 0, 0, REG_EDX}, 0x04000000},
-	{FEAT_DEF(RDTSCP),     {0x80000001, 0, 0, 0, REG_EDX}, 0x08000000},
-	{FEAT_DEF(EM64T),      {0x80000001, 0, 0, 0, REG_EDX}, 0x20000000},
+	FEAT_DEF(SYSCALL, 0x80000001, 0, REG_EDX, 11)
+	FEAT_DEF(XD, 0x80000001, 0, REG_EDX, 20)
+	FEAT_DEF(1GB_PG, 0x80000001, 0, REG_EDX, 26)
+	FEAT_DEF(RDTSCP, 0x80000001, 0, REG_EDX, 27)
+	FEAT_DEF(EM64T, 0x80000001, 0, REG_EDX, 29)
 
-	{FEAT_DEF(INVTSC),     {0x80000007, 0, 0, 0, REG_EDX}, 0x00000100},
+	FEAT_DEF(INVTSC, 0x80000007, 0, REG_EDX,  8)
 };
 
 /*
@@ -187,51 +180,27 @@ static const struct feature_entry cpu_feature_table[] = {
  * This function, when compiled with GCC, will generate architecture-neutral
  * code, as per GCC manual.
  */
-static inline int
-rte_cpu_get_features(struct cpuid_parameters_t params)
+static inline void
+rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t out)
 {
-	int eax, ebx, ecx, edx;            /* registers */
-
-#ifndef __PIC__
-   asm volatile ("cpuid"
-                 /* output */
-                 : "=a" (eax),
-                   "=b" (ebx),
-                   "=c" (ecx),
-                   "=d" (edx)
-                 /* input */
-                 : "a" (params.eax),
-                   "b" (params.ebx),
-                   "c" (params.ecx),
-                   "d" (params.edx));
+#if defined(__i386__) && defined(__PIC__)
+    /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */
+    asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0"
+		 : "=r" (out[REG_EBX]),
+		   "=a" (out[REG_EAX]),
+		   "=c" (out[REG_ECX]),
+		   "=d" (out[REG_EDX])
+		 : "a" (leaf), "c" (subleaf));
 #else
-	asm volatile ( 
-            "mov %%ebx, %%edi\n"
-            "cpuid\n"
-            "xchgl %%ebx, %%edi;\n"
-            : "=a" (eax),
-              "=D" (ebx),
-              "=c" (ecx),
-              "=d" (edx)
-            /* input */
-            : "a" (params.eax),
-              "D" (params.ebx),
-              "c" (params.ecx),
-              "d" (params.edx));
-#endif
 
-	switch (params.return_register) {
-	case REG_EAX:
-		return eax;
-	case REG_EBX:
-		return ebx;
-	case REG_ECX:
-		return ecx;
-	case REG_EDX:
-		return edx;
-	default:
-		return 0;
-	}
+    asm volatile("cpuid"
+		 : "=a" (out[REG_EAX]),
+		   "=b" (out[REG_EBX]),
+		   "=c" (out[REG_ECX]),
+		   "=d" (out[REG_EDX])
+		 : "a" (leaf), "c" (subleaf));
+
+#endif
 }
 
 /*
@@ -240,17 +209,34 @@ rte_cpu_get_features(struct cpuid_parameters_t params)
 int
 rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
 {
-	int value;
+	const struct feature_entry *feat;
+	cpuid_registers_t regs;
+	static uint32_t max_leaf = 0;
+
+	if (!max_leaf) {
+		/* Get the max input leaf for this processor */
+		rte_cpu_get_features(0, 0, regs);
+		max_leaf = regs[REG_EAX];
+	}
 
 	if (feature >= RTE_CPUFLAG_NUMFLAGS)
 		/* Flag does not match anything in the feature tables */
 		return -ENOENT;
 
-	/* get value of the register containing the desired feature */
-	value = rte_cpu_get_features(cpu_feature_table[feature].params);
+	feat = &cpu_feature_table[feature];
+
+	if (!feat->leaf)
+		/* This entry in the table wasn't filled out! */
+		return -EFAULT;
+
+	if (feat->leaf > max_leaf)
+		return -EINVAL;
+
+	/* get the cpuid leaf containing the desired feature */
+	rte_cpu_get_features(feat->leaf, feat->subleaf, regs);
 
 	/* check if the feature is enabled */
-	return (cpu_feature_table[feature].feature_mask & value) > 0;
+	return (regs[feat->reg] >> feat->bit) & 1;
 }
 
 /**
@@ -271,9 +257,18 @@ rte_cpu_check_supported(void)
 			RTE_COMPILE_TIME_CPUFLAGS
 	};
 	unsigned i;
+	int ret;
 
 	for (i = 0; i < sizeof(compile_time_flags)/sizeof(compile_time_flags[0]); i++)
-		if (rte_cpu_get_flag_enabled(compile_time_flags[i]) < 1) {
+		ret = rte_cpu_get_flag_enabled(compile_time_flags[i]);
+
+		if (ret < 0) {
+			fprintf(stderr,
+				"ERROR: CPU feature flag lookup failed with error %d\n",
+				ret);
+			exit(1);
+		}
+		if (!ret) {
 			fprintf(stderr,
 			        "ERROR: This system does not support \"%s\".\n"
 			        "Please check that RTE_MACHINE is set correctly.\n",
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH v4] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
  2014-03-25 17:03   ` [dpdk-dev] [PATCH v4] " Neil Horman
@ 2014-03-25 17:06     ` Chris Wright
  2014-03-25 17:37     ` H. Peter Anvin
  1 sibling, 0 replies; 20+ messages in thread
From: Chris Wright @ 2014-03-25 17:06 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev, H. Peter Anvin

* Neil Horman (nhorman@tuxdriver.com) wrote:

(given the format, I'd expect a From hpa here)

> Neil Horman reported that on x86-64 the upper half of %rbx would get
> clobbered when the code was compiled PIC or PIE, because the
> i386-specific code to preserve %ebx was incorrectly compiled.
> 
> However, the code is really way more complex than it needs to be.  For
> one thing, the CPUID instruction only needs %eax (leaf) and %ecx
> (subleaf) as parameters, and since we are testing for bits, we might
> as well list the bits explicitly.  Furthermore, we can use an array
> rather than doing a switch statement inside a structure.
> 
> Reported-by: Neil Horman <nhorman@tuxdriver.com>
> Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>

And an S-o-B by you

(sorry for the otherwise content free nitpick)

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH v4] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
  2014-03-25 17:03   ` [dpdk-dev] [PATCH v4] " Neil Horman
  2014-03-25 17:06     ` Chris Wright
@ 2014-03-25 17:37     ` H. Peter Anvin
  1 sibling, 0 replies; 20+ messages in thread
From: H. Peter Anvin @ 2014-03-25 17:37 UTC (permalink / raw)
  To: Neil Horman, dev

On 03/25/2014 10:03 AM, Neil Horman wrote:
>  int
>  rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
>  {
> -	int value;
> +	const struct feature_entry *feat;
> +	cpuid_registers_t regs;
> +	static uint32_t max_leaf = 0;
> +
> +	if (!max_leaf) {
> +		/* Get the max input leaf for this processor */
> +		rte_cpu_get_features(0, 0, regs);
> +		max_leaf = regs[REG_EAX];
> +	}
>  
>  	if (feature >= RTE_CPUFLAG_NUMFLAGS)
>  		/* Flag does not match anything in the feature tables */
>  		return -ENOENT;
>  
> -	/* get value of the register containing the desired feature */
> -	value = rte_cpu_get_features(cpu_feature_table[feature].params);
> +	feat = &cpu_feature_table[feature];
> +
> +	if (!feat->leaf)
> +		/* This entry in the table wasn't filled out! */
> +		return -EFAULT;

> +	if (feat->leaf > max_leaf)
> +		return -EINVAL;

This doesn't quite work.  The max_leaf is per CPUID "group", i.e. the
8000xxxx CPUID leaves have a different limit than 0000xxxx leaves.  So I
would just do this as:

	rte_cpu_get_features(feat->leaf & 0xffff0000, 0, regs);
	if (((regs[REG_EAX] ^ feat->leaf) & 0xffff0000) ||
	    regs[REG_EAX] < feat->leaf)
		return 0;

Returning 0 is the right thing, because this is a legitimate instance of
"this feature is not supported."

The first part is a sanity check that the CPUID leaf group is supported
at all; the second part is the actual limit check.

	-hpa

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [dpdk-dev] [PATCH v5] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
  2014-03-20 16:39 ` [dpdk-dev] [RFC UNTESTED PATCH] eal_common_cpuflags: Fix %rbx corruption, and simplify the code Neil Horman
                     ` (4 preceding siblings ...)
  2014-03-25 17:03   ` [dpdk-dev] [PATCH v4] " Neil Horman
@ 2014-03-25 19:52   ` Neil Horman
  2014-03-25 20:51     ` H. Peter Anvin
  5 siblings, 1 reply; 20+ messages in thread
From: Neil Horman @ 2014-03-25 19:52 UTC (permalink / raw)
  To: dev; +Cc: H. Peter Anvin

Neil Horman reported that on x86-64 the upper half of %rbx would get
clobbered when the code was compiled PIC or PIE, because the
i386-specific code to preserve %ebx was incorrectly compiled.

However, the code is really way more complex than it needs to be.  For
one thing, the CPUID instruction only needs %eax (leaf) and %ecx
(subleaf) as parameters, and since we are testing for bits, we might
as well list the bits explicitly.  Furthermore, we can use an array
rather than doing a switch statement inside a structure.

Reported-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>

---
Change notes:
v2) Corrected build errors
Fixed cpuid_register_t reference passing
Fixed typedef name typo

v3)
* Modified feature_entry struct to drop the name field, as it's unused
* Modified cpu_feature_table to use C99 initializers
* Updated FEAT_DEF macro to include all feature_entry fields
* Modified cpuid_reg enum to start at 1 rather than zero
* Added CPUID_REG macro to drop enum value by 1 during access
* Added check on feat->reg use to detect missing entries
* Fixed a bug in rte_cpu_check_supported in which negative errors are ignored

v4)
* Fixed sanity checks to not offset feat->reg and just check !feat->reg
* Added a check for the sanity of the leaf node

v5)
* Fixed max leaf check to just return not supported rather than error
---
 lib/librte_eal/common/eal_common_cpuflags.c | 281 ++++++++++++++--------------
 1 file changed, 136 insertions(+), 145 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c
index 1ebf78c..f9c1840 100644
--- a/lib/librte_eal/common/eal_common_cpuflags.c
+++ b/lib/librte_eal/common/eal_common_cpuflags.c
@@ -59,16 +59,7 @@ enum cpu_register_t {
 	REG_EDX,
 };
 
-/**
- * Parameters for CPUID instruction
- */
-struct cpuid_parameters_t {
-	uint32_t eax;
-	uint32_t ebx;
-	uint32_t ecx;
-	uint32_t edx;
-	enum cpu_register_t return_register;
-};
+typedef uint32_t cpuid_registers_t[4];
 
 #define CPU_FLAG_NAME_MAX_LEN 64
 
@@ -76,109 +67,111 @@ struct cpuid_parameters_t {
  * Struct to hold a processor feature entry
  */
 struct feature_entry {
-	enum rte_cpu_flag_t feature;            /**< feature name */
+	uint32_t leaf;				/**< cpuid leaf */
+	uint32_t subleaf;			/**< cpuid subleaf */
+	uint32_t reg;				/**< cpuid register */
+	uint32_t bit;				/**< cpuid register bit */
 	char name[CPU_FLAG_NAME_MAX_LEN];       /**< String for printing */
-	struct cpuid_parameters_t params;       /**< cpuid parameters */
-	uint32_t feature_mask;                  /**< bitmask for feature */
 };
 
-#define FEAT_DEF(f) RTE_CPUFLAG_##f, #f
+#define FEAT_DEF(name, leaf, subleaf, reg, bit) \
+	[RTE_CPUFLAG_##name] = {leaf, subleaf, reg, bit, #name },
 
 /**
  * An array that holds feature entries
  */
 static const struct feature_entry cpu_feature_table[] = {
-	{FEAT_DEF(SSE3),              {0x1, 0, 0, 0, REG_ECX}, 0x00000001},
-	{FEAT_DEF(PCLMULQDQ),         {0x1, 0, 0, 0, REG_ECX}, 0x00000002},
-	{FEAT_DEF(DTES64),            {0x1, 0, 0, 0, REG_ECX}, 0x00000004},
-	{FEAT_DEF(MONITOR),           {0x1, 0, 0, 0, REG_ECX}, 0x00000008},
-	{FEAT_DEF(DS_CPL),            {0x1, 0, 0, 0, REG_ECX}, 0x00000010},
-	{FEAT_DEF(VMX),               {0x1, 0, 0, 0, REG_ECX}, 0x00000020},
-	{FEAT_DEF(SMX),               {0x1, 0, 0, 0, REG_ECX}, 0x00000040},
-	{FEAT_DEF(EIST),              {0x1, 0, 0, 0, REG_ECX}, 0x00000080},
-	{FEAT_DEF(TM2),               {0x1, 0, 0, 0, REG_ECX}, 0x00000100},
-	{FEAT_DEF(SSSE3),             {0x1, 0, 0, 0, REG_ECX}, 0x00000200},
-	{FEAT_DEF(CNXT_ID),           {0x1, 0, 0, 0, REG_ECX}, 0x00000400},
-	{FEAT_DEF(FMA),               {0x1, 0, 0, 0, REG_ECX}, 0x00001000},
-	{FEAT_DEF(CMPXCHG16B),        {0x1, 0, 0, 0, REG_ECX}, 0x00002000},
-	{FEAT_DEF(XTPR),              {0x1, 0, 0, 0, REG_ECX}, 0x00004000},
-	{FEAT_DEF(PDCM),              {0x1, 0, 0, 0, REG_ECX}, 0x00008000},
-	{FEAT_DEF(PCID),              {0x1, 0, 0, 0, REG_ECX}, 0x00020000},
-	{FEAT_DEF(DCA),               {0x1, 0, 0, 0, REG_ECX}, 0x00040000},
-	{FEAT_DEF(SSE4_1),            {0x1, 0, 0, 0, REG_ECX}, 0x00080000},
-	{FEAT_DEF(SSE4_2),            {0x1, 0, 0, 0, REG_ECX}, 0x00100000},
-	{FEAT_DEF(X2APIC),            {0x1, 0, 0, 0, REG_ECX}, 0x00200000},
-	{FEAT_DEF(MOVBE),             {0x1, 0, 0, 0, REG_ECX}, 0x00400000},
-	{FEAT_DEF(POPCNT),            {0x1, 0, 0, 0, REG_ECX}, 0x00800000},
-	{FEAT_DEF(TSC_DEADLINE),      {0x1, 0, 0, 0, REG_ECX}, 0x01000000},
-	{FEAT_DEF(AES),               {0x1, 0, 0, 0, REG_ECX}, 0x02000000},
-	{FEAT_DEF(XSAVE),             {0x1, 0, 0, 0, REG_ECX}, 0x04000000},
-	{FEAT_DEF(OSXSAVE),           {0x1, 0, 0, 0, REG_ECX}, 0x08000000},
-	{FEAT_DEF(AVX),               {0x1, 0, 0, 0, REG_ECX}, 0x10000000},
-	{FEAT_DEF(F16C),              {0x1, 0, 0, 0, REG_ECX}, 0x20000000},
-	{FEAT_DEF(RDRAND),            {0x1, 0, 0, 0, REG_ECX}, 0x40000000},
+	FEAT_DEF(SSE3, 0x00000001, 0, REG_ECX,  0)
+	FEAT_DEF(PCLMULQDQ, 0x00000001, 0, REG_ECX,  1)
+	FEAT_DEF(DTES64, 0x00000001, 0, REG_ECX,  2)
+	FEAT_DEF(MONITOR, 0x00000001, 0, REG_ECX,  3)
+	FEAT_DEF(DS_CPL, 0x00000001, 0, REG_ECX,  4)
+	FEAT_DEF(VMX, 0x00000001, 0, REG_ECX,  5)
+	FEAT_DEF(SMX, 0x00000001, 0, REG_ECX,  6)
+	FEAT_DEF(EIST, 0x00000001, 0, REG_ECX,  7)
+	FEAT_DEF(TM2, 0x00000001, 0, REG_ECX,  8)
+	FEAT_DEF(SSSE3, 0x00000001, 0, REG_ECX,  9)
+	FEAT_DEF(CNXT_ID, 0x00000001, 0, REG_ECX, 10)
+	FEAT_DEF(FMA, 0x00000001, 0, REG_ECX, 12)
+	FEAT_DEF(CMPXCHG16B, 0x00000001, 0, REG_ECX, 13)
+	FEAT_DEF(XTPR, 0x00000001, 0, REG_ECX, 14)
+	FEAT_DEF(PDCM, 0x00000001, 0, REG_ECX, 15)
+	FEAT_DEF(PCID, 0x00000001, 0, REG_ECX, 17)
+	FEAT_DEF(DCA, 0x00000001, 0, REG_ECX, 18)
+	FEAT_DEF(SSE4_1, 0x00000001, 0, REG_ECX, 19)
+	FEAT_DEF(SSE4_2, 0x00000001, 0, REG_ECX, 20)
+	FEAT_DEF(X2APIC, 0x00000001, 0, REG_ECX, 21)
+	FEAT_DEF(MOVBE, 0x00000001, 0, REG_ECX, 22)
+	FEAT_DEF(POPCNT, 0x00000001, 0, REG_ECX, 23)
+	FEAT_DEF(TSC_DEADLINE, 0x00000001, 0, REG_ECX, 24)
+	FEAT_DEF(AES, 0x00000001, 0, REG_ECX, 25)
+	FEAT_DEF(XSAVE, 0x00000001, 0, REG_ECX, 26)
+	FEAT_DEF(OSXSAVE, 0x00000001, 0, REG_ECX, 27)
+	FEAT_DEF(AVX, 0x00000001, 0, REG_ECX, 28)
+	FEAT_DEF(F16C, 0x00000001, 0, REG_ECX, 29)
+	FEAT_DEF(RDRAND, 0x00000001, 0, REG_ECX, 30)
 
-	{FEAT_DEF(FPU),               {0x1, 0, 0, 0, REG_EDX}, 0x00000001},
-	{FEAT_DEF(VME),               {0x1, 0, 0, 0, REG_EDX}, 0x00000002},
-	{FEAT_DEF(DE),                {0x1, 0, 0, 0, REG_EDX}, 0x00000004},
-	{FEAT_DEF(PSE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000008},
-	{FEAT_DEF(TSC),               {0x1, 0, 0, 0, REG_EDX}, 0x00000010},
-	{FEAT_DEF(MSR),               {0x1, 0, 0, 0, REG_EDX}, 0x00000020},
-	{FEAT_DEF(PAE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000040},
-	{FEAT_DEF(MCE),               {0x1, 0, 0, 0, REG_EDX}, 0x00000080},
-	{FEAT_DEF(CX8),               {0x1, 0, 0, 0, REG_EDX}, 0x00000100},
-	{FEAT_DEF(APIC),              {0x1, 0, 0, 0, REG_EDX}, 0x00000200},
-	{FEAT_DEF(SEP),               {0x1, 0, 0, 0, REG_EDX}, 0x00000800},
-	{FEAT_DEF(MTRR),              {0x1, 0, 0, 0, REG_EDX}, 0x00001000},
-	{FEAT_DEF(PGE),               {0x1, 0, 0, 0, REG_EDX}, 0x00002000},
-	{FEAT_DEF(MCA),               {0x1, 0, 0, 0, REG_EDX}, 0x00004000},
-	{FEAT_DEF(CMOV),              {0x1, 0, 0, 0, REG_EDX}, 0x00008000},
-	{FEAT_DEF(PAT),               {0x1, 0, 0, 0, REG_EDX}, 0x00010000},
-	{FEAT_DEF(PSE36),             {0x1, 0, 0, 0, REG_EDX}, 0x00020000},
-	{FEAT_DEF(PSN),               {0x1, 0, 0, 0, REG_EDX}, 0x00040000},
-	{FEAT_DEF(CLFSH),             {0x1, 0, 0, 0, REG_EDX}, 0x00080000},
-	{FEAT_DEF(DS),                {0x1, 0, 0, 0, REG_EDX}, 0x00200000},
-	{FEAT_DEF(ACPI),              {0x1, 0, 0, 0, REG_EDX}, 0x00400000},
-	{FEAT_DEF(MMX),               {0x1, 0, 0, 0, REG_EDX}, 0x00800000},
-	{FEAT_DEF(FXSR),              {0x1, 0, 0, 0, REG_EDX}, 0x01000000},
-	{FEAT_DEF(SSE),               {0x1, 0, 0, 0, REG_EDX}, 0x02000000},
-	{FEAT_DEF(SSE2),              {0x1, 0, 0, 0, REG_EDX}, 0x04000000},
-	{FEAT_DEF(SS),                {0x1, 0, 0, 0, REG_EDX}, 0x08000000},
-	{FEAT_DEF(HTT),               {0x1, 0, 0, 0, REG_EDX}, 0x10000000},
-	{FEAT_DEF(TM),                {0x1, 0, 0, 0, REG_EDX}, 0x20000000},
-	{FEAT_DEF(PBE),               {0x1, 0, 0, 0, REG_EDX}, 0x80000000},
+	FEAT_DEF(FPU, 0x00000001, 0, REG_EDX,  0)
+	FEAT_DEF(VME, 0x00000001, 0, REG_EDX,  1)
+	FEAT_DEF(DE, 0x00000001, 0, REG_EDX,  2)
+	FEAT_DEF(PSE, 0x00000001, 0, REG_EDX,  3)
+	FEAT_DEF(TSC, 0x00000001, 0, REG_EDX,  4)
+	FEAT_DEF(MSR, 0x00000001, 0, REG_EDX,  5)
+	FEAT_DEF(PAE, 0x00000001, 0, REG_EDX,  6)
+	FEAT_DEF(MCE, 0x00000001, 0, REG_EDX,  7)
+	FEAT_DEF(CX8, 0x00000001, 0, REG_EDX,  8)
+	FEAT_DEF(APIC, 0x00000001, 0, REG_EDX,  9)
+	FEAT_DEF(SEP, 0x00000001, 0, REG_EDX, 11)
+	FEAT_DEF(MTRR, 0x00000001, 0, REG_EDX, 12)
+	FEAT_DEF(PGE, 0x00000001, 0, REG_EDX, 13)
+	FEAT_DEF(MCA, 0x00000001, 0, REG_EDX, 14)
+	FEAT_DEF(CMOV, 0x00000001, 0, REG_EDX, 15)
+	FEAT_DEF(PAT, 0x00000001, 0, REG_EDX, 16)
+	FEAT_DEF(PSE36, 0x00000001, 0, REG_EDX, 17)
+	FEAT_DEF(PSN, 0x00000001, 0, REG_EDX, 18)
+	FEAT_DEF(CLFSH, 0x00000001, 0, REG_EDX, 19)
+	FEAT_DEF(DS, 0x00000001, 0, REG_EDX, 21)
+	FEAT_DEF(ACPI, 0x00000001, 0, REG_EDX, 22)
+	FEAT_DEF(MMX, 0x00000001, 0, REG_EDX, 23)
+	FEAT_DEF(FXSR, 0x00000001, 0, REG_EDX, 24)
+	FEAT_DEF(SSE, 0x00000001, 0, REG_EDX, 25)
+	FEAT_DEF(SSE2, 0x00000001, 0, REG_EDX, 26)
+	FEAT_DEF(SS, 0x00000001, 0, REG_EDX, 27)
+	FEAT_DEF(HTT, 0x00000001, 0, REG_EDX, 28)
+	FEAT_DEF(TM, 0x00000001, 0, REG_EDX, 29)
+	FEAT_DEF(PBE, 0x00000001, 0, REG_EDX, 31)
 
-	{FEAT_DEF(DIGTEMP),           {0x6, 0, 0, 0, REG_EAX}, 0x00000001},
-	{FEAT_DEF(TRBOBST),           {0x6, 0, 0, 0, REG_EAX}, 0x00000002},
-	{FEAT_DEF(ARAT),              {0x6, 0, 0, 0, REG_EAX}, 0x00000004},
-	{FEAT_DEF(PLN),               {0x6, 0, 0, 0, REG_EAX}, 0x00000010},
-	{FEAT_DEF(ECMD),              {0x6, 0, 0, 0, REG_EAX}, 0x00000020},
-	{FEAT_DEF(PTM),               {0x6, 0, 0, 0, REG_EAX}, 0x00000040},
+	FEAT_DEF(DIGTEMP, 0x00000006, 0, REG_EAX,  0)
+	FEAT_DEF(TRBOBST, 0x00000006, 0, REG_EAX,  1)
+	FEAT_DEF(ARAT, 0x00000006, 0, REG_EAX,  2)
+	FEAT_DEF(PLN, 0x00000006, 0, REG_EAX,  4)
+	FEAT_DEF(ECMD, 0x00000006, 0, REG_EAX,  5)
+	FEAT_DEF(PTM, 0x00000006, 0, REG_EAX,  6)
 
-	{FEAT_DEF(MPERF_APERF_MSR),   {0x6, 0, 0, 0, REG_ECX}, 0x00000001},
-	{FEAT_DEF(ACNT2),             {0x6, 0, 0, 0, REG_ECX}, 0x00000002},
-	{FEAT_DEF(ENERGY_EFF),        {0x6, 0, 0, 0, REG_ECX}, 0x00000008},
+	FEAT_DEF(MPERF_APERF_MSR, 0x00000006, 0, REG_ECX,  0)
+	FEAT_DEF(ACNT2, 0x00000006, 0, REG_ECX,  1)
+	FEAT_DEF(ENERGY_EFF, 0x00000006, 0, REG_ECX,  3)
 
-	{FEAT_DEF(FSGSBASE),          {0x7, 0, 0, 0, REG_EBX}, 0x00000001},
-	{FEAT_DEF(BMI1),              {0x7, 0, 0, 0, REG_EBX}, 0x00000004},
-	{FEAT_DEF(HLE),               {0x7, 0, 0, 0, REG_EBX}, 0x00000010},
-	{FEAT_DEF(AVX2),              {0x7, 0, 0, 0, REG_EBX}, 0x00000020},
-	{FEAT_DEF(SMEP),              {0x7, 0, 0, 0, REG_EBX}, 0x00000040},
-	{FEAT_DEF(BMI2),              {0x7, 0, 0, 0, REG_EBX}, 0x00000080},
-	{FEAT_DEF(ERMS),              {0x7, 0, 0, 0, REG_EBX}, 0x00000100},
-	{FEAT_DEF(INVPCID),           {0x7, 0, 0, 0, REG_EBX}, 0x00000400},
-	{FEAT_DEF(RTM),               {0x7, 0, 0, 0, REG_EBX}, 0x00000800},
+	FEAT_DEF(FSGSBASE, 0x00000007, 0, REG_EBX,  0)
+	FEAT_DEF(BMI1, 0x00000007, 0, REG_EBX,  2)
+	FEAT_DEF(HLE, 0x00000007, 0, REG_EBX,  4)
+	FEAT_DEF(AVX2, 0x00000007, 0, REG_EBX,  5)
+	FEAT_DEF(SMEP, 0x00000007, 0, REG_EBX,  6)
+	FEAT_DEF(BMI2, 0x00000007, 0, REG_EBX,  7)
+	FEAT_DEF(ERMS, 0x00000007, 0, REG_EBX,  8)
+	FEAT_DEF(INVPCID, 0x00000007, 0, REG_EBX, 10)
+	FEAT_DEF(RTM, 0x00000007, 0, REG_EBX, 11)
 
-	{FEAT_DEF(LAHF_SAHF),  {0x80000001, 0, 0, 0, REG_ECX}, 0x00000001},
-	{FEAT_DEF(LZCNT),      {0x80000001, 0, 0, 0, REG_ECX}, 0x00000010},
+	FEAT_DEF(LAHF_SAHF, 0x80000001, 0, REG_ECX,  0)
+	FEAT_DEF(LZCNT, 0x80000001, 0, REG_ECX,  4)
 
-	{FEAT_DEF(SYSCALL),    {0x80000001, 0, 0, 0, REG_EDX}, 0x00000800},
-	{FEAT_DEF(XD),         {0x80000001, 0, 0, 0, REG_EDX}, 0x00100000},
-	{FEAT_DEF(1GB_PG),     {0x80000001, 0, 0, 0, REG_EDX}, 0x04000000},
-	{FEAT_DEF(RDTSCP),     {0x80000001, 0, 0, 0, REG_EDX}, 0x08000000},
-	{FEAT_DEF(EM64T),      {0x80000001, 0, 0, 0, REG_EDX}, 0x20000000},
+	FEAT_DEF(SYSCALL, 0x80000001, 0, REG_EDX, 11)
+	FEAT_DEF(XD, 0x80000001, 0, REG_EDX, 20)
+	FEAT_DEF(1GB_PG, 0x80000001, 0, REG_EDX, 26)
+	FEAT_DEF(RDTSCP, 0x80000001, 0, REG_EDX, 27)
+	FEAT_DEF(EM64T, 0x80000001, 0, REG_EDX, 29)
 
-	{FEAT_DEF(INVTSC),     {0x80000007, 0, 0, 0, REG_EDX}, 0x00000100},
+	FEAT_DEF(INVTSC, 0x80000007, 0, REG_EDX,  8)
 };
 
 /*
@@ -187,51 +180,27 @@ static const struct feature_entry cpu_feature_table[] = {
  * This function, when compiled with GCC, will generate architecture-neutral
  * code, as per GCC manual.
  */
-static inline int
-rte_cpu_get_features(struct cpuid_parameters_t params)
+static inline void
+rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t out)
 {
-	int eax, ebx, ecx, edx;            /* registers */
-
-#ifndef __PIC__
-   asm volatile ("cpuid"
-                 /* output */
-                 : "=a" (eax),
-                   "=b" (ebx),
-                   "=c" (ecx),
-                   "=d" (edx)
-                 /* input */
-                 : "a" (params.eax),
-                   "b" (params.ebx),
-                   "c" (params.ecx),
-                   "d" (params.edx));
+#if defined(__i386__) && defined(__PIC__)
+    /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */
+    asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0"
+		 : "=r" (out[REG_EBX]),
+		   "=a" (out[REG_EAX]),
+		   "=c" (out[REG_ECX]),
+		   "=d" (out[REG_EDX])
+		 : "a" (leaf), "c" (subleaf));
 #else
-	asm volatile ( 
-            "mov %%ebx, %%edi\n"
-            "cpuid\n"
-            "xchgl %%ebx, %%edi;\n"
-            : "=a" (eax),
-              "=D" (ebx),
-              "=c" (ecx),
-              "=d" (edx)
-            /* input */
-            : "a" (params.eax),
-              "D" (params.ebx),
-              "c" (params.ecx),
-              "d" (params.edx));
-#endif
 
-	switch (params.return_register) {
-	case REG_EAX:
-		return eax;
-	case REG_EBX:
-		return ebx;
-	case REG_ECX:
-		return ecx;
-	case REG_EDX:
-		return edx;
-	default:
-		return 0;
-	}
+    asm volatile("cpuid"
+		 : "=a" (out[REG_EAX]),
+		   "=b" (out[REG_EBX]),
+		   "=c" (out[REG_ECX]),
+		   "=d" (out[REG_EDX])
+		 : "a" (leaf), "c" (subleaf));
+
+#endif
 }
 
 /*
@@ -240,17 +209,30 @@ rte_cpu_get_features(struct cpuid_parameters_t params)
 int
 rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
 {
-	int value;
+	const struct feature_entry *feat;
+	cpuid_registers_t regs;
+
 
 	if (feature >= RTE_CPUFLAG_NUMFLAGS)
 		/* Flag does not match anything in the feature tables */
 		return -ENOENT;
 
-	/* get value of the register containing the desired feature */
-	value = rte_cpu_get_features(cpu_feature_table[feature].params);
+	feat = &cpu_feature_table[feature];
+
+	if (!feat->leaf)
+		/* This entry in the table wasn't filled out! */
+		return -EFAULT;
+
+	rte_cpu_get_features(feat->leaf & 0xffff0000, 0, regs);
+	if (((regs[REG_EAX] ^ feat->leaf) & 0xffff0000) ||
+	      regs[REG_EAX] < feat->leaf)
+		return 0;
+
+	/* get the cpuid leaf containing the desired feature */
+	rte_cpu_get_features(feat->leaf, feat->subleaf, regs);
 
 	/* check if the feature is enabled */
-	return (cpu_feature_table[feature].feature_mask & value) > 0;
+	return (regs[feat->reg] >> feat->bit) & 1;
 }
 
 /**
@@ -271,9 +253,18 @@ rte_cpu_check_supported(void)
 			RTE_COMPILE_TIME_CPUFLAGS
 	};
 	unsigned i;
+	int ret;
 
 	for (i = 0; i < sizeof(compile_time_flags)/sizeof(compile_time_flags[0]); i++)
-		if (rte_cpu_get_flag_enabled(compile_time_flags[i]) < 1) {
+		ret = rte_cpu_get_flag_enabled(compile_time_flags[i]);
+
+		if (ret < 0) {
+			fprintf(stderr,
+				"ERROR: CPU feature flag lookup failed with error %d\n",
+				ret);
+			exit(1);
+		}
+		if (!ret) {
 			fprintf(stderr,
 			        "ERROR: This system does not support \"%s\".\n"
 			        "Please check that RTE_MACHINE is set correctly.\n",
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH v5] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
  2014-03-25 19:52   ` [dpdk-dev] [PATCH v5] " Neil Horman
@ 2014-03-25 20:51     ` H. Peter Anvin
  2014-04-02 11:16       ` Neil Horman
  2014-04-02 12:42       ` Thomas Monjalon
  0 siblings, 2 replies; 20+ messages in thread
From: H. Peter Anvin @ 2014-03-25 20:51 UTC (permalink / raw)
  To: Neil Horman, dev

On 03/25/2014 12:52 PM, Neil Horman wrote:
> Neil Horman reported that on x86-64 the upper half of %rbx would get
> clobbered when the code was compiled PIC or PIE, because the
> i386-specific code to preserve %ebx was incorrectly compiled.
> 
> However, the code is really way more complex than it needs to be.  For
> one thing, the CPUID instruction only needs %eax (leaf) and %ecx
> (subleaf) as parameters, and since we are testing for bits, we might
> as well list the bits explicitly.  Furthermore, we can use an array
> rather than doing a switch statement inside a structure.
> 
> Reported-by: Neil Horman <nhorman@tuxdriver.com>
> Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
> Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> 

Looks good to me.

Reviewed-by: H. Peter Anvin <hpa@linux.intel.com>

	-hpa

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH v5] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
  2014-03-25 20:51     ` H. Peter Anvin
@ 2014-04-02 11:16       ` Neil Horman
  2014-04-02 11:53         ` Thomas Monjalon
  2014-04-02 12:42       ` Thomas Monjalon
  1 sibling, 1 reply; 20+ messages in thread
From: Neil Horman @ 2014-04-02 11:16 UTC (permalink / raw)
  To: H. Peter Anvin; +Cc: dev

On Tue, Mar 25, 2014 at 01:51:04PM -0700, H. Peter Anvin wrote:
> On 03/25/2014 12:52 PM, Neil Horman wrote:
> > Neil Horman reported that on x86-64 the upper half of %rbx would get
> > clobbered when the code was compiled PIC or PIE, because the
> > i386-specific code to preserve %ebx was incorrectly compiled.
> > 
> > However, the code is really way more complex than it needs to be.  For
> > one thing, the CPUID instruction only needs %eax (leaf) and %ecx
> > (subleaf) as parameters, and since we are testing for bits, we might
> > as well list the bits explicitly.  Furthermore, we can use an array
> > rather than doing a switch statement inside a structure.
> > 
> > Reported-by: Neil Horman <nhorman@tuxdriver.com>
> > Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
> > Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> > 
> 
> Looks good to me.
> 
> Reviewed-by: H. Peter Anvin <hpa@linux.intel.com>
> 
> 	-hpa
> 
> 
> 
Bump, did this get lost somewhere?  It's been over a week and I don't see it in
the tree

Regards
Neil

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH v5] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
  2014-04-02 11:16       ` Neil Horman
@ 2014-04-02 11:53         ` Thomas Monjalon
  0 siblings, 0 replies; 20+ messages in thread
From: Thomas Monjalon @ 2014-04-02 11:53 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev, H. Peter Anvin

2014-04-02 07:16, Neil Horman:
> On Tue, Mar 25, 2014 at 01:51:04PM -0700, H. Peter Anvin wrote:
> > On 03/25/2014 12:52 PM, Neil Horman wrote:
> > > Neil Horman reported that on x86-64 the upper half of %rbx would get
> > > clobbered when the code was compiled PIC or PIE, because the
> > > i386-specific code to preserve %ebx was incorrectly compiled.
> > > 
> > > However, the code is really way more complex than it needs to be.  For
> > > one thing, the CPUID instruction only needs %eax (leaf) and %ecx
> > > (subleaf) as parameters, and since we are testing for bits, we might
> > > as well list the bits explicitly.  Furthermore, we can use an array
> > > rather than doing a switch statement inside a structure.
> > > 
> > > Reported-by: Neil Horman <nhorman@tuxdriver.com>
> > > Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
> > > Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> > 
> > Looks good to me.
> > 
> > Reviewed-by: H. Peter Anvin <hpa@linux.intel.com>
> > 
> > 	-hpa
> 
> Bump, did this get lost somewhere?  Its been over a week and I don't see it
> in the tree

No, it's not lost. But this patch is not trivial and there were 5 versions 
with acknowledgements in the middle. So I think it was not a bad idea to wait 
a few days in order to be sure this version is OK :)

-- 
Thomas

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [dpdk-dev] [PATCH v5] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
  2014-03-25 20:51     ` H. Peter Anvin
  2014-04-02 11:16       ` Neil Horman
@ 2014-04-02 12:42       ` Thomas Monjalon
  1 sibling, 0 replies; 20+ messages in thread
From: Thomas Monjalon @ 2014-04-02 12:42 UTC (permalink / raw)
  To: H. Peter Anvin, Neil Horman; +Cc: dev

2014-03-25 13:51, H. Peter Anvin:
> On 03/25/2014 12:52 PM, Neil Horman wrote:
> > Neil Horman reported that on x86-64 the upper half of %rbx would get
> > clobbered when the code was compiled PIC or PIE, because the
> > i386-specific code to preserve %ebx was incorrectly compiled.
> > 
> > However, the code is really way more complex than it needs to be.  For
> > one thing, the CPUID instruction only needs %eax (leaf) and %ecx
> > (subleaf) as parameters, and since we are testing for bits, we might
> > as well list the bits explicitly.  Furthermore, we can use an array
> > rather than doing a switch statement inside a structure.
> > 
> > Reported-by: Neil Horman <nhorman@tuxdriver.com>
> > Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
> > Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> 
> Looks good to me.
> 
> Reviewed-by: H. Peter Anvin <hpa@linux.intel.com>

Applied for version 1.6.0r2.

Thanks for this difficult fix.
-- 
Thomas

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2014-04-02 12:40 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1395330830-1310-1-git-send-email-hpa@linux.intel.com>
2014-03-20 16:39 ` [dpdk-dev] [RFC UNTESTED PATCH] eal_common_cpuflags: Fix %rbx corruption, and simplify the code Neil Horman
2014-03-20 17:02   ` Thomas Monjalon
2014-03-20 18:04   ` Neil Horman
2014-03-21 14:49   ` [dpdk-dev] [PATCH v2] " Neil Horman
2014-03-21 15:03     ` H. Peter Anvin
2014-03-21 17:48       ` Neil Horman
2014-03-24 11:18         ` Thomas Monjalon
2014-03-24 17:44   ` [dpdk-dev] [PATCH v3] " Neil Horman
2014-03-24 18:09     ` H. Peter Anvin
2014-03-24 19:52       ` Neil Horman
2014-03-24 20:47         ` H. Peter Anvin
2014-03-25 10:41           ` Neil Horman
2014-03-25 17:03   ` [dpdk-dev] [PATCH v4] " Neil Horman
2014-03-25 17:06     ` Chris Wright
2014-03-25 17:37     ` H. Peter Anvin
2014-03-25 19:52   ` [dpdk-dev] [PATCH v5] " Neil Horman
2014-03-25 20:51     ` H. Peter Anvin
2014-04-02 11:16       ` Neil Horman
2014-04-02 11:53         ` Thomas Monjalon
2014-04-02 12:42       ` Thomas Monjalon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).