DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH] eal/ppc64: improve rte_rdtsc with ppc_get_timebase
@ 2020-01-28 21:02 Thinh Tran
  2020-01-31  0:15 ` David Christensen
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Thinh Tran @ 2020-01-28 21:02 UTC (permalink / raw)
  To: dev; +Cc: drc, Thinh Tran

  __ppc_get_timebase() is GNU extention and is more efficient

Signed-off-by: Thinh Tran <thinhtr@linux.vnet.ibm.com>
---
 .../common/include/arch/ppc_64/rte_cycles.h   | 28 ++-----------------
 1 file changed, 2 insertions(+), 26 deletions(-)

diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
index 8f2e98642..871f9b6e4 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
@@ -14,6 +14,7 @@ extern "C" {
 
 #include <rte_byteorder.h>
 #include <rte_common.h>
+#include <sys/platform/ppc.h>
 
 /**
  * Read the time base register.
@@ -24,32 +25,7 @@ extern "C" {
 static inline uint64_t
 rte_rdtsc(void)
 {
-	union {
-		uint64_t tsc_64;
-		RTE_STD_C11
-		struct {
-#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
-			uint32_t hi_32;
-			uint32_t lo_32;
-#else
-			uint32_t lo_32;
-			uint32_t hi_32;
-#endif
-		};
-	} tsc;
-	uint32_t tmp;
-
-	asm volatile(
-			"0:\n"
-			"mftbu   %[hi32]\n"
-			"mftb    %[lo32]\n"
-			"mftbu   %[tmp]\n"
-			"cmpw    %[tmp],%[hi32]\n"
-			"bne     0b\n"
-			: [hi32] "=r"(tsc.hi_32), [lo32] "=r"(tsc.lo_32),
-			[tmp] "=r"(tmp)
-		    );
-	return tsc.tsc_64;
+	return __ppc_get_timebase();
 }
 
 static inline uint64_t
-- 
2.17.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH] eal/ppc64: improve rte_rdtsc with ppc_get_timebase
  2020-01-28 21:02 [dpdk-dev] [PATCH] eal/ppc64: improve rte_rdtsc with ppc_get_timebase Thinh Tran
@ 2020-01-31  0:15 ` David Christensen
  2020-01-31 14:54 ` Thinh Tran
  2020-01-31 22:03 ` [dpdk-dev] [PATCH v2] " Thinh Tran
  2 siblings, 0 replies; 9+ messages in thread
From: David Christensen @ 2020-01-31  0:15 UTC (permalink / raw)
  To: Thinh Tran, dev

>    __ppc_get_timebase() is GNU extention and is more efficient
> 
> Signed-off-by: Thinh Tran <thinhtr@linux.vnet.ibm.com>

Reviewed-by: David Christensen <drc@linux.vnet.ibm.com>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH] eal/ppc64: improve rte_rdtsc with ppc_get_timebase
  2020-01-28 21:02 [dpdk-dev] [PATCH] eal/ppc64: improve rte_rdtsc with ppc_get_timebase Thinh Tran
  2020-01-31  0:15 ` David Christensen
@ 2020-01-31 14:54 ` Thinh Tran
  2020-01-31 22:03 ` [dpdk-dev] [PATCH v2] " Thinh Tran
  2 siblings, 0 replies; 9+ messages in thread
From: Thinh Tran @ 2020-01-31 14:54 UTC (permalink / raw)
  To: dev; +Cc: drc

Need to resubmit a new patch, since the same code may be used by other 
ppc platform such as FreeBSD that may not have the sys/platform/ppc.h
Thinh Tran
On 1/28/2020 3:02 PM, Thinh Tran wrote:
>    __ppc_get_timebase() is GNU extention and is more efficient
> 
> Signed-off-by: Thinh Tran <thinhtr@linux.vnet.ibm.com>
> ---
>   .../common/include/arch/ppc_64/rte_cycles.h   | 28 ++-----------------
>   1 file changed, 2 insertions(+), 26 deletions(-)
> 
> diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
> index 8f2e98642..871f9b6e4 100644
> --- a/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
> +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
> @@ -14,6 +14,7 @@ extern "C" {
> 
>   #include <rte_byteorder.h>
>   #include <rte_common.h>
> +#include <sys/platform/ppc.h>
> 
>   /**
>    * Read the time base register.
> @@ -24,32 +25,7 @@ extern "C" {
>   static inline uint64_t
>   rte_rdtsc(void)
>   {
> -	union {
> -		uint64_t tsc_64;
> -		RTE_STD_C11
> -		struct {
> -#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
> -			uint32_t hi_32;
> -			uint32_t lo_32;
> -#else
> -			uint32_t lo_32;
> -			uint32_t hi_32;
> -#endif
> -		};
> -	} tsc;
> -	uint32_t tmp;
> -
> -	asm volatile(
> -			"0:\n"
> -			"mftbu   %[hi32]\n"
> -			"mftb    %[lo32]\n"
> -			"mftbu   %[tmp]\n"
> -			"cmpw    %[tmp],%[hi32]\n"
> -			"bne     0b\n"
> -			: [hi32] "=r"(tsc.hi_32), [lo32] "=r"(tsc.lo_32),
> -			[tmp] "=r"(tmp)
> -		    );
> -	return tsc.tsc_64;
> +	return __ppc_get_timebase();
>   }
> 
>   static inline uint64_t
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [dpdk-dev] [PATCH v2] eal/ppc64: improve rte_rdtsc with ppc_get_timebase
  2020-01-28 21:02 [dpdk-dev] [PATCH] eal/ppc64: improve rte_rdtsc with ppc_get_timebase Thinh Tran
  2020-01-31  0:15 ` David Christensen
  2020-01-31 14:54 ` Thinh Tran
@ 2020-01-31 22:03 ` Thinh Tran
  2020-02-04 18:00   ` David Christensen
                     ` (2 more replies)
  2 siblings, 3 replies; 9+ messages in thread
From: Thinh Tran @ 2020-01-31 22:03 UTC (permalink / raw)
  To: dev; +Cc: drc, Thinh Tran

  __ppc_get_timebase() is GNU extension and is more efficient

  v2: Advoid breaking other ppc_64 flatforms. The __ppc_get_timebase() 
      seems to be specific to powerpc platform and with GLIBC.

Signed-off-by: Thinh Tran <thinhtr@linux.vnet.ibm.com>
---
 lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
index 8f2e98642..1c3fd556e 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
@@ -14,6 +14,9 @@ extern "C" {
 
 #include <rte_byteorder.h>
 #include <rte_common.h>
+#if defined(__powerpc__) && defined(__GLIBC__)
+#include <sys/platform/ppc.h>
+#endif
 
 /**
  * Read the time base register.
@@ -24,6 +27,9 @@ extern "C" {
 static inline uint64_t
 rte_rdtsc(void)
 {
+#if defined(__powerpc__) && defined(__GLIBC__)
+	return __ppc_get_timebase();
+#else
 	union {
 		uint64_t tsc_64;
 		RTE_STD_C11
@@ -50,6 +56,7 @@ rte_rdtsc(void)
 			[tmp] "=r"(tmp)
 		    );
 	return tsc.tsc_64;
+#endif /* __powerpc__ && __GLIBC__ */
 }
 
 static inline uint64_t
-- 
2.17.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH v2] eal/ppc64: improve rte_rdtsc with ppc_get_timebase
  2020-01-31 22:03 ` [dpdk-dev] [PATCH v2] " Thinh Tran
@ 2020-02-04 18:00   ` David Christensen
  2020-02-05 21:29   ` David Marchand
  2020-03-10 12:55   ` David Marchand
  2 siblings, 0 replies; 9+ messages in thread
From: David Christensen @ 2020-02-04 18:00 UTC (permalink / raw)
  To: Thinh Tran, dev

>    __ppc_get_timebase() is GNU extension and is more efficient
> 
>    v2: Advoid breaking other ppc_64 flatforms. The __ppc_get_timebase()
>        seems to be specific to powerpc platform and with GLIBC.
> 
> Signed-off-by: Thinh Tran <thinhtr@linux.vnet.ibm.com>

Reviewed-by: David Christensen <drc@linux.vnet.ibm.com>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH v2] eal/ppc64: improve rte_rdtsc with ppc_get_timebase
  2020-01-31 22:03 ` [dpdk-dev] [PATCH v2] " Thinh Tran
  2020-02-04 18:00   ` David Christensen
@ 2020-02-05 21:29   ` David Marchand
  2020-02-10 17:53     ` Thinh Tran
  2020-03-10 12:55   ` David Marchand
  2 siblings, 1 reply; 9+ messages in thread
From: David Marchand @ 2020-02-05 21:29 UTC (permalink / raw)
  To: Thinh Tran; +Cc: dev, David Christensen

On Fri, Jan 31, 2020 at 11:04 PM Thinh Tran <thinhtr@linux.vnet.ibm.com> wrote:
>
>   __ppc_get_timebase() is GNU extension and is more efficient

The commit title and log are quite short and give little idea on what
this is about.


I had a look at this glibc helper:

/* Read the Time Base Register.   */
static __inline__ uint64_t
__ppc_get_timebase (void)
{
#if __GNUC_PREREQ (4, 8)
  return __builtin_ppc_get_timebase ();
#else
# ifdef __powerpc64__
  uint64_t __tb;
  /* "volatile" is necessary here, because the user expects this assembly
     isn't moved after an optimization.  */
  __asm__ volatile ("mfspr %0, 268" : "=r" (__tb));
  return __tb;
# else  /* not __powerpc64__ */
  uint32_t __tbu, __tbl, __tmp; \
  __asm__ volatile ("0:\n\t"
                    "mftbu %0\n\t"
                    "mftbl %1\n\t"
                    "mftbu %2\n\t"
                    "cmpw %0, %2\n\t"
                    "bne- 0b"
                    : "=r" (__tbu), "=r" (__tbl), "=r" (__tmp));
  return (((uint64_t) __tbu << 32) | __tbl);
# endif  /* not __powerpc64__ */
#endif
}

The last block is exactly the code we had in dpdk.
So I suppose we are trying to use mfspr for register 268 which seems
linked to timebase (looking at the linux kernel sources).

Please, confirm this is an enhancement (and how this improves current
ppc support).
Thanks.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH v2] eal/ppc64: improve rte_rdtsc with ppc_get_timebase
  2020-02-05 21:29   ` David Marchand
@ 2020-02-10 17:53     ` Thinh Tran
  0 siblings, 0 replies; 9+ messages in thread
From: Thinh Tran @ 2020-02-10 17:53 UTC (permalink / raw)
  To: David Marchand; +Cc: dev, David Christensen

Hi, Sorry for late response.
Yes this is the enhancement for powerpc. Observations on our power8/9
the __ppc_get_timebase calls __builtin_ppc_get_timebase () which is 
result in calling the mftb instruction
    __ppc_get_timebase():
       mftb     rA
this instruction on a 64-bit implementation copies the entire time base 
(TBU||TBL) into rA, which also reduces number of cycles significantly 
comparing to the current code (same as last block)

Take the simple reciprocal division perf test on power9 that heavily 
calls rte_rdtsc() to demonstrate:
- without this batch:
Validating unsigned 32bit division.
32bit Division results:
Total number of cycles normal division     : 73744549935
Total number of cycles reciprocal division : 76954877143
Cycles per division(normal) : 17.17
Cycles per division(reciprocal) : 17.92
Validating unsigned 64bit division.
64bit Division results:
Total number of cycles normal division     : 73932937051
Total number of cycles reciprocal division : 74598584339
Cycles per division(normal) : 17.21
Cycles per division(reciprocal) : 17.37
Validating unsigned 64bit division with 32bit divisor.
64bit Division results:
Total number of cycles normal division     : 78660556171
Total number of cycles reciprocal division : 74566630579
Cycles per division(normal) : 18.31
Cycles per division(reciprocal) : 17.36
Validating division by power of 2.
64bit Division results:
Total number of cycles normal division     : 1097
Total number of cycles reciprocal division : 1201
Cycles per division(normal) : 17.14
Cycles per division(reciprocal) : 18.77
Test OK
RTE>>
- with the patch:
Validating unsigned 32bit division.
32bit Division results:
Total number of cycles normal division     : 41690214596
Total number of cycles reciprocal division : 44446377795
Cycles per division(normal) : 9.71
Cycles per division(reciprocal) : 10.35
Validating unsigned 64bit division.
64bit Division results:
Total number of cycles normal division     : 41687737031
Total number of cycles reciprocal division : 41666358052
Cycles per division(normal) : 9.71
Cycles per division(reciprocal) : 9.70
Validating unsigned 64bit division with 32bit divisor.
64bit Division results:
Total number of cycles normal division     : 46386969228
Total number of cycles reciprocal division : 41663680498
Cycles per division(normal) : 10.80
Cycles per division(reciprocal) : 9.70
Validating division by power of 2.
64bit Division results:
Total number of cycles normal division     : 618
Total number of cycles reciprocal division : 618
Cycles per division(normal) : 9.66
Cycles per division(reciprocal) : 9.66
Test OK
RTE>>

I hope this explains it.
Thanks,
Thinh Tran
On 2/5/2020 3:29 PM, David Marchand wrote:
> On Fri, Jan 31, 2020 at 11:04 PM Thinh Tran <thinhtr@linux.vnet.ibm.com> wrote:
>>
>>    __ppc_get_timebase() is GNU extension and is more efficient
> 
> The commit title and log are quite short and give little idea on what
> this is about.
> 
> 
> I had a look at this glibc helper:
> 
> /* Read the Time Base Register.   */
> static __inline__ uint64_t
> __ppc_get_timebase (void)
> {
> #if __GNUC_PREREQ (4, 8)
>    return __builtin_ppc_get_timebase ();
> #else
> # ifdef __powerpc64__
>    uint64_t __tb;
>    /* "volatile" is necessary here, because the user expects this assembly
>       isn't moved after an optimization.  */
>    __asm__ volatile ("mfspr %0, 268" : "=r" (__tb));
>    return __tb;
> # else  /* not __powerpc64__ */
>    uint32_t __tbu, __tbl, __tmp; \
>    __asm__ volatile ("0:\n\t"
>                      "mftbu %0\n\t"
>                      "mftbl %1\n\t"
>                      "mftbu %2\n\t"
>                      "cmpw %0, %2\n\t"
>                      "bne- 0b"
>                      : "=r" (__tbu), "=r" (__tbl), "=r" (__tmp));
>    return (((uint64_t) __tbu << 32) | __tbl);
> # endif  /* not __powerpc64__ */
> #endif
> }
> 
> The last block is exactly the code we had in dpdk.
> So I suppose we are trying to use mfspr for register 268 which seems
> linked to timebase (looking at the linux kernel sources).
> 
> Please, confirm this is an enhancement (and how this improves current
> ppc support).
> Thanks.
> 
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH v2] eal/ppc64: improve rte_rdtsc with ppc_get_timebase
  2020-01-31 22:03 ` [dpdk-dev] [PATCH v2] " Thinh Tran
  2020-02-04 18:00   ` David Christensen
  2020-02-05 21:29   ` David Marchand
@ 2020-03-10 12:55   ` David Marchand
  2020-03-11 20:26     ` Thinh Tran
  2 siblings, 1 reply; 9+ messages in thread
From: David Marchand @ 2020-03-10 12:55 UTC (permalink / raw)
  To: Thinh Tran; +Cc: dev, David Christensen

On Fri, Jan 31, 2020 at 11:04 PM Thinh Tran <thinhtr@linux.vnet.ibm.com> wrote:
>
>   __ppc_get_timebase() is GNU extension and is more efficient
>
>   v2: Advoid breaking other ppc_64 flatforms. The __ppc_get_timebase()
>       seems to be specific to powerpc platform and with GLIBC.

dpdk only supports glibc at the moment.
https://doc.dpdk.org/guides/linux_gsg/sys_reqs.html#system-software

Now the 1M$ question is which C library are you using and on which platform :-).


>
> Signed-off-by: Thinh Tran <thinhtr@linux.vnet.ibm.com>
> ---
>  lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h | 7 +++++++
>  1 file changed, 7 insertions(+)
>
> diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
> index 8f2e98642..1c3fd556e 100644
> --- a/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
> +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
> @@ -14,6 +14,9 @@ extern "C" {
>
>  #include <rte_byteorder.h>
>  #include <rte_common.h>
> +#if defined(__powerpc__) && defined(__GLIBC__)
> +#include <sys/platform/ppc.h>
> +#endif

libc headers must come first:
https://doc.dpdk.org/guides/contributing/coding_style.html#header-includes

Why do you need the __powerpc__ guard?

>
>  /**
>   * Read the time base register.
> @@ -24,6 +27,9 @@ extern "C" {
>  static inline uint64_t
>  rte_rdtsc(void)
>  {
> +#if defined(__powerpc__) && defined(__GLIBC__)
> +       return __ppc_get_timebase();
> +#else
>         union {
>                 uint64_t tsc_64;
>                 RTE_STD_C11
> @@ -50,6 +56,7 @@ rte_rdtsc(void)
>                         [tmp] "=r"(tmp)
>                     );
>         return tsc.tsc_64;
> +#endif /* __powerpc__ && __GLIBC__ */
>  }
>
>  static inline uint64_t
> --
> 2.17.1
>


-- 
David Marchand


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [dpdk-dev] [PATCH v2] eal/ppc64: improve rte_rdtsc with ppc_get_timebase
  2020-03-10 12:55   ` David Marchand
@ 2020-03-11 20:26     ` Thinh Tran
  0 siblings, 0 replies; 9+ messages in thread
From: Thinh Tran @ 2020-03-11 20:26 UTC (permalink / raw)
  To: David Marchand; +Cc: dev, David Christensen

Hi David, Thanks for your response.

We were worry about breaking the DPDK/FreeBSD on power, may hit the 
similar bug:
   https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=241146

 From my understanding, and also from the documentation
https://doc.dpdk.org/guides/freebsd_gsg/install_from_ports.html
DPDK is supported, glibc may not be required by default, and it is not 
clear what the supported architectures are.

So if FreeBSD does not support DPDK on powerpc or
FreeBSD supports DPDK on powerpc and requires glibc installed,
then the GLIBC guard can be removed

This is unknown to me.

Agreed. __powerpc__ guard is not necessary in this path and will be removed.

Thanks,
Thinh Tran

On 3/10/2020 7:55 AM, David Marchand wrote:
> On Fri, Jan 31, 2020 at 11:04 PM Thinh Tran <thinhtr@linux.vnet.ibm.com> wrote:
>>
>>    __ppc_get_timebase() is GNU extension and is more efficient
>>
>>    v2: Advoid breaking other ppc_64 flatforms. The __ppc_get_timebase()
>>        seems to be specific to powerpc platform and with GLIBC.
> 
> dpdk only supports glibc at the moment.
> https://doc.dpdk.org/guides/linux_gsg/sys_reqs.html#system-software
> 
> Now the 1M$ question is which C library are you using and on which platform :-).
> 
>>
>> Signed-off-by: Thinh Tran <thinhtr@linux.vnet.ibm.com>
>> ---
>>   lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h | 7 +++++++
>>   1 file changed, 7 insertions(+)
>>
>> diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
>> index 8f2e98642..1c3fd556e 100644
>> --- a/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
>> +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
>> @@ -14,6 +14,9 @@ extern "C" {
>>
>>   #include <rte_byteorder.h>
>>   #include <rte_common.h>
>> +#if defined(__powerpc__) && defined(__GLIBC__)
>> +#include <sys/platform/ppc.h>
>> +#endif
> 
> libc headers must come first:
> https://doc.dpdk.org/guides/contributing/coding_style.html#header-includes
> 
> Why do you need the __powerpc__ guard?
> 
>>
>>   /**
>>    * Read the time base register.
>> @@ -24,6 +27,9 @@ extern "C" {
>>   static inline uint64_t
>>   rte_rdtsc(void)
>>   {
>> +#if defined(__powerpc__) && defined(__GLIBC__)
>> +       return __ppc_get_timebase();
>> +#else
>>          union {
>>                  uint64_t tsc_64;
>>                  RTE_STD_C11
>> @@ -50,6 +56,7 @@ rte_rdtsc(void)
>>                          [tmp] "=r"(tmp)
>>                      );
>>          return tsc.tsc_64;
>> +#endif /* __powerpc__ && __GLIBC__ */
>>   }
>>
>>   static inline uint64_t
>> --
>> 2.17.1
>>
> 
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2020-03-11 20:27 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-28 21:02 [dpdk-dev] [PATCH] eal/ppc64: improve rte_rdtsc with ppc_get_timebase Thinh Tran
2020-01-31  0:15 ` David Christensen
2020-01-31 14:54 ` Thinh Tran
2020-01-31 22:03 ` [dpdk-dev] [PATCH v2] " Thinh Tran
2020-02-04 18:00   ` David Christensen
2020-02-05 21:29   ` David Marchand
2020-02-10 17:53     ` Thinh Tran
2020-03-10 12:55   ` David Marchand
2020-03-11 20:26     ` Thinh Tran

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).