In order to get more accurate the cntvct_el0 reading, SW must invoke isb and arch_counter_enforce_ordering. Reference of linux kernel: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/arm64/include/asm/arch_timer.h?h=v5.5#n220 Signed-off-by: Haifeng Lin <haifeng.lin@huawei.com> --- .../common/include/arch/arm/rte_atomic_64.h | 3 +++ .../common/include/arch/arm/rte_cycles_64.h | 20 +++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h index 859ae129d..2587f98a2 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h +++ b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h @@ -21,6 +21,7 @@ extern "C" { #define dsb(opt) asm volatile("dsb " #opt : : : "memory") #define dmb(opt) asm volatile("dmb " #opt : : : "memory") +#define isb() (asm volatile("isb" : : : "memory")) #define rte_mb() dsb(sy) @@ -44,6 +45,8 @@ extern "C" { #define rte_cio_rmb() dmb(oshld) +#define rte_isb() isb() + /*------------------------ 128 bit atomic operations -------------------------*/ #if defined(__ARM_FEATURE_ATOMICS) || defined(RTE_ARM_FEATURE_ATOMICS) diff --git a/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h b/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h index 68e7c7338..bc4e3f8e6 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h +++ b/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h @@ -18,6 +18,7 @@ extern "C" { * The time base for this lcore. */ #ifndef RTE_ARM_EAL_RDTSC_USE_PMU + /** * This call is portable to any ARMv8 architecture, however, typically * cntvct_el0 runs at <= 100MHz and it may be imprecise for some tasks. 
@@ -59,11 +60,26 @@ rte_rdtsc(void) } #endif +#define arch_counter_enforce_ordering(val) do { \ + uint64_t tmp, _val = (val); \ + \ + asm volatile( \ + " eor %0, %1, %1\n" \ + " add %0, sp, %0\n" \ + " ldr xzr, [%0]" \ + : "=r" (tmp) : "r" (_val)); \ +} while (0) + + static inline uint64_t rte_rdtsc_precise(void) { - rte_mb(); - return rte_rdtsc(); + uint64_t tsc; + + rte_isb(); + tsc = rte_rdtsc(); + arch_counter_enforce_ordering(tsc); + return tsc; } static inline uint64_t -- 2.24.1.windows.2
On Tue, Mar 10, 2020 at 3:09 PM Linhaifeng <haifeng.lin@huawei.com> wrote: > > In order to get more accurate the cntvct_el0 reading, > SW must invoke isb and arch_counter_enforce_ordering. > > Reference of linux kernel: > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/arm64/include/asm/arch_timer.h?h=v5.5#n220 > > Signed-off-by: Haifeng Lin <haifeng.lin@huawei.com> Not addressed Fixes: comment http://mails.dpdk.org/archives/dev/2020-March/159547.html > --- > .../common/include/arch/arm/rte_atomic_64.h | 3 +++ > .../common/include/arch/arm/rte_cycles_64.h | 20 +++++++++++++++++-- > 2 files changed, 21 insertions(+), 2 deletions(-) > > diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h > index 859ae129d..2587f98a2 100644 > --- a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h > +++ b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h > @@ -21,6 +21,7 @@ extern "C" { > > #define dsb(opt) asm volatile("dsb " #opt : : : "memory") > #define dmb(opt) asm volatile("dmb " #opt : : : "memory") > +#define isb() (asm volatile("isb" : : : "memory")) > > #define rte_mb() dsb(sy) > > @@ -44,6 +45,8 @@ extern "C" { > > #define rte_cio_rmb() dmb(oshld) > > +#define rte_isb() isb() Not addressed comment http://mails.dpdk.org/archives/dev/2020-March/159547.html > + > /*------------------------ 128 bit atomic operations -------------------------*/ > > #if defined(__ARM_FEATURE_ATOMICS) || defined(RTE_ARM_FEATURE_ATOMICS) > diff --git a/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h b/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h > index 68e7c7338..bc4e3f8e6 100644 > --- a/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h > +++ b/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h > @@ -18,6 +18,7 @@ extern "C" { > * The time base for this lcore. 
> */ > #ifndef RTE_ARM_EAL_RDTSC_USE_PMU > + > /** > * This call is portable to any ARMv8 architecture, however, typically > * cntvct_el0 runs at <= 100MHz and it may be imprecise for some tasks. > @@ -59,11 +60,26 @@ rte_rdtsc(void) > } > #endif > > +#define arch_counter_enforce_ordering(val) do { \ > + uint64_t tmp, _val = (val); \ > + \ > + asm volatile( \ > + " eor %0, %1, %1\n" \ > + " add %0, sp, %0\n" \ > + " ldr xzr, [%0]" \ > + : "=r" (tmp) : "r" (_val)); \ > +} while (0) > + Not addressed the comments in http://mails.dpdk.org/archives/dev/2020-March/159547.html Gavin, Linhaifeng, I don't think, this ordering is valid in the DPDK context. See the patch from Will. https://lore.kernel.org/patchwork/patch/1076132/ Thoughts? > static inline uint64_t > rte_rdtsc_precise(void) > { > - rte_mb(); > - return rte_rdtsc(); > + uint64_t tsc; > + > + rte_isb(); > + tsc = rte_rdtsc(); Not addressed the comments in http://mails.dpdk.org/archives/dev/2020-March/159547.html > + arch_counter_enforce_ordering(tsc); > + return tsc; > } > > static inline uint64_t > -- > 2.24.1.windows.2 >
> -----Original Message----- > From: Jerin Jacob [mailto:jerinjacobk@gmail.com] > Sent: Tuesday, March 10, 2020 6:47 PM > To: Linhaifeng <haifeng.lin@huawei.com> > Cc: Gavin Hu <Gavin.Hu@arm.com>; dev@dpdk.org; thomas@monjalon.net; > chenchanghu <chenchanghu@huawei.com>; xudingke > <xudingke@huawei.com>; Lilijun (Jerry) <jerry.lilijun@huawei.com>; Honnappa > Nagarahalli <Honnappa.Nagarahalli@arm.com>; Steve Capper > <Steve.Capper@arm.com>; nd <nd@arm.com> > Subject: Re: [PATCH v3] eal/arm64: fix rdtsc precise version > > On Tue, Mar 10, 2020 at 3:09 PM Linhaifeng <haifeng.lin@huawei.com> wrote: > > > > In order to get more accurate the cntvct_el0 reading, SW must invoke > > isb and arch_counter_enforce_ordering. > > > > Reference of linux kernel: > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tre > > e/arch/arm64/include/asm/arch_timer.h?h=v5.5#n220 > > > > Signed-off-by: Haifeng Lin <haifeng.lin@huawei.com> > > Not addressed Fixes: comment > http://mails.dpdk.org/archives/dev/2020-March/159547.html > > > --- > > .../common/include/arch/arm/rte_atomic_64.h | 3 +++ > > .../common/include/arch/arm/rte_cycles_64.h | 20 > +++++++++++++++++-- > > 2 files changed, 21 insertions(+), 2 deletions(-) > > > > diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h > > b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h > > index 859ae129d..2587f98a2 100644 > > --- a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h > > +++ b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h > > @@ -21,6 +21,7 @@ extern "C" { > > > > #define dsb(opt) asm volatile("dsb " #opt : : : "memory") #define > > dmb(opt) asm volatile("dmb " #opt : : : "memory") > > +#define isb() (asm volatile("isb" : : : "memory")) > > > > #define rte_mb() dsb(sy) > > > > @@ -44,6 +45,8 @@ extern "C" { > > > > #define rte_cio_rmb() dmb(oshld) > > > > +#define rte_isb() isb() > > Not addressed comment > http://mails.dpdk.org/archives/dev/2020-March/159547.html > > > > + 
> > /*------------------------ 128 bit atomic operations > > -------------------------*/ > > > > #if defined(__ARM_FEATURE_ATOMICS) || > > defined(RTE_ARM_FEATURE_ATOMICS) diff --git > > a/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h > > b/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h > > index 68e7c7338..bc4e3f8e6 100644 > > --- a/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h > > +++ b/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h > > @@ -18,6 +18,7 @@ extern "C" { > > * The time base for this lcore. > > */ > > #ifndef RTE_ARM_EAL_RDTSC_USE_PMU > > + > > /** > > * This call is portable to any ARMv8 architecture, however, typically > > * cntvct_el0 runs at <= 100MHz and it may be imprecise for some tasks. > > @@ -59,11 +60,26 @@ rte_rdtsc(void) > > } > > #endif > > > > +#define arch_counter_enforce_ordering(val) do > { \ > > + uint64_t tmp, _val = (val); > \ > > + > \ > > + asm > volatile( > \ > > + " eor %0, %1, %1\n" > \ > > + " add %0, sp, %0\n" > \ > > + " ldr xzr, [%0]" > \ > > + : "=r" (tmp) : "r" (_val)); > \ > > +} while (0) > > + > > Not addressed the comments in > http://mails.dpdk.org/archives/dev/2020-March/159547.html > > Gavin, Linhaifeng, > I don't think, this ordering is valid in the DPDK context. > > See the patch from Will. > https://lore.kernel.org/patchwork/patch/1076132/ > > Thoughts? I think arch_counter_enforce_ordering maybe is invalid but isb must be valid for DPDK context. > > > > > static inline uint64_t > > rte_rdtsc_precise(void) > > { > > - rte_mb(); > > - return rte_rdtsc(); > > + uint64_t tsc; > > + > > + rte_isb(); > > + tsc = rte_rdtsc(); > > Not addressed the comments in > http://mails.dpdk.org/archives/dev/2020-March/159547.html > > > + arch_counter_enforce_ordering(tsc); > > + return tsc; > > } > > > > static inline uint64_t > > -- > > 2.24.1.windows.2 > >
On Wed, Mar 11, 2020 at 3:35 PM Linhaifeng <haifeng.lin@huawei.com> wrote:
>
>
>
> > -----Original Message-----
> > From: Jerin Jacob [mailto:jerinjacobk@gmail.com]
> > Sent: Tuesday, March 10, 2020 6:47 PM
> > To: Linhaifeng <haifeng.lin@huawei.com>
> > Cc: Gavin Hu <Gavin.Hu@arm.com>; dev@dpdk.org; thomas@monjalon.net;
> > chenchanghu <chenchanghu@huawei.com>; xudingke
> > <xudingke@huawei.com>; Lilijun (Jerry) <jerry.lilijun@huawei.com>; Honnappa
> > Nagarahalli <Honnappa.Nagarahalli@arm.com>; Steve Capper
> > <Steve.Capper@arm.com>; nd <nd@arm.com>
> > Subject: Re: [PATCH v3] eal/arm64: fix rdtsc precise version
> >
> > On Tue, Mar 10, 2020 at 3:09 PM Linhaifeng <haifeng.lin@huawei.com> wrote:
> > >
> > > In order to get more accurate the cntvct_el0 reading, SW must invoke
> > > isb and arch_counter_enforce_ordering.
> > >
> > > Reference of linux kernel:
> > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tre
> > > e/arch/arm64/include/asm/arch_timer.h?h=v5.5#n220
> > >
> > > Signed-off-by: Haifeng Lin <haifeng.lin@huawei.com>
> >
> > Not addressed Fixes: comment
> > http://mails.dpdk.org/archives/dev/2020-March/159547.html
> >
> > > ---
> > > .../common/include/arch/arm/rte_atomic_64.h | 3 +++
> > > .../common/include/arch/arm/rte_cycles_64.h | 20
> > +++++++++++++++++--
> > > 2 files changed, 21 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
> > > b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
> > > index 859ae129d..2587f98a2 100644
> > > --- a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
> > > +++ b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
> > > @@ -21,6 +21,7 @@ extern "C" {
> > >
> > > #define dsb(opt) asm volatile("dsb " #opt : : : "memory") #define
> > > dmb(opt) asm volatile("dmb " #opt : : : "memory")
> > > +#define isb() (asm volatile("isb" : : : "memory"))
> > >
> > > #define rte_mb() dsb(sy)
> > >
> > > @@ -44,6 +45,8 @@ extern "C" {
> > >
> > > #define rte_cio_rmb() dmb(oshld)
> > >
> > > +#define rte_isb() isb()
> >
> > Not addressed comment
> > http://mails.dpdk.org/archives/dev/2020-March/159547.html
> >
> >
> > > +
> > > /*------------------------ 128 bit atomic operations
> > > -------------------------*/
> > >
> > > #if defined(__ARM_FEATURE_ATOMICS) ||
> > > defined(RTE_ARM_FEATURE_ATOMICS) diff --git
> > > a/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h
> > > b/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h
> > > index 68e7c7338..bc4e3f8e6 100644
> > > --- a/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h
> > > +++ b/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h
> > > @@ -18,6 +18,7 @@ extern "C" {
> > > * The time base for this lcore.
> > > */
> > > #ifndef RTE_ARM_EAL_RDTSC_USE_PMU
> > > +
> > > /**
> > > * This call is portable to any ARMv8 architecture, however, typically
> > > * cntvct_el0 runs at <= 100MHz and it may be imprecise for some tasks.
> > > @@ -59,11 +60,26 @@ rte_rdtsc(void)
> > > }
> > > #endif
> > >
> > > +#define arch_counter_enforce_ordering(val) do
> > { \
> > > + uint64_t tmp, _val = (val);
> > \
> > > +
> > \
> > > + asm
> > volatile(
> > \
> > > + " eor %0, %1, %1\n"
> > \
> > > + " add %0, sp, %0\n"
> > \
> > > + " ldr xzr, [%0]"
> > \
> > > + : "=r" (tmp) : "r" (_val));
> > \
> > > +} while (0)
> > > +
> >
> > Not addressed the comments in
> > http://mails.dpdk.org/archives/dev/2020-March/159547.html
> >
> > Gavin, Linhaifeng,
> > I don't think, this ordering is valid in the DPDK context.
> >
> > See the patch from Will.
> > https://lore.kernel.org/patchwork/patch/1076132/
> >
> > Thoughts?
>
> I think arch_counter_enforce_ordering maybe is invalid but isb must be valid for DPDK context.
Yes. Only isb is required in the DPDK context. That would translate to
the following change
- rte_mb();
+ asm volatile("isb" : : : "memory");
With the above change and fixing (removing
arch_counter_enforce_ordering) in the git commit log:
Acked-by: Jerin Jacob <jerinj@marvell.com>