DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
@ 2014-12-12 23:04 r k
  2014-12-13 10:39 ` Neil Horman
  0 siblings, 1 reply; 12+ messages in thread
From: r k @ 2014-12-12 23:04 UTC (permalink / raw)
  To: dev

Subject: [PATCH] Minor fixes in rte_common.h file.

Fix rte_is_power_of_2 since 0 is not.
Avoid branching instructions in RTE_MAX and RTE_MIN.

Signed-off-by: Ravi Kerur <rkerur@gmail.com>
---
 lib/librte_eal/common/include/rte_common.h | 6 +++---
 lib/librte_pmd_e1000/igb_pf.c              | 4 ++--
 lib/librte_pmd_ixgbe/ixgbe_pf.c            | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/lib/librte_eal/common/include/rte_common.h
b/lib/librte_eal/common/include/rte_common.h
index 921b91f..e163f35 100644
--- a/lib/librte_eal/common/include/rte_common.h
+++ b/lib/librte_eal/common/include/rte_common.h
@@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error;  static
inline int  rte_is_power_of_2(uint32_t n)  {
-       return ((n-1) & n) == 0;
+       return n && !(n & (n - 1));
 }

 /**
@@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MIN(a, b) ({ \
                typeof (a) _a = (a); \
                typeof (b) _b = (b); \
-               _a < _b ? _a : _b; \
+                _b ^ ((_a ^ _b) & -(_a < _b)); \
        })

 /**
@@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MAX(a, b) ({ \
                typeof (a) _a = (a); \
                typeof (b) _b = (b); \
-               _a > _b ? _a : _b; \
+               _a ^ ((_a ^ _b) & -(_a < _b)); \
        })

 /*********** Other general functions / macros ********/ diff --git
a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c index
bc3816a..546499c 100644
--- a/lib/librte_pmd_e1000/igb_pf.c
+++ b/lib/librte_pmd_e1000/igb_pf.c
@@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev *dev, uint32_t
vf, uint32_t *msgbuf)  static int  igb_vf_set_multicast(struct rte_eth_dev
*dev, __rte_unused uint32_t vf, uint32_t *msgbuf)  {
-       int i;
+       int16_t i;
        uint32_t vector_bit;
        uint32_t vector_reg;
        uint32_t mta_reg;
-       int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
+       int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
                E1000_VT_MSGINFO_SHIFT;
        uint16_t *hash_list = (uint16_t *)&msgbuf[1];
        struct e1000_hw *hw =
E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
b/lib/librte_pmd_ixgbe/ixgbe_pf.c index 51da1fd..426caf9 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
@@ -390,7 +390,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev,
__rte_unused uint32_t vf, uint32
        struct ixgbe_hw *hw =
IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct ixgbe_vf_info *vfinfo =
                *(IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private));
-       int nb_entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >>
+       int32_t nb_entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >>
                IXGBE_VT_MSGINFO_SHIFT;
        uint16_t *hash_list = (uint16_t *)&msgbuf[1];
        uint32_t mta_idx;
@@ -399,7 +399,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev,
__rte_unused uint32_t vf, uint32
        const uint32_t IXGBE_MTA_BIT_SHIFT = 5;
        const uint32_t IXGBE_MTA_BIT_MASK = (0x1 << IXGBE_MTA_BIT_SHIFT) -
1;
        uint32_t reg_val;
-       int i;
+       int16_t i;

        /* only so many hash values supported */
        nb_entries = RTE_MIN(nb_entries, IXGBE_MAX_VF_MC_ENTRIES);
--
1.9.1

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
  2014-12-12 23:04 [dpdk-dev] [PATCH] Minor fixes in rte_common.h file r k
@ 2014-12-13 10:39 ` Neil Horman
  2014-12-16 16:46   ` Ravi Kerur
  0 siblings, 1 reply; 12+ messages in thread
From: Neil Horman @ 2014-12-13 10:39 UTC (permalink / raw)
  To: r k; +Cc: dev

On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote:
> Subject: [PATCH] Minor fixes in rte_common.h file.
> 
> Fix rte_is_power_of_2 since 0 is not.
> Avoid branching instructions in RTE_MAX and RTE_MIN.
> 
> Signed-off-by: Ravi Kerur <rkerur@gmail.com>
> ---
>  lib/librte_eal/common/include/rte_common.h | 6 +++---
>  lib/librte_pmd_e1000/igb_pf.c              | 4 ++--
>  lib/librte_pmd_ixgbe/ixgbe_pf.c            | 4 ++--
>  3 files changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/lib/librte_eal/common/include/rte_common.h
> b/lib/librte_eal/common/include/rte_common.h
> index 921b91f..e163f35 100644
> --- a/lib/librte_eal/common/include/rte_common.h
> +++ b/lib/librte_eal/common/include/rte_common.h
> @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error;  static
> inline int  rte_is_power_of_2(uint32_t n)  {
> -       return ((n-1) & n) == 0;
> +       return n && !(n & (n - 1));
>  }
> 
>  /**
> @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MIN(a, b) ({ \
>                 typeof (a) _a = (a); \
>                 typeof (b) _b = (b); \
> -               _a < _b ? _a : _b; \
> +                _b ^ ((_a ^ _b) & -(_a < _b)); \
Are you sure this is actually faster than the branch version?  What about using
a cmov instead?

>         })
> 
>  /**
> @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MAX(a, b) ({ \
>                 typeof (a) _a = (a); \
>                 typeof (b) _b = (b); \
> -               _a > _b ? _a : _b; \
> +               _a ^ ((_a ^ _b) & -(_a < _b)); \
Same as above

>         })
> 
>  /*********** Other general functions / macros ********/ diff --git
> a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c index
> bc3816a..546499c 100644
> --- a/lib/librte_pmd_e1000/igb_pf.c
> +++ b/lib/librte_pmd_e1000/igb_pf.c
> @@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev *dev, uint32_t
> vf, uint32_t *msgbuf)  static int  igb_vf_set_multicast(struct rte_eth_dev
> *dev, __rte_unused uint32_t vf, uint32_t *msgbuf)  {
> -       int i;
> +       int16_t i;
>         uint32_t vector_bit;
>         uint32_t vector_reg;
>         uint32_t mta_reg;
> -       int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> +       int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
>                 E1000_VT_MSGINFO_SHIFT;
NAK, this has nothing to do with the included changelog

>         uint16_t *hash_list = (uint16_t *)&msgbuf[1];
>         struct e1000_hw *hw =
> E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index 51da1fd..426caf9 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> @@ -390,7 +390,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev,
> __rte_unused uint32_t vf, uint32
>         struct ixgbe_hw *hw =
> IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>         struct ixgbe_vf_info *vfinfo =
>                 *(IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private));
> -       int nb_entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >>
> +       int32_t nb_entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >>
>                 IXGBE_VT_MSGINFO_SHIFT;
ditto
>         uint16_t *hash_list = (uint16_t *)&msgbuf[1];
>         uint32_t mta_idx;
> @@ -399,7 +399,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev,
> __rte_unused uint32_t vf, uint32
>         const uint32_t IXGBE_MTA_BIT_SHIFT = 5;
>         const uint32_t IXGBE_MTA_BIT_MASK = (0x1 << IXGBE_MTA_BIT_SHIFT) -
> 1;
>         uint32_t reg_val;
> -       int i;
> +       int16_t i;
ditto

> 
>         /* only so many hash values supported */
>         nb_entries = RTE_MIN(nb_entries, IXGBE_MAX_VF_MC_ENTRIES);
> --
> 1.9.1
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
  2014-12-13 10:39 ` Neil Horman
@ 2014-12-16 16:46   ` Ravi Kerur
  2014-12-16 17:23     ` Ananyev, Konstantin
  2014-12-16 21:40     ` Neil Horman
  0 siblings, 2 replies; 12+ messages in thread
From: Ravi Kerur @ 2014-12-16 16:46 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

On Sat, Dec 13, 2014 at 2:39 AM, Neil Horman <nhorman@tuxdriver.com> wrote:
>
> On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote:
> > Subject: [PATCH] Minor fixes in rte_common.h file.
> >
> > Fix rte_is_power_of_2 since 0 is not.
> > Avoid branching instructions in RTE_MAX and RTE_MIN.
> >
> > Signed-off-by: Ravi Kerur <rkerur@gmail.com>
> > ---
> >  lib/librte_eal/common/include/rte_common.h | 6 +++---
> >  lib/librte_pmd_e1000/igb_pf.c              | 4 ++--
> >  lib/librte_pmd_ixgbe/ixgbe_pf.c            | 4 ++--
> >  3 files changed, 7 insertions(+), 7 deletions(-)
> >
> > diff --git a/lib/librte_eal/common/include/rte_common.h
> > b/lib/librte_eal/common/include/rte_common.h
> > index 921b91f..e163f35 100644
> > --- a/lib/librte_eal/common/include/rte_common.h
> > +++ b/lib/librte_eal/common/include/rte_common.h
> > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error;  static
> > inline int  rte_is_power_of_2(uint32_t n)  {
> > -       return ((n-1) & n) == 0;
> > +       return n && !(n & (n - 1));
> >  }
> >
> >  /**
> > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MIN(a, b)
> ({ \
> >                 typeof (a) _a = (a); \
> >                 typeof (b) _b = (b); \
> > -               _a < _b ? _a : _b; \
> > +                _b ^ ((_a ^ _b) & -(_a < _b)); \
> Are you sure this is actually faster than the branch version?  What about
> using
> a cmov instead?
>
>
<rk> i am pretty sure modified code is faster than branching. I remember
cmov had performance issues esp. on Pentuim-4 not sure how new intel cpu's
perform.

>         })
> >
> >  /**
> > @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MAX(a, b)
> ({ \
> >                 typeof (a) _a = (a); \
> >                 typeof (b) _b = (b); \
> > -               _a > _b ? _a : _b; \
> > +               _a ^ ((_a ^ _b) & -(_a < _b)); \
> Same as above
>
> <rk> Same as above.

> >         })
> >
> >  /*********** Other general functions / macros ********/ diff --git
> > a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c index
> > bc3816a..546499c 100644
> > --- a/lib/librte_pmd_e1000/igb_pf.c
> > +++ b/lib/librte_pmd_e1000/igb_pf.c
> > @@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev *dev,
> uint32_t
> > vf, uint32_t *msgbuf)  static int  igb_vf_set_multicast(struct
> rte_eth_dev
> > *dev, __rte_unused uint32_t vf, uint32_t *msgbuf)  {
> > -       int i;
> > +       int16_t i;
> >         uint32_t vector_bit;
> >         uint32_t vector_reg;
> >         uint32_t mta_reg;
> > -       int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > +       int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> >                 E1000_VT_MSGINFO_SHIFT;
> NAK, this has nothing to do with the included changelog
>

<rk> It does, it causes compilation errors such as

/root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c: In function
\u2018igb_pf_mbx_process\u2019:
/root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c:350:23: error: array
subscript is above array bounds [-Werror=array-bounds]
   vfinfo->vf_mc_hashes[i] = hash_list[i];
                       ^
cc1: all warnings being treated as errors

Also it is always better to use explicit int definitions esp. for 64bit
systems.



>
> >         uint16_t *hash_list = (uint16_t *)&msgbuf[1];
> >         struct e1000_hw *hw =
> > E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > b/lib/librte_pmd_ixgbe/ixgbe_pf.c index 51da1fd..426caf9 100644
> > --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > @@ -390,7 +390,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev,
> > __rte_unused uint32_t vf, uint32
> >         struct ixgbe_hw *hw =
> > IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >         struct ixgbe_vf_info *vfinfo =
> >                 *(IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private));
> > -       int nb_entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >>
> > +       int32_t nb_entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >>
> >                 IXGBE_VT_MSGINFO_SHIFT;
> ditto
> >         uint16_t *hash_list = (uint16_t *)&msgbuf[1];
> >         uint32_t mta_idx;
> > @@ -399,7 +399,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev,
> > __rte_unused uint32_t vf, uint32
> >         const uint32_t IXGBE_MTA_BIT_SHIFT = 5;
> >         const uint32_t IXGBE_MTA_BIT_MASK = (0x1 << IXGBE_MTA_BIT_SHIFT)
> -
> > 1;
> >         uint32_t reg_val;
> > -       int i;
> > +       int16_t i;
> ditto
>
> <rk> Same as above.

> >
> >         /* only so many hash values supported */
> >         nb_entries = RTE_MIN(nb_entries, IXGBE_MAX_VF_MC_ENTRIES);
> > --
> > 1.9.1
> >
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
  2014-12-16 16:46   ` Ravi Kerur
@ 2014-12-16 17:23     ` Ananyev, Konstantin
  2014-12-16 20:13       ` Ravi Kerur
  2014-12-16 21:40     ` Neil Horman
  1 sibling, 1 reply; 12+ messages in thread
From: Ananyev, Konstantin @ 2014-12-16 17:23 UTC (permalink / raw)
  To: Ravi Kerur, Neil Horman; +Cc: dev



> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ravi Kerur
> Sent: Tuesday, December 16, 2014 4:47 PM
> To: Neil Horman
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
> 
> On Sat, Dec 13, 2014 at 2:39 AM, Neil Horman <nhorman@tuxdriver.com> wrote:
> >
> > On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote:
> > > Subject: [PATCH] Minor fixes in rte_common.h file.
> > >
> > > Fix rte_is_power_of_2 since 0 is not.
> > > Avoid branching instructions in RTE_MAX and RTE_MIN.
> > >
> > > Signed-off-by: Ravi Kerur <rkerur@gmail.com>
> > > ---
> > >  lib/librte_eal/common/include/rte_common.h | 6 +++---
> > >  lib/librte_pmd_e1000/igb_pf.c              | 4 ++--
> > >  lib/librte_pmd_ixgbe/ixgbe_pf.c            | 4 ++--
> > >  3 files changed, 7 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/lib/librte_eal/common/include/rte_common.h
> > > b/lib/librte_eal/common/include/rte_common.h
> > > index 921b91f..e163f35 100644
> > > --- a/lib/librte_eal/common/include/rte_common.h
> > > +++ b/lib/librte_eal/common/include/rte_common.h
> > > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error;  static
> > > inline int  rte_is_power_of_2(uint32_t n)  {
> > > -       return ((n-1) & n) == 0;
> > > +       return n && !(n & (n - 1));
> > >  }
> > >
> > >  /**
> > > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MIN(a, b)
> > ({ \
> > >                 typeof (a) _a = (a); \
> > >                 typeof (b) _b = (b); \
> > > -               _a < _b ? _a : _b; \
> > > +                _b ^ ((_a ^ _b) & -(_a < _b)); \
> > Are you sure this is actually faster than the branch version?  What about
> > using
> > a cmov instead?
> >
> >
> <rk> i am pretty sure modified code is faster than branching. I remember
> cmov had performance issues esp. on Pentuim-4 not sure how new intel cpu's
> perform.

I also think most modern compilers are smart enough to avoid any branching here and will use cmov instead.
And we are way ahead of Pentium 4 times these days.
Konstantin

> 
> >         })
> > >
> > >  /**
> > > @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MAX(a, b)
> > ({ \
> > >                 typeof (a) _a = (a); \
> > >                 typeof (b) _b = (b); \
> > > -               _a > _b ? _a : _b; \
> > > +               _a ^ ((_a ^ _b) & -(_a < _b)); \
> > Same as above
> >
> > <rk> Same as above.
> 
> > >         })
> > >
> > >  /*********** Other general functions / macros ********/ diff --git
> > > a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c index
> > > bc3816a..546499c 100644
> > > --- a/lib/librte_pmd_e1000/igb_pf.c
> > > +++ b/lib/librte_pmd_e1000/igb_pf.c
> > > @@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev *dev,
> > uint32_t
> > > vf, uint32_t *msgbuf)  static int  igb_vf_set_multicast(struct
> > rte_eth_dev
> > > *dev, __rte_unused uint32_t vf, uint32_t *msgbuf)  {
> > > -       int i;
> > > +       int16_t i;
> > >         uint32_t vector_bit;
> > >         uint32_t vector_reg;
> > >         uint32_t mta_reg;
> > > -       int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > > +       int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > >                 E1000_VT_MSGINFO_SHIFT;
> > NAK, this has nothing to do with the included changelog
> >
> 
> <rk> It does, it causes compilation errors such as
> 
> /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c: In function
> \u2018igb_pf_mbx_process\u2019:
> /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c:350:23: error: array
> subscript is above array bounds [-Werror=array-bounds]
>    vfinfo->vf_mc_hashes[i] = hash_list[i];
>                        ^
> cc1: all warnings being treated as errors
> 
> Also it is always better to use explicit int definitions esp. for 64bit
> systems.
> 
> 
> 
> >
> > >         uint16_t *hash_list = (uint16_t *)&msgbuf[1];
> > >         struct e1000_hw *hw =
> > > E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > > diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > > b/lib/librte_pmd_ixgbe/ixgbe_pf.c index 51da1fd..426caf9 100644
> > > --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > > +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > > @@ -390,7 +390,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev,
> > > __rte_unused uint32_t vf, uint32
> > >         struct ixgbe_hw *hw =
> > > IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > >         struct ixgbe_vf_info *vfinfo =
> > >                 *(IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private));
> > > -       int nb_entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >>
> > > +       int32_t nb_entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >>
> > >                 IXGBE_VT_MSGINFO_SHIFT;
> > ditto
> > >         uint16_t *hash_list = (uint16_t *)&msgbuf[1];
> > >         uint32_t mta_idx;
> > > @@ -399,7 +399,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev,
> > > __rte_unused uint32_t vf, uint32
> > >         const uint32_t IXGBE_MTA_BIT_SHIFT = 5;
> > >         const uint32_t IXGBE_MTA_BIT_MASK = (0x1 << IXGBE_MTA_BIT_SHIFT)
> > -
> > > 1;
> > >         uint32_t reg_val;
> > > -       int i;
> > > +       int16_t i;
> > ditto
> >
> > <rk> Same as above.
> 
> > >
> > >         /* only so many hash values supported */
> > >         nb_entries = RTE_MIN(nb_entries, IXGBE_MAX_VF_MC_ENTRIES);
> > > --
> > > 1.9.1
> > >
> >

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
  2014-12-16 17:23     ` Ananyev, Konstantin
@ 2014-12-16 20:13       ` Ravi Kerur
       [not found]         ` <2601191342CEEE43887BDE71AB977258213C1499@IRSMSX105.ger.corp.intel.com>
  0 siblings, 1 reply; 12+ messages in thread
From: Ravi Kerur @ 2014-12-16 20:13 UTC (permalink / raw)
  To: Ananyev, Konstantin; +Cc: dev

On Tue, Dec 16, 2014 at 9:23 AM, Ananyev, Konstantin <
konstantin.ananyev@intel.com> wrote:
>
>
>
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ravi Kerur
> > Sent: Tuesday, December 16, 2014 4:47 PM
> > To: Neil Horman
> > Cc: dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
> >
> > On Sat, Dec 13, 2014 at 2:39 AM, Neil Horman <nhorman@tuxdriver.com>
> wrote:
> > >
> > > On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote:
> > > > Subject: [PATCH] Minor fixes in rte_common.h file.
> > > >
> > > > Fix rte_is_power_of_2 since 0 is not.
> > > > Avoid branching instructions in RTE_MAX and RTE_MIN.
> > > >
> > > > Signed-off-by: Ravi Kerur <rkerur@gmail.com>
> > > > ---
> > > >  lib/librte_eal/common/include/rte_common.h | 6 +++---
> > > >  lib/librte_pmd_e1000/igb_pf.c              | 4 ++--
> > > >  lib/librte_pmd_ixgbe/ixgbe_pf.c            | 4 ++--
> > > >  3 files changed, 7 insertions(+), 7 deletions(-)
> > > >
> > > > diff --git a/lib/librte_eal/common/include/rte_common.h
> > > > b/lib/librte_eal/common/include/rte_common.h
> > > > index 921b91f..e163f35 100644
> > > > --- a/lib/librte_eal/common/include/rte_common.h
> > > > +++ b/lib/librte_eal/common/include/rte_common.h
> > > > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error;
> static
> > > > inline int  rte_is_power_of_2(uint32_t n)  {
> > > > -       return ((n-1) & n) == 0;
> > > > +       return n && !(n & (n - 1));
> > > >  }
> > > >
> > > >  /**
> > > > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MIN(a,
> b)
> > > ({ \
> > > >                 typeof (a) _a = (a); \
> > > >                 typeof (b) _b = (b); \
> > > > -               _a < _b ? _a : _b; \
> > > > +                _b ^ ((_a ^ _b) & -(_a < _b)); \
> > > Are you sure this is actually faster than the branch version?  What
> about
> > > using
> > > a cmov instead?
> > >
> > >
> > <rk> i am pretty sure modified code is faster than branching. I remember
> > cmov had performance issues esp. on Pentuim-4 not sure how new intel
> cpu's
> > perform.
>
> I also think most modern compilers are smart enough to avoid any branching
> here and will use cmov instead.
> And we are way ahead of Pentium 4 times these days.

Konstantin
>

<rk>Konstantin,  Can you please elaborate, is it something done
automatically with Intel's icc compiler? My understanding is branch
prediction can be influenced only by using compiler builtin i.e.
__builtin_expect() , without this compiler will generate regular
instructions(cmp/jump instructions). I wrote small program and compiled
with gcc -02/-03, don't see cmov instruction.


> >
> > >         })
> > > >
> > > >  /**
> > > > @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MAX(a,
> b)
> > > ({ \
> > > >                 typeof (a) _a = (a); \
> > > >                 typeof (b) _b = (b); \
> > > > -               _a > _b ? _a : _b; \
> > > > +               _a ^ ((_a ^ _b) & -(_a < _b)); \
> > > Same as above
> > >
> > > <rk> Same as above.
> >
> > > >         })
> > > >
> > > >  /*********** Other general functions / macros ********/ diff --git
> > > > a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c index
> > > > bc3816a..546499c 100644
> > > > --- a/lib/librte_pmd_e1000/igb_pf.c
> > > > +++ b/lib/librte_pmd_e1000/igb_pf.c
> > > > @@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev *dev,
> > > uint32_t
> > > > vf, uint32_t *msgbuf)  static int  igb_vf_set_multicast(struct
> > > rte_eth_dev
> > > > *dev, __rte_unused uint32_t vf, uint32_t *msgbuf)  {
> > > > -       int i;
> > > > +       int16_t i;
> > > >         uint32_t vector_bit;
> > > >         uint32_t vector_reg;
> > > >         uint32_t mta_reg;
> > > > -       int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > > > +       int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > > >                 E1000_VT_MSGINFO_SHIFT;
> > > NAK, this has nothing to do with the included changelog
> > >
> >
> > <rk> It does, it causes compilation errors such as
> >
> > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c: In function
> > \u2018igb_pf_mbx_process\u2019:
> > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c:350:23: error: array
> > subscript is above array bounds [-Werror=array-bounds]
> >    vfinfo->vf_mc_hashes[i] = hash_list[i];
> >                        ^
> > cc1: all warnings being treated as errors
> >
> > Also it is always better to use explicit int definitions esp. for 64bit
> > systems.
> >
> >
> >
> > >
> > > >         uint16_t *hash_list = (uint16_t *)&msgbuf[1];
> > > >         struct e1000_hw *hw =
> > > > E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > > > diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > > > b/lib/librte_pmd_ixgbe/ixgbe_pf.c index 51da1fd..426caf9 100644
> > > > --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > > > +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > > > @@ -390,7 +390,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev,
> > > > __rte_unused uint32_t vf, uint32
> > > >         struct ixgbe_hw *hw =
> > > > IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > > >         struct ixgbe_vf_info *vfinfo =
> > > >
>  *(IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private));
> > > > -       int nb_entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >>
> > > > +       int32_t nb_entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >>
> > > >                 IXGBE_VT_MSGINFO_SHIFT;
> > > ditto
> > > >         uint16_t *hash_list = (uint16_t *)&msgbuf[1];
> > > >         uint32_t mta_idx;
> > > > @@ -399,7 +399,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev,
> > > > __rte_unused uint32_t vf, uint32
> > > >         const uint32_t IXGBE_MTA_BIT_SHIFT = 5;
> > > >         const uint32_t IXGBE_MTA_BIT_MASK = (0x1 <<
> IXGBE_MTA_BIT_SHIFT)
> > > -
> > > > 1;
> > > >         uint32_t reg_val;
> > > > -       int i;
> > > > +       int16_t i;
> > > ditto
> > >
> > > <rk> Same as above.
> >
> > > >
> > > >         /* only so many hash values supported */
> > > >         nb_entries = RTE_MIN(nb_entries, IXGBE_MAX_VF_MC_ENTRIES);
> > > > --
> > > > 1.9.1
> > > >
> > >
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
  2014-12-16 16:46   ` Ravi Kerur
  2014-12-16 17:23     ` Ananyev, Konstantin
@ 2014-12-16 21:40     ` Neil Horman
  2014-12-17 16:40       ` Ravi Kerur
  1 sibling, 1 reply; 12+ messages in thread
From: Neil Horman @ 2014-12-16 21:40 UTC (permalink / raw)
  To: Ravi Kerur; +Cc: dev

On Tue, Dec 16, 2014 at 08:46:51AM -0800, Ravi Kerur wrote:
> On Sat, Dec 13, 2014 at 2:39 AM, Neil Horman <nhorman@tuxdriver.com> wrote:
> >
> > On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote:
> > > Subject: [PATCH] Minor fixes in rte_common.h file.
> > >
> > > Fix rte_is_power_of_2 since 0 is not.
> > > Avoid branching instructions in RTE_MAX and RTE_MIN.
> > >
> > > Signed-off-by: Ravi Kerur <rkerur@gmail.com>
> > > ---
> > >  lib/librte_eal/common/include/rte_common.h | 6 +++---
> > >  lib/librte_pmd_e1000/igb_pf.c              | 4 ++--
> > >  lib/librte_pmd_ixgbe/ixgbe_pf.c            | 4 ++--
> > >  3 files changed, 7 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/lib/librte_eal/common/include/rte_common.h
> > > b/lib/librte_eal/common/include/rte_common.h
> > > index 921b91f..e163f35 100644
> > > --- a/lib/librte_eal/common/include/rte_common.h
> > > +++ b/lib/librte_eal/common/include/rte_common.h
> > > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error;  static
> > > inline int  rte_is_power_of_2(uint32_t n)  {
> > > -       return ((n-1) & n) == 0;
> > > +       return n && !(n & (n - 1));
> > >  }
> > >
> > >  /**
> > > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MIN(a, b)
> > ({ \
> > >                 typeof (a) _a = (a); \
> > >                 typeof (b) _b = (b); \
> > > -               _a < _b ? _a : _b; \
> > > +                _b ^ ((_a ^ _b) & -(_a < _b)); \
> > Are you sure this is actually faster than the branch version?  What about
> > using
> > a cmov instead?
> >
> >
> <rk> i am pretty sure modified code is faster than branching. I remember
> cmov had performance issues esp. on Pentuim-4 not sure how new intel cpu's
> perform.
> 
Pretty sure isn't sure.  Theres no point in code churn if theres no obvious
advantage.  Some perf tests to deomonstrate the advantage here would be great.

> >         })
> > >
> > >  /**
> > > @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MAX(a, b)
> > ({ \
> > >                 typeof (a) _a = (a); \
> > >                 typeof (b) _b = (b); \
> > > -               _a > _b ? _a : _b; \
> > > +               _a ^ ((_a ^ _b) & -(_a < _b)); \
> > Same as above
> >
> > <rk> Same as above.
> 
> > >         })
> > >
> > >  /*********** Other general functions / macros ********/ diff --git
> > > a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c index
> > > bc3816a..546499c 100644
> > > --- a/lib/librte_pmd_e1000/igb_pf.c
> > > +++ b/lib/librte_pmd_e1000/igb_pf.c
> > > @@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev *dev,
> > uint32_t
> > > vf, uint32_t *msgbuf)  static int  igb_vf_set_multicast(struct
> > rte_eth_dev
> > > *dev, __rte_unused uint32_t vf, uint32_t *msgbuf)  {
> > > -       int i;
> > > +       int16_t i;
> > >         uint32_t vector_bit;
> > >         uint32_t vector_reg;
> > >         uint32_t mta_reg;
> > > -       int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > > +       int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > >                 E1000_VT_MSGINFO_SHIFT;
> > NAK, this has nothing to do with the included changelog
> >
> 
> <rk> It does, it causes compilation errors such as
> 
> /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c: In function
> \u2018igb_pf_mbx_process\u2019:
> /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c:350:23: error: array
> subscript is above array bounds [-Werror=array-bounds]
>    vfinfo->vf_mc_hashes[i] = hash_list[i];
>                        ^
> cc1: all warnings being treated as errors
> 
> Also it is always better to use explicit int definitions esp. for 64bit
> systems.
> 

This is your changelog:
=============================================================
Subject: [PATCH] Minor fixes in rte_common.h file.

Fix rte_is_power_of_2 since 0 is not.
Avoid branching instructions in RTE_MAX and RTE_MIN
=============================================================

Nowhere does your changelog indicate that you are fixing compliation errors.
That would in and of itself be far more serious that making micro optimizations.
If you want to fix build breaks, great, please do, but send a patch that clearly
indicates what the break is and how your fixing it. Don't just toss it in with
whatever other work you happen to be doing.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
       [not found]         ` <2601191342CEEE43887BDE71AB977258213C1499@IRSMSX105.ger.corp.intel.com>
@ 2014-12-17  1:05           ` Ananyev, Konstantin
  2014-12-17 16:28             ` Ravi Kerur
  0 siblings, 1 reply; 12+ messages in thread
From: Ananyev, Konstantin @ 2014-12-17  1:05 UTC (permalink / raw)
  To: dev



> 
> From: Ravi Kerur [mailto:rkerur@gmail.com]
> Sent: Tuesday, December 16, 2014 8:14 PM
> To: Ananyev, Konstantin
> Cc: Neil Horman; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
> 
> 
> 
> On Tue, Dec 16, 2014 at 9:23 AM, Ananyev, Konstantin <konstantin.ananyev@intel.com> wrote:
> 
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ravi Kerur
> > Sent: Tuesday, December 16, 2014 4:47 PM
> > To: Neil Horman
> > Cc: dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
> >
> > On Sat, Dec 13, 2014 at 2:39 AM, Neil Horman <nhorman@tuxdriver.com> wrote:
> > >
> > > On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote:
> > > > Subject: [PATCH] Minor fixes in rte_common.h file.
> > > >
> > > > Fix rte_is_power_of_2 since 0 is not.
> > > > Avoid branching instructions in RTE_MAX and RTE_MIN.
> > > >
> > > > Signed-off-by: Ravi Kerur <rkerur@gmail.com>
> > > > ---
> > > >  lib/librte_eal/common/include/rte_common.h | 6 +++---
> > > >  lib/librte_pmd_e1000/igb_pf.c              | 4 ++--
> > > >  lib/librte_pmd_ixgbe/ixgbe_pf.c            | 4 ++--
> > > >  3 files changed, 7 insertions(+), 7 deletions(-)
> > > >
> > > > diff --git a/lib/librte_eal/common/include/rte_common.h
> > > > b/lib/librte_eal/common/include/rte_common.h
> > > > index 921b91f..e163f35 100644
> > > > --- a/lib/librte_eal/common/include/rte_common.h
> > > > +++ b/lib/librte_eal/common/include/rte_common.h
> > > > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error;  static
> > > > inline int  rte_is_power_of_2(uint32_t n)  {
> > > > -       return ((n-1) & n) == 0;
> > > > +       return n && !(n & (n - 1));
> > > >  }
> > > >
> > > >  /**
> > > > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MIN(a, b)
> > > ({ \
> > > >                 typeof (a) _a = (a); \
> > > >                 typeof (b) _b = (b); \
> > > > -               _a < _b ? _a : _b; \
> > > > +                _b ^ ((_a ^ _b) & -(_a < _b)); \
> > > Are you sure this is actually faster than the branch version?  What about
> > > using
> > > a cmov instead?
> > >
> > >
> > <rk> i am pretty sure modified code is faster than branching. I remember
> > cmov had performance issues esp. on Pentuim-4 not sure how new intel cpu's
> > perform.
> I also think most modern compilers are smart enough to avoid any branching here and will use cmov instead.
> And we are way ahead of Pentium 4 times these days.
> Konstantin
> 
> <rk>Konstantin,  Can you please elaborate, is it something done automatically with Intel's icc compiler? My understanding is branch
> prediction can be influenced only by using compiler builtin i.e. __builtin_expect() , without this compiler will generate regular
> instructions(cmp/jump instructions). I wrote small program and compiled with gcc -02/-03, don't see cmov instruction.

I am saying that there is probably no need to modify these macros.
On IA , for constructions like: "_a < _b ? _a : _b;"
modern compilers in many cases will avoid any branches and emit cmov instead.

$ cat tcmv1.c

#include <stdint.h>
#include <stddef.h>

#define RTE_MIN(a, b) ({ \
                typeof (a) _a = (a); \
                typeof (b) _b = (b); \
                _a < _b ? _a : _b; \
        })

int
fxmini32(int a, int b)
{
        return RTE_MIN(a, b);
}

int
fxminu64(uint64_t a, uint64_t b)
{
        return RTE_MIN(a, b);
}

$gcc -O3 -m64 -S tcmv1.c

$ cat tcmv1.s
        .file   "tcmv1.c"
        .text
        .p2align 4,,15
        .globl  fxmini32
        .type   fxmini32, @function
fxmini32:
.LFB0:
        .cfi_startproc
        cmpl    %esi, %edi
        movl    %esi, %eax
        cmovle  %edi, %eax
        ret
        .cfi_endproc
.LFE0:
        .size   fxmini32, .-fxmini32
        .p2align 4,,15
        .globl  fxminu64
        .type   fxminu64, @function
fxminu64:
.LFB1:
        .cfi_startproc
        cmpq    %rsi, %rdi
        movq    %rsi, %rax
        cmovbe  %rdi, %rax
        ret
        .cfi_endproc

gcc version 4.8.3
clang produces similar code.

Konstantin

> 
> >
> > >         })
> > > >
> > > >  /**
> > > > @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MAX(a, b)
> > > ({ \
> > > >                 typeof (a) _a = (a); \
> > > >                 typeof (b) _b = (b); \
> > > > -               _a > _b ? _a : _b; \
> > > > +               _a ^ ((_a ^ _b) & -(_a < _b)); \
> > > Same as above
> > >
> > > <rk> Same as above.
> >
> > > >         })
> > > >
> > > >  /*********** Other general functions / macros ********/ diff --git
> > > > a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c index
> > > > bc3816a..546499c 100644
> > > > --- a/lib/librte_pmd_e1000/igb_pf.c
> > > > +++ b/lib/librte_pmd_e1000/igb_pf.c
> > > > @@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev *dev,
> > > uint32_t
> > > > vf, uint32_t *msgbuf)  static int  igb_vf_set_multicast(struct
> > > rte_eth_dev
> > > > *dev, __rte_unused uint32_t vf, uint32_t *msgbuf)  {
> > > > -       int i;
> > > > +       int16_t i;
> > > >         uint32_t vector_bit;
> > > >         uint32_t vector_reg;
> > > >         uint32_t mta_reg;
> > > > -       int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > > > +       int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > > >                 E1000_VT_MSGINFO_SHIFT;
> > > NAK, this has nothing to do with the included changelog
> > >
> >
> > <rk> It does, it causes compilation errors such as
> >
> > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c: In function
> > \u2018igb_pf_mbx_process\u2019:
> > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c:350:23: error: array
> > subscript is above array bounds [-Werror=array-bounds]
> >    vfinfo->vf_mc_hashes[i] = hash_list[i];
> >                        ^
> > cc1: all warnings being treated as errors
> >
> > Also it is always better to use explicit int definitions esp. for 64bit
> > systems.
> >
> >
> >
> > >
> > > >         uint16_t *hash_list = (uint16_t *)&msgbuf[1];
> > > >         struct e1000_hw *hw =
> > > > E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > > > diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > > > b/lib/librte_pmd_ixgbe/ixgbe_pf.c index 51da1fd..426caf9 100644
> > > > --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > > > +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > > > @@ -390,7 +390,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev,
> > > > __rte_unused uint32_t vf, uint32
> > > >         struct ixgbe_hw *hw =
> > > > IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > > >         struct ixgbe_vf_info *vfinfo =
> > > >                 *(IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private));
> > > > -       int nb_entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >>
> > > > +       int32_t nb_entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >>
> > > >                 IXGBE_VT_MSGINFO_SHIFT;
> > > ditto
> > > >         uint16_t *hash_list = (uint16_t *)&msgbuf[1];
> > > >         uint32_t mta_idx;
> > > > @@ -399,7 +399,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev,
> > > > __rte_unused uint32_t vf, uint32
> > > >         const uint32_t IXGBE_MTA_BIT_SHIFT = 5;
> > > >         const uint32_t IXGBE_MTA_BIT_MASK = (0x1 << IXGBE_MTA_BIT_SHIFT)
> > > -
> > > > 1;
> > > >         uint32_t reg_val;
> > > > -       int i;
> > > > +       int16_t i;
> > > ditto
> > >
> > > <rk> Same as above.
> >
> > > >
> > > >         /* only so many hash values supported */
> > > >         nb_entries = RTE_MIN(nb_entries, IXGBE_MAX_VF_MC_ENTRIES);
> > > > --
> > > > 1.9.1
> > > >
> > >

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
  2014-12-17  1:05           ` Ananyev, Konstantin
@ 2014-12-17 16:28             ` Ravi Kerur
  0 siblings, 0 replies; 12+ messages in thread
From: Ravi Kerur @ 2014-12-17 16:28 UTC (permalink / raw)
  To: Ananyev, Konstantin; +Cc: dev

On Tue, Dec 16, 2014 at 5:05 PM, Ananyev, Konstantin <
konstantin.ananyev@intel.com> wrote:
>
>
>
> >
> > From: Ravi Kerur [mailto:rkerur@gmail.com]
> > Sent: Tuesday, December 16, 2014 8:14 PM
> > To: Ananyev, Konstantin
> > Cc: Neil Horman; dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
> >
> >
> >
> > On Tue, Dec 16, 2014 at 9:23 AM, Ananyev, Konstantin <
> konstantin.ananyev@intel.com> wrote:
> >
> >
> > > -----Original Message-----
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ravi Kerur
> > > Sent: Tuesday, December 16, 2014 4:47 PM
> > > To: Neil Horman
> > > Cc: dev@dpdk.org
> > > Subject: Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
> > >
> > > On Sat, Dec 13, 2014 at 2:39 AM, Neil Horman <nhorman@tuxdriver.com>
> wrote:
> > > >
> > > > On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote:
> > > > > Subject: [PATCH] Minor fixes in rte_common.h file.
> > > > >
> > > > > Fix rte_is_power_of_2 since 0 is not.
> > > > > Avoid branching instructions in RTE_MAX and RTE_MIN.
> > > > >
> > > > > Signed-off-by: Ravi Kerur <rkerur@gmail.com>
> > > > > ---
> > > > >  lib/librte_eal/common/include/rte_common.h | 6 +++---
> > > > >  lib/librte_pmd_e1000/igb_pf.c              | 4 ++--
> > > > >  lib/librte_pmd_ixgbe/ixgbe_pf.c            | 4 ++--
> > > > >  3 files changed, 7 insertions(+), 7 deletions(-)
> > > > >
> > > > > diff --git a/lib/librte_eal/common/include/rte_common.h
> > > > > b/lib/librte_eal/common/include/rte_common.h
> > > > > index 921b91f..e163f35 100644
> > > > > --- a/lib/librte_eal/common/include/rte_common.h
> > > > > +++ b/lib/librte_eal/common/include/rte_common.h
> > > > > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error;
> static
> > > > > inline int  rte_is_power_of_2(uint32_t n)  {
> > > > > -       return ((n-1) & n) == 0;
> > > > > +       return n && !(n & (n - 1));
> > > > >  }
> > > > >
> > > > >  /**
> > > > > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v)  #define
> RTE_MIN(a, b)
> > > > ({ \
> > > > >                 typeof (a) _a = (a); \
> > > > >                 typeof (b) _b = (b); \
> > > > > -               _a < _b ? _a : _b; \
> > > > > +                _b ^ ((_a ^ _b) & -(_a < _b)); \
> > > > Are you sure this is actually faster than the branch version?  What
> about
> > > > using
> > > > a cmov instead?
> > > >
> > > >
> > > <rk> i am pretty sure modified code is faster than branching. I
> remember
> > > cmov had performance issues esp. on Pentuim-4 not sure how new intel
> cpu's
> > > perform.
> > I also think most modern compilers are smart enough to avoid any
> branching here and will use cmov instead.
> > And we are way ahead of Pentium 4 times these days.
> > Konstantin
> >
> > <rk>Konstantin,  Can you please elaborate, is it something done
> automatically with Intel's icc compiler? My understanding is branch
> > prediction can be influenced only by using compiler builtin i.e.
> __builtin_expect() , without this compiler will generate regular
> > instructions(cmp/jump instructions). I wrote small program and compiled
> with gcc -02/-03, don't see cmov instruction.
>
> I am saying that there is probably no need to modify these macros.
> On IA , for constructions like: "_a < _b ? _a : _b;"
> modern compilers in many cases will avoid any branches and emit cmov
> instead.
>
> $ cat tcmv1.c
>
> #include <stdint.h>
> #include <stddef.h>
>
> #define RTE_MIN(a, b) ({ \
>                 typeof (a) _a = (a); \
>                 typeof (b) _b = (b); \
>                 _a < _b ? _a : _b; \
>         })
>
> int
> fxmini32(int a, int b)
> {
>         return RTE_MIN(a, b);
> }
>
> int
> fxminu64(uint64_t a, uint64_t b)
> {
>         return RTE_MIN(a, b);
> }
>
> $gcc -O3 -m64 -S tcmv1.c
>
> $ cat tcmv1.s
>         .file   "tcmv1.c"
>         .text
>         .p2align 4,,15
>         .globl  fxmini32
>         .type   fxmini32, @function
> fxmini32:
> .LFB0:
>         .cfi_startproc
>         cmpl    %esi, %edi
>         movl    %esi, %eax
>         cmovle  %edi, %eax
>         ret
>         .cfi_endproc
> .LFE0:
>         .size   fxmini32, .-fxmini32
>         .p2align 4,,15
>         .globl  fxminu64
>         .type   fxminu64, @function
> fxminu64:
> .LFB1:
>         .cfi_startproc
>         cmpq    %rsi, %rdi
>         movq    %rsi, %rax
>         cmovbe  %rdi, %rax
>         ret
>         .cfi_endproc
>
> gcc version 4.8.3
> clang produces similar code.
>
> Konstantin
>

<rk> Thanks, looks like it depends on gcc version. I have

  .ident "GCC: (GNU) 4.4.6 20110731 (Red Hat 4.4.6-3)"
.section .note.GNU-stack,"",@progbits

...

and it still generates cmp/jump instructions. Anyways i will drop this
patch and just send fix for power_of_2.

-Ravi


> >
> > >
> > > >         })
> > > > >
> > > > >  /**
> > > > > @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v)  #define
> RTE_MAX(a, b)
> > > > ({ \
> > > > >                 typeof (a) _a = (a); \
> > > > >                 typeof (b) _b = (b); \
> > > > > -               _a > _b ? _a : _b; \
> > > > > +               _a ^ ((_a ^ _b) & -(_a < _b)); \
> > > > Same as above
> > > >
> > > > <rk> Same as above.
> > >
> > > > >         })
> > > > >
> > > > >  /*********** Other general functions / macros ********/ diff --git
> > > > > a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c
> index
> > > > > bc3816a..546499c 100644
> > > > > --- a/lib/librte_pmd_e1000/igb_pf.c
> > > > > +++ b/lib/librte_pmd_e1000/igb_pf.c
> > > > > @@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev *dev,
> > > > uint32_t
> > > > > vf, uint32_t *msgbuf)  static int  igb_vf_set_multicast(struct
> > > > rte_eth_dev
> > > > > *dev, __rte_unused uint32_t vf, uint32_t *msgbuf)  {
> > > > > -       int i;
> > > > > +       int16_t i;
> > > > >         uint32_t vector_bit;
> > > > >         uint32_t vector_reg;
> > > > >         uint32_t mta_reg;
> > > > > -       int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > > > > +       int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > > > >                 E1000_VT_MSGINFO_SHIFT;
> > > > NAK, this has nothing to do with the included changelog
> > > >
> > >
> > > <rk> It does, it causes compilation errors such as
> > >
> > > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c: In function
> > > \u2018igb_pf_mbx_process\u2019:
> > > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c:350:23: error: array
> > > subscript is above array bounds [-Werror=array-bounds]
> > >    vfinfo->vf_mc_hashes[i] = hash_list[i];
> > >                        ^
> > > cc1: all warnings being treated as errors
> > >
> > > Also it is always better to use explicit int definitions esp. for 64bit
> > > systems.
> > >
> > >
> > >
> > > >
> > > > >         uint16_t *hash_list = (uint16_t *)&msgbuf[1];
> > > > >         struct e1000_hw *hw =
> > > > > E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > > > > diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > > > > b/lib/librte_pmd_ixgbe/ixgbe_pf.c index 51da1fd..426caf9 100644
> > > > > --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > > > > +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > > > > @@ -390,7 +390,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev,
> > > > > __rte_unused uint32_t vf, uint32
> > > > >         struct ixgbe_hw *hw =
> > > > > IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > > > >         struct ixgbe_vf_info *vfinfo =
> > > > >
>  *(IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private));
> > > > > -       int nb_entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >>
> > > > > +       int32_t nb_entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >>
> > > > >                 IXGBE_VT_MSGINFO_SHIFT;
> > > > ditto
> > > > >         uint16_t *hash_list = (uint16_t *)&msgbuf[1];
> > > > >         uint32_t mta_idx;
> > > > > @@ -399,7 +399,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev,
> > > > > __rte_unused uint32_t vf, uint32
> > > > >         const uint32_t IXGBE_MTA_BIT_SHIFT = 5;
> > > > >         const uint32_t IXGBE_MTA_BIT_MASK = (0x1 <<
> IXGBE_MTA_BIT_SHIFT)
> > > > -
> > > > > 1;
> > > > >         uint32_t reg_val;
> > > > > -       int i;
> > > > > +       int16_t i;
> > > > ditto
> > > >
> > > > <rk> Same as above.
> > >
> > > > >
> > > > >         /* only so many hash values supported */
> > > > >         nb_entries = RTE_MIN(nb_entries, IXGBE_MAX_VF_MC_ENTRIES);
> > > > > --
> > > > > 1.9.1
> > > > >
> > > >
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
  2014-12-16 21:40     ` Neil Horman
@ 2014-12-17 16:40       ` Ravi Kerur
  2014-12-18 19:07         ` Neil Horman
  0 siblings, 1 reply; 12+ messages in thread
From: Ravi Kerur @ 2014-12-17 16:40 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

On Tue, Dec 16, 2014 at 1:40 PM, Neil Horman <nhorman@tuxdriver.com> wrote:
>
> On Tue, Dec 16, 2014 at 08:46:51AM -0800, Ravi Kerur wrote:
> > On Sat, Dec 13, 2014 at 2:39 AM, Neil Horman <nhorman@tuxdriver.com>
> wrote:
> > >
> > > On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote:
> > > > Subject: [PATCH] Minor fixes in rte_common.h file.
> > > >
> > > > Fix rte_is_power_of_2 since 0 is not.
> > > > Avoid branching instructions in RTE_MAX and RTE_MIN.
> > > >
> > > > Signed-off-by: Ravi Kerur <rkerur@gmail.com>
> > > > ---
> > > >  lib/librte_eal/common/include/rte_common.h | 6 +++---
> > > >  lib/librte_pmd_e1000/igb_pf.c              | 4 ++--
> > > >  lib/librte_pmd_ixgbe/ixgbe_pf.c            | 4 ++--
> > > >  3 files changed, 7 insertions(+), 7 deletions(-)
> > > >
> > > > diff --git a/lib/librte_eal/common/include/rte_common.h
> > > > b/lib/librte_eal/common/include/rte_common.h
> > > > index 921b91f..e163f35 100644
> > > > --- a/lib/librte_eal/common/include/rte_common.h
> > > > +++ b/lib/librte_eal/common/include/rte_common.h
> > > > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error;
> static
> > > > inline int  rte_is_power_of_2(uint32_t n)  {
> > > > -       return ((n-1) & n) == 0;
> > > > +       return n && !(n & (n - 1));
> > > >  }
> > > >
> > > >  /**
> > > > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MIN(a,
> b)
> > > ({ \
> > > >                 typeof (a) _a = (a); \
> > > >                 typeof (b) _b = (b); \
> > > > -               _a < _b ? _a : _b; \
> > > > +                _b ^ ((_a ^ _b) & -(_a < _b)); \
> > > Are you sure this is actually faster than the branch version?  What
> about
> > > using
> > > a cmov instead?
> > >
> > >
> > <rk> i am pretty sure modified code is faster than branching. I remember
> > cmov had performance issues esp. on Pentuim-4 not sure how new intel
> cpu's
> > perform.
> >
> Pretty sure isn't sure.  Theres no point in code churn if theres no obvious
> advantage.  Some perf tests to deomonstrate the advantage here would be
> great.
>

<rk> I have used this before with the intent to avoid branching and it was
part of other changes I did for performance improvement in our code.

>
> > >         })
> > > >
> > > >  /**
> > > > @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MAX(a,
> b)
> > > ({ \
> > > >                 typeof (a) _a = (a); \
> > > >                 typeof (b) _b = (b); \
> > > > -               _a > _b ? _a : _b; \
> > > > +               _a ^ ((_a ^ _b) & -(_a < _b)); \
> > > Same as above
> > >
> > > <rk> Same as above.
> >
> > > >         })
> > > >
> > > >  /*********** Other general functions / macros ********/ diff --git
> > > > a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c index
> > > > bc3816a..546499c 100644
> > > > --- a/lib/librte_pmd_e1000/igb_pf.c
> > > > +++ b/lib/librte_pmd_e1000/igb_pf.c
> > > > @@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev *dev,
> > > uint32_t
> > > > vf, uint32_t *msgbuf)  static int  igb_vf_set_multicast(struct
> > > rte_eth_dev
> > > > *dev, __rte_unused uint32_t vf, uint32_t *msgbuf)  {
> > > > -       int i;
> > > > +       int16_t i;
> > > >         uint32_t vector_bit;
> > > >         uint32_t vector_reg;
> > > >         uint32_t mta_reg;
> > > > -       int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > > > +       int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > > >                 E1000_VT_MSGINFO_SHIFT;
> > > NAK, this has nothing to do with the included changelog
> > >
> >
> > <rk> It does, it causes compilation errors such as
> >
> > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c: In function
> > \u2018igb_pf_mbx_process\u2019:
> > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c:350:23: error: array
> > subscript is above array bounds [-Werror=array-bounds]
> >    vfinfo->vf_mc_hashes[i] = hash_list[i];
> >                        ^
> > cc1: all warnings being treated as errors
> >
> > Also it is always better to use explicit int definitions esp. for 64bit
> > systems.
> >
>
> This is your changelog:
> =============================================================
> Subject: [PATCH] Minor fixes in rte_common.h file.
>
> Fix rte_is_power_of_2 since 0 is not.
> Avoid branching instructions in RTE_MAX and RTE_MIN
> =============================================================
>
> Nowhere does your changelog indicate that you are fixing compliation
> errors.
> That would in and of itself be far more serious that making micro
> optimizations.
> If you want to fix build breaks, great, please do, but send a patch that
> clearly
> indicates what the break is and how your fixing it. Don't just toss it in
> with
> whatever other work you happen to be doing.
>

<rk> Main reason was to replace int with explicit sized int, it happened to
fix compiler errors as well. I will make sure comments cover everything
next time. Anyways I will drop this patch and just include fix for
power_of_2.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
  2014-12-17 16:40       ` Ravi Kerur
@ 2014-12-18 19:07         ` Neil Horman
  2014-12-19 13:28           ` Ravi Kerur
  0 siblings, 1 reply; 12+ messages in thread
From: Neil Horman @ 2014-12-18 19:07 UTC (permalink / raw)
  To: Ravi Kerur; +Cc: dev

On Wed, Dec 17, 2014 at 08:40:17AM -0800, Ravi Kerur wrote:
> On Tue, Dec 16, 2014 at 1:40 PM, Neil Horman <nhorman@tuxdriver.com> wrote:
> >
> > On Tue, Dec 16, 2014 at 08:46:51AM -0800, Ravi Kerur wrote:
> > > On Sat, Dec 13, 2014 at 2:39 AM, Neil Horman <nhorman@tuxdriver.com>
> > wrote:
> > > >
> > > > On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote:
> > > > > Subject: [PATCH] Minor fixes in rte_common.h file.
> > > > >
> > > > > Fix rte_is_power_of_2 since 0 is not.
> > > > > Avoid branching instructions in RTE_MAX and RTE_MIN.
> > > > >
> > > > > Signed-off-by: Ravi Kerur <rkerur@gmail.com>
> > > > > ---
> > > > >  lib/librte_eal/common/include/rte_common.h | 6 +++---
> > > > >  lib/librte_pmd_e1000/igb_pf.c              | 4 ++--
> > > > >  lib/librte_pmd_ixgbe/ixgbe_pf.c            | 4 ++--
> > > > >  3 files changed, 7 insertions(+), 7 deletions(-)
> > > > >
> > > > > diff --git a/lib/librte_eal/common/include/rte_common.h
> > > > > b/lib/librte_eal/common/include/rte_common.h
> > > > > index 921b91f..e163f35 100644
> > > > > --- a/lib/librte_eal/common/include/rte_common.h
> > > > > +++ b/lib/librte_eal/common/include/rte_common.h
> > > > > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error;
> > static
> > > > > inline int  rte_is_power_of_2(uint32_t n)  {
> > > > > -       return ((n-1) & n) == 0;
> > > > > +       return n && !(n & (n - 1));
> > > > >  }
> > > > >
> > > > >  /**
> > > > > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MIN(a,
> > b)
> > > > ({ \
> > > > >                 typeof (a) _a = (a); \
> > > > >                 typeof (b) _b = (b); \
> > > > > -               _a < _b ? _a : _b; \
> > > > > +                _b ^ ((_a ^ _b) & -(_a < _b)); \
> > > > Are you sure this is actually faster than the branch version?  What
> > about
> > > > using
> > > > a cmov instead?
> > > >
> > > >
> > > <rk> i am pretty sure modified code is faster than branching. I remember
> > > cmov had performance issues esp. on Pentuim-4 not sure how new intel
> > cpu's
> > > perform.
> > >
> > Pretty sure isn't sure.  Theres no point in code churn if theres no obvious
> > advantage.  Some perf tests to deomonstrate the advantage here would be
> > great.
> >
> 
> <rk> I have used this before with the intent to avoid branching and it was
> part of other changes I did for performance improvement in our code.
> 

Then it should be pretty easy to provide the perf data demonstrating the
advantage in this code.

> >
> > > >         })
> > > > >
> > > > >  /**
> > > > > @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MAX(a,
> > b)
> > > > ({ \
> > > > >                 typeof (a) _a = (a); \
> > > > >                 typeof (b) _b = (b); \
> > > > > -               _a > _b ? _a : _b; \
> > > > > +               _a ^ ((_a ^ _b) & -(_a < _b)); \
> > > > Same as above
> > > >
> > > > <rk> Same as above.
> > >
> > > > >         })
> > > > >
> > > > >  /*********** Other general functions / macros ********/ diff --git
> > > > > a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c index
> > > > > bc3816a..546499c 100644
> > > > > --- a/lib/librte_pmd_e1000/igb_pf.c
> > > > > +++ b/lib/librte_pmd_e1000/igb_pf.c
> > > > > @@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev *dev,
> > > > uint32_t
> > > > > vf, uint32_t *msgbuf)  static int  igb_vf_set_multicast(struct
> > > > rte_eth_dev
> > > > > *dev, __rte_unused uint32_t vf, uint32_t *msgbuf)  {
> > > > > -       int i;
> > > > > +       int16_t i;
> > > > >         uint32_t vector_bit;
> > > > >         uint32_t vector_reg;
> > > > >         uint32_t mta_reg;
> > > > > -       int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > > > > +       int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > > > >                 E1000_VT_MSGINFO_SHIFT;
> > > > NAK, this has nothing to do with the included changelog
> > > >
> > >
> > > <rk> It does, it causes compilation errors such as
> > >
> > > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c: In function
> > > \u2018igb_pf_mbx_process\u2019:
> > > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c:350:23: error: array
> > > subscript is above array bounds [-Werror=array-bounds]
> > >    vfinfo->vf_mc_hashes[i] = hash_list[i];
> > >                        ^
> > > cc1: all warnings being treated as errors
> > >
> > > Also it is always better to use explicit int definitions esp. for 64bit
> > > systems.
> > >
> >
> > This is your changelog:
> > =============================================================
> > Subject: [PATCH] Minor fixes in rte_common.h file.
> >
> > Fix rte_is_power_of_2 since 0 is not.
> > Avoid branching instructions in RTE_MAX and RTE_MIN
> > =============================================================
> >
> > Nowhere does your changelog indicate that you are fixing compliation
> > errors.
> > That would in and of itself be far more serious that making micro
> > optimizations.
> > If you want to fix build breaks, great, please do, but send a patch that
> > clearly
> > indicates what the break is and how your fixing it. Don't just toss it in
> > with
> > whatever other work you happen to be doing.
> >
> 
> <rk> Main reason was to replace int with explicit sized int, it happened to
> fix compiler errors as well. I will make sure comments cover everything
> next time. Anyways I will drop this patch and just include fix for
> power_of_2.
Please separate the compiler warning fixes from the performance enhancing fixes.
They shouldn't be mashed together.

Neil

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
  2014-12-18 19:07         ` Neil Horman
@ 2014-12-19 13:28           ` Ravi Kerur
  0 siblings, 0 replies; 12+ messages in thread
From: Ravi Kerur @ 2014-12-19 13:28 UTC (permalink / raw)
  To: Neil Horman; +Cc: dev

On Thu, Dec 18, 2014 at 11:07 AM, Neil Horman <nhorman@tuxdriver.com> wrote:

> On Wed, Dec 17, 2014 at 08:40:17AM -0800, Ravi Kerur wrote:
> > On Tue, Dec 16, 2014 at 1:40 PM, Neil Horman <nhorman@tuxdriver.com>
> wrote:
> > >
> > > On Tue, Dec 16, 2014 at 08:46:51AM -0800, Ravi Kerur wrote:
> > > > On Sat, Dec 13, 2014 at 2:39 AM, Neil Horman <nhorman@tuxdriver.com>
> > > wrote:
> > > > >
> > > > > On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote:
> > > > > > Subject: [PATCH] Minor fixes in rte_common.h file.
> > > > > >
> > > > > > Fix rte_is_power_of_2 since 0 is not.
> > > > > > Avoid branching instructions in RTE_MAX and RTE_MIN.
> > > > > >
> > > > > > Signed-off-by: Ravi Kerur <rkerur@gmail.com>
> > > > > > ---
> > > > > >  lib/librte_eal/common/include/rte_common.h | 6 +++---
> > > > > >  lib/librte_pmd_e1000/igb_pf.c              | 4 ++--
> > > > > >  lib/librte_pmd_ixgbe/ixgbe_pf.c            | 4 ++--
> > > > > >  3 files changed, 7 insertions(+), 7 deletions(-)
> > > > > >
> > > > > > diff --git a/lib/librte_eal/common/include/rte_common.h
> > > > > > b/lib/librte_eal/common/include/rte_common.h
> > > > > > index 921b91f..e163f35 100644
> > > > > > --- a/lib/librte_eal/common/include/rte_common.h
> > > > > > +++ b/lib/librte_eal/common/include/rte_common.h
> > > > > > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error;
> > > static
> > > > > > inline int  rte_is_power_of_2(uint32_t n)  {
> > > > > > -       return ((n-1) & n) == 0;
> > > > > > +       return n && !(n & (n - 1));
> > > > > >  }
> > > > > >
> > > > > >  /**
> > > > > > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v)  #define
> RTE_MIN(a,
> > > b)
> > > > > ({ \
> > > > > >                 typeof (a) _a = (a); \
> > > > > >                 typeof (b) _b = (b); \
> > > > > > -               _a < _b ? _a : _b; \
> > > > > > +                _b ^ ((_a ^ _b) & -(_a < _b)); \
> > > > > Are you sure this is actually faster than the branch version?  What
> > > about
> > > > > using
> > > > > a cmov instead?
> > > > >
> > > > >
> > > > <rk> i am pretty sure modified code is faster than branching. I
> remember
> > > > cmov had performance issues esp. on Pentuim-4 not sure how new intel
> > > cpu's
> > > > perform.
> > > >
> > > Pretty sure isn't sure.  Theres no point in code churn if theres no
> obvious
> > > advantage.  Some perf tests to deomonstrate the advantage here would be
> > > great.
> > >
> >
> > <rk> I have used this before with the intent to avoid branching and it
> was
> > part of other changes I did for performance improvement in our code.
> >
>
> Then it should be pretty easy to provide the perf data demonstrating the
> advantage in this code.
>
>
<rk> I decided to drop this change because

1. DPDK manual suggests gcc version 4.5.x or greater
2. I was testing code against gcc 4.4.6 (which generated cmp/jump
instructions) and Konstantin showed using gcc 4.8.3 it generates cmov
instructions

If you think it should be pursued further let me know. I can look into
difference between the code generated for both using gcc > 4.5 .x and
update.

Thanks,
Ravi


> > >
> > > > >         })
> > > > > >
> > > > > >  /**
> > > > > > @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v)  #define
> RTE_MAX(a,
> > > b)
> > > > > ({ \
> > > > > >                 typeof (a) _a = (a); \
> > > > > >                 typeof (b) _b = (b); \
> > > > > > -               _a > _b ? _a : _b; \
> > > > > > +               _a ^ ((_a ^ _b) & -(_a < _b)); \
> > > > > Same as above
> > > > >
> > > > > <rk> Same as above.
> > > >
> > > > > >         })
> > > > > >
> > > > > >  /*********** Other general functions / macros ********/ diff
> --git
> > > > > > a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c
> index
> > > > > > bc3816a..546499c 100644
> > > > > > --- a/lib/librte_pmd_e1000/igb_pf.c
> > > > > > +++ b/lib/librte_pmd_e1000/igb_pf.c
> > > > > > @@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev
> *dev,
> > > > > uint32_t
> > > > > > vf, uint32_t *msgbuf)  static int  igb_vf_set_multicast(struct
> > > > > rte_eth_dev
> > > > > > *dev, __rte_unused uint32_t vf, uint32_t *msgbuf)  {
> > > > > > -       int i;
> > > > > > +       int16_t i;
> > > > > >         uint32_t vector_bit;
> > > > > >         uint32_t vector_reg;
> > > > > >         uint32_t mta_reg;
> > > > > > -       int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > > > > > +       int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > > > > >                 E1000_VT_MSGINFO_SHIFT;
> > > > > NAK, this has nothing to do with the included changelog
> > > > >
> > > >
> > > > <rk> It does, it causes compilation errors such as
> > > >
> > > > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c: In function
> > > > \u2018igb_pf_mbx_process\u2019:
> > > > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c:350:23: error:
> array
> > > > subscript is above array bounds [-Werror=array-bounds]
> > > >    vfinfo->vf_mc_hashes[i] = hash_list[i];
> > > >                        ^
> > > > cc1: all warnings being treated as errors
> > > >
> > > > Also it is always better to use explicit int definitions esp. for
> 64bit
> > > > systems.
> > > >
> > >
> > > This is your changelog:
> > > =============================================================
> > > Subject: [PATCH] Minor fixes in rte_common.h file.
> > >
> > > Fix rte_is_power_of_2 since 0 is not.
> > > Avoid branching instructions in RTE_MAX and RTE_MIN
> > > =============================================================
> > >
> > > Nowhere does your changelog indicate that you are fixing compliation
> > > errors.
> > > That would in and of itself be far more serious that making micro
> > > optimizations.
> > > If you want to fix build breaks, great, please do, but send a patch
> that
> > > clearly
> > > indicates what the break is and how your fixing it. Don't just toss it
> in
> > > with
> > > whatever other work you happen to be doing.
> > >
> >
> > <rk> Main reason was to replace int with explicit sized int, it happened
> to
> > fix compiler errors as well. I will make sure comments cover everything
> > next time. Anyways I will drop this patch and just include fix for
> > power_of_2.
> Please separate the compiler warning fixes from the performance enhancing
> fixes.
> They shouldn't be mashed together.
>
> Neil
>
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
@ 2014-12-12 23:03 r k
  0 siblings, 0 replies; 12+ messages in thread
From: r k @ 2014-12-12 23:03 UTC (permalink / raw)
  To: dev

Ravi Kerur (1):
  Minor fixes in rte_common.h file.

 lib/librte_eal/common/include/rte_common.h | 6 +++---
 lib/librte_pmd_e1000/igb_pf.c              | 4 ++--
 lib/librte_pmd_ixgbe/ixgbe_pf.c            | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

--
1.9.1

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2014-12-19 13:28 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-12-12 23:04 [dpdk-dev] [PATCH] Minor fixes in rte_common.h file r k
2014-12-13 10:39 ` Neil Horman
2014-12-16 16:46   ` Ravi Kerur
2014-12-16 17:23     ` Ananyev, Konstantin
2014-12-16 20:13       ` Ravi Kerur
     [not found]         ` <2601191342CEEE43887BDE71AB977258213C1499@IRSMSX105.ger.corp.intel.com>
2014-12-17  1:05           ` Ananyev, Konstantin
2014-12-17 16:28             ` Ravi Kerur
2014-12-16 21:40     ` Neil Horman
2014-12-17 16:40       ` Ravi Kerur
2014-12-18 19:07         ` Neil Horman
2014-12-19 13:28           ` Ravi Kerur
  -- strict thread matches above, loose matches on Subject: below --
2014-12-12 23:03 r k

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).