[dpdk-dev] rte_mbuf library likely()/unlikely()

DPDK patches and discussions
 help / color / mirror / Atom feed

* [dpdk-dev] rte_mbuf library likely()/unlikely()
@ 2018-07-23 13:53 Morten Brørup
  2018-07-23 17:37 ` Stephen Hemminger
  2018-07-23 17:51 ` Honnappa Nagarahalli
  0 siblings, 2 replies; 11+ messages in thread
From: Morten Brørup @ 2018-07-23 13:53 UTC (permalink / raw)
  To: Olivier Matz; +Cc: dev

Hi Olivier,

 

I noticed that __rte_pktmbuf_read() could do with an unlikely(), so I went through the entire library. Here are my suggested modifications.

 

 

diff -bu rte_mbuf.c.orig rte_mbuf.c

--- rte_mbuf.c.orig     2018-07-23 15:13:22.000000000 +0200

+++ rte_mbuf.c  2018-07-23 15:32:53.000000000 +0200

@@ -173,19 +173,19 @@

{

        unsigned int nb_segs, pkt_len;

 

-       if (m == NULL)

+       if (unlikely(m == NULL))

                rte_panic("mbuf is NULL\n");

 

        /* generic checks */

-       if (m->pool == NULL)

+       if (unlikely(m->pool == NULL))

                rte_panic("bad mbuf pool\n");

-       if (m->buf_iova == 0)

+       if (unlikely(m->buf_iova == 0))

                rte_panic("bad IO addr\n");

-       if (m->buf_addr == NULL)

+       if (unlikely(m->buf_addr == NULL))

                rte_panic("bad virt addr\n");

 

        uint16_t cnt = rte_mbuf_refcnt_read(m);

-       if ((cnt == 0) || (cnt == UINT16_MAX))

+       if (unlikely((cnt == 0) || (cnt == UINT16_MAX)))

                rte_panic("bad ref cnt\n");

 

        /* nothing to check for sub-segments */

@@ -193,7 +193,7 @@

                return;

 

        /* data_len is supposed to be not more than pkt_len */

-       if (m->data_len > m->pkt_len)

+       if (unlikely(m->data_len > m->pkt_len))

                rte_panic("bad data_len\n");

 

        nb_segs = m->nb_segs;

@@ -204,9 +204,9 @@

                pkt_len -= m->data_len;

        } while ((m = m->next) != NULL);

 

-       if (nb_segs)

+       if (unlikely(nb_segs))

                rte_panic("bad nb_segs\n");

-       if (pkt_len)

+       if (unlikely(pkt_len))

                rte_panic("bad pkt_len\n");

}

 

@@ -249,7 +249,7 @@

        const struct rte_mbuf *seg = m;

        uint32_t buf_off = 0, copy_len;

 

-       if (off + len > rte_pktmbuf_pkt_len(m))

+       if (unlikely(off + len > rte_pktmbuf_pkt_len(m)))

                return NULL;

 

        while (off >= rte_pktmbuf_data_len(seg)) {

@@ -257,7 +257,7 @@

                seg = seg->next;

        }

 

-       if (off + len <= rte_pktmbuf_data_len(seg))

+       if (likely(off + len <= rte_pktmbuf_data_len(seg)))

                return rte_pktmbuf_mtod_offset(seg, char *, off);

 

        /* rare case: header is split among several segments */

@@ -344,7 +344,7 @@

        unsigned int i;

        int ret;

 

-       if (buflen == 0)

+       if (unlikely(buflen == 0))

                return -1;

 

        buf[0] = '\0';

@@ -355,9 +355,9 @@

                if (name == NULL)

                        name = rx_flags[i].default_name;

                ret = snprintf(buf, buflen, "%s ", name);

-               if (ret < 0)

+               if (unlikely(ret < 0))

                        return -1;

-               if ((size_t)ret >= buflen)

+               if (unlikely((size_t)ret >= buflen))

                        return -1;

                buf += ret;

                buflen -= ret;

@@ -440,7 +440,7 @@

        unsigned int i;

        int ret;

 

-       if (buflen == 0)

+       if (unlikely(buflen == 0))

                return -1;

 

        buf[0] = '\0';

@@ -451,9 +451,9 @@

                if (name == NULL)

                        name = tx_flags[i].default_name;

                ret = snprintf(buf, buflen, "%s ", name);

-               if (ret < 0)

+               if (unlikely(ret < 0))

                        return -1;

-               if ((size_t)ret >= buflen)

+               if (unlikely((size_t)ret >= buflen))

                        return -1;

                buf += ret;

                buflen -= ret;

 

 

diff -bu rte_mbuf.h.orig rte_mbuf.h

--- rte_mbuf.h.orig     2018-07-23 15:13:26.000000000 +0200

+++ rte_mbuf.h  2018-07-23 15:24:25.000000000 +0200

@@ -1007,7 +1007,7 @@

{

        struct rte_mbuf *m;

 

-       if (rte_mempool_get(mp, (void **)&m) < 0)

+       if (unlikely(rte_mempool_get(mp, (void **)&m) < 0))

                return NULL;

        MBUF_RAW_ALLOC_CHECK(m);

        return m;

@@ -1268,7 +1268,7 @@

static inline struct rte_mbuf *rte_pktmbuf_alloc(struct rte_mempool *mp)

{

        struct rte_mbuf *m;

-       if ((m = rte_mbuf_raw_alloc(mp)) != NULL)

+       if (likely((m = rte_mbuf_raw_alloc(mp)) != NULL))

                rte_pktmbuf_reset(m);

        return m;

}

@@ -1696,7 +1696,7 @@

{

        struct rte_mbuf *m_next;

 

-       if (m != NULL)

+       if (likely(m != NULL))

                __rte_mbuf_sanity_check(m, 1);

 

        while (m != NULL) {

@@ -2099,7 +2099,7 @@

        struct rte_mbuf *cur_tail;

 

        /* Check for number-of-segments-overflow */

-       if (head->nb_segs + tail->nb_segs > RTE_MBUF_MAX_NB_SEGS)

+       if (unlikely(head->nb_segs + tail->nb_segs > RTE_MBUF_MAX_NB_SEGS))

                return -EOVERFLOW;

 

        /* Chain 'tail' onto the old tail */

@@ -2147,28 +2147,28 @@

                                  m->outer_l3_len;

 

        /* Headers are fragmented */

-       if (rte_pktmbuf_data_len(m) < inner_l3_offset + m->l3_len + m->l4_len)

+       if (unlikely(rte_pktmbuf_data_len(m) < inner_l3_offset + m->l3_len + m->l4_len))

                return -ENOTSUP;

 

        /* IP checksum can be counted only for IPv4 packet */

-       if ((ol_flags & PKT_TX_IP_CKSUM) && (ol_flags & PKT_TX_IPV6))

+       if (unlikely((ol_flags & PKT_TX_IP_CKSUM) && (ol_flags & PKT_TX_IPV6)))

                return -EINVAL;

 

        /* IP type not set when required */

        if (ol_flags & (PKT_TX_L4_MASK | PKT_TX_TCP_SEG))

-               if (!(ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)))

+               if (unlikely(!(ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6))))

                        return -EINVAL;

 

        /* Check requirements for TSO packet */

        if (ol_flags & PKT_TX_TCP_SEG)

-               if ((m->tso_segsz == 0) ||

+               if (unlikely((m->tso_segsz == 0) ||

                                ((ol_flags & PKT_TX_IPV4) &&

-                               !(ol_flags & PKT_TX_IP_CKSUM)))

+                               !(ol_flags & PKT_TX_IP_CKSUM))))

                        return -EINVAL;

 

        /* PKT_TX_OUTER_IP_CKSUM set for non outer IPv4 packet. */

-       if ((ol_flags & PKT_TX_OUTER_IP_CKSUM) &&

-                       !(ol_flags & PKT_TX_OUTER_IPV4))

+       if (unlikely((ol_flags & PKT_TX_OUTER_IP_CKSUM) &&

+                       !(ol_flags & PKT_TX_OUTER_IPV4)))

                return -EINVAL;

 

        return 0;

 

 

diff -bu rte_mbuf_ptype.c.orig rte_mbuf_ptype.c

--- rte_mbuf_ptype.c.orig       2018-07-23 15:45:49.000000000 +0200

+++ rte_mbuf_ptype.c    2018-07-23 15:44:59.000000000 +0200

@@ -118,15 +118,15 @@

{

        int ret;

 

-       if (buflen == 0)

+       if (unlikely(buflen == 0))

                return -1;

 

        buf[0] = '\0';

        if ((ptype & RTE_PTYPE_ALL_MASK) == RTE_PTYPE_UNKNOWN) {

                ret = snprintf(buf, buflen, "UNKNOWN");

-               if (ret < 0)

+               if (unlikely(ret < 0))

                        return -1;

-               if ((size_t)ret >= buflen)

+               if (unlikely((size_t)ret >= buflen))

                       return -1;

                return 0;

        }

@@ -134,9 +134,9 @@

        if ((ptype & RTE_PTYPE_L2_MASK) != 0) {

                ret = snprintf(buf, buflen, "%s ",

                        rte_get_ptype_l2_name(ptype));

-               if (ret < 0)

+               if (unlikely(ret < 0))

                        return -1;

-               if ((size_t)ret >= buflen)

+               if (unlikely((size_t)ret >= buflen))

                        return -1;

                buf += ret;

                buflen -= ret;

@@ -144,9 +144,9 @@

        if ((ptype & RTE_PTYPE_L3_MASK) != 0) {

                ret = snprintf(buf, buflen, "%s ",

                        rte_get_ptype_l3_name(ptype));

-               if (ret < 0)

+               if (unlikely(ret < 0))

                        return -1;

-               if ((size_t)ret >= buflen)

+               if (unlikely((size_t)ret >= buflen))

                        return -1;

                buf += ret;

                buflen -= ret;

@@ -154,9 +154,9 @@

        if ((ptype & RTE_PTYPE_L4_MASK) != 0) {

                ret = snprintf(buf, buflen, "%s ",

                        rte_get_ptype_l4_name(ptype));

-               if (ret < 0)

+               if (unlikely(ret < 0))

                        return -1;

-               if ((size_t)ret >= buflen)

+               if (unlikely((size_t)ret >= buflen))

                        return -1;

                buf += ret;

                buflen -= ret;

@@ -164,9 +164,9 @@

        if ((ptype & RTE_PTYPE_TUNNEL_MASK) != 0) {

                ret = snprintf(buf, buflen, "%s ",

                        rte_get_ptype_tunnel_name(ptype));

-               if (ret < 0)

+               if (unlikely(ret < 0))

                        return -1;

-               if ((size_t)ret >= buflen)

+               if (unlikely((size_t)ret >= buflen))

                        return -1;

                buf += ret;

                buflen -= ret;

@@ -174,9 +174,9 @@

        if ((ptype & RTE_PTYPE_INNER_L2_MASK) != 0) {

                ret = snprintf(buf, buflen, "%s ",

                        rte_get_ptype_inner_l2_name(ptype));

-               if (ret < 0)

+               if (unlikely(ret < 0))

                        return -1;

-               if ((size_t)ret >= buflen)

+               if (unlikely((size_t)ret >= buflen))

                        return -1;

                buf += ret;

                buflen -= ret;

@@ -184,9 +184,9 @@

        if ((ptype & RTE_PTYPE_INNER_L3_MASK) != 0) {

                ret = snprintf(buf, buflen, "%s ",

                        rte_get_ptype_inner_l3_name(ptype));

-               if (ret < 0)

+               if (unlikely(ret < 0))

                        return -1;

-               if ((size_t)ret >= buflen)

+               if (unlikely((size_t)ret >= buflen))

                        return -1;

                buf += ret;

                buflen -= ret;

@@ -194,9 +194,9 @@

        if ((ptype & RTE_PTYPE_INNER_L4_MASK) != 0) {

                ret = snprintf(buf, buflen, "%s ",

                        rte_get_ptype_inner_l4_name(ptype));

-               if (ret < 0)

+               if (unlikely(ret < 0))

                        return -1;

-               if ((size_t)ret >= buflen)

+               if (unlikely((size_t)ret >= buflen))

                        return -1;

                buf += ret;

                buflen -= ret;

 

 

 

Med venlig hilsen / kind regards

 

Morten Brørup

CTO

 

 

SmartShare Systems A/S

Tonsbakken 16-18

DK-2740 Skovlunde

Denmark

 

Office      +45 70 20 00 93

Direct      +45 89 93 50 22

Mobile     +45 25 40 82 12

 

mb@smartsharesystems.com <mailto:mb@smartsharesystems.com> 

www.smartsharesystems.com <https://www.smartsharesystems.com/> 

 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [dpdk-dev] rte_mbuf library likely()/unlikely()
  2018-07-23 13:53 [dpdk-dev] rte_mbuf library likely()/unlikely() Morten Brørup
@ 2018-07-23 17:37 ` Stephen Hemminger
  2018-07-23 18:59   ` Morten Brørup
  2018-07-23 17:51 ` Honnappa Nagarahalli
  1 sibling, 1 reply; 11+ messages in thread
From: Stephen Hemminger @ 2018-07-23 17:37 UTC (permalink / raw)
  To: Morten Brørup; +Cc: Olivier Matz, dev

On Mon, 23 Jul 2018 15:53:42 +0200
Morten Brørup <mb@smartsharesystems.com> wrote:

> Hi Olivier,
> 
>  
> 
> I noticed that __rte_pktmbuf_read() could do with an unlikely(), so I went through the entire library. Here are my suggested modifications.
> 
>  
> 
>  
> 
> diff -bu rte_mbuf.c.orig rte_mbuf.c
> 
> --- rte_mbuf.c.orig     2018-07-23 15:13:22.000000000 +0200
> 
> +++ rte_mbuf.c  2018-07-23 15:32:53.000000000 +0200
> 
> @@ -173,19 +173,19 @@
> 
> {
> 
>         unsigned int nb_segs, pkt_len;
> 
>  
> 
> -       if (m == NULL)
> 
> +       if (unlikely(m == NULL))
> 
>                 rte_panic("mbuf is NULL\n");
> 
>  

Adding is unlikely is not necessary since rte_panic is marked with cold attribute
which has the same effect.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [dpdk-dev] rte_mbuf library likely()/unlikely()
  2018-07-23 17:37 ` Stephen Hemminger
@ 2018-07-23 18:59   ` Morten Brørup
  2018-07-23 19:45     ` Stephen Hemminger
  0 siblings, 1 reply; 11+ messages in thread
From: Morten Brørup @ 2018-07-23 18:59 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Olivier Matz, dev


> -----Original Message-----
> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Monday, July 23, 2018 7:38 PM
> To: Morten Brørup
> Cc: Olivier Matz; dev@dpdk.org
> Subject: Re: [dpdk-dev] rte_mbuf library likely()/unlikely()
> 
> On Mon, 23 Jul 2018 15:53:42 +0200
> Morten Brørup <mb@smartsharesystems.com> wrote:
> 
> > Hi Olivier,
> >
> >
> >
> > I noticed that __rte_pktmbuf_read() could do with an unlikely(), so I
> went through the entire library. Here are my suggested modifications.
> >
> >
> >
> >
> >
> > diff -bu rte_mbuf.c.orig rte_mbuf.c
> >
> > --- rte_mbuf.c.orig     2018-07-23 15:13:22.000000000 +0200
> >
> > +++ rte_mbuf.c  2018-07-23 15:32:53.000000000 +0200
> >
> > @@ -173,19 +173,19 @@
> >
> > {
> >
> >         unsigned int nb_segs, pkt_len;
> >
> >
> >
> > -       if (m == NULL)
> >
> > +       if (unlikely(m == NULL))
> >
> >                 rte_panic("mbuf is NULL\n");
> >
> >
> 
> Adding is unlikely is not necessary since rte_panic is marked with cold
> attribute
> which has the same effect.

I was not aware of this. Although it is not visible from the source code files using rte_panic(), it probably means we shouldn't as so much as I thought. Here's an updated patch for rte_mbuf.c, where it is relevant. The other two suggested patches are unaffected.

diff -bu rte_mbuf.c.orig rte_mbuf.c
--- rte_mbuf.c.orig     2018-07-23 15:13:22.000000000 +0200
+++ rte_mbuf.c  2018-07-23 20:52:35.000000000 +0200
@@ -249,7 +249,7 @@
        const struct rte_mbuf *seg = m;
        uint32_t buf_off = 0, copy_len;

-       if (off + len > rte_pktmbuf_pkt_len(m))
+       if (unlikely(off + len > rte_pktmbuf_pkt_len(m)))
                return NULL;

        while (off >= rte_pktmbuf_data_len(seg)) {
@@ -257,7 +257,7 @@
                seg = seg->next;
        }

-       if (off + len <= rte_pktmbuf_data_len(seg))
+       if (likely(off + len <= rte_pktmbuf_data_len(seg)))
                return rte_pktmbuf_mtod_offset(seg, char *, off);

        /* rare case: header is split among several segments */
@@ -344,7 +344,7 @@
        unsigned int i;
        int ret;

-       if (buflen == 0)
+       if (unlikely(buflen == 0))
                return -1;

        buf[0] = '\0';
@@ -355,9 +355,9 @@
                if (name == NULL)
                        name = rx_flags[i].default_name;
                ret = snprintf(buf, buflen, "%s ", name);
-               if (ret < 0)
+               if (unlikely(ret < 0))
                        return -1;
-               if ((size_t)ret >= buflen)
+               if (unlikely((size_t)ret >= buflen))
                        return -1;
                buf += ret;
                buflen -= ret;
@@ -440,7 +440,7 @@
        unsigned int i;
        int ret;

-       if (buflen == 0)
+       if (unlikely(buflen == 0))
                return -1;

        buf[0] = '\0';
@@ -451,9 +451,9 @@
                if (name == NULL)
                        name = tx_flags[i].default_name;
                ret = snprintf(buf, buflen, "%s ", name);
-               if (ret < 0)
+               if (unlikely(ret < 0))
                        return -1;
-               if ((size_t)ret >= buflen)
+               if (unlikely((size_t)ret >= buflen))
                        return -1;
                buf += ret;
                buflen -= ret;


Med venlig hilsen / kind regards
- Morten Brørup

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [dpdk-dev] rte_mbuf library likely()/unlikely()
  2018-07-23 18:59   ` Morten Brørup
@ 2018-07-23 19:45     ` Stephen Hemminger
  0 siblings, 0 replies; 11+ messages in thread
From: Stephen Hemminger @ 2018-07-23 19:45 UTC (permalink / raw)
  To: Morten Brørup; +Cc: Olivier Matz, dev

On Mon, 23 Jul 2018 20:59:29 +0200
Morten Brørup <mb@smartsharesystems.com> wrote:

> > -----Original Message-----
> > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > Sent: Monday, July 23, 2018 7:38 PM
> > To: Morten Brørup
> > Cc: Olivier Matz; dev@dpdk.org
> > Subject: Re: [dpdk-dev] rte_mbuf library likely()/unlikely()
> > 
> > On Mon, 23 Jul 2018 15:53:42 +0200
> > Morten Brørup <mb@smartsharesystems.com> wrote:
> >   
> > > Hi Olivier,
> > >
> > >
> > >
> > > I noticed that __rte_pktmbuf_read() could do with an unlikely(), so I  
> > went through the entire library. Here are my suggested modifications.  
> > >
> > >
> > >
> > >
> > >
> > > diff -bu rte_mbuf.c.orig rte_mbuf.c
> > >
> > > --- rte_mbuf.c.orig     2018-07-23 15:13:22.000000000 +0200
> > >
> > > +++ rte_mbuf.c  2018-07-23 15:32:53.000000000 +0200
> > >
> > > @@ -173,19 +173,19 @@
> > >
> > > {
> > >
> > >         unsigned int nb_segs, pkt_len;
> > >
> > >
> > >
> > > -       if (m == NULL)
> > >
> > > +       if (unlikely(m == NULL))
> > >
> > >                 rte_panic("mbuf is NULL\n");
> > >
> > >  
> > 
> > Adding is unlikely is not necessary since rte_panic is marked with cold
> > attribute
> > which has the same effect.  
> 
> I was not aware of this. Although it is not visible from the source code files using rte_panic(), it probably means we shouldn't as so much as I thought. Here's an updated patch for rte_mbuf.c, where it is relevant. The other two suggested patches are unaffected.
> 
> diff -bu rte_mbuf.c.orig rte_mbuf.c
> --- rte_mbuf.c.orig     2018-07-23 15:13:22.000000000 +0200
> +++ rte_mbuf.c  2018-07-23 20:52:35.000000000 +0200
> @@ -249,7 +249,7 @@
>         const struct rte_mbuf *seg = m;
>         uint32_t buf_off = 0, copy_len;
> 
> -       if (off + len > rte_pktmbuf_pkt_len(m))
> +       if (unlikely(off + len > rte_pktmbuf_pkt_len(m)))
>                 return NULL;
> 
>         while (off >= rte_pktmbuf_data_len(seg)) {
> @@ -257,7 +257,7 @@
>                 seg = seg->next;
>         }
> 
> -       if (off + len <= rte_pktmbuf_data_len(seg))
> +       if (likely(off + len <= rte_pktmbuf_data_len(seg)))
>                 return rte_pktmbuf_mtod_offset(seg, char *, off);
> 
>         /* rare case: header is split among several segments */
> @@ -344,7 +344,7 @@
>         unsigned int i;
>         int ret;
> 
> -       if (buflen == 0)
> +       if (unlikely(buflen == 0))
>                 return -1;
> 
>         buf[0] = '\0';
> @@ -355,9 +355,9 @@
>                 if (name == NULL)
>                         name = rx_flags[i].default_name;
>                 ret = snprintf(buf, buflen, "%s ", name);
> -               if (ret < 0)
> +               if (unlikely(ret < 0))
>                         return -1;
> -               if ((size_t)ret >= buflen)
> +               if (unlikely((size_t)ret >= buflen))
>                         return -1;
>                 buf += ret;
>                 buflen -= ret;
> @@ -440,7 +440,7 @@
>         unsigned int i;
>         int ret;
> 
> -       if (buflen == 0)
> +       if (unlikely(buflen == 0))
>                 return -1;
> 
>         buf[0] = '\0';
> @@ -451,9 +451,9 @@
>                 if (name == NULL)
>                         name = tx_flags[i].default_name;
>                 ret = snprintf(buf, buflen, "%s ", name);
> -               if (ret < 0)
> +               if (unlikely(ret < 0))
>                         return -1;
> -               if ((size_t)ret >= buflen)
> +               if (unlikely((size_t)ret >= buflen))
>                         return -1;
>                 buf += ret;
>                 buflen -= ret;
> 
> 
> Med venlig hilsen / kind regards
> - Morten Brørup

Yes, this makes sense.

Please format patch with signed-off-by and submit according to
the contributing guidelines Creating Patches section.

https://doc.dpdk.org/guides/contributing/patches.html

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [dpdk-dev] rte_mbuf library likely()/unlikely()
  2018-07-23 13:53 [dpdk-dev] rte_mbuf library likely()/unlikely() Morten Brørup
  2018-07-23 17:37 ` Stephen Hemminger
@ 2018-07-23 17:51 ` Honnappa Nagarahalli
  2018-07-23 19:09   ` Morten Brørup
  1 sibling, 1 reply; 11+ messages in thread
From: Honnappa Nagarahalli @ 2018-07-23 17:51 UTC (permalink / raw)
  To: Morten Brørup, Olivier Matz; +Cc: dev

Do you see any performance improvements with these changes?

-----Original Message-----
From: dev <dev-bounces@dpdk.org> On Behalf Of Morten Brørup
Sent: Monday, July 23, 2018 8:54 AM
To: Olivier Matz <olivier.matz@6wind.com>
Cc: dev@dpdk.org
Subject: [dpdk-dev] rte_mbuf library likely()/unlikely()

Hi Olivier,



I noticed that __rte_pktmbuf_read() could do with an unlikely(), so I went through the entire library. Here are my suggested modifications.





diff -bu rte_mbuf.c.orig rte_mbuf.c

--- rte_mbuf.c.orig     2018-07-23 15:13:22.000000000 +0200

+++ rte_mbuf.c  2018-07-23 15:32:53.000000000 +0200

@@ -173,19 +173,19 @@

{

        unsigned int nb_segs, pkt_len;



-       if (m == NULL)

+       if (unlikely(m == NULL))

                rte_panic("mbuf is NULL\n");



        /* generic checks */

-       if (m->pool == NULL)

+       if (unlikely(m->pool == NULL))

                rte_panic("bad mbuf pool\n");

-       if (m->buf_iova == 0)

+       if (unlikely(m->buf_iova == 0))

                rte_panic("bad IO addr\n");

-       if (m->buf_addr == NULL)

+       if (unlikely(m->buf_addr == NULL))

                rte_panic("bad virt addr\n");



        uint16_t cnt = rte_mbuf_refcnt_read(m);

-       if ((cnt == 0) || (cnt == UINT16_MAX))

+       if (unlikely((cnt == 0) || (cnt == UINT16_MAX)))

                rte_panic("bad ref cnt\n");



        /* nothing to check for sub-segments */

@@ -193,7 +193,7 @@

                return;



        /* data_len is supposed to be not more than pkt_len */

-       if (m->data_len > m->pkt_len)

+       if (unlikely(m->data_len > m->pkt_len))

                rte_panic("bad data_len\n");



        nb_segs = m->nb_segs;

@@ -204,9 +204,9 @@

                pkt_len -= m->data_len;

        } while ((m = m->next) != NULL);



-       if (nb_segs)

+       if (unlikely(nb_segs))

                rte_panic("bad nb_segs\n");

-       if (pkt_len)

+       if (unlikely(pkt_len))

                rte_panic("bad pkt_len\n");

}



@@ -249,7 +249,7 @@

        const struct rte_mbuf *seg = m;

        uint32_t buf_off = 0, copy_len;



-       if (off + len > rte_pktmbuf_pkt_len(m))

+       if (unlikely(off + len > rte_pktmbuf_pkt_len(m)))

                return NULL;



        while (off >= rte_pktmbuf_data_len(seg)) {

@@ -257,7 +257,7 @@

                seg = seg->next;

        }



-       if (off + len <= rte_pktmbuf_data_len(seg))

+       if (likely(off + len <= rte_pktmbuf_data_len(seg)))

                return rte_pktmbuf_mtod_offset(seg, char *, off);



        /* rare case: header is split among several segments */

@@ -344,7 +344,7 @@

        unsigned int i;

        int ret;



-       if (buflen == 0)

+       if (unlikely(buflen == 0))

                return -1;



        buf[0] = '\0';

@@ -355,9 +355,9 @@

                if (name == NULL)

                        name = rx_flags[i].default_name;

                ret = snprintf(buf, buflen, "%s ", name);

-               if (ret < 0)

+               if (unlikely(ret < 0))

                        return -1;

-               if ((size_t)ret >= buflen)

+               if (unlikely((size_t)ret >= buflen))

                        return -1;

                buf += ret;

                buflen -= ret;

@@ -440,7 +440,7 @@

        unsigned int i;

        int ret;



-       if (buflen == 0)

+       if (unlikely(buflen == 0))

                return -1;



        buf[0] = '\0';

@@ -451,9 +451,9 @@

                if (name == NULL)

                        name = tx_flags[i].default_name;

                ret = snprintf(buf, buflen, "%s ", name);

-               if (ret < 0)

+               if (unlikely(ret < 0))

                        return -1;

-               if ((size_t)ret >= buflen)

+               if (unlikely((size_t)ret >= buflen))

                        return -1;

                buf += ret;

                buflen -= ret;





diff -bu rte_mbuf.h.orig rte_mbuf.h

--- rte_mbuf.h.orig     2018-07-23 15:13:26.000000000 +0200

+++ rte_mbuf.h  2018-07-23 15:24:25.000000000 +0200

@@ -1007,7 +1007,7 @@

{

        struct rte_mbuf *m;



-       if (rte_mempool_get(mp, (void **)&m) < 0)

+       if (unlikely(rte_mempool_get(mp, (void **)&m) < 0))

                return NULL;

        MBUF_RAW_ALLOC_CHECK(m);

        return m;

@@ -1268,7 +1268,7 @@

static inline struct rte_mbuf *rte_pktmbuf_alloc(struct rte_mempool *mp)

{

        struct rte_mbuf *m;

-       if ((m = rte_mbuf_raw_alloc(mp)) != NULL)

+       if (likely((m = rte_mbuf_raw_alloc(mp)) != NULL))

                rte_pktmbuf_reset(m);

        return m;

}

@@ -1696,7 +1696,7 @@

{

        struct rte_mbuf *m_next;



-       if (m != NULL)

+       if (likely(m != NULL))

                __rte_mbuf_sanity_check(m, 1);



        while (m != NULL) {

@@ -2099,7 +2099,7 @@

        struct rte_mbuf *cur_tail;



        /* Check for number-of-segments-overflow */

-       if (head->nb_segs + tail->nb_segs > RTE_MBUF_MAX_NB_SEGS)

+       if (unlikely(head->nb_segs + tail->nb_segs > RTE_MBUF_MAX_NB_SEGS))

                return -EOVERFLOW;



        /* Chain 'tail' onto the old tail */

@@ -2147,28 +2147,28 @@

                                  m->outer_l3_len;



        /* Headers are fragmented */

-       if (rte_pktmbuf_data_len(m) < inner_l3_offset + m->l3_len + m->l4_len)

+       if (unlikely(rte_pktmbuf_data_len(m) < inner_l3_offset + m->l3_len + m->l4_len))

                return -ENOTSUP;



        /* IP checksum can be counted only for IPv4 packet */

-       if ((ol_flags & PKT_TX_IP_CKSUM) && (ol_flags & PKT_TX_IPV6))

+       if (unlikely((ol_flags & PKT_TX_IP_CKSUM) && (ol_flags & PKT_TX_IPV6)))

                return -EINVAL;



        /* IP type not set when required */

        if (ol_flags & (PKT_TX_L4_MASK | PKT_TX_TCP_SEG))

-               if (!(ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)))

+               if (unlikely(!(ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6))))

                        return -EINVAL;



        /* Check requirements for TSO packet */

        if (ol_flags & PKT_TX_TCP_SEG)

-               if ((m->tso_segsz == 0) ||

+               if (unlikely((m->tso_segsz == 0) ||

                                ((ol_flags & PKT_TX_IPV4) &&

-                               !(ol_flags & PKT_TX_IP_CKSUM)))

+                               !(ol_flags & PKT_TX_IP_CKSUM))))

                        return -EINVAL;



        /* PKT_TX_OUTER_IP_CKSUM set for non outer IPv4 packet. */

-       if ((ol_flags & PKT_TX_OUTER_IP_CKSUM) &&

-                       !(ol_flags & PKT_TX_OUTER_IPV4))

+       if (unlikely((ol_flags & PKT_TX_OUTER_IP_CKSUM) &&

+                       !(ol_flags & PKT_TX_OUTER_IPV4)))

                return -EINVAL;



        return 0;





diff -bu rte_mbuf_ptype.c.orig rte_mbuf_ptype.c

--- rte_mbuf_ptype.c.orig       2018-07-23 15:45:49.000000000 +0200

+++ rte_mbuf_ptype.c    2018-07-23 15:44:59.000000000 +0200

@@ -118,15 +118,15 @@

{

        int ret;



-       if (buflen == 0)

+       if (unlikely(buflen == 0))

                return -1;



        buf[0] = '\0';

        if ((ptype & RTE_PTYPE_ALL_MASK) == RTE_PTYPE_UNKNOWN) {

                ret = snprintf(buf, buflen, "UNKNOWN");

-               if (ret < 0)

+               if (unlikely(ret < 0))

                        return -1;

-               if ((size_t)ret >= buflen)

+               if (unlikely((size_t)ret >= buflen))

                       return -1;

                return 0;

        }

@@ -134,9 +134,9 @@

        if ((ptype & RTE_PTYPE_L2_MASK) != 0) {

                ret = snprintf(buf, buflen, "%s ",

                        rte_get_ptype_l2_name(ptype));

-               if (ret < 0)

+               if (unlikely(ret < 0))

                        return -1;

-               if ((size_t)ret >= buflen)

+               if (unlikely((size_t)ret >= buflen))

                        return -1;

                buf += ret;

                buflen -= ret;

@@ -144,9 +144,9 @@

        if ((ptype & RTE_PTYPE_L3_MASK) != 0) {

                ret = snprintf(buf, buflen, "%s ",

                        rte_get_ptype_l3_name(ptype));

-               if (ret < 0)

+               if (unlikely(ret < 0))

                        return -1;

-               if ((size_t)ret >= buflen)

+               if (unlikely((size_t)ret >= buflen))

                        return -1;

                buf += ret;

                buflen -= ret;

@@ -154,9 +154,9 @@

        if ((ptype & RTE_PTYPE_L4_MASK) != 0) {

                ret = snprintf(buf, buflen, "%s ",

                        rte_get_ptype_l4_name(ptype));

-               if (ret < 0)

+               if (unlikely(ret < 0))

                        return -1;

-               if ((size_t)ret >= buflen)

+               if (unlikely((size_t)ret >= buflen))

                        return -1;

                buf += ret;

                buflen -= ret;

@@ -164,9 +164,9 @@

        if ((ptype & RTE_PTYPE_TUNNEL_MASK) != 0) {

                ret = snprintf(buf, buflen, "%s ",

                        rte_get_ptype_tunnel_name(ptype));

-               if (ret < 0)

+               if (unlikely(ret < 0))

                        return -1;

-               if ((size_t)ret >= buflen)

+               if (unlikely((size_t)ret >= buflen))

                        return -1;

                buf += ret;

                buflen -= ret;

@@ -174,9 +174,9 @@

        if ((ptype & RTE_PTYPE_INNER_L2_MASK) != 0) {

                ret = snprintf(buf, buflen, "%s ",

                        rte_get_ptype_inner_l2_name(ptype));

-               if (ret < 0)

+               if (unlikely(ret < 0))

                        return -1;

-               if ((size_t)ret >= buflen)

+               if (unlikely((size_t)ret >= buflen))

                        return -1;

                buf += ret;

                buflen -= ret;

@@ -184,9 +184,9 @@

        if ((ptype & RTE_PTYPE_INNER_L3_MASK) != 0) {

                ret = snprintf(buf, buflen, "%s ",

                        rte_get_ptype_inner_l3_name(ptype));

-               if (ret < 0)

+               if (unlikely(ret < 0))

                        return -1;

-               if ((size_t)ret >= buflen)

+               if (unlikely((size_t)ret >= buflen))

                        return -1;

                buf += ret;

                buflen -= ret;

@@ -194,9 +194,9 @@

        if ((ptype & RTE_PTYPE_INNER_L4_MASK) != 0) {

                ret = snprintf(buf, buflen, "%s ",

                        rte_get_ptype_inner_l4_name(ptype));

-               if (ret < 0)

+               if (unlikely(ret < 0))

                        return -1;

-               if ((size_t)ret >= buflen)

+               if (unlikely((size_t)ret >= buflen))

                        return -1;

                buf += ret;

                buflen -= ret;







Med venlig hilsen / kind regards



Morten Brørup

CTO





SmartShare Systems A/S

Tonsbakken 16-18

DK-2740 Skovlunde

Denmark



Office      +45 70 20 00 93

Direct      +45 89 93 50 22

Mobile     +45 25 40 82 12



mb@smartsharesystems.com <mailto:mb@smartsharesystems.com>

www.smartsharesystems.com <https://www.smartsharesystems.com/>



IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [dpdk-dev] rte_mbuf library likely()/unlikely()
  2018-07-23 17:51 ` Honnappa Nagarahalli
@ 2018-07-23 19:09   ` Morten Brørup
  2018-07-23 22:40     ` Wiles, Keith
  0 siblings, 1 reply; 11+ messages in thread
From: Morten Brørup @ 2018-07-23 19:09 UTC (permalink / raw)
  To: Honnappa Nagarahalli, Olivier Matz; +Cc: dev

I haven't performance tested, but they are compiler branch prediction hints pointing out the most likely execution path, so I expect them to have a positive effect.

E.g. the first comparison in __rte_pktmbuf_read() is very unlikely to be true - it would mean that the application is trying to read data beyond the packet.

Please also refer to:
https://cellperformance.beyond3d.com/articles/2006/04/branch-patterns-using-gcc.html


> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Honnappa Nagarahalli
> Sent: Monday, July 23, 2018 7:52 PM
> To: Morten Brørup; Olivier Matz
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] rte_mbuf library likely()/unlikely()
> 
> Do you see any performance improvements with these changes?
> 
> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Morten Brørup
> Sent: Monday, July 23, 2018 8:54 AM
> To: Olivier Matz <olivier.matz@6wind.com>
> Cc: dev@dpdk.org
> Subject: [dpdk-dev] rte_mbuf library likely()/unlikely()
> 
> Hi Olivier,
> 
> 
> 
> I noticed that __rte_pktmbuf_read() could do with an unlikely(), so I went
> through the entire library. Here are my suggested modifications.
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [dpdk-dev] rte_mbuf library likely()/unlikely()
  2018-07-23 19:09   ` Morten Brørup
@ 2018-07-23 22:40     ` Wiles, Keith
  2018-07-24  7:29       ` Olivier Matz
  0 siblings, 1 reply; 11+ messages in thread
From: Wiles, Keith @ 2018-07-23 22:40 UTC (permalink / raw)
  To: Morten Brørup; +Cc: Honnappa Nagarahalli, Olivier Matz, dev



> On Jul 23, 2018, at 2:09 PM, Morten Brørup <mb@smartsharesystems.com> wrote:
> 
> I haven't performance tested, but they are compiler branch prediction hints pointing out the most likely execution path, so I expect them to have a positive effect.

We really need to make sure this provides any performance improvement and that means it needs to be tested on a number of systems. Can you please do some performance testing or see if we can get the guys doing DPDK performance testing to first give this a try? This area is very sensitive to tweaking.

> 
> E.g. the first comparison in __rte_pktmbuf_read() is very unlikely to be true - it would mean that the application is trying to read data beyond the packet.
> 
> Please also refer to:
> https://cellperformance.beyond3d.com/articles/2006/04/branch-patterns-using-gcc.html
> 
> 
>> -----Original Message-----
>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Honnappa Nagarahalli
>> Sent: Monday, July 23, 2018 7:52 PM
>> To: Morten Brørup; Olivier Matz
>> Cc: dev@dpdk.org
>> Subject: Re: [dpdk-dev] rte_mbuf library likely()/unlikely()
>> 
>> Do you see any performance improvements with these changes?
>> 
>> -----Original Message-----
>> From: dev <dev-bounces@dpdk.org> On Behalf Of Morten Brørup
>> Sent: Monday, July 23, 2018 8:54 AM
>> To: Olivier Matz <olivier.matz@6wind.com>
>> Cc: dev@dpdk.org
>> Subject: [dpdk-dev] rte_mbuf library likely()/unlikely()
>> 
>> Hi Olivier,
>> 
>> 
>> 
>> I noticed that __rte_pktmbuf_read() could do with an unlikely(), so I went
>> through the entire library. Here are my suggested modifications.
>> 

Regards,
Keith


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [dpdk-dev] rte_mbuf library likely()/unlikely()
  2018-07-23 22:40     ` Wiles, Keith
@ 2018-07-24  7:29       ` Olivier Matz
  2018-07-24  8:13         ` Morten Brørup
  0 siblings, 1 reply; 11+ messages in thread
From: Olivier Matz @ 2018-07-24  7:29 UTC (permalink / raw)
  To: Wiles, Keith; +Cc: Morten Brørup, Honnappa Nagarahalli, dev

Hi,

On Mon, Jul 23, 2018 at 10:40:03PM +0000, Wiles, Keith wrote:
> 
> 
> > On Jul 23, 2018, at 2:09 PM, Morten Brørup <mb@smartsharesystems.com> wrote:
> > 
> > I haven't performance tested, but they are compiler branch prediction hints pointing out the most likely execution path, so I expect them to have a positive effect.
> 
> We really need to make sure this provides any performance improvement and that means it needs to be tested on a number of systems. Can you please do some performance testing or see if we can get the guys doing DPDK performance testing to first give this a try? This area is very sensitive to tweaking.

I agree we should be driven by performance improvements.

I remember a discussion with Bruce on the ML saying that hardware branch
predictors generally do a good job.

Thanks,
Olivier

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [dpdk-dev] rte_mbuf library likely()/unlikely()
  2018-07-24  7:29       ` Olivier Matz
@ 2018-07-24  8:13         ` Morten Brørup
  2018-07-24 11:31           ` Van Haaren, Harry
  0 siblings, 1 reply; 11+ messages in thread
From: Morten Brørup @ 2018-07-24  8:13 UTC (permalink / raw)
  To: Olivier Matz, Wiles, Keith; +Cc: Honnappa Nagarahalli, dev

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Olivier Matz
> Sent: Tuesday, July 24, 2018 9:29 AM
> 
> Hi,
> 
> On Mon, Jul 23, 2018 at 10:40:03PM +0000, Wiles, Keith wrote:
> >
> >
> > > On Jul 23, 2018, at 2:09 PM, Morten Brørup
> <mb@smartsharesystems.com> wrote:
> > >
> > > I haven't performance tested, but they are compiler branch
> prediction hints pointing out the most likely execution path, so I
> expect them to have a positive effect.
> >
> > We really need to make sure this provides any performance improvement
> and that means it needs to be tested on a number of systems. Can you
> please do some performance testing or see if we can get the guys doing
> DPDK performance testing to first give this a try? This area is very
> sensitive to tweaking.
> 
> I agree we should be driven by performance improvements.

Which is why I suggested these changes. Theoretically, they will provide a performance improvement. The most likely execution path is obvious from code review.

> I remember a discussion with Bruce on the ML saying that hardware
> branch
> predictors generally do a good job.

They do, and it is very well documented. E.g. here's a really interesting historical review about branch predictors:
https://danluu.com/branch-prediction/

However, just because hardware branch predictors are pretty good, I don't think we should remove or stop adding likely()/unlikely() and other branch prediction hints. The hints still add value, both for execution speed and for source code readability.

Please also refer to the other link I provided about GCC branches. It basically says that GCC treats an If-sentence like this:

If (Condition) Then
	Expect to execute this
Else
	Do not expect to execute this

So if we don't want unlikely() around an if-condition which probably evaluates to false, we should rewrite the execution order accordingly.

Although hardware branch predictors help a lot most of the time, the likely()/unlikely() still helps the first time the CPU executes the branch instruction.

Furthermore, I'm very well aware of the rule of thumb for adding likely()/unlikely(): Don't add one if it isn't correct almost every time the branch is considered.

How much more compiler branch prediction hints adds to hardware compiler branch prediction is a somewhat academic discussion. But Honnappa and Keith are right: Performance improvements should be performance tested.

Unfortunately, I don't have the equipment or resources to perform a usable performance test, so I submitted the changes to the mailing list for code review instead. And I'm certainly getting code reviewed now. :-)

From a code review perspective, someone else than me might observe that the exception handling execution path is "missing" the unlikely() hint, so I would argue that code readability is an argument for adding it - unless performance testing shows a slowdown.

Med venlig hilsen / kind regards
- Morten Brørup

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [dpdk-dev] rte_mbuf library likely()/unlikely()
  2018-07-24  8:13         ` Morten Brørup
@ 2018-07-24 11:31           ` Van Haaren, Harry
  2018-07-24 13:02             ` Wiles, Keith
  0 siblings, 1 reply; 11+ messages in thread
From: Van Haaren, Harry @ 2018-07-24 11:31 UTC (permalink / raw)
  To: Morten Brørup, Olivier Matz, Wiles, Keith; +Cc: Honnappa Nagarahalli, dev

> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Morten Brørup
> Sent: Tuesday, July 24, 2018 9:14 AM
> To: Olivier Matz <olivier.matz@6wind.com>; Wiles, Keith
> <keith.wiles@intel.com>
> Cc: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; dev@dpdk.org
> Subject: Re: [dpdk-dev] rte_mbuf library likely()/unlikely()
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Olivier Matz
> > Sent: Tuesday, July 24, 2018 9:29 AM
> >
> > Hi,
> >
> > On Mon, Jul 23, 2018 at 10:40:03PM +0000, Wiles, Keith wrote:
> > >
> > >
> > > > On Jul 23, 2018, at 2:09 PM, Morten Brørup
> > <mb@smartsharesystems.com> wrote:
> > > >
> > > > I haven't performance tested, but they are compiler branch
> > prediction hints pointing out the most likely execution path, so I
> > expect them to have a positive effect.
> > >
> > > We really need to make sure this provides any performance improvement
> > and that means it needs to be tested on a number of systems. Can you
> > please do some performance testing or see if we can get the guys doing
> > DPDK performance testing to first give this a try? This area is very
> > sensitive to tweaking.
> >
> > I agree we should be driven by performance improvements.
> 
> Which is why I suggested these changes. Theoretically, they will provide a
> performance improvement. The most likely execution path is obvious from code
> review.

> > I remember a discussion with Bruce on the ML saying that hardware
> > branch predictors generally do a good job.
> 
> They do, and it is very well documented. E.g. here's a really interesting
> historical review about branch predictors:
> https://danluu.com/branch-prediction/
> 
> However, just because hardware branch predictors are pretty good, I don't
> think we should remove or stop adding likely()/unlikely() and other branch
> prediction hints. The hints still add value, both for execution speed and
> for source code readability.

Although "likely" and "unlikely" sound like they predict branch-taken
or branch-not-taken, in practice the UNLIKELY() macro expands to __builtin_expect().

The compiler uses __builtin_expect() to understand what code is expected to
run most of the time. In *practice* what this means is that the code that is
not expected to run gets moved "far away" in the binary itself. The compiler
can layout code to be better with the "static branch prediction" methods (see below).

Think of UNLIKELY() not as helping a runtime branch predictor, it doesn't.
Think of UNLIKELY() as a method to move error-handling instructions to cold
instruction-cache-lines.

To put it another way, using UNLIKELY() puts the unlikely branch code far away,
and there's a chance we're going to pay an i-cache miss for that.

As a summary:
UNLIKELY() for *error handling* is good, it moves cold instructions out of hot i-cache
UNLIKELY() on anything that is valid to happen (e.g. ring dequeue == 0) is not advised,
as we could cause i-cache misses on the data path. 

> Please also refer to the other link I provided about GCC branches. It
> basically says that GCC treats an If-sentence like this:
> 
> If (Condition) Then
> 	Expect to execute this
> Else
> 	Do not expect to execute this
> 
> So if we don't want unlikely() around an if-condition which probably
> evaluates to false, we should rewrite the execution order accordingly.

In the case of error handling, this is correct - and the compiler will handle it.
If both taken and not-taken occur at runtime, let the runtime branch predictor handle it.

> Although hardware branch predictors help a lot most of the time, the
> likely()/unlikely() still helps the first time the CPU executes the branch
> instruction.

There is a static branch prediction scheme that predicts branches that are
executing for the first time. It is detailed in the Software Optimization Manual,
Section 3.4 "Optimizing the front end", Page 103 "Static Branch Prediction Algorithm":

https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf

UNLIKELY() tells GCC which layout to use - so it does help somewhat, but the
price of a single i-cache miss is much higher than multiple branch-misses...

> Furthermore, I'm very well aware of the rule of thumb for adding
> likely()/unlikely(): Don't add one if it isn't correct almost every time the
> branch is considered.

I would word this stronger: if both taken and not-taken are *valid*, don't use UNLIKELY().
Use UNLIKELY() for error handling cases only.

> How much more compiler branch prediction hints adds to hardware compiler
> branch prediction is a somewhat academic discussion. But Honnappa and Keith
> are right: Performance improvements should be performance tested.

+1 that ultimately it comes down to measured numbers, on a variety of platforms..

..but micro-benchmarks are not enough either. What if the whole micro-bench fits into
i-cache, it looks like there is no difference in perf. Then apply these UNLIKELY()
changes to a real world app, and the i-cache misses may become much more expensive.

> Unfortunately, I don't have the equipment or resources to perform a usable
> performance test, so I submitted the changes to the mailing list for code
> review instead. And I'm certainly getting code reviewed now. :-)
> 
> From a code review perspective, someone else than me might observe that the
> exception handling execution path is "missing" the unlikely() hint, so I
> would argue that code readability is an argument for adding it - unless
> performance testing shows a slowdown.
> 
> 
> Med venlig hilsen / kind regards
> - Morten Brørup

Unless you see a significant branch-miss bottleneck somewhere in the code,
my opinion is that we are trying to micro-optimize code, when there are probably
larger problems to solve.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [dpdk-dev] rte_mbuf library likely()/unlikely()
  2018-07-24 11:31           ` Van Haaren, Harry
@ 2018-07-24 13:02             ` Wiles, Keith
  0 siblings, 0 replies; 11+ messages in thread
From: Wiles, Keith @ 2018-07-24 13:02 UTC (permalink / raw)
  To: Van Haaren, Harry
  Cc: Morten Brørup, Olivier Matz, Honnappa Nagarahalli, dev



> On Jul 24, 2018, at 6:31 AM, Van Haaren, Harry <harry.van.haaren@intel.com> wrote:
> 
>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Morten Brørup
>> Sent: Tuesday, July 24, 2018 9:14 AM
>> To: Olivier Matz <olivier.matz@6wind.com>; Wiles, Keith
>> <keith.wiles@intel.com>
>> Cc: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; dev@dpdk.org
>> Subject: Re: [dpdk-dev] rte_mbuf library likely()/unlikely()
>> 
>>> -----Original Message-----
>>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Olivier Matz
>>> Sent: Tuesday, July 24, 2018 9:29 AM
>>> 
>>> Hi,
>>> 
>>> On Mon, Jul 23, 2018 at 10:40:03PM +0000, Wiles, Keith wrote:
>>>> 
>>>> 
>>>>> On Jul 23, 2018, at 2:09 PM, Morten Brørup
>>> <mb@smartsharesystems.com> wrote:
>>>>> 
>>>>> I haven't performance tested, but they are compiler branch
>>> prediction hints pointing out the most likely execution path, so I
>>> expect them to have a positive effect.
>>>> 
>>>> We really need to make sure this provides any performance improvement
>>> and that means it needs to be tested on a number of systems. Can you
>>> please do some performance testing or see if we can get the guys doing
>>> DPDK performance testing to first give this a try? This area is very
>>> sensitive to tweaking.
>>> 
>>> I agree we should be driven by performance improvements.
>> 
>> Which is why I suggested these changes. Theoretically, they will provide a
>> performance improvement. The most likely execution path is obvious from code
>> review.
> 
> 
>>> I remember a discussion with Bruce on the ML saying that hardware
>>> branch predictors generally do a good job.
>> 
>> They do, and it is very well documented. E.g. here's a really interesting
>> historical review about branch predictors:
>> https://danluu.com/branch-prediction/
>> 
>> However, just because hardware branch predictors are pretty good, I don't
>> think we should remove or stop adding likely()/unlikely() and other branch
>> prediction hints. The hints still add value, both for execution speed and
>> for source code readability.
> 
> 
> Although "likely" and "unlikely" sound like they predict branch-taken
> or branch-not-taken, in practice the UNLIKELY() macro expands to __builtin_expect().
> 
> The compiler uses __builtin_expect() to understand what code is expected to
> run most of the time. In *practice* what this means is that the code that is
> not expected to run gets moved "far away" in the binary itself. The compiler
> can layout code to be better with the "static branch prediction" methods (see below).
> 
> Think of UNLIKELY() not as helping a runtime branch predictor, it doesn't.
> Think of UNLIKELY() as a method to move error-handling instructions to cold
> instruction-cache-lines.
> 
> To put it another way, using UNLIKELY() puts the unlikely branch code far away,
> and there's a chance we're going to pay an i-cache miss for that.
> 
> As a summary:
> UNLIKELY() for *error handling* is good, it moves cold instructions out of hot i-cache
> UNLIKELY() on anything that is valid to happen (e.g. ring dequeue == 0) is not advised,
> as we could cause i-cache misses on the data path. 
> 
> 
>> Please also refer to the other link I provided about GCC branches. It
>> basically says that GCC treats an If-sentence like this:
>> 
>> If (Condition) Then
>> 	Expect to execute this
>> Else
>> 	Do not expect to execute this
>> 
>> So if we don't want unlikely() around an if-condition which probably
>> evaluates to false, we should rewrite the execution order accordingly.
> 
> In the case of error handling, this is correct - and the compiler will handle it.
> If both taken and not-taken occur at runtime, let the runtime branch predictor handle it.
> 
> 
>> Although hardware branch predictors help a lot most of the time, the
>> likely()/unlikely() still helps the first time the CPU executes the branch
>> instruction.
> 
> There is a static branch prediction scheme that predicts branches that are
> executing for the first time. It is detailed in the Software Optimization Manual,
> Section 3.4 "Optimizing the front end", Page 103 "Static Branch Prediction Algorithm":
> 
> https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
> 
> UNLIKELY() tells GCC which layout to use - so it does help somewhat, but the
> price of a single i-cache miss is much higher than multiple branch-misses...
> 
> 
>> Furthermore, I'm very well aware of the rule of thumb for adding
>> likely()/unlikely(): Don't add one if it isn't correct almost every time the
>> branch is considered.
> 
> I would word this stronger: if both taken and not-taken are *valid*, don't use UNLIKELY().
> Use UNLIKELY() for error handling cases only.
> 
> 
>> How much more compiler branch prediction hints adds to hardware compiler
>> branch prediction is a somewhat academic discussion. But Honnappa and Keith
>> are right: Performance improvements should be performance tested.
> 
> +1 that ultimately it comes down to measured numbers, on a variety of platforms..
> 
> ..but micro-benchmarks are not enough either. What if the whole micro-bench fits into
> i-cache, it looks like there is no difference in perf. Then apply these UNLIKELY()
> changes to a real world app, and the i-cache misses may become much more expensive.
> 
> 
>> Unfortunately, I don't have the equipment or resources to perform a usable
>> performance test, so I submitted the changes to the mailing list for code
>> review instead. And I'm certainly getting code reviewed now. :-)
>> 
>> From a code review perspective, someone else than me might observe that the
>> exception handling execution path is "missing" the unlikely() hint, so I
>> would argue that code readability is an argument for adding it - unless
>> performance testing shows a slowdown.
>> 
>> 
>> Med venlig hilsen / kind regards
>> - Morten Brørup
> 
> Unless you see a significant branch-miss bottleneck somewhere in the code,
> my opinion is that we are trying to micro-optimize code, when there are probably
> larger problems to solve.

+1

Regards,
Keith


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2018-07-24 13:03 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-07-23 13:53 [dpdk-dev] rte_mbuf library likely()/unlikely() Morten Brørup
2018-07-23 17:37 ` Stephen Hemminger
2018-07-23 18:59   ` Morten Brørup
2018-07-23 19:45     ` Stephen Hemminger
2018-07-23 17:51 ` Honnappa Nagarahalli
2018-07-23 19:09   ` Morten Brørup
2018-07-23 22:40     ` Wiles, Keith
2018-07-24  7:29       ` Olivier Matz
2018-07-24  8:13         ` Morten Brørup
2018-07-24 11:31           ` Van Haaren, Harry
2018-07-24 13:02             ` Wiles, Keith

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).