From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga11.intel.com (mga11.intel.com [192.55.52.93]) by dpdk.org (Postfix) with ESMTP id EE0563B5 for ; Tue, 16 Dec 2014 08:01:06 +0100 (CET) Received: from fmsmga002.fm.intel.com ([10.253.24.26]) by fmsmga102.fm.intel.com with ESMTP; 15 Dec 2014 23:00:55 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.07,584,1413270000"; d="scan'208";a="648273523" Received: from pgsmsx106.gar.corp.intel.com ([10.221.44.98]) by fmsmga002.fm.intel.com with ESMTP; 15 Dec 2014 23:00:55 -0800 Received: from shsmsx152.ccr.corp.intel.com (10.239.6.52) by PGSMSX106.gar.corp.intel.com (10.221.44.98) with Microsoft SMTP Server (TLS) id 14.3.195.1; Tue, 16 Dec 2014 15:00:29 +0800 Received: from shsmsx101.ccr.corp.intel.com ([169.254.1.110]) by SHSMSX152.ccr.corp.intel.com ([169.254.6.5]) with mapi id 14.03.0195.001; Tue, 16 Dec 2014 15:00:28 +0800 From: "Qiu, Michael" To: "Liang, Cunming" , "dev@dpdk.org" Thread-Topic: [dpdk-dev] [RFC PATCH 1/7] eal: add linear thread id as pthread-local variable Thread-Index: AQHQFOb/TzwPL1A/lUOiHJFTVJd/yg== Date: Tue, 16 Dec 2014 07:00:28 +0000 Message-ID: <533710CFB86FA344BFBF2D6802E60286CA02A6@SHSMSX101.ccr.corp.intel.com> References: <1418263490-21088-1-git-send-email-cunming.liang@intel.com> <1418263490-21088-2-git-send-email-cunming.liang@intel.com> Accept-Language: en-US Content-Language: en-US X-MS-Has-Attach: X-MS-TNEF-Correlator: x-originating-ip: [10.239.127.40] Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Subject: Re: [dpdk-dev] [RFC PATCH 1/7] eal: add linear thread id as pthread-local variable X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 16 Dec 2014 07:01:07 -0000 On 12/11/2014 10:05 AM, Cunming Liang wrote:=0A= > Signed-off-by: Cunming Liang =0A= > 
---=0A= > lib/librte_eal/common/include/rte_eal.h | 5 ++=0A= > lib/librte_eal/common/include/rte_lcore.h | 12 ++++=0A= > lib/librte_eal/linuxapp/eal/eal_thread.c | 115 ++++++++++++++++++++++++= ++++--=0A= > 3 files changed, 126 insertions(+), 6 deletions(-)=0A= >=0A= > diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/com= mon/include/rte_eal.h=0A= > index f4ecd2e..2640167 100644=0A= > --- a/lib/librte_eal/common/include/rte_eal.h=0A= > +++ b/lib/librte_eal/common/include/rte_eal.h=0A= > @@ -262,6 +262,11 @@ rte_set_application_usage_hook( rte_usage_hook_t usa= ge_func );=0A= > */=0A= > int rte_eal_has_hugepages(void);=0A= > =0A= > +#ifndef RTE_MAX_THREAD=0A= > +#define RTE_MAX_THREAD RTE_MAX_LCORE=0A= > +#endif=0A= > +=0A= > +=0A= > #ifdef __cplusplus=0A= > }=0A= > #endif=0A= > diff --git a/lib/librte_eal/common/include/rte_lcore.h b/lib/librte_eal/c= ommon/include/rte_lcore.h=0A= > index 49b2c03..cd83d47 100644=0A= > --- a/lib/librte_eal/common/include/rte_lcore.h=0A= > +++ b/lib/librte_eal/common/include/rte_lcore.h=0A= > @@ -73,6 +73,7 @@ struct lcore_config {=0A= > extern struct lcore_config lcore_config[RTE_MAX_LCORE];=0A= > =0A= > RTE_DECLARE_PER_LCORE(unsigned, _lcore_id); /**< Per core "core id". */= =0A= > +RTE_DECLARE_PER_LCORE(unsigned, _thread_id); /**< Per thread "linear tid= ". 
*/=0A= > =0A= > /**=0A= > * Return the ID of the execution unit we are running on.=0A= > @@ -86,6 +87,17 @@ rte_lcore_id(void)=0A= > }=0A= > =0A= > /**=0A= > + * Return the linear thread ID of the cache unit we are running on.=0A= > + * @return=0A= > + * core ID=0A= > + */=0A= > +static inline unsigned long=0A= > +rte_linear_thread_id(void)=0A= > +{=0A= > + return RTE_PER_LCORE(_thread_id);=0A= > +}=0A= > +=0A= > +/**=0A= > * Get the id of the master lcore=0A= > *=0A= > * @return=0A= > diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/li= nuxapp/eal/eal_thread.c=0A= > index 80a985f..52478d6 100644=0A= > --- a/lib/librte_eal/linuxapp/eal/eal_thread.c=0A= > +++ b/lib/librte_eal/linuxapp/eal/eal_thread.c=0A= > @@ -39,6 +39,7 @@=0A= > #include =0A= > #include =0A= > #include =0A= > +#include =0A= > =0A= > #include =0A= > #include =0A= > @@ -51,12 +52,19 @@=0A= > #include =0A= > #include =0A= > #include =0A= > +#include =0A= > +#include =0A= > =0A= > #include "eal_private.h"=0A= > #include "eal_thread.h"=0A= > =0A= > +#define LINEAR_THREAD_ID_POOL "THREAD_ID_POOL"=0A= > +=0A= > RTE_DEFINE_PER_LCORE(unsigned, _lcore_id);=0A= > =0A= > +/* define linear thread id as thread-local variables */=0A= > +RTE_DEFINE_PER_LCORE(unsigned, _thread_id);=0A= > +=0A= > /*=0A= > * Send a message to a slave lcore identified by slave_id to call a=0A= > * function f with argument arg. 
Once the execution is done, the=0A= > @@ -94,12 +102,13 @@ rte_eal_remote_launch(int (*f)(void *), void *arg, u= nsigned slave_id)=0A= > return 0;=0A= > }=0A= > =0A= > +=0A= > /* set affinity for current thread */=0A= > static int=0A= > -eal_thread_set_affinity(void)=0A= > +__eal_thread_set_affinity(pthread_t thread, unsigned lcore)=0A= > {=0A= > +=0A= > int s;=0A= > - pthread_t thread;=0A= > =0A= > /*=0A= > * According to the section VERSIONS of the CPU_ALLOC man page:=0A= > @@ -126,9 +135,8 @@ eal_thread_set_affinity(void)=0A= > =0A= > size =3D CPU_ALLOC_SIZE(RTE_MAX_LCORE);=0A= > CPU_ZERO_S(size, cpusetp);=0A= > - CPU_SET_S(rte_lcore_id(), size, cpusetp);=0A= > + CPU_SET_S(lcore, size, cpusetp);=0A= > =0A= > - thread =3D pthread_self();=0A= > s =3D pthread_setaffinity_np(thread, size, cpusetp);=0A= > if (s !=3D 0) {=0A= > RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");=0A= > @@ -140,9 +148,8 @@ eal_thread_set_affinity(void)=0A= > #else /* CPU_ALLOC */=0A= > cpu_set_t cpuset;=0A= > CPU_ZERO( &cpuset );=0A= > - CPU_SET( rte_lcore_id(), &cpuset );=0A= > + CPU_SET(lcore, &cpuset );=0A= > =0A= > - thread =3D pthread_self();=0A= > s =3D pthread_setaffinity_np(thread, sizeof( cpuset ), &cpuset);=0A= > if (s !=3D 0) {=0A= > RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");=0A= > @@ -152,6 +159,15 @@ eal_thread_set_affinity(void)=0A= > return 0;=0A= > }=0A= > =0A= > +/* set affinity for current thread */=0A= > +static int=0A= > +eal_thread_set_affinity(void)=0A= > +{=0A= > + pthread_t thread =3D pthread_self();=0A= > +=0A= > + return __eal_thread_set_affinity(thread, rte_lcore_id());=0A= > +}=0A= > +=0A= > void eal_thread_init_master(unsigned lcore_id)=0A= > {=0A= > /* set the lcore ID in per-lcore memory area */=0A= > @@ -162,6 +178,87 @@ void eal_thread_init_master(unsigned lcore_id)=0A= > rte_panic("cannot set affinity\n");=0A= > }=0A= > =0A= > +/* linear thread id control block */=0A= > +struct eal_thread_cb {=0A= > + rte_spinlock_t lock;=0A= > + uint64_t 
nb_bucket;=0A= > + uint64_t bitmap[0];=0A= > +};=0A= > +=0A= =0A= Can this struct be declared in a header file?=0A= =0A= > +static struct eal_thread_cb *=0A= > +__create_tid_pool(void)=0A= > +{=0A= > + const struct rte_memzone *mz;=0A= > + struct eal_thread_cb *pcb;=0A= > + uint64_t sz;=0A= > + uint64_t nb_bucket;=0A= > +=0A= > + nb_bucket =3D RTE_ALIGN_CEIL(RTE_MAX_THREAD, 64) / 64;=0A= =0A= Would it be better to replace the division with a right shift?=0A= =0A= nb_bucket =3D RTE_ALIGN_CEIL(RTE_MAX_THREAD, 64) >> 6;=0A= =0A= =0A= > + sz =3D sizeof(*pcb) + nb_bucket * sizeof(uint64_t);=0A= > + mz =3D rte_memzone_reserve(LINEAR_THREAD_ID_POOL,=0A= > + sz, rte_socket_id(), 0);=0A= > + if (mz =3D=3D NULL)=0A= > + rte_panic("Cannot allocate linear thread ID pool\n");=0A= > +=0A= > + pcb =3D mz->addr;=0A= > + rte_spinlock_init(&pcb->lock);=0A= > + pcb->nb_bucket =3D nb_bucket;=0A= > + memset(pcb->bitmap, 0, nb_bucket * sizeof(uint64_t));=0A= > +=0A= > + return pcb;=0A= > +}=0A= > +=0A= > +static int=0A= > +__get_linear_tid(uint64_t *tid)=0A= > +{=0A= > + const struct rte_memzone *mz;=0A= > + struct eal_thread_cb *pcb;=0A= > + uint64_t i;=0A= > + uint8_t shift =3D 0;=0A= > +=0A= > + mz =3D rte_memzone_lookup(LINEAR_THREAD_ID_POOL);=0A= > + if (mz !=3D NULL)=0A= > + pcb =3D mz->addr;=0A= > + else=0A= > + pcb =3D __create_tid_pool();=0A= > +=0A= > + rte_spinlock_lock(&pcb->lock);=0A= > + for (i =3D 0; i < pcb->nb_bucket; i++) {=0A= > + if (pcb->bitmap[i] =3D=3D (uint64_t)-1)=0A= =0A= For the bitmap, it is better to use ~0 (or ~(uint64_t)0) instead of=0A= (uint64_t)-1 to test for all bits set.=0A= =0A= > + continue;=0A= > + shift =3D 0; =0A= > + while (pcb->bitmap[i] & (1UL << shift))=0A= > + shift ++;=0A= > + pcb->bitmap[i] |=3D (1UL << shift);=0A= > + break;=0A= > + }=0A= > + rte_spinlock_unlock(&pcb->lock);=0A= > +=0A= > + if (i =3D=3D pcb->nb_bucket)=0A= > + return -1;=0A= > +=0A= > + *tid =3D i * 64 + shift;=0A= > + return 0;=0A= > +}=0A= > +=0A= > +static void __rte_unused=0A= > 
+__put_linear_tid(uint64_t tid)=0A= > +{=0A= > + const struct rte_memzone *mz;=0A= > + struct eal_thread_cb *pcb;=0A= > + uint8_t shift;=0A= > +=0A= > + mz =3D rte_memzone_lookup(LINEAR_THREAD_ID_POOL);=0A= > + if (!mz)=0A= > + return;=0A= > +=0A= > + pcb =3D mz->addr;=0A= > + rte_spinlock_lock(&pcb->lock);=0A= > + shift =3D tid & 0x3F;=0A= > + pcb->bitmap[tid / 64] &=3D ~(1UL << shift);=0A= =0A= Same here: use tid >> 6 instead of tid / 64.=0A= =0A= =0A= > + rte_spinlock_unlock(&pcb->lock); =0A= > +}=0A= > +=0A= > /* main loop of threads */=0A= > __attribute__((noreturn)) void *=0A= > eal_thread_loop(__attribute__((unused)) void *arg)=0A= > @@ -169,6 +266,7 @@ eal_thread_loop(__attribute__((unused)) void *arg)=0A= > char c;=0A= > int n, ret;=0A= > unsigned lcore_id;=0A= > + unsigned long ltid =3D 0;=0A= > pthread_t thread_id;=0A= > int m2s, s2m;=0A= > =0A= > @@ -191,6 +289,11 @@ eal_thread_loop(__attribute__((unused)) void *arg)= =0A= > /* set the lcore ID in per-lcore memory area */=0A= > RTE_PER_LCORE(_lcore_id) =3D lcore_id;=0A= > =0A= > + /* set the linear thread ID in per-lcore memory area */=0A= > + if (__get_linear_tid(&ltid) < 0)=0A= > + rte_panic("cannot get cache slot id\n");=0A= > + RTE_PER_LCORE(_thread_id) =3D ltid;=0A= > +=0A= > /* set CPU affinity */=0A= > if (eal_thread_set_affinity() < 0)=0A= > rte_panic("cannot set affinity\n");=0A= =0A=