patches for DPDK stable branches
 help / color / mirror / Atom feed
* [PATCH] eal/unix: optimize thread creation with glibc
@ 2024-11-02 10:08 David Marchand
  2024-11-02 11:32 ` [PATCH v2] " David Marchand
                   ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: David Marchand @ 2024-11-02 10:08 UTC (permalink / raw)
  To: dev; +Cc: thomas, stephen, luca.boccassi, stable, Tyler Retzlaff

Setting the cpu affinity of the child thread from the parent thread is
racy when using pthread_setaffinity_np, as the child thread may start
running and initialize before affinity is set.

On the other hand, setting the cpu affinity from the child thread itself
may fail, so the parent thread waits for the child thread to report
whether this call succeeded.

This synchronisation point resulted in a significant slow down of
rte_thread_create() (as seen in the lcores_autotest unit tests, in OBS
for some ARM systems).

Another option for setting cpu affinity is to use the not portable
pthread_attr_setaffinity_np, but it is not available with musl.
Assume availability by relying on __USE_GNU that is not set with musl.

Fixes: b28c6196b132 ("eal/unix: fix thread creation")
Cc: stable@dpdk.org

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 lib/eal/unix/rte_thread.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
index 1b4c73f58e..e42b6c37a2 100644
--- a/lib/eal/unix/rte_thread.c
+++ b/lib/eal/unix/rte_thread.c
@@ -4,6 +4,7 @@
  */
 
 #include <errno.h>
+#include <features.h>
 #include <pthread.h>
 #include <stdbool.h>
 #include <stdlib.h>
@@ -19,6 +20,7 @@ struct eal_tls_key {
 	pthread_key_t thread_index;
 };
 
+#ifndef __USE_GNU
 struct thread_start_context {
 	rte_thread_func thread_func;
 	void *thread_args;
@@ -28,6 +30,7 @@ struct thread_start_context {
 	int wrapper_ret;
 	bool wrapper_done;
 };
+#endif
 
 static int
 thread_map_priority_to_os_value(enum rte_thread_priority eal_pri, int *os_pri,
@@ -88,6 +91,7 @@ thread_map_os_priority_to_eal_priority(int policy, int os_pri,
 	return 0;
 }
 
+#ifndef __USE_GNU
 static void *
 thread_start_wrapper(void *arg)
 {
@@ -113,6 +117,7 @@ thread_start_wrapper(void *arg)
 
 	return (void *)(uintptr_t)thread_func(thread_args);
 }
+#endif
 
 int
 rte_thread_create(rte_thread_t *thread_id,
@@ -126,6 +131,7 @@ rte_thread_create(rte_thread_t *thread_id,
 		.sched_priority = 0,
 	};
 	int policy = SCHED_OTHER;
+#ifndef __USE_GNU
 	struct thread_start_context ctx = {
 		.thread_func = thread_func,
 		.thread_args = args,
@@ -134,6 +140,7 @@ rte_thread_create(rte_thread_t *thread_id,
 		.wrapper_mutex = PTHREAD_MUTEX_INITIALIZER,
 		.wrapper_cond = PTHREAD_COND_INITIALIZER,
 	};
+#endif
 
 	if (thread_attr != NULL) {
 		ret = pthread_attr_init(&attr);
@@ -144,6 +151,16 @@ rte_thread_create(rte_thread_t *thread_id,
 
 		attrp = &attr;
 
+#ifdef __USE_GNU
+		if (CPU_COUNT(&thread_attr->cpuset) > 0) {
+			ret = pthread_attr_setaffinity_np(attrp, sizeof(thread_attr->cpuset),
+				&thread_attr->cpuset);
+			if (ret != 0) {
+				EAL_LOG(DEBUG, "pthread_attr_setaffinity_np failed");
+				goto cleanup;
+			}
+		}
+#endif
 		/*
 		 * Set the inherit scheduler parameter to explicit,
 		 * otherwise the priority attribute is ignored.
@@ -178,6 +195,14 @@ rte_thread_create(rte_thread_t *thread_id,
 		}
 	}
 
+#ifdef __USE_GNU
+	ret = pthread_create((pthread_t *)&thread_id->opaque_id, attrp,
+		(void *)(void *)thread_func, args);
+	if (ret != 0) {
+		EAL_LOG(DEBUG, "pthread_create failed");
+		goto cleanup;
+	}
+#else /* !__USE_GNU */
 	ret = pthread_create((pthread_t *)&thread_id->opaque_id, attrp,
 		thread_start_wrapper, &ctx);
 	if (ret != 0) {
@@ -193,6 +218,7 @@ rte_thread_create(rte_thread_t *thread_id,
 
 	if (ret != 0)
 		rte_thread_join(*thread_id, NULL);
+#endif /* __USE_GNU */
 
 cleanup:
 	if (attrp != NULL)
-- 
2.46.2


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v2] eal/unix: optimize thread creation with glibc
  2024-11-02 10:08 [PATCH] eal/unix: optimize thread creation with glibc David Marchand
@ 2024-11-02 11:32 ` David Marchand
  2024-11-02 12:36   ` Luca Boccassi
  2024-11-02 19:58 ` [PATCH] " Stephen Hemminger
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 10+ messages in thread
From: David Marchand @ 2024-11-02 11:32 UTC (permalink / raw)
  To: dev; +Cc: thomas, stephen, luca.boccassi, stable, Tyler Retzlaff

Setting the cpu affinity of the child thread from the parent thread is
racy when using pthread_setaffinity_np, as the child thread may start
running and initialize before affinity is set.

On the other hand, setting the cpu affinity from the child thread itself
may fail, so the parent thread waits for the child thread to report
whether this call succeeded.

This synchronisation point resulted in a significant slow down of
rte_thread_create() (as seen in the lcores_autotest unit tests, in OBS
for some ARM systems).

Another option for setting cpu affinity is to use the not portable
pthread_attr_setaffinity_np, but it is not available with musl.
Assume availability by relying on __USE_GNU that is set with glibc.

Fixes: b28c6196b132 ("eal/unix: fix thread creation")
Cc: stable@dpdk.org

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Changes since v1:
- fixed build with FreeBSD,

---
 lib/eal/unix/rte_thread.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
index 1b4c73f58e..03c4164059 100644
--- a/lib/eal/unix/rte_thread.c
+++ b/lib/eal/unix/rte_thread.c
@@ -19,6 +19,7 @@ struct eal_tls_key {
 	pthread_key_t thread_index;
 };
 
+#ifndef __USE_GNU
 struct thread_start_context {
 	rte_thread_func thread_func;
 	void *thread_args;
@@ -28,6 +29,7 @@ struct thread_start_context {
 	int wrapper_ret;
 	bool wrapper_done;
 };
+#endif
 
 static int
 thread_map_priority_to_os_value(enum rte_thread_priority eal_pri, int *os_pri,
@@ -88,6 +90,7 @@ thread_map_os_priority_to_eal_priority(int policy, int os_pri,
 	return 0;
 }
 
+#ifndef __USE_GNU
 static void *
 thread_start_wrapper(void *arg)
 {
@@ -113,6 +116,7 @@ thread_start_wrapper(void *arg)
 
 	return (void *)(uintptr_t)thread_func(thread_args);
 }
+#endif
 
 int
 rte_thread_create(rte_thread_t *thread_id,
@@ -126,6 +130,7 @@ rte_thread_create(rte_thread_t *thread_id,
 		.sched_priority = 0,
 	};
 	int policy = SCHED_OTHER;
+#ifndef __USE_GNU
 	struct thread_start_context ctx = {
 		.thread_func = thread_func,
 		.thread_args = args,
@@ -134,6 +139,7 @@ rte_thread_create(rte_thread_t *thread_id,
 		.wrapper_mutex = PTHREAD_MUTEX_INITIALIZER,
 		.wrapper_cond = PTHREAD_COND_INITIALIZER,
 	};
+#endif
 
 	if (thread_attr != NULL) {
 		ret = pthread_attr_init(&attr);
@@ -144,6 +150,16 @@ rte_thread_create(rte_thread_t *thread_id,
 
 		attrp = &attr;
 
+#ifdef __USE_GNU
+		if (CPU_COUNT(&thread_attr->cpuset) > 0) {
+			ret = pthread_attr_setaffinity_np(attrp, sizeof(thread_attr->cpuset),
+				&thread_attr->cpuset);
+			if (ret != 0) {
+				EAL_LOG(DEBUG, "pthread_attr_setaffinity_np failed");
+				goto cleanup;
+			}
+		}
+#endif
 		/*
 		 * Set the inherit scheduler parameter to explicit,
 		 * otherwise the priority attribute is ignored.
@@ -178,6 +194,14 @@ rte_thread_create(rte_thread_t *thread_id,
 		}
 	}
 
+#ifdef __USE_GNU
+	ret = pthread_create((pthread_t *)&thread_id->opaque_id, attrp,
+		(void *)(void *)thread_func, args);
+	if (ret != 0) {
+		EAL_LOG(DEBUG, "pthread_create failed");
+		goto cleanup;
+	}
+#else /* !__USE_GNU */
 	ret = pthread_create((pthread_t *)&thread_id->opaque_id, attrp,
 		thread_start_wrapper, &ctx);
 	if (ret != 0) {
@@ -193,6 +217,7 @@ rte_thread_create(rte_thread_t *thread_id,
 
 	if (ret != 0)
 		rte_thread_join(*thread_id, NULL);
+#endif /* __USE_GNU */
 
 cleanup:
 	if (attrp != NULL)
-- 
2.46.2


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] eal/unix: optimize thread creation with glibc
  2024-11-02 11:32 ` [PATCH v2] " David Marchand
@ 2024-11-02 12:36   ` Luca Boccassi
  0 siblings, 0 replies; 10+ messages in thread
From: Luca Boccassi @ 2024-11-02 12:36 UTC (permalink / raw)
  To: David Marchand; +Cc: dev, thomas, stephen, stable, Tyler Retzlaff

On Sat, 2 Nov 2024 at 11:32, David Marchand <david.marchand@redhat.com> wrote:
>
> Setting the cpu affinity of the child thread from the parent thread is
> racy when using pthread_setaffinity_np, as the child thread may start
> running and initialize before affinity is set.
>
> On the other hand, setting the cpu affinity from the child thread itself
> may fail, so the parent thread waits for the child thread to report
> whether this call succeeded.
>
> This synchronisation point resulted in a significant slow down of
> rte_thread_create() (as seen in the lcores_autotest unit tests, in OBS
> for some ARM systems).
>
> Another option for setting cpu affinity is to use the not portable
> pthread_attr_setaffinity_np, but it is not available with musl.
> Assume availability by relying on __USE_GNU that is set with glibc.
>
> Fixes: b28c6196b132 ("eal/unix: fix thread creation")
> Cc: stable@dpdk.org
>
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---
> Changes since v1:
> - fixed build with FreeBSD,
>
> ---
>  lib/eal/unix/rte_thread.c | 25 +++++++++++++++++++++++++
>  1 file changed, 25 insertions(+)

The test now completes in 1.19 seconds, so this fixes the issue with glibc:

[  438s] 36/82 DPDK:fast-tests / lcores_autotest             OK
      1.19s

I do not use musl, so this is good enough for me. Thanks!

Acked-by: Luca Boccassi <bluca@debian.org>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] eal/unix: optimize thread creation with glibc
  2024-11-02 10:08 [PATCH] eal/unix: optimize thread creation with glibc David Marchand
  2024-11-02 11:32 ` [PATCH v2] " David Marchand
@ 2024-11-02 19:58 ` Stephen Hemminger
  2024-11-03 10:37   ` David Marchand
  2024-11-03 11:25 ` [PATCH v3] " David Marchand
  2024-11-04  8:57 ` [PATCH v4] eal/unix: optimize thread creation David Marchand
  3 siblings, 1 reply; 10+ messages in thread
From: Stephen Hemminger @ 2024-11-02 19:58 UTC (permalink / raw)
  To: David Marchand; +Cc: dev, thomas, luca.boccassi, stable, Tyler Retzlaff

On Sat,  2 Nov 2024 11:08:39 +0100
David Marchand <david.marchand@redhat.com> wrote:

> Setting the cpu affinity of the child thread from the parent thread is
> racy when using pthread_setaffinity_np, as the child thread may start
> running and initialize before affinity is set.
> 
> On the other hand, setting the cpu affinity from the child thread itself
> may fail, so the parent thread waits for the child thread to report
> whether this call succeeded.
> 
> This synchronisation point resulted in a significant slow down of
> rte_thread_create() (as seen in the lcores_autotest unit tests, in OBS
> for some ARM systems).
> 
> Another option for setting cpu affinity is to use the not portable
> pthread_attr_setaffinity_np, but it is not available with musl.
> Assume availability by relying on __USE_GNU that is not set with musl.
> 
> Fixes: b28c6196b132 ("eal/unix: fix thread creation")
> Cc: stable@dpdk.org
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---

Seems like a reasonable way forward but not sure.

What about FreeBSD (which shares this code)?

Looking inside glibc, what it does is have the parent call sched_setaffinity()
after the clone system call with the thread id. So using attributes is
just as racy as the original code.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] eal/unix: optimize thread creation with glibc
  2024-11-02 19:58 ` [PATCH] " Stephen Hemminger
@ 2024-11-03 10:37   ` David Marchand
  0 siblings, 0 replies; 10+ messages in thread
From: David Marchand @ 2024-11-03 10:37 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, thomas, luca.boccassi, stable, Tyler Retzlaff

On Sat, Nov 2, 2024 at 8:58 PM Stephen Hemminger
<stephen@networkplumber.org> wrote:
>
> On Sat,  2 Nov 2024 11:08:39 +0100
> David Marchand <david.marchand@redhat.com> wrote:
>
> > Setting the cpu affinity of the child thread from the parent thread is
> > racy when using pthread_setaffinity_np, as the child thread may start
> > running and initialize before affinity is set.
> >
> > On the other hand, setting the cpu affinity from the child thread itself
> > may fail, so the parent thread waits for the child thread to report
> > whether this call succeeded.
> >
> > This synchronisation point resulted in a significant slow down of
> > rte_thread_create() (as seen in the lcores_autotest unit tests, in OBS
> > for some ARM systems).
> >
> > Another option for setting cpu affinity is to use the not portable
> > pthread_attr_setaffinity_np, but it is not available with musl.
> > Assume availability by relying on __USE_GNU that is not set with musl.
> >
> > Fixes: b28c6196b132 ("eal/unix: fix thread creation")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > ---
>
> Seems like a reasonable way forward but not sure.
>
> What about FreeBSD (which shares this code)?

I did not investigate FreeBSD.
Either we find a "good" #define, or we could add some detection in
meson (probably the best option).


>
> Looking inside glibc, what it does is have the parent call sched_setaffinity()
> after the clone system call with the thread id. So using attributes is
> just as racy as the original code.

If there were such a race in glibc, I would call it a bug.

Afaiu, the parent calls sched_setaffinity while holding a lock on the
child thread; see the "CREATE THREAD NOTES" comment and stopped_start
in the glibc sources.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v3] eal/unix: optimize thread creation with glibc
  2024-11-02 10:08 [PATCH] eal/unix: optimize thread creation with glibc David Marchand
  2024-11-02 11:32 ` [PATCH v2] " David Marchand
  2024-11-02 19:58 ` [PATCH] " Stephen Hemminger
@ 2024-11-03 11:25 ` David Marchand
  2024-11-03 16:10   ` Stephen Hemminger
  2024-11-04  0:19   ` fengchengwen
  2024-11-04  8:57 ` [PATCH v4] eal/unix: optimize thread creation David Marchand
  3 siblings, 2 replies; 10+ messages in thread
From: David Marchand @ 2024-11-03 11:25 UTC (permalink / raw)
  To: dev; +Cc: thomas, stephen, luca.boccassi, stable, Luca Boccassi, Tyler Retzlaff

Setting the cpu affinity of the child thread from the parent thread is
racy when using pthread_setaffinity_np, as the child thread may start
running and initialize before affinity is set.

On the other hand, setting the cpu affinity from the child thread itself
may fail, so the parent thread waits for the child thread to report
whether this call succeeded.

This synchronisation point resulted in a significant slow down of
rte_thread_create() (as seen in the lcores_autotest unit tests, in OBS
for some ARM systems).

Another option for setting cpu affinity is to use the not portable
pthread_attr_setaffinity_np, but it is not available with musl.

Fixes: b28c6196b132 ("eal/unix: fix thread creation")
Cc: stable@dpdk.org

Signed-off-by: David Marchand <david.marchand@redhat.com>
Acked-by: Luca Boccassi <bluca@debian.org>
---
Changes since v2:
- added pthread_attr_setaffinity_np() detection,

Changes since v1:
- fixed build with FreeBSD,

---
 lib/eal/unix/meson.build  |  5 +++++
 lib/eal/unix/rte_thread.c | 25 +++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/lib/eal/unix/meson.build b/lib/eal/unix/meson.build
index cc7d67dd32..f845625a54 100644
--- a/lib/eal/unix/meson.build
+++ b/lib/eal/unix/meson.build
@@ -11,3 +11,8 @@ sources += files(
         'eal_unix_timer.c',
         'rte_thread.c',
 )
+
+if cc.has_function('pthread_attr_setaffinity_np', args: '-D_GNU_SOURCE',
+                   prefix : '#include <pthread.h>')
+    cflags += '-DRTE_EAL_PTHREAD_ATTR_SETAFFINITY_NP'
+endif
diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
index 1b4c73f58e..ea629c2065 100644
--- a/lib/eal/unix/rte_thread.c
+++ b/lib/eal/unix/rte_thread.c
@@ -19,6 +19,7 @@ struct eal_tls_key {
 	pthread_key_t thread_index;
 };
 
+#ifndef RTE_EAL_PTHREAD_ATTR_SETAFFINITY_NP
 struct thread_start_context {
 	rte_thread_func thread_func;
 	void *thread_args;
@@ -28,6 +29,7 @@ struct thread_start_context {
 	int wrapper_ret;
 	bool wrapper_done;
 };
+#endif
 
 static int
 thread_map_priority_to_os_value(enum rte_thread_priority eal_pri, int *os_pri,
@@ -88,6 +90,7 @@ thread_map_os_priority_to_eal_priority(int policy, int os_pri,
 	return 0;
 }
 
+#ifndef RTE_EAL_PTHREAD_ATTR_SETAFFINITY_NP
 static void *
 thread_start_wrapper(void *arg)
 {
@@ -113,6 +116,7 @@ thread_start_wrapper(void *arg)
 
 	return (void *)(uintptr_t)thread_func(thread_args);
 }
+#endif
 
 int
 rte_thread_create(rte_thread_t *thread_id,
@@ -126,6 +130,7 @@ rte_thread_create(rte_thread_t *thread_id,
 		.sched_priority = 0,
 	};
 	int policy = SCHED_OTHER;
+#ifndef RTE_EAL_PTHREAD_ATTR_SETAFFINITY_NP
 	struct thread_start_context ctx = {
 		.thread_func = thread_func,
 		.thread_args = args,
@@ -134,6 +139,7 @@ rte_thread_create(rte_thread_t *thread_id,
 		.wrapper_mutex = PTHREAD_MUTEX_INITIALIZER,
 		.wrapper_cond = PTHREAD_COND_INITIALIZER,
 	};
+#endif
 
 	if (thread_attr != NULL) {
 		ret = pthread_attr_init(&attr);
@@ -144,6 +150,16 @@ rte_thread_create(rte_thread_t *thread_id,
 
 		attrp = &attr;
 
+#ifdef RTE_EAL_PTHREAD_ATTR_SETAFFINITY_NP
+		if (CPU_COUNT(&thread_attr->cpuset) > 0) {
+			ret = pthread_attr_setaffinity_np(attrp, sizeof(thread_attr->cpuset),
+				&thread_attr->cpuset);
+			if (ret != 0) {
+				EAL_LOG(DEBUG, "pthread_attr_setaffinity_np failed");
+				goto cleanup;
+			}
+		}
+#endif
 		/*
 		 * Set the inherit scheduler parameter to explicit,
 		 * otherwise the priority attribute is ignored.
@@ -178,6 +194,14 @@ rte_thread_create(rte_thread_t *thread_id,
 		}
 	}
 
+#ifdef RTE_EAL_PTHREAD_ATTR_SETAFFINITY_NP
+	ret = pthread_create((pthread_t *)&thread_id->opaque_id, attrp,
+		(void *)(void *)thread_func, args);
+	if (ret != 0) {
+		EAL_LOG(DEBUG, "pthread_create failed");
+		goto cleanup;
+	}
+#else /* !RTE_EAL_PTHREAD_ATTR_SETAFFINITY_NP */
 	ret = pthread_create((pthread_t *)&thread_id->opaque_id, attrp,
 		thread_start_wrapper, &ctx);
 	if (ret != 0) {
@@ -193,6 +217,7 @@ rte_thread_create(rte_thread_t *thread_id,
 
 	if (ret != 0)
 		rte_thread_join(*thread_id, NULL);
+#endif /* RTE_EAL_PTHREAD_ATTR_SETAFFINITY_NP */
 
 cleanup:
 	if (attrp != NULL)
-- 
2.46.2


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3] eal/unix: optimize thread creation with glibc
  2024-11-03 11:25 ` [PATCH v3] " David Marchand
@ 2024-11-03 16:10   ` Stephen Hemminger
  2024-11-04  0:19   ` fengchengwen
  1 sibling, 0 replies; 10+ messages in thread
From: Stephen Hemminger @ 2024-11-03 16:10 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, thomas, luca.boccassi, stable, Luca Boccassi, Tyler Retzlaff

On Sun,  3 Nov 2024 12:25:19 +0100
David Marchand <david.marchand@redhat.com> wrote:

> Setting the cpu affinity of the child thread from the parent thread is
> racy when using pthread_setaffinity_np, as the child thread may start
> running and initialize before affinity is set.
> 
> On the other hand, setting the cpu affinity from the child thread itself
> may fail, so the parent thread waits for the child thread to report
> whether this call succeeded.
> 
> This synchronisation point resulted in a significant slow down of
> rte_thread_create() (as seen in the lcores_autotest unit tests, in OBS
> for some ARM systems).
> 
> Another option for setting cpu affinity is to use the not portable
> pthread_attr_setaffinity_np, but it is not available with musl.
> 
> Fixes: b28c6196b132 ("eal/unix: fix thread creation")
> Cc: stable@dpdk.org
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> Acked-by: Luca Boccassi <bluca@debian.org>
> ---
> Changes since v2:
> - added pthread_attr_setaffinity_np() detection,
> 
> Changes since v1:
> - fixed build with FreeBSD,


Looks good. Reading the glibc source, there are internal flags
to handle all the states a new thread can be in.
Glibc is complex there, but the comments are worth reading.

I wonder if part of the reason this is more of a problem for DPDK
is that, by default, a new thread inherits the cpuset of its parent.
So the child thread will get stuck on the same CPU as the main thread.

Acked-by: Stephen Hemminger <stephen@networkplumber.org>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3] eal/unix: optimize thread creation with glibc
  2024-11-03 11:25 ` [PATCH v3] " David Marchand
  2024-11-03 16:10   ` Stephen Hemminger
@ 2024-11-04  0:19   ` fengchengwen
  1 sibling, 0 replies; 10+ messages in thread
From: fengchengwen @ 2024-11-04  0:19 UTC (permalink / raw)
  To: David Marchand, dev
  Cc: thomas, stephen, luca.boccassi, stable, Luca Boccassi, Tyler Retzlaff

Acked-by: Chengwen Feng <fengchengwen@huawei.com>

On 2024/11/3 19:25, David Marchand wrote:
> Setting the cpu affinity of the child thread from the parent thread is
> racy when using pthread_setaffinity_np, as the child thread may start
> running and initialize before affinity is set.
> 
> On the other hand, setting the cpu affinity from the child thread itself
> may fail, so the parent thread waits for the child thread to report
> whether this call succeeded.
> 
> This synchronisation point resulted in a significant slow down of
> rte_thread_create() (as seen in the lcores_autotest unit tests, in OBS
> for some ARM systems).
> 
> Another option for setting cpu affinity is to use the not portable
> pthread_attr_setaffinity_np, but it is not available with musl.
> 
> Fixes: b28c6196b132 ("eal/unix: fix thread creation")
> Cc: stable@dpdk.org
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> Acked-by: Luca Boccassi <bluca@debian.org>


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v4] eal/unix: optimize thread creation
  2024-11-02 10:08 [PATCH] eal/unix: optimize thread creation with glibc David Marchand
                   ` (2 preceding siblings ...)
  2024-11-03 11:25 ` [PATCH v3] " David Marchand
@ 2024-11-04  8:57 ` David Marchand
  2024-11-04 17:12   ` David Marchand
  3 siblings, 1 reply; 10+ messages in thread
From: David Marchand @ 2024-11-04  8:57 UTC (permalink / raw)
  To: dev
  Cc: thomas, stephen, luca.boccassi, stable, Luca Boccassi,
	Chengwen Feng, Tyler Retzlaff

Setting the cpu affinity of the child thread from the parent thread is
racy when using pthread_setaffinity_np, as the child thread may start
running and initialize before affinity is set.

On the other hand, setting the cpu affinity from the child thread itself
may fail, so the parent thread waits for the child thread to report
whether this call succeeded.

This synchronisation point resulted in a significant slow down of
rte_thread_create() (as seen in the lcores_autotest unit tests, in OBS
for some ARM systems).

Another option for setting cpu affinity is to use the not portable
pthread_attr_setaffinity_np available in FreeBSD and glibc,
but not available in musl.

Fixes: b28c6196b132 ("eal/unix: fix thread creation")
Cc: stable@dpdk.org

Signed-off-by: David Marchand <david.marchand@redhat.com>
Acked-by: Luca Boccassi <bluca@debian.org>
Acked-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Chengwen Feng <fengchengwen@huawei.com>
---
Changes since v3:
- since _np symbols are in the (non-standard) header pthread_np.h on FreeBSD,
  and since this header is unconditionally included in rte_os.h,
  assume availability of pthread_attr_setaffinity_np (added in FreeBSD 8.0),

Changes since v2:
- added pthread_attr_setaffinity_np() detection,

Changes since v1:
- fixed build with FreeBSD,

---
 lib/eal/unix/meson.build  |  5 +++++
 lib/eal/unix/rte_thread.c | 25 +++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/lib/eal/unix/meson.build b/lib/eal/unix/meson.build
index cc7d67dd32..f1eb82e16a 100644
--- a/lib/eal/unix/meson.build
+++ b/lib/eal/unix/meson.build
@@ -11,3 +11,8 @@ sources += files(
         'eal_unix_timer.c',
         'rte_thread.c',
 )
+
+if is_freebsd or cc.has_function('pthread_attr_setaffinity_np', args: '-D_GNU_SOURCE',
+                                 prefix : '#include <pthread.h>')
+    cflags += '-DRTE_EAL_PTHREAD_ATTR_SETAFFINITY_NP'
+endif
diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
index 1b4c73f58e..ea629c2065 100644
--- a/lib/eal/unix/rte_thread.c
+++ b/lib/eal/unix/rte_thread.c
@@ -19,6 +19,7 @@ struct eal_tls_key {
 	pthread_key_t thread_index;
 };
 
+#ifndef RTE_EAL_PTHREAD_ATTR_SETAFFINITY_NP
 struct thread_start_context {
 	rte_thread_func thread_func;
 	void *thread_args;
@@ -28,6 +29,7 @@ struct thread_start_context {
 	int wrapper_ret;
 	bool wrapper_done;
 };
+#endif
 
 static int
 thread_map_priority_to_os_value(enum rte_thread_priority eal_pri, int *os_pri,
@@ -88,6 +90,7 @@ thread_map_os_priority_to_eal_priority(int policy, int os_pri,
 	return 0;
 }
 
+#ifndef RTE_EAL_PTHREAD_ATTR_SETAFFINITY_NP
 static void *
 thread_start_wrapper(void *arg)
 {
@@ -113,6 +116,7 @@ thread_start_wrapper(void *arg)
 
 	return (void *)(uintptr_t)thread_func(thread_args);
 }
+#endif
 
 int
 rte_thread_create(rte_thread_t *thread_id,
@@ -126,6 +130,7 @@ rte_thread_create(rte_thread_t *thread_id,
 		.sched_priority = 0,
 	};
 	int policy = SCHED_OTHER;
+#ifndef RTE_EAL_PTHREAD_ATTR_SETAFFINITY_NP
 	struct thread_start_context ctx = {
 		.thread_func = thread_func,
 		.thread_args = args,
@@ -134,6 +139,7 @@ rte_thread_create(rte_thread_t *thread_id,
 		.wrapper_mutex = PTHREAD_MUTEX_INITIALIZER,
 		.wrapper_cond = PTHREAD_COND_INITIALIZER,
 	};
+#endif
 
 	if (thread_attr != NULL) {
 		ret = pthread_attr_init(&attr);
@@ -144,6 +150,16 @@ rte_thread_create(rte_thread_t *thread_id,
 
 		attrp = &attr;
 
+#ifdef RTE_EAL_PTHREAD_ATTR_SETAFFINITY_NP
+		if (CPU_COUNT(&thread_attr->cpuset) > 0) {
+			ret = pthread_attr_setaffinity_np(attrp, sizeof(thread_attr->cpuset),
+				&thread_attr->cpuset);
+			if (ret != 0) {
+				EAL_LOG(DEBUG, "pthread_attr_setaffinity_np failed");
+				goto cleanup;
+			}
+		}
+#endif
 		/*
 		 * Set the inherit scheduler parameter to explicit,
 		 * otherwise the priority attribute is ignored.
@@ -178,6 +194,14 @@ rte_thread_create(rte_thread_t *thread_id,
 		}
 	}
 
+#ifdef RTE_EAL_PTHREAD_ATTR_SETAFFINITY_NP
+	ret = pthread_create((pthread_t *)&thread_id->opaque_id, attrp,
+		(void *)(void *)thread_func, args);
+	if (ret != 0) {
+		EAL_LOG(DEBUG, "pthread_create failed");
+		goto cleanup;
+	}
+#else /* !RTE_EAL_PTHREAD_ATTR_SETAFFINITY_NP */
 	ret = pthread_create((pthread_t *)&thread_id->opaque_id, attrp,
 		thread_start_wrapper, &ctx);
 	if (ret != 0) {
@@ -193,6 +217,7 @@ rte_thread_create(rte_thread_t *thread_id,
 
 	if (ret != 0)
 		rte_thread_join(*thread_id, NULL);
+#endif /* RTE_EAL_PTHREAD_ATTR_SETAFFINITY_NP */
 
 cleanup:
 	if (attrp != NULL)
-- 
2.46.2


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4] eal/unix: optimize thread creation
  2024-11-04  8:57 ` [PATCH v4] eal/unix: optimize thread creation David Marchand
@ 2024-11-04 17:12   ` David Marchand
  0 siblings, 0 replies; 10+ messages in thread
From: David Marchand @ 2024-11-04 17:12 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, thomas, stephen, luca.boccassi, stable, Luca Boccassi,
	Chengwen Feng, Tyler Retzlaff

On Mon, Nov 4, 2024 at 9:57 AM David Marchand <david.marchand@redhat.com> wrote:
>
> Setting the cpu affinity of the child thread from the parent thread is
> racy when using pthread_setaffinity_np, as the child thread may start
> running and initialize before affinity is set.
>
> On the other hand, setting the cpu affinity from the child thread itself
> may fail, so the parent thread waits for the child thread to report
> whether this call succeeded.
>
> This synchronisation point resulted in a significant slow down of
> rte_thread_create() (as seen in the lcores_autotest unit tests, in OBS
> for some ARM systems).
>
> Another option for setting cpu affinity is to use the not portable
> pthread_attr_setaffinity_np available in FreeBSD and glibc,
> but not available in musl.
>
> Fixes: b28c6196b132 ("eal/unix: fix thread creation")
> Cc: stable@dpdk.org
>
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> Acked-by: Luca Boccassi <bluca@debian.org>
> Acked-by: Stephen Hemminger <stephen@networkplumber.org>
> Acked-by: Chengwen Feng <fengchengwen@huawei.com>

CI looks good, and I tested manually in FreeBSD and ARM+OBS.

Applied, thanks.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2024-11-04 17:12 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-11-02 10:08 [PATCH] eal/unix: optimize thread creation with glibc David Marchand
2024-11-02 11:32 ` [PATCH v2] " David Marchand
2024-11-02 12:36   ` Luca Boccassi
2024-11-02 19:58 ` [PATCH] " Stephen Hemminger
2024-11-03 10:37   ` David Marchand
2024-11-03 11:25 ` [PATCH v3] " David Marchand
2024-11-03 16:10   ` Stephen Hemminger
2024-11-04  0:19   ` fengchengwen
2024-11-04  8:57 ` [PATCH v4] eal/unix: optimize thread creation David Marchand
2024-11-04 17:12   ` David Marchand

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).