DPDK patches and discussions
 help / color / mirror / Atom feed
* [PATCH 1/2] eal: fix failure race and behavior of thread create
@ 2023-03-02 18:44 Tyler Retzlaff
  2023-03-02 18:44 ` [PATCH 2/2] eal/windows: fix create thread failure behavior Tyler Retzlaff
                   ` (7 more replies)
  0 siblings, 8 replies; 34+ messages in thread
From: Tyler Retzlaff @ 2023-03-02 18:44 UTC (permalink / raw)
  To: dev; +Cc: david.marchand, Tyler Retzlaff, stable

In rte_thread_create setting affinity after pthread_create may fail.
Such a failure should result in the entire rte_thread_create failing
but doesn't.

Additionally if there is a failure to set affinity a race exists where
the creating thread will free ctx and depending on scheduling of the new
thread it may also free ctx (double free).

Resolve both of the above issues by using the pthread_setaffinity_np
prior to thread creation to set the affinity of the created thread. By
doing this no failure paths exist after pthread_create returns
successfully.

Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
Cc: stable@dpdk.org
Cc: roretzla@linux.microsoft.com

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/eal/unix/rte_thread.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
index 37ebfcf..5bf633b 100644
--- a/lib/eal/unix/rte_thread.c
+++ b/lib/eal/unix/rte_thread.c
@@ -155,6 +155,17 @@ struct thread_routine_ctx {
 			RTE_LOG(DEBUG, EAL, "pthread_attr_setschedparam failed\n");
 			goto cleanup;
 		}
+
+		if (CPU_COUNT(&thread_attr->cpuset) > 0) {
+			ret = pthread_attr_setaffinity_np(attrp,
+				sizeof(thread_attr->cpuset),
+				&thread_attr->cpuset);
+			if (ret != 0) {
+				RTE_LOG(DEBUG, EAL,
+					"pthread_attr_setaffinity_np failed\n");
+				goto cleanup;
+			}
+		}
 	}
 
 	ret = pthread_create((pthread_t *)&thread_id->opaque_id, attrp,
@@ -164,15 +175,6 @@ struct thread_routine_ctx {
 		goto cleanup;
 	}
 
-	if (thread_attr != NULL && CPU_COUNT(&thread_attr->cpuset) > 0) {
-		ret = rte_thread_set_affinity_by_id(*thread_id,
-			&thread_attr->cpuset);
-		if (ret != 0) {
-			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity_by_id failed\n");
-			goto cleanup;
-		}
-	}
-
 	ctx = NULL;
 cleanup:
 	free(ctx);
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH 2/2] eal/windows: fix create thread failure behavior
  2023-03-02 18:44 [PATCH 1/2] eal: fix failure race and behavior of thread create Tyler Retzlaff
@ 2023-03-02 18:44 ` Tyler Retzlaff
  2023-03-07 14:33 ` [PATCH 1/2] eal: fix failure race and behavior of thread create David Marchand
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 34+ messages in thread
From: Tyler Retzlaff @ 2023-03-02 18:44 UTC (permalink / raw)
  To: dev; +Cc: david.marchand, Tyler Retzlaff, stable

In rte_thread_create setting affinity after CreateThread may fail. Such
a failure is reported but strands the newly created thread in a
suspended state.

Resolve the above issue by notifying the newly created thread that
it should terminate as soon as it is resumed, while still continuing to
free the ctx.

Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
Cc: stable@dpdk.org
Cc: roretzla@linux.microsoft.com

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/eal/windows/rte_thread.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/lib/eal/windows/rte_thread.c b/lib/eal/windows/rte_thread.c
index 8556a84..e528ac9 100644
--- a/lib/eal/windows/rte_thread.c
+++ b/lib/eal/windows/rte_thread.c
@@ -19,6 +19,7 @@ struct eal_tls_key {
 
 struct thread_routine_ctx {
 	rte_thread_func thread_func;
+	bool thread_init_failed;
 	void *routine_args;
 };
 
@@ -167,9 +168,13 @@ struct thread_routine_ctx {
 thread_func_wrapper(void *arg)
 {
 	struct thread_routine_ctx ctx = *(struct thread_routine_ctx *)arg;
+	const bool thread_exit = __atomic_load_n(&ctx.thread_init_failed, __ATOMIC_ACQUIRE);
 
 	free(arg);
 
+	if (thread_exit)
+		return 0;
+
 	return (DWORD)ctx.thread_func(ctx.routine_args);
 }
 
@@ -183,6 +188,7 @@ struct thread_routine_ctx {
 	HANDLE thread_handle = NULL;
 	GROUP_AFFINITY thread_affinity;
 	struct thread_routine_ctx *ctx;
+	bool thread_exit = false;
 
 	ctx = calloc(1, sizeof(*ctx));
 	if (ctx == NULL) {
@@ -192,6 +198,7 @@ struct thread_routine_ctx {
 	}
 	ctx->routine_args = args;
 	ctx->thread_func = thread_func;
+	ctx->thread_init_failed = false;
 
 	thread_handle = CreateThread(NULL, 0, thread_func_wrapper, ctx,
 		CREATE_SUSPENDED, &tid);
@@ -209,23 +216,29 @@ struct thread_routine_ctx {
 							);
 			if (ret != 0) {
 				RTE_LOG(DEBUG, EAL, "Unable to convert cpuset to thread affinity\n");
-				goto cleanup;
+				thread_exit = true;
+				goto resume_thread;
 			}
 
 			if (!SetThreadGroupAffinity(thread_handle,
 						    &thread_affinity, NULL)) {
 				ret = thread_log_last_error("SetThreadGroupAffinity()");
-				goto cleanup;
+				thread_exit = true;
+				goto resume_thread;
 			}
 		}
 		ret = rte_thread_set_priority(*thread_id,
 				thread_attr->priority);
 		if (ret != 0) {
 			RTE_LOG(DEBUG, EAL, "Unable to set thread priority\n");
-			goto cleanup;
+			thread_exit = true;
+			goto resume_thread;
 		}
 	}
 
+resume_thread:
+	__atomic_store_n(&ctx->thread_init_failed, thread_exit, __ATOMIC_RELEASE);
+
 	if (ResumeThread(thread_handle) == (DWORD)-1) {
 		ret = thread_log_last_error("ResumeThread()");
 		goto cleanup;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 1/2] eal: fix failure race and behavior of thread create
  2023-03-02 18:44 [PATCH 1/2] eal: fix failure race and behavior of thread create Tyler Retzlaff
  2023-03-02 18:44 ` [PATCH 2/2] eal/windows: fix create thread failure behavior Tyler Retzlaff
@ 2023-03-07 14:33 ` David Marchand
  2023-03-09  9:17   ` David Marchand
  2023-03-13 23:31 ` [PATCH v2 0/2] fix race in rte_thread_create failure path Tyler Retzlaff
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 34+ messages in thread
From: David Marchand @ 2023-03-07 14:33 UTC (permalink / raw)
  To: Tyler Retzlaff; +Cc: dev, stable

On Thu, Mar 2, 2023 at 7:44 PM Tyler Retzlaff
<roretzla@linux.microsoft.com> wrote:
>
> In rte_thread_create setting affinity after pthread_create may fail.
> Such a failure should result in the entire rte_thread_create failing
> but doesn't.
>
> Additionally if there is a failure to set affinity a race exists where
> the creating thread will free ctx and depending on scheduling of the new
> thread it may also free ctx (double free).
>
> Resolve both of the above issues by using the pthread_setaffinity_np
> prior to thread creation to set the affinity of the created thread. By
> doing this no failure paths exist after pthread_create returns
> successfully.
>
> Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
> Cc: stable@dpdk.org
> Cc: roretzla@linux.microsoft.com
>
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>

Reviewed-by: David Marchand <david.marchand@redhat.com>


-- 
David Marchand


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 1/2] eal: fix failure race and behavior of thread create
  2023-03-07 14:33 ` [PATCH 1/2] eal: fix failure race and behavior of thread create David Marchand
@ 2023-03-09  9:17   ` David Marchand
  2023-03-09  9:58     ` Thomas Monjalon
  0 siblings, 1 reply; 34+ messages in thread
From: David Marchand @ 2023-03-09  9:17 UTC (permalink / raw)
  To: Tyler Retzlaff; +Cc: dev, stable

On Tue, Mar 7, 2023 at 3:33 PM David Marchand <david.marchand@redhat.com> wrote:
> On Thu, Mar 2, 2023 at 7:44 PM Tyler Retzlaff
> <roretzla@linux.microsoft.com> wrote:
> >
> > In rte_thread_create setting affinity after pthread_create may fail.
> > Such a failure should result in the entire rte_thread_create failing
> > but doesn't.
> >
> > Additionally if there is a failure to set affinity a race exists where
> > the creating thread will free ctx and depending on scheduling of the new
> > thread it may also free ctx (double free).
> >
> > Resolve both of the above issues by using the pthread_setaffinity_np
> > prior to thread creation to set the affinity of the created thread. By
> > doing this no failure paths exist after pthread_create returns
> > successfully.
> >
> > Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
> > Cc: stable@dpdk.org
> > Cc: roretzla@linux.microsoft.com
> >
> > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> Reviewed-by: David Marchand <david.marchand@redhat.com>

Series applied, thanks.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 1/2] eal: fix failure race and behavior of thread create
  2023-03-09  9:17   ` David Marchand
@ 2023-03-09  9:58     ` Thomas Monjalon
  2023-03-09 20:49       ` Tyler Retzlaff
  0 siblings, 1 reply; 34+ messages in thread
From: Thomas Monjalon @ 2023-03-09  9:58 UTC (permalink / raw)
  To: Tyler Retzlaff, David Marchand; +Cc: dev, stable

09/03/2023 10:17, David Marchand:
> On Tue, Mar 7, 2023 at 3:33 PM David Marchand <david.marchand@redhat.com> wrote:
> > On Thu, Mar 2, 2023 at 7:44 PM Tyler Retzlaff
> > <roretzla@linux.microsoft.com> wrote:
> > >
> > > In rte_thread_create setting affinity after pthread_create may fail.
> > > Such a failure should result in the entire rte_thread_create failing
> > > but doesn't.
> > >
> > > Additionally if there is a failure to set affinity a race exists where
> > > the creating thread will free ctx and depending on scheduling of the new
> > > thread it may also free ctx (double free).
> > >
> > > Resolve both of the above issues by using the pthread_setaffinity_np
> > > prior to thread creation to set the affinity of the created thread. By
> > > doing this no failure paths exist after pthread_create returns
> > > successfully.
> > >
> > > Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
> > > Cc: stable@dpdk.org
> > > Cc: roretzla@linux.microsoft.com
> > >
> > > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > Reviewed-by: David Marchand <david.marchand@redhat.com>
> 
> Series applied, thanks.

Unfortunately we cannot merge this patch
because it does not compile on Alpine Linux (musl libc):

lib/eal/unix/rte_thread.c:160:31: error:
implicit declaration of function 'pthread_attr_setaffinity_np'

Is it possible to fix the race without using pthread_attr_setaffinity_np?



^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 1/2] eal: fix failure race and behavior of thread create
  2023-03-09  9:58     ` Thomas Monjalon
@ 2023-03-09 20:49       ` Tyler Retzlaff
  2023-03-09 21:05         ` David Marchand
  0 siblings, 1 reply; 34+ messages in thread
From: Tyler Retzlaff @ 2023-03-09 20:49 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: David Marchand, dev, stable

On Thu, Mar 09, 2023 at 10:58:06AM +0100, Thomas Monjalon wrote:
> 09/03/2023 10:17, David Marchand:
> > On Tue, Mar 7, 2023 at 3:33 PM David Marchand <david.marchand@redhat.com> wrote:
> > > On Thu, Mar 2, 2023 at 7:44 PM Tyler Retzlaff
> > > <roretzla@linux.microsoft.com> wrote:
> > > >
> > > > In rte_thread_create setting affinity after pthread_create may fail.
> > > > Such a failure should result in the entire rte_thread_create failing
> > > > but doesn't.
> > > >
> > > > Additionally if there is a failure to set affinity a race exists where
> > > > the creating thread will free ctx and depending on scheduling of the new
> > > > thread it may also free ctx (double free).
> > > >
> > > > Resolve both of the above issues by using the pthread_setaffinity_np
> > > > prior to thread creation to set the affinity of the created thread. By
> > > > doing this no failure paths exist after pthread_create returns
> > > > successfully.
> > > >
> > > > Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
> > > > Cc: stable@dpdk.org
> > > > Cc: roretzla@linux.microsoft.com
> > > >
> > > > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > > Reviewed-by: David Marchand <david.marchand@redhat.com>
> > 
> > Series applied, thanks.
> 
> Unfortunately we cannot merge this patch
> because it does not compile on Alpine Linux (musl libc):
> 
> lib/eal/unix/rte_thread.c:160:31: error:
> implicit declaration of function 'pthread_attr_setaffinity_np'

i didn't get any CI failure for this. did i just miss it?

> 
> Is it possible to fix the race without using pthread_attr_setaffinity_np?
> 

it seems we never allowed threads to be created with a set affinity when
using pthread_create directly (that was portable to alpine linux).  for worker
threads the start_routine is setting the affinity from the new thread.

certainly we can make this work by doing the same thing, but we'll have
to adjust the start routine wrapper to synchronize/wait for the new
thread to set the affinity and if it fails terminate the new thread
cleanly.

i don't have a way to build for alpine linux or run the unit tests, does
someone want to make the above suggested adjustment? or i can try and
make a patch but someone else will have to carefully review and test.

let me know how you'd like to proceed.

ty

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 1/2] eal: fix failure race and behavior of thread create
  2023-03-09 20:49       ` Tyler Retzlaff
@ 2023-03-09 21:05         ` David Marchand
  0 siblings, 0 replies; 34+ messages in thread
From: David Marchand @ 2023-03-09 21:05 UTC (permalink / raw)
  To: Tyler Retzlaff; +Cc: Thomas Monjalon, dev, stable

On Thu, Mar 9, 2023 at 9:49 PM Tyler Retzlaff
<roretzla@linux.microsoft.com> wrote:
>
> On Thu, Mar 09, 2023 at 10:58:06AM +0100, Thomas Monjalon wrote:
> > 09/03/2023 10:17, David Marchand:
> > > On Tue, Mar 7, 2023 at 3:33 PM David Marchand <david.marchand@redhat.com> wrote:
> > > > On Thu, Mar 2, 2023 at 7:44 PM Tyler Retzlaff
> > > > <roretzla@linux.microsoft.com> wrote:
> > > > >
> > > > > In rte_thread_create setting affinity after pthread_create may fail.
> > > > > Such a failure should result in the entire rte_thread_create failing
> > > > > but doesn't.
> > > > >
> > > > > Additionally if there is a failure to set affinity a race exists where
> > > > > the creating thread will free ctx and depending on scheduling of the new
> > > > > thread it may also free ctx (double free).
> > > > >
> > > > > Resolve both of the above issues by using the pthread_setaffinity_np
> > > > > prior to thread creation to set the affinity of the created thread. By
> > > > > doing this no failure paths exist after pthread_create returns
> > > > > successfully.
> > > > >
> > > > > Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
> > > > > Cc: stable@dpdk.org
> > > > > Cc: roretzla@linux.microsoft.com
> > > > >
> > > > > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > > > Reviewed-by: David Marchand <david.marchand@redhat.com>
> > >
> > > Series applied, thanks.
> >
> > Unfortunately we cannot merge this patch
> > because it does not compile on Alpine Linux (musl libc):
> >
> > lib/eal/unix/rte_thread.c:160:31: error:
> > implicit declaration of function 'pthread_attr_setaffinity_np'
>
> i didn't get any CI failure for this. did i just miss it?

Count on me, I would have complained if there was a CI issue ;-).


>
> >
> > Is it possible to fix the race without using pthread_attr_setaffinity_np?
> >
>
> it seems we never allowed threads to be created with a set affinity when
> using pthread_create directly (that was portable to alpine linux).  for worker
> threads the start_routine is setting the affinity from the new thread.
>
> certainly we can make this work by doing the same thing, but we'll have
> to adjust the start routine wrapper to synchronize/wait for the new
> thread to set the affinity and if it fails terminate the new thread
> cleanly.
>
> i don't have a way to build for alpine linux or run the unit tests, does
> someone want to make the above suggested adjustment? or i can try and
> make a patch but someone else will have to carefully review and test.
>
> let me know how you'd like to proceed.

UNH is looking into re-enabling the Alpine job.


For the time being, if you have a github repository, I can propose a
quick patch using GHA:
https://github.com/david-marchand/dpdk/commit/ci

I had tested compilation with a previous version of this patch.
I just added running the unit tests (adding a checks: tests line in
the job matrix), let's see how it goes...
https://github.com/david-marchand/dpdk/actions/runs/4378715081/jobs/7663806639


-- 
David Marchand


^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH v2 0/2] fix race in rte_thread_create failure path
  2023-03-02 18:44 [PATCH 1/2] eal: fix failure race and behavior of thread create Tyler Retzlaff
  2023-03-02 18:44 ` [PATCH 2/2] eal/windows: fix create thread failure behavior Tyler Retzlaff
  2023-03-07 14:33 ` [PATCH 1/2] eal: fix failure race and behavior of thread create David Marchand
@ 2023-03-13 23:31 ` Tyler Retzlaff
  2023-03-13 23:31   ` [PATCH v2 1/2] eal: make cpusetp to rte thread set affinity const Tyler Retzlaff
                     ` (2 more replies)
  2023-03-14 22:44 ` [PATCH v3 " Tyler Retzlaff
                   ` (4 subsequent siblings)
  7 siblings, 3 replies; 34+ messages in thread
From: Tyler Retzlaff @ 2023-03-13 23:31 UTC (permalink / raw)
  To: dev; +Cc: thomas, david.marchand, Tyler Retzlaff

v2:
  * new approach over v1 of the patch to avoid using pthread np API that
    is not available on Alpine Linux.
  * to conform to rte_thread_create parameter const qualification include
    an additional patch to const qualify rte_thread_set_affinity cpusetp
    parameter.

Tyler Retzlaff (2):
  eal: make cpusetp to rte thread set affinity const
  eal: fix failure path race setting new thread affinity

 lib/eal/common/eal_common_thread.c |  6 ++---
 lib/eal/include/rte_thread.h       |  2 +-
 lib/eal/unix/rte_thread.c          | 52 ++++++++++++++++++++++++++++++--------
 3 files changed, 46 insertions(+), 14 deletions(-)

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH v2 1/2] eal: make cpusetp to rte thread set affinity const
  2023-03-13 23:31 ` [PATCH v2 0/2] fix race in rte_thread_create failure path Tyler Retzlaff
@ 2023-03-13 23:31   ` Tyler Retzlaff
  2023-03-13 23:31   ` [PATCH v2 2/2] eal: fix failure path race setting new thread affinity Tyler Retzlaff
  2023-03-14 11:47   ` [PATCH v2 0/2] fix race in rte_thread_create failure path David Marchand
  2 siblings, 0 replies; 34+ messages in thread
From: Tyler Retzlaff @ 2023-03-13 23:31 UTC (permalink / raw)
  To: dev; +Cc: thomas, david.marchand, Tyler Retzlaff

const qualify rte_cpuset_t *cpusetp parameter in the
rte_thread_set_affinity API.

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/eal/common/eal_common_thread.c | 6 +++---
 lib/eal/include/rte_thread.h       | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/eal/common/eal_common_thread.c b/lib/eal/common/eal_common_thread.c
index 079a385..c9e5619 100644
--- a/lib/eal/common/eal_common_thread.c
+++ b/lib/eal/common/eal_common_thread.c
@@ -34,7 +34,7 @@ unsigned rte_socket_id(void)
 }
 
 static int
-eal_cpuset_socket_id(rte_cpuset_t *cpusetp)
+eal_cpuset_socket_id(const rte_cpuset_t *cpusetp)
 {
 	unsigned cpu = 0;
 	int socket_id = SOCKET_ID_ANY;
@@ -62,7 +62,7 @@ unsigned rte_socket_id(void)
 }
 
 static void
-thread_update_affinity(rte_cpuset_t *cpusetp)
+thread_update_affinity(const rte_cpuset_t *cpusetp)
 {
 	unsigned int lcore_id = rte_lcore_id();
 
@@ -83,7 +83,7 @@ unsigned rte_socket_id(void)
 }
 
 int
-rte_thread_set_affinity(rte_cpuset_t *cpusetp)
+rte_thread_set_affinity(const rte_cpuset_t *cpusetp)
 {
 	if (rte_thread_set_affinity_by_id(rte_thread_self(), cpusetp) != 0) {
 		RTE_LOG(ERR, EAL, "rte_thread_set_affinity_by_id failed\n");
diff --git a/lib/eal/include/rte_thread.h b/lib/eal/include/rte_thread.h
index ddd411e..e461354 100644
--- a/lib/eal/include/rte_thread.h
+++ b/lib/eal/include/rte_thread.h
@@ -348,7 +348,7 @@ int rte_thread_get_affinity_by_id(rte_thread_t thread_id,
  * @return
  *   On success, return 0; otherwise return -1;
  */
-int rte_thread_set_affinity(rte_cpuset_t *cpusetp);
+int rte_thread_set_affinity(const rte_cpuset_t *cpusetp);
 
 /**
  * Get core affinity of the current thread.
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH v2 2/2] eal: fix failure path race setting new thread affinity
  2023-03-13 23:31 ` [PATCH v2 0/2] fix race in rte_thread_create failure path Tyler Retzlaff
  2023-03-13 23:31   ` [PATCH v2 1/2] eal: make cpusetp to rte thread set affinity const Tyler Retzlaff
@ 2023-03-13 23:31   ` Tyler Retzlaff
  2023-03-14 11:47   ` [PATCH v2 0/2] fix race in rte_thread_create failure path David Marchand
  2 siblings, 0 replies; 34+ messages in thread
From: Tyler Retzlaff @ 2023-03-13 23:31 UTC (permalink / raw)
  To: dev; +Cc: thomas, david.marchand, Tyler Retzlaff, stable

In rte_thread_create setting affinity after pthread_create may fail.
Such a failure should result in the entire rte_thread_create failing
but doesn't.

Additionally if there is a failure to set affinity a race exists where
the creating thread will free ctx and depending on scheduling of the new
thread it may also free ctx (double free).

Resolve both of the above issues by setting the affinity from the newly
created thread instead of after thread creation. To achieve this modify
the existing thread wrapper to allow the creating thread to wait on the
result of the set affinity operation.

Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
Cc: stable@dpdk.org

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/eal/unix/rte_thread.c | 52 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 42 insertions(+), 10 deletions(-)

diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
index 37ebfcf..a09fb08 100644
--- a/lib/eal/unix/rte_thread.c
+++ b/lib/eal/unix/rte_thread.c
@@ -8,6 +8,7 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include <rte_cycles.h>
 #include <rte_errno.h>
 #include <rte_log.h>
 #include <rte_thread.h>
@@ -16,8 +17,17 @@ struct eal_tls_key {
 	pthread_key_t thread_index;
 };
 
+enum __rte_thread_wrapper_status {
+	THREAD_WRAPPER_LAUNCHING, /* Yet to call thread_func function */
+	THREAD_WRAPPER_RUNNING, /* Thread is running successfully */
+	THREAD_WRAPPER_ERROR /* Thread thread_wrapper encountered an error */
+};
+
 struct thread_routine_ctx {
 	rte_thread_func thread_func;
+	const rte_thread_attr_t *thread_attr;
+	int thread_wrapper_ret;
+	enum __rte_thread_wrapper_status thread_wrapper_status;
 	void *routine_args;
 };
 
@@ -83,11 +93,24 @@ struct thread_routine_ctx {
 static void *
 thread_func_wrapper(void *arg)
 {
-	struct thread_routine_ctx ctx = *(struct thread_routine_ctx *)arg;
+	struct thread_routine_ctx *ctx = (struct thread_routine_ctx *)arg;
+	rte_thread_func thread_func = ctx->thread_func;
+	void *thread_args = ctx->routine_args;
+
+	if (ctx->thread_attr != NULL && CPU_COUNT(&ctx->thread_attr->cpuset) > 0) {
+		ctx->thread_wrapper_ret = rte_thread_set_affinity(&ctx->thread_attr->cpuset);
+		if (ctx->thread_wrapper_ret != 0) {
+			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity failed\n");
+			__atomic_store_n(&ctx->thread_wrapper_status,
+				THREAD_WRAPPER_ERROR, __ATOMIC_RELEASE);
+		}
+	}
+	__atomic_store_n(&ctx->thread_wrapper_status,
+		THREAD_WRAPPER_RUNNING, __ATOMIC_RELEASE);
 
 	free(arg);
 
-	return (void *)(uintptr_t)ctx.thread_func(ctx.routine_args);
+	return (void *)(uintptr_t)thread_func(thread_args);
 }
 
 int
@@ -98,6 +121,7 @@ struct thread_routine_ctx {
 	int ret = 0;
 	pthread_attr_t attr;
 	pthread_attr_t *attrp = NULL;
+	enum __rte_thread_wrapper_status thread_wrapper_status;
 	struct thread_routine_ctx *ctx;
 	struct sched_param param = {
 		.sched_priority = 0,
@@ -111,7 +135,10 @@ struct thread_routine_ctx {
 		goto cleanup;
 	}
 	ctx->routine_args = args;
+	ctx->thread_attr = thread_attr;
 	ctx->thread_func = thread_func;
+	ctx->thread_wrapper_ret = 0;
+	ctx->thread_wrapper_status = THREAD_WRAPPER_LAUNCHING;
 
 	if (thread_attr != NULL) {
 		ret = pthread_attr_init(&attr);
@@ -164,16 +191,21 @@ struct thread_routine_ctx {
 		goto cleanup;
 	}
 
-	if (thread_attr != NULL && CPU_COUNT(&thread_attr->cpuset) > 0) {
-		ret = rte_thread_set_affinity_by_id(*thread_id,
-			&thread_attr->cpuset);
-		if (ret != 0) {
-			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity_by_id failed\n");
-			goto cleanup;
-		}
+	/* Wait for the thread wrapper to initialize thread successfully */
+	while ((thread_wrapper_status =
+		__atomic_load_n(&ctx->thread_wrapper_status,
+		__ATOMIC_ACQUIRE)) == THREAD_WRAPPER_LAUNCHING)
+			sched_yield();
+
+	/* Check if the control thread encountered an error */
+	if (thread_wrapper_status == THREAD_WRAPPER_ERROR) {
+		/* thread wrapper is exiting */
+		pthread_join((pthread_t)thread_id->opaque_id, NULL);
+		ret = ctx->thread_wrapper_ret;
+		free(ctx);
 	}
-
 	ctx = NULL;
+
 cleanup:
 	free(ctx);
 	if (attrp != NULL)
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v2 0/2] fix race in rte_thread_create failure path
  2023-03-13 23:31 ` [PATCH v2 0/2] fix race in rte_thread_create failure path Tyler Retzlaff
  2023-03-13 23:31   ` [PATCH v2 1/2] eal: make cpusetp to rte thread set affinity const Tyler Retzlaff
  2023-03-13 23:31   ` [PATCH v2 2/2] eal: fix failure path race setting new thread affinity Tyler Retzlaff
@ 2023-03-14 11:47   ` David Marchand
  2023-03-14 13:59     ` Tyler Retzlaff
  2 siblings, 1 reply; 34+ messages in thread
From: David Marchand @ 2023-03-14 11:47 UTC (permalink / raw)
  To: Tyler Retzlaff; +Cc: dev, thomas

On Tue, Mar 14, 2023 at 12:31 AM Tyler Retzlaff
<roretzla@linux.microsoft.com> wrote:
>
> v2:
>   * new approach over v1 of the patch to avoid using pthread np API that
>     is not available on Alpine Linux.
>   * to conform to rte_thread_create parameter const qualification include
>     an additional patch to const qualify rte_thread_set_affinity cpusetp
>     parameter.
>
> Tyler Retzlaff (2):
>   eal: make cpusetp to rte thread set affinity const
>   eal: fix failure path race setting new thread affinity
>
>  lib/eal/common/eal_common_thread.c |  6 ++---
>  lib/eal/include/rte_thread.h       |  2 +-
>  lib/eal/unix/rte_thread.c          | 52 ++++++++++++++++++++++++++++++--------
>  3 files changed, 46 insertions(+), 14 deletions(-)

ASan flagged some use after free.
See logs https://github.com/ovsrobot/dpdk/suites/11537702259/artifacts/597032673

24/90 DPDK:fast-tests / lcores_autotest       FAIL     1.72 s (exit status 1)

--- command ---
00:24:14 DPDK_TEST='lcores_autotest'
/home/runner/work/dpdk/dpdk/build/app/test/dpdk-test
--file-prefix=lcores_autotest
--- stdout ---
RTE>>lcores_autotest
--- stderr ---
EAL: Detected CPU lcores: 2
EAL: Detected NUMA nodes: 1
EAL: Detected shared linkage of DPDK
EAL: Multi-process socket /var/run/dpdk/lcores_autotest/mp_socket
EAL: Selected IOVA mode 'PA'
EAL: VFIO support initialized
APP: HPET is not enabled, using TSC as default timer
=================================================================
==70246==ERROR: AddressSanitizer: heap-use-after-free on address
0x60300000d044 at pc 0x7f6c9c49e1cf bp 0x7ffdbf1b3670 sp
0x7ffdbf1b3668
READ of size 4 at 0x60300000d044 thread T0
    #0 0x7f6c9c49e1ce in rte_thread_create
/home/runner/work/dpdk/dpdk/build/../lib/eal/unix/rte_thread.c:196:3
    #1 0x957e16 in test_non_eal_lcores
/home/runner/work/dpdk/dpdk/build/../app/test/test_lcores.c:81:7
    #2 0x957e16 in test_lcores
/home/runner/work/dpdk/dpdk/build/../app/test/test_lcores.c:400:6
    #3 0x4dcbc0 in cmd_autotest_parsed
/home/runner/work/dpdk/dpdk/build/../app/test/commands.c:68:10
    #4 0x7f6c9c0d3a88 in __cmdline_parse
/home/runner/work/dpdk/dpdk/build/../lib/cmdline/cmdline_parse.c:294:3
    #5 0x7f6c9c0d3a88 in cmdline_parse
/home/runner/work/dpdk/dpdk/build/../lib/cmdline/cmdline_parse.c:302:9
    #6 0x7f6c9c0d0907 in cmdline_valid_buffer
/home/runner/work/dpdk/dpdk/build/../lib/cmdline/cmdline.c:24:8
    #7 0x7f6c9c0d91c4 in rdline_char_in
/home/runner/work/dpdk/dpdk/build/../lib/cmdline/cmdline_rdline.c:444:5
    #8 0x7f6c9c0d0cd8 in cmdline_in
/home/runner/work/dpdk/dpdk/build/../lib/cmdline/cmdline.c:146:9
    #9 0x510205 in main
/home/runner/work/dpdk/dpdk/build/../app/test/test.c:208:15
    #10 0x7f6c9a92d082 in __libc_start_main
/build/glibc-SzIz7B/glibc-2.31/csu/../csu/libc-start.c:308:16
    #11 0x432e4d in _start
(/home/runner/work/dpdk/dpdk/build/app/test/dpdk-test+0x432e4d)

0x60300000d044 is located 20 bytes inside of 32-byte region
[0x60300000d030,0x60300000d050)
freed by thread T6 here:
    #0 0x4acc3d in free
(/home/runner/work/dpdk/dpdk/build/app/test/dpdk-test+0x4acc3d)
    #1 0x7f6c9c49de64 in thread_func_wrapper
/home/runner/work/dpdk/dpdk/build/../lib/eal/unix/rte_thread.c:111:2
    #2 0x7f6c9ab28608 in start_thread
/build/glibc-SzIz7B/glibc-2.31/nptl/pthread_create.c:477:8

previously allocated by thread T0 here:
    #0 0x4ad032 in calloc
(/home/runner/work/dpdk/dpdk/build/app/test/dpdk-test+0x4ad032)
    #1 0x7f6c9c49e021 in rte_thread_create
/home/runner/work/dpdk/dpdk/build/../lib/eal/unix/rte_thread.c:131:8
    #2 0x957e16 in test_non_eal_lcores
/home/runner/work/dpdk/dpdk/build/../app/test/test_lcores.c:81:7
    #3 0x957e16 in test_lcores
/home/runner/work/dpdk/dpdk/build/../app/test/test_lcores.c:400:6
    #4 0x4dcbc0 in cmd_autotest_parsed
/home/runner/work/dpdk/dpdk/build/../app/test/commands.c:68:10
    #5 0x7f6c9c0d3a88 in __cmdline_parse
/home/runner/work/dpdk/dpdk/build/../lib/cmdline/cmdline_parse.c:294:3
    #6 0x7f6c9c0d3a88 in cmdline_parse
/home/runner/work/dpdk/dpdk/build/../lib/cmdline/cmdline_parse.c:302:9
    #7 0x7f6c9c0d0907 in cmdline_valid_buffer
/home/runner/work/dpdk/dpdk/build/../lib/cmdline/cmdline.c:24:8
    #8 0x7f6c9c0d91c4 in rdline_char_in
/home/runner/work/dpdk/dpdk/build/../lib/cmdline/cmdline_rdline.c:444:5
    #9 0x7f6c9c0d0cd8 in cmdline_in
/home/runner/work/dpdk/dpdk/build/../lib/cmdline/cmdline.c:146:9
    #10 0x510205 in main
/home/runner/work/dpdk/dpdk/build/../app/test/test.c:208:15
    #11 0x7f6c9a92d082 in __libc_start_main
/build/glibc-SzIz7B/glibc-2.31/csu/../csu/libc-start.c:308:16

Thread T6 created by T0 here:
    #0 0x4978ea in pthread_create
(/home/runner/work/dpdk/dpdk/build/app/test/dpdk-test+0x4978ea)
    #1 0x7f6c9c49e117 in rte_thread_create
/home/runner/work/dpdk/dpdk/build/../lib/eal/unix/rte_thread.c:187:8
    #2 0x957e16 in test_non_eal_lcores
/home/runner/work/dpdk/dpdk/build/../app/test/test_lcores.c:81:7
    #3 0x957e16 in test_lcores
/home/runner/work/dpdk/dpdk/build/../app/test/test_lcores.c:400:6
    #4 0x4dcbc0 in cmd_autotest_parsed
/home/runner/work/dpdk/dpdk/build/../app/test/commands.c:68:10
    #5 0x7f6c9c0d3a88 in __cmdline_parse
/home/runner/work/dpdk/dpdk/build/../lib/cmdline/cmdline_parse.c:294:3
    #6 0x7f6c9c0d3a88 in cmdline_parse
/home/runner/work/dpdk/dpdk/build/../lib/cmdline/cmdline_parse.c:302:9
    #7 0x7f6c9c0d0907 in cmdline_valid_buffer
/home/runner/work/dpdk/dpdk/build/../lib/cmdline/cmdline.c:24:8
    #8 0x7f6c9c0d91c4 in rdline_char_in
/home/runner/work/dpdk/dpdk/build/../lib/cmdline/cmdline_rdline.c:444:5
    #9 0x7f6c9c0d0cd8 in cmdline_in
/home/runner/work/dpdk/dpdk/build/../lib/cmdline/cmdline.c:146:9
    #10 0x510205 in main
/home/runner/work/dpdk/dpdk/build/../app/test/test.c:208:15
    #11 0x7f6c9a92d082 in __libc_start_main
/build/glibc-SzIz7B/glibc-2.31/csu/../csu/libc-start.c:308:16

SUMMARY: AddressSanitizer: heap-use-after-free
/home/runner/work/dpdk/dpdk/build/../lib/eal/unix/rte_thread.c:196:3
in rte_thread_create
Shadow bytes around the buggy address:
  0x0c067fff99b0: fa fa 00 00 01 fa fa fa 00 00 00 00 fa fa 00 00
  0x0c067fff99c0: 00 00 fa fa 00 00 00 fa fa fa 00 00 00 06 fa fa
  0x0c067fff99d0: fd fd fd fa fa fa fd fd fd fa fa fa 00 00 00 07
  0x0c067fff99e0: fa fa fd fd fd fd fa fa fd fd fd fd fa fa fd fd
  0x0c067fff99f0: fd fd fa fa 00 00 00 07 fa fa 00 00 01 fa fa fa
=>0x0c067fff9a00: 00 00 04 fa fa fa fd fd[fd]fd fa fa fa fa fa fa
  0x0c067fff9a10: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
  0x0c067fff9a20: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
  0x0c067fff9a30: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
  0x0c067fff9a40: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
  0x0c067fff9a50: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
Shadow byte legend (one shadow byte represents 8 application bytes):
  Addressable:           00
  Partially addressable: 01 02 03 04 05 06 07
  Heap left redzone:       fa
  Freed heap region:       fd
  Stack left redzone:      f1
  Stack mid redzone:       f2
  Stack right redzone:     f3
  Stack after return:      f5
  Stack use after scope:   f8
  Global redzone:          f9
  Global init order:       f6
  Poisoned by user:        f7
  Container overflow:      fc
  Array cookie:            ac
  Intra object redzone:    bb
  ASan internal:           fe
  Left alloca redzone:     ca
  Right alloca redzone:    cb
  Shadow gap:              cc
==70246==ABORTING
-------


-- 
David Marchand


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v2 0/2] fix race in rte_thread_create failure path
  2023-03-14 11:47   ` [PATCH v2 0/2] fix race in rte_thread_create failure path David Marchand
@ 2023-03-14 13:59     ` Tyler Retzlaff
  0 siblings, 0 replies; 34+ messages in thread
From: Tyler Retzlaff @ 2023-03-14 13:59 UTC (permalink / raw)
  To: David Marchand; +Cc: dev, thomas

On Tue, Mar 14, 2023 at 12:47:54PM +0100, David Marchand wrote:
> On Tue, Mar 14, 2023 at 12:31 AM Tyler Retzlaff
> <roretzla@linux.microsoft.com> wrote:
> >
> > v2:
> >   * new approach over v1 of the patch to avoid using pthread np API that
> >     is not available on Alpine Linux.
> >   * to conform to rte_thread_create parameter const qualification include
> >     an additional patch to const qualify rte_thread_set_affinity cpusetp
> >     parameter.
> >
> > Tyler Retzlaff (2):
> >   eal: make cpusetp to rte thread set affinity const
> >   eal: fix failure path race setting new thread affinity
> >
> >  lib/eal/common/eal_common_thread.c |  6 ++---
> >  lib/eal/include/rte_thread.h       |  2 +-
> >  lib/eal/unix/rte_thread.c          | 52 ++++++++++++++++++++++++++++++--------
> >  3 files changed, 46 insertions(+), 14 deletions(-)
> 
> ASan flagged some use after free.
> See logs https://github.com/ovsrobot/dpdk/suites/11537702259/artifacts/597032673
> 
> 24/90 DPDK:fast-tests / lcores_autotest       FAIL     1.72 s (exit status 1)

thanks, i'm waiting on a system to test plan to investigate today if it
becomes available.

ty

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH v3 0/2] fix race in rte_thread_create failure path
  2023-03-02 18:44 [PATCH 1/2] eal: fix failure race and behavior of thread create Tyler Retzlaff
                   ` (2 preceding siblings ...)
  2023-03-13 23:31 ` [PATCH v2 0/2] fix race in rte_thread_create failure path Tyler Retzlaff
@ 2023-03-14 22:44 ` Tyler Retzlaff
  2023-03-14 22:44   ` [PATCH v3 1/2] eal: make cpusetp to rte thread set affinity const Tyler Retzlaff
  2023-03-14 22:44   ` [PATCH v3 2/2] eal: fix failure path race setting new thread affinity Tyler Retzlaff
  2023-03-14 22:50 ` [PATCH v4 0/2] fix race in rte_thread_create failure path Tyler Retzlaff
                   ` (3 subsequent siblings)
  7 siblings, 2 replies; 34+ messages in thread
From: Tyler Retzlaff @ 2023-03-14 22:44 UTC (permalink / raw)
  To: dev; +Cc: thomas, david.marchand, Tyler Retzlaff

v3:
  * don't free wrapper context from new thread now that wrapper
    completion is complete before returning from creating thread.

v2:
  * new approach over v1 of the patch to avoid using pthread np API that
    is not available on Alpine Linux.
  * to conform to rte_thread_create parameter const qualification include
    an additional patch to const qualify rte_thread_set_affinity cpusetp
    parameter.

Tyler Retzlaff (2):
  eal: make cpusetp to rte thread set affinity const
  eal: fix failure path race setting new thread affinity

 lib/eal/common/eal_common_thread.c |  6 ++---
 lib/eal/include/rte_thread.h       |  2 +-
 lib/eal/unix/rte_thread.c          | 51 ++++++++++++++++++++++++++++++--------
 3 files changed, 44 insertions(+), 15 deletions(-)

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH v3 1/2] eal: make cpusetp to rte thread set affinity const
  2023-03-14 22:44 ` [PATCH v3 " Tyler Retzlaff
@ 2023-03-14 22:44   ` Tyler Retzlaff
  2023-03-14 22:44   ` [PATCH v3 2/2] eal: fix failure path race setting new thread affinity Tyler Retzlaff
  1 sibling, 0 replies; 34+ messages in thread
From: Tyler Retzlaff @ 2023-03-14 22:44 UTC (permalink / raw)
  To: dev; +Cc: thomas, david.marchand, Tyler Retzlaff

const qualify rte_cpuset_t *cpusetp parameter in the
rte_thread_set_affinity API.

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/eal/common/eal_common_thread.c | 6 +++---
 lib/eal/include/rte_thread.h       | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/eal/common/eal_common_thread.c b/lib/eal/common/eal_common_thread.c
index 079a385..c9e5619 100644
--- a/lib/eal/common/eal_common_thread.c
+++ b/lib/eal/common/eal_common_thread.c
@@ -34,7 +34,7 @@ unsigned rte_socket_id(void)
 }
 
 static int
-eal_cpuset_socket_id(rte_cpuset_t *cpusetp)
+eal_cpuset_socket_id(const rte_cpuset_t *cpusetp)
 {
 	unsigned cpu = 0;
 	int socket_id = SOCKET_ID_ANY;
@@ -62,7 +62,7 @@ unsigned rte_socket_id(void)
 }
 
 static void
-thread_update_affinity(rte_cpuset_t *cpusetp)
+thread_update_affinity(const rte_cpuset_t *cpusetp)
 {
 	unsigned int lcore_id = rte_lcore_id();
 
@@ -83,7 +83,7 @@ unsigned rte_socket_id(void)
 }
 
 int
-rte_thread_set_affinity(rte_cpuset_t *cpusetp)
+rte_thread_set_affinity(const rte_cpuset_t *cpusetp)
 {
 	if (rte_thread_set_affinity_by_id(rte_thread_self(), cpusetp) != 0) {
 		RTE_LOG(ERR, EAL, "rte_thread_set_affinity_by_id failed\n");
diff --git a/lib/eal/include/rte_thread.h b/lib/eal/include/rte_thread.h
index ddd411e..e461354 100644
--- a/lib/eal/include/rte_thread.h
+++ b/lib/eal/include/rte_thread.h
@@ -348,7 +348,7 @@ int rte_thread_get_affinity_by_id(rte_thread_t thread_id,
  * @return
  *   On success, return 0; otherwise return -1;
  */
-int rte_thread_set_affinity(rte_cpuset_t *cpusetp);
+int rte_thread_set_affinity(const rte_cpuset_t *cpusetp);
 
 /**
  * Get core affinity of the current thread.
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH v3 2/2] eal: fix failure path race setting new thread affinity
  2023-03-14 22:44 ` [PATCH v3 " Tyler Retzlaff
  2023-03-14 22:44   ` [PATCH v3 1/2] eal: make cpusetp to rte thread set affinity const Tyler Retzlaff
@ 2023-03-14 22:44   ` Tyler Retzlaff
  1 sibling, 0 replies; 34+ messages in thread
From: Tyler Retzlaff @ 2023-03-14 22:44 UTC (permalink / raw)
  To: dev; +Cc: thomas, david.marchand, Tyler Retzlaff, stable

In rte_thread_create setting affinity after pthread_create may fail.
Such a failure should result in the entire rte_thread_create failing
but doesn't.

Additionally if there is a failure to set affinity a race exists where
the creating thread will free ctx and depending on scheduling of the new
thread it may also free ctx (double free).

Resolve both of the above issues by setting the affinity from the newly
created thread instead of after thread creation. To achieve this modify
the existing thread wrapper to allow the creating thread to wait on the
result of the set affinity operation.

Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
Cc: stable@dpdk.org

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/eal/unix/rte_thread.c | 51 +++++++++++++++++++++++++++++++++++++----------
 1 file changed, 40 insertions(+), 11 deletions(-)

diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
index 37ebfcf..fde506b 100644
--- a/lib/eal/unix/rte_thread.c
+++ b/lib/eal/unix/rte_thread.c
@@ -16,8 +16,17 @@ struct eal_tls_key {
 	pthread_key_t thread_index;
 };
 
+enum __rte_thread_wrapper_status {
+	THREAD_WRAPPER_LAUNCHING, /* Yet to call thread_func function */
+	THREAD_WRAPPER_RUNNING, /* Thread is running successfully */
+	THREAD_WRAPPER_ERROR /* Thread thread_wrapper encountered an error */
+};
+
 struct thread_routine_ctx {
 	rte_thread_func thread_func;
+	const rte_thread_attr_t *thread_attr;
+	int thread_wrapper_ret;
+	enum __rte_thread_wrapper_status thread_wrapper_status;
 	void *routine_args;
 };
 
@@ -83,11 +92,22 @@ struct thread_routine_ctx {
 static void *
 thread_func_wrapper(void *arg)
 {
-	struct thread_routine_ctx ctx = *(struct thread_routine_ctx *)arg;
-
-	free(arg);
+	struct thread_routine_ctx *ctx = (struct thread_routine_ctx *)arg;
+	rte_thread_func thread_func = ctx->thread_func;
+	void * thread_args = ctx->routine_args;
+
+	if (ctx->thread_attr != NULL && CPU_COUNT(&ctx->thread_attr->cpuset) > 0) {
+		ctx->thread_wrapper_ret = rte_thread_set_affinity(&ctx->thread_attr->cpuset);
+		if (ctx->thread_wrapper_ret != 0) {
+			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity failed\n");
+			__atomic_store_n(&ctx->thread_wrapper_status,
+				THREAD_WRAPPER_ERROR, __ATOMIC_RELEASE);
+		}
+	}
+	__atomic_store_n(&ctx->thread_wrapper_status,
+		THREAD_WRAPPER_RUNNING, __ATOMIC_RELEASE);
 
-	return (void *)(uintptr_t)ctx.thread_func(ctx.routine_args);
+	return (void *)(uintptr_t)thread_func(thread_args);
 }
 
 int
@@ -98,6 +118,7 @@ struct thread_routine_ctx {
 	int ret = 0;
 	pthread_attr_t attr;
 	pthread_attr_t *attrp = NULL;
+	enum __rte_thread_wrapper_status thread_wrapper_status;
 	struct thread_routine_ctx *ctx;
 	struct sched_param param = {
 		.sched_priority = 0,
@@ -111,7 +132,10 @@ struct thread_routine_ctx {
 		goto cleanup;
 	}
 	ctx->routine_args = args;
+	ctx->thread_attr = thread_attr;
 	ctx->thread_func = thread_func;
+	ctx->thread_wrapper_ret = 0;
+	ctx->thread_wrapper_status = THREAD_WRAPPER_LAUNCHING;
 
 	if (thread_attr != NULL) {
 		ret = pthread_attr_init(&attr);
@@ -164,14 +188,19 @@ struct thread_routine_ctx {
 		goto cleanup;
 	}
 
-	if (thread_attr != NULL && CPU_COUNT(&thread_attr->cpuset) > 0) {
-		ret = rte_thread_set_affinity_by_id(*thread_id,
-			&thread_attr->cpuset);
-		if (ret != 0) {
-			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity_by_id failed\n");
-			goto cleanup;
-		}
+	/* Wait for the thread wrapper to initialize thread successfully */
+	while ((thread_wrapper_status =
+		__atomic_load_n(&ctx->thread_wrapper_status,
+		__ATOMIC_ACQUIRE)) == THREAD_WRAPPER_LAUNCHING)
+			sched_yield();
+
+	/* Check if the control thread encountered an error */
+	if (thread_wrapper_status == THREAD_WRAPPER_ERROR) {
+		/* thread wrapper is exiting */
+		pthread_join((pthread_t)thread_id->opaque_id, NULL);
+		ret = ctx->thread_wrapper_ret;
 	}
+	free(ctx);
 
 	ctx = NULL;
 cleanup:
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH v4 0/2] fix race in rte_thread_create failure path
  2023-03-02 18:44 [PATCH 1/2] eal: fix failure race and behavior of thread create Tyler Retzlaff
                   ` (3 preceding siblings ...)
  2023-03-14 22:44 ` [PATCH v3 " Tyler Retzlaff
@ 2023-03-14 22:50 ` Tyler Retzlaff
  2023-03-14 22:50   ` [PATCH v4 1/2] eal: make cpusetp to rte thread set affinity const Tyler Retzlaff
  2023-03-14 22:50   ` [PATCH v4 2/2] eal: fix failure path race setting new thread affinity Tyler Retzlaff
  2023-03-16  0:04 ` [PATCH v4 0/2] fix race in rte_thread_create failure path Tyler Retzlaff
                   ` (2 subsequent siblings)
  7 siblings, 2 replies; 34+ messages in thread
From: Tyler Retzlaff @ 2023-03-14 22:50 UTC (permalink / raw)
  To: dev; +Cc: thomas, david.marchand, Tyler Retzlaff

v4:
  * style fixes reported by CI

v3:
  * don't free wrapper context from new thread now that wrapper
    completion is complete before returning from creating thread.

v2:
  * new approach over v1 of the patch to avoid using pthread np API that
    is not available on Alpine Linux.
  * to conform to rte_thread_create parameter const qualification include
    an additional patch to const qualify rte_thread_set_affinity cpusetp
    parameter.

Tyler Retzlaff (2):
  eal: make cpusetp to rte thread set affinity const
  eal: fix failure path race setting new thread affinity

 lib/eal/common/eal_common_thread.c |  6 ++---
 lib/eal/include/rte_thread.h       |  2 +-
 lib/eal/unix/rte_thread.c          | 51 ++++++++++++++++++++++++++++++--------
 3 files changed, 44 insertions(+), 15 deletions(-)

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH v4 1/2] eal: make cpusetp to rte thread set affinity const
  2023-03-14 22:50 ` [PATCH v4 0/2] fix race in rte_thread_create failure path Tyler Retzlaff
@ 2023-03-14 22:50   ` Tyler Retzlaff
  2023-03-14 22:50   ` [PATCH v4 2/2] eal: fix failure path race setting new thread affinity Tyler Retzlaff
  1 sibling, 0 replies; 34+ messages in thread
From: Tyler Retzlaff @ 2023-03-14 22:50 UTC (permalink / raw)
  To: dev; +Cc: thomas, david.marchand, Tyler Retzlaff

const qualify rte_cpuset_t *cpusetp parameter in the
rte_thread_set_affinity API.

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/eal/common/eal_common_thread.c | 6 +++---
 lib/eal/include/rte_thread.h       | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/eal/common/eal_common_thread.c b/lib/eal/common/eal_common_thread.c
index 079a385..c9e5619 100644
--- a/lib/eal/common/eal_common_thread.c
+++ b/lib/eal/common/eal_common_thread.c
@@ -34,7 +34,7 @@ unsigned rte_socket_id(void)
 }
 
 static int
-eal_cpuset_socket_id(rte_cpuset_t *cpusetp)
+eal_cpuset_socket_id(const rte_cpuset_t *cpusetp)
 {
 	unsigned cpu = 0;
 	int socket_id = SOCKET_ID_ANY;
@@ -62,7 +62,7 @@ unsigned rte_socket_id(void)
 }
 
 static void
-thread_update_affinity(rte_cpuset_t *cpusetp)
+thread_update_affinity(const rte_cpuset_t *cpusetp)
 {
 	unsigned int lcore_id = rte_lcore_id();
 
@@ -83,7 +83,7 @@ unsigned rte_socket_id(void)
 }
 
 int
-rte_thread_set_affinity(rte_cpuset_t *cpusetp)
+rte_thread_set_affinity(const rte_cpuset_t *cpusetp)
 {
 	if (rte_thread_set_affinity_by_id(rte_thread_self(), cpusetp) != 0) {
 		RTE_LOG(ERR, EAL, "rte_thread_set_affinity_by_id failed\n");
diff --git a/lib/eal/include/rte_thread.h b/lib/eal/include/rte_thread.h
index ddd411e..e461354 100644
--- a/lib/eal/include/rte_thread.h
+++ b/lib/eal/include/rte_thread.h
@@ -348,7 +348,7 @@ int rte_thread_get_affinity_by_id(rte_thread_t thread_id,
  * @return
  *   On success, return 0; otherwise return -1;
  */
-int rte_thread_set_affinity(rte_cpuset_t *cpusetp);
+int rte_thread_set_affinity(const rte_cpuset_t *cpusetp);
 
 /**
  * Get core affinity of the current thread.
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH v4 2/2] eal: fix failure path race setting new thread affinity
  2023-03-14 22:50 ` [PATCH v4 0/2] fix race in rte_thread_create failure path Tyler Retzlaff
  2023-03-14 22:50   ` [PATCH v4 1/2] eal: make cpusetp to rte thread set affinity const Tyler Retzlaff
@ 2023-03-14 22:50   ` Tyler Retzlaff
  2023-03-15  1:20     ` Stephen Hemminger
  1 sibling, 1 reply; 34+ messages in thread
From: Tyler Retzlaff @ 2023-03-14 22:50 UTC (permalink / raw)
  To: dev; +Cc: thomas, david.marchand, Tyler Retzlaff, stable

In rte_thread_create setting affinity after pthread_create may fail.
Such a failure should result in the entire rte_thread_create failing
but doesn't.

Additionally if there is a failure to set affinity a race exists where
the creating thread will free ctx and depending on scheduling of the new
thread it may also free ctx (double free).

Resolve both of the above issues by setting the affinity from the newly
created thread instead of after thread creation. To achieve this modify
the existing thread wrapper to allow the creating thread to wait on the
result of the set affinity operation.

Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
Cc: stable@dpdk.org

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/eal/unix/rte_thread.c | 51 +++++++++++++++++++++++++++++++++++++----------
 1 file changed, 40 insertions(+), 11 deletions(-)

diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
index 37ebfcf..9fcf53b 100644
--- a/lib/eal/unix/rte_thread.c
+++ b/lib/eal/unix/rte_thread.c
@@ -16,8 +16,17 @@ struct eal_tls_key {
 	pthread_key_t thread_index;
 };
 
+enum __rte_thread_wrapper_status {
+	THREAD_WRAPPER_LAUNCHING, /* Yet to call thread_func function */
+	THREAD_WRAPPER_RUNNING, /* Thread is running successfully */
+	THREAD_WRAPPER_ERROR /* Thread thread_wrapper encountered an error */
+};
+
 struct thread_routine_ctx {
 	rte_thread_func thread_func;
+	const rte_thread_attr_t *thread_attr;
+	int thread_wrapper_ret;
+	enum __rte_thread_wrapper_status thread_wrapper_status;
 	void *routine_args;
 };
 
@@ -83,11 +92,22 @@ struct thread_routine_ctx {
 static void *
 thread_func_wrapper(void *arg)
 {
-	struct thread_routine_ctx ctx = *(struct thread_routine_ctx *)arg;
-
-	free(arg);
+	struct thread_routine_ctx *ctx = (struct thread_routine_ctx *)arg;
+	rte_thread_func thread_func = ctx->thread_func;
+	void *thread_args = ctx->routine_args;
+
+	if (ctx->thread_attr != NULL && CPU_COUNT(&ctx->thread_attr->cpuset) > 0) {
+		ctx->thread_wrapper_ret = rte_thread_set_affinity(&ctx->thread_attr->cpuset);
+		if (ctx->thread_wrapper_ret != 0) {
+			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity failed\n");
+			__atomic_store_n(&ctx->thread_wrapper_status,
+				THREAD_WRAPPER_ERROR, __ATOMIC_RELEASE);
+		}
+	}
+	__atomic_store_n(&ctx->thread_wrapper_status,
+		THREAD_WRAPPER_RUNNING, __ATOMIC_RELEASE);
 
-	return (void *)(uintptr_t)ctx.thread_func(ctx.routine_args);
+	return (void *)(uintptr_t)thread_func(thread_args);
 }
 
 int
@@ -98,6 +118,7 @@ struct thread_routine_ctx {
 	int ret = 0;
 	pthread_attr_t attr;
 	pthread_attr_t *attrp = NULL;
+	enum __rte_thread_wrapper_status thread_wrapper_status;
 	struct thread_routine_ctx *ctx;
 	struct sched_param param = {
 		.sched_priority = 0,
@@ -111,7 +132,10 @@ struct thread_routine_ctx {
 		goto cleanup;
 	}
 	ctx->routine_args = args;
+	ctx->thread_attr = thread_attr;
 	ctx->thread_func = thread_func;
+	ctx->thread_wrapper_ret = 0;
+	ctx->thread_wrapper_status = THREAD_WRAPPER_LAUNCHING;
 
 	if (thread_attr != NULL) {
 		ret = pthread_attr_init(&attr);
@@ -164,14 +188,19 @@ struct thread_routine_ctx {
 		goto cleanup;
 	}
 
-	if (thread_attr != NULL && CPU_COUNT(&thread_attr->cpuset) > 0) {
-		ret = rte_thread_set_affinity_by_id(*thread_id,
-			&thread_attr->cpuset);
-		if (ret != 0) {
-			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity_by_id failed\n");
-			goto cleanup;
-		}
+	/* Wait for the thread wrapper to initialize thread successfully */
+	while ((thread_wrapper_status =
+		__atomic_load_n(&ctx->thread_wrapper_status,
+		__ATOMIC_ACQUIRE)) == THREAD_WRAPPER_LAUNCHING)
+		sched_yield();
+
+	/* Check if the control thread encountered an error */
+	if (thread_wrapper_status == THREAD_WRAPPER_ERROR) {
+		/* thread wrapper is exiting */
+		pthread_join((pthread_t)thread_id->opaque_id, NULL);
+		ret = ctx->thread_wrapper_ret;
 	}
+	free(ctx);
 
 	ctx = NULL;
 cleanup:
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] eal: fix failure path race setting new thread affinity
  2023-03-14 22:50   ` [PATCH v4 2/2] eal: fix failure path race setting new thread affinity Tyler Retzlaff
@ 2023-03-15  1:20     ` Stephen Hemminger
  2023-03-15  1:26       ` Tyler Retzlaff
  0 siblings, 1 reply; 34+ messages in thread
From: Stephen Hemminger @ 2023-03-15  1:20 UTC (permalink / raw)
  To: Tyler Retzlaff; +Cc: dev, thomas, david.marchand, stable

On Tue, 14 Mar 2023 15:50:39 -0700
Tyler Retzlaff <roretzla@linux.microsoft.com> wrote:

> +	/* Wait for the thread wrapper to initialize thread successfully */
> +	while ((thread_wrapper_status =
> +		__atomic_load_n(&ctx->thread_wrapper_status,
> +		__ATOMIC_ACQUIRE)) == THREAD_WRAPPER_LAUNCHING)
> +		sched_yield();

Using pthread condition variable would be better, and avoid
using sched_yield() which is deprecated and not guaranteed to
work in cases where threads have different priority.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v4 2/2] eal: fix failure path race setting new thread affinity
  2023-03-15  1:20     ` Stephen Hemminger
@ 2023-03-15  1:26       ` Tyler Retzlaff
  0 siblings, 0 replies; 34+ messages in thread
From: Tyler Retzlaff @ 2023-03-15  1:26 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, thomas, david.marchand, stable

On Tue, Mar 14, 2023 at 06:20:42PM -0700, Stephen Hemminger wrote:
> On Tue, 14 Mar 2023 15:50:39 -0700
> Tyler Retzlaff <roretzla@linux.microsoft.com> wrote:
> 
> > +	/* Wait for the thread wrapper to initialize thread successfully */
> > +	while ((thread_wrapper_status =
> > +		__atomic_load_n(&ctx->thread_wrapper_status,
> > +		__ATOMIC_ACQUIRE)) == THREAD_WRAPPER_LAUNCHING)
> > +		sched_yield();
> 
> Using pthread condition variable would be better, and avoid
> using sched_yield() which is deprecated and not guaranteed to
> work in cases where threads have different priority.

seems reasonable since this code is platform dependent, i'll spin a new
patch with a condvar then.

thanks for the suggestion.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH v4 0/2] fix race in rte_thread_create failure path
  2023-03-02 18:44 [PATCH 1/2] eal: fix failure race and behavior of thread create Tyler Retzlaff
                   ` (4 preceding siblings ...)
  2023-03-14 22:50 ` [PATCH v4 0/2] fix race in rte_thread_create failure path Tyler Retzlaff
@ 2023-03-16  0:04 ` Tyler Retzlaff
  2023-03-16  0:04   ` [PATCH v4 1/2] eal: make cpusetp to rte thread set affinity const Tyler Retzlaff
  2023-03-16  0:04   ` [PATCH v4 2/2] eal: fix failure path race setting new thread affinity Tyler Retzlaff
  2023-03-16  0:07 ` [PATCH v5 0/2] fix race in rte_thread_create failure path Tyler Retzlaff
  2023-03-17 18:52 ` [PATCH v6] eal/unix: fix thread creation David Marchand
  7 siblings, 2 replies; 34+ messages in thread
From: Tyler Retzlaff @ 2023-03-16  0:04 UTC (permalink / raw)
  To: dev; +Cc: thomas, david.marchand, stephen, Tyler Retzlaff

v4:
  * refactor patch to use pthread_cond_t to wait for thread start
    wrapper to complete work.
  * rename some variables to group those that are part of the
    wrapper synchronization.
  * use stack instead of heap allocation for thread start context.

v3:
  * don't free wrapper context from new thread now that wrapper
    completion is complete before returning from creating thread.

v2:
  * new approach over v1 of the patch to avoid using pthread np API that
    is not available on Alpine Linux.
  * to conform to rte_thread_create parameter const qualification include
    an additional patch to const qualify rte_thread_set_affinity cpusetp
    parameter.


Tyler Retzlaff (2):
  eal: make cpusetp to rte thread set affinity const
  eal: fix failure path race setting new thread affinity

 lib/eal/common/eal_common_thread.c |  6 ++--
 lib/eal/include/rte_thread.h       |  2 +-
 lib/eal/unix/rte_thread.c          | 70 +++++++++++++++++++++++---------------
 3 files changed, 47 insertions(+), 31 deletions(-)

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH v4 1/2] eal: make cpusetp to rte thread set affinity const
  2023-03-16  0:04 ` [PATCH v4 0/2] fix race in rte_thread_create failure path Tyler Retzlaff
@ 2023-03-16  0:04   ` Tyler Retzlaff
  2023-03-16  0:04   ` [PATCH v4 2/2] eal: fix failure path race setting new thread affinity Tyler Retzlaff
  1 sibling, 0 replies; 34+ messages in thread
From: Tyler Retzlaff @ 2023-03-16  0:04 UTC (permalink / raw)
  To: dev; +Cc: thomas, david.marchand, stephen, Tyler Retzlaff

const qualify rte_cpuset_t *cpusetp parameter in the
rte_thread_set_affinity API.

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/eal/common/eal_common_thread.c | 6 +++---
 lib/eal/include/rte_thread.h       | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/eal/common/eal_common_thread.c b/lib/eal/common/eal_common_thread.c
index 079a385..c9e5619 100644
--- a/lib/eal/common/eal_common_thread.c
+++ b/lib/eal/common/eal_common_thread.c
@@ -34,7 +34,7 @@ unsigned rte_socket_id(void)
 }
 
 static int
-eal_cpuset_socket_id(rte_cpuset_t *cpusetp)
+eal_cpuset_socket_id(const rte_cpuset_t *cpusetp)
 {
 	unsigned cpu = 0;
 	int socket_id = SOCKET_ID_ANY;
@@ -62,7 +62,7 @@ unsigned rte_socket_id(void)
 }
 
 static void
-thread_update_affinity(rte_cpuset_t *cpusetp)
+thread_update_affinity(const rte_cpuset_t *cpusetp)
 {
 	unsigned int lcore_id = rte_lcore_id();
 
@@ -83,7 +83,7 @@ unsigned rte_socket_id(void)
 }
 
 int
-rte_thread_set_affinity(rte_cpuset_t *cpusetp)
+rte_thread_set_affinity(const rte_cpuset_t *cpusetp)
 {
 	if (rte_thread_set_affinity_by_id(rte_thread_self(), cpusetp) != 0) {
 		RTE_LOG(ERR, EAL, "rte_thread_set_affinity_by_id failed\n");
diff --git a/lib/eal/include/rte_thread.h b/lib/eal/include/rte_thread.h
index ddd411e..e461354 100644
--- a/lib/eal/include/rte_thread.h
+++ b/lib/eal/include/rte_thread.h
@@ -348,7 +348,7 @@ int rte_thread_get_affinity_by_id(rte_thread_t thread_id,
  * @return
  *   On success, return 0; otherwise return -1;
  */
-int rte_thread_set_affinity(rte_cpuset_t *cpusetp);
+int rte_thread_set_affinity(const rte_cpuset_t *cpusetp);
 
 /**
  * Get core affinity of the current thread.
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH v4 2/2] eal: fix failure path race setting new thread affinity
  2023-03-16  0:04 ` [PATCH v4 0/2] fix race in rte_thread_create failure path Tyler Retzlaff
  2023-03-16  0:04   ` [PATCH v4 1/2] eal: make cpusetp to rte thread set affinity const Tyler Retzlaff
@ 2023-03-16  0:04   ` Tyler Retzlaff
  1 sibling, 0 replies; 34+ messages in thread
From: Tyler Retzlaff @ 2023-03-16  0:04 UTC (permalink / raw)
  To: dev; +Cc: thomas, david.marchand, stephen, Tyler Retzlaff, stable

In rte_thread_create setting affinity after pthread_create may fail.
Such a failure should result in the entire rte_thread_create failing
but doesn't.

Additionally if there is a failure to set affinity a race exists where
the creating thread will free ctx and depending on scheduling of the new
thread it may also free ctx (double free).

Resolve the above by setting the affinity from the newly created thread
using a condition variable to signal the completion of the thread
start wrapper having completed.

Since we are now waiting for the thread start wrapper to complete we can
allocate the thread start wrapper context on the stack. While here clean
up the variable naming in the context to better highlight the fields of
the context require synchronization between the creating and created
thread.

Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
Cc: stable@dpdk.org

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/eal/unix/rte_thread.c | 70 +++++++++++++++++++++++++++++------------------
 1 file changed, 43 insertions(+), 27 deletions(-)

diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
index 37ebfcf..5992b04 100644
--- a/lib/eal/unix/rte_thread.c
+++ b/lib/eal/unix/rte_thread.c
@@ -16,9 +16,14 @@ struct eal_tls_key {
 	pthread_key_t thread_index;
 };
 
-struct thread_routine_ctx {
+struct thread_start_context {
 	rte_thread_func thread_func;
-	void *routine_args;
+	void *thread_args;
+	const rte_thread_attr_t *thread_attr;
+	pthread_mutex_t wrapper_mutex;
+	pthread_cond_t wrapper_cond;
+	int wrapper_ret;
+	volatile int wrapper_done;
 };
 
 static int
@@ -81,13 +86,29 @@ struct thread_routine_ctx {
 }
 
 static void *
-thread_func_wrapper(void *arg)
+thread_start_wrapper(void *arg)
 {
-	struct thread_routine_ctx ctx = *(struct thread_routine_ctx *)arg;
+	struct thread_start_context *ctx = (struct thread_start_context *)arg;
+	rte_thread_func thread_func = ctx->thread_func;
+	void *thread_args = ctx->thread_args;
+	int ret = 0;
 
-	free(arg);
+	if (ctx->thread_attr != NULL && CPU_COUNT(&ctx->thread_attr->cpuset) > 0) {
+		ret = rte_thread_set_affinity(&ctx->thread_attr->cpuset);
+		if (ret != 0)
+			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity failed\n");
+	}
 
-	return (void *)(uintptr_t)ctx.thread_func(ctx.routine_args);
+	pthread_mutex_lock(&ctx->wrapper_mutex);
+	ctx->wrapper_ret = ret;
+	ctx->wrapper_done = 1;
+	pthread_cond_signal(&ctx->wrapper_cond);
+	pthread_mutex_unlock(&ctx->wrapper_mutex);
+
+	if (ret != 0)
+		return NULL;
+
+	return (void *)(uintptr_t)thread_func(thread_args);
 }
 
 int
@@ -98,20 +119,17 @@ struct thread_routine_ctx {
 	int ret = 0;
 	pthread_attr_t attr;
 	pthread_attr_t *attrp = NULL;
-	struct thread_routine_ctx *ctx;
 	struct sched_param param = {
 		.sched_priority = 0,
 	};
 	int policy = SCHED_OTHER;
-
-	ctx = calloc(1, sizeof(*ctx));
-	if (ctx == NULL) {
-		RTE_LOG(DEBUG, EAL, "Insufficient memory for thread context allocations\n");
-		ret = ENOMEM;
-		goto cleanup;
-	}
-	ctx->routine_args = args;
-	ctx->thread_func = thread_func;
+	struct thread_start_context ctx = {
+		.thread_func = thread_func,
+		.thread_args = args,
+		.thread_attr = thread_attr,
+		.wrapper_mutex = PTHREAD_MUTEX_INITIALIZER,
+		.wrapper_cond = PTHREAD_COND_INITIALIZER,
+	};
 
 	if (thread_attr != NULL) {
 		ret = pthread_attr_init(&attr);
@@ -158,24 +176,22 @@ struct thread_routine_ctx {
 	}
 
 	ret = pthread_create((pthread_t *)&thread_id->opaque_id, attrp,
-		thread_func_wrapper, ctx);
+		thread_start_wrapper, &ctx);
 	if (ret != 0) {
 		RTE_LOG(DEBUG, EAL, "pthread_create failed\n");
 		goto cleanup;
 	}
 
-	if (thread_attr != NULL && CPU_COUNT(&thread_attr->cpuset) > 0) {
-		ret = rte_thread_set_affinity_by_id(*thread_id,
-			&thread_attr->cpuset);
-		if (ret != 0) {
-			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity_by_id failed\n");
-			goto cleanup;
-		}
-	}
+	pthread_mutex_lock(&ctx.wrapper_mutex);
+	while (ctx.wrapper_done != 1)
+		pthread_cond_wait(&ctx.wrapper_cond, &ctx.wrapper_mutex);
+	ret = ctx.wrapper_ret;
+	pthread_mutex_unlock(&ctx.wrapper_mutex);
+
+	if (ret != 0)
+		pthread_join((pthread_t)thread_id->opaque_id, NULL);
 
-	ctx = NULL;
 cleanup:
-	free(ctx);
 	if (attrp != NULL)
 		pthread_attr_destroy(&attr);
 
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH v5 0/2] fix race in rte_thread_create failure path
  2023-03-02 18:44 [PATCH 1/2] eal: fix failure race and behavior of thread create Tyler Retzlaff
                   ` (5 preceding siblings ...)
  2023-03-16  0:04 ` [PATCH v4 0/2] fix race in rte_thread_create failure path Tyler Retzlaff
@ 2023-03-16  0:07 ` Tyler Retzlaff
  2023-03-16  0:07   ` [PATCH v5 1/2] eal: make cpusetp to rte thread set affinity const Tyler Retzlaff
  2023-03-16  0:07   ` [PATCH v5 2/2] eal: fix failure path race setting new thread affinity Tyler Retzlaff
  2023-03-17 18:52 ` [PATCH v6] eal/unix: fix thread creation David Marchand
  7 siblings, 2 replies; 34+ messages in thread
From: Tyler Retzlaff @ 2023-03-16  0:07 UTC (permalink / raw)
  To: dev; +Cc: thomas, david.marchand, stephen, Tyler Retzlaff

v5:
  * refactor patch to use pthread_cond_t to wait for thread start
    wrapper to complete work.
  * rename some variables to group those that are part of the
    wrapper synchronization.
  * use stack instead of heap allocation for thread start context.

v4:
  * style fixes reported by CI

v3:
  * don't free wrapper context from new thread now that wrapper
    completion is complete before returning from creating thread.

v2:
  * new approach over v1 of the patch to avoid using pthread np API that
    is not available on Alpine Linux.
  * to conform to rte_thread_create parameter const qualification include
    an additional patch to const qualify rte_thread_set_affinity cpusetp
    parameter.

Tyler Retzlaff (2):
  eal: make cpusetp to rte thread set affinity const
  eal: fix failure path race setting new thread affinity

 lib/eal/common/eal_common_thread.c |  6 ++--
 lib/eal/include/rte_thread.h       |  2 +-
 lib/eal/unix/rte_thread.c          | 70 +++++++++++++++++++++++---------------
 3 files changed, 47 insertions(+), 31 deletions(-)

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH v5 1/2] eal: make cpusetp to rte thread set affinity const
  2023-03-16  0:07 ` [PATCH v5 0/2] fix race in rte_thread_create failure path Tyler Retzlaff
@ 2023-03-16  0:07   ` Tyler Retzlaff
  2023-03-16  0:07   ` [PATCH v5 2/2] eal: fix failure path race setting new thread affinity Tyler Retzlaff
  1 sibling, 0 replies; 34+ messages in thread
From: Tyler Retzlaff @ 2023-03-16  0:07 UTC (permalink / raw)
  To: dev; +Cc: thomas, david.marchand, stephen, Tyler Retzlaff

const qualify rte_cpuset_t *cpusetp parameter in the
rte_thread_set_affinity API.

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/eal/common/eal_common_thread.c | 6 +++---
 lib/eal/include/rte_thread.h       | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/eal/common/eal_common_thread.c b/lib/eal/common/eal_common_thread.c
index 079a385..c9e5619 100644
--- a/lib/eal/common/eal_common_thread.c
+++ b/lib/eal/common/eal_common_thread.c
@@ -34,7 +34,7 @@ unsigned rte_socket_id(void)
 }
 
 static int
-eal_cpuset_socket_id(rte_cpuset_t *cpusetp)
+eal_cpuset_socket_id(const rte_cpuset_t *cpusetp)
 {
 	unsigned cpu = 0;
 	int socket_id = SOCKET_ID_ANY;
@@ -62,7 +62,7 @@ unsigned rte_socket_id(void)
 }
 
 static void
-thread_update_affinity(rte_cpuset_t *cpusetp)
+thread_update_affinity(const rte_cpuset_t *cpusetp)
 {
 	unsigned int lcore_id = rte_lcore_id();
 
@@ -83,7 +83,7 @@ unsigned rte_socket_id(void)
 }
 
 int
-rte_thread_set_affinity(rte_cpuset_t *cpusetp)
+rte_thread_set_affinity(const rte_cpuset_t *cpusetp)
 {
 	if (rte_thread_set_affinity_by_id(rte_thread_self(), cpusetp) != 0) {
 		RTE_LOG(ERR, EAL, "rte_thread_set_affinity_by_id failed\n");
diff --git a/lib/eal/include/rte_thread.h b/lib/eal/include/rte_thread.h
index ddd411e..e461354 100644
--- a/lib/eal/include/rte_thread.h
+++ b/lib/eal/include/rte_thread.h
@@ -348,7 +348,7 @@ int rte_thread_get_affinity_by_id(rte_thread_t thread_id,
  * @return
  *   On success, return 0; otherwise return -1;
  */
-int rte_thread_set_affinity(rte_cpuset_t *cpusetp);
+int rte_thread_set_affinity(const rte_cpuset_t *cpusetp);
 
 /**
  * Get core affinity of the current thread.
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH v5 2/2] eal: fix failure path race setting new thread affinity
  2023-03-16  0:07 ` [PATCH v5 0/2] fix race in rte_thread_create failure path Tyler Retzlaff
  2023-03-16  0:07   ` [PATCH v5 1/2] eal: make cpusetp to rte thread set affinity const Tyler Retzlaff
@ 2023-03-16  0:07   ` Tyler Retzlaff
  2023-03-17 10:45     ` David Marchand
  1 sibling, 1 reply; 34+ messages in thread
From: Tyler Retzlaff @ 2023-03-16  0:07 UTC (permalink / raw)
  To: dev; +Cc: thomas, david.marchand, stephen, Tyler Retzlaff, stable

In rte_thread_create setting affinity after pthread_create may fail.
Such a failure should result in the entire rte_thread_create failing
but doesn't.

Additionally if there is a failure to set affinity a race exists where
the creating thread will free ctx and depending on scheduling of the new
thread it may also free ctx (double free).

Resolve the above by setting the affinity from the newly created thread
using a condition variable to signal the completion of the thread
start wrapper having completed.

Since we are now waiting for the thread start wrapper to complete we can
allocate the thread start wrapper context on the stack. While here clean
up the variable naming in the context to better highlight the fields of
the context require synchronization between the creating and created
thread.

Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
Cc: stable@dpdk.org

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/eal/unix/rte_thread.c | 70 +++++++++++++++++++++++++++++------------------
 1 file changed, 43 insertions(+), 27 deletions(-)

diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
index 37ebfcf..5992b04 100644
--- a/lib/eal/unix/rte_thread.c
+++ b/lib/eal/unix/rte_thread.c
@@ -16,9 +16,14 @@ struct eal_tls_key {
 	pthread_key_t thread_index;
 };
 
-struct thread_routine_ctx {
+struct thread_start_context {
 	rte_thread_func thread_func;
-	void *routine_args;
+	void *thread_args;
+	const rte_thread_attr_t *thread_attr;
+	pthread_mutex_t wrapper_mutex;
+	pthread_cond_t wrapper_cond;
+	int wrapper_ret;
+	volatile int wrapper_done;
 };
 
 static int
@@ -81,13 +86,29 @@ struct thread_routine_ctx {
 }
 
 static void *
-thread_func_wrapper(void *arg)
+thread_start_wrapper(void *arg)
 {
-	struct thread_routine_ctx ctx = *(struct thread_routine_ctx *)arg;
+	struct thread_start_context *ctx = (struct thread_start_context *)arg;
+	rte_thread_func thread_func = ctx->thread_func;
+	void *thread_args = ctx->thread_args;
+	int ret = 0;
 
-	free(arg);
+	if (ctx->thread_attr != NULL && CPU_COUNT(&ctx->thread_attr->cpuset) > 0) {
+		ret = rte_thread_set_affinity(&ctx->thread_attr->cpuset);
+		if (ret != 0)
+			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity failed\n");
+	}
 
-	return (void *)(uintptr_t)ctx.thread_func(ctx.routine_args);
+	pthread_mutex_lock(&ctx->wrapper_mutex);
+	ctx->wrapper_ret = ret;
+	ctx->wrapper_done = 1;
+	pthread_cond_signal(&ctx->wrapper_cond);
+	pthread_mutex_unlock(&ctx->wrapper_mutex);
+
+	if (ret != 0)
+		return NULL;
+
+	return (void *)(uintptr_t)thread_func(thread_args);
 }
 
 int
@@ -98,20 +119,17 @@ struct thread_routine_ctx {
 	int ret = 0;
 	pthread_attr_t attr;
 	pthread_attr_t *attrp = NULL;
-	struct thread_routine_ctx *ctx;
 	struct sched_param param = {
 		.sched_priority = 0,
 	};
 	int policy = SCHED_OTHER;
-
-	ctx = calloc(1, sizeof(*ctx));
-	if (ctx == NULL) {
-		RTE_LOG(DEBUG, EAL, "Insufficient memory for thread context allocations\n");
-		ret = ENOMEM;
-		goto cleanup;
-	}
-	ctx->routine_args = args;
-	ctx->thread_func = thread_func;
+	struct thread_start_context ctx = {
+		.thread_func = thread_func,
+		.thread_args = args,
+		.thread_attr = thread_attr,
+		.wrapper_mutex = PTHREAD_MUTEX_INITIALIZER,
+		.wrapper_cond = PTHREAD_COND_INITIALIZER,
+	};
 
 	if (thread_attr != NULL) {
 		ret = pthread_attr_init(&attr);
@@ -158,24 +176,22 @@ struct thread_routine_ctx {
 	}
 
 	ret = pthread_create((pthread_t *)&thread_id->opaque_id, attrp,
-		thread_func_wrapper, ctx);
+		thread_start_wrapper, &ctx);
 	if (ret != 0) {
 		RTE_LOG(DEBUG, EAL, "pthread_create failed\n");
 		goto cleanup;
 	}
 
-	if (thread_attr != NULL && CPU_COUNT(&thread_attr->cpuset) > 0) {
-		ret = rte_thread_set_affinity_by_id(*thread_id,
-			&thread_attr->cpuset);
-		if (ret != 0) {
-			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity_by_id failed\n");
-			goto cleanup;
-		}
-	}
+	pthread_mutex_lock(&ctx.wrapper_mutex);
+	while (ctx.wrapper_done != 1)
+		pthread_cond_wait(&ctx.wrapper_cond, &ctx.wrapper_mutex);
+	ret = ctx.wrapper_ret;
+	pthread_mutex_unlock(&ctx.wrapper_mutex);
+
+	if (ret != 0)
+		pthread_join((pthread_t)thread_id->opaque_id, NULL);
 
-	ctx = NULL;
 cleanup:
-	free(ctx);
 	if (attrp != NULL)
 		pthread_attr_destroy(&attr);
 
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v5 2/2] eal: fix failure path race setting new thread affinity
  2023-03-16  0:07   ` [PATCH v5 2/2] eal: fix failure path race setting new thread affinity Tyler Retzlaff
@ 2023-03-17 10:45     ` David Marchand
  2023-03-17 14:49       ` Tyler Retzlaff
  0 siblings, 1 reply; 34+ messages in thread
From: David Marchand @ 2023-03-17 10:45 UTC (permalink / raw)
  To: Tyler Retzlaff; +Cc: dev, thomas, stephen, stable

On Thu, Mar 16, 2023 at 1:07 AM Tyler Retzlaff
<roretzla@linux.microsoft.com> wrote:
>
> In rte_thread_create setting affinity after pthread_create may fail.
> Such a failure should result in the entire rte_thread_create failing
> but doesn't.
>
> Additionally if there is a failure to set affinity a race exists where
> the creating thread will free ctx and depending on scheduling of the new
> thread it may also free ctx (double free).
>
> Resolve the above by setting the affinity from the newly created thread
> using a condition variable to signal the completion of the thread
> start wrapper having completed.
>
> Since we are now waiting for the thread start wrapper to complete we can
> allocate the thread start wrapper context on the stack. While here clean
> up the variable naming in the context to better highlight the fields of
> the context require synchronization between the creating and created
> thread.
>
> Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
> Cc: stable@dpdk.org
>
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> ---
>  lib/eal/unix/rte_thread.c | 70 +++++++++++++++++++++++++++++------------------
>  1 file changed, 43 insertions(+), 27 deletions(-)
>
> diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
> index 37ebfcf..5992b04 100644
> --- a/lib/eal/unix/rte_thread.c
> +++ b/lib/eal/unix/rte_thread.c
> @@ -16,9 +16,14 @@ struct eal_tls_key {
>         pthread_key_t thread_index;
>  };
>
> -struct thread_routine_ctx {
> +struct thread_start_context {
>         rte_thread_func thread_func;
> -       void *routine_args;
> +       void *thread_args;
> +       const rte_thread_attr_t *thread_attr;
> +       pthread_mutex_t wrapper_mutex;
> +       pthread_cond_t wrapper_cond;
> +       int wrapper_ret;
> +       volatile int wrapper_done;

One question.

I see that wrapper_done is accessed under wrapper_mutex.
Is volatile needed?

(nit: a boolean is probably enough too)

I was thinking of squashing below diff:

diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
index 5992b04a45..5ab5267ca3 100644
--- a/lib/eal/unix/rte_thread.c
+++ b/lib/eal/unix/rte_thread.c
@@ -23,7 +23,7 @@ struct thread_start_context {
        pthread_mutex_t wrapper_mutex;
        pthread_cond_t wrapper_cond;
        int wrapper_ret;
-       volatile int wrapper_done;
+       bool wrapper_done;
 };

 static int
@@ -101,7 +101,7 @@ thread_start_wrapper(void *arg)

        pthread_mutex_lock(&ctx->wrapper_mutex);
        ctx->wrapper_ret = ret;
-       ctx->wrapper_done = 1;
+       ctx->wrapper_done = true;
        pthread_cond_signal(&ctx->wrapper_cond);
        pthread_mutex_unlock(&ctx->wrapper_mutex);

@@ -127,6 +127,7 @@ rte_thread_create(rte_thread_t *thread_id,
                .thread_func = thread_func,
                .thread_args = args,
                .thread_attr = thread_attr,
+               .wrapper_done = false,
                .wrapper_mutex = PTHREAD_MUTEX_INITIALIZER,
                .wrapper_cond = PTHREAD_COND_INITIALIZER,
        };
@@ -151,7 +152,6 @@ rte_thread_create(rte_thread_t *thread_id,
                        goto cleanup;
                }

-
                if (thread_attr->priority ==
                                RTE_THREAD_PRIORITY_REALTIME_CRITICAL) {
                        ret = ENOTSUP;
@@ -183,7 +183,7 @@ rte_thread_create(rte_thread_t *thread_id,
        }

        pthread_mutex_lock(&ctx.wrapper_mutex);
-       while (ctx.wrapper_done != 1)
+       while (!ctx.wrapper_done)
                pthread_cond_wait(&ctx.wrapper_cond, &ctx.wrapper_mutex);
        ret = ctx.wrapper_ret;
        pthread_mutex_unlock(&ctx.wrapper_mutex);


The rest lgtmn thanks Tyler.



-- 
David Marchand


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v5 2/2] eal: fix failure path race setting new thread affinity
  2023-03-17 10:45     ` David Marchand
@ 2023-03-17 14:49       ` Tyler Retzlaff
  2023-03-17 18:51         ` David Marchand
  0 siblings, 1 reply; 34+ messages in thread
From: Tyler Retzlaff @ 2023-03-17 14:49 UTC (permalink / raw)
  To: David Marchand; +Cc: dev, thomas, stephen, stable

On Fri, Mar 17, 2023 at 11:45:08AM +0100, David Marchand wrote:
> On Thu, Mar 16, 2023 at 1:07 AM Tyler Retzlaff
> <roretzla@linux.microsoft.com> wrote:
> >
> > In rte_thread_create setting affinity after pthread_create may fail.
> > Such a failure should result in the entire rte_thread_create failing
> > but doesn't.
> >
> > Additionally if there is a failure to set affinity a race exists where
> > the creating thread will free ctx and depending on scheduling of the new
> > thread it may also free ctx (double free).
> >
> > Resolve the above by setting the affinity from the newly created thread
> > using a condition variable to signal the completion of the thread
> > start wrapper having completed.
> >
> > Since we are now waiting for the thread start wrapper to complete we can
> > allocate the thread start wrapper context on the stack. While here clean
> > up the variable naming in the context to better highlight the fields of
> > the context require synchronization between the creating and created
> > thread.
> >
> > Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > ---
> >  lib/eal/unix/rte_thread.c | 70 +++++++++++++++++++++++++++++------------------
> >  1 file changed, 43 insertions(+), 27 deletions(-)
> >
> > diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
> > index 37ebfcf..5992b04 100644
> > --- a/lib/eal/unix/rte_thread.c
> > +++ b/lib/eal/unix/rte_thread.c
> > @@ -16,9 +16,14 @@ struct eal_tls_key {
> >         pthread_key_t thread_index;
> >  };
> >
> > -struct thread_routine_ctx {
> > +struct thread_start_context {
> >         rte_thread_func thread_func;
> > -       void *routine_args;
> > +       void *thread_args;
> > +       const rte_thread_attr_t *thread_attr;
> > +       pthread_mutex_t wrapper_mutex;
> > +       pthread_cond_t wrapper_cond;
> > +       int wrapper_ret;
> > +       volatile int wrapper_done;
> 
> One question.
> 
> I see that wrapper_done is accessed under wrapper_mutex.
> Is volatile needed?

I'm not entirely certain. i'm being cautious since i can conceive of the
load in the loop being optimized as a single load by the compiler. but
again i'm not sure, i always like to learn if someone knows better.

> 
> (nit: a boolean is probably enough too)

I have no issue with it being a _Bool if you want to adjust it for that
i certainly don't object. ordinarily i would use _Bool but a lot of dpdk
code seems to prefer int so that's why i chose it. if we use the macro
bool then we should include stdbool.h directly into this translation
unit.

> 
> I was thinking of squashing below diff:

Yeah, no objection. you can decide if you want to keep the volatile or
not and add the stdbool.h include.

Thanks for reviewing, appreciate it.

> 
> diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
> index 5992b04a45..5ab5267ca3 100644
> --- a/lib/eal/unix/rte_thread.c
> +++ b/lib/eal/unix/rte_thread.c
> @@ -23,7 +23,7 @@ struct thread_start_context {
>         pthread_mutex_t wrapper_mutex;
>         pthread_cond_t wrapper_cond;
>         int wrapper_ret;
> -       volatile int wrapper_done;
> +       bool wrapper_done;
>  };
> 
>  static int
> @@ -101,7 +101,7 @@ thread_start_wrapper(void *arg)
> 
>         pthread_mutex_lock(&ctx->wrapper_mutex);
>         ctx->wrapper_ret = ret;
> -       ctx->wrapper_done = 1;
> +       ctx->wrapper_done = true;
>         pthread_cond_signal(&ctx->wrapper_cond);
>         pthread_mutex_unlock(&ctx->wrapper_mutex);
> 
> @@ -127,6 +127,7 @@ rte_thread_create(rte_thread_t *thread_id,
>                 .thread_func = thread_func,
>                 .thread_args = args,
>                 .thread_attr = thread_attr,
> +               .wrapper_done = false,
>                 .wrapper_mutex = PTHREAD_MUTEX_INITIALIZER,
>                 .wrapper_cond = PTHREAD_COND_INITIALIZER,
>         };
> @@ -151,7 +152,6 @@ rte_thread_create(rte_thread_t *thread_id,
>                         goto cleanup;
>                 }
> 
> -
>                 if (thread_attr->priority ==
>                                 RTE_THREAD_PRIORITY_REALTIME_CRITICAL) {
>                         ret = ENOTSUP;
> @@ -183,7 +183,7 @@ rte_thread_create(rte_thread_t *thread_id,
>         }
> 
>         pthread_mutex_lock(&ctx.wrapper_mutex);
> -       while (ctx.wrapper_done != 1)
> +       while (!ctx.wrapper_done)
>                 pthread_cond_wait(&ctx.wrapper_cond, &ctx.wrapper_mutex);
>         ret = ctx.wrapper_ret;
>         pthread_mutex_unlock(&ctx.wrapper_mutex);
> 
> 
> The rest lgtmn thanks Tyler.
> 
> 
> 
> -- 
> David Marchand

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v5 2/2] eal: fix failure path race setting new thread affinity
  2023-03-17 14:49       ` Tyler Retzlaff
@ 2023-03-17 18:51         ` David Marchand
  2023-03-17 21:20           ` Tyler Retzlaff
  0 siblings, 1 reply; 34+ messages in thread
From: David Marchand @ 2023-03-17 18:51 UTC (permalink / raw)
  To: Tyler Retzlaff; +Cc: dev, thomas, stephen, stable, Dodji Seketeli

On Fri, Mar 17, 2023 at 3:50 PM Tyler Retzlaff
<roretzla@linux.microsoft.com> wrote:
> > > -struct thread_routine_ctx {
> > > +struct thread_start_context {
> > >         rte_thread_func thread_func;
> > > -       void *routine_args;
> > > +       void *thread_args;
> > > +       const rte_thread_attr_t *thread_attr;
> > > +       pthread_mutex_t wrapper_mutex;
> > > +       pthread_cond_t wrapper_cond;
> > > +       int wrapper_ret;
> > > +       volatile int wrapper_done;
> >
> > One question.
> >
> > I see that wrapper_done is accessed under wrapper_mutex.
> > Is volatile needed?
>
> I'm not entirely certain. i'm being cautious since i can conceive of the
> load in the loop being optimized as a single load by the compiler. but
> again i'm not sure, i always like to learn if someone knows better.

After an interesting discussion with Dodji on C99 and side effects
(5.1.2.3/2 and 5.1.2.3/3), I am a bit more convinced that we don't
need this volatile.


>
> >
> > (nit: a boolean is probably enough too)
>
> I have no issue with it being a _Bool if you want to adjust it for that
> i certainly don't object. ordinarily i would use _Bool but a lot of dpdk
> code seems to prefer int so that's why i chose it. if we use the macro
> bool then we should include stdbool.h directly into this translation
> unit.
>
> >
> > I was thinking of squashing below diff:
>
> Yeah, no objection. you can decide if you want to keep the volatile or
> not and add the stdbool.h include.
>
> Thanks for reviewing, appreciate it.

This is a fix but this v5 had an additional change in affinity setting
(switching to rte_thread_set_affinity()).
To be on the safe side wrt backport, I'll also revert to calling
rte_thread_set_affinity_by_id as this is what was being used before.
And this removes the need for patch 1.

Sending a v6 soon, so that it goes through the CI before rc3.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH v6] eal/unix: fix thread creation
  2023-03-02 18:44 [PATCH 1/2] eal: fix failure race and behavior of thread create Tyler Retzlaff
                   ` (6 preceding siblings ...)
  2023-03-16  0:07 ` [PATCH v5 0/2] fix race in rte_thread_create failure path Tyler Retzlaff
@ 2023-03-17 18:52 ` David Marchand
  2023-03-17 21:24   ` Tyler Retzlaff
  2023-03-18 18:26   ` David Marchand
  7 siblings, 2 replies; 34+ messages in thread
From: David Marchand @ 2023-03-17 18:52 UTC (permalink / raw)
  To: dev
  Cc: thomas, stephen, Tyler Retzlaff, stable, Narcisa Vasile, Dmitry Kozlyuk

From: Tyler Retzlaff <roretzla@linux.microsoft.com>

In rte_thread_create setting affinity after pthread_create may fail.
Such a failure should result in the entire rte_thread_create failing
but doesn't.

Additionally if there is a failure to set affinity a race exists where
the creating thread will free ctx and depending on scheduling of the new
thread it may also free ctx (double free).

Resolve the above by setting the affinity from the newly created thread
using a condition variable to signal the completion of the thread
start wrapper having completed.

Since we are now waiting for the thread start wrapper to complete we can
allocate the thread start wrapper context on the stack. While here clean
up the variable naming in the context to better highlight the fields of
the context require synchronization between the creating and created
thread.

Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
Cc: stable@dpdk.org

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Changes since v5:
- dropped volatile and switched to boolean for wrapper_done,
- reverted to rte_thread_set_affinity_by_id() call,

---
 lib/eal/unix/rte_thread.c | 73 ++++++++++++++++++++++++---------------
 1 file changed, 45 insertions(+), 28 deletions(-)

diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
index 37ebfcfca1..f4076122a4 100644
--- a/lib/eal/unix/rte_thread.c
+++ b/lib/eal/unix/rte_thread.c
@@ -5,6 +5,7 @@
 
 #include <errno.h>
 #include <pthread.h>
+#include <stdbool.h>
 #include <stdlib.h>
 #include <string.h>
 
@@ -16,9 +17,14 @@ struct eal_tls_key {
 	pthread_key_t thread_index;
 };
 
-struct thread_routine_ctx {
+struct thread_start_context {
 	rte_thread_func thread_func;
-	void *routine_args;
+	void *thread_args;
+	const rte_thread_attr_t *thread_attr;
+	pthread_mutex_t wrapper_mutex;
+	pthread_cond_t wrapper_cond;
+	int wrapper_ret;
+	bool wrapper_done;
 };
 
 static int
@@ -81,13 +87,29 @@ thread_map_os_priority_to_eal_priority(int policy, int os_pri,
 }
 
 static void *
-thread_func_wrapper(void *arg)
+thread_start_wrapper(void *arg)
 {
-	struct thread_routine_ctx ctx = *(struct thread_routine_ctx *)arg;
+	struct thread_start_context *ctx = (struct thread_start_context *)arg;
+	rte_thread_func thread_func = ctx->thread_func;
+	void *thread_args = ctx->thread_args;
+	int ret = 0;
+
+	if (ctx->thread_attr != NULL && CPU_COUNT(&ctx->thread_attr->cpuset) > 0) {
+		ret = rte_thread_set_affinity_by_id(rte_thread_self(), &ctx->thread_attr->cpuset);
+		if (ret != 0)
+			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity_by_id failed\n");
+	}
 
-	free(arg);
+	pthread_mutex_lock(&ctx->wrapper_mutex);
+	ctx->wrapper_ret = ret;
+	ctx->wrapper_done = true;
+	pthread_cond_signal(&ctx->wrapper_cond);
+	pthread_mutex_unlock(&ctx->wrapper_mutex);
 
-	return (void *)(uintptr_t)ctx.thread_func(ctx.routine_args);
+	if (ret != 0)
+		return NULL;
+
+	return (void *)(uintptr_t)thread_func(thread_args);
 }
 
 int
@@ -98,20 +120,18 @@ rte_thread_create(rte_thread_t *thread_id,
 	int ret = 0;
 	pthread_attr_t attr;
 	pthread_attr_t *attrp = NULL;
-	struct thread_routine_ctx *ctx;
 	struct sched_param param = {
 		.sched_priority = 0,
 	};
 	int policy = SCHED_OTHER;
-
-	ctx = calloc(1, sizeof(*ctx));
-	if (ctx == NULL) {
-		RTE_LOG(DEBUG, EAL, "Insufficient memory for thread context allocations\n");
-		ret = ENOMEM;
-		goto cleanup;
-	}
-	ctx->routine_args = args;
-	ctx->thread_func = thread_func;
+	struct thread_start_context ctx = {
+		.thread_func = thread_func,
+		.thread_args = args,
+		.thread_attr = thread_attr,
+		.wrapper_done = false,
+		.wrapper_mutex = PTHREAD_MUTEX_INITIALIZER,
+		.wrapper_cond = PTHREAD_COND_INITIALIZER,
+	};
 
 	if (thread_attr != NULL) {
 		ret = pthread_attr_init(&attr);
@@ -133,7 +153,6 @@ rte_thread_create(rte_thread_t *thread_id,
 			goto cleanup;
 		}
 
-
 		if (thread_attr->priority ==
 				RTE_THREAD_PRIORITY_REALTIME_CRITICAL) {
 			ret = ENOTSUP;
@@ -158,24 +177,22 @@ rte_thread_create(rte_thread_t *thread_id,
 	}
 
 	ret = pthread_create((pthread_t *)&thread_id->opaque_id, attrp,
-		thread_func_wrapper, ctx);
+		thread_start_wrapper, &ctx);
 	if (ret != 0) {
 		RTE_LOG(DEBUG, EAL, "pthread_create failed\n");
 		goto cleanup;
 	}
 
-	if (thread_attr != NULL && CPU_COUNT(&thread_attr->cpuset) > 0) {
-		ret = rte_thread_set_affinity_by_id(*thread_id,
-			&thread_attr->cpuset);
-		if (ret != 0) {
-			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity_by_id failed\n");
-			goto cleanup;
-		}
-	}
+	pthread_mutex_lock(&ctx.wrapper_mutex);
+	while (!ctx.wrapper_done)
+		pthread_cond_wait(&ctx.wrapper_cond, &ctx.wrapper_mutex);
+	ret = ctx.wrapper_ret;
+	pthread_mutex_unlock(&ctx.wrapper_mutex);
+
+	if (ret != 0)
+		pthread_join((pthread_t)thread_id->opaque_id, NULL);
 
-	ctx = NULL;
 cleanup:
-	free(ctx);
 	if (attrp != NULL)
 		pthread_attr_destroy(&attr);
 
-- 
2.39.2


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v5 2/2] eal: fix failure path race setting new thread affinity
  2023-03-17 18:51         ` David Marchand
@ 2023-03-17 21:20           ` Tyler Retzlaff
  0 siblings, 0 replies; 34+ messages in thread
From: Tyler Retzlaff @ 2023-03-17 21:20 UTC (permalink / raw)
  To: David Marchand; +Cc: dev, thomas, stephen, stable, Dodji Seketeli

On Fri, Mar 17, 2023 at 07:51:25PM +0100, David Marchand wrote:
> On Fri, Mar 17, 2023 at 3:50 PM Tyler Retzlaff
> <roretzla@linux.microsoft.com> wrote:
> > > > -struct thread_routine_ctx {
> > > > +struct thread_start_context {
> > > >         rte_thread_func thread_func;
> > > > -       void *routine_args;
> > > > +       void *thread_args;
> > > > +       const rte_thread_attr_t *thread_attr;
> > > > +       pthread_mutex_t wrapper_mutex;
> > > > +       pthread_cond_t wrapper_cond;
> > > > +       int wrapper_ret;
> > > > +       volatile int wrapper_done;
> > >
> > > One question.
> > >
> > > I see that wrapper_done is accessed under wrapper_mutex.
> > > Is volatile needed?
> >
> > I'm not entirely certain. i'm being cautious since i can conceive of the
> > load in the loop being optimized as a single load by the compiler. but
> > again i'm not sure, i always like to learn if someone knows better.
> 
> After an interesting discussion with Dodji on C99 and side effects
> (5.1.2.3/2 and 5.1.2.3/3), I am a bit more convinced that we don't
> need this volatile.

Thanks for the references, based on the reading i agree we can drop the
volatile.

> 
> 
> >
> > >
> > > (nit: a boolean is probably enough too)
> >
> > I have no issue with it being a _Bool if you want to adjust it for that
> > i certainly don't object. ordinarily i would use _Bool but a lot of dpdk
> > code seems to prefer int so that's why i chose it. if we use the macro
> > bool then we should include stdbool.h directly into this translation
> > unit.
> >
> > >
> > > I was thinking of squashing below diff:
> >
> > Yeah, no objection. you can decide if you want to keep the volatile or
> > not and add the stdbool.h include.
> >
> > Thanks for reviewing, appreciate it.
> 
> This is a fix but this v5 had an additional change in affinity setting
> (switching to rte_thread_set_affinity()).
> To be on the safe side wrt backport, I'll also revert to calling
> rte_thread_set_affinity_by_id as this is what was being used before.
> And this removes the need for patch 1.

Is it worth merging the const patch but not backporting? I'm not fussed
either way.

> 
> Sending a v6 soon, so that it goes through the CI before rc3.

Yes, great.

Thanks David!

> 
> 
> -- 
> David Marchand

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v6] eal/unix: fix thread creation
  2023-03-17 18:52 ` [PATCH v6] eal/unix: fix thread creation David Marchand
@ 2023-03-17 21:24   ` Tyler Retzlaff
  2023-03-18 18:26     ` David Marchand
  2023-03-18 18:26   ` David Marchand
  1 sibling, 1 reply; 34+ messages in thread
From: Tyler Retzlaff @ 2023-03-17 21:24 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, thomas, stephen, stable, Narcisa Vasile, Dmitry Kozlyuk

On Fri, Mar 17, 2023 at 07:52:29PM +0100, David Marchand wrote:
> From: Tyler Retzlaff <roretzla@linux.microsoft.com>
> 
> In rte_thread_create setting affinity after pthread_create may fail.
> Such a failure should result in the entire rte_thread_create failing
> but doesn't.
> 
> Additionally if there is a failure to set affinity a race exists where
> the creating thread will free ctx and depending on scheduling of the new
> thread it may also free ctx (double free).
> 
> Resolve the above by setting the affinity from the newly created thread
> using a condition variable to signal the completion of the thread
> start wrapper having completed.
> 
> Since we are now waiting for the thread start wrapper to complete we can
> allocate the thread start wrapper context on the stack. While here clean
> up the variable naming in the context to better highlight the fields of
> the context require synchronization between the creating and created
> thread.
> 
> Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---

Looks good to me, not sure if you need a Reviewed-by: from me for the
changes but here is one anyway.

v5
Reviewed-by: Tyler Retzlaff <roretzla@linux.microsoft.com>

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v6] eal/unix: fix thread creation
  2023-03-17 21:24   ` Tyler Retzlaff
@ 2023-03-18 18:26     ` David Marchand
  0 siblings, 0 replies; 34+ messages in thread
From: David Marchand @ 2023-03-18 18:26 UTC (permalink / raw)
  To: Tyler Retzlaff
  Cc: dev, thomas, stephen, stable, Narcisa Vasile, Dmitry Kozlyuk

On Fri, Mar 17, 2023 at 10:24 PM Tyler Retzlaff
<roretzla@linux.microsoft.com> wrote:
>
> On Fri, Mar 17, 2023 at 07:52:29PM +0100, David Marchand wrote:
> > From: Tyler Retzlaff <roretzla@linux.microsoft.com>
> >
> > In rte_thread_create setting affinity after pthread_create may fail.
> > Such a failure should result in the entire rte_thread_create failing
> > but doesn't.
> >
> > Additionally if there is a failure to set affinity a race exists where
> > the creating thread will free ctx and depending on scheduling of the new
> > thread it may also free ctx (double free).
> >
> > Resolve the above by setting the affinity from the newly created thread
> > using a condition variable to signal the completion of the thread
> > start wrapper having completed.
> >
> > Since we are now waiting for the thread start wrapper to complete we can
> > allocate the thread start wrapper context on the stack. While here clean
> > up the variable naming in the context to better highlight the fields of
> > the context require synchronization between the creating and created
> > thread.
> >
> > Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > ---
>
> Looks good to me, not sure if you need a Reviewed-by: from me for the
> changes but here is one anyway.
>
> v5
> Reviewed-by: Tyler Retzlaff <roretzla@linux.microsoft.com>

Thanks for the review, I'll keep your SoB only on the patch.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v6] eal/unix: fix thread creation
  2023-03-17 18:52 ` [PATCH v6] eal/unix: fix thread creation David Marchand
  2023-03-17 21:24   ` Tyler Retzlaff
@ 2023-03-18 18:26   ` David Marchand
  1 sibling, 0 replies; 34+ messages in thread
From: David Marchand @ 2023-03-18 18:26 UTC (permalink / raw)
  To: Tyler Retzlaff
  Cc: dev, thomas, stephen, stable, Narcisa Vasile, Dmitry Kozlyuk

On Fri, Mar 17, 2023 at 7:52 PM David Marchand
<david.marchand@redhat.com> wrote:
>
> From: Tyler Retzlaff <roretzla@linux.microsoft.com>
>
> In rte_thread_create setting affinity after pthread_create may fail.
> Such a failure should result in the entire rte_thread_create failing
> but doesn't.
>
> Additionally if there is a failure to set affinity a race exists where
> the creating thread will free ctx and depending on scheduling of the new
> thread it may also free ctx (double free).
>
> Resolve the above by setting the affinity from the newly created thread
> using a condition variable to signal the completion of the thread
> start wrapper having completed.
>
> Since we are now waiting for the thread start wrapper to complete we can
> allocate the thread start wrapper context on the stack. While here clean
> up the variable naming in the context to better highlight the fields of
> the context require synchronization between the creating and created
> thread.
>
> Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
> Cc: stable@dpdk.org
>
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> Signed-off-by: David Marchand <david.marchand@redhat.com>

Applied thanks.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 34+ messages in thread

end of thread, other threads:[~2023-03-18 18:26 UTC | newest]

Thread overview: 34+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-02 18:44 [PATCH 1/2] eal: fix failure race and behavior of thread create Tyler Retzlaff
2023-03-02 18:44 ` [PATCH 2/2] eal/windows: fix create thread failure behavior Tyler Retzlaff
2023-03-07 14:33 ` [PATCH 1/2] eal: fix failure race and behavior of thread create David Marchand
2023-03-09  9:17   ` David Marchand
2023-03-09  9:58     ` Thomas Monjalon
2023-03-09 20:49       ` Tyler Retzlaff
2023-03-09 21:05         ` David Marchand
2023-03-13 23:31 ` [PATCH v2 0/2] fix race in rte_thread_create failure path Tyler Retzlaff
2023-03-13 23:31   ` [PATCH v2 1/2] eal: make cpusetp to rte thread set affinity const Tyler Retzlaff
2023-03-13 23:31   ` [PATCH v2 2/2] eal: fix failure path race setting new thread affinity Tyler Retzlaff
2023-03-14 11:47   ` [PATCH v2 0/2] fix race in rte_thread_create failure path David Marchand
2023-03-14 13:59     ` Tyler Retzlaff
2023-03-14 22:44 ` [PATCH v3 " Tyler Retzlaff
2023-03-14 22:44   ` [PATCH v3 1/2] eal: make cpusetp to rte thread set affinity const Tyler Retzlaff
2023-03-14 22:44   ` [PATCH v3 2/2] eal: fix failure path race setting new thread affinity Tyler Retzlaff
2023-03-14 22:50 ` [PATCH v4 0/2] fix race in rte_thread_create failure path Tyler Retzlaff
2023-03-14 22:50   ` [PATCH v4 1/2] eal: make cpusetp to rte thread set affinity const Tyler Retzlaff
2023-03-14 22:50   ` [PATCH v4 2/2] eal: fix failure path race setting new thread affinity Tyler Retzlaff
2023-03-15  1:20     ` Stephen Hemminger
2023-03-15  1:26       ` Tyler Retzlaff
2023-03-16  0:04 ` [PATCH v4 0/2] fix race in rte_thread_create failure path Tyler Retzlaff
2023-03-16  0:04   ` [PATCH v4 1/2] eal: make cpusetp to rte thread set affinity const Tyler Retzlaff
2023-03-16  0:04   ` [PATCH v4 2/2] eal: fix failure path race setting new thread affinity Tyler Retzlaff
2023-03-16  0:07 ` [PATCH v5 0/2] fix race in rte_thread_create failure path Tyler Retzlaff
2023-03-16  0:07   ` [PATCH v5 1/2] eal: make cpusetp to rte thread set affinity const Tyler Retzlaff
2023-03-16  0:07   ` [PATCH v5 2/2] eal: fix failure path race setting new thread affinity Tyler Retzlaff
2023-03-17 10:45     ` David Marchand
2023-03-17 14:49       ` Tyler Retzlaff
2023-03-17 18:51         ` David Marchand
2023-03-17 21:20           ` Tyler Retzlaff
2023-03-17 18:52 ` [PATCH v6] eal/unix: fix thread creation David Marchand
2023-03-17 21:24   ` Tyler Retzlaff
2023-03-18 18:26     ` David Marchand
2023-03-18 18:26   ` David Marchand

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).