patches for DPDK stable branches
 help / color / mirror / Atom feed
* [PATCH 1/2] eal: fix failure race and behavior of thread create
@ 2023-03-02 18:44 Tyler Retzlaff
  2023-03-02 18:44 ` [PATCH 2/2] eal/windows: fix create thread failure behavior Tyler Retzlaff
                   ` (7 more replies)
  0 siblings, 8 replies; 22+ messages in thread
From: Tyler Retzlaff @ 2023-03-02 18:44 UTC (permalink / raw)
  To: dev; +Cc: david.marchand, Tyler Retzlaff, stable

In rte_thread_create setting affinity after pthread_create may fail.
Such a failure should result in the entire rte_thread_create failing
but doesn't.

Additionally if there is a failure to set affinity a race exists where
the creating thread will free ctx and depending on scheduling of the new
thread it may also free ctx (double free).

Resolve both of the above issues by using the pthread_setaffinity_np
prior to thread creation to set the affinity of the created thread. By
doing this no failure paths exist after pthread_create returns
successfully.

Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
Cc: stable@dpdk.org
Cc: roretzla@linux.microsoft.com

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/eal/unix/rte_thread.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
index 37ebfcf..5bf633b 100644
--- a/lib/eal/unix/rte_thread.c
+++ b/lib/eal/unix/rte_thread.c
@@ -155,6 +155,17 @@ struct thread_routine_ctx {
 			RTE_LOG(DEBUG, EAL, "pthread_attr_setschedparam failed\n");
 			goto cleanup;
 		}
+
+		if (CPU_COUNT(&thread_attr->cpuset) > 0) {
+			ret = pthread_attr_setaffinity_np(attrp,
+				sizeof(thread_attr->cpuset),
+				&thread_attr->cpuset);
+			if (ret != 0) {
+				RTE_LOG(DEBUG, EAL,
+					"pthread_attr_setaffinity_np failed\n");
+				goto cleanup;
+			}
+		}
 	}
 
 	ret = pthread_create((pthread_t *)&thread_id->opaque_id, attrp,
@@ -164,15 +175,6 @@ struct thread_routine_ctx {
 		goto cleanup;
 	}
 
-	if (thread_attr != NULL && CPU_COUNT(&thread_attr->cpuset) > 0) {
-		ret = rte_thread_set_affinity_by_id(*thread_id,
-			&thread_attr->cpuset);
-		if (ret != 0) {
-			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity_by_id failed\n");
-			goto cleanup;
-		}
-	}
-
 	ctx = NULL;
 cleanup:
 	free(ctx);
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 2/2] eal/windows: fix create thread failure behavior
  2023-03-02 18:44 [PATCH 1/2] eal: fix failure race and behavior of thread create Tyler Retzlaff
@ 2023-03-02 18:44 ` Tyler Retzlaff
  2023-03-07 14:33 ` [PATCH 1/2] eal: fix failure race and behavior of thread create David Marchand
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 22+ messages in thread
From: Tyler Retzlaff @ 2023-03-02 18:44 UTC (permalink / raw)
  To: dev; +Cc: david.marchand, Tyler Retzlaff, stable

In rte_thread_create setting affinity after CreateThread may fail. Such
a failure is reported but strands the newly created thread in a
suspended state.

Resolve the above issue by notifying the newly created thread that
it should terminate as soon as it is resumed, while still continuing to
free the ctx.

Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
Cc: stable@dpdk.org
Cc: roretzla@linux.microsoft.com

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/eal/windows/rte_thread.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/lib/eal/windows/rte_thread.c b/lib/eal/windows/rte_thread.c
index 8556a84..e528ac9 100644
--- a/lib/eal/windows/rte_thread.c
+++ b/lib/eal/windows/rte_thread.c
@@ -19,6 +19,7 @@ struct eal_tls_key {
 
 struct thread_routine_ctx {
 	rte_thread_func thread_func;
+	bool thread_init_failed;
 	void *routine_args;
 };
 
@@ -167,9 +168,13 @@ struct thread_routine_ctx {
 thread_func_wrapper(void *arg)
 {
 	struct thread_routine_ctx ctx = *(struct thread_routine_ctx *)arg;
+	const bool thread_exit = __atomic_load_n(&ctx.thread_init_failed, __ATOMIC_ACQUIRE);
 
 	free(arg);
 
+	if (thread_exit)
+		return 0;
+
 	return (DWORD)ctx.thread_func(ctx.routine_args);
 }
 
@@ -183,6 +188,7 @@ struct thread_routine_ctx {
 	HANDLE thread_handle = NULL;
 	GROUP_AFFINITY thread_affinity;
 	struct thread_routine_ctx *ctx;
+	bool thread_exit = false;
 
 	ctx = calloc(1, sizeof(*ctx));
 	if (ctx == NULL) {
@@ -192,6 +198,7 @@ struct thread_routine_ctx {
 	}
 	ctx->routine_args = args;
 	ctx->thread_func = thread_func;
+	ctx->thread_init_failed = false;
 
 	thread_handle = CreateThread(NULL, 0, thread_func_wrapper, ctx,
 		CREATE_SUSPENDED, &tid);
@@ -209,23 +216,29 @@ struct thread_routine_ctx {
 							);
 			if (ret != 0) {
 				RTE_LOG(DEBUG, EAL, "Unable to convert cpuset to thread affinity\n");
-				goto cleanup;
+				thread_exit = true;
+				goto resume_thread;
 			}
 
 			if (!SetThreadGroupAffinity(thread_handle,
 						    &thread_affinity, NULL)) {
 				ret = thread_log_last_error("SetThreadGroupAffinity()");
-				goto cleanup;
+				thread_exit = true;
+				goto resume_thread;
 			}
 		}
 		ret = rte_thread_set_priority(*thread_id,
 				thread_attr->priority);
 		if (ret != 0) {
 			RTE_LOG(DEBUG, EAL, "Unable to set thread priority\n");
-			goto cleanup;
+			thread_exit = true;
+			goto resume_thread;
 		}
 	}
 
+resume_thread:
+	__atomic_store_n(&ctx->thread_init_failed, thread_exit, __ATOMIC_RELEASE);
+
 	if (ResumeThread(thread_handle) == (DWORD)-1) {
 		ret = thread_log_last_error("ResumeThread()");
 		goto cleanup;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] eal: fix failure race and behavior of thread create
  2023-03-02 18:44 [PATCH 1/2] eal: fix failure race and behavior of thread create Tyler Retzlaff
  2023-03-02 18:44 ` [PATCH 2/2] eal/windows: fix create thread failure behavior Tyler Retzlaff
@ 2023-03-07 14:33 ` David Marchand
  2023-03-09  9:17   ` David Marchand
       [not found] ` <1678750267-3829-1-git-send-email-roretzla@linux.microsoft.com>
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 22+ messages in thread
From: David Marchand @ 2023-03-07 14:33 UTC (permalink / raw)
  To: Tyler Retzlaff; +Cc: dev, stable

On Thu, Mar 2, 2023 at 7:44 PM Tyler Retzlaff
<roretzla@linux.microsoft.com> wrote:
>
> In rte_thread_create setting affinity after pthread_create may fail.
> Such a failure should result in the entire rte_thread_create failing
> but doesn't.
>
> Additionally if there is a failure to set affinity a race exists where
> the creating thread will free ctx and depending on scheduling of the new
> thread it may also free ctx (double free).
>
> Resolve both of the above issues by using the pthread_setaffinity_np
> prior to thread creation to set the affinity of the created thread. By
> doing this no failure paths exist after pthread_create returns
> successfully.
>
> Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
> Cc: stable@dpdk.org
> Cc: roretzla@linux.microsoft.com
>
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>

Reviewed-by: David Marchand <david.marchand@redhat.com>


-- 
David Marchand


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] eal: fix failure race and behavior of thread create
  2023-03-07 14:33 ` [PATCH 1/2] eal: fix failure race and behavior of thread create David Marchand
@ 2023-03-09  9:17   ` David Marchand
  2023-03-09  9:58     ` Thomas Monjalon
  0 siblings, 1 reply; 22+ messages in thread
From: David Marchand @ 2023-03-09  9:17 UTC (permalink / raw)
  To: Tyler Retzlaff; +Cc: dev, stable

On Tue, Mar 7, 2023 at 3:33 PM David Marchand <david.marchand@redhat.com> wrote:
> On Thu, Mar 2, 2023 at 7:44 PM Tyler Retzlaff
> <roretzla@linux.microsoft.com> wrote:
> >
> > In rte_thread_create setting affinity after pthread_create may fail.
> > Such a failure should result in the entire rte_thread_create failing
> > but doesn't.
> >
> > Additionally if there is a failure to set affinity a race exists where
> > the creating thread will free ctx and depending on scheduling of the new
> > thread it may also free ctx (double free).
> >
> > Resolve both of the above issues by using the pthread_setaffinity_np
> > prior to thread creation to set the affinity of the created thread. By
> > doing this no failure paths exist after pthread_create returns
> > successfully.
> >
> > Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
> > Cc: stable@dpdk.org
> > Cc: roretzla@linux.microsoft.com
> >
> > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> Reviewed-by: David Marchand <david.marchand@redhat.com>

Series applied, thanks.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] eal: fix failure race and behavior of thread create
  2023-03-09  9:17   ` David Marchand
@ 2023-03-09  9:58     ` Thomas Monjalon
  2023-03-09 20:49       ` Tyler Retzlaff
  0 siblings, 1 reply; 22+ messages in thread
From: Thomas Monjalon @ 2023-03-09  9:58 UTC (permalink / raw)
  To: Tyler Retzlaff, David Marchand; +Cc: dev, stable

09/03/2023 10:17, David Marchand:
> On Tue, Mar 7, 2023 at 3:33 PM David Marchand <david.marchand@redhat.com> wrote:
> > On Thu, Mar 2, 2023 at 7:44 PM Tyler Retzlaff
> > <roretzla@linux.microsoft.com> wrote:
> > >
> > > In rte_thread_create setting affinity after pthread_create may fail.
> > > Such a failure should result in the entire rte_thread_create failing
> > > but doesn't.
> > >
> > > Additionally if there is a failure to set affinity a race exists where
> > > the creating thread will free ctx and depending on scheduling of the new
> > > thread it may also free ctx (double free).
> > >
> > > Resolve both of the above issues by using the pthread_setaffinity_np
> > > prior to thread creation to set the affinity of the created thread. By
> > > doing this no failure paths exist after pthread_create returns
> > > successfully.
> > >
> > > Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
> > > Cc: stable@dpdk.org
> > > Cc: roretzla@linux.microsoft.com
> > >
> > > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > Reviewed-by: David Marchand <david.marchand@redhat.com>
> 
> Series applied, thanks.

Unfortunately we cannot merge this patch
because it does not compile on Alpine Linux (musl libc):

lib/eal/unix/rte_thread.c:160:31: error:
implicit declaration of function 'pthread_attr_setaffinity_np'

Is it possible to fix the race without using pthread_attr_setaffinity_np?



^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] eal: fix failure race and behavior of thread create
  2023-03-09  9:58     ` Thomas Monjalon
@ 2023-03-09 20:49       ` Tyler Retzlaff
  2023-03-09 21:05         ` David Marchand
  0 siblings, 1 reply; 22+ messages in thread
From: Tyler Retzlaff @ 2023-03-09 20:49 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: David Marchand, dev, stable

On Thu, Mar 09, 2023 at 10:58:06AM +0100, Thomas Monjalon wrote:
> 09/03/2023 10:17, David Marchand:
> > On Tue, Mar 7, 2023 at 3:33 PM David Marchand <david.marchand@redhat.com> wrote:
> > > On Thu, Mar 2, 2023 at 7:44 PM Tyler Retzlaff
> > > <roretzla@linux.microsoft.com> wrote:
> > > >
> > > > In rte_thread_create setting affinity after pthread_create may fail.
> > > > Such a failure should result in the entire rte_thread_create failing
> > > > but doesn't.
> > > >
> > > > Additionally if there is a failure to set affinity a race exists where
> > > > the creating thread will free ctx and depending on scheduling of the new
> > > > thread it may also free ctx (double free).
> > > >
> > > > Resolve both of the above issues by using the pthread_setaffinity_np
> > > > prior to thread creation to set the affinity of the created thread. By
> > > > doing this no failure paths exist after pthread_create returns
> > > > successfully.
> > > >
> > > > Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
> > > > Cc: stable@dpdk.org
> > > > Cc: roretzla@linux.microsoft.com
> > > >
> > > > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > > Reviewed-by: David Marchand <david.marchand@redhat.com>
> > 
> > Series applied, thanks.
> 
> Unfortunately we cannot merge this patch
> because it does not compile on Alpine Linux (musl libc):
> 
> lib/eal/unix/rte_thread.c:160:31: error:
> implicit declaration of function 'pthread_attr_setaffinity_np'

i didn't get any CI failure for this. did i just miss it?

> 
> Is it possible to fix the race without using pthread_attr_setaffinity_np?
> 

it seems we never allowed threads to be created with a set affinity when
using pthread_create directly (that was portable to alpine linux).  for worker
threads the start_routine is setting the affinity from the new thread.

certainly we can make this work by doing the same thing, but we'll have
to adjust the start routine wrapper to synchronize/wait for the new
thread to set the affinity and if it fails terminate the new thread
cleanly.

i don't have a way to build for alpine linux or run the unit tests, does
someone want to make the above suggested adjustment? or i can try and
make a patch but someone else will have to carefully review and test.

let me know how you'd like to proceed.

ty

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] eal: fix failure race and behavior of thread create
  2023-03-09 20:49       ` Tyler Retzlaff
@ 2023-03-09 21:05         ` David Marchand
  0 siblings, 0 replies; 22+ messages in thread
From: David Marchand @ 2023-03-09 21:05 UTC (permalink / raw)
  To: Tyler Retzlaff; +Cc: Thomas Monjalon, dev, stable

On Thu, Mar 9, 2023 at 9:49 PM Tyler Retzlaff
<roretzla@linux.microsoft.com> wrote:
>
> On Thu, Mar 09, 2023 at 10:58:06AM +0100, Thomas Monjalon wrote:
> > 09/03/2023 10:17, David Marchand:
> > > On Tue, Mar 7, 2023 at 3:33 PM David Marchand <david.marchand@redhat.com> wrote:
> > > > On Thu, Mar 2, 2023 at 7:44 PM Tyler Retzlaff
> > > > <roretzla@linux.microsoft.com> wrote:
> > > > >
> > > > > In rte_thread_create setting affinity after pthread_create may fail.
> > > > > Such a failure should result in the entire rte_thread_create failing
> > > > > but doesn't.
> > > > >
> > > > > Additionally if there is a failure to set affinity a race exists where
> > > > > the creating thread will free ctx and depending on scheduling of the new
> > > > > thread it may also free ctx (double free).
> > > > >
> > > > > Resolve both of the above issues by using the pthread_setaffinity_np
> > > > > prior to thread creation to set the affinity of the created thread. By
> > > > > doing this no failure paths exist after pthread_create returns
> > > > > successfully.
> > > > >
> > > > > Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
> > > > > Cc: stable@dpdk.org
> > > > > Cc: roretzla@linux.microsoft.com
> > > > >
> > > > > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > > > Reviewed-by: David Marchand <david.marchand@redhat.com>
> > >
> > > Series applied, thanks.
> >
> > Unfortunately we cannot merge this patch
> > because it does not compile on Alpine Linux (musl libc):
> >
> > lib/eal/unix/rte_thread.c:160:31: error:
> > implicit declaration of function 'pthread_attr_setaffinity_np'
>
> i didn't get any CI failure for this. did i just miss it?

Count on me, I would have complained if there was a CI issue ;-).


>
> >
> > Is it possible to fix the race without using pthread_attr_setaffinity_np?
> >
>
> it seems we never allowed threads to be created with a set affinity when
> using pthread_create directly (that was portable to alpine linux).  for worker
> threads the start_routine is setting the affinity from the new thread.
>
> certainly we can make this work by doing the same thing, but we'll have
> to adjust the start routine wrapper to synchronize/wait for the new
> thread to set the affinity and if it fails terminate the new thread
> cleanly.
>
> i don't have a way to build for alpine linux or run the unit tests, does
> someone want to make the above suggested adjustment? or i can try and
> make a patch but someone else will have to carefully review and test.
>
> let me know how you'd like to proceed.

UNH is looking into re-enabling the Alpine job.


For the time being, if you have a github repository, I can propose a
quick patch using GHA:
https://github.com/david-marchand/dpdk/commit/ci

I had tested compilation with a previous version of this patch.
I just added running the unit tests (adding a checks: tests line in
the job matrix), let's see how it goes...
https://github.com/david-marchand/dpdk/actions/runs/4378715081/jobs/7663806639


-- 
David Marchand


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH v2 2/2] eal: fix failure path race setting new thread affinity
       [not found] ` <1678750267-3829-1-git-send-email-roretzla@linux.microsoft.com>
@ 2023-03-13 23:31   ` Tyler Retzlaff
  0 siblings, 0 replies; 22+ messages in thread
From: Tyler Retzlaff @ 2023-03-13 23:31 UTC (permalink / raw)
  To: dev; +Cc: thomas, david.marchand, Tyler Retzlaff, stable

In rte_thread_create setting affinity after pthread_create may fail.
Such a failure should result in the entire rte_thread_create failing
but doesn't.

Additionally if there is a failure to set affinity a race exists where
the creating thread will free ctx and depending on scheduling of the new
thread it may also free ctx (double free).

Resolve both of the above issues by setting the affinity from the newly
created thread instead of after thread creation. To achieve this modify
the existing thread wrapper to allow the creating thread to wait on the
result of the set affinity operation.

Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
Cc: stable@dpdk.org

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/eal/unix/rte_thread.c | 52 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 42 insertions(+), 10 deletions(-)

diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
index 37ebfcf..a09fb08 100644
--- a/lib/eal/unix/rte_thread.c
+++ b/lib/eal/unix/rte_thread.c
@@ -8,6 +8,7 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include <rte_cycles.h>
 #include <rte_errno.h>
 #include <rte_log.h>
 #include <rte_thread.h>
@@ -16,8 +17,17 @@ struct eal_tls_key {
 	pthread_key_t thread_index;
 };
 
+enum __rte_thread_wrapper_status {
+	THREAD_WRAPPER_LAUNCHING, /* Yet to call thread_func function */
+	THREAD_WRAPPER_RUNNING, /* Thread is running successfully */
+	THREAD_WRAPPER_ERROR /* Thread thread_wrapper encountered an error */
+};
+
 struct thread_routine_ctx {
 	rte_thread_func thread_func;
+	const rte_thread_attr_t *thread_attr;
+	int thread_wrapper_ret;
+	enum __rte_thread_wrapper_status thread_wrapper_status;
 	void *routine_args;
 };
 
@@ -83,11 +93,24 @@ struct thread_routine_ctx {
 static void *
 thread_func_wrapper(void *arg)
 {
-	struct thread_routine_ctx ctx = *(struct thread_routine_ctx *)arg;
+	struct thread_routine_ctx *ctx = (struct thread_routine_ctx *)arg;
+	rte_thread_func thread_func = ctx->thread_func;
+	void *thread_args = ctx->routine_args;
+
+	if (ctx->thread_attr != NULL && CPU_COUNT(&ctx->thread_attr->cpuset) > 0) {
+		ctx->thread_wrapper_ret = rte_thread_set_affinity(&ctx->thread_attr->cpuset);
+		if (ctx->thread_wrapper_ret != 0) {
+			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity failed\n");
+			__atomic_store_n(&ctx->thread_wrapper_status,
+				THREAD_WRAPPER_ERROR, __ATOMIC_RELEASE);
+		}
+	}
+	__atomic_store_n(&ctx->thread_wrapper_status,
+		THREAD_WRAPPER_RUNNING, __ATOMIC_RELEASE);
 
 	free(arg);
 
-	return (void *)(uintptr_t)ctx.thread_func(ctx.routine_args);
+	return (void *)(uintptr_t)thread_func(thread_args);
 }
 
 int
@@ -98,6 +121,7 @@ struct thread_routine_ctx {
 	int ret = 0;
 	pthread_attr_t attr;
 	pthread_attr_t *attrp = NULL;
+	enum __rte_thread_wrapper_status thread_wrapper_status;
 	struct thread_routine_ctx *ctx;
 	struct sched_param param = {
 		.sched_priority = 0,
@@ -111,7 +135,10 @@ struct thread_routine_ctx {
 		goto cleanup;
 	}
 	ctx->routine_args = args;
+	ctx->thread_attr = thread_attr;
 	ctx->thread_func = thread_func;
+	ctx->thread_wrapper_ret = 0;
+	ctx->thread_wrapper_status = THREAD_WRAPPER_LAUNCHING;
 
 	if (thread_attr != NULL) {
 		ret = pthread_attr_init(&attr);
@@ -164,16 +191,21 @@ struct thread_routine_ctx {
 		goto cleanup;
 	}
 
-	if (thread_attr != NULL && CPU_COUNT(&thread_attr->cpuset) > 0) {
-		ret = rte_thread_set_affinity_by_id(*thread_id,
-			&thread_attr->cpuset);
-		if (ret != 0) {
-			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity_by_id failed\n");
-			goto cleanup;
-		}
+	/* Wait for the thread wrapper to initialize thread successfully */
+	while ((thread_wrapper_status =
+		__atomic_load_n(&ctx->thread_wrapper_status,
+		__ATOMIC_ACQUIRE)) == THREAD_WRAPPER_LAUNCHING)
+			sched_yield();
+
+	/* Check if the control thread encountered an error */
+	if (thread_wrapper_status == THREAD_WRAPPER_ERROR) {
+		/* thread wrapper is exiting */
+		pthread_join((pthread_t)thread_id->opaque_id, NULL);
+		ret = ctx->thread_wrapper_ret;
+		free(ctx);
 	}
-
 	ctx = NULL;
+
 cleanup:
 	free(ctx);
 	if (attrp != NULL)
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH v3 2/2] eal: fix failure path race setting new thread affinity
       [not found] ` <1678833856-9314-1-git-send-email-roretzla@linux.microsoft.com>
@ 2023-03-14 22:44   ` Tyler Retzlaff
  0 siblings, 0 replies; 22+ messages in thread
From: Tyler Retzlaff @ 2023-03-14 22:44 UTC (permalink / raw)
  To: dev; +Cc: thomas, david.marchand, Tyler Retzlaff, stable

In rte_thread_create setting affinity after pthread_create may fail.
Such a failure should result in the entire rte_thread_create failing
but doesn't.

Additionally if there is a failure to set affinity a race exists where
the creating thread will free ctx and depending on scheduling of the new
thread it may also free ctx (double free).

Resolve both of the above issues by setting the affinity from the newly
created thread instead of after thread creation. To achieve this modify
the existing thread wrapper to allow the creating thread to wait on the
result of the set affinity operation.

Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
Cc: stable@dpdk.org

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/eal/unix/rte_thread.c | 51 +++++++++++++++++++++++++++++++++++++----------
 1 file changed, 40 insertions(+), 11 deletions(-)

diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
index 37ebfcf..fde506b 100644
--- a/lib/eal/unix/rte_thread.c
+++ b/lib/eal/unix/rte_thread.c
@@ -16,8 +16,17 @@ struct eal_tls_key {
 	pthread_key_t thread_index;
 };
 
+enum __rte_thread_wrapper_status {
+	THREAD_WRAPPER_LAUNCHING, /* Yet to call thread_func function */
+	THREAD_WRAPPER_RUNNING, /* Thread is running successfully */
+	THREAD_WRAPPER_ERROR /* Thread thread_wrapper encountered an error */
+};
+
 struct thread_routine_ctx {
 	rte_thread_func thread_func;
+	const rte_thread_attr_t *thread_attr;
+	int thread_wrapper_ret;
+	enum __rte_thread_wrapper_status thread_wrapper_status;
 	void *routine_args;
 };
 
@@ -83,11 +92,22 @@ struct thread_routine_ctx {
 static void *
 thread_func_wrapper(void *arg)
 {
-	struct thread_routine_ctx ctx = *(struct thread_routine_ctx *)arg;
-
-	free(arg);
+	struct thread_routine_ctx *ctx = (struct thread_routine_ctx *)arg;
+	rte_thread_func thread_func = ctx->thread_func;
+	void * thread_args = ctx->routine_args;
+
+	if (ctx->thread_attr != NULL && CPU_COUNT(&ctx->thread_attr->cpuset) > 0) {
+		ctx->thread_wrapper_ret = rte_thread_set_affinity(&ctx->thread_attr->cpuset);
+		if (ctx->thread_wrapper_ret != 0) {
+			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity failed\n");
+			__atomic_store_n(&ctx->thread_wrapper_status,
+				THREAD_WRAPPER_ERROR, __ATOMIC_RELEASE);
+		}
+	}
+	__atomic_store_n(&ctx->thread_wrapper_status,
+		THREAD_WRAPPER_RUNNING, __ATOMIC_RELEASE);
 
-	return (void *)(uintptr_t)ctx.thread_func(ctx.routine_args);
+	return (void *)(uintptr_t)thread_func(thread_args);
 }
 
 int
@@ -98,6 +118,7 @@ struct thread_routine_ctx {
 	int ret = 0;
 	pthread_attr_t attr;
 	pthread_attr_t *attrp = NULL;
+	enum __rte_thread_wrapper_status thread_wrapper_status;
 	struct thread_routine_ctx *ctx;
 	struct sched_param param = {
 		.sched_priority = 0,
@@ -111,7 +132,10 @@ struct thread_routine_ctx {
 		goto cleanup;
 	}
 	ctx->routine_args = args;
+	ctx->thread_attr = thread_attr;
 	ctx->thread_func = thread_func;
+	ctx->thread_wrapper_ret = 0;
+	ctx->thread_wrapper_status = THREAD_WRAPPER_LAUNCHING;
 
 	if (thread_attr != NULL) {
 		ret = pthread_attr_init(&attr);
@@ -164,14 +188,19 @@ struct thread_routine_ctx {
 		goto cleanup;
 	}
 
-	if (thread_attr != NULL && CPU_COUNT(&thread_attr->cpuset) > 0) {
-		ret = rte_thread_set_affinity_by_id(*thread_id,
-			&thread_attr->cpuset);
-		if (ret != 0) {
-			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity_by_id failed\n");
-			goto cleanup;
-		}
+	/* Wait for the thread wrapper to initialize thread successfully */
+	while ((thread_wrapper_status =
+		__atomic_load_n(&ctx->thread_wrapper_status,
+		__ATOMIC_ACQUIRE)) == THREAD_WRAPPER_LAUNCHING)
+			sched_yield();
+
+	/* Check if the control thread encountered an error */
+	if (thread_wrapper_status == THREAD_WRAPPER_ERROR) {
+		/* thread wrapper is exiting */
+		pthread_join((pthread_t)thread_id->opaque_id, NULL);
+		ret = ctx->thread_wrapper_ret;
 	}
+	free(ctx);
 
 	ctx = NULL;
 cleanup:
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH v4 2/2] eal: fix failure path race setting new thread affinity
       [not found] ` <1678834239-11569-1-git-send-email-roretzla@linux.microsoft.com>
@ 2023-03-14 22:50   ` Tyler Retzlaff
  2023-03-15  1:20     ` Stephen Hemminger
  0 siblings, 1 reply; 22+ messages in thread
From: Tyler Retzlaff @ 2023-03-14 22:50 UTC (permalink / raw)
  To: dev; +Cc: thomas, david.marchand, Tyler Retzlaff, stable

In rte_thread_create setting affinity after pthread_create may fail.
Such a failure should result in the entire rte_thread_create failing
but doesn't.

Additionally if there is a failure to set affinity a race exists where
the creating thread will free ctx and depending on scheduling of the new
thread it may also free ctx (double free).

Resolve both of the above issues by setting the affinity from the newly
created thread instead of after thread creation. To achieve this modify
the existing thread wrapper to allow the creating thread to wait on the
result of the set affinity operation.

Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
Cc: stable@dpdk.org

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/eal/unix/rte_thread.c | 51 +++++++++++++++++++++++++++++++++++++----------
 1 file changed, 40 insertions(+), 11 deletions(-)

diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
index 37ebfcf..9fcf53b 100644
--- a/lib/eal/unix/rte_thread.c
+++ b/lib/eal/unix/rte_thread.c
@@ -16,8 +16,17 @@ struct eal_tls_key {
 	pthread_key_t thread_index;
 };
 
+enum __rte_thread_wrapper_status {
+	THREAD_WRAPPER_LAUNCHING, /* Yet to call thread_func function */
+	THREAD_WRAPPER_RUNNING, /* Thread is running successfully */
+	THREAD_WRAPPER_ERROR /* Thread thread_wrapper encountered an error */
+};
+
 struct thread_routine_ctx {
 	rte_thread_func thread_func;
+	const rte_thread_attr_t *thread_attr;
+	int thread_wrapper_ret;
+	enum __rte_thread_wrapper_status thread_wrapper_status;
 	void *routine_args;
 };
 
@@ -83,11 +92,22 @@ struct thread_routine_ctx {
 static void *
 thread_func_wrapper(void *arg)
 {
-	struct thread_routine_ctx ctx = *(struct thread_routine_ctx *)arg;
-
-	free(arg);
+	struct thread_routine_ctx *ctx = (struct thread_routine_ctx *)arg;
+	rte_thread_func thread_func = ctx->thread_func;
+	void *thread_args = ctx->routine_args;
+
+	if (ctx->thread_attr != NULL && CPU_COUNT(&ctx->thread_attr->cpuset) > 0) {
+		ctx->thread_wrapper_ret = rte_thread_set_affinity(&ctx->thread_attr->cpuset);
+		if (ctx->thread_wrapper_ret != 0) {
+			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity failed\n");
+			__atomic_store_n(&ctx->thread_wrapper_status,
+				THREAD_WRAPPER_ERROR, __ATOMIC_RELEASE);
+		}
+	}
+	__atomic_store_n(&ctx->thread_wrapper_status,
+		THREAD_WRAPPER_RUNNING, __ATOMIC_RELEASE);
 
-	return (void *)(uintptr_t)ctx.thread_func(ctx.routine_args);
+	return (void *)(uintptr_t)thread_func(thread_args);
 }
 
 int
@@ -98,6 +118,7 @@ struct thread_routine_ctx {
 	int ret = 0;
 	pthread_attr_t attr;
 	pthread_attr_t *attrp = NULL;
+	enum __rte_thread_wrapper_status thread_wrapper_status;
 	struct thread_routine_ctx *ctx;
 	struct sched_param param = {
 		.sched_priority = 0,
@@ -111,7 +132,10 @@ struct thread_routine_ctx {
 		goto cleanup;
 	}
 	ctx->routine_args = args;
+	ctx->thread_attr = thread_attr;
 	ctx->thread_func = thread_func;
+	ctx->thread_wrapper_ret = 0;
+	ctx->thread_wrapper_status = THREAD_WRAPPER_LAUNCHING;
 
 	if (thread_attr != NULL) {
 		ret = pthread_attr_init(&attr);
@@ -164,14 +188,19 @@ struct thread_routine_ctx {
 		goto cleanup;
 	}
 
-	if (thread_attr != NULL && CPU_COUNT(&thread_attr->cpuset) > 0) {
-		ret = rte_thread_set_affinity_by_id(*thread_id,
-			&thread_attr->cpuset);
-		if (ret != 0) {
-			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity_by_id failed\n");
-			goto cleanup;
-		}
+	/* Wait for the thread wrapper to initialize thread successfully */
+	while ((thread_wrapper_status =
+		__atomic_load_n(&ctx->thread_wrapper_status,
+		__ATOMIC_ACQUIRE)) == THREAD_WRAPPER_LAUNCHING)
+		sched_yield();
+
+	/* Check if the control thread encountered an error */
+	if (thread_wrapper_status == THREAD_WRAPPER_ERROR) {
+		/* thread wrapper is exiting */
+		pthread_join((pthread_t)thread_id->opaque_id, NULL);
+		ret = ctx->thread_wrapper_ret;
 	}
+	free(ctx);
 
 	ctx = NULL;
 cleanup:
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v4 2/2] eal: fix failure path race setting new thread affinity
  2023-03-14 22:50   ` [PATCH v4 " Tyler Retzlaff
@ 2023-03-15  1:20     ` Stephen Hemminger
  2023-03-15  1:26       ` Tyler Retzlaff
  0 siblings, 1 reply; 22+ messages in thread
From: Stephen Hemminger @ 2023-03-15  1:20 UTC (permalink / raw)
  To: Tyler Retzlaff; +Cc: dev, thomas, david.marchand, stable

On Tue, 14 Mar 2023 15:50:39 -0700
Tyler Retzlaff <roretzla@linux.microsoft.com> wrote:

> +	/* Wait for the thread wrapper to initialize thread successfully */
> +	while ((thread_wrapper_status =
> +		__atomic_load_n(&ctx->thread_wrapper_status,
> +		__ATOMIC_ACQUIRE)) == THREAD_WRAPPER_LAUNCHING)
> +		sched_yield();

Using pthread condition variable would be better, and avoid
using sched_yield() which is deprecated and not guaranteed to
work in cases where threads have different priority.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v4 2/2] eal: fix failure path race setting new thread affinity
  2023-03-15  1:20     ` Stephen Hemminger
@ 2023-03-15  1:26       ` Tyler Retzlaff
  0 siblings, 0 replies; 22+ messages in thread
From: Tyler Retzlaff @ 2023-03-15  1:26 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, thomas, david.marchand, stable

On Tue, Mar 14, 2023 at 06:20:42PM -0700, Stephen Hemminger wrote:
> On Tue, 14 Mar 2023 15:50:39 -0700
> Tyler Retzlaff <roretzla@linux.microsoft.com> wrote:
> 
> > +	/* Wait for the thread wrapper to initialize thread successfully */
> > +	while ((thread_wrapper_status =
> > +		__atomic_load_n(&ctx->thread_wrapper_status,
> > +		__ATOMIC_ACQUIRE)) == THREAD_WRAPPER_LAUNCHING)
> > +		sched_yield();
> 
> Using pthread condition variable would be better, and avoid
> using sched_yield() which is deprecated and not guaranteed to
> work in cases where threads have different priority.

seems reasonable since this code is platform dependent, i'll spin a new
patch with a condvar then.

thanks for the suggestion.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH v4 2/2] eal: fix failure path race setting new thread affinity
       [not found] ` <1678925050-1955-1-git-send-email-roretzla@linux.microsoft.com>
@ 2023-03-16  0:04   ` Tyler Retzlaff
  0 siblings, 0 replies; 22+ messages in thread
From: Tyler Retzlaff @ 2023-03-16  0:04 UTC (permalink / raw)
  To: dev; +Cc: thomas, david.marchand, stephen, Tyler Retzlaff, stable

In rte_thread_create setting affinity after pthread_create may fail.
Such a failure should result in the entire rte_thread_create failing
but doesn't.

Additionally if there is a failure to set affinity a race exists where
the creating thread will free ctx and depending on scheduling of the new
thread it may also free ctx (double free).

Resolve the above by setting the affinity from the newly created thread
using a condition variable to signal the completion of the thread
start wrapper having completed.

Since we are now waiting for the thread start wrapper to complete we can
allocate the thread start wrapper context on the stack. While here clean
up the variable naming in the context to better highlight the fields of
the context require synchronization between the creating and created
thread.

Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
Cc: stable@dpdk.org

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/eal/unix/rte_thread.c | 70 +++++++++++++++++++++++++++++------------------
 1 file changed, 43 insertions(+), 27 deletions(-)

diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
index 37ebfcf..5992b04 100644
--- a/lib/eal/unix/rte_thread.c
+++ b/lib/eal/unix/rte_thread.c
@@ -16,9 +16,14 @@ struct eal_tls_key {
 	pthread_key_t thread_index;
 };
 
-struct thread_routine_ctx {
+struct thread_start_context {
 	rte_thread_func thread_func;
-	void *routine_args;
+	void *thread_args;
+	const rte_thread_attr_t *thread_attr;
+	pthread_mutex_t wrapper_mutex;
+	pthread_cond_t wrapper_cond;
+	int wrapper_ret;
+	volatile int wrapper_done;
 };
 
 static int
@@ -81,13 +86,29 @@ struct thread_routine_ctx {
 }
 
 static void *
-thread_func_wrapper(void *arg)
+thread_start_wrapper(void *arg)
 {
-	struct thread_routine_ctx ctx = *(struct thread_routine_ctx *)arg;
+	struct thread_start_context *ctx = (struct thread_start_context *)arg;
+	rte_thread_func thread_func = ctx->thread_func;
+	void *thread_args = ctx->thread_args;
+	int ret = 0;
 
-	free(arg);
+	if (ctx->thread_attr != NULL && CPU_COUNT(&ctx->thread_attr->cpuset) > 0) {
+		ret = rte_thread_set_affinity(&ctx->thread_attr->cpuset);
+		if (ret != 0)
+			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity failed\n");
+	}
 
-	return (void *)(uintptr_t)ctx.thread_func(ctx.routine_args);
+	pthread_mutex_lock(&ctx->wrapper_mutex);
+	ctx->wrapper_ret = ret;
+	ctx->wrapper_done = 1;
+	pthread_cond_signal(&ctx->wrapper_cond);
+	pthread_mutex_unlock(&ctx->wrapper_mutex);
+
+	if (ret != 0)
+		return NULL;
+
+	return (void *)(uintptr_t)thread_func(thread_args);
 }
 
 int
@@ -98,20 +119,17 @@ struct thread_routine_ctx {
 	int ret = 0;
 	pthread_attr_t attr;
 	pthread_attr_t *attrp = NULL;
-	struct thread_routine_ctx *ctx;
 	struct sched_param param = {
 		.sched_priority = 0,
 	};
 	int policy = SCHED_OTHER;
-
-	ctx = calloc(1, sizeof(*ctx));
-	if (ctx == NULL) {
-		RTE_LOG(DEBUG, EAL, "Insufficient memory for thread context allocations\n");
-		ret = ENOMEM;
-		goto cleanup;
-	}
-	ctx->routine_args = args;
-	ctx->thread_func = thread_func;
+	struct thread_start_context ctx = {
+		.thread_func = thread_func,
+		.thread_args = args,
+		.thread_attr = thread_attr,
+		.wrapper_mutex = PTHREAD_MUTEX_INITIALIZER,
+		.wrapper_cond = PTHREAD_COND_INITIALIZER,
+	};
 
 	if (thread_attr != NULL) {
 		ret = pthread_attr_init(&attr);
@@ -158,24 +176,22 @@ struct thread_routine_ctx {
 	}
 
 	ret = pthread_create((pthread_t *)&thread_id->opaque_id, attrp,
-		thread_func_wrapper, ctx);
+		thread_start_wrapper, &ctx);
 	if (ret != 0) {
 		RTE_LOG(DEBUG, EAL, "pthread_create failed\n");
 		goto cleanup;
 	}
 
-	if (thread_attr != NULL && CPU_COUNT(&thread_attr->cpuset) > 0) {
-		ret = rte_thread_set_affinity_by_id(*thread_id,
-			&thread_attr->cpuset);
-		if (ret != 0) {
-			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity_by_id failed\n");
-			goto cleanup;
-		}
-	}
+	pthread_mutex_lock(&ctx.wrapper_mutex);
+	while (ctx.wrapper_done != 1)
+		pthread_cond_wait(&ctx.wrapper_cond, &ctx.wrapper_mutex);
+	ret = ctx.wrapper_ret;
+	pthread_mutex_unlock(&ctx.wrapper_mutex);
+
+	if (ret != 0)
+		pthread_join((pthread_t)thread_id->opaque_id, NULL);
 
-	ctx = NULL;
 cleanup:
-	free(ctx);
 	if (attrp != NULL)
 		pthread_attr_destroy(&attr);
 
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH v5 2/2] eal: fix failure path race setting new thread affinity
       [not found] ` <1678925224-2706-1-git-send-email-roretzla@linux.microsoft.com>
@ 2023-03-16  0:07   ` Tyler Retzlaff
  2023-03-17 10:45     ` David Marchand
  0 siblings, 1 reply; 22+ messages in thread
From: Tyler Retzlaff @ 2023-03-16  0:07 UTC (permalink / raw)
  To: dev; +Cc: thomas, david.marchand, stephen, Tyler Retzlaff, stable

In rte_thread_create setting affinity after pthread_create may fail.
Such a failure should result in the entire rte_thread_create failing
but doesn't.

Additionally if there is a failure to set affinity a race exists where
the creating thread will free ctx and depending on scheduling of the new
thread it may also free ctx (double free).

Resolve the above by setting the affinity from the newly created thread
using a condition variable to signal the completion of the thread
start wrapper having completed.

Since we are now waiting for the thread start wrapper to complete we can
allocate the thread start wrapper context on the stack. While here clean
up the variable naming in the context to better highlight the fields of
the context require synchronization between the creating and created
thread.

Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
Cc: stable@dpdk.org

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/eal/unix/rte_thread.c | 70 +++++++++++++++++++++++++++++------------------
 1 file changed, 43 insertions(+), 27 deletions(-)

diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
index 37ebfcf..5992b04 100644
--- a/lib/eal/unix/rte_thread.c
+++ b/lib/eal/unix/rte_thread.c
@@ -16,9 +16,14 @@ struct eal_tls_key {
 	pthread_key_t thread_index;
 };
 
-struct thread_routine_ctx {
+struct thread_start_context {
 	rte_thread_func thread_func;
-	void *routine_args;
+	void *thread_args;
+	const rte_thread_attr_t *thread_attr;
+	pthread_mutex_t wrapper_mutex;
+	pthread_cond_t wrapper_cond;
+	int wrapper_ret;
+	volatile int wrapper_done;
 };
 
 static int
@@ -81,13 +86,29 @@ struct thread_routine_ctx {
 }
 
 static void *
-thread_func_wrapper(void *arg)
+thread_start_wrapper(void *arg)
 {
-	struct thread_routine_ctx ctx = *(struct thread_routine_ctx *)arg;
+	struct thread_start_context *ctx = (struct thread_start_context *)arg;
+	rte_thread_func thread_func = ctx->thread_func;
+	void *thread_args = ctx->thread_args;
+	int ret = 0;
 
-	free(arg);
+	if (ctx->thread_attr != NULL && CPU_COUNT(&ctx->thread_attr->cpuset) > 0) {
+		ret = rte_thread_set_affinity(&ctx->thread_attr->cpuset);
+		if (ret != 0)
+			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity failed\n");
+	}
 
-	return (void *)(uintptr_t)ctx.thread_func(ctx.routine_args);
+	pthread_mutex_lock(&ctx->wrapper_mutex);
+	ctx->wrapper_ret = ret;
+	ctx->wrapper_done = 1;
+	pthread_cond_signal(&ctx->wrapper_cond);
+	pthread_mutex_unlock(&ctx->wrapper_mutex);
+
+	if (ret != 0)
+		return NULL;
+
+	return (void *)(uintptr_t)thread_func(thread_args);
 }
 
 int
@@ -98,20 +119,17 @@ struct thread_routine_ctx {
 	int ret = 0;
 	pthread_attr_t attr;
 	pthread_attr_t *attrp = NULL;
-	struct thread_routine_ctx *ctx;
 	struct sched_param param = {
 		.sched_priority = 0,
 	};
 	int policy = SCHED_OTHER;
-
-	ctx = calloc(1, sizeof(*ctx));
-	if (ctx == NULL) {
-		RTE_LOG(DEBUG, EAL, "Insufficient memory for thread context allocations\n");
-		ret = ENOMEM;
-		goto cleanup;
-	}
-	ctx->routine_args = args;
-	ctx->thread_func = thread_func;
+	struct thread_start_context ctx = {
+		.thread_func = thread_func,
+		.thread_args = args,
+		.thread_attr = thread_attr,
+		.wrapper_mutex = PTHREAD_MUTEX_INITIALIZER,
+		.wrapper_cond = PTHREAD_COND_INITIALIZER,
+	};
 
 	if (thread_attr != NULL) {
 		ret = pthread_attr_init(&attr);
@@ -158,24 +176,22 @@ struct thread_routine_ctx {
 	}
 
 	ret = pthread_create((pthread_t *)&thread_id->opaque_id, attrp,
-		thread_func_wrapper, ctx);
+		thread_start_wrapper, &ctx);
 	if (ret != 0) {
 		RTE_LOG(DEBUG, EAL, "pthread_create failed\n");
 		goto cleanup;
 	}
 
-	if (thread_attr != NULL && CPU_COUNT(&thread_attr->cpuset) > 0) {
-		ret = rte_thread_set_affinity_by_id(*thread_id,
-			&thread_attr->cpuset);
-		if (ret != 0) {
-			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity_by_id failed\n");
-			goto cleanup;
-		}
-	}
+	pthread_mutex_lock(&ctx.wrapper_mutex);
+	while (ctx.wrapper_done != 1)
+		pthread_cond_wait(&ctx.wrapper_cond, &ctx.wrapper_mutex);
+	ret = ctx.wrapper_ret;
+	pthread_mutex_unlock(&ctx.wrapper_mutex);
+
+	if (ret != 0)
+		pthread_join((pthread_t)thread_id->opaque_id, NULL);
 
-	ctx = NULL;
 cleanup:
-	free(ctx);
 	if (attrp != NULL)
 		pthread_attr_destroy(&attr);
 
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v5 2/2] eal: fix failure path race setting new thread affinity
  2023-03-16  0:07   ` [PATCH v5 " Tyler Retzlaff
@ 2023-03-17 10:45     ` David Marchand
  2023-03-17 14:49       ` Tyler Retzlaff
  0 siblings, 1 reply; 22+ messages in thread
From: David Marchand @ 2023-03-17 10:45 UTC (permalink / raw)
  To: Tyler Retzlaff; +Cc: dev, thomas, stephen, stable

On Thu, Mar 16, 2023 at 1:07 AM Tyler Retzlaff
<roretzla@linux.microsoft.com> wrote:
>
> In rte_thread_create setting affinity after pthread_create may fail.
> Such a failure should result in the entire rte_thread_create failing
> but doesn't.
>
> Additionally if there is a failure to set affinity a race exists where
> the creating thread will free ctx and depending on scheduling of the new
> thread it may also free ctx (double free).
>
> Resolve the above by setting the affinity from the newly created thread
> using a condition variable to signal the completion of the thread
> start wrapper having completed.
>
> Since we are now waiting for the thread start wrapper to complete we can
> allocate the thread start wrapper context on the stack. While here clean
> up the variable naming in the context to better highlight the fields of
> the context require synchronization between the creating and created
> thread.
>
> Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
> Cc: stable@dpdk.org
>
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> ---
>  lib/eal/unix/rte_thread.c | 70 +++++++++++++++++++++++++++++------------------
>  1 file changed, 43 insertions(+), 27 deletions(-)
>
> diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
> index 37ebfcf..5992b04 100644
> --- a/lib/eal/unix/rte_thread.c
> +++ b/lib/eal/unix/rte_thread.c
> @@ -16,9 +16,14 @@ struct eal_tls_key {
>         pthread_key_t thread_index;
>  };
>
> -struct thread_routine_ctx {
> +struct thread_start_context {
>         rte_thread_func thread_func;
> -       void *routine_args;
> +       void *thread_args;
> +       const rte_thread_attr_t *thread_attr;
> +       pthread_mutex_t wrapper_mutex;
> +       pthread_cond_t wrapper_cond;
> +       int wrapper_ret;
> +       volatile int wrapper_done;

One question.

I see that wrapper_done is accessed under wrapper_mutex.
Is volatile needed?

(nit: a boolean is probably enough too)

I was thinking of squashing below diff:

diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
index 5992b04a45..5ab5267ca3 100644
--- a/lib/eal/unix/rte_thread.c
+++ b/lib/eal/unix/rte_thread.c
@@ -23,7 +23,7 @@ struct thread_start_context {
        pthread_mutex_t wrapper_mutex;
        pthread_cond_t wrapper_cond;
        int wrapper_ret;
-       volatile int wrapper_done;
+       bool wrapper_done;
 };

 static int
@@ -101,7 +101,7 @@ thread_start_wrapper(void *arg)

        pthread_mutex_lock(&ctx->wrapper_mutex);
        ctx->wrapper_ret = ret;
-       ctx->wrapper_done = 1;
+       ctx->wrapper_done = true;
        pthread_cond_signal(&ctx->wrapper_cond);
        pthread_mutex_unlock(&ctx->wrapper_mutex);

@@ -127,6 +127,7 @@ rte_thread_create(rte_thread_t *thread_id,
                .thread_func = thread_func,
                .thread_args = args,
                .thread_attr = thread_attr,
+               .wrapper_done = false,
                .wrapper_mutex = PTHREAD_MUTEX_INITIALIZER,
                .wrapper_cond = PTHREAD_COND_INITIALIZER,
        };
@@ -151,7 +152,6 @@ rte_thread_create(rte_thread_t *thread_id,
                        goto cleanup;
                }

-
                if (thread_attr->priority ==
                                RTE_THREAD_PRIORITY_REALTIME_CRITICAL) {
                        ret = ENOTSUP;
@@ -183,7 +183,7 @@ rte_thread_create(rte_thread_t *thread_id,
        }

        pthread_mutex_lock(&ctx.wrapper_mutex);
-       while (ctx.wrapper_done != 1)
+       while (!ctx.wrapper_done)
                pthread_cond_wait(&ctx.wrapper_cond, &ctx.wrapper_mutex);
        ret = ctx.wrapper_ret;
        pthread_mutex_unlock(&ctx.wrapper_mutex);


The rest lgtmn thanks Tyler.



-- 
David Marchand


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v5 2/2] eal: fix failure path race setting new thread affinity
  2023-03-17 10:45     ` David Marchand
@ 2023-03-17 14:49       ` Tyler Retzlaff
  2023-03-17 18:51         ` David Marchand
  0 siblings, 1 reply; 22+ messages in thread
From: Tyler Retzlaff @ 2023-03-17 14:49 UTC (permalink / raw)
  To: David Marchand; +Cc: dev, thomas, stephen, stable

On Fri, Mar 17, 2023 at 11:45:08AM +0100, David Marchand wrote:
> On Thu, Mar 16, 2023 at 1:07 AM Tyler Retzlaff
> <roretzla@linux.microsoft.com> wrote:
> >
> > In rte_thread_create setting affinity after pthread_create may fail.
> > Such a failure should result in the entire rte_thread_create failing
> > but doesn't.
> >
> > Additionally if there is a failure to set affinity a race exists where
> > the creating thread will free ctx and depending on scheduling of the new
> > thread it may also free ctx (double free).
> >
> > Resolve the above by setting the affinity from the newly created thread
> > using a condition variable to signal the completion of the thread
> > start wrapper having completed.
> >
> > Since we are now waiting for the thread start wrapper to complete we can
> > allocate the thread start wrapper context on the stack. While here clean
> > up the variable naming in the context to better highlight the fields of
> > the context require synchronization between the creating and created
> > thread.
> >
> > Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > ---
> >  lib/eal/unix/rte_thread.c | 70 +++++++++++++++++++++++++++++------------------
> >  1 file changed, 43 insertions(+), 27 deletions(-)
> >
> > diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
> > index 37ebfcf..5992b04 100644
> > --- a/lib/eal/unix/rte_thread.c
> > +++ b/lib/eal/unix/rte_thread.c
> > @@ -16,9 +16,14 @@ struct eal_tls_key {
> >         pthread_key_t thread_index;
> >  };
> >
> > -struct thread_routine_ctx {
> > +struct thread_start_context {
> >         rte_thread_func thread_func;
> > -       void *routine_args;
> > +       void *thread_args;
> > +       const rte_thread_attr_t *thread_attr;
> > +       pthread_mutex_t wrapper_mutex;
> > +       pthread_cond_t wrapper_cond;
> > +       int wrapper_ret;
> > +       volatile int wrapper_done;
> 
> One question.
> 
> I see that wrapper_done is accessed under wrapper_mutex.
> Is volatile needed?

I'm not entirely certain. i'm being cautious since i can conceive of the
load in the loop being optimized as a single load by the compiler. but
again i'm not sure, i always like to learn if someone knows better.

> 
> (nit: a boolean is probably enough too)

I have no issue with it being a _Bool if you want to adjust it for that
i certainly don't object. ordinarily i would use _Bool but a lot of dpdk
code seems to prefer int so that's why i chose it. if we use the macro
bool then we should include stdbool.h directly into this translation
unit.

> 
> I was thinking of squashing below diff:

Yeah, no objection. you can decide if you want to keep the volatile or
not and add the stdbool.h include.

Thanks for reviewing, appreciate it.

> 
> diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
> index 5992b04a45..5ab5267ca3 100644
> --- a/lib/eal/unix/rte_thread.c
> +++ b/lib/eal/unix/rte_thread.c
> @@ -23,7 +23,7 @@ struct thread_start_context {
>         pthread_mutex_t wrapper_mutex;
>         pthread_cond_t wrapper_cond;
>         int wrapper_ret;
> -       volatile int wrapper_done;
> +       bool wrapper_done;
>  };
> 
>  static int
> @@ -101,7 +101,7 @@ thread_start_wrapper(void *arg)
> 
>         pthread_mutex_lock(&ctx->wrapper_mutex);
>         ctx->wrapper_ret = ret;
> -       ctx->wrapper_done = 1;
> +       ctx->wrapper_done = true;
>         pthread_cond_signal(&ctx->wrapper_cond);
>         pthread_mutex_unlock(&ctx->wrapper_mutex);
> 
> @@ -127,6 +127,7 @@ rte_thread_create(rte_thread_t *thread_id,
>                 .thread_func = thread_func,
>                 .thread_args = args,
>                 .thread_attr = thread_attr,
> +               .wrapper_done = false,
>                 .wrapper_mutex = PTHREAD_MUTEX_INITIALIZER,
>                 .wrapper_cond = PTHREAD_COND_INITIALIZER,
>         };
> @@ -151,7 +152,6 @@ rte_thread_create(rte_thread_t *thread_id,
>                         goto cleanup;
>                 }
> 
> -
>                 if (thread_attr->priority ==
>                                 RTE_THREAD_PRIORITY_REALTIME_CRITICAL) {
>                         ret = ENOTSUP;
> @@ -183,7 +183,7 @@ rte_thread_create(rte_thread_t *thread_id,
>         }
> 
>         pthread_mutex_lock(&ctx.wrapper_mutex);
> -       while (ctx.wrapper_done != 1)
> +       while (!ctx.wrapper_done)
>                 pthread_cond_wait(&ctx.wrapper_cond, &ctx.wrapper_mutex);
>         ret = ctx.wrapper_ret;
>         pthread_mutex_unlock(&ctx.wrapper_mutex);
> 
> 
> The rest lgtmn thanks Tyler.
> 
> 
> 
> -- 
> David Marchand

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v5 2/2] eal: fix failure path race setting new thread affinity
  2023-03-17 14:49       ` Tyler Retzlaff
@ 2023-03-17 18:51         ` David Marchand
  2023-03-17 21:20           ` Tyler Retzlaff
  0 siblings, 1 reply; 22+ messages in thread
From: David Marchand @ 2023-03-17 18:51 UTC (permalink / raw)
  To: Tyler Retzlaff; +Cc: dev, thomas, stephen, stable, Dodji Seketeli

On Fri, Mar 17, 2023 at 3:50 PM Tyler Retzlaff
<roretzla@linux.microsoft.com> wrote:
> > > -struct thread_routine_ctx {
> > > +struct thread_start_context {
> > >         rte_thread_func thread_func;
> > > -       void *routine_args;
> > > +       void *thread_args;
> > > +       const rte_thread_attr_t *thread_attr;
> > > +       pthread_mutex_t wrapper_mutex;
> > > +       pthread_cond_t wrapper_cond;
> > > +       int wrapper_ret;
> > > +       volatile int wrapper_done;
> >
> > One question.
> >
> > I see that wrapper_done is accessed under wrapper_mutex.
> > Is volatile needed?
>
> I'm not entirely certain. i'm being cautious since i can conceive of the
> load in the loop being optimized as a single load by the compiler. but
> again i'm not sure, i always like to learn if someone knows better.

After an interesting discussion with Dodji on C99 and side effects
(5.1.2.3/2 and 5.1.2.3/3), I am a bit more convinced that we don't
need this volatile.


>
> >
> > (nit: a boolean is probably enough too)
>
> I have no issue with it being a _Bool if you want to adjust it for that
> i certainly don't object. ordinarily i would use _Bool but a lot of dpdk
> code seems to prefer int so that's why i chose it. if we use the macro
> bool then we should include stdbool.h directly into this translation
> unit.
>
> >
> > I was thinking of squashing below diff:
>
> Yeah, no objection. you can decide if you want to keep the volatile or
> not and add the stdbool.h include.
>
> Thanks for reviewing, appreciate it.

This is a fix but this v5 had an additional change in affinity setting
(switching to rte_thread_set_affinity()).
To be on the safe side wrt backport, I'll also revert to calling
rte_thread_set_affinity_by_id as this is what was being used before.
And this removes the need for patch 1.

Sending a v6 soon, so that it goes through the CI before rc3.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH v6] eal/unix: fix thread creation
  2023-03-02 18:44 [PATCH 1/2] eal: fix failure race and behavior of thread create Tyler Retzlaff
                   ` (6 preceding siblings ...)
       [not found] ` <1678925224-2706-1-git-send-email-roretzla@linux.microsoft.com>
@ 2023-03-17 18:52 ` David Marchand
  2023-03-17 21:24   ` Tyler Retzlaff
  2023-03-18 18:26   ` David Marchand
  7 siblings, 2 replies; 22+ messages in thread
From: David Marchand @ 2023-03-17 18:52 UTC (permalink / raw)
  To: dev
  Cc: thomas, stephen, Tyler Retzlaff, stable, Narcisa Vasile, Dmitry Kozlyuk

From: Tyler Retzlaff <roretzla@linux.microsoft.com>

In rte_thread_create setting affinity after pthread_create may fail.
Such a failure should result in the entire rte_thread_create failing
but doesn't.

Additionally if there is a failure to set affinity a race exists where
the creating thread will free ctx and depending on scheduling of the new
thread it may also free ctx (double free).

Resolve the above by setting the affinity from the newly created thread
using a condition variable to signal the completion of the thread
start wrapper having completed.

Since we are now waiting for the thread start wrapper to complete we can
allocate the thread start wrapper context on the stack. While here clean
up the variable naming in the context to better highlight the fields of
the context require synchronization between the creating and created
thread.

Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
Cc: stable@dpdk.org

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Signed-off-by: David Marchand <david.marchand@redhat.com>
---
Changes since v5:
- dropped volatile and switched to boolean for wrapper_done,
- reverted to rte_thread_set_affinity_by_id() call,

---
 lib/eal/unix/rte_thread.c | 73 ++++++++++++++++++++++++---------------
 1 file changed, 45 insertions(+), 28 deletions(-)

diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
index 37ebfcfca1..f4076122a4 100644
--- a/lib/eal/unix/rte_thread.c
+++ b/lib/eal/unix/rte_thread.c
@@ -5,6 +5,7 @@
 
 #include <errno.h>
 #include <pthread.h>
+#include <stdbool.h>
 #include <stdlib.h>
 #include <string.h>
 
@@ -16,9 +17,14 @@ struct eal_tls_key {
 	pthread_key_t thread_index;
 };
 
-struct thread_routine_ctx {
+struct thread_start_context {
 	rte_thread_func thread_func;
-	void *routine_args;
+	void *thread_args;
+	const rte_thread_attr_t *thread_attr;
+	pthread_mutex_t wrapper_mutex;
+	pthread_cond_t wrapper_cond;
+	int wrapper_ret;
+	bool wrapper_done;
 };
 
 static int
@@ -81,13 +87,29 @@ thread_map_os_priority_to_eal_priority(int policy, int os_pri,
 }
 
 static void *
-thread_func_wrapper(void *arg)
+thread_start_wrapper(void *arg)
 {
-	struct thread_routine_ctx ctx = *(struct thread_routine_ctx *)arg;
+	struct thread_start_context *ctx = (struct thread_start_context *)arg;
+	rte_thread_func thread_func = ctx->thread_func;
+	void *thread_args = ctx->thread_args;
+	int ret = 0;
+
+	if (ctx->thread_attr != NULL && CPU_COUNT(&ctx->thread_attr->cpuset) > 0) {
+		ret = rte_thread_set_affinity_by_id(rte_thread_self(), &ctx->thread_attr->cpuset);
+		if (ret != 0)
+			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity_by_id failed\n");
+	}
 
-	free(arg);
+	pthread_mutex_lock(&ctx->wrapper_mutex);
+	ctx->wrapper_ret = ret;
+	ctx->wrapper_done = true;
+	pthread_cond_signal(&ctx->wrapper_cond);
+	pthread_mutex_unlock(&ctx->wrapper_mutex);
 
-	return (void *)(uintptr_t)ctx.thread_func(ctx.routine_args);
+	if (ret != 0)
+		return NULL;
+
+	return (void *)(uintptr_t)thread_func(thread_args);
 }
 
 int
@@ -98,20 +120,18 @@ rte_thread_create(rte_thread_t *thread_id,
 	int ret = 0;
 	pthread_attr_t attr;
 	pthread_attr_t *attrp = NULL;
-	struct thread_routine_ctx *ctx;
 	struct sched_param param = {
 		.sched_priority = 0,
 	};
 	int policy = SCHED_OTHER;
-
-	ctx = calloc(1, sizeof(*ctx));
-	if (ctx == NULL) {
-		RTE_LOG(DEBUG, EAL, "Insufficient memory for thread context allocations\n");
-		ret = ENOMEM;
-		goto cleanup;
-	}
-	ctx->routine_args = args;
-	ctx->thread_func = thread_func;
+	struct thread_start_context ctx = {
+		.thread_func = thread_func,
+		.thread_args = args,
+		.thread_attr = thread_attr,
+		.wrapper_done = false,
+		.wrapper_mutex = PTHREAD_MUTEX_INITIALIZER,
+		.wrapper_cond = PTHREAD_COND_INITIALIZER,
+	};
 
 	if (thread_attr != NULL) {
 		ret = pthread_attr_init(&attr);
@@ -133,7 +153,6 @@ rte_thread_create(rte_thread_t *thread_id,
 			goto cleanup;
 		}
 
-
 		if (thread_attr->priority ==
 				RTE_THREAD_PRIORITY_REALTIME_CRITICAL) {
 			ret = ENOTSUP;
@@ -158,24 +177,22 @@ rte_thread_create(rte_thread_t *thread_id,
 	}
 
 	ret = pthread_create((pthread_t *)&thread_id->opaque_id, attrp,
-		thread_func_wrapper, ctx);
+		thread_start_wrapper, &ctx);
 	if (ret != 0) {
 		RTE_LOG(DEBUG, EAL, "pthread_create failed\n");
 		goto cleanup;
 	}
 
-	if (thread_attr != NULL && CPU_COUNT(&thread_attr->cpuset) > 0) {
-		ret = rte_thread_set_affinity_by_id(*thread_id,
-			&thread_attr->cpuset);
-		if (ret != 0) {
-			RTE_LOG(DEBUG, EAL, "rte_thread_set_affinity_by_id failed\n");
-			goto cleanup;
-		}
-	}
+	pthread_mutex_lock(&ctx.wrapper_mutex);
+	while (!ctx.wrapper_done)
+		pthread_cond_wait(&ctx.wrapper_cond, &ctx.wrapper_mutex);
+	ret = ctx.wrapper_ret;
+	pthread_mutex_unlock(&ctx.wrapper_mutex);
+
+	if (ret != 0)
+		pthread_join((pthread_t)thread_id->opaque_id, NULL);
 
-	ctx = NULL;
 cleanup:
-	free(ctx);
 	if (attrp != NULL)
 		pthread_attr_destroy(&attr);
 
-- 
2.39.2


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v5 2/2] eal: fix failure path race setting new thread affinity
  2023-03-17 18:51         ` David Marchand
@ 2023-03-17 21:20           ` Tyler Retzlaff
  0 siblings, 0 replies; 22+ messages in thread
From: Tyler Retzlaff @ 2023-03-17 21:20 UTC (permalink / raw)
  To: David Marchand; +Cc: dev, thomas, stephen, stable, Dodji Seketeli

On Fri, Mar 17, 2023 at 07:51:25PM +0100, David Marchand wrote:
> On Fri, Mar 17, 2023 at 3:50 PM Tyler Retzlaff
> <roretzla@linux.microsoft.com> wrote:
> > > > -struct thread_routine_ctx {
> > > > +struct thread_start_context {
> > > >         rte_thread_func thread_func;
> > > > -       void *routine_args;
> > > > +       void *thread_args;
> > > > +       const rte_thread_attr_t *thread_attr;
> > > > +       pthread_mutex_t wrapper_mutex;
> > > > +       pthread_cond_t wrapper_cond;
> > > > +       int wrapper_ret;
> > > > +       volatile int wrapper_done;
> > >
> > > One question.
> > >
> > > I see that wrapper_done is accessed under wrapper_mutex.
> > > Is volatile needed?
> >
> > I'm not entirely certain. i'm being cautious since i can conceive of the
> > load in the loop being optimized as a single load by the compiler. but
> > again i'm not sure, i always like to learn if someone knows better.
> 
> After an interesting discussion with Dodji on C99 and side effects
> (5.1.2.3/2 and 5.1.2.3/3), I am a bit more convinced that we don't
> need this volatile.

Thanks for the references, based on the reading i agree we can drop the
volatile.

> 
> 
> >
> > >
> > > (nit: a boolean is probably enough too)
> >
> > I have no issue with it being a _Bool if you want to adjust it for that
> > i certainly don't object. ordinarily i would use _Bool but a lot of dpdk
> > code seems to prefer int so that's why i chose it. if we use the macro
> > bool then we should include stdbool.h directly into this translation
> > unit.
> >
> > >
> > > I was thinking of squashing below diff:
> >
> > Yeah, no objection. you can decide if you want to keep the volatile or
> > not and add the stdbool.h include.
> >
> > Thanks for reviewing, appreciate it.
> 
> This is a fix but this v5 had an additional change in affinity setting
> (switching to rte_thread_set_affinity()).
> To be on the safe side wrt backport, I'll also revert to calling
> rte_thread_set_affinity_by_id as this is what was being used before.
> And this removes the need for patch 1.

Is it worth merging the const patch but not backporting? I'm not fussed
either way.

> 
> Sending a v6 soon, so that it goes through the CI before rc3.

Yes, great.

Thanks David!

> 
> 
> -- 
> David Marchand

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v6] eal/unix: fix thread creation
  2023-03-17 18:52 ` [PATCH v6] eal/unix: fix thread creation David Marchand
@ 2023-03-17 21:24   ` Tyler Retzlaff
  2023-03-18 18:26     ` David Marchand
  2023-03-18 18:26   ` David Marchand
  1 sibling, 1 reply; 22+ messages in thread
From: Tyler Retzlaff @ 2023-03-17 21:24 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, thomas, stephen, stable, Narcisa Vasile, Dmitry Kozlyuk

On Fri, Mar 17, 2023 at 07:52:29PM +0100, David Marchand wrote:
> From: Tyler Retzlaff <roretzla@linux.microsoft.com>
> 
> In rte_thread_create setting affinity after pthread_create may fail.
> Such a failure should result in the entire rte_thread_create failing
> but doesn't.
> 
> Additionally if there is a failure to set affinity a race exists where
> the creating thread will free ctx and depending on scheduling of the new
> thread it may also free ctx (double free).
> 
> Resolve the above by setting the affinity from the newly created thread
> using a condition variable to signal the completion of the thread
> start wrapper having completed.
> 
> Since we are now waiting for the thread start wrapper to complete we can
> allocate the thread start wrapper context on the stack. While here clean
> up the variable naming in the context to better highlight the fields of
> the context require synchronization between the creating and created
> thread.
> 
> Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---

Looks good to me, not sure if you need a Reviewed-by: from me for the
changes but here is one anyway.

v5
Reviewed-by: Tyler Retzlaff <roretzla@linux.microsoft.com>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v6] eal/unix: fix thread creation
  2023-03-17 21:24   ` Tyler Retzlaff
@ 2023-03-18 18:26     ` David Marchand
  0 siblings, 0 replies; 22+ messages in thread
From: David Marchand @ 2023-03-18 18:26 UTC (permalink / raw)
  To: Tyler Retzlaff
  Cc: dev, thomas, stephen, stable, Narcisa Vasile, Dmitry Kozlyuk

On Fri, Mar 17, 2023 at 10:24 PM Tyler Retzlaff
<roretzla@linux.microsoft.com> wrote:
>
> On Fri, Mar 17, 2023 at 07:52:29PM +0100, David Marchand wrote:
> > From: Tyler Retzlaff <roretzla@linux.microsoft.com>
> >
> > In rte_thread_create setting affinity after pthread_create may fail.
> > Such a failure should result in the entire rte_thread_create failing
> > but doesn't.
> >
> > Additionally if there is a failure to set affinity a race exists where
> > the creating thread will free ctx and depending on scheduling of the new
> > thread it may also free ctx (double free).
> >
> > Resolve the above by setting the affinity from the newly created thread
> > using a condition variable to signal the completion of the thread
> > start wrapper having completed.
> >
> > Since we are now waiting for the thread start wrapper to complete we can
> > allocate the thread start wrapper context on the stack. While here clean
> > up the variable naming in the context to better highlight the fields of
> > the context require synchronization between the creating and created
> > thread.
> >
> > Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > ---
>
> Looks good to me, not sure if you need a Reviewed-by: from me for the
> changes but here is one anyway.
>
> v5
> Reviewed-by: Tyler Retzlaff <roretzla@linux.microsoft.com>

Thanks for the review, I'll keep your SoB only on the patch.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v6] eal/unix: fix thread creation
  2023-03-17 18:52 ` [PATCH v6] eal/unix: fix thread creation David Marchand
  2023-03-17 21:24   ` Tyler Retzlaff
@ 2023-03-18 18:26   ` David Marchand
  1 sibling, 0 replies; 22+ messages in thread
From: David Marchand @ 2023-03-18 18:26 UTC (permalink / raw)
  To: Tyler Retzlaff
  Cc: dev, thomas, stephen, stable, Narcisa Vasile, Dmitry Kozlyuk

On Fri, Mar 17, 2023 at 7:52 PM David Marchand
<david.marchand@redhat.com> wrote:
>
> From: Tyler Retzlaff <roretzla@linux.microsoft.com>
>
> In rte_thread_create setting affinity after pthread_create may fail.
> Such a failure should result in the entire rte_thread_create failing
> but doesn't.
>
> Additionally if there is a failure to set affinity a race exists where
> the creating thread will free ctx and depending on scheduling of the new
> thread it may also free ctx (double free).
>
> Resolve the above by setting the affinity from the newly created thread
> using a condition variable to signal the completion of the thread
> start wrapper having completed.
>
> Since we are now waiting for the thread start wrapper to complete we can
> allocate the thread start wrapper context on the stack. While here clean
> up the variable naming in the context to better highlight the fields of
> the context require synchronization between the creating and created
> thread.
>
> Fixes: ce6e911d20f6 ("eal: add thread lifetime API")
> Cc: stable@dpdk.org
>
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> Signed-off-by: David Marchand <david.marchand@redhat.com>

Applied thanks.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2023-03-18 18:26 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-02 18:44 [PATCH 1/2] eal: fix failure race and behavior of thread create Tyler Retzlaff
2023-03-02 18:44 ` [PATCH 2/2] eal/windows: fix create thread failure behavior Tyler Retzlaff
2023-03-07 14:33 ` [PATCH 1/2] eal: fix failure race and behavior of thread create David Marchand
2023-03-09  9:17   ` David Marchand
2023-03-09  9:58     ` Thomas Monjalon
2023-03-09 20:49       ` Tyler Retzlaff
2023-03-09 21:05         ` David Marchand
     [not found] ` <1678750267-3829-1-git-send-email-roretzla@linux.microsoft.com>
2023-03-13 23:31   ` [PATCH v2 2/2] eal: fix failure path race setting new thread affinity Tyler Retzlaff
     [not found] ` <1678833856-9314-1-git-send-email-roretzla@linux.microsoft.com>
2023-03-14 22:44   ` [PATCH v3 " Tyler Retzlaff
     [not found] ` <1678834239-11569-1-git-send-email-roretzla@linux.microsoft.com>
2023-03-14 22:50   ` [PATCH v4 " Tyler Retzlaff
2023-03-15  1:20     ` Stephen Hemminger
2023-03-15  1:26       ` Tyler Retzlaff
     [not found] ` <1678925050-1955-1-git-send-email-roretzla@linux.microsoft.com>
2023-03-16  0:04   ` Tyler Retzlaff
     [not found] ` <1678925224-2706-1-git-send-email-roretzla@linux.microsoft.com>
2023-03-16  0:07   ` [PATCH v5 " Tyler Retzlaff
2023-03-17 10:45     ` David Marchand
2023-03-17 14:49       ` Tyler Retzlaff
2023-03-17 18:51         ` David Marchand
2023-03-17 21:20           ` Tyler Retzlaff
2023-03-17 18:52 ` [PATCH v6] eal/unix: fix thread creation David Marchand
2023-03-17 21:24   ` Tyler Retzlaff
2023-03-18 18:26     ` David Marchand
2023-03-18 18:26   ` David Marchand

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).