DPDK patches and discussions
 help / color / mirror / Atom feed
* [PATCH] eal: fix data race in multi-process support
@ 2021-12-17 18:16 Stephen Hemminger
  2021-12-17 18:29 ` Stephen Hemminger
  0 siblings, 1 reply; 8+ messages in thread
From: Stephen Hemminger @ 2021-12-17 18:16 UTC (permalink / raw)
  To: anatoly.burakov; +Cc: stable, dev, Stephen Hemminger

If DPDK is built with thread sanitizer, it reports a data race
on the multi-process file descriptor (mp_fd). The fix is to
use atomic operations when reading and updating mp_fd.

Simple example:
$ dpdk-testpmd -l 1-3 --no-huge
EAL: Detected CPU lcores: 16
EAL: Detected NUMA nodes: 1
EAL: Static memory layout is selected, amount of reserved memory can be adjusted with -m or --socket-mem
EAL: Detected static linkage of DPDK
EAL: Multi-process socket /run/user/1000/dpdk/rte/mp_socket
EAL: Selected IOVA mode 'VA'
testpmd: No probed ethernet devices
testpmd: create a new mbuf pool <mb_pool_0>: n=163456, size=2176, socket=0
testpmd: preferred mempool ops selected: ring_mp_mc
EAL: Error - exiting with code: 1
  Cause: Creation of mbuf pool for socket 0 failed: Cannot allocate memory
==================
WARNING: ThreadSanitizer: data race (pid=83054)
  Write of size 4 at 0x55e3b7fce450 by main thread:
    #0 rte_mp_channel_cleanup <null> (dpdk-testpmd+0x160d79c)
    #1 rte_eal_cleanup <null> (dpdk-testpmd+0x1614fb5)
    #2 rte_exit <null> (dpdk-testpmd+0x15ec97a)
    #3 mbuf_pool_create.cold <null> (dpdk-testpmd+0x242e1a)
    #4 main <null> (dpdk-testpmd+0x5ab05d)

  Previous read of size 4 at 0x55e3b7fce450 by thread T2:
    #0 mp_handle <null> (dpdk-testpmd+0x160c979)
    #1 ctrl_thread_init <null> (dpdk-testpmd+0x15ff76e)

  As if synchronized via sleep:
    #0 nanosleep ../../../../src/libsanitizer/tsan/tsan_interceptors_posix.cpp:362 (libtsan.so.0+0x5cd8e)
    #1 get_tsc_freq <null> (dpdk-testpmd+0x1622889)
    #2 set_tsc_freq <null> (dpdk-testpmd+0x15ffb9c)
    #3 rte_eal_timer_init <null> (dpdk-testpmd+0x1622a34)
    #4 rte_eal_init.cold <null> (dpdk-testpmd+0x26b314)
    #5 main <null> (dpdk-testpmd+0x5aab45)

  Location is global 'mp_fd' of size 4 at 0x55e3b7fce450 (dpdk-testpmd+0x0000027c7450)

  Thread T2 'rte_mp_handle' (tid=83057, running) created by main thread at:
    #0 pthread_create ../../../../src/libsanitizer/tsan/tsan_interceptors_posix.cpp:962 (libtsan.so.0+0x58ba2)
    #1 rte_ctrl_thread_create <null> (dpdk-testpmd+0x15ff870)
    #2 rte_mp_channel_init.cold <null> (dpdk-testpmd+0x269986)
    #3 rte_eal_init <null> (dpdk-testpmd+0x1615b28)
    #4 main <null> (dpdk-testpmd+0x5aab45)

SUMMARY: ThreadSanitizer: data race (/home/shemminger/DPDK/main/build/app/dpdk-testpmd+0x160d79c) in rte_mp_channel_cleanup
==================
ThreadSanitizer: reported 1 warnings

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/eal/common/eal_common_proc.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/lib/eal/common/eal_common_proc.c b/lib/eal/common/eal_common_proc.c
index ebd0f6673b8b..4e6c1af59833 100644
--- a/lib/eal/common/eal_common_proc.c
+++ b/lib/eal/common/eal_common_proc.c
@@ -262,7 +262,7 @@ rte_mp_action_unregister(const char *name)
 }
 
 static int
-read_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
+read_msg(int fd, struct mp_msg_internal *m, struct sockaddr_un *s)
 {
 	int msglen;
 	struct iovec iov;
@@ -282,7 +282,7 @@ read_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
 	msgh.msg_control = control;
 	msgh.msg_controllen = sizeof(control);
 
-	msglen = recvmsg(mp_fd, &msgh, 0);
+	msglen = recvmsg(fd, &msgh, 0);
 	if (msglen < 0) {
 		RTE_LOG(ERR, EAL, "recvmsg failed, %s\n", strerror(errno));
 		return -1;
@@ -383,9 +383,10 @@ mp_handle(void *arg __rte_unused)
 {
 	struct mp_msg_internal msg;
 	struct sockaddr_un sa;
+	int fd;
 
-	while (mp_fd >= 0) {
-		if (read_msg(&msg, &sa) == 0)
+	while ((fd = __atomic_load_n(&mp_fd, __ATOMIC_RELAXED)) >= 0) {
+		if (read_msg(fd, &msg, &sa) == 0)
 			process_msg(&msg, &sa);
 	}
 
@@ -537,14 +538,15 @@ static int
 open_socket_fd(void)
 {
 	struct sockaddr_un un;
+	int fd;
 
 	peer_name[0] = '\0';
 	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
 		snprintf(peer_name, sizeof(peer_name),
 				"%d_%"PRIx64, getpid(), rte_rdtsc());
 
-	mp_fd = socket(AF_UNIX, SOCK_DGRAM, 0);
-	if (mp_fd < 0) {
+	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+	if (fd < 0) {
 		RTE_LOG(ERR, EAL, "failed to create unix socket\n");
 		return -1;
 	}
@@ -559,12 +561,13 @@ open_socket_fd(void)
 	if (bind(mp_fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
 		RTE_LOG(ERR, EAL, "failed to bind %s: %s\n",
 			un.sun_path, strerror(errno));
-		close(mp_fd);
+		close(fd);
 		return -1;
 	}
 
 	RTE_LOG(INFO, EAL, "Multi-process socket %s\n", un.sun_path);
-	return mp_fd;
+	__atomic_store_n(&mp_fd, fd, __ATOMIC_RELAXED);
+	return fd;
 }
 
 static void
@@ -626,9 +629,8 @@ rte_mp_channel_init(void)
 			NULL, mp_handle, NULL) < 0) {
 		RTE_LOG(ERR, EAL, "failed to create mp thread: %s\n",
 			strerror(errno));
-		close(mp_fd);
 		close(dir_fd);
-		mp_fd = -1;
+		close(__atomic_exchange_n(&mp_fd, -1, __ATOMIC_RELAXED));
 		return -1;
 	}
 
@@ -644,11 +646,10 @@ rte_mp_channel_cleanup(void)
 {
 	int fd;
 
-	if (mp_fd < 0)
+	fd = __atomic_exchange_n(&mp_fd, -1, __ATOMIC_RELAXED);
+	if (fd < 0)
 		return;
 
-	fd = mp_fd;
-	mp_fd = -1;
 	pthread_cancel(mp_handle_tid);
 	pthread_join(mp_handle_tid, NULL);
 	close_socket_fd(fd);
-- 
2.30.2


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH] eal: fix data race in multi-process support
  2021-12-17 18:16 [PATCH] eal: fix data race in multi-process support Stephen Hemminger
@ 2021-12-17 18:29 ` Stephen Hemminger
  2022-02-13 11:39   ` Thomas Monjalon
                     ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Stephen Hemminger @ 2021-12-17 18:29 UTC (permalink / raw)
  To: anatoly.burakov; +Cc: dev, stable, Stephen Hemminger

If DPDK is built with thread sanitizer, it reports a data race
on the multi-process file descriptor (mp_fd). The fix is to
use atomic operations when reading and updating mp_fd.

Simple example:
$ dpdk-testpmd -l 1-3 --no-huge
...
EAL: Error - exiting with code: 1
  Cause: Creation of mbuf pool for socket 0 failed: Cannot allocate memory
==================
WARNING: ThreadSanitizer: data race (pid=83054)
  Write of size 4 at 0x55e3b7fce450 by main thread:
    #0 rte_mp_channel_cleanup <null> (dpdk-testpmd+0x160d79c)
    #1 rte_eal_cleanup <null> (dpdk-testpmd+0x1614fb5)
    #2 rte_exit <null> (dpdk-testpmd+0x15ec97a)
    #3 mbuf_pool_create.cold <null> (dpdk-testpmd+0x242e1a)
    #4 main <null> (dpdk-testpmd+0x5ab05d)

  Previous read of size 4 at 0x55e3b7fce450 by thread T2:
    #0 mp_handle <null> (dpdk-testpmd+0x160c979)
    #1 ctrl_thread_init <null> (dpdk-testpmd+0x15ff76e)

  As if synchronized via sleep:
    #0 nanosleep ../../../../src/libsanitizer/tsan/tsan_interceptors_posix.cpp:362 (libtsan.so.0+0x5cd8e)
    #1 get_tsc_freq <null> (dpdk-testpmd+0x1622889)
    #2 set_tsc_freq <null> (dpdk-testpmd+0x15ffb9c)
    #3 rte_eal_timer_init <null> (dpdk-testpmd+0x1622a34)
    #4 rte_eal_init.cold <null> (dpdk-testpmd+0x26b314)
    #5 main <null> (dpdk-testpmd+0x5aab45)

  Location is global 'mp_fd' of size 4 at 0x55e3b7fce450 (dpdk-testpmd+0x0000027c7450)

  Thread T2 'rte_mp_handle' (tid=83057, running) created by main thread at:
    #0 pthread_create ../../../../src/libsanitizer/tsan/tsan_interceptors_posix.cpp:962 (libtsan.so.0+0x58ba2)
    #1 rte_ctrl_thread_create <null> (dpdk-testpmd+0x15ff870)
    #2 rte_mp_channel_init.cold <null> (dpdk-testpmd+0x269986)
    #3 rte_eal_init <null> (dpdk-testpmd+0x1615b28)
    #4 main <null> (dpdk-testpmd+0x5aab45)

SUMMARY: ThreadSanitizer: data race (/home/shemminger/DPDK/main/build/app/dpdk-testpmd+0x160d79c) in rte_mp_channel_cleanup
==================
ThreadSanitizer: reported 1 warnings

Fixes: bacaa2754017 ("eal: add channel for multi-process communication")
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>

---
v2 - fix the mp socket bind

 lib/eal/common/eal_common_proc.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/lib/eal/common/eal_common_proc.c b/lib/eal/common/eal_common_proc.c
index ebd0f6673b8b..72c7e8f536af 100644
--- a/lib/eal/common/eal_common_proc.c
+++ b/lib/eal/common/eal_common_proc.c
@@ -262,7 +262,7 @@ rte_mp_action_unregister(const char *name)
 }
 
 static int
-read_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
+read_msg(int fd, struct mp_msg_internal *m, struct sockaddr_un *s)
 {
 	int msglen;
 	struct iovec iov;
@@ -282,7 +282,7 @@ read_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
 	msgh.msg_control = control;
 	msgh.msg_controllen = sizeof(control);
 
-	msglen = recvmsg(mp_fd, &msgh, 0);
+	msglen = recvmsg(fd, &msgh, 0);
 	if (msglen < 0) {
 		RTE_LOG(ERR, EAL, "recvmsg failed, %s\n", strerror(errno));
 		return -1;
@@ -383,9 +383,10 @@ mp_handle(void *arg __rte_unused)
 {
 	struct mp_msg_internal msg;
 	struct sockaddr_un sa;
+	int fd;
 
-	while (mp_fd >= 0) {
-		if (read_msg(&msg, &sa) == 0)
+	while ((fd = __atomic_load_n(&mp_fd, __ATOMIC_RELAXED)) >= 0) {
+		if (read_msg(fd, &msg, &sa) == 0)
 			process_msg(&msg, &sa);
 	}
 
@@ -626,9 +627,8 @@ rte_mp_channel_init(void)
 			NULL, mp_handle, NULL) < 0) {
 		RTE_LOG(ERR, EAL, "failed to create mp thread: %s\n",
 			strerror(errno));
-		close(mp_fd);
 		close(dir_fd);
-		mp_fd = -1;
+		close(__atomic_exchange_n(&mp_fd, -1, __ATOMIC_RELAXED));
 		return -1;
 	}
 
@@ -644,11 +644,10 @@ rte_mp_channel_cleanup(void)
 {
 	int fd;
 
-	if (mp_fd < 0)
+	fd = __atomic_exchange_n(&mp_fd, -1, __ATOMIC_RELAXED);
+	if (fd < 0)
 		return;
 
-	fd = mp_fd;
-	mp_fd = -1;
 	pthread_cancel(mp_handle_tid);
 	pthread_join(mp_handle_tid, NULL);
 	close_socket_fd(fd);
-- 
2.30.2


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] eal: fix data race in multi-process support
  2021-12-17 18:29 ` Stephen Hemminger
@ 2022-02-13 11:39   ` Thomas Monjalon
  2022-04-14 20:28     ` Stephen Hemminger
  2022-04-20 15:13   ` Burakov, Anatoly
  2022-09-06 16:45   ` [PATCH v2] " Stephen Hemminger
  2 siblings, 1 reply; 8+ messages in thread
From: Thomas Monjalon @ 2022-02-13 11:39 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: anatoly.burakov, stable, dev, david.marchand

17/12/2021 19:29, Stephen Hemminger:
> If DPDK is built with thread sanitizer it reports a race
> in setting of multiprocess file descriptor. The fix is to
> use atomic operations when updating mp_fd.

Could you please explain the condition of the race in more detail?
Is it between init and cleanup of the same file descriptor?
How does using atomics help here?


> 
> Simple example:
> $ dpdk-testpmd -l 1-3 --no-huge
> ...
> EAL: Error - exiting with code: 1
>   Cause: Creation of mbuf pool for socket 0 failed: Cannot allocate memory
> ==================
> WARNING: ThreadSanitizer: data race (pid=83054)
>   Write of size 4 at 0x55e3b7fce450 by main thread:
>     #0 rte_mp_channel_cleanup <null> (dpdk-testpmd+0x160d79c)
>     #1 rte_eal_cleanup <null> (dpdk-testpmd+0x1614fb5)
>     #2 rte_exit <null> (dpdk-testpmd+0x15ec97a)
>     #3 mbuf_pool_create.cold <null> (dpdk-testpmd+0x242e1a)
>     #4 main <null> (dpdk-testpmd+0x5ab05d)
> 
>   Previous read of size 4 at 0x55e3b7fce450 by thread T2:
>     #0 mp_handle <null> (dpdk-testpmd+0x160c979)
>     #1 ctrl_thread_init <null> (dpdk-testpmd+0x15ff76e)
> 
>   As if synchronized via sleep:
>     #0 nanosleep ../../../../src/libsanitizer/tsan/tsan_interceptors_posix.cpp:362 (libtsan.so.0+0x5cd8e)
>     #1 get_tsc_freq <null> (dpdk-testpmd+0x1622889)
>     #2 set_tsc_freq <null> (dpdk-testpmd+0x15ffb9c)
>     #3 rte_eal_timer_init <null> (dpdk-testpmd+0x1622a34)
>     #4 rte_eal_init.cold <null> (dpdk-testpmd+0x26b314)
>     #5 main <null> (dpdk-testpmd+0x5aab45)
> 
>   Location is global 'mp_fd' of size 4 at 0x55e3b7fce450 (dpdk-testpmd+0x0000027c7450)
> 
>   Thread T2 'rte_mp_handle' (tid=83057, running) created by main thread at:
>     #0 pthread_create ../../../../src/libsanitizer/tsan/tsan_interceptors_posix.cpp:962 (libtsan.so.0+0x58ba2)
>     #1 rte_ctrl_thread_create <null> (dpdk-testpmd+0x15ff870)
>     #2 rte_mp_channel_init.cold <null> (dpdk-testpmd+0x269986)
>     #3 rte_eal_init <null> (dpdk-testpmd+0x1615b28)
>     #4 main <null> (dpdk-testpmd+0x5aab45)




^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] eal: fix data race in multi-process support
  2022-02-13 11:39   ` Thomas Monjalon
@ 2022-04-14 20:28     ` Stephen Hemminger
  0 siblings, 0 replies; 8+ messages in thread
From: Stephen Hemminger @ 2022-04-14 20:28 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: anatoly.burakov, stable, dev, david.marchand

On Sun, 13 Feb 2022 12:39:59 +0100
Thomas Monjalon <thomas@monjalon.net> wrote:

> 17/12/2021 19:29, Stephen Hemminger:
> > If DPDK is built with thread sanitizer it reports a race
> > in setting of multiprocess file descriptor. The fix is to
> > use atomic operations when updating mp_fd.  
> 
> Please could explain more the condition of the race?
> Is it between init and cleanup of the same file descriptor?
> How atomic is helping here?
> 
> 
> > 
> > Simple example:
> > $ dpdk-testpmd -l 1-3 --no-huge
> > ...
> > EAL: Error - exiting with code: 1
> >   Cause: Creation of mbuf pool for socket 0 failed: Cannot allocate memory
> > ==================
> > WARNING: ThreadSanitizer: data race (pid=83054)
> >   Write of size 4 at 0x55e3b7fce450 by main thread:
> >     #0 rte_mp_channel_cleanup <null> (dpdk-testpmd+0x160d79c)
> >     #1 rte_eal_cleanup <null> (dpdk-testpmd+0x1614fb5)
> >     #2 rte_exit <null> (dpdk-testpmd+0x15ec97a)
> >     #3 mbuf_pool_create.cold <null> (dpdk-testpmd+0x242e1a)
> >     #4 main <null> (dpdk-testpmd+0x5ab05d)
> > 
> >   Previous read of size 4 at 0x55e3b7fce450 by thread T2:
> >     #0 mp_handle <null> (dpdk-testpmd+0x160c979)
> >     #1 ctrl_thread_init <null> (dpdk-testpmd+0x15ff76e)
> > 
> >   As if synchronized via sleep:
> >     #0 nanosleep ../../../../src/libsanitizer/tsan/tsan_interceptors_posix.cpp:362 (libtsan.so.0+0x5cd8e)
> >     #1 get_tsc_freq <null> (dpdk-testpmd+0x1622889)
> >     #2 set_tsc_freq <null> (dpdk-testpmd+0x15ffb9c)
> >     #3 rte_eal_timer_init <null> (dpdk-testpmd+0x1622a34)
> >     #4 rte_eal_init.cold <null> (dpdk-testpmd+0x26b314)
> >     #5 main <null> (dpdk-testpmd+0x5aab45)
> > 
> >   Location is global 'mp_fd' of size 4 at 0x55e3b7fce450 (dpdk-testpmd+0x0000027c7450)
> > 
> >   Thread T2 'rte_mp_handle' (tid=83057, running) created by main thread at:
> >     #0 pthread_create ../../../../src/libsanitizer/tsan/tsan_interceptors_posix.cpp:962 (libtsan.so.0+0x58ba2)
> >     #1 rte_ctrl_thread_create <null> (dpdk-testpmd+0x15ff870)
> >     #2 rte_mp_channel_init.cold <null> (dpdk-testpmd+0x269986)
> >     #3 rte_eal_init <null> (dpdk-testpmd+0x1615b28)
> >     #4 main <null> (dpdk-testpmd+0x5aab45)  
> 
> 
> 

The issue is that two threads are sharing a global variable without barriers or atomics.
The variable mp_fd is written by the thread that calls rte_mp_channel_init/rte_mp_channel_cleanup
(the main thread in the trace above) but is read by the thread that handles multi-process
messages (mp_handle).

This sharing of global data without a barrier or lock is unsafe/undefined, and can
break on weakly ordered CPUs like ARM.

I am kind of surprised that we have not seen a bug already, since the compiler could
decide that mp_fd in the function mp_handle() is loop-invariant, never re-test it, and
let the thread run forever.

This is a bug from the beginning of MP support in DPDK.



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] eal: fix data race in multi-process support
  2021-12-17 18:29 ` Stephen Hemminger
  2022-02-13 11:39   ` Thomas Monjalon
@ 2022-04-20 15:13   ` Burakov, Anatoly
  2022-09-06 16:45   ` [PATCH v2] " Stephen Hemminger
  2 siblings, 0 replies; 8+ messages in thread
From: Burakov, Anatoly @ 2022-04-20 15:13 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, stable

On 17-Dec-21 6:29 PM, Stephen Hemminger wrote:
> If DPDK is built with thread sanitizer it reports a race
> in setting of multiprocess file descriptor. The fix is to
> use atomic operations when updating mp_fd.
> 
> Simple example:
> $ dpdk-testpmd -l 1-3 --no-huge
> ...
> EAL: Error - exiting with code: 1
>    Cause: Creation of mbuf pool for socket 0 failed: Cannot allocate memory
> ==================
> WARNING: ThreadSanitizer: data race (pid=83054)
>    Write of size 4 at 0x55e3b7fce450 by main thread:
>      #0 rte_mp_channel_cleanup <null> (dpdk-testpmd+0x160d79c)
>      #1 rte_eal_cleanup <null> (dpdk-testpmd+0x1614fb5)
>      #2 rte_exit <null> (dpdk-testpmd+0x15ec97a)
>      #3 mbuf_pool_create.cold <null> (dpdk-testpmd+0x242e1a)
>      #4 main <null> (dpdk-testpmd+0x5ab05d)
> 
>    Previous read of size 4 at 0x55e3b7fce450 by thread T2:
>      #0 mp_handle <null> (dpdk-testpmd+0x160c979)
>      #1 ctrl_thread_init <null> (dpdk-testpmd+0x15ff76e)
> 
>    As if synchronized via sleep:
>      #0 nanosleep ../../../../src/libsanitizer/tsan/tsan_interceptors_posix.cpp:362 (libtsan.so.0+0x5cd8e)
>      #1 get_tsc_freq <null> (dpdk-testpmd+0x1622889)
>      #2 set_tsc_freq <null> (dpdk-testpmd+0x15ffb9c)
>      #3 rte_eal_timer_init <null> (dpdk-testpmd+0x1622a34)
>      #4 rte_eal_init.cold <null> (dpdk-testpmd+0x26b314)
>      #5 main <null> (dpdk-testpmd+0x5aab45)
> 
>    Location is global 'mp_fd' of size 4 at 0x55e3b7fce450 (dpdk-testpmd+0x0000027c7450)
> 
>    Thread T2 'rte_mp_handle' (tid=83057, running) created by main thread at:
>      #0 pthread_create ../../../../src/libsanitizer/tsan/tsan_interceptors_posix.cpp:962 (libtsan.so.0+0x58ba2)
>      #1 rte_ctrl_thread_create <null> (dpdk-testpmd+0x15ff870)
>      #2 rte_mp_channel_init.cold <null> (dpdk-testpmd+0x269986)
>      #3 rte_eal_init <null> (dpdk-testpmd+0x1615b28)
>      #4 main <null> (dpdk-testpmd+0x5aab45)
> 
> SUMMARY: ThreadSanitizer: data race (/home/shemminger/DPDK/main/build/app/dpdk-testpmd+0x160d79c) in rte_mp_channel_cleanup
> ==================
> ThreadSanitizer: reported 1 warnings
> 
> Fixes: bacaa2754017 ("eal: add channel for multi-process communication")
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> 
> ---
> v2 - fix the mp socket bind
> 

Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>

-- 
Thanks,
Anatoly

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v2] eal: fix data race in multi-process support
  2021-12-17 18:29 ` Stephen Hemminger
  2022-02-13 11:39   ` Thomas Monjalon
  2022-04-20 15:13   ` Burakov, Anatoly
@ 2022-09-06 16:45   ` Stephen Hemminger
  2022-09-07  0:31     ` fengchengwen
  2022-10-09 23:53     ` Thomas Monjalon
  2 siblings, 2 replies; 8+ messages in thread
From: Stephen Hemminger @ 2022-09-06 16:45 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Anatoly Burakov

If DPDK is built with thread sanitizer, it reports a data race
on the multi-process file descriptor (mp_fd). The fix is to
use atomic operations when reading and updating mp_fd.

Build:
$ meson -Db_sanitize=thread build
$ ninja -C build

Simple example:
$ ./build/app/dpdk-testpmd -l 1-3 --no-huge
EAL: Detected CPU lcores: 16
EAL: Detected NUMA nodes: 1
EAL: Static memory layout is selected, amount of reserved memory can be adjusted with -m or --socket-mem
EAL: Detected static linkage of DPDK
EAL: Multi-process socket /run/user/1000/dpdk/rte/mp_socket
EAL: Selected IOVA mode 'VA'
testpmd: No probed ethernet devices
testpmd: create a new mbuf pool <mb_pool_0>: n=163456, size=2176, socket=0
testpmd: preferred mempool ops selected: ring_mp_mc
EAL: Error - exiting with code: 1
  Cause: Creation of mbuf pool for socket 0 failed: Cannot allocate memory
==================
WARNING: ThreadSanitizer: data race (pid=87245)
  Write of size 4 at 0x558e04d8ff70 by main thread:
    #0 rte_mp_channel_cleanup <null> (dpdk-testpmd+0x1e7d30c)
    #1 rte_eal_cleanup <null> (dpdk-testpmd+0x1e85929)
    #2 rte_exit <null> (dpdk-testpmd+0x1e5bc0a)
    #3 mbuf_pool_create.cold <null> (dpdk-testpmd+0x274011)
    #4 main <null> (dpdk-testpmd+0x5cc15d)

  Previous read of size 4 at 0x558e04d8ff70 by thread T2:
    #0 mp_handle <null> (dpdk-testpmd+0x1e7c439)
    #1 ctrl_thread_init <null> (dpdk-testpmd+0x1e6ee1e)

  As if synchronized via sleep:
    #0 nanosleep ../../../../src/libsanitizer/tsan/tsan_interceptors_posix.cpp:366 (libtsan.so.0+0x6075e)
    #1 get_tsc_freq <null> (dpdk-testpmd+0x1e92ff9)
    #2 set_tsc_freq <null> (dpdk-testpmd+0x1e6f2fc)
    #3 rte_eal_timer_init <null> (dpdk-testpmd+0x1e931a4)
    #4 rte_eal_init.cold <null> (dpdk-testpmd+0x29e578)
    #5 main <null> (dpdk-testpmd+0x5cbc45)

  Location is global 'mp_fd' of size 4 at 0x558e04d8ff70 (dpdk-testpmd+0x000003122f70)

  Thread T2 'rte_mp_handle' (tid=87248, running) created by main thread at:
    #0 pthread_create ../../../../src/libsanitizer/tsan/tsan_interceptors_posix.cpp:969 (libtsan.so.0+0x5ad75)
    #1 rte_ctrl_thread_create <null> (dpdk-testpmd+0x1e6efd0)
    #2 rte_mp_channel_init.cold <null> (dpdk-testpmd+0x29cb7c)
    #3 rte_eal_init <null> (dpdk-testpmd+0x1e8662e)
    #4 main <null> (dpdk-testpmd+0x5cbc45)

SUMMARY: ThreadSanitizer: data race (/home/shemminger/DPDK/main/build/app/dpdk-testpmd+0x1e7d30c) in rte_mp_channel_cleanup
==================
ThreadSanitizer: reported 1 warnings

Fixes: bacaa2754017 ("eal: add channel for multi-process communication")
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 lib/eal/common/eal_common_proc.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/lib/eal/common/eal_common_proc.c b/lib/eal/common/eal_common_proc.c
index 313060528fec..1fc1d6c53bd2 100644
--- a/lib/eal/common/eal_common_proc.c
+++ b/lib/eal/common/eal_common_proc.c
@@ -260,7 +260,7 @@ rte_mp_action_unregister(const char *name)
 }
 
 static int
-read_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
+read_msg(int fd, struct mp_msg_internal *m, struct sockaddr_un *s)
 {
 	int msglen;
 	struct iovec iov;
@@ -281,7 +281,7 @@ read_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
 	msgh.msg_controllen = sizeof(control);
 
 retry:
-	msglen = recvmsg(mp_fd, &msgh, 0);
+	msglen = recvmsg(fd, &msgh, 0);
 
 	/* zero length message means socket was closed */
 	if (msglen == 0)
@@ -390,11 +390,12 @@ mp_handle(void *arg __rte_unused)
 {
 	struct mp_msg_internal msg;
 	struct sockaddr_un sa;
+	int fd;
 
-	while (mp_fd >= 0) {
+	while ((fd = __atomic_load_n(&mp_fd, __ATOMIC_RELAXED)) >= 0) {
 		int ret;
 
-		ret = read_msg(&msg, &sa);
+		ret = read_msg(fd, &msg, &sa);
 		if (ret <= 0)
 			break;
 
@@ -638,9 +639,8 @@ rte_mp_channel_init(void)
 			NULL, mp_handle, NULL) < 0) {
 		RTE_LOG(ERR, EAL, "failed to create mp thread: %s\n",
 			strerror(errno));
-		close(mp_fd);
 		close(dir_fd);
-		mp_fd = -1;
+		close(__atomic_exchange_n(&mp_fd, -1, __ATOMIC_RELAXED));
 		return -1;
 	}
 
@@ -656,11 +656,10 @@ rte_mp_channel_cleanup(void)
 {
 	int fd;
 
-	if (mp_fd < 0)
+	fd = __atomic_exchange_n(&mp_fd, -1, __ATOMIC_RELAXED);
+	if (fd < 0)
 		return;
 
-	fd = mp_fd;
-	mp_fd = -1;
 	pthread_cancel(mp_handle_tid);
 	pthread_join(mp_handle_tid, NULL);
 	close_socket_fd(fd);
-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] eal: fix data race in multi-process support
  2022-09-06 16:45   ` [PATCH v2] " Stephen Hemminger
@ 2022-09-07  0:31     ` fengchengwen
  2022-10-09 23:53     ` Thomas Monjalon
  1 sibling, 0 replies; 8+ messages in thread
From: fengchengwen @ 2022-09-07  0:31 UTC (permalink / raw)
  To: Stephen Hemminger, dev; +Cc: Anatoly Burakov

Reviewed-by: Chengwen Feng <fengchengwen@huawei.com>

On 2022/9/7 0:45, Stephen Hemminger wrote:
> If DPDK is built with thread sanitizer it reports a race
> in setting of multiprocess file descriptor. The fix is to
> use atomic operations when updating mp_fd.
> 
> Build:
> $ meson -Db_sanitize=address build
> $ ninja -C build
> 
> Simple example:
> $ .build/app/dpdk-testpmd -l 1-3 --no-huge
> EAL: Detected CPU lcores: 16
> EAL: Detected NUMA nodes: 1
> EAL: Static memory layout is selected, amount of reserved memory can be adjusted with -m or --socket-mem
> EAL: Detected static linkage of DPDK
> EAL: Multi-process socket /run/user/1000/dpdk/rte/mp_socket
> EAL: Selected IOVA mode 'VA'
> testpmd: No probed ethernet devices
> testpmd: create a new mbuf pool <mb_pool_0>: n=163456, size=2176, socket=0
> testpmd: preferred mempool ops selected: ring_mp_mc
> EAL: Error - exiting with code: 1
>   Cause: Creation of mbuf pool for socket 0 failed: Cannot allocate memory
> ==================
> WARNING: ThreadSanitizer: data race (pid=87245)
>   Write of size 4 at 0x558e04d8ff70 by main thread:
>     #0 rte_mp_channel_cleanup <null> (dpdk-testpmd+0x1e7d30c)
>     #1 rte_eal_cleanup <null> (dpdk-testpmd+0x1e85929)
>     #2 rte_exit <null> (dpdk-testpmd+0x1e5bc0a)
>     #3 mbuf_pool_create.cold <null> (dpdk-testpmd+0x274011)
>     #4 main <null> (dpdk-testpmd+0x5cc15d)
> 
>   Previous read of size 4 at 0x558e04d8ff70 by thread T2:
>     #0 mp_handle <null> (dpdk-testpmd+0x1e7c439)
>     #1 ctrl_thread_init <null> (dpdk-testpmd+0x1e6ee1e)
> 
>   As if synchronized via sleep:
>     #0 nanosleep ../../../../src/libsanitizer/tsan/tsan_interceptors_posix.cpp:366 (libtsan.so.0+0x6075e)
>     #1 get_tsc_freq <null> (dpdk-testpmd+0x1e92ff9)
>     #2 set_tsc_freq <null> (dpdk-testpmd+0x1e6f2fc)
>     #3 rte_eal_timer_init <null> (dpdk-testpmd+0x1e931a4)
>     #4 rte_eal_init.cold <null> (dpdk-testpmd+0x29e578)
>     #5 main <null> (dpdk-testpmd+0x5cbc45)
> 
>   Location is global 'mp_fd' of size 4 at 0x558e04d8ff70 (dpdk-testpmd+0x000003122f70)
> 
>   Thread T2 'rte_mp_handle' (tid=87248, running) created by main thread at:
>     #0 pthread_create ../../../../src/libsanitizer/tsan/tsan_interceptors_posix.cpp:969 (libtsan.so.0+0x5ad75)
>     #1 rte_ctrl_thread_create <null> (dpdk-testpmd+0x1e6efd0)
>     #2 rte_mp_channel_init.cold <null> (dpdk-testpmd+0x29cb7c)
>     #3 rte_eal_init <null> (dpdk-testpmd+0x1e8662e)
>     #4 main <null> (dpdk-testpmd+0x5cbc45)
> 
> SUMMARY: ThreadSanitizer: data race (/home/shemminger/DPDK/main/build/app/dpdk-testpmd+0x1e7d30c) in rte_mp_channel_cleanup
> ==================
> ThreadSanitizer: reported 1 warnings
> 
> Fixes: bacaa2754017 ("eal: add channel for multi-process communication")
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>
> ---
>  lib/eal/common/eal_common_proc.c | 17 ++++++++---------
>  1 file changed, 8 insertions(+), 9 deletions(-)
> 
> diff --git a/lib/eal/common/eal_common_proc.c b/lib/eal/common/eal_common_proc.c
> index 313060528fec..1fc1d6c53bd2 100644
> --- a/lib/eal/common/eal_common_proc.c
> +++ b/lib/eal/common/eal_common_proc.c
> @@ -260,7 +260,7 @@ rte_mp_action_unregister(const char *name)
>  }
>  

...

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] eal: fix data race in multi-process support
  2022-09-06 16:45   ` [PATCH v2] " Stephen Hemminger
  2022-09-07  0:31     ` fengchengwen
@ 2022-10-09 23:53     ` Thomas Monjalon
  1 sibling, 0 replies; 8+ messages in thread
From: Thomas Monjalon @ 2022-10-09 23:53 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, Anatoly Burakov, stable

06/09/2022 18:45, Stephen Hemminger:
> If DPDK is built with thread sanitizer it reports a race
> in setting of multiprocess file descriptor. The fix is to
> use atomic operations when updating mp_fd.
> 
> Build:
> $ meson -Db_sanitize=address build
> $ ninja -C build
> 
> Simple example:
> $ .build/app/dpdk-testpmd -l 1-3 --no-huge
> EAL: Detected CPU lcores: 16
> EAL: Detected NUMA nodes: 1
> EAL: Static memory layout is selected, amount of reserved memory can be adjusted with -m or --socket-mem
> EAL: Detected static linkage of DPDK
> EAL: Multi-process socket /run/user/1000/dpdk/rte/mp_socket
> EAL: Selected IOVA mode 'VA'
> testpmd: No probed ethernet devices
> testpmd: create a new mbuf pool <mb_pool_0>: n=163456, size=2176, socket=0
> testpmd: preferred mempool ops selected: ring_mp_mc
> EAL: Error - exiting with code: 1
>   Cause: Creation of mbuf pool for socket 0 failed: Cannot allocate memory
> ==================
> WARNING: ThreadSanitizer: data race (pid=87245)
>   Write of size 4 at 0x558e04d8ff70 by main thread:
>     #0 rte_mp_channel_cleanup <null> (dpdk-testpmd+0x1e7d30c)
>     #1 rte_eal_cleanup <null> (dpdk-testpmd+0x1e85929)
>     #2 rte_exit <null> (dpdk-testpmd+0x1e5bc0a)
>     #3 mbuf_pool_create.cold <null> (dpdk-testpmd+0x274011)
>     #4 main <null> (dpdk-testpmd+0x5cc15d)
> 
>   Previous read of size 4 at 0x558e04d8ff70 by thread T2:
>     #0 mp_handle <null> (dpdk-testpmd+0x1e7c439)
>     #1 ctrl_thread_init <null> (dpdk-testpmd+0x1e6ee1e)
> 
>   As if synchronized via sleep:
>     #0 nanosleep ../../../../src/libsanitizer/tsan/tsan_interceptors_posix.cpp:366 (libtsan.so.0+0x6075e)
>     #1 get_tsc_freq <null> (dpdk-testpmd+0x1e92ff9)
>     #2 set_tsc_freq <null> (dpdk-testpmd+0x1e6f2fc)
>     #3 rte_eal_timer_init <null> (dpdk-testpmd+0x1e931a4)
>     #4 rte_eal_init.cold <null> (dpdk-testpmd+0x29e578)
>     #5 main <null> (dpdk-testpmd+0x5cbc45)
> 
>   Location is global 'mp_fd' of size 4 at 0x558e04d8ff70 (dpdk-testpmd+0x000003122f70)
> 
>   Thread T2 'rte_mp_handle' (tid=87248, running) created by main thread at:
>     #0 pthread_create ../../../../src/libsanitizer/tsan/tsan_interceptors_posix.cpp:969 (libtsan.so.0+0x5ad75)
>     #1 rte_ctrl_thread_create <null> (dpdk-testpmd+0x1e6efd0)
>     #2 rte_mp_channel_init.cold <null> (dpdk-testpmd+0x29cb7c)
>     #3 rte_eal_init <null> (dpdk-testpmd+0x1e8662e)
>     #4 main <null> (dpdk-testpmd+0x5cbc45)
> 
> SUMMARY: ThreadSanitizer: data race (/home/shemminger/DPDK/main/build/app/dpdk-testpmd+0x1e7d30c) in rte_mp_channel_cleanup
> ==================
> ThreadSanitizer: reported 1 warnings
> 
> Fixes: bacaa2754017 ("eal: add channel for multi-process communication")
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>
Reviewed-by: Chengwen Feng <fengchengwen@huawei.com>

+Cc: stable@dpdk.org

Applied, thanks.



^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-10-09 23:53 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-17 18:16 [PATCH] eal: fix data race in multi-process support Stephen Hemminger
2021-12-17 18:29 ` Stephen Hemminger
2022-02-13 11:39   ` Thomas Monjalon
2022-04-14 20:28     ` Stephen Hemminger
2022-04-20 15:13   ` Burakov, Anatoly
2022-09-06 16:45   ` [PATCH v2] " Stephen Hemminger
2022-09-07  0:31     ` fengchengwen
2022-10-09 23:53     ` Thomas Monjalon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).