DPDK patches and discussions
 help / color / mirror / Atom feed
* [PATCH 0/5] addressing races in concurrent process startup
@ 2023-12-12  4:25 Artemy Kovalyov
  2023-12-12  4:25 ` [PATCH 1/5] app/test-pm: add multiprocess test Artemy Kovalyov
                   ` (4 more replies)
  0 siblings, 5 replies; 21+ messages in thread
From: Artemy Kovalyov @ 2023-12-12  4:25 UTC (permalink / raw)
  To: dev; +Cc: Thomas Monjalon

In the process of initiating multiple processes concurrently, specifically with
automatic detection of the primary process, certain race conditions have been
identified. This patch series introduces a straightforward test that showcases
the issue and subsequently addresses the problems surfaced by the test. These
fixes aim to ensure the robust and secure utilization of DPDK within intricate
solutions that involve starting processes with job orchestrators such as Slurm
or Hadoop YARN.

Artemy Kovalyov (5):
  app/test-pm: add multiprocess test
  eal: fix multiprocess hotplug race
  ipc: fix mp channel closure to prevent message loss
  eal: fix first time primary autodetect
  eal: fix memzone fbarray cleanup

 app/meson.build                     |  1 +
 app/test-mp/main.c                  | 49 +++++++++++++++++++++++++++++++++++++
 app/test-mp/meson.build             |  8 ++++++
 app/test-mp/run.sh                  | 39 +++++++++++++++++++++++++++++
 lib/eal/common/eal_common_memzone.c | 12 +++++++++
 lib/eal/common/eal_common_proc.c    |  4 +--
 lib/eal/common/eal_private.h        |  5 ++++
 lib/eal/common/hotplug_mp.c         |  3 +++
 lib/eal/linux/eal.c                 |  3 ++-
 9 files changed, 121 insertions(+), 3 deletions(-)
 create mode 100644 app/test-mp/main.c
 create mode 100644 app/test-mp/meson.build
 create mode 100755 app/test-mp/run.sh

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 1/5] app/test-pm: add multiprocess test
  2023-12-12  4:25 [PATCH 0/5] addressing races in concurrent process startup Artemy Kovalyov
@ 2023-12-12  4:25 ` Artemy Kovalyov
  2023-12-12 17:09   ` Stephen Hemminger
                     ` (3 more replies)
  2023-12-12  4:25 ` [PATCH 2/5] eal: fix multiprocess hotplug race Artemy Kovalyov
                   ` (3 subsequent siblings)
  4 siblings, 4 replies; 21+ messages in thread
From: Artemy Kovalyov @ 2023-12-12  4:25 UTC (permalink / raw)
  To: dev; +Cc: Thomas Monjalon, Anatoly Burakov

This commit adds a test scenario that initiates multiple processes
concurrently. These processes attach to the same shared heap, with an
automatic detection mechanism to identify the primary process.

Signed-off-by: Artemy Kovalyov <artemyko@nvidia.com>
---
 app/meson.build         |  1 +
 app/test-mp/main.c      | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 app/test-mp/meson.build |  8 ++++++++
 app/test-mp/run.sh      | 39 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 97 insertions(+)
 create mode 100644 app/test-mp/main.c
 create mode 100644 app/test-mp/meson.build
 create mode 100755 app/test-mp/run.sh

diff --git a/app/meson.build b/app/meson.build
index 8aaed59..1b80091 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -30,6 +30,7 @@ apps = [
         'test-flow-perf',
         'test-gpudev',
         'test-mldev',
+        'test-mp',
         'test-pipeline',
         'test-pmd',
         'test-regex',
diff --git a/app/test-mp/main.c b/app/test-mp/main.c
new file mode 100644
index 0000000..0a0fbbf
--- /dev/null
+++ b/app/test-mp/main.c
@@ -0,0 +1,49 @@
+#include <stdio.h>
+#include <string.h>
+
+#include <rte_malloc.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+
+rte_atomic32_t g_count;
+
+static int
+done(const struct rte_mp_msg *msg __rte_unused, const void *arg __rte_unused)
+{
+	rte_atomic32_dec(&g_count);
+	return 0;
+}
+
+int
+main(int argc, char **argv)
+{
+	void *p;
+	int ret;
+
+	ret = rte_eal_init(argc, argv);
+	assert(ret >= 0);
+
+	rte_atomic32_set(&g_count, atoi(argv[++ret]));
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		ret = rte_mp_action_register("done", done);
+		assert(ret == 0);
+	}
+
+	p = rte_malloc(NULL, 0x1000000, 0x1000);
+	assert(p);
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		uint64_t timeout = rte_rdtsc() + 5 * rte_get_tsc_hz();
+
+		while (rte_atomic32_read(&g_count) > 0)
+			assert(rte_rdtsc() < timeout);
+	} else {
+		struct rte_mp_msg msg = { .name = "done" };
+
+		rte_mp_sendmsg(&msg);
+	}
+
+	rte_eal_cleanup();
+	return 0;
+}
diff --git a/app/test-mp/meson.build b/app/test-mp/meson.build
new file mode 100644
index 0000000..feb9e20
--- /dev/null
+++ b/app/test-mp/meson.build
@@ -0,0 +1,8 @@
+if is_windows
+    build = false
+    reason = 'not supported on Windows'
+    subdir_done()
+endif
+
+sources = files('main.c')
+deps = ['eal'] # , 'mempool', 'net', 'mbuf', 'ethdev', 'cmdline']
diff --git a/app/test-mp/run.sh b/app/test-mp/run.sh
new file mode 100755
index 0000000..8de07e2
--- /dev/null
+++ b/app/test-mp/run.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+logdir=/tmp/dpdk_test_mp
+repeat=1
+lastcore=$(($(nproc) - 1))
+log=1
+
+while getopts p:r:lL:d op; do case $op in
+    p) lastcore=$OPTARG ;;
+    r) repeat=$OPTARG ;;
+    L) logdir=$OPTARG ;;
+    l) log=0 ;;
+    d) debug=1 ;;
+esac done
+shift $((OPTIND-1))
+
+test=$1
+logpath=$logdir/$(date +%y%m%d-%H%M%S)
+
+rm -f core.*
+pkill dpdk-test-mp
+
+for j in $(seq $repeat) ; do
+    [ $log ] && mkdir -p $logpath/$j
+    for i in $(seq 0 $lastcore) ; do
+	args="-l $i --file-prefix=dpdk1 --proc-type=auto"
+	if [ $debug ] ; then
+	    args="$args --log-level=lib.eal:8"
+	fi
+	if [ $log ] ; then
+	    $test $args $lastcore >$logpath/$j/$i.log 2>&1 &
+	else
+	    $test $args $lastcore &
+	fi
+    done
+    wait || break
+    [ $(ls core.* 2>/dev/null | wc -l) -gt 0 ] && break
+    echo iteration $j passed
+done
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 2/5] eal: fix multiprocess hotplug race
  2023-12-12  4:25 [PATCH 0/5] addressing races in concurrent process startup Artemy Kovalyov
  2023-12-12  4:25 ` [PATCH 1/5] app/test-pm: add multiprocess test Artemy Kovalyov
@ 2023-12-12  4:25 ` Artemy Kovalyov
  2023-12-12  4:25 ` [PATCH 3/5] ipc: fix mp channel closure to prevent message loss Artemy Kovalyov
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 21+ messages in thread
From: Artemy Kovalyov @ 2023-12-12  4:25 UTC (permalink / raw)
  To: dev; +Cc: Thomas Monjalon, stable, Qi Zhang, Anatoly Burakov

There exists a time gap between the creation of the multiprocess channel
and the registration of request action handlers. Within this window, a
secondary process that receives an eal_dev_mp_request broadcast
notification might respond with ENOTSUP. This, in turn, causes the
rte_dev_probe() operation to fail in another secondary process.
To avoid this, disregarding ENOTSUP responses to attach notifications.

Fixes: 244d5130719c ("eal: enable hotplug on multi-process")
Cc: stable@dpdk.org

Signed-off-by: Artemy Kovalyov <artemyko@nvidia.com>
---
 lib/eal/common/hotplug_mp.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lib/eal/common/hotplug_mp.c b/lib/eal/common/hotplug_mp.c
index 6027819..e6a3f6b 100644
--- a/lib/eal/common/hotplug_mp.c
+++ b/lib/eal/common/hotplug_mp.c
@@ -428,6 +428,9 @@ int eal_dev_hotplug_request_to_secondary(struct eal_dev_mp_req *req)
 			if (req->t == EAL_DEV_REQ_TYPE_ATTACH &&
 				resp->result == -EEXIST)
 				continue;
+			if (req->t == EAL_DEV_REQ_TYPE_ATTACH &&
+				resp->result == -ENOTSUP)
+				continue;
 			if (req->t == EAL_DEV_REQ_TYPE_DETACH &&
 				resp->result == -ENOENT)
 				continue;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 3/5] ipc: fix mp channel closure to prevent message loss
  2023-12-12  4:25 [PATCH 0/5] addressing races in concurrent process startup Artemy Kovalyov
  2023-12-12  4:25 ` [PATCH 1/5] app/test-pm: add multiprocess test Artemy Kovalyov
  2023-12-12  4:25 ` [PATCH 2/5] eal: fix multiprocess hotplug race Artemy Kovalyov
@ 2023-12-12  4:25 ` Artemy Kovalyov
  2023-12-12  4:25 ` [PATCH 4/5] eal: fix first time primary autodetect Artemy Kovalyov
  2023-12-12  4:25 ` [PATCH 5/5] eal: fix memzone fbarray cleanup Artemy Kovalyov
  4 siblings, 0 replies; 21+ messages in thread
From: Artemy Kovalyov @ 2023-12-12  4:25 UTC (permalink / raw)
  To: dev
  Cc: Thomas Monjalon, stable, Anatoly Burakov, Maxime Coquelin,
	David Marchand

This commit addresses an issue related to the cleanup of the
multiprocess channel. Previously, when closing the channel, there was a
risk of losing trailing messages. This issue was particularly noticeable
when broadcast message from primary to secondary processes was sent
while a secondary process was closing it's mp channel. In this fix, we
delete mp socket file before stopping mp receive thread.

Fixes: e7885281ded1 ("ipc: stop mp control thread on cleanup")
Cc: stable@dpdk.org

Signed-off-by: Artemy Kovalyov <artemyko@nvidia.com>
---
 lib/eal/common/eal_common_proc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/eal/common/eal_common_proc.c b/lib/eal/common/eal_common_proc.c
index 728815c..d34fdda 100644
--- a/lib/eal/common/eal_common_proc.c
+++ b/lib/eal/common/eal_common_proc.c
@@ -593,7 +593,7 @@ enum async_action {
 }
 
 static void
-close_socket_fd(int fd)
+remove_socket_fd(int fd)
 {
 	char path[PATH_MAX];
 
@@ -672,9 +672,9 @@ enum async_action {
 	if (fd < 0)
 		return;
 
+	remove_socket_fd(fd);
 	pthread_cancel((pthread_t)mp_handle_tid.opaque_id);
 	rte_thread_join(mp_handle_tid, NULL);
-	close_socket_fd(fd);
 }
 
 /**
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 4/5] eal: fix first time primary autodetect
  2023-12-12  4:25 [PATCH 0/5] addressing races in concurrent process startup Artemy Kovalyov
                   ` (2 preceding siblings ...)
  2023-12-12  4:25 ` [PATCH 3/5] ipc: fix mp channel closure to prevent message loss Artemy Kovalyov
@ 2023-12-12  4:25 ` Artemy Kovalyov
  2023-12-12  4:25 ` [PATCH 5/5] eal: fix memzone fbarray cleanup Artemy Kovalyov
  4 siblings, 0 replies; 21+ messages in thread
From: Artemy Kovalyov @ 2023-12-12  4:25 UTC (permalink / raw)
  To: dev; +Cc: Thomas Monjalon, stable

If the configuration file is absent, the autodetection function should
generate and secure it. Otherwise, multiple simultaneous openings could
erroneously identify themselves as primary instances.

Fixes: af75078fece3 ("first public release")
Cc: stable@dpdk.org

Signed-off-by: Artemy Kovalyov <artemyko@nvidia.com>
---
 lib/eal/linux/eal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
index 57da058..9b59cec 100644
--- a/lib/eal/linux/eal.c
+++ b/lib/eal/linux/eal.c
@@ -360,7 +360,7 @@ enum rte_proc_type_t
 		 * keep that open and don't close it to prevent a race condition
 		 * between multiple opens.
 		 */
-		if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
+		if (((mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0600)) >= 0) &&
 				(fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
 			ptype = RTE_PROC_SECONDARY;
 	}
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 5/5] eal: fix memzone fbarray cleanup
  2023-12-12  4:25 [PATCH 0/5] addressing races in concurrent process startup Artemy Kovalyov
                   ` (3 preceding siblings ...)
  2023-12-12  4:25 ` [PATCH 4/5] eal: fix first time primary autodetect Artemy Kovalyov
@ 2023-12-12  4:25 ` Artemy Kovalyov
  4 siblings, 0 replies; 21+ messages in thread
From: Artemy Kovalyov @ 2023-12-12  4:25 UTC (permalink / raw)
  To: dev; +Cc: Thomas Monjalon, stable, Anatoly Burakov

The initialization of the Memzone file-backed array ensures its
uniqueness by employing an exclusive lock. This is crucial because only
one primary process can exist per specific shm_id, which is further
protected by the exclusive EAL runtime configuration lock.

However, during the process closure, the exclusive lock on both the
fbarray and the configuration is not explicitly released. The
responsibility of releasing these locks is left to the generic quit
procedure. This can lead to a potential race condition when the
configuration is released before the fbarray.

To address this, we propose explicitly closing the memzone fbarray. This
ensures proper order of operations during process closure and prevents
any potential race conditions arising from the mismatched lock release
timings.

Fixes: af75078fece3 ("first public release")
Cc: stable@dpdk.org

Signed-off-by: Artemy Kovalyov <artemyko@nvidia.com>
---
 lib/eal/common/eal_common_memzone.c | 12 ++++++++++++
 lib/eal/common/eal_private.h        |  5 +++++
 lib/eal/linux/eal.c                 |  1 +
 3 files changed, 18 insertions(+)

diff --git a/lib/eal/common/eal_common_memzone.c b/lib/eal/common/eal_common_memzone.c
index 1f3e701..7db8029 100644
--- a/lib/eal/common/eal_common_memzone.c
+++ b/lib/eal/common/eal_common_memzone.c
@@ -447,6 +447,18 @@
 	return ret;
 }
 
+void
+rte_eal_memzone_cleanup(void)
+{
+	struct rte_mem_config *mcfg;
+
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		rte_fbarray_destroy(&mcfg->memzones);
+	}
+}
+
 /* Walk all reserved memory zones */
 void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *),
 		      void *arg)
diff --git a/lib/eal/common/eal_private.h b/lib/eal/common/eal_private.h
index 4d2e806..944c365 100644
--- a/lib/eal/common/eal_private.h
+++ b/lib/eal/common/eal_private.h
@@ -81,6 +81,11 @@ struct rte_config {
 int rte_eal_memzone_init(void);
 
 /**
+ * Cleanup the memzone subsystem (private to eal).
+ */
+void rte_eal_memzone_cleanup(void);
+
+/**
  * Fill configuration with number of physical and logical processors
  *
  * This function is private to EAL.
diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
index 9b59cec..dfcbe64 100644
--- a/lib/eal/linux/eal.c
+++ b/lib/eal/linux/eal.c
@@ -1375,6 +1375,7 @@ static void rte_eal_init_alert(const char *msg)
 	eal_trace_fini();
 	eal_mp_dev_hotplug_cleanup();
 	rte_eal_alarm_cleanup();
+	rte_eal_memzone_cleanup();
 	/* after this point, any DPDK pointers will become dangling */
 	rte_eal_memory_detach();
 	rte_eal_malloc_heap_cleanup();
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/5] app/test-pm: add multiprocess test
  2023-12-12  4:25 ` [PATCH 1/5] app/test-pm: add multiprocess test Artemy Kovalyov
@ 2023-12-12 17:09   ` Stephen Hemminger
  2023-12-17  3:53   ` [PATCH v2 1/5] app/test-mp: " Artemy Kovalyov
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 21+ messages in thread
From: Stephen Hemminger @ 2023-12-12 17:09 UTC (permalink / raw)
  To: Artemy Kovalyov; +Cc: dev, Thomas Monjalon, Anatoly Burakov

On Tue, 12 Dec 2023 06:25:12 +0200
Artemy Kovalyov <artemyko@nvidia.com> wrote:

> +rte_atomic32_t g_count;
> +
> +static int
> +done(const struct rte_mp_msg *msg __rte_unused, const void *arg __rte_unused)
> +{
> +	rte_atomic32_dec(&g_count);
> +	return 0;
> +}

Local variable, should be static.

Also, assert may not be the ideal way to report test failures.

The preferred way would be to use RTE_TEST_ASSERT() and RTE_TEST_ASSERT_EQUAL()

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v2 1/5] app/test-mp: add multiprocess test
  2023-12-12  4:25 ` [PATCH 1/5] app/test-pm: add multiprocess test Artemy Kovalyov
  2023-12-12 17:09   ` Stephen Hemminger
@ 2023-12-17  3:53   ` Artemy Kovalyov
  2024-03-06 20:20     ` David Marchand
  2024-03-07  6:59   ` [PATCH 0/5] addressing races in concurrent process startup Artemy Kovalyov
  2024-03-07  7:01   ` Artemy Kovalyov
  3 siblings, 1 reply; 21+ messages in thread
From: Artemy Kovalyov @ 2023-12-17  3:53 UTC (permalink / raw)
  To: dev; +Cc: Thomas Monjalon, Anatoly Burakov

This commit adds a test scenario that initiates multiple processes
concurrently. These processes attach to the same shared heap, with an
automatic detection mechanism to identify the primary process.

Signed-off-by: Artemy Kovalyov <artemyko@nvidia.com>
---
v2:
CI && CR fixes
- add missing includes
- use RTE_TEST_ASSERT
---
 app/meson.build         |  1 +
 app/test-mp/main.c      | 52 +++++++++++++++++++++++++++++++++++++++++++++++++
 app/test-mp/meson.build |  8 ++++++++
 app/test-mp/run.sh      | 40 +++++++++++++++++++++++++++++++++++++
 4 files changed, 101 insertions(+)
 create mode 100644 app/test-mp/main.c
 create mode 100644 app/test-mp/meson.build
 create mode 100755 app/test-mp/run.sh

diff --git a/app/meson.build b/app/meson.build
index 8aaed59..1b80091 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -30,6 +30,7 @@ apps = [
         'test-flow-perf',
         'test-gpudev',
         'test-mldev',
+        'test-mp',
         'test-pipeline',
         'test-pmd',
         'test-regex',
diff --git a/app/test-mp/main.c b/app/test-mp/main.c
new file mode 100644
index 0000000..c2f309e
--- /dev/null
+++ b/app/test-mp/main.c
@@ -0,0 +1,52 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <rte_malloc.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_cycles.h>
+#include <rte_test.h>
+
+static rte_atomic32_t g_count;
+
+static int
+done(const struct rte_mp_msg *msg __rte_unused, const void *arg __rte_unused)
+{
+	rte_atomic32_dec(&g_count);
+	return 0;
+}
+
+int
+main(int argc, char **argv)
+{
+	void *p;
+	int ret;
+
+	ret = rte_eal_init(argc, argv);
+	RTE_TEST_ASSERT(ret >= 0, "init failed\n");
+
+	rte_atomic32_set(&g_count, atoi(argv[++ret]));
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		ret = rte_mp_action_register("done", done);
+		RTE_TEST_ASSERT_SUCCESS(ret, "register action failed\n");
+	}
+
+	p = rte_malloc(NULL, 0x1000000, 0x1000);
+	RTE_TEST_ASSERT_NOT_NULL(p, "allocation failed\n");
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		uint64_t timeout = rte_rdtsc() + 5 * rte_get_tsc_hz();
+
+		while (rte_atomic32_read(&g_count) > 0)
+			RTE_TEST_ASSERT(rte_rdtsc() < timeout, "timeout\n");
+	} else {
+		struct rte_mp_msg msg = { .name = "done" };
+
+		rte_mp_sendmsg(&msg);
+	}
+
+	rte_eal_cleanup();
+	return 0;
+}
diff --git a/app/test-mp/meson.build b/app/test-mp/meson.build
new file mode 100644
index 0000000..99de379
--- /dev/null
+++ b/app/test-mp/meson.build
@@ -0,0 +1,8 @@
+if is_windows
+    build = false
+    reason = 'not supported on Windows'
+    subdir_done()
+endif
+
+sources = files('main.c')
+deps = ['eal']
diff --git a/app/test-mp/run.sh b/app/test-mp/run.sh
new file mode 100755
index 0000000..03a71c7
--- /dev/null
+++ b/app/test-mp/run.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+logdir=/tmp/dpdk_test_mp
+repeat=1
+lastcore=$(($(nproc) - 1))
+log=1
+
+while getopts p:r:lL:d op; do case $op in
+    p) lastcore=$OPTARG ;;
+    r) repeat=$OPTARG ;;
+    L) logdir=$OPTARG ;;
+    l) log=0 ;;
+    d) debug=1 ;;
+esac done
+shift $((OPTIND-1))
+
+test=$1
+logpath=$logdir/$(date +%y%m%d-%H%M%S)
+
+rm -f core.*
+pkill dpdk-test-mp
+
+for j in $(seq $repeat) ; do
+    [ $log ] && mkdir -p $logpath/$j
+    pids=()
+    for i in $(seq 0 $lastcore) ; do
+	args="-l $i --file-prefix=dpdk1 --proc-type=auto"
+	if [ $debug ] ; then
+	    args="$args --log-level=lib.eal:8"
+	fi
+	if [ $log ] ; then
+	    $test $args $lastcore >$logpath/$j/$i.log 2>&1 &
+	else
+	    $test $args $lastcore &
+	fi
+	pids+=($!)
+    done
+    wait ${pids[@]} || { echo iteration $j failed ; break ; }
+    echo iteration $j passed
+done
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 1/5] app/test-mp: add multiprocess test
  2023-12-17  3:53   ` [PATCH v2 1/5] app/test-mp: " Artemy Kovalyov
@ 2024-03-06 20:20     ` David Marchand
  0 siblings, 0 replies; 21+ messages in thread
From: David Marchand @ 2024-03-06 20:20 UTC (permalink / raw)
  To: Artemy Kovalyov; +Cc: dev, Thomas Monjalon, Anatoly Burakov

On Sun, Dec 17, 2023 at 4:54 AM Artemy Kovalyov <artemyko@nvidia.com> wrote:
>
> This commit adds a test scenario that initiates multiple processes
> concurrently. These processes attach to the same shared heap, with an
> automatic detection mechanism to identify the primary process.
>
> Signed-off-by: Artemy Kovalyov <artemyko@nvidia.com>

It looks like a new revision for a patch from the
https://patchwork.dpdk.org/project/dpdk/list/?series=30517&state=*
series.
Please send a full new series if you intend to get it merged.

Thanks.

-- 
David Marchand


^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 0/5] addressing races in concurrent process startup
  2023-12-12  4:25 ` [PATCH 1/5] app/test-pm: add multiprocess test Artemy Kovalyov
  2023-12-12 17:09   ` Stephen Hemminger
  2023-12-17  3:53   ` [PATCH v2 1/5] app/test-mp: " Artemy Kovalyov
@ 2024-03-07  6:59   ` Artemy Kovalyov
  2024-03-07  7:01   ` Artemy Kovalyov
  3 siblings, 0 replies; 21+ messages in thread
From: Artemy Kovalyov @ 2024-03-07  6:59 UTC (permalink / raw)
  To: dev; +Cc: Thomas Monjalon

In the process of initiating multiple processes concurrently, specifically with
automatic detection of the primary process, certain race conditions have been
identified. This patch series introduces a straightforward test that showcases
the issue and subsequently addresses the problems surfaced by the test. These
fixes aim to ensure the robust and secure utilization of DPDK within intricate
solutions that involve starting processes with job orchestrators such as Slurm
or Hadoop YARN.

Artemy Kovalyov (5):
  app/test-mp: add multiprocess test
  eal: fix multiprocess hotplug race
  ipc: fix mp channel closure to prevent message loss
  eal: fix first time primary autodetect
  eal: fix memzone fbarray cleanup

 app/meson.build                     |  1 +
 app/test-mp/main.c                  | 52 +++++++++++++++++++++++++++++++++++++
 app/test-mp/meson.build             |  8 ++++++
 app/test-mp/run.sh                  | 40 ++++++++++++++++++++++++++++
 lib/eal/common/eal_common_memzone.c | 12 +++++++++
 lib/eal/common/eal_common_proc.c    |  4 +--
 lib/eal/common/eal_private.h        |  5 ++++
 lib/eal/common/hotplug_mp.c         |  3 +++
 lib/eal/linux/eal.c                 |  3 ++-
 9 files changed, 125 insertions(+), 3 deletions(-)
 create mode 100644 app/test-mp/main.c
 create mode 100644 app/test-mp/meson.build
 create mode 100755 app/test-mp/run.sh

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 0/5] addressing races in concurrent process startup
  2023-12-12  4:25 ` [PATCH 1/5] app/test-pm: add multiprocess test Artemy Kovalyov
                     ` (2 preceding siblings ...)
  2024-03-07  6:59   ` [PATCH 0/5] addressing races in concurrent process startup Artemy Kovalyov
@ 2024-03-07  7:01   ` Artemy Kovalyov
  2024-03-07  7:01     ` [PATCH v2 1/5] app/test-mp: add multiprocess test Artemy Kovalyov
                       ` (4 more replies)
  3 siblings, 5 replies; 21+ messages in thread
From: Artemy Kovalyov @ 2024-03-07  7:01 UTC (permalink / raw)
  To: dev; +Cc: Thomas Monjalon

In the process of initiating multiple processes concurrently, specifically with
automatic detection of the primary process, certain race conditions have been
identified. This patch series introduces a straightforward test that showcases
the issue and subsequently addresses the problems surfaced by the test. These
fixes aim to ensure the robust and secure utilization of DPDK within intricate
solutions that involve starting processes with job orchestrators such as Slurm
or Hadoop YARN.

Artemy Kovalyov (5):
  app/test-mp: add multiprocess test
  eal: fix multiprocess hotplug race
  ipc: fix mp channel closure to prevent message loss
  eal: fix first time primary autodetect
  eal: fix memzone fbarray cleanup

 app/meson.build                     |  1 +
 app/test-mp/main.c                  | 52 +++++++++++++++++++++++++++++++++++++
 app/test-mp/meson.build             |  8 ++++++
 app/test-mp/run.sh                  | 40 ++++++++++++++++++++++++++++
 lib/eal/common/eal_common_memzone.c | 12 +++++++++
 lib/eal/common/eal_common_proc.c    |  4 +--
 lib/eal/common/eal_private.h        |  5 ++++
 lib/eal/common/hotplug_mp.c         |  3 +++
 lib/eal/linux/eal.c                 |  3 ++-
 9 files changed, 125 insertions(+), 3 deletions(-)
 create mode 100644 app/test-mp/main.c
 create mode 100644 app/test-mp/meson.build
 create mode 100755 app/test-mp/run.sh

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v2 1/5] app/test-mp: add multiprocess test
  2024-03-07  7:01   ` Artemy Kovalyov
@ 2024-03-07  7:01     ` Artemy Kovalyov
  2024-03-13 16:05       ` Burakov, Anatoly
  2024-03-07  7:01     ` [PATCH v2 2/5] eal: fix multiprocess hotplug race Artemy Kovalyov
                       ` (3 subsequent siblings)
  4 siblings, 1 reply; 21+ messages in thread
From: Artemy Kovalyov @ 2024-03-07  7:01 UTC (permalink / raw)
  To: dev; +Cc: Thomas Monjalon, Anatoly Burakov

This commit adds a test scenario that initiates multiple processes
concurrently. These processes attach to the same shared heap, with an
automatic detection mechanism to identify the primary process.

Signed-off-by: Artemy Kovalyov <artemyko@nvidia.com>
---
 app/meson.build         |  1 +
 app/test-mp/main.c      | 52 +++++++++++++++++++++++++++++++++++++++++++++++++
 app/test-mp/meson.build |  8 ++++++++
 app/test-mp/run.sh      | 40 +++++++++++++++++++++++++++++++++++++
 4 files changed, 101 insertions(+)
 create mode 100644 app/test-mp/main.c
 create mode 100644 app/test-mp/meson.build
 create mode 100755 app/test-mp/run.sh

diff --git a/app/meson.build b/app/meson.build
index 8aaed59..1b80091 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -30,6 +30,7 @@ apps = [
         'test-flow-perf',
         'test-gpudev',
         'test-mldev',
+        'test-mp',
         'test-pipeline',
         'test-pmd',
         'test-regex',
diff --git a/app/test-mp/main.c b/app/test-mp/main.c
new file mode 100644
index 0000000..c2f309e
--- /dev/null
+++ b/app/test-mp/main.c
@@ -0,0 +1,52 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <rte_malloc.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_cycles.h>
+#include <rte_test.h>
+
+static rte_atomic32_t g_count;
+
+static int
+done(const struct rte_mp_msg *msg __rte_unused, const void *arg __rte_unused)
+{
+	rte_atomic32_dec(&g_count);
+	return 0;
+}
+
+int
+main(int argc, char **argv)
+{
+	void *p;
+	int ret;
+
+	ret = rte_eal_init(argc, argv);
+	RTE_TEST_ASSERT(ret >= 0, "init failed\n");
+
+	rte_atomic32_set(&g_count, atoi(argv[++ret]));
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		ret = rte_mp_action_register("done", done);
+		RTE_TEST_ASSERT_SUCCESS(ret, "register action failed\n");
+	}
+
+	p = rte_malloc(NULL, 0x1000000, 0x1000);
+	RTE_TEST_ASSERT_NOT_NULL(p, "allocation failed\n");
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		uint64_t timeout = rte_rdtsc() + 5 * rte_get_tsc_hz();
+
+		while (rte_atomic32_read(&g_count) > 0)
+			RTE_TEST_ASSERT(rte_rdtsc() < timeout, "timeout\n");
+	} else {
+		struct rte_mp_msg msg = { .name = "done" };
+
+		rte_mp_sendmsg(&msg);
+	}
+
+	rte_eal_cleanup();
+	return 0;
+}
diff --git a/app/test-mp/meson.build b/app/test-mp/meson.build
new file mode 100644
index 0000000..99de379
--- /dev/null
+++ b/app/test-mp/meson.build
@@ -0,0 +1,8 @@
+if is_windows
+    build = false
+    reason = 'not supported on Windows'
+    subdir_done()
+endif
+
+sources = files('main.c')
+deps = ['eal']
diff --git a/app/test-mp/run.sh b/app/test-mp/run.sh
new file mode 100755
index 0000000..03a71c7
--- /dev/null
+++ b/app/test-mp/run.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+logdir=/tmp/dpdk_test_mp
+repeat=1
+lastcore=$(($(nproc) - 1))
+log=1
+
+while getopts p:r:lL:d op; do case $op in
+    p) lastcore=$OPTARG ;;
+    r) repeat=$OPTARG ;;
+    L) logdir=$OPTARG ;;
+    l) log=0 ;;
+    d) debug=1 ;;
+esac done
+shift $((OPTIND-1))
+
+test=$1
+logpath=$logdir/$(date +%y%m%d-%H%M%S)
+
+rm -f core.*
+pkill dpdk-test-mp
+
+for j in $(seq $repeat) ; do
+    [ $log ] && mkdir -p $logpath/$j
+    pids=()
+    for i in $(seq 0 $lastcore) ; do
+	args="-l $i --file-prefix=dpdk1 --proc-type=auto"
+	if [ $debug ] ; then
+	    args="$args --log-level=lib.eal:8"
+	fi
+	if [ $log ] ; then
+	    $test $args $lastcore >$logpath/$j/$i.log 2>&1 &
+	else
+	    $test $args $lastcore &
+	fi
+	pids+=($!)
+    done
+    wait ${pids[@]} || { echo iteration $j failed ; break ; }
+    echo iteration $j passed
+done
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v2 2/5] eal: fix multiprocess hotplug race
  2024-03-07  7:01   ` Artemy Kovalyov
  2024-03-07  7:01     ` [PATCH v2 1/5] app/test-mp: add multiprocess test Artemy Kovalyov
@ 2024-03-07  7:01     ` Artemy Kovalyov
  2024-03-13 16:05       ` Burakov, Anatoly
  2024-03-07  7:01     ` [PATCH v2 3/5] ipc: fix mp channel closure to prevent message loss Artemy Kovalyov
                       ` (2 subsequent siblings)
  4 siblings, 1 reply; 21+ messages in thread
From: Artemy Kovalyov @ 2024-03-07  7:01 UTC (permalink / raw)
  To: dev; +Cc: Thomas Monjalon, stable, Qi Zhang, Anatoly Burakov

There exists a time gap between the creation of the multiprocess channel
and the registration of request action handlers. Within this window, a
secondary process that receives an eal_dev_mp_request broadcast
notification might respond with ENOTSUP. This, in turn, causes the
rte_dev_probe() operation to fail in another secondary process.
To avoid this, disregarding ENOTSUP responses to attach notifications.

Fixes: 244d5130719c ("eal: enable hotplug on multi-process")
Cc: stable@dpdk.org

Signed-off-by: Artemy Kovalyov <artemyko@nvidia.com>
---
 lib/eal/common/hotplug_mp.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lib/eal/common/hotplug_mp.c b/lib/eal/common/hotplug_mp.c
index 6027819..e6a3f6b 100644
--- a/lib/eal/common/hotplug_mp.c
+++ b/lib/eal/common/hotplug_mp.c
@@ -428,6 +428,9 @@ int eal_dev_hotplug_request_to_secondary(struct eal_dev_mp_req *req)
 			if (req->t == EAL_DEV_REQ_TYPE_ATTACH &&
 				resp->result == -EEXIST)
 				continue;
+			if (req->t == EAL_DEV_REQ_TYPE_ATTACH &&
+				resp->result == -ENOTSUP)
+				continue;
 			if (req->t == EAL_DEV_REQ_TYPE_DETACH &&
 				resp->result == -ENOENT)
 				continue;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v2 3/5] ipc: fix mp channel closure to prevent message loss
  2024-03-07  7:01   ` Artemy Kovalyov
  2024-03-07  7:01     ` [PATCH v2 1/5] app/test-mp: add multiprocess test Artemy Kovalyov
  2024-03-07  7:01     ` [PATCH v2 2/5] eal: fix multiprocess hotplug race Artemy Kovalyov
@ 2024-03-07  7:01     ` Artemy Kovalyov
  2024-03-13 16:06       ` Burakov, Anatoly
  2024-03-07  7:01     ` [PATCH v2 4/5] eal: fix first time primary autodetect Artemy Kovalyov
  2024-03-07  7:01     ` [PATCH v2 5/5] eal: fix memzone fbarray cleanup Artemy Kovalyov
  4 siblings, 1 reply; 21+ messages in thread
From: Artemy Kovalyov @ 2024-03-07  7:01 UTC (permalink / raw)
  To: dev
  Cc: Thomas Monjalon, stable, Anatoly Burakov, David Marchand,
	Maxime Coquelin

This commit addresses an issue related to the cleanup of the
multiprocess channel. Previously, when closing the channel, there was a
risk of losing trailing messages. This issue was particularly noticeable
when broadcast message from primary to secondary processes was sent
while a secondary process was closing it's mp channel. In this fix, we
delete mp socket file before stopping mp receive thread.

Fixes: e7885281ded1 ("ipc: stop mp control thread on cleanup")
Cc: stable@dpdk.org

Signed-off-by: Artemy Kovalyov <artemyko@nvidia.com>
---
 lib/eal/common/eal_common_proc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/eal/common/eal_common_proc.c b/lib/eal/common/eal_common_proc.c
index 728815c..d34fdda 100644
--- a/lib/eal/common/eal_common_proc.c
+++ b/lib/eal/common/eal_common_proc.c
@@ -593,7 +593,7 @@ enum async_action {
 }
 
 static void
-close_socket_fd(int fd)
+remove_socket_fd(int fd)
 {
 	char path[PATH_MAX];
 
@@ -672,9 +672,9 @@ enum async_action {
 	if (fd < 0)
 		return;
 
+	remove_socket_fd(fd);
 	pthread_cancel((pthread_t)mp_handle_tid.opaque_id);
 	rte_thread_join(mp_handle_tid, NULL);
-	close_socket_fd(fd);
 }
 
 /**
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v2 4/5] eal: fix first time primary autodetect
  2024-03-07  7:01   ` Artemy Kovalyov
                       ` (2 preceding siblings ...)
  2024-03-07  7:01     ` [PATCH v2 3/5] ipc: fix mp channel closure to prevent message loss Artemy Kovalyov
@ 2024-03-07  7:01     ` Artemy Kovalyov
  2024-03-13 16:06       ` Burakov, Anatoly
  2024-03-07  7:01     ` [PATCH v2 5/5] eal: fix memzone fbarray cleanup Artemy Kovalyov
  4 siblings, 1 reply; 21+ messages in thread
From: Artemy Kovalyov @ 2024-03-07  7:01 UTC (permalink / raw)
  To: dev; +Cc: Thomas Monjalon, stable

If the configuration file is absent, the autodetection function should
generate and secure it. Otherwise, multiple simultaneous openings could
erroneously identify themselves as primary instances.

Fixes: af75078fece3 ("first public release")
Cc: stable@dpdk.org

Signed-off-by: Artemy Kovalyov <artemyko@nvidia.com>
---
 lib/eal/linux/eal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
index 57da058..9b59cec 100644
--- a/lib/eal/linux/eal.c
+++ b/lib/eal/linux/eal.c
@@ -360,7 +360,7 @@ enum rte_proc_type_t
 		 * keep that open and don't close it to prevent a race condition
 		 * between multiple opens.
 		 */
-		if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
+		if (((mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0600)) >= 0) &&
 				(fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
 			ptype = RTE_PROC_SECONDARY;
 	}
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v2 5/5] eal: fix memzone fbarray cleanup
  2024-03-07  7:01   ` Artemy Kovalyov
                       ` (3 preceding siblings ...)
  2024-03-07  7:01     ` [PATCH v2 4/5] eal: fix first time primary autodetect Artemy Kovalyov
@ 2024-03-07  7:01     ` Artemy Kovalyov
  2024-03-13 16:17       ` Burakov, Anatoly
  4 siblings, 1 reply; 21+ messages in thread
From: Artemy Kovalyov @ 2024-03-07  7:01 UTC (permalink / raw)
  To: dev; +Cc: Thomas Monjalon, stable, Anatoly Burakov

The initialization of the Memzone file-backed array ensures its
uniqueness by employing an exclusive lock. This is crucial because only
one primary process can exist per specific shm_id, which is further
protected by the exclusive EAL runtime configuration lock.

However, during the process closure, the exclusive lock on both the
fbarray and the configuration is not explicitly released. The
responsibility of releasing these locks is left to the generic quit
procedure. This can lead to a potential race condition when the
configuration is released before the fbarray.

To address this, we propose explicitly closing the memzone fbarray. This
ensures proper order of operations during process closure and prevents
any potential race conditions arising from the mismatched lock release
timings.

Fixes: af75078fece3 ("first public release")
Cc: stable@dpdk.org

Signed-off-by: Artemy Kovalyov <artemyko@nvidia.com>
---
 lib/eal/common/eal_common_memzone.c | 12 ++++++++++++
 lib/eal/common/eal_private.h        |  5 +++++
 lib/eal/linux/eal.c                 |  1 +
 3 files changed, 18 insertions(+)

diff --git a/lib/eal/common/eal_common_memzone.c b/lib/eal/common/eal_common_memzone.c
index 1f3e701..7db8029 100644
--- a/lib/eal/common/eal_common_memzone.c
+++ b/lib/eal/common/eal_common_memzone.c
@@ -447,6 +447,18 @@
 	return ret;
 }
 
+void
+rte_eal_memzone_cleanup(void)
+{
+	struct rte_mem_config *mcfg;
+
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		rte_fbarray_destroy(&mcfg->memzones);
+	}
+}
+
 /* Walk all reserved memory zones */
 void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *),
 		      void *arg)
diff --git a/lib/eal/common/eal_private.h b/lib/eal/common/eal_private.h
index 4d2e806..944c365 100644
--- a/lib/eal/common/eal_private.h
+++ b/lib/eal/common/eal_private.h
@@ -81,6 +81,11 @@ struct rte_config {
 int rte_eal_memzone_init(void);
 
 /**
+ * Cleanup the memzone subsystem (private to eal).
+ */
+void rte_eal_memzone_cleanup(void);
+
+/**
  * Fill configuration with number of physical and logical processors
  *
  * This function is private to EAL.
diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
index 9b59cec..dfcbe64 100644
--- a/lib/eal/linux/eal.c
+++ b/lib/eal/linux/eal.c
@@ -1375,6 +1375,7 @@ static void rte_eal_init_alert(const char *msg)
 	eal_trace_fini();
 	eal_mp_dev_hotplug_cleanup();
 	rte_eal_alarm_cleanup();
+	rte_eal_memzone_cleanup();
 	/* after this point, any DPDK pointers will become dangling */
 	rte_eal_memory_detach();
 	rte_eal_malloc_heap_cleanup();
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 1/5] app/test-mp: add multiprocess test
  2024-03-07  7:01     ` [PATCH v2 1/5] app/test-mp: add multiprocess test Artemy Kovalyov
@ 2024-03-13 16:05       ` Burakov, Anatoly
  0 siblings, 0 replies; 21+ messages in thread
From: Burakov, Anatoly @ 2024-03-13 16:05 UTC (permalink / raw)
  To: Artemy Kovalyov, dev; +Cc: Thomas Monjalon

On 3/7/2024 8:01 AM, Artemy Kovalyov wrote:
> This commit adds a test scenario that initiates multiple processes
> concurrently. These processes attach to the same shared heap, with an
> automatic detection mechanism to identify the primary process.
> 
> Signed-off-by: Artemy Kovalyov <artemyko@nvidia.com>
> ---
Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>

-- 
Thanks,
Anatoly


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 2/5] eal: fix multiprocess hotplug race
  2024-03-07  7:01     ` [PATCH v2 2/5] eal: fix multiprocess hotplug race Artemy Kovalyov
@ 2024-03-13 16:05       ` Burakov, Anatoly
  0 siblings, 0 replies; 21+ messages in thread
From: Burakov, Anatoly @ 2024-03-13 16:05 UTC (permalink / raw)
  To: Artemy Kovalyov, dev; +Cc: Thomas Monjalon, stable, Qi Zhang

On 3/7/2024 8:01 AM, Artemy Kovalyov wrote:
> There exists a time gap between the creation of the multiprocess channel
> and the registration of request action handlers. Within this window, a
> secondary process that receives an eal_dev_mp_request broadcast
> notification might respond with ENOTSUP. This, in turn, causes the
> rte_dev_probe() operation to fail in another secondary process.
> To avoid this, disregarding ENOTSUP responses to attach notifications.
> 
> Fixes: 244d5130719c ("eal: enable hotplug on multi-process")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Artemy Kovalyov <artemyko@nvidia.com>
> ---
Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>

-- 
Thanks,
Anatoly


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 3/5] ipc: fix mp channel closure to prevent message loss
  2024-03-07  7:01     ` [PATCH v2 3/5] ipc: fix mp channel closure to prevent message loss Artemy Kovalyov
@ 2024-03-13 16:06       ` Burakov, Anatoly
  0 siblings, 0 replies; 21+ messages in thread
From: Burakov, Anatoly @ 2024-03-13 16:06 UTC (permalink / raw)
  To: Artemy Kovalyov, dev
  Cc: Thomas Monjalon, stable, David Marchand, Maxime Coquelin

On 3/7/2024 8:01 AM, Artemy Kovalyov wrote:
> This commit addresses an issue related to the cleanup of the
> multiprocess channel. Previously, when closing the channel, there was a
> risk of losing trailing messages. This issue was particularly noticeable
> when broadcast message from primary to secondary processes was sent
> while a secondary process was closing it's mp channel. In this fix, we
> delete mp socket file before stopping mp receive thread.
> 
> Fixes: e7885281ded1 ("ipc: stop mp control thread on cleanup")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Artemy Kovalyov <artemyko@nvidia.com>
> ---
Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>

-- 
Thanks,
Anatoly


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 4/5] eal: fix first time primary autodetect
  2024-03-07  7:01     ` [PATCH v2 4/5] eal: fix first time primary autodetect Artemy Kovalyov
@ 2024-03-13 16:06       ` Burakov, Anatoly
  0 siblings, 0 replies; 21+ messages in thread
From: Burakov, Anatoly @ 2024-03-13 16:06 UTC (permalink / raw)
  To: Artemy Kovalyov, dev; +Cc: Thomas Monjalon, stable

On 3/7/2024 8:01 AM, Artemy Kovalyov wrote:
> If the configuration file is absent, the autodetection function should
> generate and secure it. Otherwise, multiple simultaneous openings could
> erroneously identify themselves as primary instances.
> 
> Fixes: af75078fece3 ("first public release")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Artemy Kovalyov <artemyko@nvidia.com>
> ---

Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>

-- 
Thanks,
Anatoly


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 5/5] eal: fix memzone fbarray cleanup
  2024-03-07  7:01     ` [PATCH v2 5/5] eal: fix memzone fbarray cleanup Artemy Kovalyov
@ 2024-03-13 16:17       ` Burakov, Anatoly
  0 siblings, 0 replies; 21+ messages in thread
From: Burakov, Anatoly @ 2024-03-13 16:17 UTC (permalink / raw)
  To: Artemy Kovalyov, dev; +Cc: Thomas Monjalon, stable

On 3/7/2024 8:01 AM, Artemy Kovalyov wrote:
> The initialization of the Memzone file-backed array ensures its
> uniqueness by employing an exclusive lock. This is crucial because only
> one primary process can exist per specific shm_id, which is further
> protected by the exclusive EAL runtime configuration lock.
> 

I think you meant to say "prefix", not "shm_id".

> However, during the process closure, the exclusive lock on both the
> fbarray and the configuration is not explicitly released. The
> responsibility of releasing these locks is left to the generic quit
> procedure. This can lead to a potential race condition when the
> configuration is released before the fbarray.
> 
> To address this, we propose explicitly closing the memzone fbarray. This
> ensures proper order of operations during process closure and prevents
> any potential race conditions arising from the mismatched lock release
> timings.
> 
> Fixes: af75078fece3 ("first public release")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Artemy Kovalyov <artemyko@nvidia.com>
> ---

I would suggest having a different Fixes: ID, because fbarrays were only 
added in 18.05 when we added dynamic memory support. I propose using 
this commit ID instead:

49df3db84883 ("memzone: replace memzone array with fbarray")

This is the first commit where memzones used fbarrays.

> +void
> +rte_eal_memzone_cleanup(void)
> +{
> +	struct rte_mem_config *mcfg;
> +
> +	mcfg = rte_eal_get_configuration()->mem_config;
> +
> +	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> +		rte_fbarray_destroy(&mcfg->memzones);
> +	}

Nitpick: extraneous brackets, this is a one liner so they're not needed.

With changes above,

Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>
-- 
Thanks,
Anatoly


^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2024-03-13 16:17 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-12-12  4:25 [PATCH 0/5] addressing races in concurrent process startup Artemy Kovalyov
2023-12-12  4:25 ` [PATCH 1/5] app/test-pm: add multiprocess test Artemy Kovalyov
2023-12-12 17:09   ` Stephen Hemminger
2023-12-17  3:53   ` [PATCH v2 1/5] app/test-mp: " Artemy Kovalyov
2024-03-06 20:20     ` David Marchand
2024-03-07  6:59   ` [PATCH 0/5] addressing races in concurrent process startup Artemy Kovalyov
2024-03-07  7:01   ` Artemy Kovalyov
2024-03-07  7:01     ` [PATCH v2 1/5] app/test-mp: add multiprocess test Artemy Kovalyov
2024-03-13 16:05       ` Burakov, Anatoly
2024-03-07  7:01     ` [PATCH v2 2/5] eal: fix multiprocess hotplug race Artemy Kovalyov
2024-03-13 16:05       ` Burakov, Anatoly
2024-03-07  7:01     ` [PATCH v2 3/5] ipc: fix mp channel closure to prevent message loss Artemy Kovalyov
2024-03-13 16:06       ` Burakov, Anatoly
2024-03-07  7:01     ` [PATCH v2 4/5] eal: fix first time primary autodetect Artemy Kovalyov
2024-03-13 16:06       ` Burakov, Anatoly
2024-03-07  7:01     ` [PATCH v2 5/5] eal: fix memzone fbarray cleanup Artemy Kovalyov
2024-03-13 16:17       ` Burakov, Anatoly
2023-12-12  4:25 ` [PATCH 2/5] eal: fix multiprocess hotplug race Artemy Kovalyov
2023-12-12  4:25 ` [PATCH 3/5] ipc: fix mp channel closure to prevent message loss Artemy Kovalyov
2023-12-12  4:25 ` [PATCH 4/5] eal: fix first time primary autodetect Artemy Kovalyov
2023-12-12  4:25 ` [PATCH 5/5] eal: fix memzone fbarray cleanup Artemy Kovalyov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).