* [PATCH v3] lib/eal: fix segfaults due to thread exit order
[not found] <20220523111642.10406-1-zhichaox.zeng@intel.com>
@ 2022-05-30 13:47 ` zhichaox.zeng
2022-05-30 16:26 ` Stephen Hemminger
` (3 more replies)
0 siblings, 4 replies; 16+ messages in thread
From: zhichaox.zeng @ 2022-05-30 13:47 UTC (permalink / raw)
To: dev, qiming.yang, bruce.richardson, hkalra, david.marchand
Cc: aconole, olivier.matz, thomas, stable, Zhichao Zeng
From: Zhichao Zeng <zhichaox.zeng@intel.com>
The eal-intr-thread is not closed before memory cleanup in the
process of exiting. There is a small probability that the memory
has just been cleaned up when the eal-intr-thread is about to use
some pointers, which causes the segmentation fault caught by ASan.
This patch closes the eal-intr-thread before memory cleanup when
exiting to avoid the segmentation fault.
---
v2: add the same API for FreeBSD
---
v3: fix rte_eal_cleanup crash in debug_autotest
Signed-off-by: Zhichao Zeng <zhichaox.zeng@intel.com>
---
lib/eal/common/eal_private.h | 7 +++++++
lib/eal/freebsd/eal.c | 22 +++++++++++++++++++++-
lib/eal/freebsd/eal_interrupts.c | 12 ++++++++++++
lib/eal/linux/eal.c | 21 ++++++++++++++++++++-
lib/eal/linux/eal_interrupts.c | 12 ++++++++++++
5 files changed, 72 insertions(+), 2 deletions(-)
diff --git a/lib/eal/common/eal_private.h b/lib/eal/common/eal_private.h
index 44d14241f0..7adf41b7d7 100644
--- a/lib/eal/common/eal_private.h
+++ b/lib/eal/common/eal_private.h
@@ -152,6 +152,13 @@ int rte_eal_tailqs_init(void);
*/
int rte_eal_intr_init(void);
+/**
+ * Destroy interrupt handling thread.
+ *
+ * This function is private to EAL.
+ */
+void rte_eal_intr_destroy(void);
+
/**
* Close the default log stream
*
diff --git a/lib/eal/freebsd/eal.c b/lib/eal/freebsd/eal.c
index a6b20960f2..6875a9f6d2 100644
--- a/lib/eal/freebsd/eal.c
+++ b/lib/eal/freebsd/eal.c
@@ -72,6 +72,8 @@ struct lcore_config lcore_config[RTE_MAX_LCORE];
/* used by rte_rdtsc() */
int rte_cycles_vmware_tsc_map;
+/* used to judge the running status of the eal */
+static uint32_t run_once;
int
eal_clean_runtime_dir(void)
@@ -574,12 +576,23 @@ static void rte_eal_init_alert(const char *msg)
RTE_LOG(ERR, EAL, "%s\n", msg);
}
+static void warn_parent(void)
+{
+ RTE_LOG(WARNING, EAL, "fork() was called, DPDK won't work in the child "
+ "process unless it calls rte_eal_init()\n");
+}
+
+static void scratch_child(void)
+{
+ /* Scratch run_once so that a call to rte_eal_cleanup won't crash... */
+ __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
+}
+
/* Launch threads, called at application init(). */
int
rte_eal_init(int argc, char **argv)
{
int i, fctret, ret;
- static uint32_t run_once;
uint32_t has_run = 0;
char cpuset[RTE_CPU_AFFINITY_STR_LEN];
char thread_name[RTE_MAX_THREAD_NAME_LEN];
@@ -883,6 +896,8 @@ rte_eal_init(int argc, char **argv)
eal_mcfg_complete();
+ pthread_atfork(NULL, warn_parent, scratch_child);
+
return fctret;
}
@@ -891,8 +906,13 @@ rte_eal_cleanup(void)
{
struct internal_config *internal_conf =
eal_get_internal_configuration();
+
+ if (__atomic_load_n(&run_once, __ATOMIC_RELAXED) == 0)
+ return 0;
+
rte_service_finalize();
rte_mp_channel_cleanup();
+ rte_eal_intr_destroy();
/* after this point, any DPDK pointers will become dangling */
rte_eal_memory_detach();
rte_eal_alarm_cleanup();
diff --git a/lib/eal/freebsd/eal_interrupts.c b/lib/eal/freebsd/eal_interrupts.c
index 9f720bdc8f..cac3859b06 100644
--- a/lib/eal/freebsd/eal_interrupts.c
+++ b/lib/eal/freebsd/eal_interrupts.c
@@ -648,6 +648,18 @@ rte_eal_intr_init(void)
return ret;
}
+void
+rte_eal_intr_destroy(void)
+{
+ /* cancel the host thread to wait/handle the interrupt */
+ pthread_cancel(intr_thread);
+ pthread_join(intr_thread, NULL);
+
+ /* close kqueue */
+ close(kq);
+ kq = -1;
+}
+
int
rte_intr_rx_ctl(struct rte_intr_handle *intr_handle,
int epfd, int op, unsigned int vec, void *data)
diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
index 1ef263434a..32c7adaa52 100644
--- a/lib/eal/linux/eal.c
+++ b/lib/eal/linux/eal.c
@@ -76,6 +76,8 @@ struct lcore_config lcore_config[RTE_MAX_LCORE];
/* used by rte_rdtsc() */
int rte_cycles_vmware_tsc_map;
+/* used to judge the running status of the eal */
+static uint32_t run_once;
int
eal_clean_runtime_dir(void)
@@ -857,12 +859,23 @@ is_iommu_enabled(void)
return n > 2;
}
+static void warn_parent(void)
+{
+ RTE_LOG(WARNING, EAL, "fork() was called, DPDK won't work in the child "
+ "process unless it calls rte_eal_init()\n");
+}
+
+static void scratch_child(void)
+{
+ /* Scratch run_once so that a call to rte_eal_cleanup won't crash... */
+ __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
+}
+
/* Launch threads, called at application init(). */
int
rte_eal_init(int argc, char **argv)
{
int i, fctret, ret;
- static uint32_t run_once;
uint32_t has_run = 0;
const char *p;
static char logid[PATH_MAX];
@@ -1228,6 +1241,8 @@ rte_eal_init(int argc, char **argv)
eal_mcfg_complete();
+ pthread_atfork(NULL, warn_parent, scratch_child);
+
return fctret;
}
@@ -1257,6 +1272,9 @@ rte_eal_cleanup(void)
struct internal_config *internal_conf =
eal_get_internal_configuration();
+ if (__atomic_load_n(&run_once, __ATOMIC_RELAXED) == 0)
+ return 0;
+
if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
internal_conf->hugepage_file.unlink_existing)
rte_memseg_walk(mark_freeable, NULL);
@@ -1266,6 +1284,7 @@ rte_eal_cleanup(void)
vfio_mp_sync_cleanup();
#endif
rte_mp_channel_cleanup();
+ rte_eal_intr_destroy();
/* after this point, any DPDK pointers will become dangling */
rte_eal_memory_detach();
eal_mp_dev_hotplug_cleanup();
diff --git a/lib/eal/linux/eal_interrupts.c b/lib/eal/linux/eal_interrupts.c
index d52ec8eb4c..7e9853e8e7 100644
--- a/lib/eal/linux/eal_interrupts.c
+++ b/lib/eal/linux/eal_interrupts.c
@@ -1199,6 +1199,18 @@ rte_eal_intr_init(void)
return ret;
}
+void
+rte_eal_intr_destroy(void)
+{
+ /* cancel the host thread to wait/handle the interrupt */
+ pthread_cancel(intr_thread);
+ pthread_join(intr_thread, NULL);
+
+ /* close the pipe used by epoll */
+ close(intr_pipe.writefd);
+ close(intr_pipe.readfd);
+}
+
static void
eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle)
{
--
2.25.1
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v3] lib/eal: fix segfaults due to thread exit order
2022-05-30 13:47 ` [PATCH v3] lib/eal: fix segfaults due to thread exit order zhichaox.zeng
@ 2022-05-30 16:26 ` Stephen Hemminger
2022-05-30 16:28 ` Stephen Hemminger
` (2 subsequent siblings)
3 siblings, 0 replies; 16+ messages in thread
From: Stephen Hemminger @ 2022-05-30 16:26 UTC (permalink / raw)
To: zhichaox.zeng
Cc: dev, qiming.yang, bruce.richardson, hkalra, david.marchand,
aconole, olivier.matz, thomas, stable
On Mon, 30 May 2022 13:47:38 +0000
zhichaox.zeng@intel.com wrote:
> +static void warn_parent(void)
> +{
> + RTE_LOG(WARNING, EAL, "fork() was called, DPDK won't work in the child "
> + "process unless it calls rte_eal_init()\n");
> +}
> +
Too wordy. Don't break messages across lines.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v3] lib/eal: fix segfaults due to thread exit order
2022-05-30 13:47 ` [PATCH v3] lib/eal: fix segfaults due to thread exit order zhichaox.zeng
2022-05-30 16:26 ` Stephen Hemminger
@ 2022-05-30 16:28 ` Stephen Hemminger
2022-06-02 8:21 ` Zeng, ZhichaoX
2022-06-07 10:14 ` Zeng, ZhichaoX
2022-06-15 6:01 ` [PATCH v4] " zhichaox.zeng
3 siblings, 1 reply; 16+ messages in thread
From: Stephen Hemminger @ 2022-05-30 16:28 UTC (permalink / raw)
To: zhichaox.zeng
Cc: dev, qiming.yang, bruce.richardson, hkalra, david.marchand,
aconole, olivier.matz, thomas, stable
On Mon, 30 May 2022 13:47:38 +0000
zhichaox.zeng@intel.com wrote:
> @@ -883,6 +896,8 @@ rte_eal_init(int argc, char **argv)
>
> eal_mcfg_complete();
>
> + pthread_atfork(NULL, warn_parent, scratch_child);
> +
> return fctret;
> }
There are lots of other cases where DPDK will die if you fork()
in a DPDK process then call DPDK functions in child.
Not sure what the problem you are trying to solve is?
^ permalink raw reply [flat|nested] 16+ messages in thread
* RE: [PATCH v3] lib/eal: fix segfaults due to thread exit order
2022-05-30 16:28 ` Stephen Hemminger
@ 2022-06-02 8:21 ` Zeng, ZhichaoX
0 siblings, 0 replies; 16+ messages in thread
From: Zeng, ZhichaoX @ 2022-06-02 8:21 UTC (permalink / raw)
To: Stephen Hemminger
Cc: dev, Yang, Qiming, Richardson, Bruce, hkalra, david.marchand,
aconole, Matz, Olivier, thomas, stable
Hi Stephen:
> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Tuesday, May 31, 2022 12:29 AM
> To: Zeng, ZhichaoX <zhichaox.zeng@intel.com>
> Cc: dev@dpdk.org; Yang, Qiming <qiming.yang@intel.com>; Richardson, Bruce <bruce.richardson@intel.com>; hkalra@marvell.com; david.marchand@redhat.com; aconole@redhat.com; Matz, Olivier > > <olivier.matz@6wind.com>; thomas@monjalon.net; stable@dpdk.org
> Subject: Re: [PATCH v3] lib/eal: fix segfaults due to thread exit order
>
> On Mon, 30 May 2022 13:47:38 +0000
> zhichaox.zeng@intel.com wrote:
> > @@ -883,6 +896,8 @@ rte_eal_init(int argc, char **argv)
> >
> > eal_mcfg_complete();
> >
> > + pthread_atfork(NULL, warn_parent, scratch_child);
> > +
> > return fctret;
> > }
> There are lots of other cases where DPDK will die if you fork() in a DPDK process then call DPDK functions in child.
> Not sure what the problem you are trying to solve is?
The original goal of this patch was to cancel the eal-intr-thread before memory cleanup, to avoid a small probability of segfaults.
But the debug_autotest test of dpdk-test calls fork() and tests the exit path in the child process, which calls the rte_eal_cleanup function and mistakenly tries to clean up threads that do not exist in the child. This causes a segmentation fault.
On top of patch v2, an atomic operation is added to determine whether we are in a child process, so that the cleanup is not executed in the child process, which avoids the above problem.
Regards
^ permalink raw reply [flat|nested] 16+ messages in thread
* RE: [PATCH v3] lib/eal: fix segfaults due to thread exit order
2022-05-30 13:47 ` [PATCH v3] lib/eal: fix segfaults due to thread exit order zhichaox.zeng
2022-05-30 16:26 ` Stephen Hemminger
2022-05-30 16:28 ` Stephen Hemminger
@ 2022-06-07 10:14 ` Zeng, ZhichaoX
2022-06-15 6:01 ` [PATCH v4] " zhichaox.zeng
3 siblings, 0 replies; 16+ messages in thread
From: Zeng, ZhichaoX @ 2022-06-07 10:14 UTC (permalink / raw)
To: dev, hkalra, david.marchand
Cc: aconole, Matz, Olivier, thomas, stable, Stephen Hemminger,
Richardson, Bruce, Yang, Qiming
Hi David, Harman
Please review this patch at your convenience. Thanks!
In addition, I tried to figure out the reason for the failure of the CI test. It has been suggested that
this is an ASan problem that occurs when cancelling a thread that is waiting on epoll, and that the
exception no longer occurs after adding the '-fstack-protector-all' parameter. But I don't know the software
environment used for the automated testing, so I can't verify this statement.
Regards
> Subject: [PATCH v3] lib/eal: fix segfaults due to thread exit order
> From: Zhichao Zeng <zhichaox.zeng@intel.com>
> The eal-intr-thread is not closed before memory cleanup in the process of exiting. There is a small probability that when the eal-intr-thread is about to use some pointers, the memory were just cleaned, which cause the segment fault error caught by ASan.
> This patch close the eal-intr-thread before memory cleanup when exiting to avoid segment fault.
> ---
> v2: add the same API for FreeBSD
> ---
> v3: fix rte_eal_cleanup crash in debug_autotest
> Signed-off-by: Zhichao Zeng <zhichaox.zeng@intel.com>
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH v4] lib/eal: fix segfaults due to thread exit order
2022-05-30 13:47 ` [PATCH v3] lib/eal: fix segfaults due to thread exit order zhichaox.zeng
` (2 preceding siblings ...)
2022-06-07 10:14 ` Zeng, ZhichaoX
@ 2022-06-15 6:01 ` zhichaox.zeng
2022-06-24 1:42 ` Zeng, ZhichaoX
` (2 more replies)
3 siblings, 3 replies; 16+ messages in thread
From: zhichaox.zeng @ 2022-06-15 6:01 UTC (permalink / raw)
To: dev
Cc: stable, qiming.yang, david.marchand, stephen, mb, Zhichao Zeng,
Bruce Richardson, Harman Kalra
From: Zhichao Zeng <zhichaox.zeng@intel.com>
The eal-intr-thread is not closed before memory cleanup in the
process of exiting. There is a small probability that the memory
has just been cleaned up when the eal-intr-thread is about to use
some pointers, which causes the segmentation fault caught by ASan.
This patch closes the eal-intr-thread before memory cleanup when
exiting to avoid the segmentation fault. It also adds some atomic
operations to avoid executing rte_eal_cleanup in the child process
spawned by fork() in some test cases, e.g. debug_autotest of dpdk-test.
Cc: stable@dpdk.org
---
v2:
add the same API for FreeBSD
---
v3:
fix rte_eal_cleanup crash in debug_autotest
---
v4:
shorten the prompt message and optimize the commit log
Suggested-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Zhichao Zeng <zhichaox.zeng@intel.com>
---
lib/eal/common/eal_private.h | 7 +++++++
lib/eal/freebsd/eal.c | 21 ++++++++++++++++++++-
lib/eal/freebsd/eal_interrupts.c | 12 ++++++++++++
lib/eal/linux/eal.c | 20 +++++++++++++++++++-
lib/eal/linux/eal_interrupts.c | 12 ++++++++++++
5 files changed, 70 insertions(+), 2 deletions(-)
diff --git a/lib/eal/common/eal_private.h b/lib/eal/common/eal_private.h
index 44d14241f0..7adf41b7d7 100644
--- a/lib/eal/common/eal_private.h
+++ b/lib/eal/common/eal_private.h
@@ -152,6 +152,13 @@ int rte_eal_tailqs_init(void);
*/
int rte_eal_intr_init(void);
+/**
+ * Destroy interrupt handling thread.
+ *
+ * This function is private to EAL.
+ */
+void rte_eal_intr_destroy(void);
+
/**
* Close the default log stream
*
diff --git a/lib/eal/freebsd/eal.c b/lib/eal/freebsd/eal.c
index a6b20960f2..4882f27abd 100644
--- a/lib/eal/freebsd/eal.c
+++ b/lib/eal/freebsd/eal.c
@@ -72,6 +72,8 @@ struct lcore_config lcore_config[RTE_MAX_LCORE];
/* used by rte_rdtsc() */
int rte_cycles_vmware_tsc_map;
+/* used to judge the running status of the eal */
+static uint32_t run_once;
int
eal_clean_runtime_dir(void)
@@ -574,12 +576,22 @@ static void rte_eal_init_alert(const char *msg)
RTE_LOG(ERR, EAL, "%s\n", msg);
}
+static void warn_parent(void)
+{
+ RTE_LOG(WARNING, EAL, "DPDK won't work in the child process\n");
+}
+
+static void scratch_child(void)
+{
+ /* Scratch run_once so that a call to rte_eal_cleanup won't crash... */
+ __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
+}
+
/* Launch threads, called at application init(). */
int
rte_eal_init(int argc, char **argv)
{
int i, fctret, ret;
- static uint32_t run_once;
uint32_t has_run = 0;
char cpuset[RTE_CPU_AFFINITY_STR_LEN];
char thread_name[RTE_MAX_THREAD_NAME_LEN];
@@ -883,6 +895,8 @@ rte_eal_init(int argc, char **argv)
eal_mcfg_complete();
+ pthread_atfork(NULL, warn_parent, scratch_child);
+
return fctret;
}
@@ -891,8 +905,13 @@ rte_eal_cleanup(void)
{
struct internal_config *internal_conf =
eal_get_internal_configuration();
+
+ if (__atomic_load_n(&run_once, __ATOMIC_RELAXED) == 0)
+ return 0;
+
rte_service_finalize();
rte_mp_channel_cleanup();
+ rte_eal_intr_destroy();
/* after this point, any DPDK pointers will become dangling */
rte_eal_memory_detach();
rte_eal_alarm_cleanup();
diff --git a/lib/eal/freebsd/eal_interrupts.c b/lib/eal/freebsd/eal_interrupts.c
index 9f720bdc8f..cac3859b06 100644
--- a/lib/eal/freebsd/eal_interrupts.c
+++ b/lib/eal/freebsd/eal_interrupts.c
@@ -648,6 +648,18 @@ rte_eal_intr_init(void)
return ret;
}
+void
+rte_eal_intr_destroy(void)
+{
+ /* cancel the host thread to wait/handle the interrupt */
+ pthread_cancel(intr_thread);
+ pthread_join(intr_thread, NULL);
+
+ /* close kqueue */
+ close(kq);
+ kq = -1;
+}
+
int
rte_intr_rx_ctl(struct rte_intr_handle *intr_handle,
int epfd, int op, unsigned int vec, void *data)
diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
index 1ef263434a..effebb33a6 100644
--- a/lib/eal/linux/eal.c
+++ b/lib/eal/linux/eal.c
@@ -76,6 +76,8 @@ struct lcore_config lcore_config[RTE_MAX_LCORE];
/* used by rte_rdtsc() */
int rte_cycles_vmware_tsc_map;
+/* used to judge the running status of the eal */
+static uint32_t run_once;
int
eal_clean_runtime_dir(void)
@@ -857,12 +859,22 @@ is_iommu_enabled(void)
return n > 2;
}
+static void warn_parent(void)
+{
+ RTE_LOG(WARNING, EAL, "DPDK won't work in the child process\n");
+}
+
+static void scratch_child(void)
+{
+ /* Scratch run_once so that a call to rte_eal_cleanup won't crash... */
+ __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
+}
+
/* Launch threads, called at application init(). */
int
rte_eal_init(int argc, char **argv)
{
int i, fctret, ret;
- static uint32_t run_once;
uint32_t has_run = 0;
const char *p;
static char logid[PATH_MAX];
@@ -1228,6 +1240,8 @@ rte_eal_init(int argc, char **argv)
eal_mcfg_complete();
+ pthread_atfork(NULL, warn_parent, scratch_child);
+
return fctret;
}
@@ -1257,6 +1271,9 @@ rte_eal_cleanup(void)
struct internal_config *internal_conf =
eal_get_internal_configuration();
+ if (__atomic_load_n(&run_once, __ATOMIC_RELAXED) == 0)
+ return 0;
+
if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
internal_conf->hugepage_file.unlink_existing)
rte_memseg_walk(mark_freeable, NULL);
@@ -1266,6 +1283,7 @@ rte_eal_cleanup(void)
vfio_mp_sync_cleanup();
#endif
rte_mp_channel_cleanup();
+ rte_eal_intr_destroy();
/* after this point, any DPDK pointers will become dangling */
rte_eal_memory_detach();
eal_mp_dev_hotplug_cleanup();
diff --git a/lib/eal/linux/eal_interrupts.c b/lib/eal/linux/eal_interrupts.c
index d52ec8eb4c..7e9853e8e7 100644
--- a/lib/eal/linux/eal_interrupts.c
+++ b/lib/eal/linux/eal_interrupts.c
@@ -1199,6 +1199,18 @@ rte_eal_intr_init(void)
return ret;
}
+void
+rte_eal_intr_destroy(void)
+{
+ /* cancel the host thread to wait/handle the interrupt */
+ pthread_cancel(intr_thread);
+ pthread_join(intr_thread, NULL);
+
+ /* close the pipe used by epoll */
+ close(intr_pipe.writefd);
+ close(intr_pipe.readfd);
+}
+
static void
eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle)
{
--
2.25.1
^ permalink raw reply [flat|nested] 16+ messages in thread
* RE: [PATCH v4] lib/eal: fix segfaults due to thread exit order
2022-06-15 6:01 ` [PATCH v4] " zhichaox.zeng
@ 2022-06-24 1:42 ` Zeng, ZhichaoX
2022-06-24 7:50 ` David Marchand
2022-06-30 10:38 ` Zeng, ZhichaoX
2022-06-30 12:24 ` Bruce Richardson
2022-09-06 2:51 ` [PATCH v5] lib/eal: fix segfaults in exiting Zhichao Zeng
2 siblings, 2 replies; 16+ messages in thread
From: Zeng, ZhichaoX @ 2022-06-24 1:42 UTC (permalink / raw)
To: dev
Cc: stable, Yang, Qiming, david.marchand, stephen, mb, Richardson,
Bruce, Harman Kalra
Hi David, Harman
Please review this patch at your convenience, it's been about a month since the v1 version.
Thanks!
Best regards
Zhichao
> -----Original Message-----
> From: Zeng, ZhichaoX <zhichaox.zeng@intel.com>
> Sent: Wednesday, June 15, 2022 2:02 PM
> To: dev@dpdk.org
> Cc: stable@dpdk.org; Yang, Qiming <qiming.yang@intel.com>; david.marchand@redhat.com; stephen@networkplumber.org; mb@smartsharesystems.com; Zeng, ZhichaoX <zhichaox.zeng@intel.com>; Richardson, Bruce > <bruce.richardson@intel.com>; Harman Kalra <hkalra@marvell.com>
> Subject: [PATCH v4] lib/eal: fix segfaults due to thread exit order
>
> From: Zhichao Zeng <zhichaox.zeng@intel.com>
>
> The eal-intr-thread is not closed before memory cleanup in the process of exiting. There is a small probability that when the eal-intr-thread is about to use some pointers, the memory were just cleaned, which cause the > segment fault error caught by ASan.
>
> This patch close the eal-intr-thread before memory cleanup when exiting to avoid segment fault. And add some atomic operations to avoid executing rte_eal_cleanup in the child process spawned by fork() in some test > cases, e.g. debug_autotest of dpdk-test.
>
> Cc: stable@dpdk.org
>
> ---
> v2:
> add the same API for FreeBSD
> ---
> v3:
> fix rte_eal_cleanup crash in debug_autotest
> ---
> v4:
> shorten the prompt message and optimize the commit log
>
> Suggested-by: David Marchand <david.marchand@redhat.com>
> Signed-off-by: Zhichao Zeng <zhichaox.zeng@intel.com>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v4] lib/eal: fix segfaults due to thread exit order
2022-06-24 1:42 ` Zeng, ZhichaoX
@ 2022-06-24 7:50 ` David Marchand
2022-06-30 10:38 ` Zeng, ZhichaoX
1 sibling, 0 replies; 16+ messages in thread
From: David Marchand @ 2022-06-24 7:50 UTC (permalink / raw)
To: Zeng, ZhichaoX
Cc: dev, stable, Yang, Qiming, stephen, mb, Richardson, Bruce, Harman Kalra
On Fri, Jun 24, 2022 at 3:42 AM Zeng, ZhichaoX <zhichaox.zeng@intel.com> wrote:
>
> Hi David, Harman
> Please review this patch at your convenience, it's been about a month since the v1 version.
> Thanks!
Yes, it's been a month.
Feel free to help on reviewing patches from others so that reviews can
be quicker for everyone.
Thanks.
--
David Marchand
^ permalink raw reply [flat|nested] 16+ messages in thread
* RE: [PATCH v4] lib/eal: fix segfaults due to thread exit order
2022-06-24 1:42 ` Zeng, ZhichaoX
2022-06-24 7:50 ` David Marchand
@ 2022-06-30 10:38 ` Zeng, ZhichaoX
1 sibling, 0 replies; 16+ messages in thread
From: Zeng, ZhichaoX @ 2022-06-30 10:38 UTC (permalink / raw)
To: Harman Kalra, Richardson, Bruce; +Cc: stable, Yang, Qiming, dev, Zeng, ZhichaoX
Hi Harman, Bruce
Could you please help to review this patch, thank you so much!
Best regards
Zhichao
> -----Original Message-----
> From: Zeng, ZhichaoX <zhichaox.zeng@intel.com>
> Sent: Wednesday, June 15, 2022 2:02 PM
> To: dev@dpdk.org
> Cc: stable@dpdk.org; Yang, Qiming <qiming.yang@intel.com>; david.marchand@redhat.com; stephen@networkplumber.org; mb@smartsharesystems.com; Zeng, ZhichaoX <zhichaox.zeng@intel.com>; Richardson, Bruce > <bruce.richardson@intel.com>; Harman Kalra <hkalra@marvell.com>
> Subject: [PATCH v4] lib/eal: fix segfaults due to thread exit order
>
> From: Zhichao Zeng <zhichaox.zeng@intel.com>
>
> The eal-intr-thread is not closed before memory cleanup in the process of exiting. There is a small probability that when the eal-intr-thread is about to use some pointers, the memory were just cleaned, which cause the > segment fault error caught by ASan.
>
> This patch close the eal-intr-thread before memory cleanup when exiting to avoid segment fault. And add some atomic operations to avoid executing rte_eal_cleanup in the child process spawned by fork() in some test > cases, e.g. debug_autotest of dpdk-test.
>
> Cc: stable@dpdk.org
>
> ---
> v2:
> add the same API for FreeBSD
> ---
> v3:
> fix rte_eal_cleanup crash in debug_autotest
> ---
> v4:
> shorten the prompt message and optimize the commit log
>
> Suggested-by: David Marchand <david.marchand@redhat.com>
> Signed-off-by: Zhichao Zeng <zhichaox.zeng@intel.com>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v4] lib/eal: fix segfaults due to thread exit order
2022-06-15 6:01 ` [PATCH v4] " zhichaox.zeng
2022-06-24 1:42 ` Zeng, ZhichaoX
@ 2022-06-30 12:24 ` Bruce Richardson
2022-09-06 2:51 ` [PATCH v5] lib/eal: fix segfaults in exiting Zhichao Zeng
2 siblings, 0 replies; 16+ messages in thread
From: Bruce Richardson @ 2022-06-30 12:24 UTC (permalink / raw)
To: zhichaox.zeng
Cc: dev, stable, qiming.yang, david.marchand, stephen, mb, Harman Kalra
On Wed, Jun 15, 2022 at 02:01:54PM +0800, zhichaox.zeng@intel.com wrote:
> From: Zhichao Zeng <zhichaox.zeng@intel.com>
>
> The eal-intr-thread is not closed before memory cleanup in the
> process of exiting. There is a small probability that when the
> eal-intr-thread is about to use some pointers, the memory were
> just cleaned, which cause the segment fault error caught by ASan.
>
> This patch close the eal-intr-thread before memory cleanup when
> exiting to avoid segment fault. And add some atomic operations
> to avoid executing rte_eal_cleanup in the child process spawned
> by fork() in some test cases, e.g. debug_autotest of dpdk-test.
>
> Cc: stable@dpdk.org
>
Hi,
some comments inline below.
/Bruce
> ---
> v2:
> add the same API for FreeBSD
> ---
> v3:
> fix rte_eal_cleanup crash in debug_autotest
> ---
> v4:
> shorten the prompt message and optimize the commit log
>
Please put these updates below the cutline after the sign-offs, i.e.
immediately before the diffstat.
> Suggested-by: David Marchand <david.marchand@redhat.com>
> Signed-off-by: Zhichao Zeng <zhichaox.zeng@intel.com>
> ---
> lib/eal/common/eal_private.h | 7 +++++++
> lib/eal/freebsd/eal.c | 21 ++++++++++++++++++++-
> lib/eal/freebsd/eal_interrupts.c | 12 ++++++++++++
> lib/eal/linux/eal.c | 20 +++++++++++++++++++-
> lib/eal/linux/eal_interrupts.c | 12 ++++++++++++
> 5 files changed, 70 insertions(+), 2 deletions(-)
>
> diff --git a/lib/eal/common/eal_private.h b/lib/eal/common/eal_private.h
> index 44d14241f0..7adf41b7d7 100644
> --- a/lib/eal/common/eal_private.h
> +++ b/lib/eal/common/eal_private.h
> @@ -152,6 +152,13 @@ int rte_eal_tailqs_init(void);
> */
> int rte_eal_intr_init(void);
>
> +/**
> + * Destroy interrupt handling thread.
> + *
> + * This function is private to EAL.
> + */
> +void rte_eal_intr_destroy(void);
> +
> /**
> * Close the default log stream
> *
> diff --git a/lib/eal/freebsd/eal.c b/lib/eal/freebsd/eal.c
> index a6b20960f2..4882f27abd 100644
> --- a/lib/eal/freebsd/eal.c
> +++ b/lib/eal/freebsd/eal.c
> @@ -72,6 +72,8 @@ struct lcore_config lcore_config[RTE_MAX_LCORE];
> /* used by rte_rdtsc() */
> int rte_cycles_vmware_tsc_map;
>
> +/* used to judge the running status of the eal */
> +static uint32_t run_once;
>
I don't like just moving this variable from the eal_init function. When in
eal_init the name "run_once" made sense as it tracked how often the EAL
init function was run. However, now as a global variable the name
"run_once" no longer makes sense.
Two suggestions:
1. Keep run_once in EAL init as-is, and use a different variable or value
to indicate that DPDK is initialized for cleanup.
2. Move the variable as you have here, just rename it to a more meaningful
name.
> int
> eal_clean_runtime_dir(void)
> @@ -574,12 +576,22 @@ static void rte_eal_init_alert(const char *msg)
> RTE_LOG(ERR, EAL, "%s\n", msg);
> }
>
> +static void warn_parent(void)
> +{
> + RTE_LOG(WARNING, EAL, "DPDK won't work in the child process\n");
> +}
I wonder if this contains enough information. Can we identify briefly what
parts will or won't work, or if we just want to deny everything, can we
give a brief reason why?
> +
> +static void scratch_child(void)
> +{
> + /* Scratch run_once so that a call to rte_eal_cleanup won't crash... */
> + __atomic_store_n(&run_once, 0, __ATOMIC_RELAXED);
> +}
> +
I think the name of this function needs improvement. I'm not sure that
"scratch" is the best term to use. Something like "clear_eal_flag" is
probably better.
> /* Launch threads, called at application init(). */
> int
> rte_eal_init(int argc, char **argv)
> {
> int i, fctret, ret;
> - static uint32_t run_once;
> uint32_t has_run = 0;
> char cpuset[RTE_CPU_AFFINITY_STR_LEN];
> char thread_name[RTE_MAX_THREAD_NAME_LEN];
> @@ -883,6 +895,8 @@ rte_eal_init(int argc, char **argv)
>
> eal_mcfg_complete();
>
> + pthread_atfork(NULL, warn_parent, scratch_child);
> +
> return fctret;
> }
>
> @@ -891,8 +905,13 @@ rte_eal_cleanup(void)
> {
> struct internal_config *internal_conf =
> eal_get_internal_configuration();
> +
> + if (__atomic_load_n(&run_once, __ATOMIC_RELAXED) == 0)
> + return 0;
> +
> rte_service_finalize();
> rte_mp_channel_cleanup();
> + rte_eal_intr_destroy();
> /* after this point, any DPDK pointers will become dangling */
> rte_eal_memory_detach();
> rte_eal_alarm_cleanup();
<snip for brevity>
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH v5] lib/eal: fix segfaults in exiting
2022-06-15 6:01 ` [PATCH v4] " zhichaox.zeng
2022-06-24 1:42 ` Zeng, ZhichaoX
2022-06-30 12:24 ` Bruce Richardson
@ 2022-09-06 2:51 ` Zhichao Zeng
2022-09-06 15:03 ` Stephen Hemminger
2022-10-11 5:25 ` [PATCH v6] " Zhichao Zeng
2 siblings, 2 replies; 16+ messages in thread
From: Zhichao Zeng @ 2022-09-06 2:51 UTC (permalink / raw)
To: dev
Cc: david.marchand, qiming.yang, yidingx.zhou, Zhichao Zeng, stable,
Bruce Richardson, Harman Kalra
The 'eal-intr-thread' is not closed before memory cleanup in the process of
exiting. There is a small chance that the memory has just been cleaned up
when the 'eal-intr-thread' is about to use some pointers, which causes
segfaults caught by ASan.
This patch closes the 'eal-intr-thread' before memory cleanup in
'rte_eal_cleanup' to avoid segfaults, and adds a flag to avoid executing
'rte_eal_cleanup' in the child process which is forked to execute some
test cases (e.g. debug_autotest of dpdk-test).
Bugzilla ID: 1006
Cc: stable@dpdk.org
Signed-off-by: Zhichao Zeng <zhichaox.zeng@intel.com>
---
v2: add same API for FreeBSD
---
v3: fix rte_eal_cleanup crash in debug_autotest
---
v4: shorten the prompt message and optimize the commit log
---
v5: simplify patch
---
lib/eal/common/eal_private.h | 7 +++++++
lib/eal/freebsd/eal.c | 13 +++++++++++++
lib/eal/freebsd/eal_interrupts.c | 12 ++++++++++++
lib/eal/linux/eal.c | 13 +++++++++++++
lib/eal/linux/eal_interrupts.c | 12 ++++++++++++
5 files changed, 57 insertions(+)
diff --git a/lib/eal/common/eal_private.h b/lib/eal/common/eal_private.h
index 44d14241f0..7adf41b7d7 100644
--- a/lib/eal/common/eal_private.h
+++ b/lib/eal/common/eal_private.h
@@ -152,6 +152,13 @@ int rte_eal_tailqs_init(void);
*/
int rte_eal_intr_init(void);
+/**
+ * Destroy interrupt handling thread.
+ *
+ * This function is private to EAL.
+ */
+void rte_eal_intr_destroy(void);
+
/**
* Close the default log stream
*
diff --git a/lib/eal/freebsd/eal.c b/lib/eal/freebsd/eal.c
index 26fbc91b26..f27c1d9f97 100644
--- a/lib/eal/freebsd/eal.c
+++ b/lib/eal/freebsd/eal.c
@@ -72,6 +72,8 @@ struct lcore_config lcore_config[RTE_MAX_LCORE];
/* used by rte_rdtsc() */
int rte_cycles_vmware_tsc_map;
+/* used to judge if is forked */
+static int is_forked;
int
eal_clean_runtime_dir(void)
@@ -574,6 +576,11 @@ static void rte_eal_init_alert(const char *msg)
RTE_LOG(ERR, EAL, "%s\n", msg);
}
+static void mark_forked(void)
+{
+ is_forked++;
+}
+
/* Launch threads, called at application init(). */
int
rte_eal_init(int argc, char **argv)
@@ -883,16 +890,22 @@ rte_eal_init(int argc, char **argv)
eal_mcfg_complete();
+ pthread_atfork(NULL, NULL, mark_forked);
+
return fctret;
}
int
rte_eal_cleanup(void)
{
+ if (is_forked)
+ return 0;
+
struct internal_config *internal_conf =
eal_get_internal_configuration();
rte_service_finalize();
rte_mp_channel_cleanup();
+ rte_eal_intr_destroy();
rte_trace_save();
eal_trace_fini();
/* after this point, any DPDK pointers will become dangling */
diff --git a/lib/eal/freebsd/eal_interrupts.c b/lib/eal/freebsd/eal_interrupts.c
index 9f720bdc8f..cac3859b06 100644
--- a/lib/eal/freebsd/eal_interrupts.c
+++ b/lib/eal/freebsd/eal_interrupts.c
@@ -648,6 +648,18 @@ rte_eal_intr_init(void)
return ret;
}
+void
+rte_eal_intr_destroy(void)
+{
+ /* cancel the host thread to wait/handle the interrupt */
+ pthread_cancel(intr_thread);
+ pthread_join(intr_thread, NULL);
+
+ /* close kqueue */
+ close(kq);
+ kq = -1;
+}
+
int
rte_intr_rx_ctl(struct rte_intr_handle *intr_handle,
int epfd, int op, unsigned int vec, void *data)
diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
index 37d29643a5..ea38ec183d 100644
--- a/lib/eal/linux/eal.c
+++ b/lib/eal/linux/eal.c
@@ -76,6 +76,8 @@ struct lcore_config lcore_config[RTE_MAX_LCORE];
/* used by rte_rdtsc() */
int rte_cycles_vmware_tsc_map;
+/* used to judge if is forked */
+static int is_forked;
int
eal_clean_runtime_dir(void)
@@ -954,6 +956,11 @@ eal_worker_thread_create(unsigned int lcore_id)
return ret;
}
+static void mark_forked(void)
+{
+ is_forked++;
+}
+
/* Launch threads, called at application init(). */
int
rte_eal_init(int argc, char **argv)
@@ -1324,6 +1331,8 @@ rte_eal_init(int argc, char **argv)
eal_mcfg_complete();
+ pthread_atfork(NULL, NULL, mark_forked);
+
return fctret;
}
@@ -1347,6 +1356,9 @@ mark_freeable(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
int
rte_eal_cleanup(void)
{
+ if (is_forked)
+ return 0;
+
/* if we're in a primary process, we need to mark hugepages as freeable
* so that finalization can release them back to the system.
*/
@@ -1362,6 +1374,7 @@ rte_eal_cleanup(void)
vfio_mp_sync_cleanup();
#endif
rte_mp_channel_cleanup();
+ rte_eal_intr_destroy();
rte_trace_save();
eal_trace_fini();
/* after this point, any DPDK pointers will become dangling */
diff --git a/lib/eal/linux/eal_interrupts.c b/lib/eal/linux/eal_interrupts.c
index d52ec8eb4c..7e9853e8e7 100644
--- a/lib/eal/linux/eal_interrupts.c
+++ b/lib/eal/linux/eal_interrupts.c
@@ -1199,6 +1199,18 @@ rte_eal_intr_init(void)
return ret;
}
+void
+rte_eal_intr_destroy(void)
+{
+ /* cancel the host thread to wait/handle the interrupt */
+ pthread_cancel(intr_thread);
+ pthread_join(intr_thread, NULL);
+
+ /* close the pipe used by epoll */
+ close(intr_pipe.writefd);
+ close(intr_pipe.readfd);
+}
+
static void
eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle)
{
--
2.25.1
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v5] lib/eal: fix segfaults in exiting
2022-09-06 2:51 ` [PATCH v5] lib/eal: fix segfaults in exiting Zhichao Zeng
@ 2022-09-06 15:03 ` Stephen Hemminger
2022-09-07 8:53 ` Zeng, ZhichaoX
2022-10-11 5:25 ` [PATCH v6] " Zhichao Zeng
1 sibling, 1 reply; 16+ messages in thread
From: Stephen Hemminger @ 2022-09-06 15:03 UTC (permalink / raw)
To: Zhichao Zeng
Cc: dev, david.marchand, qiming.yang, yidingx.zhou, stable,
Bruce Richardson, Harman Kalra
On Tue, 6 Sep 2022 10:51:31 +0800
Zhichao Zeng <zhichaox.zeng@intel.com> wrote:
>
> +static void mark_forked(void)
> +{
> + is_forked++;
> +}
> +
This will end up counting application threads as well.
Also, it would need to be atomic.
> /* Launch threads, called at application init(). */
> int
> rte_eal_init(int argc, char **argv)
> @@ -1324,6 +1331,8 @@ rte_eal_init(int argc, char **argv)
>
> eal_mcfg_complete();
>
> + pthread_atfork(NULL, NULL, mark_forked);
> +
> return fctret;
> }
> int
> rte_eal_cleanup(void)
> {
> + if (is_forked)
> + return 0;
> +
rte_eal_cleanup is supposed to be called only once by application.
^ permalink raw reply [flat|nested] 16+ messages in thread
* RE: [PATCH v5] lib/eal: fix segfaults in exiting
2022-09-06 15:03 ` Stephen Hemminger
@ 2022-09-07 8:53 ` Zeng, ZhichaoX
0 siblings, 0 replies; 16+ messages in thread
From: Zeng, ZhichaoX @ 2022-09-07 8:53 UTC (permalink / raw)
To: Stephen Hemminger
Cc: dev, david.marchand, Yang, Qiming, Zhou, YidingX, stable,
Richardson, Bruce, Harman Kalra
Hi Stephen,
> >
> > +static void mark_forked(void)
> > +{
> > + is_forked++;
> > +}
> > +
>
> This will end up counting application threads as well.
>
I think it would be counted in the child process when 'fork()' is called,
and in the parent process, it would be zero.
> Also, it would need to be atomic.
>
Thanks for your advice.
> > /* Launch threads, called at application init(). */ int
> > rte_eal_init(int argc, char **argv) @@ -1324,6 +1331,8 @@
> > rte_eal_init(int argc, char **argv)
> >
> > eal_mcfg_complete();
> >
> > + pthread_atfork(NULL, NULL, mark_forked);
> > +
> > return fctret;
> > }
>
> > int
> > rte_eal_cleanup(void)
> > {
> > + if (is_forked)
> > + return 0;
> > +
>
> rte_eal_cleanup is supposed to be called only once by application.
Yes. But in some cases (e.g. debug_autotest of dpdk-test), the test forks
a child process to test 'rte_exit()', and the child then calls 'rte_eal_cleanup()'.
So 'is_forked' is introduced to avoid this situation.
Regards
Zhichao
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH v6] lib/eal: fix segfaults in exiting
2022-09-06 2:51 ` [PATCH v5] lib/eal: fix segfaults in exiting Zhichao Zeng
2022-09-06 15:03 ` Stephen Hemminger
@ 2022-10-11 5:25 ` Zhichao Zeng
2022-10-11 14:04 ` Stephen Hemminger
1 sibling, 1 reply; 16+ messages in thread
From: Zhichao Zeng @ 2022-10-11 5:25 UTC (permalink / raw)
To: dev; +Cc: stable, yidingx.zhou, Zhichao Zeng, Bruce Richardson, Harman Kalra
The 'eal-intr-thread' is not closed before memory cleanup in the process of
exiting. There is a small chance that 'eal-intr-thread' uses some pointers
just after the memory has been cleaned up, which causes segfaults.
This patch closes the 'eal-intr-thread' before memory cleanup in
'rte_eal_cleanup' to avoid segfaults, and adds a flag to avoid executing
'rte_eal_cleanup' in the child process which is forked to execute some
test cases(e.g. debug_autotest of dpdk-test).
Bugzilla ID: 1006
Cc: stable@dpdk.org
Signed-off-by: Zhichao Zeng <zhichaox.zeng@intel.com>
---
v6: use atomic operation
---
v5: simplify patch
---
v4: shorten the prompt message and optimize the commit log
---
v3: fix rte_eal_cleanup crash in debug_autotest
---
v2: add same API for FreeBSD
---
lib/eal/common/eal_private.h | 7 +++++++
lib/eal/freebsd/eal.c | 20 ++++++++++++++++++++
lib/eal/freebsd/eal_interrupts.c | 12 ++++++++++++
lib/eal/linux/eal.c | 20 ++++++++++++++++++++
lib/eal/linux/eal_interrupts.c | 12 ++++++++++++
5 files changed, 71 insertions(+)
diff --git a/lib/eal/common/eal_private.h b/lib/eal/common/eal_private.h
index 0f4d75bb89..2e3342bd15 100644
--- a/lib/eal/common/eal_private.h
+++ b/lib/eal/common/eal_private.h
@@ -152,6 +152,13 @@ int rte_eal_tailqs_init(void);
*/
int rte_eal_intr_init(void);
+/**
+ * Destroy interrupt handling thread.
+ *
+ * This function is private to EAL.
+ */
+void rte_eal_intr_destroy(void);
+
/**
* Close the default log stream
*
diff --git a/lib/eal/freebsd/eal.c b/lib/eal/freebsd/eal.c
index 1b58cd3da6..0839aa211c 100644
--- a/lib/eal/freebsd/eal.c
+++ b/lib/eal/freebsd/eal.c
@@ -73,6 +73,8 @@ struct lcore_config lcore_config[RTE_MAX_LCORE];
/* used by rte_rdtsc() */
int rte_cycles_vmware_tsc_map;
+/* mark process is forked */
+static uint32_t forked_flag;
int
eal_clean_runtime_dir(void)
@@ -575,6 +577,18 @@ static void rte_eal_init_alert(const char *msg)
RTE_LOG(ERR, EAL, "%s\n", msg);
}
+static void
+mark_forked(void)
+{
+ __atomic_add_fetch(&forked_flag, 1, __ATOMIC_RELAXED);
+}
+
+static uint32_t
+is_forked(void)
+{
+ return __atomic_load_n(&forked_flag, __ATOMIC_RELAXED);
+}
+
/* Launch threads, called at application init(). */
int
rte_eal_init(int argc, char **argv)
@@ -884,16 +898,22 @@ rte_eal_init(int argc, char **argv)
eal_mcfg_complete();
+ pthread_atfork(NULL, NULL, mark_forked);
+
return fctret;
}
int
rte_eal_cleanup(void)
{
+ if (is_forked())
+ return 0;
+
struct internal_config *internal_conf =
eal_get_internal_configuration();
rte_service_finalize();
rte_mp_channel_cleanup();
+ rte_eal_intr_destroy();
eal_bus_cleanup();
rte_trace_save();
eal_trace_fini();
diff --git a/lib/eal/freebsd/eal_interrupts.c b/lib/eal/freebsd/eal_interrupts.c
index 9f720bdc8f..cac3859b06 100644
--- a/lib/eal/freebsd/eal_interrupts.c
+++ b/lib/eal/freebsd/eal_interrupts.c
@@ -648,6 +648,18 @@ rte_eal_intr_init(void)
return ret;
}
+void
+rte_eal_intr_destroy(void)
+{
+ /* cancel the host thread to wait/handle the interrupt */
+ pthread_cancel(intr_thread);
+ pthread_join(intr_thread, NULL);
+
+ /* close kqueue */
+ close(kq);
+ kq = -1;
+}
+
int
rte_intr_rx_ctl(struct rte_intr_handle *intr_handle,
int epfd, int op, unsigned int vec, void *data)
diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
index e74542fc71..ef15d7e7f0 100644
--- a/lib/eal/linux/eal.c
+++ b/lib/eal/linux/eal.c
@@ -77,6 +77,8 @@ struct lcore_config lcore_config[RTE_MAX_LCORE];
/* used by rte_rdtsc() */
int rte_cycles_vmware_tsc_map;
+/* mark process is forked */
+static uint32_t forked_flag;
int
eal_clean_runtime_dir(void)
@@ -955,6 +957,18 @@ eal_worker_thread_create(unsigned int lcore_id)
return ret;
}
+static void
+mark_forked(void)
+{
+ __atomic_add_fetch(&forked_flag, 1, __ATOMIC_RELAXED);
+}
+
+static uint32_t
+is_forked(void)
+{
+ return __atomic_load_n(&forked_flag, __ATOMIC_RELAXED);
+}
+
/* Launch threads, called at application init(). */
int
rte_eal_init(int argc, char **argv)
@@ -1325,6 +1339,8 @@ rte_eal_init(int argc, char **argv)
eal_mcfg_complete();
+ pthread_atfork(NULL, NULL, mark_forked);
+
return fctret;
}
@@ -1348,6 +1364,9 @@ mark_freeable(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
int
rte_eal_cleanup(void)
{
+ if (is_forked())
+ return 0;
+
/* if we're in a primary process, we need to mark hugepages as freeable
* so that finalization can release them back to the system.
*/
@@ -1363,6 +1382,7 @@ rte_eal_cleanup(void)
vfio_mp_sync_cleanup();
#endif
rte_mp_channel_cleanup();
+ rte_eal_intr_destroy();
eal_bus_cleanup();
rte_trace_save();
eal_trace_fini();
diff --git a/lib/eal/linux/eal_interrupts.c b/lib/eal/linux/eal_interrupts.c
index d52ec8eb4c..7e9853e8e7 100644
--- a/lib/eal/linux/eal_interrupts.c
+++ b/lib/eal/linux/eal_interrupts.c
@@ -1199,6 +1199,18 @@ rte_eal_intr_init(void)
return ret;
}
+void
+rte_eal_intr_destroy(void)
+{
+ /* cancel the host thread to wait/handle the interrupt */
+ pthread_cancel(intr_thread);
+ pthread_join(intr_thread, NULL);
+
+ /* close the pipe used by epoll */
+ close(intr_pipe.writefd);
+ close(intr_pipe.readfd);
+}
+
static void
eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle)
{
--
2.25.1
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v6] lib/eal: fix segfaults in exiting
2022-10-11 5:25 ` [PATCH v6] " Zhichao Zeng
@ 2022-10-11 14:04 ` Stephen Hemminger
2022-10-19 1:51 ` Zeng, ZhichaoX
0 siblings, 1 reply; 16+ messages in thread
From: Stephen Hemminger @ 2022-10-11 14:04 UTC (permalink / raw)
To: Zhichao Zeng; +Cc: dev, stable, yidingx.zhou, Bruce Richardson, Harman Kalra
On Tue, 11 Oct 2022 13:25:14 +0800
Zhichao Zeng <zhichaox.zeng@intel.com> wrote:
> This patch closes the 'eal-intr-thread' before memory cleanup in
> 'rte_eal_cleanup' to avoid segfaults, and adds a flag to avoid executing
> 'rte_eal_cleanup' in the child process which is forked to execute some
> test cases(e.g. debug_autotest of dpdk-test
This is a test bug, not a DPDK bug. I don't think DPDK should account
for misuse of the API in this way.
^ permalink raw reply [flat|nested] 16+ messages in thread
* RE: [PATCH v6] lib/eal: fix segfaults in exiting
2022-10-11 14:04 ` Stephen Hemminger
@ 2022-10-19 1:51 ` Zeng, ZhichaoX
0 siblings, 0 replies; 16+ messages in thread
From: Zeng, ZhichaoX @ 2022-10-19 1:51 UTC (permalink / raw)
To: Stephen Hemminger, David Marchand
Cc: dev, stable, Zhou, YidingX, Richardson, Bruce, Harman Kalra
Hi Stephen, thanks for your comments.
The original goal of this patch is to close the 'eal-intr-thread' to fix segfaults
caught by ASan. But it breaks the debug_autotest unit test,
so the flag was added to fix the unit test.
Hi @David Marchand, what are your suggestions?
BR
Zhichao
> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Tuesday, October 11, 2022 10:04 PM
> To: Zeng, ZhichaoX <zhichaox.zeng@intel.com>
> Cc: dev@dpdk.org; stable@dpdk.org; Zhou, YidingX
> <yidingx.zhou@intel.com>; Richardson, Bruce
> <bruce.richardson@intel.com>; Harman Kalra <hkalra@marvell.com>
> Subject: Re: [PATCH v6] lib/eal: fix segfaults in exiting
>
> On Tue, 11 Oct 2022 13:25:14 +0800
> Zhichao Zeng <zhichaox.zeng@intel.com> wrote:
>
> > This patch closes the 'eal-intr-thread' before memory cleanup in
> > 'rte_eal_cleanup' to avoid segfaults, and adds a flag to avoid
> > executing 'rte_eal_cleanup' in the child process which is forked to
> > execute some test cases(e.g. debug_autotest of dpdk-test
>
> This is a test bug, not an DPDK bug. I don't think DPDK should account for
> misuse of API in this way.
^ permalink raw reply [flat|nested] 16+ messages in thread
end of thread, other threads:[~2022-10-19 1:51 UTC | newest]
Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <20220523111642.10406-1-zhichaox.zeng@intel.com>
2022-05-30 13:47 ` [PATCH v3] lib/eal: fix segfaults due to thread exit order zhichaox.zeng
2022-05-30 16:26 ` Stephen Hemminger
2022-05-30 16:28 ` Stephen Hemminger
2022-06-02 8:21 ` Zeng, ZhichaoX
2022-06-07 10:14 ` Zeng, ZhichaoX
2022-06-15 6:01 ` [PATCH v4] " zhichaox.zeng
2022-06-24 1:42 ` Zeng, ZhichaoX
2022-06-24 7:50 ` David Marchand
2022-06-30 10:38 ` Zeng, ZhichaoX
2022-06-30 12:24 ` Bruce Richardson
2022-09-06 2:51 ` [PATCH v5] lib/eal: fix segfaults in exiting Zhichao Zeng
2022-09-06 15:03 ` Stephen Hemminger
2022-09-07 8:53 ` Zeng, ZhichaoX
2022-10-11 5:25 ` [PATCH v6] " Zhichao Zeng
2022-10-11 14:04 ` Stephen Hemminger
2022-10-19 1:51 ` Zeng, ZhichaoX
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).