DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH] eal: fix use wrong time API
@ 2021-04-28  8:12 Min Hu (Connor)
  2021-04-28  9:24 ` Morten Brørup
                   ` (2 more replies)
  0 siblings, 3 replies; 18+ messages in thread
From: Min Hu (Connor) @ 2021-04-28  8:12 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, jerinj, skori

From: Chengwen Feng <fengchengwen@huawei.com>

Currently, the mp code uses the gettimeofday() API to get the time, which is
then used as the timeout parameter.

But the time returned by the gettimeofday() API isn't monotonically
increasing. The process may fail if the system time is changed.

This fixes it by using the clock_gettime() API with a monotonic clock.

Fixes: 783b6e54971d ("eal: add synchronous multi-process communication")
Fixes: f05e26051c15 ("eal: add IPC asynchronous request")
Cc: stable@dpdk.org

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
---
 lib/eal/common/eal_common_proc.c | 45 +++++++++++++++++-----------------------
 1 file changed, 19 insertions(+), 26 deletions(-)

diff --git a/lib/eal/common/eal_common_proc.c b/lib/eal/common/eal_common_proc.c
index 6d1af3c..7f08826 100644
--- a/lib/eal/common/eal_common_proc.c
+++ b/lib/eal/common/eal_common_proc.c
@@ -40,6 +40,12 @@ static char mp_dir_path[PATH_MAX]; /* The directory path for all mp sockets */
 static pthread_mutex_t mp_mutex_action = PTHREAD_MUTEX_INITIALIZER;
 static char peer_name[PATH_MAX];
 
+#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */
+#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW
+#else
+#define CLOCK_TYPE_ID CLOCK_MONOTONIC
+#endif
+
 struct action_entry {
 	TAILQ_ENTRY(action_entry) next;
 	char action_name[RTE_MP_MAX_NAME_LEN];
@@ -490,14 +496,8 @@ async_reply_handle_thread_unsafe(void *arg)
 	struct pending_request *req = (struct pending_request *)arg;
 	enum async_action action;
 	struct timespec ts_now;
-	struct timeval now;
 
-	if (gettimeofday(&now, NULL) < 0) {
-		RTE_LOG(ERR, EAL, "Cannot get current time\n");
-		goto no_trigger;
-	}
-	ts_now.tv_nsec = now.tv_usec * 1000;
-	ts_now.tv_sec = now.tv_sec;
+	clock_gettime(CLOCK_TYPE_ID, &ts_now);
 
 	action = process_async_request(req, &ts_now);
 
@@ -896,6 +896,7 @@ mp_request_sync(const char *dst, struct rte_mp_msg *req,
 	       struct rte_mp_reply *reply, const struct timespec *ts)
 {
 	int ret;
+	pthread_condattr_t attr;
 	struct rte_mp_msg msg, *tmp;
 	struct pending_request pending_req, *exist;
 
@@ -904,7 +905,9 @@ mp_request_sync(const char *dst, struct rte_mp_msg *req,
 	strlcpy(pending_req.dst, dst, sizeof(pending_req.dst));
 	pending_req.request = req;
 	pending_req.reply = &msg;
-	pthread_cond_init(&pending_req.sync.cond, NULL);
+	pthread_condattr_init(&attr);
+	pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
+	pthread_cond_init(&pending_req.sync.cond, &attr);
 
 	exist = find_pending_request(dst, req->name);
 	if (exist) {
@@ -967,8 +970,7 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 	int dir_fd, ret = -1;
 	DIR *mp_dir;
 	struct dirent *ent;
-	struct timeval now;
-	struct timespec end;
+	struct timespec now, end;
 	const struct internal_config *internal_conf =
 		eal_get_internal_configuration();
 
@@ -987,15 +989,10 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 		return -1;
 	}
 
-	if (gettimeofday(&now, NULL) < 0) {
-		RTE_LOG(ERR, EAL, "Failed to get current time\n");
-		rte_errno = errno;
-		goto end;
-	}
-
-	end.tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
+	clock_gettime(CLOCK_TYPE_ID, &now);
+	end.tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
 	end.tv_sec = now.tv_sec + ts->tv_sec +
-			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
+			(now.tv_nsec + ts->tv_nsec) / 1000000000;
 
 	/* for secondary process, send request to the primary process only */
 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
@@ -1069,7 +1066,7 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
 	int dir_fd, ret = 0;
 	DIR *mp_dir;
 	struct dirent *ent;
-	struct timeval now;
+	struct timespec now;
 	struct timespec *end;
 	bool dummy_used = false;
 	const struct internal_config *internal_conf =
@@ -1086,11 +1083,6 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
 		return -1;
 	}
 
-	if (gettimeofday(&now, NULL) < 0) {
-		RTE_LOG(ERR, EAL, "Failed to get current time\n");
-		rte_errno = errno;
-		return -1;
-	}
 	copy = calloc(1, sizeof(*copy));
 	dummy = calloc(1, sizeof(*dummy));
 	param = calloc(1, sizeof(*param));
@@ -1108,9 +1100,10 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
 	end = &param->end;
 	reply = &param->user_reply;
 
-	end->tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
+	clock_gettime(CLOCK_TYPE_ID, &now);
+	end->tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
 	end->tv_sec = now.tv_sec + ts->tv_sec +
-			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
+			(now.tv_nsec + ts->tv_nsec) / 1000000000;
 	reply->nb_sent = 0;
 	reply->nb_received = 0;
 	reply->msgs = NULL;
-- 
2.7.4


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH] eal: fix use wrong time API
  2021-04-28  8:12 [dpdk-dev] [PATCH] eal: fix use wrong time API Min Hu (Connor)
@ 2021-04-28  9:24 ` Morten Brørup
  2021-04-28 10:36   ` fengchengwen
  2021-04-29  2:10 ` [dpdk-dev] [PATCH v2] " Min Hu (Connor)
  2021-05-11 10:41 ` [dpdk-dev] [PATCH v4] " Chengwen Feng
  2 siblings, 1 reply; 18+ messages in thread
From: Morten Brørup @ 2021-04-28  9:24 UTC (permalink / raw)
  To: Min Hu (Connor), dev; +Cc: ferruh.yigit, jerinj, skori

> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Min Hu (Connor)
> Sent: Wednesday, April 28, 2021 10:13 AM
> 
> From: Chengwen Feng <fengchengwen@huawei.com>
> 
> Currently, the mp uses gettimeofday() API to get the time, and used as
> timeout parameter.
> 
> But the time which gets from gettimeofday() API isn't monotonically
> increasing. The process may fail if the system time is changed.
> 
> This fixes it by using clock_gettime() API with monotonic attribution.
> 
> Fixes: 783b6e54971d ("eal: add synchronous multi-process
> communication")
> Fixes: f05e26051c15 ("eal: add IPC asynchronous request")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
> ---
>  lib/eal/common/eal_common_proc.c | 45 +++++++++++++++++---------------
> --------
>  1 file changed, 19 insertions(+), 26 deletions(-)
> 
> diff --git a/lib/eal/common/eal_common_proc.c
> b/lib/eal/common/eal_common_proc.c
> index 6d1af3c..7f08826 100644
> --- a/lib/eal/common/eal_common_proc.c
> +++ b/lib/eal/common/eal_common_proc.c
> @@ -40,6 +40,12 @@ static char mp_dir_path[PATH_MAX]; /* The directory
> path for all mp sockets */
>  static pthread_mutex_t mp_mutex_action = PTHREAD_MUTEX_INITIALIZER;
>  static char peer_name[PATH_MAX];
> 
> +#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */
> +#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW
> +#else
> +#define CLOCK_TYPE_ID CLOCK_MONOTONIC
> +#endif

Just out of curiosity: Why do you prefer CLOCK_MONOTONIC_RAW over CLOCK_MONOTONIC?

> +
>  struct action_entry {
>  	TAILQ_ENTRY(action_entry) next;
>  	char action_name[RTE_MP_MAX_NAME_LEN];
> @@ -490,14 +496,8 @@ async_reply_handle_thread_unsafe(void *arg)
>  	struct pending_request *req = (struct pending_request *)arg;
>  	enum async_action action;
>  	struct timespec ts_now;
> -	struct timeval now;
> 
> -	if (gettimeofday(&now, NULL) < 0) {
> -		RTE_LOG(ERR, EAL, "Cannot get current time\n");
> -		goto no_trigger;
> -	}
> -	ts_now.tv_nsec = now.tv_usec * 1000;
> -	ts_now.tv_sec = now.tv_sec;
> +	clock_gettime(CLOCK_TYPE_ID, &ts_now);
> 
>  	action = process_async_request(req, &ts_now);
> 
> @@ -896,6 +896,7 @@ mp_request_sync(const char *dst, struct rte_mp_msg
> *req,
>  	       struct rte_mp_reply *reply, const struct timespec *ts)
>  {
>  	int ret;
> +	pthread_condattr_t attr;
>  	struct rte_mp_msg msg, *tmp;
>  	struct pending_request pending_req, *exist;
> 
> @@ -904,7 +905,9 @@ mp_request_sync(const char *dst, struct rte_mp_msg
> *req,
>  	strlcpy(pending_req.dst, dst, sizeof(pending_req.dst));
>  	pending_req.request = req;
>  	pending_req.reply = &msg;
> -	pthread_cond_init(&pending_req.sync.cond, NULL);
> +	pthread_condattr_init(&attr);
> +	pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);

Shouldn't CLOCK_MONOTONIC be CLOCK_TYPE_ID here too?

> +	pthread_cond_init(&pending_req.sync.cond, &attr);
> 
>  	exist = find_pending_request(dst, req->name);
>  	if (exist) {
> @@ -967,8 +970,7 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct
> rte_mp_reply *reply,
>  	int dir_fd, ret = -1;
>  	DIR *mp_dir;
>  	struct dirent *ent;
> -	struct timeval now;
> -	struct timespec end;
> +	struct timespec now, end;
>  	const struct internal_config *internal_conf =
>  		eal_get_internal_configuration();
> 
> @@ -987,15 +989,10 @@ rte_mp_request_sync(struct rte_mp_msg *req,
> struct rte_mp_reply *reply,
>  		return -1;
>  	}
> 
> -	if (gettimeofday(&now, NULL) < 0) {
> -		RTE_LOG(ERR, EAL, "Failed to get current time\n");
> -		rte_errno = errno;
> -		goto end;
> -	}
> -
> -	end.tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
> +	clock_gettime(CLOCK_TYPE_ID, &now);
> +	end.tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
>  	end.tv_sec = now.tv_sec + ts->tv_sec +
> -			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
> +			(now.tv_nsec + ts->tv_nsec) / 1000000000;
> 
>  	/* for secondary process, send request to the primary process
> only */
>  	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
> @@ -1069,7 +1066,7 @@ rte_mp_request_async(struct rte_mp_msg *req,
> const struct timespec *ts,
>  	int dir_fd, ret = 0;
>  	DIR *mp_dir;
>  	struct dirent *ent;
> -	struct timeval now;
> +	struct timespec now;
>  	struct timespec *end;
>  	bool dummy_used = false;
>  	const struct internal_config *internal_conf =
> @@ -1086,11 +1083,6 @@ rte_mp_request_async(struct rte_mp_msg *req,
> const struct timespec *ts,
>  		return -1;
>  	}
> 
> -	if (gettimeofday(&now, NULL) < 0) {
> -		RTE_LOG(ERR, EAL, "Failed to get current time\n");
> -		rte_errno = errno;
> -		return -1;
> -	}
>  	copy = calloc(1, sizeof(*copy));
>  	dummy = calloc(1, sizeof(*dummy));
>  	param = calloc(1, sizeof(*param));
> @@ -1108,9 +1100,10 @@ rte_mp_request_async(struct rte_mp_msg *req,
> const struct timespec *ts,
>  	end = &param->end;
>  	reply = &param->user_reply;
> 
> -	end->tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
> +	clock_gettime(CLOCK_TYPE_ID, &now);
> +	end->tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
>  	end->tv_sec = now.tv_sec + ts->tv_sec +
> -			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
> +			(now.tv_nsec + ts->tv_nsec) / 1000000000;
>  	reply->nb_sent = 0;
>  	reply->nb_received = 0;
>  	reply->msgs = NULL;
> --
> 2.7.4
> 


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH] eal: fix use wrong time API
  2021-04-28  9:24 ` Morten Brørup
@ 2021-04-28 10:36   ` fengchengwen
  2021-04-28 10:59     ` Morten Brørup
  0 siblings, 1 reply; 18+ messages in thread
From: fengchengwen @ 2021-04-28 10:36 UTC (permalink / raw)
  To: Morten Brørup; +Cc: Min Hu (Connor), dev, ferruh.yigit, jerinj, skori



On 2021/4/28 17:24, Morten Brørup wrote:
>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Min Hu (Connor)
>> Sent: Wednesday, April 28, 2021 10:13 AM
>>
>> From: Chengwen Feng <fengchengwen@huawei.com>
>>
>> Currently, the mp uses gettimeofday() API to get the time, and used as
>> timeout parameter.
>>
>> But the time which gets from gettimeofday() API isn't monotonically
>> increasing. The process may fail if the system time is changed.
>>
>> This fixes it by using clock_gettime() API with monotonic attribution.
>>
>> Fixes: 783b6e54971d ("eal: add synchronous multi-process
>> communication")
>> Fixes: f05e26051c15 ("eal: add IPC asynchronous request")
>> Cc: stable@dpdk.org
>>
>> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
>> Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
>> ---
>>  lib/eal/common/eal_common_proc.c | 45 +++++++++++++++++---------------
>> --------
>>  1 file changed, 19 insertions(+), 26 deletions(-)
>>
>> diff --git a/lib/eal/common/eal_common_proc.c
>> b/lib/eal/common/eal_common_proc.c
>> index 6d1af3c..7f08826 100644
>> --- a/lib/eal/common/eal_common_proc.c
>> +++ b/lib/eal/common/eal_common_proc.c
>> @@ -40,6 +40,12 @@ static char mp_dir_path[PATH_MAX]; /* The directory
>> path for all mp sockets */
>>  static pthread_mutex_t mp_mutex_action = PTHREAD_MUTEX_INITIALIZER;
>>  static char peer_name[PATH_MAX];
>>
>> +#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */
>> +#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW
>> +#else
>> +#define CLOCK_TYPE_ID CLOCK_MONOTONIC
>> +#endif
> 
> Just out of curiosity: Why do you prefer CLOCK_MONOTONIC_RAW over CLOCK_MONOTONIC?
> 

There may be a slight difference: CLOCK_MONOTONIC_RAW is driven purely by the local
oscillator (please see the link below); it is just preferred in engineering practice.
https://stackoverflow.com/questions/14270300/what-is-the-difference-between-clock-monotonic-clock-monotonic-raw

>> +
>>  struct action_entry {
>>  	TAILQ_ENTRY(action_entry) next;
>>  	char action_name[RTE_MP_MAX_NAME_LEN];
>> @@ -490,14 +496,8 @@ async_reply_handle_thread_unsafe(void *arg)
>>  	struct pending_request *req = (struct pending_request *)arg;
>>  	enum async_action action;
>>  	struct timespec ts_now;
>> -	struct timeval now;
>>
>> -	if (gettimeofday(&now, NULL) < 0) {
>> -		RTE_LOG(ERR, EAL, "Cannot get current time\n");
>> -		goto no_trigger;
>> -	}
>> -	ts_now.tv_nsec = now.tv_usec * 1000;
>> -	ts_now.tv_sec = now.tv_sec;
>> +	clock_gettime(CLOCK_TYPE_ID, &ts_now);
>>
>>  	action = process_async_request(req, &ts_now);
>>
>> @@ -896,6 +896,7 @@ mp_request_sync(const char *dst, struct rte_mp_msg
>> *req,
>>  	       struct rte_mp_reply *reply, const struct timespec *ts)
>>  {
>>  	int ret;
>> +	pthread_condattr_t attr;
>>  	struct rte_mp_msg msg, *tmp;
>>  	struct pending_request pending_req, *exist;
>>
>> @@ -904,7 +905,9 @@ mp_request_sync(const char *dst, struct rte_mp_msg
>> *req,
>>  	strlcpy(pending_req.dst, dst, sizeof(pending_req.dst));
>>  	pending_req.request = req;
>>  	pending_req.reply = &msg;
>> -	pthread_cond_init(&pending_req.sync.cond, NULL);
>> +	pthread_condattr_init(&attr);
>> +	pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
> 
> Shouldn't CLOCK_MONOTONIC be CLOCK_TYPE_ID here too?

After reading the source code, it only supports CLOCK_MONOTONIC and CLOCK_REALTIME
(please see the link below), so we can't use CLOCK_TYPE_ID here.
https://code.woboq.org/userspace/glibc/nptl/pthread_condattr_setclock.c.html#pthread_condattr_setclock

Will fix in v2 by making CLOCK_TYPE_ID equal to CLOCK_MONOTONIC.

thanks

> 
>> +	pthread_cond_init(&pending_req.sync.cond, &attr);
>>
>>  	exist = find_pending_request(dst, req->name);
>>  	if (exist) {
>> @@ -967,8 +970,7 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct
>> rte_mp_reply *reply,
>>  	int dir_fd, ret = -1;
>>  	DIR *mp_dir;
>>  	struct dirent *ent;
>> -	struct timeval now;
>> -	struct timespec end;
>> +	struct timespec now, end;
>>  	const struct internal_config *internal_conf =
>>  		eal_get_internal_configuration();
>>
>> @@ -987,15 +989,10 @@ rte_mp_request_sync(struct rte_mp_msg *req,
>> struct rte_mp_reply *reply,
>>  		return -1;
>>  	}
>>
>> -	if (gettimeofday(&now, NULL) < 0) {
>> -		RTE_LOG(ERR, EAL, "Failed to get current time\n");
>> -		rte_errno = errno;
>> -		goto end;
>> -	}
>> -
>> -	end.tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
>> +	clock_gettime(CLOCK_TYPE_ID, &now);
>> +	end.tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
>>  	end.tv_sec = now.tv_sec + ts->tv_sec +
>> -			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
>> +			(now.tv_nsec + ts->tv_nsec) / 1000000000;
>>
>>  	/* for secondary process, send request to the primary process
>> only */
>>  	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
>> @@ -1069,7 +1066,7 @@ rte_mp_request_async(struct rte_mp_msg *req,
>> const struct timespec *ts,
>>  	int dir_fd, ret = 0;
>>  	DIR *mp_dir;
>>  	struct dirent *ent;
>> -	struct timeval now;
>> +	struct timespec now;
>>  	struct timespec *end;
>>  	bool dummy_used = false;
>>  	const struct internal_config *internal_conf =
>> @@ -1086,11 +1083,6 @@ rte_mp_request_async(struct rte_mp_msg *req,
>> const struct timespec *ts,
>>  		return -1;
>>  	}
>>
>> -	if (gettimeofday(&now, NULL) < 0) {
>> -		RTE_LOG(ERR, EAL, "Failed to get current time\n");
>> -		rte_errno = errno;
>> -		return -1;
>> -	}
>>  	copy = calloc(1, sizeof(*copy));
>>  	dummy = calloc(1, sizeof(*dummy));
>>  	param = calloc(1, sizeof(*param));
>> @@ -1108,9 +1100,10 @@ rte_mp_request_async(struct rte_mp_msg *req,
>> const struct timespec *ts,
>>  	end = &param->end;
>>  	reply = &param->user_reply;
>>
>> -	end->tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
>> +	clock_gettime(CLOCK_TYPE_ID, &now);
>> +	end->tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
>>  	end->tv_sec = now.tv_sec + ts->tv_sec +
>> -			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
>> +			(now.tv_nsec + ts->tv_nsec) / 1000000000;
>>  	reply->nb_sent = 0;
>>  	reply->nb_received = 0;
>>  	reply->msgs = NULL;
>> --
>> 2.7.4
>>
> 


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH] eal: fix use wrong time API
  2021-04-28 10:36   ` fengchengwen
@ 2021-04-28 10:59     ` Morten Brørup
  2021-04-29  2:12       ` Min Hu (Connor)
  0 siblings, 1 reply; 18+ messages in thread
From: Morten Brørup @ 2021-04-28 10:59 UTC (permalink / raw)
  To: fengchengwen; +Cc: Min Hu (Connor), dev, ferruh.yigit, jerinj, skori

> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of fengchengwen
> Sent: Wednesday, April 28, 2021 12:36 PM
> 
> On 2021/4/28 17:24, Morten Brørup wrote:
> >> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Min Hu (Connor)
> >> Sent: Wednesday, April 28, 2021 10:13 AM
> >>
> >> From: Chengwen Feng <fengchengwen@huawei.com>
> >>
> >> Currently, the mp uses gettimeofday() API to get the time, and used
> as
> >> timeout parameter.
> >>
> >> But the time which gets from gettimeofday() API isn't monotonically
> >> increasing. The process may fail if the system time is changed.
> >>
> >> This fixes it by using clock_gettime() API with monotonic
> attribution.
> >>
> >> Fixes: 783b6e54971d ("eal: add synchronous multi-process
> >> communication")
> >> Fixes: f05e26051c15 ("eal: add IPC asynchronous request")
> >> Cc: stable@dpdk.org
> >>
> >> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> >> Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
> >> ---
> >>  lib/eal/common/eal_common_proc.c | 45 +++++++++++++++++------------
> ---
> >> --------
> >>  1 file changed, 19 insertions(+), 26 deletions(-)
> >>
> >> diff --git a/lib/eal/common/eal_common_proc.c
> >> b/lib/eal/common/eal_common_proc.c
> >> index 6d1af3c..7f08826 100644
> >> --- a/lib/eal/common/eal_common_proc.c
> >> +++ b/lib/eal/common/eal_common_proc.c
> >> @@ -40,6 +40,12 @@ static char mp_dir_path[PATH_MAX]; /* The
> directory
> >> path for all mp sockets */
> >>  static pthread_mutex_t mp_mutex_action = PTHREAD_MUTEX_INITIALIZER;
> >>  static char peer_name[PATH_MAX];
> >>
> >> +#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */
> >> +#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW
> >> +#else
> >> +#define CLOCK_TYPE_ID CLOCK_MONOTONIC
> >> +#endif
> >
> > Just out of curiosity: Why do you prefer CLOCK_MONOTONIC_RAW over
> CLOCK_MONOTONIC?
> >
> 
> there may slightly difference, the CLOCK_MONOTONIC_RAW is totally local
> oscillator
> (pls see below link), just preferred in engineering practice.
> https://stackoverflow.com/questions/14270300/what-is-the-difference-
> between-clock-monotonic-clock-monotonic-raw

Interesting link! Following the threads there, it looks like CLOCK_MONOTONIC had a bug in some old kernel versions, where in certain circumstances it could jump slightly backwards.

That bug seems to have been fixed, so CLOCK_MONOTONIC should be safe to use. Source: https://bugzilla.redhat.com/show_bug.cgi?id=448449

> 
> >> +
> >>  struct action_entry {
> >>  	TAILQ_ENTRY(action_entry) next;
> >>  	char action_name[RTE_MP_MAX_NAME_LEN];
> >> @@ -490,14 +496,8 @@ async_reply_handle_thread_unsafe(void *arg)
> >>  	struct pending_request *req = (struct pending_request *)arg;
> >>  	enum async_action action;
> >>  	struct timespec ts_now;
> >> -	struct timeval now;
> >>
> >> -	if (gettimeofday(&now, NULL) < 0) {
> >> -		RTE_LOG(ERR, EAL, "Cannot get current time\n");
> >> -		goto no_trigger;
> >> -	}
> >> -	ts_now.tv_nsec = now.tv_usec * 1000;
> >> -	ts_now.tv_sec = now.tv_sec;
> >> +	clock_gettime(CLOCK_TYPE_ID, &ts_now);
> >>
> >>  	action = process_async_request(req, &ts_now);
> >>
> >> @@ -896,6 +896,7 @@ mp_request_sync(const char *dst, struct
> rte_mp_msg
> >> *req,
> >>  	       struct rte_mp_reply *reply, const struct timespec *ts)
> >>  {
> >>  	int ret;
> >> +	pthread_condattr_t attr;
> >>  	struct rte_mp_msg msg, *tmp;
> >>  	struct pending_request pending_req, *exist;
> >>
> >> @@ -904,7 +905,9 @@ mp_request_sync(const char *dst, struct
> rte_mp_msg
> >> *req,
> >>  	strlcpy(pending_req.dst, dst, sizeof(pending_req.dst));
> >>  	pending_req.request = req;
> >>  	pending_req.reply = &msg;
> >> -	pthread_cond_init(&pending_req.sync.cond, NULL);
> >> +	pthread_condattr_init(&attr);
> >> +	pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
> >
> > Shouldn't CLOCK_MONOTONIC be CLOCK_TYPE_ID here too?
> 
> After reading the source code, it only support CLOCK_MONOTONIC and
> CLOCK_REALTIME
> (pls see below link), so cant't use CLOCK_TYPE_ID here.
> https://code.woboq.org/userspace/glibc/nptl/pthread_condattr_setclock.c
> .html#pthread_condattr_setclock
> 
> will fix in v2 by make CLOCK_TYPE_ID equal CLOCK_MONOTONIC.

OK, then just get rid of the CLOCK_TYPE_ID definition and use CLOCK_MONOTONIC instead.

> 
> thanks
> 
> >
> >> +	pthread_cond_init(&pending_req.sync.cond, &attr);
> >>
> >>  	exist = find_pending_request(dst, req->name);
> >>  	if (exist) {
> >> @@ -967,8 +970,7 @@ rte_mp_request_sync(struct rte_mp_msg *req,
> struct
> >> rte_mp_reply *reply,
> >>  	int dir_fd, ret = -1;
> >>  	DIR *mp_dir;
> >>  	struct dirent *ent;
> >> -	struct timeval now;
> >> -	struct timespec end;
> >> +	struct timespec now, end;
> >>  	const struct internal_config *internal_conf =
> >>  		eal_get_internal_configuration();
> >>
> >> @@ -987,15 +989,10 @@ rte_mp_request_sync(struct rte_mp_msg *req,
> >> struct rte_mp_reply *reply,
> >>  		return -1;
> >>  	}
> >>
> >> -	if (gettimeofday(&now, NULL) < 0) {
> >> -		RTE_LOG(ERR, EAL, "Failed to get current time\n");
> >> -		rte_errno = errno;
> >> -		goto end;
> >> -	}
> >> -
> >> -	end.tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
> >> +	clock_gettime(CLOCK_TYPE_ID, &now);
> >> +	end.tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
> >>  	end.tv_sec = now.tv_sec + ts->tv_sec +
> >> -			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
> >> +			(now.tv_nsec + ts->tv_nsec) / 1000000000;
> >>
> >>  	/* for secondary process, send request to the primary process
> >> only */
> >>  	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
> >> @@ -1069,7 +1066,7 @@ rte_mp_request_async(struct rte_mp_msg *req,
> >> const struct timespec *ts,
> >>  	int dir_fd, ret = 0;
> >>  	DIR *mp_dir;
> >>  	struct dirent *ent;
> >> -	struct timeval now;
> >> +	struct timespec now;
> >>  	struct timespec *end;
> >>  	bool dummy_used = false;
> >>  	const struct internal_config *internal_conf =
> >> @@ -1086,11 +1083,6 @@ rte_mp_request_async(struct rte_mp_msg *req,
> >> const struct timespec *ts,
> >>  		return -1;
> >>  	}
> >>
> >> -	if (gettimeofday(&now, NULL) < 0) {
> >> -		RTE_LOG(ERR, EAL, "Failed to get current time\n");
> >> -		rte_errno = errno;
> >> -		return -1;
> >> -	}
> >>  	copy = calloc(1, sizeof(*copy));
> >>  	dummy = calloc(1, sizeof(*dummy));
> >>  	param = calloc(1, sizeof(*param));
> >> @@ -1108,9 +1100,10 @@ rte_mp_request_async(struct rte_mp_msg *req,
> >> const struct timespec *ts,
> >>  	end = &param->end;
> >>  	reply = &param->user_reply;
> >>
> >> -	end->tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
> >> +	clock_gettime(CLOCK_TYPE_ID, &now);
> >> +	end->tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
> >>  	end->tv_sec = now.tv_sec + ts->tv_sec +
> >> -			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
> >> +			(now.tv_nsec + ts->tv_nsec) / 1000000000;
> >>  	reply->nb_sent = 0;
> >>  	reply->nb_received = 0;
> >>  	reply->msgs = NULL;
> >> --
> >> 2.7.4
> >>
> >
> 


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [dpdk-dev] [PATCH v2] eal: fix use wrong time API
  2021-04-28  8:12 [dpdk-dev] [PATCH] eal: fix use wrong time API Min Hu (Connor)
  2021-04-28  9:24 ` Morten Brørup
@ 2021-04-29  2:10 ` Min Hu (Connor)
  2021-04-29  7:12   ` Morten Brørup
                     ` (2 more replies)
  2021-05-11 10:41 ` [dpdk-dev] [PATCH v4] " Chengwen Feng
  2 siblings, 3 replies; 18+ messages in thread
From: Min Hu (Connor) @ 2021-04-29  2:10 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, mb, skori, jerinj

Currently, the mp code uses the gettimeofday() API to get the time, which is
then used as the timeout parameter.

But the time returned by the gettimeofday() API isn't monotonically
increasing. The process may fail if the system time is changed.

This fixes it by using the clock_gettime() API with a monotonic clock.

Fixes: 783b6e54971d ("eal: add synchronous multi-process communication")
Fixes: f05e26051c15 ("eal: add IPC asynchronous request")
Cc: stable@dpdk.org

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
---
v2:
* get rid of the CLOCK_TYPE_ID definition and use CLOCK_MONOTONIC
instead.
---
 lib/eal/common/eal_common_proc.c | 39 +++++++++++++--------------------------
 1 file changed, 13 insertions(+), 26 deletions(-)

diff --git a/lib/eal/common/eal_common_proc.c b/lib/eal/common/eal_common_proc.c
index 6d1af3c..6af8ba2 100644
--- a/lib/eal/common/eal_common_proc.c
+++ b/lib/eal/common/eal_common_proc.c
@@ -490,14 +490,8 @@ async_reply_handle_thread_unsafe(void *arg)
 	struct pending_request *req = (struct pending_request *)arg;
 	enum async_action action;
 	struct timespec ts_now;
-	struct timeval now;
 
-	if (gettimeofday(&now, NULL) < 0) {
-		RTE_LOG(ERR, EAL, "Cannot get current time\n");
-		goto no_trigger;
-	}
-	ts_now.tv_nsec = now.tv_usec * 1000;
-	ts_now.tv_sec = now.tv_sec;
+	clock_gettime(CLOCK_MONOTONIC, &ts_now);
 
 	action = process_async_request(req, &ts_now);
 
@@ -896,6 +890,7 @@ mp_request_sync(const char *dst, struct rte_mp_msg *req,
 	       struct rte_mp_reply *reply, const struct timespec *ts)
 {
 	int ret;
+	pthread_condattr_t attr;
 	struct rte_mp_msg msg, *tmp;
 	struct pending_request pending_req, *exist;
 
@@ -904,7 +899,9 @@ mp_request_sync(const char *dst, struct rte_mp_msg *req,
 	strlcpy(pending_req.dst, dst, sizeof(pending_req.dst));
 	pending_req.request = req;
 	pending_req.reply = &msg;
-	pthread_cond_init(&pending_req.sync.cond, NULL);
+	pthread_condattr_init(&attr);
+	pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
+	pthread_cond_init(&pending_req.sync.cond, &attr);
 
 	exist = find_pending_request(dst, req->name);
 	if (exist) {
@@ -967,8 +964,7 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 	int dir_fd, ret = -1;
 	DIR *mp_dir;
 	struct dirent *ent;
-	struct timeval now;
-	struct timespec end;
+	struct timespec now, end;
 	const struct internal_config *internal_conf =
 		eal_get_internal_configuration();
 
@@ -987,15 +983,10 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 		return -1;
 	}
 
-	if (gettimeofday(&now, NULL) < 0) {
-		RTE_LOG(ERR, EAL, "Failed to get current time\n");
-		rte_errno = errno;
-		goto end;
-	}
-
-	end.tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
+	clock_gettime(CLOCK_MONOTONIC, &now);
+	end.tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
 	end.tv_sec = now.tv_sec + ts->tv_sec +
-			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
+			(now.tv_nsec + ts->tv_nsec) / 1000000000;
 
 	/* for secondary process, send request to the primary process only */
 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
@@ -1069,7 +1060,7 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
 	int dir_fd, ret = 0;
 	DIR *mp_dir;
 	struct dirent *ent;
-	struct timeval now;
+	struct timespec now;
 	struct timespec *end;
 	bool dummy_used = false;
 	const struct internal_config *internal_conf =
@@ -1086,11 +1077,6 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
 		return -1;
 	}
 
-	if (gettimeofday(&now, NULL) < 0) {
-		RTE_LOG(ERR, EAL, "Failed to get current time\n");
-		rte_errno = errno;
-		return -1;
-	}
 	copy = calloc(1, sizeof(*copy));
 	dummy = calloc(1, sizeof(*dummy));
 	param = calloc(1, sizeof(*param));
@@ -1108,9 +1094,10 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
 	end = &param->end;
 	reply = &param->user_reply;
 
-	end->tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
+	clock_gettime(CLOCK_MONOTONIC, &now);
+	end->tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
 	end->tv_sec = now.tv_sec + ts->tv_sec +
-			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
+			(now.tv_nsec + ts->tv_nsec) / 1000000000;
 	reply->nb_sent = 0;
 	reply->nb_received = 0;
 	reply->msgs = NULL;
-- 
2.7.4


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH] eal: fix use wrong time API
  2021-04-28 10:59     ` Morten Brørup
@ 2021-04-29  2:12       ` Min Hu (Connor)
  0 siblings, 0 replies; 18+ messages in thread
From: Min Hu (Connor) @ 2021-04-29  2:12 UTC (permalink / raw)
  To: Morten Brørup, fengchengwen; +Cc: dev, ferruh.yigit, jerinj, skori

Hi, Morten,
     fixed in v2, thanks.

在 2021/4/28 18:59, Morten Brørup 写道:
>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of fengchengwen
>> Sent: Wednesday, April 28, 2021 12:36 PM
>>
>> On 2021/4/28 17:24, Morten Brørup wrote:
>>>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Min Hu (Connor)
>>>> Sent: Wednesday, April 28, 2021 10:13 AM
>>>>
>>>> From: Chengwen Feng <fengchengwen@huawei.com>
>>>>
>>>> Currently, the mp uses gettimeofday() API to get the time, and used
>> as
>>>> timeout parameter.
>>>>
>>>> But the time which gets from gettimeofday() API isn't monotonically
>>>> increasing. The process may fail if the system time is changed.
>>>>
>>>> This fixes it by using clock_gettime() API with monotonic
>> attribution.
>>>>
>>>> Fixes: 783b6e54971d ("eal: add synchronous multi-process
>>>> communication")
>>>> Fixes: f05e26051c15 ("eal: add IPC asynchronous request")
>>>> Cc: stable@dpdk.org
>>>>
>>>> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
>>>> Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
>>>> ---
>>>>   lib/eal/common/eal_common_proc.c | 45 +++++++++++++++++------------
>> ---
>>>> --------
>>>>   1 file changed, 19 insertions(+), 26 deletions(-)
>>>>
>>>> diff --git a/lib/eal/common/eal_common_proc.c
>>>> b/lib/eal/common/eal_common_proc.c
>>>> index 6d1af3c..7f08826 100644
>>>> --- a/lib/eal/common/eal_common_proc.c
>>>> +++ b/lib/eal/common/eal_common_proc.c
>>>> @@ -40,6 +40,12 @@ static char mp_dir_path[PATH_MAX]; /* The
>> directory
>>>> path for all mp sockets */
>>>>   static pthread_mutex_t mp_mutex_action = PTHREAD_MUTEX_INITIALIZER;
>>>>   static char peer_name[PATH_MAX];
>>>>
>>>> +#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */
>>>> +#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW
>>>> +#else
>>>> +#define CLOCK_TYPE_ID CLOCK_MONOTONIC
>>>> +#endif
>>>
>>> Just out of curiosity: Why do you prefer CLOCK_MONOTONIC_RAW over
>> CLOCK_MONOTONIC?
>>>
>>
>> there may be a slight difference: CLOCK_MONOTONIC_RAW is driven purely by the local
>> oscillator
>> (pls see below link); it is just preferred in engineering practice.
>> https://stackoverflow.com/questions/14270300/what-is-the-difference-
>> between-clock-monotonic-clock-monotonic-raw
> 
> Interesting link! Following the threads there, it looks like CLOCK_MONOTONIC had a bug in some old kernel versions, where in certain circumstances it could jump slightly backwards.
> 
> That bug seems to have been fixed, so CLOCK_MONOTONIC should be safe to use. Source: https://bugzilla.redhat.com/show_bug.cgi?id=448449
> 
>>
>>>> +
>>>>   struct action_entry {
>>>>   	TAILQ_ENTRY(action_entry) next;
>>>>   	char action_name[RTE_MP_MAX_NAME_LEN];
>>>> @@ -490,14 +496,8 @@ async_reply_handle_thread_unsafe(void *arg)
>>>>   	struct pending_request *req = (struct pending_request *)arg;
>>>>   	enum async_action action;
>>>>   	struct timespec ts_now;
>>>> -	struct timeval now;
>>>>
>>>> -	if (gettimeofday(&now, NULL) < 0) {
>>>> -		RTE_LOG(ERR, EAL, "Cannot get current time\n");
>>>> -		goto no_trigger;
>>>> -	}
>>>> -	ts_now.tv_nsec = now.tv_usec * 1000;
>>>> -	ts_now.tv_sec = now.tv_sec;
>>>> +	clock_gettime(CLOCK_TYPE_ID, &ts_now);
>>>>
>>>>   	action = process_async_request(req, &ts_now);
>>>>
>>>> @@ -896,6 +896,7 @@ mp_request_sync(const char *dst, struct
>> rte_mp_msg
>>>> *req,
>>>>   	       struct rte_mp_reply *reply, const struct timespec *ts)
>>>>   {
>>>>   	int ret;
>>>> +	pthread_condattr_t attr;
>>>>   	struct rte_mp_msg msg, *tmp;
>>>>   	struct pending_request pending_req, *exist;
>>>>
>>>> @@ -904,7 +905,9 @@ mp_request_sync(const char *dst, struct
>> rte_mp_msg
>>>> *req,
>>>>   	strlcpy(pending_req.dst, dst, sizeof(pending_req.dst));
>>>>   	pending_req.request = req;
>>>>   	pending_req.reply = &msg;
>>>> -	pthread_cond_init(&pending_req.sync.cond, NULL);
>>>> +	pthread_condattr_init(&attr);
>>>> +	pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
>>>
>>> Shouldn't CLOCK_MONOTONIC be CLOCK_TYPE_ID here too?
>>
>> After reading the source code, it only supports CLOCK_MONOTONIC and
>> CLOCK_REALTIME
>> (pls see below link), so can't use CLOCK_TYPE_ID here.
>> https://code.woboq.org/userspace/glibc/nptl/pthread_condattr_setclock.c
>> .html#pthread_condattr_setclock
>>
>> will fix in v2 by making CLOCK_TYPE_ID equal to CLOCK_MONOTONIC.
> 
> OK, then just get rid of the CLOCK_TYPE_ID definition and use CLOCK_MONOTONIC instead.
> 
>>
>> thanks
>>
>>>
>>>> +	pthread_cond_init(&pending_req.sync.cond, &attr);
>>>>
>>>>   	exist = find_pending_request(dst, req->name);
>>>>   	if (exist) {
>>>> @@ -967,8 +970,7 @@ rte_mp_request_sync(struct rte_mp_msg *req,
>> struct
>>>> rte_mp_reply *reply,
>>>>   	int dir_fd, ret = -1;
>>>>   	DIR *mp_dir;
>>>>   	struct dirent *ent;
>>>> -	struct timeval now;
>>>> -	struct timespec end;
>>>> +	struct timespec now, end;
>>>>   	const struct internal_config *internal_conf =
>>>>   		eal_get_internal_configuration();
>>>>
>>>> @@ -987,15 +989,10 @@ rte_mp_request_sync(struct rte_mp_msg *req,
>>>> struct rte_mp_reply *reply,
>>>>   		return -1;
>>>>   	}
>>>>
>>>> -	if (gettimeofday(&now, NULL) < 0) {
>>>> -		RTE_LOG(ERR, EAL, "Failed to get current time\n");
>>>> -		rte_errno = errno;
>>>> -		goto end;
>>>> -	}
>>>> -
>>>> -	end.tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
>>>> +	clock_gettime(CLOCK_TYPE_ID, &now);
>>>> +	end.tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
>>>>   	end.tv_sec = now.tv_sec + ts->tv_sec +
>>>> -			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
>>>> +			(now.tv_nsec + ts->tv_nsec) / 1000000000;
>>>>
>>>>   	/* for secondary process, send request to the primary process
>>>> only */
>>>>   	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
>>>> @@ -1069,7 +1066,7 @@ rte_mp_request_async(struct rte_mp_msg *req,
>>>> const struct timespec *ts,
>>>>   	int dir_fd, ret = 0;
>>>>   	DIR *mp_dir;
>>>>   	struct dirent *ent;
>>>> -	struct timeval now;
>>>> +	struct timespec now;
>>>>   	struct timespec *end;
>>>>   	bool dummy_used = false;
>>>>   	const struct internal_config *internal_conf =
>>>> @@ -1086,11 +1083,6 @@ rte_mp_request_async(struct rte_mp_msg *req,
>>>> const struct timespec *ts,
>>>>   		return -1;
>>>>   	}
>>>>
>>>> -	if (gettimeofday(&now, NULL) < 0) {
>>>> -		RTE_LOG(ERR, EAL, "Failed to get current time\n");
>>>> -		rte_errno = errno;
>>>> -		return -1;
>>>> -	}
>>>>   	copy = calloc(1, sizeof(*copy));
>>>>   	dummy = calloc(1, sizeof(*dummy));
>>>>   	param = calloc(1, sizeof(*param));
>>>> @@ -1108,9 +1100,10 @@ rte_mp_request_async(struct rte_mp_msg *req,
>>>> const struct timespec *ts,
>>>>   	end = &param->end;
>>>>   	reply = &param->user_reply;
>>>>
>>>> -	end->tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
>>>> +	clock_gettime(CLOCK_TYPE_ID, &now);
>>>> +	end->tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
>>>>   	end->tv_sec = now.tv_sec + ts->tv_sec +
>>>> -			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
>>>> +			(now.tv_nsec + ts->tv_nsec) / 1000000000;
>>>>   	reply->nb_sent = 0;
>>>>   	reply->nb_received = 0;
>>>>   	reply->msgs = NULL;
>>>> --
>>>> 2.7.4
>>>>
>>>
>>
> 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v2] eal: fix use wrong time API
  2021-04-29  2:10 ` [dpdk-dev] [PATCH v2] " Min Hu (Connor)
@ 2021-04-29  7:12   ` Morten Brørup
  2021-05-04 16:49   ` Thomas Monjalon
  2021-05-05  3:43   ` [dpdk-dev] [PATCH v3] " Chengwen Feng
  2 siblings, 0 replies; 18+ messages in thread
From: Morten Brørup @ 2021-04-29  7:12 UTC (permalink / raw)
  To: Min Hu (Connor), dev; +Cc: ferruh.yigit, skori, jerinj

> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Min Hu (Connor)
> Sent: Thursday, April 29, 2021 4:11 AM
> To: dev@dpdk.org
> 
> Currently, the mp uses gettimeofday() API to get the time, and used as
> timeout parameter.
> 
> But the time which gets from gettimeofday() API isn't monotonically
> increasing. The process may fail if the system time is changed.
> 
> This fixes it by using clock_gettime() API with monotonic attribution.
> 
> Fixes: 783b6e54971d ("eal: add synchronous multi-process
> communication")
> Fixes: f05e26051c15 ("eal: add IPC asynchronous request")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
> ---
> v2:
> * get rid of the CLOCK_TYPE_ID definition and use CLOCK_MONOTONIC
> instead.
> ---
>  lib/eal/common/eal_common_proc.c | 39 +++++++++++++-------------------
> -------
>  1 file changed, 13 insertions(+), 26 deletions(-)
> 
> diff --git a/lib/eal/common/eal_common_proc.c
> b/lib/eal/common/eal_common_proc.c
> index 6d1af3c..6af8ba2 100644
> --- a/lib/eal/common/eal_common_proc.c
> +++ b/lib/eal/common/eal_common_proc.c
> @@ -490,14 +490,8 @@ async_reply_handle_thread_unsafe(void *arg)
>  	struct pending_request *req = (struct pending_request *)arg;
>  	enum async_action action;
>  	struct timespec ts_now;
> -	struct timeval now;
> 
> -	if (gettimeofday(&now, NULL) < 0) {
> -		RTE_LOG(ERR, EAL, "Cannot get current time\n");
> -		goto no_trigger;
> -	}
> -	ts_now.tv_nsec = now.tv_usec * 1000;
> -	ts_now.tv_sec = now.tv_sec;
> +	clock_gettime(CLOCK_MONOTONIC, &ts_now);
> 
>  	action = process_async_request(req, &ts_now);
> 
> @@ -896,6 +890,7 @@ mp_request_sync(const char *dst, struct rte_mp_msg
> *req,
>  	       struct rte_mp_reply *reply, const struct timespec *ts)
>  {
>  	int ret;
> +	pthread_condattr_t attr;
>  	struct rte_mp_msg msg, *tmp;
>  	struct pending_request pending_req, *exist;
> 
> @@ -904,7 +899,9 @@ mp_request_sync(const char *dst, struct rte_mp_msg
> *req,
>  	strlcpy(pending_req.dst, dst, sizeof(pending_req.dst));
>  	pending_req.request = req;
>  	pending_req.reply = &msg;
> -	pthread_cond_init(&pending_req.sync.cond, NULL);
> +	pthread_condattr_init(&attr);
> +	pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
> +	pthread_cond_init(&pending_req.sync.cond, &attr);
> 
>  	exist = find_pending_request(dst, req->name);
>  	if (exist) {
> @@ -967,8 +964,7 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct
> rte_mp_reply *reply,
>  	int dir_fd, ret = -1;
>  	DIR *mp_dir;
>  	struct dirent *ent;
> -	struct timeval now;
> -	struct timespec end;
> +	struct timespec now, end;
>  	const struct internal_config *internal_conf =
>  		eal_get_internal_configuration();
> 
> @@ -987,15 +983,10 @@ rte_mp_request_sync(struct rte_mp_msg *req,
> struct rte_mp_reply *reply,
>  		return -1;
>  	}
> 
> -	if (gettimeofday(&now, NULL) < 0) {
> -		RTE_LOG(ERR, EAL, "Failed to get current time\n");
> -		rte_errno = errno;
> -		goto end;
> -	}
> -
> -	end.tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
> +	clock_gettime(CLOCK_MONOTONIC, &now);
> +	end.tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
>  	end.tv_sec = now.tv_sec + ts->tv_sec +
> -			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
> +			(now.tv_nsec + ts->tv_nsec) / 1000000000;
> 
>  	/* for secondary process, send request to the primary process
> only */
>  	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
> @@ -1069,7 +1060,7 @@ rte_mp_request_async(struct rte_mp_msg *req,
> const struct timespec *ts,
>  	int dir_fd, ret = 0;
>  	DIR *mp_dir;
>  	struct dirent *ent;
> -	struct timeval now;
> +	struct timespec now;
>  	struct timespec *end;
>  	bool dummy_used = false;
>  	const struct internal_config *internal_conf =
> @@ -1086,11 +1077,6 @@ rte_mp_request_async(struct rte_mp_msg *req,
> const struct timespec *ts,
>  		return -1;
>  	}
> 
> -	if (gettimeofday(&now, NULL) < 0) {
> -		RTE_LOG(ERR, EAL, "Failed to get current time\n");
> -		rte_errno = errno;
> -		return -1;
> -	}
>  	copy = calloc(1, sizeof(*copy));
>  	dummy = calloc(1, sizeof(*dummy));
>  	param = calloc(1, sizeof(*param));
> @@ -1108,9 +1094,10 @@ rte_mp_request_async(struct rte_mp_msg *req,
> const struct timespec *ts,
>  	end = &param->end;
>  	reply = &param->user_reply;
> 
> -	end->tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
> +	clock_gettime(CLOCK_MONOTONIC, &now);
> +	end->tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
>  	end->tv_sec = now.tv_sec + ts->tv_sec +
> -			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
> +			(now.tv_nsec + ts->tv_nsec) / 1000000000;
>  	reply->nb_sent = 0;
>  	reply->nb_received = 0;
>  	reply->msgs = NULL;
> --
> 2.7.4
> 

Acked-by: Morten Brørup <mb@smartsharesystems.com>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v2] eal: fix use wrong time API
  2021-04-29  2:10 ` [dpdk-dev] [PATCH v2] " Min Hu (Connor)
  2021-04-29  7:12   ` Morten Brørup
@ 2021-05-04 16:49   ` Thomas Monjalon
  2021-05-04 19:12     ` Morten Brørup
  2021-05-05  3:43   ` [dpdk-dev] [PATCH v3] " Chengwen Feng
  2 siblings, 1 reply; 18+ messages in thread
From: Thomas Monjalon @ 2021-05-04 16:49 UTC (permalink / raw)
  To: Min Hu (Connor); +Cc: dev, ferruh.yigit, mb, skori, jerinj

29/04/2021 04:10, Min Hu (Connor):
> Currently, the mp uses gettimeofday() API to get the time, and used as
> timeout parameter.
> 
> But the time which gets from gettimeofday() API isn't monotonically
> increasing. The process may fail if the system time is changed.
> 
> This fixes it by using clock_gettime() API with monotonic attribution.
> 
> Fixes: 783b6e54971d ("eal: add synchronous multi-process communication")
> Fixes: f05e26051c15 ("eal: add IPC asynchronous request")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
> ---
[...]
> --- a/lib/eal/common/eal_common_proc.c
> +++ b/lib/eal/common/eal_common_proc.c
> -	if (gettimeofday(&now, NULL) < 0) {
> -		RTE_LOG(ERR, EAL, "Cannot get current time\n");
> -		goto no_trigger;
> -	}
> -	ts_now.tv_nsec = now.tv_usec * 1000;
> -	ts_now.tv_sec = now.tv_sec;
> +	clock_gettime(CLOCK_MONOTONIC, &ts_now);

Why not test the return value?

I think this change would not be appropriate after -rc1.
If you agree, I will postpone to DPDK 21.08.



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v2] eal: fix use wrong time API
  2021-05-04 16:49   ` Thomas Monjalon
@ 2021-05-04 19:12     ` Morten Brørup
  2021-05-05  6:14       ` Thomas Monjalon
  0 siblings, 1 reply; 18+ messages in thread
From: Morten Brørup @ 2021-05-04 19:12 UTC (permalink / raw)
  To: Thomas Monjalon, Min Hu (Connor); +Cc: dev, ferruh.yigit, skori, jerinj

> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Thomas Monjalon
> Sent: Tuesday, May 4, 2021 6:50 PM
> 
> 29/04/2021 04:10, Min Hu (Connor):
> > Currently, the mp uses gettimeofday() API to get the time, and used
> as
> > timeout parameter.
> >
> > But the time which gets from gettimeofday() API isn't monotonically
> > increasing. The process may fail if the system time is changed.
> >
> > This fixes it by using clock_gettime() API with monotonic
> attribution.
> >
> > Fixes: 783b6e54971d ("eal: add synchronous multi-process
> communication")
> > Fixes: f05e26051c15 ("eal: add IPC asynchronous request")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> > Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
> > ---
> [...]
> > --- a/lib/eal/common/eal_common_proc.c
> > +++ b/lib/eal/common/eal_common_proc.c
> > -	if (gettimeofday(&now, NULL) < 0) {
> > -		RTE_LOG(ERR, EAL, "Cannot get current time\n");
> > -		goto no_trigger;
> > -	}
> > -	ts_now.tv_nsec = now.tv_usec * 1000;
> > -	ts_now.tv_sec = now.tv_sec;
> > +	clock_gettime(CLOCK_MONOTONIC, &ts_now);
> 
> Why not testing the return value?

Because it is guaranteed not to fail. Ref:
https://linux.die.net/man/3/clock_gettime
https://www.freebsd.org/cgi/man.cgi?query=clock_gettime

> 
> I think this change would not be appropriate after -rc1.
> If you agree, I will postpone to DPDK 21.08.

It does fix a serious bug, where IPC timeouts can incorrectly happen. And this is not a theoretical bug; I have seen errors happen due to using the wrong clock source in other projects.

However, I have no clue if these IPC library functions are important or not. So I have no qualified opinion about postponing the change.


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [dpdk-dev] [PATCH v3] eal: fix use wrong time API
  2021-04-29  2:10 ` [dpdk-dev] [PATCH v2] " Min Hu (Connor)
  2021-04-29  7:12   ` Morten Brørup
  2021-05-04 16:49   ` Thomas Monjalon
@ 2021-05-05  3:43   ` Chengwen Feng
  2021-05-11  7:36     ` Thomas Monjalon
  2 siblings, 1 reply; 18+ messages in thread
From: Chengwen Feng @ 2021-05-05  3:43 UTC (permalink / raw)
  To: thomas, ferruh.yigit, mb, skori, jerinj; +Cc: dev, linuxarm

Currently, the mp code uses the gettimeofday() API to get the current time,
which is then used to compute the timeout.

But the time returned by the gettimeofday() API isn't monotonically
increasing. The process may fail if the system time is changed.

This fixes it by using the clock_gettime() API with the monotonic clock.

Fixes: 783b6e54971d ("eal: add synchronous multi-process communication")
Fixes: f05e26051c15 ("eal: add IPC asynchronous request")
Cc: stable@dpdk.org

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
v3:
* add acked-by.
* change patch's author.

v2:
* get rid of the CLOCK_TYPE_ID definition and use CLOCK_MONOTONIC
instead.
---
 lib/eal/common/eal_common_proc.c | 39 +++++++++++++--------------------------
 1 file changed, 13 insertions(+), 26 deletions(-)

diff --git a/lib/eal/common/eal_common_proc.c b/lib/eal/common/eal_common_proc.c
index 6d1af3c..6af8ba2 100644
--- a/lib/eal/common/eal_common_proc.c
+++ b/lib/eal/common/eal_common_proc.c
@@ -490,14 +490,8 @@ async_reply_handle_thread_unsafe(void *arg)
 	struct pending_request *req = (struct pending_request *)arg;
 	enum async_action action;
 	struct timespec ts_now;
-	struct timeval now;
 
-	if (gettimeofday(&now, NULL) < 0) {
-		RTE_LOG(ERR, EAL, "Cannot get current time\n");
-		goto no_trigger;
-	}
-	ts_now.tv_nsec = now.tv_usec * 1000;
-	ts_now.tv_sec = now.tv_sec;
+	clock_gettime(CLOCK_MONOTONIC, &ts_now);
 
 	action = process_async_request(req, &ts_now);
 
@@ -896,6 +890,7 @@ mp_request_sync(const char *dst, struct rte_mp_msg *req,
 	       struct rte_mp_reply *reply, const struct timespec *ts)
 {
 	int ret;
+	pthread_condattr_t attr;
 	struct rte_mp_msg msg, *tmp;
 	struct pending_request pending_req, *exist;
 
@@ -904,7 +899,9 @@ mp_request_sync(const char *dst, struct rte_mp_msg *req,
 	strlcpy(pending_req.dst, dst, sizeof(pending_req.dst));
 	pending_req.request = req;
 	pending_req.reply = &msg;
-	pthread_cond_init(&pending_req.sync.cond, NULL);
+	pthread_condattr_init(&attr);
+	pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
+	pthread_cond_init(&pending_req.sync.cond, &attr);
 
 	exist = find_pending_request(dst, req->name);
 	if (exist) {
@@ -967,8 +964,7 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 	int dir_fd, ret = -1;
 	DIR *mp_dir;
 	struct dirent *ent;
-	struct timeval now;
-	struct timespec end;
+	struct timespec now, end;
 	const struct internal_config *internal_conf =
 		eal_get_internal_configuration();
 
@@ -987,15 +983,10 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 		return -1;
 	}
 
-	if (gettimeofday(&now, NULL) < 0) {
-		RTE_LOG(ERR, EAL, "Failed to get current time\n");
-		rte_errno = errno;
-		goto end;
-	}
-
-	end.tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
+	clock_gettime(CLOCK_MONOTONIC, &now);
+	end.tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
 	end.tv_sec = now.tv_sec + ts->tv_sec +
-			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
+			(now.tv_nsec + ts->tv_nsec) / 1000000000;
 
 	/* for secondary process, send request to the primary process only */
 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
@@ -1069,7 +1060,7 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
 	int dir_fd, ret = 0;
 	DIR *mp_dir;
 	struct dirent *ent;
-	struct timeval now;
+	struct timespec now;
 	struct timespec *end;
 	bool dummy_used = false;
 	const struct internal_config *internal_conf =
@@ -1086,11 +1077,6 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
 		return -1;
 	}
 
-	if (gettimeofday(&now, NULL) < 0) {
-		RTE_LOG(ERR, EAL, "Failed to get current time\n");
-		rte_errno = errno;
-		return -1;
-	}
 	copy = calloc(1, sizeof(*copy));
 	dummy = calloc(1, sizeof(*dummy));
 	param = calloc(1, sizeof(*param));
@@ -1108,9 +1094,10 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
 	end = &param->end;
 	reply = &param->user_reply;
 
-	end->tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
+	clock_gettime(CLOCK_MONOTONIC, &now);
+	end->tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
 	end->tv_sec = now.tv_sec + ts->tv_sec +
-			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
+			(now.tv_nsec + ts->tv_nsec) / 1000000000;
 	reply->nb_sent = 0;
 	reply->nb_received = 0;
 	reply->msgs = NULL;
-- 
2.8.1


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v2] eal: fix use wrong time API
  2021-05-04 19:12     ` Morten Brørup
@ 2021-05-05  6:14       ` Thomas Monjalon
  2021-05-05  6:26         ` Morten Brørup
  0 siblings, 1 reply; 18+ messages in thread
From: Thomas Monjalon @ 2021-05-05  6:14 UTC (permalink / raw)
  To: Min Hu (Connor), Morten Brørup; +Cc: dev, ferruh.yigit, skori, jerinj

04/05/2021 21:12, Morten Brørup:
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Thomas Monjalon
> > Sent: Tuesday, May 4, 2021 6:50 PM
> > 
> > 29/04/2021 04:10, Min Hu (Connor):
> > > Currently, the mp uses gettimeofday() API to get the time, and used
> > as
> > > timeout parameter.
> > >
> > > But the time which gets from gettimeofday() API isn't monotonically
> > > increasing. The process may fail if the system time is changed.
> > >
> > > This fixes it by using clock_gettime() API with monotonic
> > attribution.
> > >
> > > Fixes: 783b6e54971d ("eal: add synchronous multi-process
> > communication")
> > > Fixes: f05e26051c15 ("eal: add IPC asynchronous request")
> > > Cc: stable@dpdk.org
> > >
> > > Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> > > Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
> > > ---
> > [...]
> > > --- a/lib/eal/common/eal_common_proc.c
> > > +++ b/lib/eal/common/eal_common_proc.c
> > > -	if (gettimeofday(&now, NULL) < 0) {
> > > -		RTE_LOG(ERR, EAL, "Cannot get current time\n");
> > > -		goto no_trigger;
> > > -	}
> > > -	ts_now.tv_nsec = now.tv_usec * 1000;
> > > -	ts_now.tv_sec = now.tv_sec;
> > > +	clock_gettime(CLOCK_MONOTONIC, &ts_now);
> > 
> > Why not testing the return value?
> 
> Because it is guaranteed not to fail. Ref:
> https://linux.die.net/man/3/clock_gettime
> https://www.freebsd.org/cgi/man.cgi?query=clock_gettime

I see "return 0 for success, or -1 for failure".
Where is it said it cannot fail?

> > I think this change would not be appropriate after -rc1.
> > If you agree, I will postpone to DPDK 21.08.
> 
> It does fix a serious bug, where IPC timeouts can incorrectly happen. And this is not a theoretical bug; I have seen errors happen due to using the wrong clock source in other projects.
> 
> However, I have no clue if these IPC library functions are important or not. So I have no qualified opinion about postponing the change.

I think nobody hit such bug with DPDK IPC.



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v2] eal: fix use wrong time API
  2021-05-05  6:14       ` Thomas Monjalon
@ 2021-05-05  6:26         ` Morten Brørup
  2021-05-05  6:51           ` Thomas Monjalon
  0 siblings, 1 reply; 18+ messages in thread
From: Morten Brørup @ 2021-05-05  6:26 UTC (permalink / raw)
  To: Thomas Monjalon, Min Hu (Connor); +Cc: dev, ferruh.yigit, skori, jerinj

> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Thomas Monjalon
> Sent: Wednesday, May 5, 2021 8:14 AM
> 
> 04/05/2021 21:12, Morten Brørup:
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Thomas
> Monjalon
> > > Sent: Tuesday, May 4, 2021 6:50 PM
> > >
> > > 29/04/2021 04:10, Min Hu (Connor):
> > > > Currently, the mp uses gettimeofday() API to get the time, and
> used
> > > as
> > > > timeout parameter.
> > > >
> > > > But the time which gets from gettimeofday() API isn't
> monotonically
> > > > increasing. The process may fail if the system time is changed.
> > > >
> > > > This fixes it by using clock_gettime() API with monotonic
> > > attribution.
> > > >
> > > > Fixes: 783b6e54971d ("eal: add synchronous multi-process
> > > communication")
> > > > Fixes: f05e26051c15 ("eal: add IPC asynchronous request")
> > > > Cc: stable@dpdk.org
> > > >
> > > > Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> > > > Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
> > > > ---
> > > [...]
> > > > --- a/lib/eal/common/eal_common_proc.c
> > > > +++ b/lib/eal/common/eal_common_proc.c
> > > > -	if (gettimeofday(&now, NULL) < 0) {
> > > > -		RTE_LOG(ERR, EAL, "Cannot get current time\n");
> > > > -		goto no_trigger;
> > > > -	}
> > > > -	ts_now.tv_nsec = now.tv_usec * 1000;
> > > > -	ts_now.tv_sec = now.tv_sec;
> > > > +	clock_gettime(CLOCK_MONOTONIC, &ts_now);
> > >
> > > Why not testing the return value?
> >
> > Because it is guaranteed not to fail. Ref:
> > https://linux.die.net/man/3/clock_gettime
> > https://www.freebsd.org/cgi/man.cgi?query=clock_gettime
> 
> I see "return 0 for success, or -1 for failure".
> Where is it said it cannot fail?

I'm sorry about being unclear. Referring to the "Errors" chapter in the function's man page, this function call is guaranteed not to fail with these parameters. So there is no need to check the return value.

> 
> > > I think this change would not be appropriate after -rc1.
> > > If you agree, I will postpone to DPDK 21.08.
> >
> > It does fix a serious bug, where IPC timeouts can incorrectly happen.
> And this is not a theoretical bug; I have seen errors happen due to
> using the wrong clock source in other projects.
> >
> > However, I have no clue if these IPC library functions are important
> or not. So I have no qualified opinion about postponing the change.
> 
> I think nobody hit such bug with DPDK IPC.
> 

Then I don't object to postponing.


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v2] eal: fix use wrong time API
  2021-05-05  6:26         ` Morten Brørup
@ 2021-05-05  6:51           ` Thomas Monjalon
  2021-05-05  7:08             ` Morten Brørup
  0 siblings, 1 reply; 18+ messages in thread
From: Thomas Monjalon @ 2021-05-05  6:51 UTC (permalink / raw)
  To: Min Hu (Connor), Morten Brørup; +Cc: dev, ferruh.yigit, skori, jerinj

05/05/2021 08:26, Morten Brørup:
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Thomas Monjalon
> > Sent: Wednesday, May 5, 2021 8:14 AM
> > 
> > 04/05/2021 21:12, Morten Brørup:
> > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Thomas
> > Monjalon
> > > > Sent: Tuesday, May 4, 2021 6:50 PM
> > > >
> > > > 29/04/2021 04:10, Min Hu (Connor):
> > > > > Currently, the mp uses gettimeofday() API to get the time, and
> > used
> > > > as
> > > > > timeout parameter.
> > > > >
> > > > > But the time which gets from gettimeofday() API isn't
> > monotonically
> > > > > increasing. The process may fail if the system time is changed.
> > > > >
> > > > > This fixes it by using clock_gettime() API with monotonic
> > > > attribution.
> > > > >
> > > > > Fixes: 783b6e54971d ("eal: add synchronous multi-process
> > > > communication")
> > > > > Fixes: f05e26051c15 ("eal: add IPC asynchronous request")
> > > > > Cc: stable@dpdk.org
> > > > >
> > > > > Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> > > > > Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
> > > > > ---
> > > > [...]
> > > > > --- a/lib/eal/common/eal_common_proc.c
> > > > > +++ b/lib/eal/common/eal_common_proc.c
> > > > > -	if (gettimeofday(&now, NULL) < 0) {
> > > > > -		RTE_LOG(ERR, EAL, "Cannot get current time\n");
> > > > > -		goto no_trigger;
> > > > > -	}
> > > > > -	ts_now.tv_nsec = now.tv_usec * 1000;
> > > > > -	ts_now.tv_sec = now.tv_sec;
> > > > > +	clock_gettime(CLOCK_MONOTONIC, &ts_now);
> > > >
> > > > Why not testing the return value?
> > >
> > > Because it is guaranteed not to fail. Ref:
> > > https://linux.die.net/man/3/clock_gettime
> > > https://www.freebsd.org/cgi/man.cgi?query=clock_gettime
> > 
> > I see "return 0 for success, or -1 for failure".
> > Where is it said it cannot fail?
> 
> I'm sorry about being unclear. Referring to the "Errors" chapter in the function's man page, this function call is guaranteed not to fail with these parameters. So there is no need to check the return value.

I don't agree.
Especially for this error:
"The clk_id specified is not supported on this system."
How can you be sure it is always supported for any system
we try to run this code now and in future experiments?




^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v2] eal: fix use wrong time API
  2021-05-05  6:51           ` Thomas Monjalon
@ 2021-05-05  7:08             ` Morten Brørup
  0 siblings, 0 replies; 18+ messages in thread
From: Morten Brørup @ 2021-05-05  7:08 UTC (permalink / raw)
  To: Thomas Monjalon, Min Hu (Connor); +Cc: dev, ferruh.yigit, skori, jerinj

> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> Sent: Wednesday, May 5, 2021 8:51 AM
> 
> 05/05/2021 08:26, Morten Brørup:
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Thomas
> Monjalon
> > > Sent: Wednesday, May 5, 2021 8:14 AM
> > >
> > > 04/05/2021 21:12, Morten Brørup:
> > > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Thomas
> > > Monjalon
> > > > > Sent: Tuesday, May 4, 2021 6:50 PM
> > > > >
> > > > > 29/04/2021 04:10, Min Hu (Connor):
> > > > > > Currently, the mp uses gettimeofday() API to get the time,
> and
> > > used
> > > > > as
> > > > > > timeout parameter.
> > > > > >
> > > > > > But the time which gets from gettimeofday() API isn't
> > > monotonically
> > > > > > increasing. The process may fail if the system time is
> changed.
> > > > > >
> > > > > > This fixes it by using clock_gettime() API with monotonic
> > > > > attribution.
> > > > > >
> > > > > > Fixes: 783b6e54971d ("eal: add synchronous multi-process
> > > > > communication")
> > > > > > Fixes: f05e26051c15 ("eal: add IPC asynchronous request")
> > > > > > Cc: stable@dpdk.org
> > > > > >
> > > > > > Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> > > > > > Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
> > > > > > ---
> > > > > [...]
> > > > > > --- a/lib/eal/common/eal_common_proc.c	
> > > > > > +++ b/lib/eal/common/eal_common_proc.c
> > > > > > -	if (gettimeofday(&now, NULL) < 0) {
> > > > > > -		RTE_LOG(ERR, EAL, "Cannot get current time\n");
> > > > > > -		goto no_trigger;
> > > > > > -	}
> > > > > > -	ts_now.tv_nsec = now.tv_usec * 1000;
> > > > > > -	ts_now.tv_sec = now.tv_sec;
> > > > > > +	clock_gettime(CLOCK_MONOTONIC, &ts_now);
> > > > >
> > > > > Why not testing the return value?
> > > >
> > > > Because it is guaranteed not to fail. Ref:
> > > > https://linux.die.net/man/3/clock_gettime
> > > > https://www.freebsd.org/cgi/man.cgi?query=clock_gettime
> > >
> > > I see "return 0 for success, or -1 for failure".
> > > Where is it said it cannot fail?
> >
> > I'm sorry about being unclear. Referring to the "Errors" chapter in
> the function's man page, this function call is guaranteed not to fail
> with these parameters. So there is no need to check the return value.
> 
> I don't agree.
> Especially for this error:
> "The clk_id specified is not supported on this system."
> How can you be sure it is always supported for any system
> we try to run this code now and in future experiments?
> 

I referred to both Linux and BSD man pages because they say that it is defined there. This should cover current supported systems, excl. Windows, which doesn't have gettimeofday() or clock_gettime() anyway.

However, for future operating systems that might be supported by DPDK, you are correct that CLOCK_MONOTONIC is optional.
Ref: https://pubs.opengroup.org/onlinepubs/009695399/basedefs/time.h.html

This can be fixed by adding:
#if !defined(CLOCK_MONOTONIC)
#error CLOCK_MONOTONIC is not defined.
#endif

If we only check the return value at runtime, the function would fail every time on a system without CLOCK_MONOTONIC, and eal_common_proc.c would need to be fixed anyway. It's better to catch this at compile time.


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v3] eal: fix use wrong time API
  2021-05-05  3:43   ` [dpdk-dev] [PATCH v3] " Chengwen Feng
@ 2021-05-11  7:36     ` Thomas Monjalon
  2021-05-11 10:49       ` fengchengwen
  0 siblings, 1 reply; 18+ messages in thread
From: Thomas Monjalon @ 2021-05-11  7:36 UTC (permalink / raw)
  To: Chengwen Feng; +Cc: ferruh.yigit, mb, skori, jerinj, dev, linuxarm

05/05/2021 05:43, Chengwen Feng:
> Currently, the mp uses gettimeofday() API to get the time, and used as
> timeout parameter.
> 
> But the time which gets from gettimeofday() API isn't monotonically
> increasing. The process may fail if the system time is changed.
> 
> This fixes it by using clock_gettime() API with monotonic attribution.
> 
> Fixes: 783b6e54971d ("eal: add synchronous multi-process communication")
> Fixes: f05e26051c15 ("eal: add IPC asynchronous request")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
> Acked-by: Morten Brørup <mb@smartsharesystems.com>
> ---
> v3:
> * add acked-by.
> * change patch's author.

I made some comments on v2 about potential errors to catch,
but you sent this v3 without participating in the v2 discussion.



^ permalink raw reply	[flat|nested] 18+ messages in thread

* [dpdk-dev] [PATCH v4] eal: fix use wrong time API
  2021-04-28  8:12 [dpdk-dev] [PATCH] eal: fix use wrong time API Min Hu (Connor)
  2021-04-28  9:24 ` Morten Brørup
  2021-04-29  2:10 ` [dpdk-dev] [PATCH v2] " Min Hu (Connor)
@ 2021-05-11 10:41 ` Chengwen Feng
  2021-05-12 14:52   ` Thomas Monjalon
  2 siblings, 1 reply; 18+ messages in thread
From: Chengwen Feng @ 2021-05-11 10:41 UTC (permalink / raw)
  To: thomas, ferruh.yigit, mb, skori, jerinj; +Cc: dev

Currently, the mp code uses the gettimeofday() API to get the current
time, which is then used to compute the timeout parameter.

But the time returned by the gettimeofday() API is not monotonically
increasing. The process may fail if the system time is changed.

This fixes it by using the clock_gettime() API with the CLOCK_MONOTONIC
clock.

Fixes: 783b6e54971d ("eal: add synchronous multi-process communication")
Fixes: f05e26051c15 ("eal: add IPC asynchronous request")
Cc: stable@dpdk.org

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
v4:
* check the return code of the clock_gettime() API
v3:
* add acked-by.
* change patch's author.
v2:
* get rid of the CLOCK_TYPE_ID definition and use CLOCK_MONOTONIC
instead.
---
 lib/eal/common/eal_common_proc.c | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/lib/eal/common/eal_common_proc.c b/lib/eal/common/eal_common_proc.c
index 6d1af3c..dc4a2ef 100644
--- a/lib/eal/common/eal_common_proc.c
+++ b/lib/eal/common/eal_common_proc.c
@@ -490,14 +490,11 @@ async_reply_handle_thread_unsafe(void *arg)
 	struct pending_request *req = (struct pending_request *)arg;
 	enum async_action action;
 	struct timespec ts_now;
-	struct timeval now;
 
-	if (gettimeofday(&now, NULL) < 0) {
+	if (clock_gettime(CLOCK_MONOTONIC, &ts_now) < 0) {
 		RTE_LOG(ERR, EAL, "Cannot get current time\n");
 		goto no_trigger;
 	}
-	ts_now.tv_nsec = now.tv_usec * 1000;
-	ts_now.tv_sec = now.tv_sec;
 
 	action = process_async_request(req, &ts_now);
 
@@ -896,6 +893,7 @@ mp_request_sync(const char *dst, struct rte_mp_msg *req,
 	       struct rte_mp_reply *reply, const struct timespec *ts)
 {
 	int ret;
+	pthread_condattr_t attr;
 	struct rte_mp_msg msg, *tmp;
 	struct pending_request pending_req, *exist;
 
@@ -904,7 +902,9 @@ mp_request_sync(const char *dst, struct rte_mp_msg *req,
 	strlcpy(pending_req.dst, dst, sizeof(pending_req.dst));
 	pending_req.request = req;
 	pending_req.reply = &msg;
-	pthread_cond_init(&pending_req.sync.cond, NULL);
+	pthread_condattr_init(&attr);
+	pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
+	pthread_cond_init(&pending_req.sync.cond, &attr);
 
 	exist = find_pending_request(dst, req->name);
 	if (exist) {
@@ -967,8 +967,7 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 	int dir_fd, ret = -1;
 	DIR *mp_dir;
 	struct dirent *ent;
-	struct timeval now;
-	struct timespec end;
+	struct timespec now, end;
 	const struct internal_config *internal_conf =
 		eal_get_internal_configuration();
 
@@ -987,15 +986,15 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
 		return -1;
 	}
 
-	if (gettimeofday(&now, NULL) < 0) {
+	if (clock_gettime(CLOCK_MONOTONIC, &now) < 0) {
 		RTE_LOG(ERR, EAL, "Failed to get current time\n");
 		rte_errno = errno;
 		goto end;
 	}
 
-	end.tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
+	end.tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
 	end.tv_sec = now.tv_sec + ts->tv_sec +
-			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
+			(now.tv_nsec + ts->tv_nsec) / 1000000000;
 
 	/* for secondary process, send request to the primary process only */
 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
@@ -1069,7 +1068,7 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
 	int dir_fd, ret = 0;
 	DIR *mp_dir;
 	struct dirent *ent;
-	struct timeval now;
+	struct timespec now;
 	struct timespec *end;
 	bool dummy_used = false;
 	const struct internal_config *internal_conf =
@@ -1086,7 +1085,7 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
 		return -1;
 	}
 
-	if (gettimeofday(&now, NULL) < 0) {
+	if (clock_gettime(CLOCK_MONOTONIC, &now) < 0) {
 		RTE_LOG(ERR, EAL, "Failed to get current time\n");
 		rte_errno = errno;
 		return -1;
@@ -1108,9 +1107,9 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
 	end = &param->end;
 	reply = &param->user_reply;
 
-	end->tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
+	end->tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000;
 	end->tv_sec = now.tv_sec + ts->tv_sec +
-			(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
+			(now.tv_nsec + ts->tv_nsec) / 1000000000;
 	reply->nb_sent = 0;
 	reply->nb_received = 0;
 	reply->msgs = NULL;
-- 
2.8.1


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v3] eal: fix use wrong time API
  2021-05-11  7:36     ` Thomas Monjalon
@ 2021-05-11 10:49       ` fengchengwen
  0 siblings, 0 replies; 18+ messages in thread
From: fengchengwen @ 2021-05-11 10:49 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: ferruh.yigit, mb, skori, jerinj, dev, linuxarm



On 2021/5/11 15:36, Thomas Monjalon wrote:
> 05/05/2021 05:43, Chengwen Feng:
>> Currently, the mp uses gettimeofday() API to get the time, and used as
>> timeout parameter.
>>
>> But the time which gets from gettimeofday() API isn't monotonically
>> increasing. The process may fail if the system time is changed.
>>
>> This fixes it by using clock_gettime() API with monotonic attribution.
>>
>> Fixes: 783b6e54971d ("eal: add synchronous multi-process communication")
>> Fixes: f05e26051c15 ("eal: add IPC asynchronous request")
>> Cc: stable@dpdk.org
>>
>> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
>> Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
>> Acked-by: Morten Brørup <mb@smartsharesystems.com>
>> ---
>> v3:
>> * add acked-by.
>> * change patch's author.
> 
> I did some comments on v2 about potential errors to catch,
> but you sent this v3 without participating in v2 discussion.
> 
> 

Fixed in v4, thanks

> 
> .
> 


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [dpdk-dev] [PATCH v4] eal: fix use wrong time API
  2021-05-11 10:41 ` [dpdk-dev] [PATCH v4] " Chengwen Feng
@ 2021-05-12 14:52   ` Thomas Monjalon
  0 siblings, 0 replies; 18+ messages in thread
From: Thomas Monjalon @ 2021-05-12 14:52 UTC (permalink / raw)
  To: Chengwen Feng; +Cc: ferruh.yigit, mb, skori, jerinj, dev

11/05/2021 12:41, Chengwen Feng:
> Currently, the mp uses gettimeofday() API to get the time, and used as
> timeout parameter.
> 
> But the time which gets from gettimeofday() API isn't monotonically
> increasing. The process may fail if the system time is changed.
> 
> This fixes it by using clock_gettime() API with monotonic attribution.
> 
> Fixes: 783b6e54971d ("eal: add synchronous multi-process communication")
> Fixes: f05e26051c15 ("eal: add IPC asynchronous request")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
> Acked-by: Morten Brørup <mb@smartsharesystems.com>

Applied, thanks.




^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2021-05-12 14:52 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-28  8:12 [dpdk-dev] [PATCH] eal: fix use wrong time API Min Hu (Connor)
2021-04-28  9:24 ` Morten Brørup
2021-04-28 10:36   ` fengchengwen
2021-04-28 10:59     ` Morten Brørup
2021-04-29  2:12       ` Min Hu (Connor)
2021-04-29  2:10 ` [dpdk-dev] [PATCH v2] " Min Hu (Connor)
2021-04-29  7:12   ` Morten Brørup
2021-05-04 16:49   ` Thomas Monjalon
2021-05-04 19:12     ` Morten Brørup
2021-05-05  6:14       ` Thomas Monjalon
2021-05-05  6:26         ` Morten Brørup
2021-05-05  6:51           ` Thomas Monjalon
2021-05-05  7:08             ` Morten Brørup
2021-05-05  3:43   ` [dpdk-dev] [PATCH v3] " Chengwen Feng
2021-05-11  7:36     ` Thomas Monjalon
2021-05-11 10:49       ` fengchengwen
2021-05-11 10:41 ` [dpdk-dev] [PATCH v4] " Chengwen Feng
2021-05-12 14:52   ` Thomas Monjalon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).