patches for DPDK stable branches
 help / color / Atom feed
* [dpdk-stable] [PATCH 05/10] service: remove rte prefix from static functions
       [not found] <1583862551-2049-1-git-send-email-phil.yang@arm.com>
@ 2020-03-10 17:49 ` Phil Yang
  2020-03-10 17:49 ` [dpdk-stable] [PATCH 06/10] service: remove redundant code Phil Yang
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 51+ messages in thread
From: Phil Yang @ 2020-03-10 17:49 UTC (permalink / raw)
  To: thomas, harry.van.haaren, konstantin.ananyev, stephen,
	maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Honnappa.Nagarahalli,
	gavin.hu, ruifeng.wang, joyce.kong, nd, stable

Fixes: 3cf5eb1546ed ("service: fix and refactor atomic service accesses")
Fixes: 21698354c832 ("service: introduce service cores concept")
Cc: stable@dpdk.org

Signed-off-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 lib/librte_eal/common/rte_service.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index 7e537b8..a691f5d 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -333,7 +333,7 @@ rte_service_runstate_get(uint32_t id)
 }
 
 static inline void
-rte_service_runner_do_callback(struct rte_service_spec_impl *s,
+service_runner_do_callback(struct rte_service_spec_impl *s,
 			       struct core_state *cs, uint32_t service_idx)
 {
 	void *userdata = s->spec.callback_userdata;
@@ -376,10 +376,10 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
 			return -EBUSY;
 
-		rte_service_runner_do_callback(s, cs, i);
+		service_runner_do_callback(s, cs, i);
 		rte_atomic32_clear(&s->execute_lock);
 	} else
-		rte_service_runner_do_callback(s, cs, i);
+		service_runner_do_callback(s, cs, i);
 
 	return 0;
 }
@@ -433,7 +433,7 @@ rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe)
 }
 
 static int32_t
-rte_service_runner_func(void *arg)
+service_runner_func(void *arg)
 {
 	RTE_SET_USED(arg);
 	uint32_t i;
@@ -703,7 +703,7 @@ rte_service_lcore_start(uint32_t lcore)
 	 */
 	lcore_states[lcore].runstate = RUNSTATE_RUNNING;
 
-	int ret = rte_eal_remote_launch(rte_service_runner_func, 0, lcore);
+	int ret = rte_eal_remote_launch(service_runner_func, 0, lcore);
 	/* returns -EBUSY if the core is already launched, 0 on success */
 	return ret;
 }
@@ -782,7 +782,7 @@ rte_service_lcore_attr_get(uint32_t lcore, uint32_t attr_id,
 }
 
 static void
-rte_service_dump_one(FILE *f, struct rte_service_spec_impl *s,
+service_dump_one(FILE *f, struct rte_service_spec_impl *s,
 		     uint64_t all_cycles, uint32_t reset)
 {
 	/* avoid divide by zero */
@@ -815,7 +815,7 @@ rte_service_attr_reset_all(uint32_t id)
 	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
 
 	int reset = 1;
-	rte_service_dump_one(NULL, s, 0, reset);
+	service_dump_one(NULL, s, 0, reset);
 	return 0;
 }
 
@@ -873,7 +873,7 @@ rte_service_dump(FILE *f, uint32_t id)
 		SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
 		fprintf(f, "Service %s Summary\n", s->spec.name);
 		uint32_t reset = 0;
-		rte_service_dump_one(f, s, total_cycles, reset);
+		service_dump_one(f, s, total_cycles, reset);
 		return 0;
 	}
 
@@ -883,7 +883,7 @@ rte_service_dump(FILE *f, uint32_t id)
 		if (!service_valid(i))
 			continue;
 		uint32_t reset = 0;
-		rte_service_dump_one(f, &rte_services[i], total_cycles, reset);
+		service_dump_one(f, &rte_services[i], total_cycles, reset);
 	}
 
 	fprintf(f, "Service Cores Summary\n");
-- 
2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* [dpdk-stable] [PATCH 06/10] service: remove redundant code
       [not found] <1583862551-2049-1-git-send-email-phil.yang@arm.com>
  2020-03-10 17:49 ` [dpdk-stable] [PATCH 05/10] service: remove rte prefix from static functions Phil Yang
@ 2020-03-10 17:49 ` Phil Yang
  2020-03-10 17:49 ` [dpdk-stable] [PATCH 07/10] service: avoid race condition for MT unsafe service Phil Yang
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 51+ messages in thread
From: Phil Yang @ 2020-03-10 17:49 UTC (permalink / raw)
  To: thomas, harry.van.haaren, konstantin.ananyev, stephen,
	maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Honnappa.Nagarahalli,
	gavin.hu, ruifeng.wang, joyce.kong, nd, Stable

The service id validation is verified in the calling function, remove
the redundant code inside the service_update function.

Fixes: 21698354c832 ("service: introduce service cores concept")
Cc: Stable@dpdk.org

Signed-off-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 lib/librte_eal/common/rte_service.c | 31 ++++++++++++-------------------
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index a691f5d..6990dc2 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -549,21 +549,10 @@ rte_service_start_with_defaults(void)
 }
 
 static int32_t
-service_update(struct rte_service_spec *service, uint32_t lcore,
+service_update(uint32_t sid, uint32_t lcore,
 		uint32_t *set, uint32_t *enabled)
 {
-	uint32_t i;
-	int32_t sid = -1;
-
-	for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
-		if ((struct rte_service_spec *)&rte_services[i] == service &&
-				service_valid(i)) {
-			sid = i;
-			break;
-		}
-	}
-
-	if (sid == -1 || lcore >= RTE_MAX_LCORE)
+	if (lcore >= RTE_MAX_LCORE)
 		return -EINVAL;
 
 	if (!lcore_states[lcore].is_service_core)
@@ -595,19 +584,23 @@ service_update(struct rte_service_spec *service, uint32_t lcore,
 int32_t
 rte_service_map_lcore_set(uint32_t id, uint32_t lcore, uint32_t enabled)
 {
-	struct rte_service_spec_impl *s;
-	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+	/* validate ID, or return error value */
+	if (id >= RTE_SERVICE_NUM_MAX || !service_valid(id))
+		return -EINVAL;
+
 	uint32_t on = enabled > 0;
-	return service_update(&s->spec, lcore, &on, 0);
+	return service_update(id, lcore, &on, 0);
 }
 
 int32_t
 rte_service_map_lcore_get(uint32_t id, uint32_t lcore)
 {
-	struct rte_service_spec_impl *s;
-	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+	/* validate ID, or return error value */
+	if (id >= RTE_SERVICE_NUM_MAX || !service_valid(id))
+		return -EINVAL;
+
 	uint32_t enabled;
-	int ret = service_update(&s->spec, lcore, 0, &enabled);
+	int ret = service_update(id, lcore, 0, &enabled);
 	if (ret == 0)
 		return enabled;
 	return ret;
-- 
2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* [dpdk-stable] [PATCH 07/10] service: avoid race condition for MT unsafe service
       [not found] <1583862551-2049-1-git-send-email-phil.yang@arm.com>
  2020-03-10 17:49 ` [dpdk-stable] [PATCH 05/10] service: remove rte prefix from static functions Phil Yang
  2020-03-10 17:49 ` [dpdk-stable] [PATCH 06/10] service: remove redundant code Phil Yang
@ 2020-03-10 17:49 ` Phil Yang
  2020-03-10 17:49 ` [dpdk-stable] [PATCH 08/10] service: identify service running on another core correctly Phil Yang
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 51+ messages in thread
From: Phil Yang @ 2020-03-10 17:49 UTC (permalink / raw)
  To: thomas, harry.van.haaren, konstantin.ananyev, stephen,
	maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Honnappa.Nagarahalli,
	gavin.hu, ruifeng.wang, joyce.kong, nd, Honnappa Nagarahalli,
	stable

From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

There has possible that a MT unsafe service might get configured to
run on another core while the service is running currently. This
might result in the MT unsafe service running on multiple cores
simultaneously. Use 'execute_lock' always when the service is
MT unsafe.

Fixes: e9139a32f6e8 ("service: add function to run on app lcore")
Cc: stable@dpdk.org

Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
---
 lib/librte_eal/common/rte_service.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index 6990dc2..b37fc56 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -50,6 +50,10 @@ struct rte_service_spec_impl {
 	uint8_t internal_flags;
 
 	/* per service statistics */
+	/* Indicates how many cores the service is mapped to run on.
+	 * It does not indicate the number of cores the service is running
+	 * on currently.
+	 */
 	rte_atomic32_t num_mapped_cores;
 	uint64_t calls;
 	uint64_t cycles_spent;
@@ -367,12 +371,7 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 
 	cs->service_active_on_lcore[i] = 1;
 
-	/* check do we need cmpset, if MT safe or <= 1 core
-	 * mapped, atomic ops are not required.
-	 */
-	const int use_atomics = (service_mt_safe(s) == 0) &&
-				(rte_atomic32_read(&s->num_mapped_cores) > 1);
-	if (use_atomics) {
+	if (service_mt_safe(s) == 0) {
 		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
 			return -EBUSY;
 
-- 
2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* [dpdk-stable] [PATCH 08/10] service: identify service running on another core correctly
       [not found] <1583862551-2049-1-git-send-email-phil.yang@arm.com>
                   ` (2 preceding siblings ...)
  2020-03-10 17:49 ` [dpdk-stable] [PATCH 07/10] service: avoid race condition for MT unsafe service Phil Yang
@ 2020-03-10 17:49 ` Phil Yang
       [not found] ` <1583993624-20446-1-git-send-email-phil.yang@arm.com>
       [not found] ` <1583999071-22872-1-git-send-email-phil.yang@arm.com>
  5 siblings, 0 replies; 51+ messages in thread
From: Phil Yang @ 2020-03-10 17:49 UTC (permalink / raw)
  To: thomas, harry.van.haaren, konstantin.ananyev, stephen,
	maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Honnappa.Nagarahalli,
	gavin.hu, ruifeng.wang, joyce.kong, nd, Honnappa Nagarahalli,
	stable

From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

The logic to identify if the MT unsafe service is running on another
core can return -EBUSY spuriously. In such cases, running the service
becomes more costlier than using atomic operations. Assume that the
application passes the right parameters and reduce the number of
instructions for all cases.

Cc: stable@dpdk.org

Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
---
 lib/librte_eal/common/rte_service.c | 26 ++++++++------------------
 1 file changed, 8 insertions(+), 18 deletions(-)

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index b37fc56..0186024 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -357,7 +357,7 @@ service_runner_do_callback(struct rte_service_spec_impl *s,
 /* Expects the service 's' is valid. */
 static int32_t
 service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
-	    struct rte_service_spec_impl *s)
+	    struct rte_service_spec_impl *s, uint32_t serialize_mt_unsafe)
 {
 	if (!s)
 		return -EINVAL;
@@ -371,7 +371,7 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 
 	cs->service_active_on_lcore[i] = 1;
 
-	if (service_mt_safe(s) == 0) {
+	if ((service_mt_safe(s) == 0) && (serialize_mt_unsafe == 1)) {
 		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
 			return -EBUSY;
 
@@ -409,24 +409,14 @@ rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe)
 
 	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
 
-	/* Atomically add this core to the mapped cores first, then examine if
-	 * we can run the service. This avoids a race condition between
-	 * checking the value, and atomically adding to the mapped count.
+	/* Increment num_mapped_cores to indicate that the service is
+	 * is running on a core.
 	 */
-	if (serialize_mt_unsafe)
-		rte_atomic32_inc(&s->num_mapped_cores);
+	rte_atomic32_inc(&s->num_mapped_cores);
 
-	if (service_mt_safe(s) == 0 &&
-			rte_atomic32_read(&s->num_mapped_cores) > 1) {
-		if (serialize_mt_unsafe)
-			rte_atomic32_dec(&s->num_mapped_cores);
-		return -EBUSY;
-	}
-
-	int ret = service_run(id, cs, UINT64_MAX, s);
+	int ret = service_run(id, cs, UINT64_MAX, s, serialize_mt_unsafe);
 
-	if (serialize_mt_unsafe)
-		rte_atomic32_dec(&s->num_mapped_cores);
+	rte_atomic32_dec(&s->num_mapped_cores);
 
 	return ret;
 }
@@ -446,7 +436,7 @@ service_runner_func(void *arg)
 			if (!service_valid(i))
 				continue;
 			/* return value ignored as no change to code flow */
-			service_run(i, cs, service_mask, service_get(i));
+			service_run(i, cs, service_mask, service_get(i), 1);
 		}
 
 		cs->loops++;
-- 
2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* [dpdk-stable] [PATCH v2 05/10] service: remove rte prefix from static functions
       [not found] ` <1583993624-20446-1-git-send-email-phil.yang@arm.com>
@ 2020-03-12  6:13   ` Phil Yang
  2020-03-12  6:13   ` [dpdk-stable] [PATCH v2 06/10] service: remove redundant code Phil Yang
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 51+ messages in thread
From: Phil Yang @ 2020-03-12  6:13 UTC (permalink / raw)
  To: phil.yang; +Cc: nd, stable

Fixes: 3cf5eb1546ed ("service: fix and refactor atomic service accesses")
Fixes: 21698354c832 ("service: introduce service cores concept")
Cc: stable@dpdk.org

Signed-off-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 lib/librte_eal/common/rte_service.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index 7e537b8..a691f5d 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -333,7 +333,7 @@ rte_service_runstate_get(uint32_t id)
 }
 
 static inline void
-rte_service_runner_do_callback(struct rte_service_spec_impl *s,
+service_runner_do_callback(struct rte_service_spec_impl *s,
 			       struct core_state *cs, uint32_t service_idx)
 {
 	void *userdata = s->spec.callback_userdata;
@@ -376,10 +376,10 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
 			return -EBUSY;
 
-		rte_service_runner_do_callback(s, cs, i);
+		service_runner_do_callback(s, cs, i);
 		rte_atomic32_clear(&s->execute_lock);
 	} else
-		rte_service_runner_do_callback(s, cs, i);
+		service_runner_do_callback(s, cs, i);
 
 	return 0;
 }
@@ -433,7 +433,7 @@ rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe)
 }
 
 static int32_t
-rte_service_runner_func(void *arg)
+service_runner_func(void *arg)
 {
 	RTE_SET_USED(arg);
 	uint32_t i;
@@ -703,7 +703,7 @@ rte_service_lcore_start(uint32_t lcore)
 	 */
 	lcore_states[lcore].runstate = RUNSTATE_RUNNING;
 
-	int ret = rte_eal_remote_launch(rte_service_runner_func, 0, lcore);
+	int ret = rte_eal_remote_launch(service_runner_func, 0, lcore);
 	/* returns -EBUSY if the core is already launched, 0 on success */
 	return ret;
 }
@@ -782,7 +782,7 @@ rte_service_lcore_attr_get(uint32_t lcore, uint32_t attr_id,
 }
 
 static void
-rte_service_dump_one(FILE *f, struct rte_service_spec_impl *s,
+service_dump_one(FILE *f, struct rte_service_spec_impl *s,
 		     uint64_t all_cycles, uint32_t reset)
 {
 	/* avoid divide by zero */
@@ -815,7 +815,7 @@ rte_service_attr_reset_all(uint32_t id)
 	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
 
 	int reset = 1;
-	rte_service_dump_one(NULL, s, 0, reset);
+	service_dump_one(NULL, s, 0, reset);
 	return 0;
 }
 
@@ -873,7 +873,7 @@ rte_service_dump(FILE *f, uint32_t id)
 		SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
 		fprintf(f, "Service %s Summary\n", s->spec.name);
 		uint32_t reset = 0;
-		rte_service_dump_one(f, s, total_cycles, reset);
+		service_dump_one(f, s, total_cycles, reset);
 		return 0;
 	}
 
@@ -883,7 +883,7 @@ rte_service_dump(FILE *f, uint32_t id)
 		if (!service_valid(i))
 			continue;
 		uint32_t reset = 0;
-		rte_service_dump_one(f, &rte_services[i], total_cycles, reset);
+		service_dump_one(f, &rte_services[i], total_cycles, reset);
 	}
 
 	fprintf(f, "Service Cores Summary\n");
-- 
2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* [dpdk-stable] [PATCH v2 06/10] service: remove redundant code
       [not found] ` <1583993624-20446-1-git-send-email-phil.yang@arm.com>
  2020-03-12  6:13   ` [dpdk-stable] [PATCH v2 05/10] service: remove rte prefix from static functions Phil Yang
@ 2020-03-12  6:13   ` Phil Yang
  2020-03-12  6:13   ` [dpdk-stable] [PATCH v2 07/10] service: avoid race condition for MT unsafe service Phil Yang
  2020-03-12  6:13   ` [dpdk-stable] [PATCH v2 08/10] service: identify service running on another core correctly Phil Yang
  3 siblings, 0 replies; 51+ messages in thread
From: Phil Yang @ 2020-03-12  6:13 UTC (permalink / raw)
  To: phil.yang; +Cc: nd, Stable

The service id validation is verified in the calling function, remove
the redundant code inside the service_update function.

Fixes: 21698354c832 ("service: introduce service cores concept")
Cc: Stable@dpdk.org

Signed-off-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 lib/librte_eal/common/rte_service.c | 31 ++++++++++++-------------------
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index a691f5d..6990dc2 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -549,21 +549,10 @@ rte_service_start_with_defaults(void)
 }
 
 static int32_t
-service_update(struct rte_service_spec *service, uint32_t lcore,
+service_update(uint32_t sid, uint32_t lcore,
 		uint32_t *set, uint32_t *enabled)
 {
-	uint32_t i;
-	int32_t sid = -1;
-
-	for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
-		if ((struct rte_service_spec *)&rte_services[i] == service &&
-				service_valid(i)) {
-			sid = i;
-			break;
-		}
-	}
-
-	if (sid == -1 || lcore >= RTE_MAX_LCORE)
+	if (lcore >= RTE_MAX_LCORE)
 		return -EINVAL;
 
 	if (!lcore_states[lcore].is_service_core)
@@ -595,19 +584,23 @@ service_update(struct rte_service_spec *service, uint32_t lcore,
 int32_t
 rte_service_map_lcore_set(uint32_t id, uint32_t lcore, uint32_t enabled)
 {
-	struct rte_service_spec_impl *s;
-	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+	/* validate ID, or return error value */
+	if (id >= RTE_SERVICE_NUM_MAX || !service_valid(id))
+		return -EINVAL;
+
 	uint32_t on = enabled > 0;
-	return service_update(&s->spec, lcore, &on, 0);
+	return service_update(id, lcore, &on, 0);
 }
 
 int32_t
 rte_service_map_lcore_get(uint32_t id, uint32_t lcore)
 {
-	struct rte_service_spec_impl *s;
-	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+	/* validate ID, or return error value */
+	if (id >= RTE_SERVICE_NUM_MAX || !service_valid(id))
+		return -EINVAL;
+
 	uint32_t enabled;
-	int ret = service_update(&s->spec, lcore, 0, &enabled);
+	int ret = service_update(id, lcore, 0, &enabled);
 	if (ret == 0)
 		return enabled;
 	return ret;
-- 
2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* [dpdk-stable] [PATCH v2 07/10] service: avoid race condition for MT unsafe service
       [not found] ` <1583993624-20446-1-git-send-email-phil.yang@arm.com>
  2020-03-12  6:13   ` [dpdk-stable] [PATCH v2 05/10] service: remove rte prefix from static functions Phil Yang
  2020-03-12  6:13   ` [dpdk-stable] [PATCH v2 06/10] service: remove redundant code Phil Yang
@ 2020-03-12  6:13   ` Phil Yang
  2020-03-12  6:13   ` [dpdk-stable] [PATCH v2 08/10] service: identify service running on another core correctly Phil Yang
  3 siblings, 0 replies; 51+ messages in thread
From: Phil Yang @ 2020-03-12  6:13 UTC (permalink / raw)
  To: phil.yang; +Cc: nd, Honnappa Nagarahalli, stable

From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

There has possible that a MT unsafe service might get configured to
run on another core while the service is running currently. This
might result in the MT unsafe service running on multiple cores
simultaneously. Use 'execute_lock' always when the service is
MT unsafe.

Fixes: e9139a32f6e8 ("service: add function to run on app lcore")
Cc: stable@dpdk.org

Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
---
 lib/librte_eal/common/rte_service.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index 6990dc2..b37fc56 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -50,6 +50,10 @@ struct rte_service_spec_impl {
 	uint8_t internal_flags;
 
 	/* per service statistics */
+	/* Indicates how many cores the service is mapped to run on.
+	 * It does not indicate the number of cores the service is running
+	 * on currently.
+	 */
 	rte_atomic32_t num_mapped_cores;
 	uint64_t calls;
 	uint64_t cycles_spent;
@@ -367,12 +371,7 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 
 	cs->service_active_on_lcore[i] = 1;
 
-	/* check do we need cmpset, if MT safe or <= 1 core
-	 * mapped, atomic ops are not required.
-	 */
-	const int use_atomics = (service_mt_safe(s) == 0) &&
-				(rte_atomic32_read(&s->num_mapped_cores) > 1);
-	if (use_atomics) {
+	if (service_mt_safe(s) == 0) {
 		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
 			return -EBUSY;
 
-- 
2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* [dpdk-stable] [PATCH v2 08/10] service: identify service running on another core correctly
       [not found] ` <1583993624-20446-1-git-send-email-phil.yang@arm.com>
                     ` (2 preceding siblings ...)
  2020-03-12  6:13   ` [dpdk-stable] [PATCH v2 07/10] service: avoid race condition for MT unsafe service Phil Yang
@ 2020-03-12  6:13   ` Phil Yang
  3 siblings, 0 replies; 51+ messages in thread
From: Phil Yang @ 2020-03-12  6:13 UTC (permalink / raw)
  To: phil.yang; +Cc: nd, Honnappa Nagarahalli, stable

From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

The logic to identify if the MT unsafe service is running on another
core can return -EBUSY spuriously. In such cases, running the service
becomes more costlier than using atomic operations. Assume that the
application passes the right parameters and reduce the number of
instructions for all cases.

Cc: stable@dpdk.org

Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
---
 lib/librte_eal/common/rte_service.c | 26 ++++++++------------------
 1 file changed, 8 insertions(+), 18 deletions(-)

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index b37fc56..0186024 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -357,7 +357,7 @@ service_runner_do_callback(struct rte_service_spec_impl *s,
 /* Expects the service 's' is valid. */
 static int32_t
 service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
-	    struct rte_service_spec_impl *s)
+	    struct rte_service_spec_impl *s, uint32_t serialize_mt_unsafe)
 {
 	if (!s)
 		return -EINVAL;
@@ -371,7 +371,7 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 
 	cs->service_active_on_lcore[i] = 1;
 
-	if (service_mt_safe(s) == 0) {
+	if ((service_mt_safe(s) == 0) && (serialize_mt_unsafe == 1)) {
 		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
 			return -EBUSY;
 
@@ -409,24 +409,14 @@ rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe)
 
 	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
 
-	/* Atomically add this core to the mapped cores first, then examine if
-	 * we can run the service. This avoids a race condition between
-	 * checking the value, and atomically adding to the mapped count.
+	/* Increment num_mapped_cores to indicate that the service is
+	 * is running on a core.
 	 */
-	if (serialize_mt_unsafe)
-		rte_atomic32_inc(&s->num_mapped_cores);
+	rte_atomic32_inc(&s->num_mapped_cores);
 
-	if (service_mt_safe(s) == 0 &&
-			rte_atomic32_read(&s->num_mapped_cores) > 1) {
-		if (serialize_mt_unsafe)
-			rte_atomic32_dec(&s->num_mapped_cores);
-		return -EBUSY;
-	}
-
-	int ret = service_run(id, cs, UINT64_MAX, s);
+	int ret = service_run(id, cs, UINT64_MAX, s, serialize_mt_unsafe);
 
-	if (serialize_mt_unsafe)
-		rte_atomic32_dec(&s->num_mapped_cores);
+	rte_atomic32_dec(&s->num_mapped_cores);
 
 	return ret;
 }
@@ -446,7 +436,7 @@ service_runner_func(void *arg)
 			if (!service_valid(i))
 				continue;
 			/* return value ignored as no change to code flow */
-			service_run(i, cs, service_mask, service_get(i));
+			service_run(i, cs, service_mask, service_get(i), 1);
 		}
 
 		cs->loops++;
-- 
2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* [dpdk-stable] [PATCH v2 05/10] service: remove rte prefix from static functions
       [not found] ` <1583999071-22872-1-git-send-email-phil.yang@arm.com>
@ 2020-03-12  7:44   ` Phil Yang
  2020-03-12  7:44   ` [dpdk-stable] [PATCH v2 06/10] service: remove redundant code Phil Yang
                     ` (3 subsequent siblings)
  4 siblings, 0 replies; 51+ messages in thread
From: Phil Yang @ 2020-03-12  7:44 UTC (permalink / raw)
  To: thomas, harry.van.haaren, konstantin.ananyev, stephen,
	maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Honnappa.Nagarahalli,
	gavin.hu, ruifeng.wang, joyce.kong, nd, stable

Fixes: 3cf5eb1546ed ("service: fix and refactor atomic service accesses")
Fixes: 21698354c832 ("service: introduce service cores concept")
Cc: stable@dpdk.org

Signed-off-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 lib/librte_eal/common/rte_service.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index 7e537b8..a691f5d 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -333,7 +333,7 @@ rte_service_runstate_get(uint32_t id)
 }
 
 static inline void
-rte_service_runner_do_callback(struct rte_service_spec_impl *s,
+service_runner_do_callback(struct rte_service_spec_impl *s,
 			       struct core_state *cs, uint32_t service_idx)
 {
 	void *userdata = s->spec.callback_userdata;
@@ -376,10 +376,10 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
 			return -EBUSY;
 
-		rte_service_runner_do_callback(s, cs, i);
+		service_runner_do_callback(s, cs, i);
 		rte_atomic32_clear(&s->execute_lock);
 	} else
-		rte_service_runner_do_callback(s, cs, i);
+		service_runner_do_callback(s, cs, i);
 
 	return 0;
 }
@@ -433,7 +433,7 @@ rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe)
 }
 
 static int32_t
-rte_service_runner_func(void *arg)
+service_runner_func(void *arg)
 {
 	RTE_SET_USED(arg);
 	uint32_t i;
@@ -703,7 +703,7 @@ rte_service_lcore_start(uint32_t lcore)
 	 */
 	lcore_states[lcore].runstate = RUNSTATE_RUNNING;
 
-	int ret = rte_eal_remote_launch(rte_service_runner_func, 0, lcore);
+	int ret = rte_eal_remote_launch(service_runner_func, 0, lcore);
 	/* returns -EBUSY if the core is already launched, 0 on success */
 	return ret;
 }
@@ -782,7 +782,7 @@ rte_service_lcore_attr_get(uint32_t lcore, uint32_t attr_id,
 }
 
 static void
-rte_service_dump_one(FILE *f, struct rte_service_spec_impl *s,
+service_dump_one(FILE *f, struct rte_service_spec_impl *s,
 		     uint64_t all_cycles, uint32_t reset)
 {
 	/* avoid divide by zero */
@@ -815,7 +815,7 @@ rte_service_attr_reset_all(uint32_t id)
 	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
 
 	int reset = 1;
-	rte_service_dump_one(NULL, s, 0, reset);
+	service_dump_one(NULL, s, 0, reset);
 	return 0;
 }
 
@@ -873,7 +873,7 @@ rte_service_dump(FILE *f, uint32_t id)
 		SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
 		fprintf(f, "Service %s Summary\n", s->spec.name);
 		uint32_t reset = 0;
-		rte_service_dump_one(f, s, total_cycles, reset);
+		service_dump_one(f, s, total_cycles, reset);
 		return 0;
 	}
 
@@ -883,7 +883,7 @@ rte_service_dump(FILE *f, uint32_t id)
 		if (!service_valid(i))
 			continue;
 		uint32_t reset = 0;
-		rte_service_dump_one(f, &rte_services[i], total_cycles, reset);
+		service_dump_one(f, &rte_services[i], total_cycles, reset);
 	}
 
 	fprintf(f, "Service Cores Summary\n");
-- 
2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* [dpdk-stable] [PATCH v2 06/10] service: remove redundant code
       [not found] ` <1583999071-22872-1-git-send-email-phil.yang@arm.com>
  2020-03-12  7:44   ` [dpdk-stable] [PATCH v2 05/10] service: remove rte prefix from static functions Phil Yang
@ 2020-03-12  7:44   ` Phil Yang
  2020-03-12  7:44   ` [dpdk-stable] [PATCH v2 07/10] service: avoid race condition for MT unsafe service Phil Yang
                     ` (2 subsequent siblings)
  4 siblings, 0 replies; 51+ messages in thread
From: Phil Yang @ 2020-03-12  7:44 UTC (permalink / raw)
  To: thomas, harry.van.haaren, konstantin.ananyev, stephen,
	maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Honnappa.Nagarahalli,
	gavin.hu, ruifeng.wang, joyce.kong, nd, Stable

The service id validation is verified in the calling function, remove
the redundant code inside the service_update function.

Fixes: 21698354c832 ("service: introduce service cores concept")
Cc: Stable@dpdk.org

Signed-off-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 lib/librte_eal/common/rte_service.c | 31 ++++++++++++-------------------
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index a691f5d..6990dc2 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -549,21 +549,10 @@ rte_service_start_with_defaults(void)
 }
 
 static int32_t
-service_update(struct rte_service_spec *service, uint32_t lcore,
+service_update(uint32_t sid, uint32_t lcore,
 		uint32_t *set, uint32_t *enabled)
 {
-	uint32_t i;
-	int32_t sid = -1;
-
-	for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
-		if ((struct rte_service_spec *)&rte_services[i] == service &&
-				service_valid(i)) {
-			sid = i;
-			break;
-		}
-	}
-
-	if (sid == -1 || lcore >= RTE_MAX_LCORE)
+	if (lcore >= RTE_MAX_LCORE)
 		return -EINVAL;
 
 	if (!lcore_states[lcore].is_service_core)
@@ -595,19 +584,23 @@ service_update(struct rte_service_spec *service, uint32_t lcore,
 int32_t
 rte_service_map_lcore_set(uint32_t id, uint32_t lcore, uint32_t enabled)
 {
-	struct rte_service_spec_impl *s;
-	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+	/* validate ID, or return error value */
+	if (id >= RTE_SERVICE_NUM_MAX || !service_valid(id))
+		return -EINVAL;
+
 	uint32_t on = enabled > 0;
-	return service_update(&s->spec, lcore, &on, 0);
+	return service_update(id, lcore, &on, 0);
 }
 
 int32_t
 rte_service_map_lcore_get(uint32_t id, uint32_t lcore)
 {
-	struct rte_service_spec_impl *s;
-	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+	/* validate ID, or return error value */
+	if (id >= RTE_SERVICE_NUM_MAX || !service_valid(id))
+		return -EINVAL;
+
 	uint32_t enabled;
-	int ret = service_update(&s->spec, lcore, 0, &enabled);
+	int ret = service_update(id, lcore, 0, &enabled);
 	if (ret == 0)
 		return enabled;
 	return ret;
-- 
2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* [dpdk-stable] [PATCH v2 07/10] service: avoid race condition for MT unsafe service
       [not found] ` <1583999071-22872-1-git-send-email-phil.yang@arm.com>
  2020-03-12  7:44   ` [dpdk-stable] [PATCH v2 05/10] service: remove rte prefix from static functions Phil Yang
  2020-03-12  7:44   ` [dpdk-stable] [PATCH v2 06/10] service: remove redundant code Phil Yang
@ 2020-03-12  7:44   ` Phil Yang
  2020-03-12  7:44   ` [dpdk-stable] [PATCH v2 08/10] service: identify service running on another core correctly Phil Yang
       [not found]   ` <1584407863-774-1-git-send-email-phil.yang@arm.com>
  4 siblings, 0 replies; 51+ messages in thread
From: Phil Yang @ 2020-03-12  7:44 UTC (permalink / raw)
  To: thomas, harry.van.haaren, konstantin.ananyev, stephen,
	maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Honnappa.Nagarahalli,
	gavin.hu, ruifeng.wang, joyce.kong, nd, Honnappa Nagarahalli,
	stable

From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

There has possible that a MT unsafe service might get configured to
run on another core while the service is running currently. This
might result in the MT unsafe service running on multiple cores
simultaneously. Use 'execute_lock' always when the service is
MT unsafe.

Fixes: e9139a32f6e8 ("service: add function to run on app lcore")
Cc: stable@dpdk.org

Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 lib/librte_eal/common/rte_service.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index 6990dc2..b37fc56 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -50,6 +50,10 @@ struct rte_service_spec_impl {
 	uint8_t internal_flags;
 
 	/* per service statistics */
+	/* Indicates how many cores the service is mapped to run on.
+	 * It does not indicate the number of cores the service is running
+	 * on currently.
+	 */
 	rte_atomic32_t num_mapped_cores;
 	uint64_t calls;
 	uint64_t cycles_spent;
@@ -367,12 +371,7 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 
 	cs->service_active_on_lcore[i] = 1;
 
-	/* check do we need cmpset, if MT safe or <= 1 core
-	 * mapped, atomic ops are not required.
-	 */
-	const int use_atomics = (service_mt_safe(s) == 0) &&
-				(rte_atomic32_read(&s->num_mapped_cores) > 1);
-	if (use_atomics) {
+	if (service_mt_safe(s) == 0) {
 		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
 			return -EBUSY;
 
-- 
2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* [dpdk-stable] [PATCH v2 08/10] service: identify service running on another core correctly
       [not found] ` <1583999071-22872-1-git-send-email-phil.yang@arm.com>
                     ` (2 preceding siblings ...)
  2020-03-12  7:44   ` [dpdk-stable] [PATCH v2 07/10] service: avoid race condition for MT unsafe service Phil Yang
@ 2020-03-12  7:44   ` Phil Yang
       [not found]   ` <1584407863-774-1-git-send-email-phil.yang@arm.com>
  4 siblings, 0 replies; 51+ messages in thread
From: Phil Yang @ 2020-03-12  7:44 UTC (permalink / raw)
  To: thomas, harry.van.haaren, konstantin.ananyev, stephen,
	maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Honnappa.Nagarahalli,
	gavin.hu, ruifeng.wang, joyce.kong, nd, Honnappa Nagarahalli,
	stable

From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

The logic to identify if the MT unsafe service is running on another
core can return -EBUSY spuriously. In such cases, running the service
becomes costlier than using atomic operations. Assume that the
application passes the right parameters and reduces the number of
instructions for all cases.

Cc: stable@dpdk.org

Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 lib/librte_eal/common/rte_service.c | 26 ++++++++------------------
 1 file changed, 8 insertions(+), 18 deletions(-)

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index b37fc56..670f5a9 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -357,7 +357,7 @@ service_runner_do_callback(struct rte_service_spec_impl *s,
 /* Expects the service 's' is valid. */
 static int32_t
 service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
-	    struct rte_service_spec_impl *s)
+	    struct rte_service_spec_impl *s, uint32_t serialize_mt_unsafe)
 {
 	if (!s)
 		return -EINVAL;
@@ -371,7 +371,7 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 
 	cs->service_active_on_lcore[i] = 1;
 
-	if (service_mt_safe(s) == 0) {
+	if ((service_mt_safe(s) == 0) && (serialize_mt_unsafe == 1)) {
 		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
 			return -EBUSY;
 
@@ -409,24 +409,14 @@ rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe)
 
 	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
 
-	/* Atomically add this core to the mapped cores first, then examine if
-	 * we can run the service. This avoids a race condition between
-	 * checking the value, and atomically adding to the mapped count.
+	/* Increment num_mapped_cores to indicate that the service
+	 * is running on a core.
 	 */
-	if (serialize_mt_unsafe)
-		rte_atomic32_inc(&s->num_mapped_cores);
+	rte_atomic32_inc(&s->num_mapped_cores);
 
-	if (service_mt_safe(s) == 0 &&
-			rte_atomic32_read(&s->num_mapped_cores) > 1) {
-		if (serialize_mt_unsafe)
-			rte_atomic32_dec(&s->num_mapped_cores);
-		return -EBUSY;
-	}
-
-	int ret = service_run(id, cs, UINT64_MAX, s);
+	int ret = service_run(id, cs, UINT64_MAX, s, serialize_mt_unsafe);
 
-	if (serialize_mt_unsafe)
-		rte_atomic32_dec(&s->num_mapped_cores);
+	rte_atomic32_dec(&s->num_mapped_cores);
 
 	return ret;
 }
@@ -446,7 +436,7 @@ service_runner_func(void *arg)
 			if (!service_valid(i))
 				continue;
 			/* return value ignored as no change to code flow */
-			service_run(i, cs, service_mask, service_get(i));
+			service_run(i, cs, service_mask, service_get(i), 1);
 		}
 
 		cs->loops++;
-- 
2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* [dpdk-stable] [PATCH v3 07/12] service: remove rte prefix from static functions
       [not found]   ` <1584407863-774-1-git-send-email-phil.yang@arm.com>
@ 2020-03-17  1:17     ` Phil Yang
  2020-04-03 11:57       ` Van Haaren, Harry
                         ` (2 more replies)
  2020-03-17  1:17     ` [dpdk-stable] [PATCH v3 08/12] service: remove redundant code Phil Yang
                       ` (2 subsequent siblings)
  3 siblings, 3 replies; 51+ messages in thread
From: Phil Yang @ 2020-03-17  1:17 UTC (permalink / raw)
  To: thomas, harry.van.haaren, konstantin.ananyev, stephen,
	maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Honnappa.Nagarahalli,
	gavin.hu, ruifeng.wang, joyce.kong, nd, stable

Fixes: 3cf5eb1546ed ("service: fix and refactor atomic service accesses")
Fixes: 21698354c832 ("service: introduce service cores concept")
Cc: stable@dpdk.org

Signed-off-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 lib/librte_eal/common/rte_service.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index b0b78ba..2117726 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -336,7 +336,7 @@ rte_service_runstate_get(uint32_t id)
 }
 
 static inline void
-rte_service_runner_do_callback(struct rte_service_spec_impl *s,
+service_runner_do_callback(struct rte_service_spec_impl *s,
 			       struct core_state *cs, uint32_t service_idx)
 {
 	void *userdata = s->spec.callback_userdata;
@@ -379,10 +379,10 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
 			return -EBUSY;
 
-		rte_service_runner_do_callback(s, cs, i);
+		service_runner_do_callback(s, cs, i);
 		rte_atomic32_clear(&s->execute_lock);
 	} else
-		rte_service_runner_do_callback(s, cs, i);
+		service_runner_do_callback(s, cs, i);
 
 	return 0;
 }
@@ -436,7 +436,7 @@ rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe)
 }
 
 static int32_t
-rte_service_runner_func(void *arg)
+service_runner_func(void *arg)
 {
 	RTE_SET_USED(arg);
 	uint32_t i;
@@ -706,7 +706,7 @@ rte_service_lcore_start(uint32_t lcore)
 	 */
 	lcore_states[lcore].runstate = RUNSTATE_RUNNING;
 
-	int ret = rte_eal_remote_launch(rte_service_runner_func, 0, lcore);
+	int ret = rte_eal_remote_launch(service_runner_func, 0, lcore);
 	/* returns -EBUSY if the core is already launched, 0 on success */
 	return ret;
 }
@@ -785,7 +785,7 @@ rte_service_lcore_attr_get(uint32_t lcore, uint32_t attr_id,
 }
 
 static void
-rte_service_dump_one(FILE *f, struct rte_service_spec_impl *s,
+service_dump_one(FILE *f, struct rte_service_spec_impl *s,
 		     uint64_t all_cycles, uint32_t reset)
 {
 	/* avoid divide by zero */
@@ -818,7 +818,7 @@ rte_service_attr_reset_all(uint32_t id)
 	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
 
 	int reset = 1;
-	rte_service_dump_one(NULL, s, 0, reset);
+	service_dump_one(NULL, s, 0, reset);
 	return 0;
 }
 
@@ -876,7 +876,7 @@ rte_service_dump(FILE *f, uint32_t id)
 		SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
 		fprintf(f, "Service %s Summary\n", s->spec.name);
 		uint32_t reset = 0;
-		rte_service_dump_one(f, s, total_cycles, reset);
+		service_dump_one(f, s, total_cycles, reset);
 		return 0;
 	}
 
@@ -886,7 +886,7 @@ rte_service_dump(FILE *f, uint32_t id)
 		if (!service_valid(i))
 			continue;
 		uint32_t reset = 0;
-		rte_service_dump_one(f, &rte_services[i], total_cycles, reset);
+		service_dump_one(f, &rte_services[i], total_cycles, reset);
 	}
 
 	fprintf(f, "Service Cores Summary\n");
-- 
2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* [dpdk-stable] [PATCH v3 08/12] service: remove redundant code
       [not found]   ` <1584407863-774-1-git-send-email-phil.yang@arm.com>
  2020-03-17  1:17     ` [dpdk-stable] [PATCH v3 07/12] service: remove rte prefix from static functions Phil Yang
@ 2020-03-17  1:17     ` Phil Yang
  2020-04-03 11:58       ` Van Haaren, Harry
  2020-03-17  1:17     ` [dpdk-stable] [PATCH v3 09/12] service: avoid race condition for MT unsafe service Phil Yang
  2020-03-17  1:17     ` [dpdk-stable] [PATCH v3 10/12] service: identify service running on another core correctly Phil Yang
  3 siblings, 1 reply; 51+ messages in thread
From: Phil Yang @ 2020-03-17  1:17 UTC (permalink / raw)
  To: thomas, harry.van.haaren, konstantin.ananyev, stephen,
	maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Honnappa.Nagarahalli,
	gavin.hu, ruifeng.wang, joyce.kong, nd, Stable

The service id validation is verified in the calling function, remove
the redundant code inside the service_update function.

Fixes: 21698354c832 ("service: introduce service cores concept")
Cc: Stable@dpdk.org

Signed-off-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 lib/librte_eal/common/rte_service.c | 31 ++++++++++++-------------------
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index 2117726..557b5a9 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -552,21 +552,10 @@ rte_service_start_with_defaults(void)
 }
 
 static int32_t
-service_update(struct rte_service_spec *service, uint32_t lcore,
+service_update(uint32_t sid, uint32_t lcore,
 		uint32_t *set, uint32_t *enabled)
 {
-	uint32_t i;
-	int32_t sid = -1;
-
-	for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
-		if ((struct rte_service_spec *)&rte_services[i] == service &&
-				service_valid(i)) {
-			sid = i;
-			break;
-		}
-	}
-
-	if (sid == -1 || lcore >= RTE_MAX_LCORE)
+	if (lcore >= RTE_MAX_LCORE)
 		return -EINVAL;
 
 	if (!lcore_states[lcore].is_service_core)
@@ -598,19 +587,23 @@ service_update(struct rte_service_spec *service, uint32_t lcore,
 int32_t
 rte_service_map_lcore_set(uint32_t id, uint32_t lcore, uint32_t enabled)
 {
-	struct rte_service_spec_impl *s;
-	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+	/* validate ID, or return error value */
+	if (id >= RTE_SERVICE_NUM_MAX || !service_valid(id))
+		return -EINVAL;
+
 	uint32_t on = enabled > 0;
-	return service_update(&s->spec, lcore, &on, 0);
+	return service_update(id, lcore, &on, 0);
 }
 
 int32_t
 rte_service_map_lcore_get(uint32_t id, uint32_t lcore)
 {
-	struct rte_service_spec_impl *s;
-	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+	/* validate ID, or return error value */
+	if (id >= RTE_SERVICE_NUM_MAX || !service_valid(id))
+		return -EINVAL;
+
 	uint32_t enabled;
-	int ret = service_update(&s->spec, lcore, 0, &enabled);
+	int ret = service_update(id, lcore, 0, &enabled);
 	if (ret == 0)
 		return enabled;
 	return ret;
-- 
2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* [dpdk-stable] [PATCH v3 09/12] service: avoid race condition for MT unsafe service
       [not found]   ` <1584407863-774-1-git-send-email-phil.yang@arm.com>
  2020-03-17  1:17     ` [dpdk-stable] [PATCH v3 07/12] service: remove rte prefix from static functions Phil Yang
  2020-03-17  1:17     ` [dpdk-stable] [PATCH v3 08/12] service: remove redundant code Phil Yang
@ 2020-03-17  1:17     ` Phil Yang
  2020-04-03 11:58       ` Van Haaren, Harry
  2020-03-17  1:17     ` [dpdk-stable] [PATCH v3 10/12] service: identify service running on another core correctly Phil Yang
  3 siblings, 1 reply; 51+ messages in thread
From: Phil Yang @ 2020-03-17  1:17 UTC (permalink / raw)
  To: thomas, harry.van.haaren, konstantin.ananyev, stephen,
	maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Honnappa.Nagarahalli,
	gavin.hu, ruifeng.wang, joyce.kong, nd, Honnappa Nagarahalli,
	stable

From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

There has possible that a MT unsafe service might get configured to
run on another core while the service is running currently. This
might result in the MT unsafe service running on multiple cores
simultaneously. Use 'execute_lock' always when the service is
MT unsafe.

Fixes: e9139a32f6e8 ("service: add function to run on app lcore")
Cc: stable@dpdk.org

Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 lib/librte_eal/common/rte_service.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index 557b5a9..32a2f8a 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -50,6 +50,10 @@ struct rte_service_spec_impl {
 	uint8_t internal_flags;
 
 	/* per service statistics */
+	/* Indicates how many cores the service is mapped to run on.
+	 * It does not indicate the number of cores the service is running
+	 * on currently.
+	 */
 	rte_atomic32_t num_mapped_cores;
 	uint64_t calls;
 	uint64_t cycles_spent;
@@ -370,12 +374,7 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 
 	cs->service_active_on_lcore[i] = 1;
 
-	/* check do we need cmpset, if MT safe or <= 1 core
-	 * mapped, atomic ops are not required.
-	 */
-	const int use_atomics = (service_mt_safe(s) == 0) &&
-				(rte_atomic32_read(&s->num_mapped_cores) > 1);
-	if (use_atomics) {
+	if (service_mt_safe(s) == 0) {
 		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
 			return -EBUSY;
 
-- 
2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* [dpdk-stable] [PATCH v3 10/12] service: identify service running on another core correctly
       [not found]   ` <1584407863-774-1-git-send-email-phil.yang@arm.com>
                       ` (2 preceding siblings ...)
  2020-03-17  1:17     ` [dpdk-stable] [PATCH v3 09/12] service: avoid race condition for MT unsafe service Phil Yang
@ 2020-03-17  1:17     ` Phil Yang
  2020-04-03 11:58       ` Van Haaren, Harry
  3 siblings, 1 reply; 51+ messages in thread
From: Phil Yang @ 2020-03-17  1:17 UTC (permalink / raw)
  To: thomas, harry.van.haaren, konstantin.ananyev, stephen,
	maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Honnappa.Nagarahalli,
	gavin.hu, ruifeng.wang, joyce.kong, nd, Honnappa Nagarahalli,
	stable

From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

The logic to identify if the MT unsafe service is running on another
core can return -EBUSY spuriously. In such cases, running the service
becomes costlier than using atomic operations. Assume that the
application passes the right parameters and reduces the number of
instructions for all cases.

Cc: stable@dpdk.org

Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 lib/librte_eal/common/rte_service.c | 26 ++++++++------------------
 1 file changed, 8 insertions(+), 18 deletions(-)

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index 32a2f8a..0843c3c 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -360,7 +360,7 @@ service_runner_do_callback(struct rte_service_spec_impl *s,
 /* Expects the service 's' is valid. */
 static int32_t
 service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
-	    struct rte_service_spec_impl *s)
+	    struct rte_service_spec_impl *s, uint32_t serialize_mt_unsafe)
 {
 	if (!s)
 		return -EINVAL;
@@ -374,7 +374,7 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 
 	cs->service_active_on_lcore[i] = 1;
 
-	if (service_mt_safe(s) == 0) {
+	if ((service_mt_safe(s) == 0) && (serialize_mt_unsafe == 1)) {
 		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
 			return -EBUSY;
 
@@ -412,24 +412,14 @@ rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe)
 
 	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
 
-	/* Atomically add this core to the mapped cores first, then examine if
-	 * we can run the service. This avoids a race condition between
-	 * checking the value, and atomically adding to the mapped count.
+	/* Increment num_mapped_cores to indicate that the service
+	 * is running on a core.
 	 */
-	if (serialize_mt_unsafe)
-		rte_atomic32_inc(&s->num_mapped_cores);
+	rte_atomic32_inc(&s->num_mapped_cores);
 
-	if (service_mt_safe(s) == 0 &&
-			rte_atomic32_read(&s->num_mapped_cores) > 1) {
-		if (serialize_mt_unsafe)
-			rte_atomic32_dec(&s->num_mapped_cores);
-		return -EBUSY;
-	}
-
-	int ret = service_run(id, cs, UINT64_MAX, s);
+	int ret = service_run(id, cs, UINT64_MAX, s, serialize_mt_unsafe);
 
-	if (serialize_mt_unsafe)
-		rte_atomic32_dec(&s->num_mapped_cores);
+	rte_atomic32_dec(&s->num_mapped_cores);
 
 	return ret;
 }
@@ -449,7 +439,7 @@ service_runner_func(void *arg)
 			if (!service_valid(i))
 				continue;
 			/* return value ignored as no change to code flow */
-			service_run(i, cs, service_mask, service_get(i));
+			service_run(i, cs, service_mask, service_get(i), 1);
 		}
 
 		cs->loops++;
-- 
2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [dpdk-stable] [PATCH v3 07/12] service: remove rte prefix from static functions
  2020-03-17  1:17     ` [dpdk-stable] [PATCH v3 07/12] service: remove rte prefix from static functions Phil Yang
@ 2020-04-03 11:57       ` Van Haaren, Harry
  2020-04-08 10:14         ` Phil Yang
  2020-04-05 21:35       ` Honnappa Nagarahalli
       [not found]       ` <1587659482-27133-1-git-send-email-phil.yang@arm.com>
  2 siblings, 1 reply; 51+ messages in thread
From: Van Haaren, Harry @ 2020-04-03 11:57 UTC (permalink / raw)
  To: Phil Yang, thomas, Ananyev, Konstantin, stephen, maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Honnappa.Nagarahalli,
	gavin.hu, ruifeng.wang, joyce.kong, nd, stable

> From: Phil Yang <phil.yang@arm.com>
> Sent: Tuesday, March 17, 2020 1:18 AM
> To: thomas@monjalon.net; Van Haaren, Harry <harry.van.haaren@intel.com>;
> Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> stephen@networkplumber.org; maxime.coquelin@redhat.com; dev@dpdk.org
> Cc: david.marchand@redhat.com; jerinj@marvell.com; hemant.agrawal@nxp.com;
> Honnappa.Nagarahalli@arm.com; gavin.hu@arm.com; ruifeng.wang@arm.com;
> joyce.kong@arm.com; nd@arm.com; stable@dpdk.org
> Subject: [PATCH v3 07/12] service: remove rte prefix from static functions
> 
> Fixes: 3cf5eb1546ed ("service: fix and refactor atomic service accesses")
> Fixes: 21698354c832 ("service: introduce service cores concept")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>


This patchset needs a rebase since the EAL file movement got merged,
however I'll review here so we can include some Acks etc and make
progress.

Is this really a "Fix"? The internal function names were not exported
in the .map file, so are not part of public ABI. This is an internal
naming improvement (thanks for doing cleanup), but I don't think the
Fixes: tags make sense?

Also I'm not sure if we want to port this patch back to stable? Changing (internal) function names seems like unnecessary churn, and hence risk to a stable release, without any benefit?

---

<snip patch diff>

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [dpdk-stable] [PATCH v3 08/12] service: remove redundant code
  2020-03-17  1:17     ` [dpdk-stable] [PATCH v3 08/12] service: remove redundant code Phil Yang
@ 2020-04-03 11:58       ` Van Haaren, Harry
  2020-04-05 18:35         ` Honnappa Nagarahalli
  0 siblings, 1 reply; 51+ messages in thread
From: Van Haaren, Harry @ 2020-04-03 11:58 UTC (permalink / raw)
  To: Phil Yang, thomas, Ananyev, Konstantin, stephen, maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Honnappa.Nagarahalli,
	gavin.hu, ruifeng.wang, joyce.kong, nd, Stable

> From: Phil Yang <phil.yang@arm.com>
> Sent: Tuesday, March 17, 2020 1:18 AM
> To: thomas@monjalon.net; Van Haaren, Harry <harry.van.haaren@intel.com>;
> Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> stephen@networkplumber.org; maxime.coquelin@redhat.com; dev@dpdk.org
> Cc: david.marchand@redhat.com; jerinj@marvell.com; hemant.agrawal@nxp.com;
> Honnappa.Nagarahalli@arm.com; gavin.hu@arm.com; ruifeng.wang@arm.com;
> joyce.kong@arm.com; nd@arm.com; Stable@dpdk.org
> Subject: [PATCH v3 08/12] service: remove redundant code
> 
> The service id validation is verified in the calling function, remove
> the redundant code inside the service_update function.
> 
> Fixes: 21698354c832 ("service: introduce service cores concept")
> Cc: Stable@dpdk.org
> 
> Signed-off-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>


Same comment as patch 7/12, is this really a "Fix"? This functionality
is not "broken" in  the current code? And is there value in porting
to stable? I'd see this as unnecessary churn.

As before, it is a valid cleanup (thanks), and I'd like to take it for
new DPDK releases.

Happy to Ack without Fixes or Cc Stable, if that's acceptable to you?



> ---
>  lib/librte_eal/common/rte_service.c | 31 ++++++++++++-------------------
>  1 file changed, 12 insertions(+), 19 deletions(-)
> 
> diff --git a/lib/librte_eal/common/rte_service.c
> b/lib/librte_eal/common/rte_service.c
> index 2117726..557b5a9 100644
> --- a/lib/librte_eal/common/rte_service.c
> +++ b/lib/librte_eal/common/rte_service.c
> @@ -552,21 +552,10 @@ rte_service_start_with_defaults(void)
>  }
> 
>  static int32_t
> -service_update(struct rte_service_spec *service, uint32_t lcore,
> +service_update(uint32_t sid, uint32_t lcore,
>  		uint32_t *set, uint32_t *enabled)
>  {
> -	uint32_t i;
> -	int32_t sid = -1;
> -
> -	for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
> -		if ((struct rte_service_spec *)&rte_services[i] == service &&
> -				service_valid(i)) {
> -			sid = i;
> -			break;
> -		}
> -	}
> -
> -	if (sid == -1 || lcore >= RTE_MAX_LCORE)
> +	if (lcore >= RTE_MAX_LCORE)
>  		return -EINVAL;
> 
>  	if (!lcore_states[lcore].is_service_core)
> @@ -598,19 +587,23 @@ service_update(struct rte_service_spec *service,
> uint32_t lcore,
>  int32_t
>  rte_service_map_lcore_set(uint32_t id, uint32_t lcore, uint32_t enabled)
>  {
> -	struct rte_service_spec_impl *s;
> -	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
> +	/* validate ID, or return error value */
> +	if (id >= RTE_SERVICE_NUM_MAX || !service_valid(id))
> +		return -EINVAL;
> +
>  	uint32_t on = enabled > 0;
> -	return service_update(&s->spec, lcore, &on, 0);
> +	return service_update(id, lcore, &on, 0);
>  }
> 
>  int32_t
>  rte_service_map_lcore_get(uint32_t id, uint32_t lcore)
>  {
> -	struct rte_service_spec_impl *s;
> -	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
> +	/* validate ID, or return error value */
> +	if (id >= RTE_SERVICE_NUM_MAX || !service_valid(id))
> +		return -EINVAL;
> +
>  	uint32_t enabled;
> -	int ret = service_update(&s->spec, lcore, 0, &enabled);
> +	int ret = service_update(id, lcore, 0, &enabled);
>  	if (ret == 0)
>  		return enabled;
>  	return ret;
> --
> 2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [dpdk-stable] [PATCH v3 09/12] service: avoid race condition for MT unsafe service
  2020-03-17  1:17     ` [dpdk-stable] [PATCH v3 09/12] service: avoid race condition for MT unsafe service Phil Yang
@ 2020-04-03 11:58       ` Van Haaren, Harry
  2020-04-04 18:03         ` Honnappa Nagarahalli
  0 siblings, 1 reply; 51+ messages in thread
From: Van Haaren, Harry @ 2020-04-03 11:58 UTC (permalink / raw)
  To: Phil Yang, thomas, Ananyev, Konstantin, stephen, maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Honnappa.Nagarahalli,
	gavin.hu, ruifeng.wang, joyce.kong, nd, Honnappa Nagarahalli,
	stable

> From: Phil Yang <phil.yang@arm.com>
> Sent: Tuesday, March 17, 2020 1:18 AM
> To: thomas@monjalon.net; Van Haaren, Harry <harry.van.haaren@intel.com>;
> Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> stephen@networkplumber.org; maxime.coquelin@redhat.com; dev@dpdk.org
> Cc: david.marchand@redhat.com; jerinj@marvell.com; hemant.agrawal@nxp.com;
> Honnappa.Nagarahalli@arm.com; gavin.hu@arm.com; ruifeng.wang@arm.com;
> joyce.kong@arm.com; nd@arm.com; Honnappa Nagarahalli
> <honnappa.nagarahalli@arm.com>; stable@dpdk.org
> Subject: [PATCH v3 09/12] service: avoid race condition for MT unsafe service
> 
> From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> 
> There has possible that a MT unsafe service might get configured to
> run on another core while the service is running currently. This
> might result in the MT unsafe service running on multiple cores
> simultaneously. Use 'execute_lock' always when the service is
> MT unsafe.
> 
> Fixes: e9139a32f6e8 ("service: add function to run on app lcore")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Reviewed-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Gavin Hu <gavin.hu@arm.com>

We should put "fix" in the title, once converged on an implementation.

Regarding Fixes and stable backport, we should consider if
fixing this in stable with a performance degradation, fixing with more
complex solution, or documenting a known issue a better solution.


This fix (always taking the atomic lock) will have a negative performance
impact on existing code using services. We should investigate a way
to fix it without causing datapath performance degradation.

I think there is a way to achieve this by moving more checks/time
to the control path (lcore updating the map), and not forcing the
datapath lcore to always take an atomic.

In this particular case, we have a counter for number of iterations
that a service has done. If this increments we know that the lcore
running the service has re-entered the critical section, so would
see an updated "needs atomic" flag.

This approach may introduce a predictable branch on the datapath,
however the cost of a predictable branch vs always taking an atomic
is order(s?) of magnitude, so a branch is much preferred.

It must be possible to avoid the datapath overhead using a scheme
like this. It will likely be more complex than your proposed change
below, however if it avoids datapath performance drops I feel that
a more complex solution is worth investigating at least.

A unit test is required to validate a fix like this - although perhaps
found by inspection/review, a real-world test to validate would give
confidence.


Thoughts on such an approach?



> ---
>  lib/librte_eal/common/rte_service.c | 11 +++++------
>  1 file changed, 5 insertions(+), 6 deletions(-)
> 
> diff --git a/lib/librte_eal/common/rte_service.c
> b/lib/librte_eal/common/rte_service.c
> index 557b5a9..32a2f8a 100644
> --- a/lib/librte_eal/common/rte_service.c
> +++ b/lib/librte_eal/common/rte_service.c
> @@ -50,6 +50,10 @@ struct rte_service_spec_impl {
>  	uint8_t internal_flags;
> 
>  	/* per service statistics */
> +	/* Indicates how many cores the service is mapped to run on.
> +	 * It does not indicate the number of cores the service is running
> +	 * on currently.
> +	 */
>  	rte_atomic32_t num_mapped_cores;
>  	uint64_t calls;
>  	uint64_t cycles_spent;
> @@ -370,12 +374,7 @@ service_run(uint32_t i, struct core_state *cs, uint64_t
> service_mask,
> 
>  	cs->service_active_on_lcore[i] = 1;
> 
> -	/* check do we need cmpset, if MT safe or <= 1 core
> -	 * mapped, atomic ops are not required.
> -	 */
> -	const int use_atomics = (service_mt_safe(s) == 0) &&
> -				(rte_atomic32_read(&s->num_mapped_cores) > 1);
> -	if (use_atomics) {
> +	if (service_mt_safe(s) == 0) {
>  		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
>  			return -EBUSY;
> 
> --
> 2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [dpdk-stable] [PATCH v3 10/12] service: identify service running on another core correctly
  2020-03-17  1:17     ` [dpdk-stable] [PATCH v3 10/12] service: identify service running on another core correctly Phil Yang
@ 2020-04-03 11:58       ` Van Haaren, Harry
  2020-04-05  2:43         ` Honnappa Nagarahalli
  0 siblings, 1 reply; 51+ messages in thread
From: Van Haaren, Harry @ 2020-04-03 11:58 UTC (permalink / raw)
  To: Phil Yang, thomas, Ananyev, Konstantin, stephen, maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Honnappa.Nagarahalli,
	gavin.hu, ruifeng.wang, joyce.kong, nd, Honnappa Nagarahalli,
	stable

> From: Phil Yang <phil.yang@arm.com>
> Sent: Tuesday, March 17, 2020 1:18 AM
> To: thomas@monjalon.net; Van Haaren, Harry <harry.van.haaren@intel.com>;
> Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> stephen@networkplumber.org; maxime.coquelin@redhat.com; dev@dpdk.org
> Cc: david.marchand@redhat.com; jerinj@marvell.com; hemant.agrawal@nxp.com;
> Honnappa.Nagarahalli@arm.com; gavin.hu@arm.com; ruifeng.wang@arm.com;
> joyce.kong@arm.com; nd@arm.com; Honnappa Nagarahalli
> <honnappa.nagarahalli@arm.com>; stable@dpdk.org
> Subject: [PATCH v3 10/12] service: identify service running on another core
> correctly
> 
> From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> 
> The logic to identify if the MT unsafe service is running on another
> core can return -EBUSY spuriously. In such cases, running the service
> becomes costlier than using atomic operations. Assume that the
> application passes the right parameters and reduces the number of
> instructions for all cases.
> 
> Cc: stable@dpdk.org
> 
> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Reviewed-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Gavin Hu <gavin.hu@arm.com>

Is this fixing broken functionality, or does it aim to only "optimize"?
Lack of "fixes" tag suggests optimization.

I'm cautious about the commit phrase "Assume that the application ...",
if the code was previously checking things, we must not stop checking
them now, this may introduce race-conditions in existing applications?

It seems like the "serialize_mt_unsafe" branch is being pushed
further down the callgraph, and instead of branching over atomics
this patch forces always executing 2 atomics?

This feels like too specific an optimization/tradeoff, without data to
backup that there are no regressions on any DPDK supported platforms.

DPDK today doesn't have a micro-benchmark to gather such perf data, 
but I would welcome one and we can have a data-driven decision.

Hope this point-of-view makes sense, -Harry

> ---
>  lib/librte_eal/common/rte_service.c | 26 ++++++++------------------
>  1 file changed, 8 insertions(+), 18 deletions(-)
> 
> diff --git a/lib/librte_eal/common/rte_service.c
> b/lib/librte_eal/common/rte_service.c
> index 32a2f8a..0843c3c 100644
> --- a/lib/librte_eal/common/rte_service.c
> +++ b/lib/librte_eal/common/rte_service.c
> @@ -360,7 +360,7 @@ service_runner_do_callback(struct rte_service_spec_impl
> *s,
>  /* Expects the service 's' is valid. */
>  static int32_t
>  service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
> -	    struct rte_service_spec_impl *s)
> +	    struct rte_service_spec_impl *s, uint32_t serialize_mt_unsafe)
>  {
>  	if (!s)
>  		return -EINVAL;
> @@ -374,7 +374,7 @@ service_run(uint32_t i, struct core_state *cs, uint64_t
> service_mask,
> 
>  	cs->service_active_on_lcore[i] = 1;
> 
> -	if (service_mt_safe(s) == 0) {
> +	if ((service_mt_safe(s) == 0) && (serialize_mt_unsafe == 1)) {
>  		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
>  			return -EBUSY;
> 
> @@ -412,24 +412,14 @@ rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t
> serialize_mt_unsafe)
> 
>  	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
> 
> -	/* Atomically add this core to the mapped cores first, then examine if
> -	 * we can run the service. This avoids a race condition between
> -	 * checking the value, and atomically adding to the mapped count.
> +	/* Increment num_mapped_cores to indicate that the service
> +	 * is running on a core.
>  	 */
> -	if (serialize_mt_unsafe)
> -		rte_atomic32_inc(&s->num_mapped_cores);
> +	rte_atomic32_inc(&s->num_mapped_cores);
> 
> -	if (service_mt_safe(s) == 0 &&
> -			rte_atomic32_read(&s->num_mapped_cores) > 1) {
> -		if (serialize_mt_unsafe)
> -			rte_atomic32_dec(&s->num_mapped_cores);
> -		return -EBUSY;
> -	}
> -
> -	int ret = service_run(id, cs, UINT64_MAX, s);
> +	int ret = service_run(id, cs, UINT64_MAX, s, serialize_mt_unsafe);
> 
> -	if (serialize_mt_unsafe)
> -		rte_atomic32_dec(&s->num_mapped_cores);
> +	rte_atomic32_dec(&s->num_mapped_cores);
> 
>  	return ret;
>  }
> @@ -449,7 +439,7 @@ service_runner_func(void *arg)
>  			if (!service_valid(i))
>  				continue;
>  			/* return value ignored as no change to code flow */
> -			service_run(i, cs, service_mask, service_get(i));
> +			service_run(i, cs, service_mask, service_get(i), 1);
>  		}
> 
>  		cs->loops++;
> --
> 2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [dpdk-stable] [PATCH v3 09/12] service: avoid race condition for MT unsafe service
  2020-04-03 11:58       ` Van Haaren, Harry
@ 2020-04-04 18:03         ` Honnappa Nagarahalli
  2020-04-08 18:05           ` Van Haaren, Harry
  0 siblings, 1 reply; 51+ messages in thread
From: Honnappa Nagarahalli @ 2020-04-04 18:03 UTC (permalink / raw)
  To: Van Haaren, Harry, Phil Yang, thomas, Ananyev, Konstantin,
	stephen, maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Gavin Hu, Ruifeng Wang,
	Joyce Kong, nd, stable, Honnappa Nagarahalli, nd

<snip>

> > Subject: [PATCH v3 09/12] service: avoid race condition for MT unsafe
> > service
> >
> > From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> >
> > There has possible that a MT unsafe service might get configured to
> > run on another core while the service is running currently. This might
> > result in the MT unsafe service running on multiple cores
> > simultaneously. Use 'execute_lock' always when the service is MT
> > unsafe.
> >
> > Fixes: e9139a32f6e8 ("service: add function to run on app lcore")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > Reviewed-by: Phil Yang <phil.yang@arm.com>
> > Reviewed-by: Gavin Hu <gavin.hu@arm.com>
> 
> We should put "fix" in the title, once converged on an implementation.
Ok, will replace 'avoid' with 'fix' (once we agree on the solution)

> 
> Regarding Fixes and stable backport, we should consider if fixing this in stable
> with a performance degradation, fixing with more complex solution, or
> documenting a known issue a better solution.
> 
> 
> This fix (always taking the atomic lock) will have a negative performance
> impact on existing code using services. We should investigate a way to fix it
> without causing datapath performance degradation.
Trying to gauge the impact on the existing applications...
The documentation does not explicitly disallow run time mapping of cores to service.
1) If the applications are mapping the cores to services at run time, they are running with a bug. IMO, bug fix resulting in a performance drop should be acceptable.
2) If the service is configured to run on single core (num_mapped_cores == 1), but service is set to MT unsafe - this will have a (possible) performance impact.
	a) This can be solved by setting the service to MT safe and can be documented. This might be a reasonable solution for applications which are compiling with
                   future DPDK releases.
	b) We can also solve this using symbol versioning - the old version of this function will use the old code, the new version of this function will use the code in
                   this patch. So, if the application is run with future DPDK releases without recompiling, it will continue to use the old version. If the application is compiled 
                   with future releases, they can use solution in 2a. We also should think if this is an appropriate solution as this would force 1) to recompile to get the fix.
3) If the service is configured to run on multiple cores (num_mapped_cores > 1), then for those applications, the lock is being taken already. These applications might see some improvements as this patch removes few instructions.

> 
> I think there is a way to achieve this by moving more checks/time to the
> control path (lcore updating the map), and not forcing the datapath lcore to
> always take an atomic.
I think 2a above is the solution.

> 
> In this particular case, we have a counter for number of iterations that a
Which counter are you thinking about?
All the counters I checked are not atomic operations currently. If we are going to use counters they have to be atomic, which means additional cycles in the data path.

> service has done. If this increments we know that the lcore running the
> service has re-entered the critical section, so would see an updated "needs
> atomic" flag.
> 
> This approach may introduce a predictable branch on the datapath, however
> the cost of a predictable branch vs always taking an atomic is order(s?) of
> magnitude, so a branch is much preferred.
> 
> It must be possible to avoid the datapath overhead using a scheme like this. It
> will likely be more complex than your proposed change below, however if it
> avoids datapath performance drops I feel that a more complex solution is
> worth investigating at least.
I do not completely understand the approach you are proposing, may be you can elaborate more. But, it seems to be based on a counter approach. Following is my assessment on what happens if we use a counter. Let us say we kept track of how many cores are running the service currently. We need an atomic counter other than 'num_mapped_cores'. Let us call that counter 'num_current_cores'. The code to call the service would look like below.

1) rte_atomic32_inc(&num_current_cores); /* this results in a full memory barrier */
2) if (__atomic_load_n(&num_current_cores, __ATOMIC_ACQUIRE) == 1) { /* rte_atomic_read is not enough here as it does not provide the required memory barrier for any architecture */
3) 	run_service(); /* Call the service */
4) }
5) rte_atomic32_sub(&num_current_cores); /* Calling rte_atomic32_clear is not enough as it is not an atomic operation and does not provide the required memory barrier */

But the above code has race conditions in lines 1 and 2. It is possible that none of the cores will ever get to run the service as they all could simultaneously increment the counter. Hence lines 1 and 2 together need to be atomic, which is nothing but 'compare-exchange' operation.

BTW, the current code has a bug where it calls 'rte_atomic_clear(&s->execute_lock)', it is missing memory barriers which results in clearing the execute_lock before the service has completed running. I suggest changing the 'execute_lock' to rte_spinlock_t and using rte_spinlock_try_lock and rte_spinlock_unlock APIs.

> 
> A unit test is required to validate a fix like this - although perhaps found by
> inspection/review, a real-world test to validate would give confidence.
Agree, need to have a test case.

> 
> 
> Thoughts on such an approach?
> 
> 
> 
> > ---
> >  lib/librte_eal/common/rte_service.c | 11 +++++------
> >  1 file changed, 5 insertions(+), 6 deletions(-)
> >
> > diff --git a/lib/librte_eal/common/rte_service.c
> > b/lib/librte_eal/common/rte_service.c
> > index 557b5a9..32a2f8a 100644
> > --- a/lib/librte_eal/common/rte_service.c
> > +++ b/lib/librte_eal/common/rte_service.c
> > @@ -50,6 +50,10 @@ struct rte_service_spec_impl {
> >  	uint8_t internal_flags;
> >
> >  	/* per service statistics */
> > +	/* Indicates how many cores the service is mapped to run on.
> > +	 * It does not indicate the number of cores the service is running
> > +	 * on currently.
> > +	 */
> >  	rte_atomic32_t num_mapped_cores;
> >  	uint64_t calls;
> >  	uint64_t cycles_spent;
> > @@ -370,12 +374,7 @@ service_run(uint32_t i, struct core_state *cs,
> > uint64_t service_mask,
> >
> >  	cs->service_active_on_lcore[i] = 1;
> >
> > -	/* check do we need cmpset, if MT safe or <= 1 core
> > -	 * mapped, atomic ops are not required.
> > -	 */
> > -	const int use_atomics = (service_mt_safe(s) == 0) &&
> > -				(rte_atomic32_read(&s-
> >num_mapped_cores) > 1);
> > -	if (use_atomics) {
> > +	if (service_mt_safe(s) == 0) {
> >  		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
> >  			return -EBUSY;
> >
> > --
> > 2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [dpdk-stable] [PATCH v3 10/12] service: identify service running on another core correctly
  2020-04-03 11:58       ` Van Haaren, Harry
@ 2020-04-05  2:43         ` Honnappa Nagarahalli
  0 siblings, 0 replies; 51+ messages in thread
From: Honnappa Nagarahalli @ 2020-04-05  2:43 UTC (permalink / raw)
  To: Van Haaren, Harry, Phil Yang, thomas, Ananyev, Konstantin,
	stephen, maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Gavin Hu, Ruifeng Wang,
	Joyce Kong, nd, stable, Honnappa Nagarahalli, nd

<snip>

> > Subject: [PATCH v3 10/12] service: identify service running on another
> > core correctly
> >
> > From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> >
> > The logic to identify if the MT unsafe service is running on another
> > core can return -EBUSY spuriously. In such cases, running the service
> > becomes costlier than using atomic operations. Assume that the
> > application passes the right parameters and reduces the number of
> > instructions for all cases.
> >
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > Reviewed-by: Phil Yang <phil.yang@arm.com>
> > Reviewed-by: Gavin Hu <gavin.hu@arm.com>
> 
> Is this fixing broken functionality, or does it aim to only "optimize"?
> Lack of "fixes" tag suggests optimization.
Good point, it is missing the 'Fixes' tag. Relooking at the code, following are few problems that I observe, please correct me if I am wrong:
1) Looking at the API rte_service_map_lcore_set, 'num_mapped_cores' keeps track of the number of cores the service is mapped to. However, in the function 'rte_service_run_iter_on_app_lcore', the 'num_mapped_cores' is incremented only if 'serialize_mt_unsafe' is set. This will return incorrect result in 'rte_service_runstate_get' API when 'serialize_mt_unsafe' is not set.

2) Since incrementing 'num_mapped_cores' and checking its value is not an atomic operation, it introduces race conditions. Consider the current code snippet from the function 'rte_service_run_iter_on_app_lcore'.

1) if (serialize_mt_unsafe)
2)		rte_atomic32_inc(&s->num_mapped_cores);
3) if (service_mt_safe(s) == 0 && rte_atomic32_read(&s->num_mapped_cores) > 1) {
4) 		if (serialize_mt_unsafe)
5)			rte_atomic32_dec(&s->num_mapped_cores);
6)		return -EBUSY;
7)	}

It is possible that more than 1 thread is executing line 3) concurrently, in which case all of them will hit line 6). Due to this, it is possible that the service might never run or it might take multiple attempts to run the service wasting cycles.

If you agree these are bugs, I can add 'fixes' tag.

> 
> I'm cautious about the commit phrase "Assume that the application ...", if the
> code was previously checking things, we must not stop checking them now,
> this may introduce race-conditions in existing applications?
What I meant here is, let us believe in what the application says. i.e. if the applications sets 'serialize_mt_unsafe' to 1, let us assume that the service is infact 'unsafe'.

> 
> It seems like the "serialize_mt_unsafe" branch is being pushed further down
> the callgraph, and instead of branching over atomics this patch forces always
> executing 2 atomics?
Yes, that is correct. Please see explanation in 1) above.

> 
> This feels like too specific an optimization/tradeoff, without data to backup
> that there are no regressions on any DPDK supported platforms.
Apologies for missing the 'Fixes' tag, this patch is a bug fix.

> 
> DPDK today doesn't have a micro-benchmark to gather such perf data, but I
> would welcome one and we can have a data-driven decision.
When this was developed, was the logic to avoid taking the lock measured to provide any improvements?

> 
> Hope this point-of-view makes sense, -Harry
> 
> > ---
> >  lib/librte_eal/common/rte_service.c | 26 ++++++++------------------
> >  1 file changed, 8 insertions(+), 18 deletions(-)
> >
> > diff --git a/lib/librte_eal/common/rte_service.c
> > b/lib/librte_eal/common/rte_service.c
> > index 32a2f8a..0843c3c 100644
> > --- a/lib/librte_eal/common/rte_service.c
> > +++ b/lib/librte_eal/common/rte_service.c
> > @@ -360,7 +360,7 @@ service_runner_do_callback(struct
> > rte_service_spec_impl *s,
> >  /* Expects the service 's' is valid. */  static int32_t
> > service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
> > -	    struct rte_service_spec_impl *s)
> > +	    struct rte_service_spec_impl *s, uint32_t serialize_mt_unsafe)
> >  {
> >  	if (!s)
> >  		return -EINVAL;
> > @@ -374,7 +374,7 @@ service_run(uint32_t i, struct core_state *cs,
> > uint64_t service_mask,
> >
> >  	cs->service_active_on_lcore[i] = 1;
> >
> > -	if (service_mt_safe(s) == 0) {
> > +	if ((service_mt_safe(s) == 0) && (serialize_mt_unsafe == 1)) {
> >  		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
> >  			return -EBUSY;
> >
> > @@ -412,24 +412,14 @@ rte_service_run_iter_on_app_lcore(uint32_t id,
> > uint32_t
> > serialize_mt_unsafe)
> >
> >  	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
> >
> > -	/* Atomically add this core to the mapped cores first, then examine if
> > -	 * we can run the service. This avoids a race condition between
> > -	 * checking the value, and atomically adding to the mapped count.
> > +	/* Increment num_mapped_cores to indicate that the service
> > +	 * is running on a core.
> >  	 */
> > -	if (serialize_mt_unsafe)
> > -		rte_atomic32_inc(&s->num_mapped_cores);
> > +	rte_atomic32_inc(&s->num_mapped_cores);
> >
> > -	if (service_mt_safe(s) == 0 &&
> > -			rte_atomic32_read(&s->num_mapped_cores) > 1) {
> > -		if (serialize_mt_unsafe)
> > -			rte_atomic32_dec(&s->num_mapped_cores);
> > -		return -EBUSY;
> > -	}
> > -
> > -	int ret = service_run(id, cs, UINT64_MAX, s);
> > +	int ret = service_run(id, cs, UINT64_MAX, s, serialize_mt_unsafe);
> >
> > -	if (serialize_mt_unsafe)
> > -		rte_atomic32_dec(&s->num_mapped_cores);
> > +	rte_atomic32_dec(&s->num_mapped_cores);
> >
> >  	return ret;
> >  }
> > @@ -449,7 +439,7 @@ service_runner_func(void *arg)
> >  			if (!service_valid(i))
> >  				continue;
> >  			/* return value ignored as no change to code flow */
> > -			service_run(i, cs, service_mask, service_get(i));
> > +			service_run(i, cs, service_mask, service_get(i), 1);
> >  		}
> >
> >  		cs->loops++;
> > --
> > 2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [dpdk-stable] [PATCH v3 08/12] service: remove redundant code
  2020-04-03 11:58       ` Van Haaren, Harry
@ 2020-04-05 18:35         ` Honnappa Nagarahalli
  2020-04-08 10:15           ` Phil Yang
  0 siblings, 1 reply; 51+ messages in thread
From: Honnappa Nagarahalli @ 2020-04-05 18:35 UTC (permalink / raw)
  To: Van Haaren, Harry, Phil Yang, thomas, Ananyev, Konstantin,
	stephen, maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Gavin Hu, Ruifeng Wang,
	Joyce Kong, nd, Stable, Honnappa Nagarahalli, nd

<snip>

> >
> > The service id validation is verified in the calling function, remove
> > the redundant code inside the service_update function.
> >
> > Fixes: 21698354c832 ("service: introduce service cores concept")
> > Cc: Stable@dpdk.org
> >
> > Signed-off-by: Phil Yang <phil.yang@arm.com>
> > Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> 
> 
> Same comment as patch 7/12, is this really a "Fix"? This functionality is not
> "broken" in  the current code? And is there value in porting to stable? I'd see
> this as unnecessary churn.
> 
> As before, it is a valid cleanup (thanks), and I'd like to take it for new DPDK
> releases.
> 
> Happy to Ack without Fixes or Cc Stable, if that's acceptable to you?
Agreed.

> 
> 
> 
> > ---
> >  lib/librte_eal/common/rte_service.c | 31
> > ++++++++++++-------------------
> >  1 file changed, 12 insertions(+), 19 deletions(-)
> >
> > diff --git a/lib/librte_eal/common/rte_service.c
> > b/lib/librte_eal/common/rte_service.c
> > index 2117726..557b5a9 100644
> > --- a/lib/librte_eal/common/rte_service.c
> > +++ b/lib/librte_eal/common/rte_service.c
> > @@ -552,21 +552,10 @@ rte_service_start_with_defaults(void)
> >  }
> >
> >  static int32_t
> > -service_update(struct rte_service_spec *service, uint32_t lcore,
> > +service_update(uint32_t sid, uint32_t lcore,
> >  		uint32_t *set, uint32_t *enabled)
'set' parameter does not need be passed by reference, pass by value is enough.

> >  {
> > -	uint32_t i;
> > -	int32_t sid = -1;
> > -
> > -	for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
> > -		if ((struct rte_service_spec *)&rte_services[i] == service &&
> > -				service_valid(i)) {
> > -			sid = i;
> > -			break;
> > -		}
> > -	}
> > -
> > -	if (sid == -1 || lcore >= RTE_MAX_LCORE)
> > +	if (lcore >= RTE_MAX_LCORE)
> >  		return -EINVAL;
The validations look somewhat inconsistent in service_update function, we are validating some parameters and not some.
Suggest bringing the validation of the service id also into this function and remove it from the calling functions.

> >
> >  	if (!lcore_states[lcore].is_service_core)
> > @@ -598,19 +587,23 @@ service_update(struct rte_service_spec *service,
> > uint32_t lcore,  int32_t  rte_service_map_lcore_set(uint32_t id,
> > uint32_t lcore, uint32_t enabled)  {
> > -	struct rte_service_spec_impl *s;
> > -	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
> > +	/* validate ID, or return error value */
> > +	if (id >= RTE_SERVICE_NUM_MAX || !service_valid(id))
> > +		return -EINVAL;
> > +
> >  	uint32_t on = enabled > 0;
We do not need the above line. 'enabled' can be passed directly to 'service_update'.

> > -	return service_update(&s->spec, lcore, &on, 0);
> > +	return service_update(id, lcore, &on, 0);
> >  }
> >
> >  int32_t
> >  rte_service_map_lcore_get(uint32_t id, uint32_t lcore)  {
> > -	struct rte_service_spec_impl *s;
> > -	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
> > +	/* validate ID, or return error value */
> > +	if (id >= RTE_SERVICE_NUM_MAX || !service_valid(id))
> > +		return -EINVAL;
> > +
> >  	uint32_t enabled;
> > -	int ret = service_update(&s->spec, lcore, 0, &enabled);
> > +	int ret = service_update(id, lcore, 0, &enabled);
> >  	if (ret == 0)
> >  		return enabled;
> >  	return ret;
> > --
> > 2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [dpdk-stable] [PATCH v3 07/12] service: remove rte prefix from static functions
  2020-03-17  1:17     ` [dpdk-stable] [PATCH v3 07/12] service: remove rte prefix from static functions Phil Yang
  2020-04-03 11:57       ` Van Haaren, Harry
@ 2020-04-05 21:35       ` Honnappa Nagarahalli
  2020-04-08 10:14         ` Phil Yang
       [not found]       ` <1587659482-27133-1-git-send-email-phil.yang@arm.com>
  2 siblings, 1 reply; 51+ messages in thread
From: Honnappa Nagarahalli @ 2020-04-05 21:35 UTC (permalink / raw)
  To: Phil Yang, thomas, harry.van.haaren, konstantin.ananyev, stephen,
	maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Gavin Hu, Ruifeng Wang,
	Joyce Kong, nd, stable, Honnappa Nagarahalli, nd

<snip>

> Subject: [PATCH v3 07/12] service: remove rte prefix from static functions
> 
> Fixes: 3cf5eb1546ed ("service: fix and refactor atomic service accesses")
> Fixes: 21698354c832 ("service: introduce service cores concept")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> ---
>  lib/librte_eal/common/rte_service.c | 18 +++++++++---------
>  1 file changed, 9 insertions(+), 9 deletions(-)
> 
> diff --git a/lib/librte_eal/common/rte_service.c
> b/lib/librte_eal/common/rte_service.c
> index b0b78ba..2117726 100644
> --- a/lib/librte_eal/common/rte_service.c
> +++ b/lib/librte_eal/common/rte_service.c
> @@ -336,7 +336,7 @@ rte_service_runstate_get(uint32_t id)  }
> 
>  static inline void
> -rte_service_runner_do_callback(struct rte_service_spec_impl *s,
> +service_runner_do_callback(struct rte_service_spec_impl *s,
>  			       struct core_state *cs, uint32_t service_idx)  {
>  	void *userdata = s->spec.callback_userdata; @@ -379,10 +379,10
> @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
>  		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
>  			return -EBUSY;
> 
> -		rte_service_runner_do_callback(s, cs, i);
> +		service_runner_do_callback(s, cs, i);
>  		rte_atomic32_clear(&s->execute_lock);
>  	} else
> -		rte_service_runner_do_callback(s, cs, i);
> +		service_runner_do_callback(s, cs, i);
> 
>  	return 0;
>  }
> @@ -436,7 +436,7 @@ rte_service_run_iter_on_app_lcore(uint32_t id,
> uint32_t serialize_mt_unsafe)  }
> 
>  static int32_t
> -rte_service_runner_func(void *arg)
> +service_runner_func(void *arg)
>  {
>  	RTE_SET_USED(arg);
>  	uint32_t i;
This is a minor comment.
Since you are touching 'service_runner_func', please consider doing the below improvement:

struct core_state *cs = &lcore_states[lcore];
while (lcore_states[lcore].runstate == RUNSTATE_RUNNING) {   

The while above can be changed as follows to make it more readable

while (cs->runstate == RUNSTATE_RUNNING) {   

> @@ -706,7 +706,7 @@ rte_service_lcore_start(uint32_t lcore)
>  	 */
>  	lcore_states[lcore].runstate = RUNSTATE_RUNNING;
> 
> -	int ret = rte_eal_remote_launch(rte_service_runner_func, 0, lcore);
> +	int ret = rte_eal_remote_launch(service_runner_func, 0, lcore);
>  	/* returns -EBUSY if the core is already launched, 0 on success */
>  	return ret;
>  }
> @@ -785,7 +785,7 @@ rte_service_lcore_attr_get(uint32_t lcore, uint32_t
> attr_id,  }
> 
<snip>


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [dpdk-stable] [PATCH v3 07/12] service: remove rte prefix from static functions
  2020-04-03 11:57       ` Van Haaren, Harry
@ 2020-04-08 10:14         ` Phil Yang
  2020-04-08 10:36           ` Van Haaren, Harry
  0 siblings, 1 reply; 51+ messages in thread
From: Phil Yang @ 2020-04-08 10:14 UTC (permalink / raw)
  To: Van Haaren, Harry, thomas, Ananyev, Konstantin, stephen,
	maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Honnappa Nagarahalli,
	Gavin Hu, Ruifeng Wang, Joyce Kong, nd, stable, nd

> -----Original Message-----
> From: Van Haaren, Harry <harry.van.haaren@intel.com>
> Sent: Friday, April 3, 2020 7:58 PM
> To: Phil Yang <Phil.Yang@arm.com>; thomas@monjalon.net; Ananyev,
> Konstantin <konstantin.ananyev@intel.com>;
> stephen@networkplumber.org; maxime.coquelin@redhat.com;
> dev@dpdk.org
> Cc: david.marchand@redhat.com; jerinj@marvell.com;
> hemant.agrawal@nxp.com; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Gavin Hu <Gavin.Hu@arm.com>;
> Ruifeng Wang <Ruifeng.Wang@arm.com>; Joyce Kong
> <Joyce.Kong@arm.com>; nd <nd@arm.com>; stable@dpdk.org
> Subject: RE: [PATCH v3 07/12] service: remove rte prefix from static functions
> 
> > From: Phil Yang <phil.yang@arm.com>
> > Sent: Tuesday, March 17, 2020 1:18 AM
> > To: thomas@monjalon.net; Van Haaren, Harry
> <harry.van.haaren@intel.com>;
> > Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> > stephen@networkplumber.org; maxime.coquelin@redhat.com;
> dev@dpdk.org
> > Cc: david.marchand@redhat.com; jerinj@marvell.com;
> hemant.agrawal@nxp.com;
> > Honnappa.Nagarahalli@arm.com; gavin.hu@arm.com;
> ruifeng.wang@arm.com;
> > joyce.kong@arm.com; nd@arm.com; stable@dpdk.org
> > Subject: [PATCH v3 07/12] service: remove rte prefix from static functions
> >
> > Fixes: 3cf5eb1546ed ("service: fix and refactor atomic service accesses")
> > Fixes: 21698354c832 ("service: introduce service cores concept")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Phil Yang <phil.yang@arm.com>
> > Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> 
> 
> This patchset needs a rebase since the EAL file movement got merged,
> however I'll review here so we can include some Acks etc and make
> progress.
> 
> Is this really a "Fix"? The internal function names were not exported
> in the .map file, so are not part of public ABI. This is an internal
> naming improvement (thanks for doing cleanup), but I don't think the
> Fixes: tags make sense?
> 
> Also I'm not sure if we want to port this patch back to stable? Changing
> (internal) function names seems like unnecessary churn, and hence risk to a
> stable release, without any benefit?
OK.
I will remove these tags in the next version and split the service core patches from the original series into a series by itself.

Thanks,
Phil

> 
> ---
> 
> <snip patch diff>

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [dpdk-stable] [PATCH v3 07/12] service: remove rte prefix from static functions
  2020-04-05 21:35       ` Honnappa Nagarahalli
@ 2020-04-08 10:14         ` Phil Yang
  0 siblings, 0 replies; 51+ messages in thread
From: Phil Yang @ 2020-04-08 10:14 UTC (permalink / raw)
  To: Honnappa Nagarahalli, thomas, harry.van.haaren,
	konstantin.ananyev, stephen, maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Gavin Hu, Ruifeng Wang,
	Joyce Kong, nd, stable, nd, nd

> -----Original Message-----
> From: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>
> Sent: Monday, April 6, 2020 5:35 AM
> To: Phil Yang <Phil.Yang@arm.com>; thomas@monjalon.net;
> harry.van.haaren@intel.com; konstantin.ananyev@intel.com;
> stephen@networkplumber.org; maxime.coquelin@redhat.com;
> dev@dpdk.org
> Cc: david.marchand@redhat.com; jerinj@marvell.com;
> hemant.agrawal@nxp.com; Gavin Hu <Gavin.Hu@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; Joyce Kong <Joyce.Kong@arm.com>; nd
> <nd@arm.com>; stable@dpdk.org; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>
> Subject: RE: [PATCH v3 07/12] service: remove rte prefix from static functions
> 
> <snip>
> 
> > Subject: [PATCH v3 07/12] service: remove rte prefix from static functions
> >
> > Fixes: 3cf5eb1546ed ("service: fix and refactor atomic service accesses")
> > Fixes: 21698354c832 ("service: introduce service cores concept")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Phil Yang <phil.yang@arm.com>
> > Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > ---
> >  lib/librte_eal/common/rte_service.c | 18 +++++++++---------
> >  1 file changed, 9 insertions(+), 9 deletions(-)
> >
> > diff --git a/lib/librte_eal/common/rte_service.c
> > b/lib/librte_eal/common/rte_service.c
> > index b0b78ba..2117726 100644
> > --- a/lib/librte_eal/common/rte_service.c
> > +++ b/lib/librte_eal/common/rte_service.c
> > @@ -336,7 +336,7 @@ rte_service_runstate_get(uint32_t id)  }
> >
> >  static inline void
> > -rte_service_runner_do_callback(struct rte_service_spec_impl *s,
> > +service_runner_do_callback(struct rte_service_spec_impl *s,
> >  			       struct core_state *cs, uint32_t service_idx)  {
> >  	void *userdata = s->spec.callback_userdata; @@ -379,10 +379,10
> > @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
> >  		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0,
> 1))
> >  			return -EBUSY;
> >
> > -		rte_service_runner_do_callback(s, cs, i);
> > +		service_runner_do_callback(s, cs, i);
> >  		rte_atomic32_clear(&s->execute_lock);
> >  	} else
> > -		rte_service_runner_do_callback(s, cs, i);
> > +		service_runner_do_callback(s, cs, i);
> >
> >  	return 0;
> >  }
> > @@ -436,7 +436,7 @@ rte_service_run_iter_on_app_lcore(uint32_t id,
> > uint32_t serialize_mt_unsafe)  }
> >
> >  static int32_t
> > -rte_service_runner_func(void *arg)
> > +service_runner_func(void *arg)
> >  {
> >  	RTE_SET_USED(arg);
> >  	uint32_t i;
> This is a minor comment.
> Since you are touching 'service_runner_func', please consider doing the
> below improvement:
> 
> struct core_state *cs = &lcore_states[lcore];
> while (lcore_states[lcore].runstate == RUNSTATE_RUNNING) {
> 
> The while above can be changed as follows to make it more readable
> 
> while (cs->runstate == RUNSTATE_RUNNING) {
OK. I will clean it up in the next version.

Thanks,
Phil

> 
> > @@ -706,7 +706,7 @@ rte_service_lcore_start(uint32_t lcore)
> >  	 */
> >  	lcore_states[lcore].runstate = RUNSTATE_RUNNING;
> >
> > -	int ret = rte_eal_remote_launch(rte_service_runner_func, 0, lcore);
> > +	int ret = rte_eal_remote_launch(service_runner_func, 0, lcore);
> >  	/* returns -EBUSY if the core is already launched, 0 on success */
> >  	return ret;
> >  }
> > @@ -785,7 +785,7 @@ rte_service_lcore_attr_get(uint32_t lcore, uint32_t
> > attr_id,  }
> >
> <snip>


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [dpdk-stable] [PATCH v3 08/12] service: remove redundant code
  2020-04-05 18:35         ` Honnappa Nagarahalli
@ 2020-04-08 10:15           ` Phil Yang
  0 siblings, 0 replies; 51+ messages in thread
From: Phil Yang @ 2020-04-08 10:15 UTC (permalink / raw)
  To: Honnappa Nagarahalli, Van Haaren, Harry, thomas, Ananyev,
	Konstantin, stephen, maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Gavin Hu, Ruifeng Wang,
	Joyce Kong, nd, Stable, nd, nd

> -----Original Message-----
> From: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>
> Sent: Monday, April 6, 2020 2:35 AM
> To: Van Haaren, Harry <harry.van.haaren@intel.com>; Phil Yang
> <Phil.Yang@arm.com>; thomas@monjalon.net; Ananyev, Konstantin
> <konstantin.ananyev@intel.com>; stephen@networkplumber.org;
> maxime.coquelin@redhat.com; dev@dpdk.org
> Cc: david.marchand@redhat.com; jerinj@marvell.com;
> hemant.agrawal@nxp.com; Gavin Hu <Gavin.Hu@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; Joyce Kong <Joyce.Kong@arm.com>; nd
> <nd@arm.com>; Stable@dpdk.org; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>
> Subject: RE: [PATCH v3 08/12] service: remove redundant code
> 
> <snip>
> 
> > >
> > > The service id validation is verified in the calling function, remove
> > > the redundant code inside the service_update function.
> > >
> > > Fixes: 21698354c832 ("service: introduce service cores concept")
> > > Cc: Stable@dpdk.org
> > >
> > > Signed-off-by: Phil Yang <phil.yang@arm.com>
> > > Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> >
> >
> > Same comment as patch 7/12, is this really a "Fix"? This functionality is not
> > "broken" in  the current code? And is there value in porting to stable? I'd
> see
> > this as unnecessary churn.
> >
> > As before, it is a valid cleanup (thanks), and I'd like to take it for new DPDK
> > releases.
> >
> > Happy to Ack without Fixes or Cc Stable, if that's acceptable to you?
> Agreed.

Agreed. 

> 
> >
> >
> >
> > > ---
> > >  lib/librte_eal/common/rte_service.c | 31
> > > ++++++++++++-------------------
> > >  1 file changed, 12 insertions(+), 19 deletions(-)
> > >
> > > diff --git a/lib/librte_eal/common/rte_service.c
> > > b/lib/librte_eal/common/rte_service.c
> > > index 2117726..557b5a9 100644
> > > --- a/lib/librte_eal/common/rte_service.c
> > > +++ b/lib/librte_eal/common/rte_service.c
> > > @@ -552,21 +552,10 @@ rte_service_start_with_defaults(void)
> > >  }
> > >
> > >  static int32_t
> > > -service_update(struct rte_service_spec *service, uint32_t lcore,
> > > +service_update(uint32_t sid, uint32_t lcore,
> > >  uint32_t *set, uint32_t *enabled)
> 'set' parameter does not need be passed by reference, pass by value is
> enough.
Agreed.
 
> 
> > >  {
> > > -uint32_t i;
> > > -int32_t sid = -1;
> > > -
> > > -for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
> > > -if ((struct rte_service_spec *)&rte_services[i] == service &&
> > > -service_valid(i)) {
> > > -sid = i;
> > > -break;
> > > -}
> > > -}
> > > -
> > > -if (sid == -1 || lcore >= RTE_MAX_LCORE)
> > > +if (lcore >= RTE_MAX_LCORE)
> > >  return -EINVAL;
> The validations look somewhat inconsistent in service_update function, we
> are validating some parameters and not some.
> Suggest bringing the validation of the service id also into this function and
> remove it from the calling functions.
Agreed. I will update it in the next version.

> 
> > >
> > >  if (!lcore_states[lcore].is_service_core)
> > > @@ -598,19 +587,23 @@ service_update(struct rte_service_spec
> *service,
> > > uint32_t lcore,  int32_t  rte_service_map_lcore_set(uint32_t id,
> > > uint32_t lcore, uint32_t enabled)  {
> > > -struct rte_service_spec_impl *s;
> > > -SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
> > > +/* validate ID, or return error value */
> > > +if (id >= RTE_SERVICE_NUM_MAX || !service_valid(id))
> > > +return -EINVAL;
> > > +
> > >  uint32_t on = enabled > 0;
> We do not need the above line. 'enabled' can be passed directly to
> 'service_update'.
Agreed.

> 
> > > -return service_update(&s->spec, lcore, &on, 0);
> > > +return service_update(id, lcore, &on, 0);
> > >  }
> > >
> > >  int32_t
> > >  rte_service_map_lcore_get(uint32_t id, uint32_t lcore)  {
> > > -struct rte_service_spec_impl *s;
> > > -SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
> > > +/* validate ID, or return error value */
> > > +if (id >= RTE_SERVICE_NUM_MAX || !service_valid(id))
> > > +return -EINVAL;
> > > +
> > >  uint32_t enabled;
> > > -int ret = service_update(&s->spec, lcore, 0, &enabled);
> > > +int ret = service_update(id, lcore, 0, &enabled);
> > >  if (ret == 0)
> > >  return enabled;
> > >  return ret;
> > > --
> > > 2.7.4
> 


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [dpdk-stable] [PATCH v3 07/12] service: remove rte prefix from static functions
  2020-04-08 10:14         ` Phil Yang
@ 2020-04-08 10:36           ` Van Haaren, Harry
  2020-04-08 10:49             ` Phil Yang
  0 siblings, 1 reply; 51+ messages in thread
From: Van Haaren, Harry @ 2020-04-08 10:36 UTC (permalink / raw)
  To: Phil Yang, thomas, Ananyev, Konstantin, stephen, maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Honnappa Nagarahalli,
	Gavin Hu, Ruifeng Wang, Joyce Kong, nd, stable, nd

> -----Original Message-----
> From: Phil Yang <Phil.Yang@arm.com>
> Sent: Wednesday, April 8, 2020 11:15 AM
> To: Van Haaren, Harry <harry.van.haaren@intel.com>; thomas@monjalon.net;
> Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> stephen@networkplumber.org; maxime.coquelin@redhat.com; dev@dpdk.org
> Cc: david.marchand@redhat.com; jerinj@marvell.com; hemant.agrawal@nxp.com;
> Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Gavin Hu
> <Gavin.Hu@arm.com>; Ruifeng Wang <Ruifeng.Wang@arm.com>; Joyce Kong
> <Joyce.Kong@arm.com>; nd <nd@arm.com>; stable@dpdk.org; nd <nd@arm.com>
> Subject: RE: [PATCH v3 07/12] service: remove rte prefix from static functions
<snip>
> > Is this really a "Fix"? The internal function names were not exported
> > in the .map file, so are not part of public ABI. This is an internal
> > naming improvement (thanks for doing cleanup), but I don't think the
> > Fixes: tags make sense?
> >
> > Also I'm not sure if we want to port this patch back to stable? Changing
> > (internal) function names seems like unnecessary churn, and hence risk to a
> > stable release, without any benefit?
> OK.
> I will remove these tags in the next version and split the service core
> patches from the original series into a series by itself.

Cool - good idea to split.

Perhaps we should focus on getting bugfixes in for the existing code, before doing cleanup? It would make backports easier if churn is minimal.

Suggesting patches order (first to last)
1. bugfixes/things to backport
2. cleanups
3. C11 atomic optimizations


> Thanks,
> Phil

Thanks, and I'll get to reading/reviewing your and Honnappa's feedback later today.

-H 

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [dpdk-stable] [PATCH v3 07/12] service: remove rte prefix from static functions
  2020-04-08 10:36           ` Van Haaren, Harry
@ 2020-04-08 10:49             ` Phil Yang
  0 siblings, 0 replies; 51+ messages in thread
From: Phil Yang @ 2020-04-08 10:49 UTC (permalink / raw)
  To: Van Haaren, Harry, thomas, Ananyev, Konstantin, stephen,
	maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Honnappa Nagarahalli,
	Gavin Hu, Ruifeng Wang, Joyce Kong, nd, stable, nd, nd

> -----Original Message-----
> From: Van Haaren, Harry <harry.van.haaren@intel.com>
> Sent: Wednesday, April 8, 2020 6:37 PM
> To: Phil Yang <Phil.Yang@arm.com>; thomas@monjalon.net; Ananyev,
> Konstantin <konstantin.ananyev@intel.com>;
> stephen@networkplumber.org; maxime.coquelin@redhat.com;
> dev@dpdk.org
> Cc: david.marchand@redhat.com; jerinj@marvell.com;
> hemant.agrawal@nxp.com; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Gavin Hu <Gavin.Hu@arm.com>;
> Ruifeng Wang <Ruifeng.Wang@arm.com>; Joyce Kong
> <Joyce.Kong@arm.com>; nd <nd@arm.com>; stable@dpdk.org; nd
> <nd@arm.com>
> Subject: RE: [PATCH v3 07/12] service: remove rte prefix from static functions
> 
> > -----Original Message-----
> > From: Phil Yang <Phil.Yang@arm.com>
> > Sent: Wednesday, April 8, 2020 11:15 AM
> > To: Van Haaren, Harry <harry.van.haaren@intel.com>;
> thomas@monjalon.net;
> > Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> > stephen@networkplumber.org; maxime.coquelin@redhat.com;
> dev@dpdk.org
> > Cc: david.marchand@redhat.com; jerinj@marvell.com;
> hemant.agrawal@nxp.com;
> > Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Gavin Hu
> > <Gavin.Hu@arm.com>; Ruifeng Wang <Ruifeng.Wang@arm.com>; Joyce
> Kong
> > <Joyce.Kong@arm.com>; nd <nd@arm.com>; stable@dpdk.org; nd
> <nd@arm.com>
> > Subject: RE: [PATCH v3 07/12] service: remove rte prefix from static
> functions
> <snip>
> > > Is this really a "Fix"? The internal function names were not exported
> > > in the .map file, so are not part of public ABI. This is an internal
> > > naming improvement (thanks for doing cleanup), but I don't think the
> > > Fixes: tags make sense?
> > >
> > > Also I'm not sure if we want to port this patch back to stable? Changing
> > > (internal) function names seems like unnecessary churn, and hence risk
> to a
> > > stable release, without any benefit?
> > OK.
> > I will remove these tags in the next version and split the service core
> > patches from the original series into a series by itself.
> 
> Cool - good idea to split.
> 
> Perhaps we should focus on getting bugfixes in for the existing code, before
> doing cleanup? It would make backports easier if churn is minimal.
> 
> Suggesting patches order (first to last)
> 1. bugfixes/things to backport
> 2. cleanups
> 3. C11 atomic optimizations

That is a good idea. I will follow this order.

> 
> 
> > Thanks,
> > Phil
> 
> Thanks, and I'll get to reading/reviewing your and Honnappa's feedback later
> today.
> 
> -H

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [dpdk-stable] [PATCH v3 09/12] service: avoid race condition for MT unsafe service
  2020-04-04 18:03         ` Honnappa Nagarahalli
@ 2020-04-08 18:05           ` Van Haaren, Harry
  2020-04-09  1:31             ` Honnappa Nagarahalli
  0 siblings, 1 reply; 51+ messages in thread
From: Van Haaren, Harry @ 2020-04-08 18:05 UTC (permalink / raw)
  To: Honnappa Nagarahalli, Phil Yang, thomas, Ananyev, Konstantin,
	stephen, maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Gavin Hu, Ruifeng Wang,
	Joyce Kong, nd, stable, nd

> -----Original Message-----
> From: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>
> Sent: Saturday, April 4, 2020 7:03 PM
> To: Van Haaren, Harry <harry.van.haaren@intel.com>; Phil Yang
> <Phil.Yang@arm.com>; thomas@monjalon.net; Ananyev, Konstantin
> <konstantin.ananyev@intel.com>; stephen@networkplumber.org;
> maxime.coquelin@redhat.com; dev@dpdk.org
> Cc: david.marchand@redhat.com; jerinj@marvell.com; hemant.agrawal@nxp.com;
> Gavin Hu <Gavin.Hu@arm.com>; Ruifeng Wang <Ruifeng.Wang@arm.com>; Joyce Kong
> <Joyce.Kong@arm.com>; nd <nd@arm.com>; stable@dpdk.org; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>
> Subject: RE: [PATCH v3 09/12] service: avoid race condition for MT unsafe
> service
> 
> <snip>
> 
> > > Subject: [PATCH v3 09/12] service: avoid race condition for MT unsafe
> > > service
> > >
> > > From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > >
> > > There has possible that a MT unsafe service might get configured to
> > > run on another core while the service is running currently. This might
> > > result in the MT unsafe service running on multiple cores
> > > simultaneously. Use 'execute_lock' always when the service is MT
> > > unsafe.
> > >
> > > Fixes: e9139a32f6e8 ("service: add function to run on app lcore")
> > > Cc: stable@dpdk.org
> > >
> > > Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > > Reviewed-by: Phil Yang <phil.yang@arm.com>
> > > Reviewed-by: Gavin Hu <gavin.hu@arm.com>
> >
> > We should put "fix" in the title, once converged on an implementation.
> Ok, will replace 'avoid' with 'fix' (once we agree on the solution)
> 
> >
> > Regarding Fixes and stable backport, we should consider if fixing this in
> stable
> > with a performance degradation, fixing with more complex solution, or
> > documenting a known issue a better solution.
> >
> >
> > This fix (always taking the atomic lock) will have a negative performance
> > impact on existing code using services. We should investigate a way to fix
> it
> > without causing datapath performance degradation.
> Trying to gauge the impact on the existing applications...
> The documentation does not explicitly disallow run time mapping of cores to
> service.
> 1) If the applications are mapping the cores to services at run time, they are
> running with a bug. IMO, bug fix resulting in a performance drop should be
> acceptable.
> 2) If the service is configured to run on single core (num_mapped_cores == 1),
> but service is set to MT unsafe - this will have a (possible) performance
> impact.
> 	a) This can be solved by setting the service to MT safe and can be
> documented. This might be a reasonable solution for applications which are
> compiling with
>                    future DPDK releases.
> 	b) We can also solve this using symbol versioning - the old version of
> this function will use the old code, the new version of this function will use
> the code in
>                    this patch. So, if the application is run with future DPDK
> releases without recompiling, it will continue to use the old version. If the
> application is compiled
>                    with future releases, they can use solution in 2a. We also
> should think if this is an appropriate solution as this would force 1) to
> recompile to get the fix.
> 3) If the service is configured to run on multiple cores (num_mapped_cores >
> 1), then for those applications, the lock is being taken already. These
> applications might see some improvements as this patch removes few
> instructions.
>
> >
> > I think there is a way to achieve this by moving more checks/time to the
> > control path (lcore updating the map), and not forcing the datapath lcore to
> > always take an atomic.
> I think 2a above is the solution.

2a above is e.g. the Eventdev SW routines like Rx/Tx scheduler services. 
We should strive to not reduce datapath performance at all here.


> > In this particular case, we have a counter for number of iterations that a
> Which counter are you thinking about?
> All the counters I checked are not atomic operations currently. If we are
> going to use counters they have to be atomic, which means additional cycles in
> the data path.

I'll try to explain the concept better, take this example:
 - One service core is mapped to a MT_UNSAFE service, like event/sw pmd
 - Application wants to map a 2nd lcore to the same service
 - You point out that today there is a race over the lock
    -- the lock is not taken if (num_mapped_lcores == 1)
    -- this avoids an atomic lock/unlock on the datapath

To achieve our desired goal;
 - control thread doing mapping performs the following operations
    -- write service->num_mapped_lcores++ (atomic not required, only single-writer allowed by APIs)
    -- MFENCE (full st-ld barrier) to flush write, and force later loads to issue after
    -- read the "calls_per_service" counter for each lcores, add them up.
    ---- Wait :)
    -- re-read the "calls_per_service", and ensure the count has changed.
    ---- The fact that the calls_per_service has changed ensures the service-lcore
         has seen the new "num_mapped_cores" value, and has now taken the lock!
    -- *now* it is safe to map the 2nd lcore to the service

There is a caveat here that the increments to the "calls_per_service" variable
must become globally-observable. To force this immediately would require a
write memory barrier, which could impact datapath performance. Given the service
is now taking a lock, the unlock() thereof would ensure the "calls_per_service"
is flushed to memory.

Note: we could use calls_per_service, or add a new variable to the service struct.
Writes to this do not need to be atomic, as it is either mapped to a single core,
or else there's a lock around it.


> > service has done. If this increments we know that the lcore running the
> > service has re-entered the critical section, so would see an updated "needs
> > atomic" flag.
> >
> > This approach may introduce a predictable branch on the datapath, however
> > the cost of a predictable branch vs always taking an atomic is order(s?) of
> > magnitude, so a branch is much preferred.
> >
> > It must be possible to avoid the datapath overhead using a scheme like this.
> It
> > will likely be more complex than your proposed change below, however if it
> > avoids datapath performance drops I feel that a more complex solution is
> > worth investigating at least.

> I do not completely understand the approach you are proposing, may be you can
> elaborate more.

Expanded above, showing a possible solution that does not require additional
atomics on the datapath.


> But, it seems to be based on a counter approach. Following is
> my assessment on what happens if we use a counter. Let us say we kept track of
> how many cores are running the service currently. We need an atomic counter
> other than 'num_mapped_cores'. Let us call that counter 'num_current_cores'.
> The code to call the service would look like below.
> 
> 1) rte_atomic32_inc(&num_current_cores); /* this results in a full memory
> barrier */
> 2) if (__atomic_load_n(&num_current_cores, __ATOMIC_ACQUIRE) == 1) { /*
> rte_atomic_read is not enough here as it does not provide the required memory
> barrier for any architecture */
> 3) 	run_service(); /* Call the service */
> 4) }
> 5) rte_atomic32_sub(&num_current_cores); /* Calling rte_atomic32_clear is not
> enough as it is not an atomic operation and does not provide the required
> memory barrier */
> 
> But the above code has race conditions in lines 1 and 2. It is possible that
> none of the cores will ever get to run the service as they all could
> simultaneously increment the counter. Hence lines 1 and 2 together need to be
> atomic, which is nothing but 'compare-exchange' operation.
> 
> BTW, the current code has a bug where it calls 'rte_atomic_clear(&s-
> >execute_lock)', it is missing memory barriers which results in clearing the
> execute_lock before the service has completed running. I suggest changing the
> 'execute_lock' to rte_spinlock_t and using rte_spinlock_try_lock and
> rte_spinlock_unlock APIs.

I don't think a spinlock is what we want here:

The idea is that a service-lcore can be mapped to multiple services.
If one service is already being run (by another thread), we do not want to
spin here waiting for it to become "free" to run by this thread, it should
continue to the next service that it is mapped to.


> >
> > A unit test is required to validate a fix like this - although perhaps found
> by
> > inspection/review, a real-world test to validate would give confidence.
> Agree, need to have a test case.
> 
> >
> >
> > Thoughts on such an approach?
> >
<snip patch contents>


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [dpdk-stable] [PATCH v3 09/12] service: avoid race condition for MT unsafe service
  2020-04-08 18:05           ` Van Haaren, Harry
@ 2020-04-09  1:31             ` Honnappa Nagarahalli
  2020-04-09 16:46               ` Van Haaren, Harry
  0 siblings, 1 reply; 51+ messages in thread
From: Honnappa Nagarahalli @ 2020-04-09  1:31 UTC (permalink / raw)
  To: Van Haaren, Harry, Phil Yang, thomas, Ananyev, Konstantin,
	stephen, maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Gavin Hu, Ruifeng Wang,
	Joyce Kong, nd, stable, Honnappa Nagarahalli, nd

<snip>

> >
> > > > Subject: [PATCH v3 09/12] service: avoid race condition for MT
> > > > unsafe service
> > > >
> > > > From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > > >
> > > > There has possible that a MT unsafe service might get configured
> > > > to run on another core while the service is running currently.
> > > > This might result in the MT unsafe service running on multiple
> > > > cores simultaneously. Use 'execute_lock' always when the service
> > > > is MT unsafe.
> > > >
> > > > Fixes: e9139a32f6e8 ("service: add function to run on app lcore")
> > > > Cc: stable@dpdk.org
> > > >
> > > > Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > > > Reviewed-by: Phil Yang <phil.yang@arm.com>
> > > > Reviewed-by: Gavin Hu <gavin.hu@arm.com>
> > >
> > > We should put "fix" in the title, once converged on an implementation.
> > Ok, will replace 'avoid' with 'fix' (once we agree on the solution)
> >
> > >
> > > Regarding Fixes and stable backport, we should consider if fixing
> > > this in
> > stable
> > > with a performance degradation, fixing with more complex solution,
> > > or documenting a known issue a better solution.
> > >
> > >
> > > This fix (always taking the atomic lock) will have a negative
> > > performance impact on existing code using services. We should
> > > investigate a way to fix
> > it
> > > without causing datapath performance degradation.
> > Trying to gauge the impact on the existing applications...
> > The documentation does not explicitly disallow run time mapping of
> > cores to service.
> > 1) If the applications are mapping the cores to services at run time,
> > they are running with a bug. IMO, bug fix resulting in a performance
> > drop should be acceptable.
> > 2) If the service is configured to run on single core
> > (num_mapped_cores == 1), but service is set to MT unsafe - this will
> > have a (possible) performance impact.
> > 	a) This can be solved by setting the service to MT safe and can be
> > documented. This might be a reasonable solution for applications which
> > are compiling with
> >                    future DPDK releases.
> > 	b) We can also solve this using symbol versioning - the old version
> > of this function will use the old code, the new version of this
> > function will use the code in
> >                    this patch. So, if the application is run with
> > future DPDK releases without recompiling, it will continue to use the
> > old version. If the application is compiled
> >                    with future releases, they can use solution in 2a.
> > We also should think if this is an appropriate solution as this would
> > force 1) to recompile to get the fix.
> > 3) If the service is configured to run on multiple cores
> > (num_mapped_cores > 1), then for those applications, the lock is being
> > taken already. These applications might see some improvements as this
> > patch removes few instructions.
> >
> > >
> > > I think there is a way to achieve this by moving more checks/time to
> > > the control path (lcore updating the map), and not forcing the
> > > datapath lcore to always take an atomic.
> > I think 2a above is the solution.
> 
> 2a above is e.g. the Eventdev SW routines like Rx/Tx scheduler services.
I scanned through the code briefly
I see that Eth RX/TX, Crypto adapters are setting the MT_SAFE capabilities, can be ignored.
Timer adaptor and some others do not set MT_SAFE. Seems like the cores to run on are mapped during run time. But it is not clear to me if it can get mapped to run on multiple cores. If they are, they are running with the bug.
But, these are all internal to DPDK and can be fixed.
Are there no performance tests in these components that we can run?

> We should strive to not reduce datapath performance at all here.
> 
> 
> > > In this particular case, we have a counter for number of iterations
> > > that a
> > Which counter are you thinking about?
> > All the counters I checked are not atomic operations currently. If we
> > are going to use counters they have to be atomic, which means
> > additional cycles in the data path.
> 
> I'll try to explain the concept better, take this example:
>  - One service core is mapped to a MT_UNSAFE service, like event/sw pmd
>  - Application wants to map a 2nd lcore to the same service
>  - You point out that today there is a race over the lock
>     -- the lock is not taken if (num_mapped_lcores == 1)
>     -- this avoids an atomic lock/unlock on the datapath
> 
> To achieve our desired goal;
>  - control thread doing mapping performs the following operations
>     -- write service->num_mapped_lcores++ (atomic not required, only single-
> writer allowed by APIs)
This has to be atomic because of rte_service_run_iter_on_app_lcore API. Performance should be fine as this API is not called frequently. But need to consider the implications of more than one thread updating num_mapped_cores.

>     -- MFENCE (full st-ld barrier) to flush write, and force later loads to issue
> after
I am not exactly sure what MFENCE on x86 does. On Arm platforms, the full barrier (DMB ISH) just makes sure that memory operations are not re-ordered around it. It does not say anything about when that store is visible to other cores. It will be visible at some point in time to cores.
But, I do not think we need to be worried about flushing to memory.

>     -- read the "calls_per_service" counter for each lcores, add them up.
This can be trimmed down to the single core the service is mapped to currently, no need to add all the counters.

>     ---- Wait :)
>     -- re-read the "calls_per_service", and ensure the count has changed.
Basically, polling. This causes more traffic on the interconnect between the cores. But might be ok since this API might not be called frequently.

>     ---- The fact that the calls_per_service has changed ensures the service-
> lcore
>          has seen the new "num_mapped_cores" value, and has now taken the
> lock!
>     -- *now* it is safe to map the 2nd lcore to the service
> 
> There is a caveat here that the increments to the "calls_per_service" variable
> must become globally-observable. To force this immediately would require a
> write memory barrier, which could impact datapath performance. Given the
> service is now taking a lock, the unlock() thereof would ensure the
> "calls_per_service"
> is flushed to memory.
If we increment this variable only when the lock is held, we should be fine. We could have a separate variable.

> 
> Note: we could use calls_per_service, or add a new variable to the service
> struct.
> Writes to this do not need to be atomic, as it is either mapped to a single core,
> or else there's a lock around it.
I think it is better to have a separate variable that is updated only when the lock is held.
I do not see any change in API sequence. We do this hand-shake only if the service is running (which is all controlled in the writer thread), correct?

This does not solve the problem with rte_service_run_iter_on_app_lcore getting called on multiple cores concurrently for the same service. 

> 
> 
> > > service has done. If this increments we know that the lcore running
> > > the service has re-entered the critical section, so would see an
> > > updated "needs atomic" flag.
> > >
> > > This approach may introduce a predictable branch on the datapath,
> > > however the cost of a predictable branch vs always taking an atomic
> > > is order(s?) of magnitude, so a branch is much preferred.
> > >
> > > It must be possible to avoid the datapath overhead using a scheme like
> this.
> > It
> > > will likely be more complex than your proposed change below, however
> > > if it avoids datapath performance drops I feel that a more complex
> > > solution is worth investigating at least.
> 
> > I do not completely understand the approach you are proposing, may be
> > you can elaborate more.
> 
> Expanded above, showing a possible solution that does not require additional
> atomics on the datapath.
> 
> 
> > But, it seems to be based on a counter approach. Following is my
> > assessment on what happens if we use a counter. Let us say we kept
> > track of how many cores are running the service currently. We need an
> > atomic counter other than 'num_mapped_cores'. Let us call that counter
> 'num_current_cores'.
> > The code to call the service would look like below.
> >
> > 1) rte_atomic32_inc(&num_current_cores); /* this results in a full
> > memory barrier */
> > 2) if (__atomic_load_n(&num_current_cores, __ATOMIC_ACQUIRE) == 1) {
> > /* rte_atomic_read is not enough here as it does not provide the
> > required memory barrier for any architecture */
> > 3) 	run_service(); /* Call the service */
> > 4) }
> > 5) rte_atomic32_sub(&num_current_cores); /* Calling rte_atomic32_clear
> > is not enough as it is not an atomic operation and does not provide
> > the required memory barrier */
> >
> > But the above code has race conditions in lines 1 and 2. It is
> > possible that none of the cores will ever get to run the service as
> > they all could simultaneously increment the counter. Hence lines 1 and
> > 2 together need to be atomic, which is nothing but 'compare-exchange'
> operation.
> >
> > BTW, the current code has a bug where it calls 'rte_atomic_clear(&s-
> > >execute_lock)', it is missing memory barriers which results in
> > >clearing the
> > execute_lock before the service has completed running. I suggest
> > changing the 'execute_lock' to rte_spinlock_t and using
> > rte_spinlock_try_lock and rte_spinlock_unlock APIs.
> 
> I don't think a spinlock is what we want here:
> 
> The idea is that a service-lcore can be mapped to multiple services.
> If one service is already being run (by another thread), we do not want to spin
> here waiting for it to become "free" to run by this thread, it should continue
> to the next service that it is mapped to.
Agree. I am suggesting to use 'rte_spinlock_try_lock' (does not spin) which is nothing but 'compare-exchange'. Since the API is available, we should make use of it instead of repeating the code.

> 
> 
> > >
> > > A unit test is required to validate a fix like this - although
> > > perhaps found
> > by
> > > inspection/review, a real-world test to validate would give confidence.
> > Agree, need to have a test case.
> >
> > >
> > >
> > > Thoughts on such an approach?
> > >
> <snip patch contents>


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [dpdk-stable] [PATCH v3 09/12] service: avoid race condition for MT unsafe service
  2020-04-09  1:31             ` Honnappa Nagarahalli
@ 2020-04-09 16:46               ` Van Haaren, Harry
  2020-04-18  6:21                 ` Honnappa Nagarahalli
  0 siblings, 1 reply; 51+ messages in thread
From: Van Haaren, Harry @ 2020-04-09 16:46 UTC (permalink / raw)
  To: Honnappa Nagarahalli, Phil Yang, thomas, Ananyev, Konstantin,
	stephen, maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Gavin Hu, Ruifeng Wang,
	Joyce Kong, nd, stable, nd

> -----Original Message-----
> From: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>
> Sent: Thursday, April 9, 2020 2:32 AM
> To: Van Haaren, Harry <harry.van.haaren@intel.com>; Phil Yang
> <Phil.Yang@arm.com>; thomas@monjalon.net; Ananyev, Konstantin
> <konstantin.ananyev@intel.com>; stephen@networkplumber.org;
> maxime.coquelin@redhat.com; dev@dpdk.org
> Cc: david.marchand@redhat.com; jerinj@marvell.com; hemant.agrawal@nxp.com;
> Gavin Hu <Gavin.Hu@arm.com>; Ruifeng Wang <Ruifeng.Wang@arm.com>; Joyce Kong
> <Joyce.Kong@arm.com>; nd <nd@arm.com>; stable@dpdk.org; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>
> Subject: RE: [PATCH v3 09/12] service: avoid race condition for MT unsafe
> service
> 
> <snip>
> 
> > >
> > > > > Subject: [PATCH v3 09/12] service: avoid race condition for MT
> > > > > unsafe service
> > > > >
> > > > > From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > > > >
> > > > > There has possible that a MT unsafe service might get configured
> > > > > to run on another core while the service is running currently.
> > > > > This might result in the MT unsafe service running on multiple
> > > > > cores simultaneously. Use 'execute_lock' always when the service
> > > > > is MT unsafe.
> > > > >
> > > > > Fixes: e9139a32f6e8 ("service: add function to run on app lcore")
> > > > > Cc: stable@dpdk.org
> > > > >
> > > > > Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > > > > Reviewed-by: Phil Yang <phil.yang@arm.com>
> > > > > Reviewed-by: Gavin Hu <gavin.hu@arm.com>
> > > >
> > > > We should put "fix" in the title, once converged on an implementation.
> > > Ok, will replace 'avoid' with 'fix' (once we agree on the solution)
> > >
> > > >
> > > > Regarding Fixes and stable backport, we should consider if fixing
> > > > this in
> > > stable
> > > > with a performance degradation, fixing with more complex solution,
> > > > or documenting a known issue a better solution.
> > > >
> > > >
> > > > This fix (always taking the atomic lock) will have a negative
> > > > performance impact on existing code using services. We should
> > > > investigate a way to fix
> > > it
> > > > without causing datapath performance degradation.
> > > Trying to gauge the impact on the existing applications...
> > > The documentation does not explicitly disallow run time mapping of
> > > cores to service.
> > > 1) If the applications are mapping the cores to services at run time,
> > > they are running with a bug. IMO, bug fix resulting in a performance
> > > drop should be acceptable.
> > > 2) If the service is configured to run on single core
> > > (num_mapped_cores == 1), but service is set to MT unsafe - this will
> > > have a (possible) performance impact.
> > > 	a) This can be solved by setting the service to MT safe and can be
> > > documented. This might be a reasonable solution for applications which
> > > are compiling with
> > >                    future DPDK releases.
> > > 	b) We can also solve this using symbol versioning - the old version
> > > of this function will use the old code, the new version of this
> > > function will use the code in
> > >                    this patch. So, if the application is run with
> > > future DPDK releases without recompiling, it will continue to use the
> > > old version. If the application is compiled
> > >                    with future releases, they can use solution in 2a.
> > > We also should think if this is an appropriate solution as this would
> > > force 1) to recompile to get the fix.
> > > 3) If the service is configured to run on multiple cores
> > > (num_mapped_cores > 1), then for those applications, the lock is being
> > > taken already. These applications might see some improvements as this
> > > patch removes few instructions.
> > >
> > > >
> > > > I think there is a way to achieve this by moving more checks/time to
> > > > the control path (lcore updating the map), and not forcing the
> > > > datapath lcore to always take an atomic.
> > > I think 2a above is the solution.
> >
> > 2a above is e.g. the Eventdev SW routines like Rx/Tx scheduler services.
> I scanned through the code briefly
> I see that Eth RX/TX, Crypto adapters are setting the MT_SAFE capabilities,
> can be ignored.
> Timer adaptor and some others do not set MT_SAFE. Seems like the cores to run
> on are mapped during run time. But it is not clear to me if it can get mapped
> to run on multiple cores. If they are, they are running with the bug.

EAL will map each service to a single lcore. It will "round-robin" if
there are more services than service-lcores to run them on. So agree
that DPDK's default mappings will not suffer this issue.


> But, these are all internal to DPDK and can be fixed.
> Are there no performance tests in these components that we can run?
>
> > We should strive to not reduce datapath performance at all here.
> >
> >
> > > > In this particular case, we have a counter for number of iterations
> > > > that a
> > > Which counter are you thinking about?
> > > All the counters I checked are not atomic operations currently. If we
> > > are going to use counters they have to be atomic, which means
> > > additional cycles in the data path.
> >
> > I'll try to explain the concept better, take this example:
> >  - One service core is mapped to a MT_UNSAFE service, like event/sw pmd
> >  - Application wants to map a 2nd lcore to the same service
> >  - You point out that today there is a race over the lock
> >     -- the lock is not taken if (num_mapped_lcores == 1)
> >     -- this avoids an atomic lock/unlock on the datapath
> >
> > To achieve our desired goal;
> >  - control thread doing mapping performs the following operations
> >     -- write service->num_mapped_lcores++ (atomic not required, only single-
> > writer allowed by APIs)
> This has to be atomic because of rte_service_run_iter_on_app_lcore API.
> Performance should be fine as this API is not called frequently. But need to
> consider the implications of more than one thread updating num_mapped_cores.
> 
> >     -- MFENCE (full st-ld barrier) to flush write, and force later loads to
> issue
> > after
> I am not exactly sure what MFENCE on x86 does. On Arm platforms, the full
> barrier (DMB ISH) just makes sure that memory operations are not re-ordered
> around it. It does not say anything about when that store is visible to other
> cores. It will be visible at some point in time to cores.
> But, I do not think we need to be worried about flushing to memory.
> 
> >     -- read the "calls_per_service" counter for each lcores, add them up.
> This can be trimmed down to the single core the service is mapped to
> currently, no need to add all the counters.

Correct - however that requires figuring out which lcore is running the
service. Anyway, agree - it's an implementation detail as to exactly how
we detect it.

> 
> >     ---- Wait :)
> >     -- re-read the "calls_per_service", and ensure the count has changed.
> Basically, polling. This causes more traffic on the interconnect between the
> cores. But might be ok since this API might not be called frequently.

Agree this will not be called frequently, and that some polling here will
not be a problem.


> >     ---- The fact that the calls_per_service has changed ensures the
> service-
> > lcore
> >          has seen the new "num_mapped_cores" value, and has now taken the
> > lock!
> >     -- *now* it is safe to map the 2nd lcore to the service
> >
> > There is a caveat here that the increments to the "calls_per_service"
> variable
> > must become globally-observable. To force this immediately would require a
> > write memory barrier, which could impact datapath performance. Given the
> > service is now taking a lock, the unlock() thereof would ensure the
> > "calls_per_service"
> > is flushed to memory.
> If we increment this variable only when the lock is held, we should be fine.
> We could have a separate variable.

Sure, if a separate variable is preferred that's fine with me.


> > Note: we could use calls_per_service, or add a new variable to the service
> > struct.
> > Writes to this do not need to be atomic, as it is either mapped to a single
> core,
> > or else there's a lock around it.
> I think it is better to have a separate variable that is updated only when the
> lock is held.
> I do not see any change in API sequence. We do this hand-shake only if the
> service is running (which is all controlled in the writer thread), correct?

Yes this increment can be localized to just the branch when the unlock() occurs,
as that is the only time it could make a difference.

> This does not solve the problem with rte_service_run_iter_on_app_lcore getting
> called on multiple cores concurrently for the same service.

Agreed. This "on_app_lcore" API was an addition required to enable unit-testing
in a sane way, to run iterations of eg Eventdev PMD.

I am in favor of documenting that the application is responsible to ensure
the service being run on a specific application lcore is not concurrently
running on another application lcore.


> > > > service has done. If this increments we know that the lcore running
> > > > the service has re-entered the critical section, so would see an
> > > > updated "needs atomic" flag.
> > > >
> > > > This approach may introduce a predictable branch on the datapath,
> > > > however the cost of a predictable branch vs always taking an atomic
> > > > is order(s?) of magnitude, so a branch is much preferred.
> > > >
> > > > It must be possible to avoid the datapath overhead using a scheme like
> > this.
> > > It
> > > > will likely be more complex than your proposed change below, however
> > > > if it avoids datapath performance drops I feel that a more complex
> > > > solution is worth investigating at least.
> >
> > > I do not completely understand the approach you are proposing, may be
> > > you can elaborate more.
> >
> > Expanded above, showing a possible solution that does not require additional
> > atomics on the datapath.
> >
> >
> > > But, it seems to be based on a counter approach. Following is my
> > > assessment on what happens if we use a counter. Let us say we kept
> > > track of how many cores are running the service currently. We need an
> > > atomic counter other than 'num_mapped_cores'. Let us call that counter
> > 'num_current_cores'.
> > > The code to call the service would look like below.
> > >
> > > 1) rte_atomic32_inc(&num_current_cores); /* this results in a full
> > > memory barrier */
> > > 2) if (__atomic_load_n(&num_current_cores, __ATOMIC_ACQUIRE) == 1) {
> > > /* rte_atomic_read is not enough here as it does not provide the
> > > required memory barrier for any architecture */
> > > 3) 	run_service(); /* Call the service */
> > > 4) }
> > > 5) rte_atomic32_sub(&num_current_cores); /* Calling rte_atomic32_clear
> > > is not enough as it is not an atomic operation and does not provide
> > > the required memory barrier */
> > >
> > > But the above code has race conditions in lines 1 and 2. It is
> > > possible that none of the cores will ever get to run the service as
> > > they all could simultaneously increment the counter. Hence lines 1 and
> > > 2 together need to be atomic, which is nothing but 'compare-exchange'
> > operation.
> > >
> > > BTW, the current code has a bug where it calls 'rte_atomic_clear(&s-
> > > >execute_lock)', it is missing memory barriers which results in
> > > >clearing the
> > > execute_lock before the service has completed running. I suggest
> > > changing the 'execute_lock' to rte_spinlock_t and using
> > > rte_spinlock_try_lock and rte_spinlock_unlock APIs.
> >
> > I don't think a spinlock is what we want here:
> >
> > The idea is that a service-lcore can be mapped to multiple services.
> > If one service is already being run (by another thread), we do not want to
> spin
> > here waiting for it to become "free" to run by this thread, it should
> continue
> > to the next service that it is mapped to.
> Agree. I am suggesting to use 'rte_spinlock_try_lock' (does not spin) which is
> nothing but 'compare-exchange'. Since the API is available, we should make use
> of it instead of repeating the code.

Ah apologies, I misread the spinlock usage. Sure if the spinlock_t code
is preferred I'm ok with a change. It would be clean to have a separate
patch in the patchset to make this change, and have it later in the set
than the changes for backporting to ease integration with stable branch.


> > > > A unit test is required to validate a fix like this - although
> > > > perhaps found
> > > by
> > > > inspection/review, a real-world test to validate would give confidence.
> > > Agree, need to have a test case.
> > >
> > > >
> > > >
> > > > Thoughts on such an approach?
> > > >
> > <snip patch contents>


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [dpdk-stable] [PATCH v3 09/12] service: avoid race condition for MT unsafe service
  2020-04-09 16:46               ` Van Haaren, Harry
@ 2020-04-18  6:21                 ` Honnappa Nagarahalli
  2020-04-21 17:43                   ` Van Haaren, Harry
  0 siblings, 1 reply; 51+ messages in thread
From: Honnappa Nagarahalli @ 2020-04-18  6:21 UTC (permalink / raw)
  To: Van Haaren, Harry, Phil Yang, thomas, Ananyev, Konstantin,
	stephen, maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Gavin Hu, Ruifeng Wang,
	Joyce Kong, nd, stable, nd, Honnappa Nagarahalli, nd

<snip>

> > > >
> > > > > > Subject: [PATCH v3 09/12] service: avoid race condition for MT
> > > > > > unsafe service
> > > > > >
> > > > > > From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > > > > >
> > > > > > There has possible that a MT unsafe service might get
> > > > > > configured to run on another core while the service is running
> currently.
> > > > > > This might result in the MT unsafe service running on multiple
> > > > > > cores simultaneously. Use 'execute_lock' always when the
> > > > > > service is MT unsafe.
> > > > > >
> > > > > > Fixes: e9139a32f6e8 ("service: add function to run on app
> > > > > > lcore")
> > > > > > Cc: stable@dpdk.org
> > > > > >
> > > > > > Signed-off-by: Honnappa Nagarahalli
> > > > > > <honnappa.nagarahalli@arm.com>
> > > > > > Reviewed-by: Phil Yang <phil.yang@arm.com>
> > > > > > Reviewed-by: Gavin Hu <gavin.hu@arm.com>
> > > > >
> > > > > We should put "fix" in the title, once converged on an implementation.
> > > > Ok, will replace 'avoid' with 'fix' (once we agree on the
> > > > solution)
> > > >
> > > > >
> > > > > Regarding Fixes and stable backport, we should consider if
> > > > > fixing this in
> > > > stable
> > > > > with a performance degradation, fixing with more complex
> > > > > solution, or documenting a known issue a better solution.
> > > > >
> > > > >
> > > > > This fix (always taking the atomic lock) will have a negative
> > > > > performance impact on existing code using services. We should
> > > > > investigate a way to fix
> > > > it
> > > > > without causing datapath performance degradation.
> > > > Trying to gauge the impact on the existing applications...
> > > > The documentation does not explicitly disallow run time mapping of
> > > > cores to service.
> > > > 1) If the applications are mapping the cores to services at run
> > > > time, they are running with a bug. IMO, bug fix resulting in a
> > > > performance drop should be acceptable.
> > > > 2) If the service is configured to run on single core
> > > > (num_mapped_cores == 1), but service is set to MT unsafe - this
> > > > will have a (possible) performance impact.
> > > > 	a) This can be solved by setting the service to MT safe and can
> > > > be documented. This might be a reasonable solution for
> > > > applications which are compiling with
> > > >                    future DPDK releases.
> > > > 	b) We can also solve this using symbol versioning - the old
> > > > version of this function will use the old code, the new version of
> > > > this function will use the code in
> > > >                    this patch. So, if the application is run with
> > > > future DPDK releases without recompiling, it will continue to use
> > > > the old version. If the application is compiled
> > > >                    with future releases, they can use solution in 2a.
> > > > We also should think if this is an appropriate solution as this
> > > > would force 1) to recompile to get the fix.
> > > > 3) If the service is configured to run on multiple cores
> > > > (num_mapped_cores > 1), then for those applications, the lock is
> > > > being taken already. These applications might see some
> > > > improvements as this patch removes few instructions.
> > > >
> > > > >
> > > > > I think there is a way to achieve this by moving more
> > > > > checks/time to the control path (lcore updating the map), and
> > > > > not forcing the datapath lcore to always take an atomic.
> > > > I think 2a above is the solution.
> > >
> > > 2a above is e.g. the Eventdev SW routines like Rx/Tx scheduler services.
> > I scanned through the code briefly
> > I see that Eth RX/TX, Crypto adapters are setting the MT_SAFE
> > capabilities, can be ignored.
> > Timer adaptor and some others do not set MT_SAFE. Seems like the cores
> > to run on are mapped during run time. But it is not clear to me if it
> > can get mapped to run on multiple cores. If they are, they are running with
> the bug.
> 
> EAL will map each service to a single lcore. It will "round-robin" if there are
> more services than service-lcores to run them on. So agree that DPDK's
> default mappings will not suffer this issue.
> 
> 
> > But, these are all internal to DPDK and can be fixed.
> > Are there no performance tests in these components that we can run?
> >
> > > We should strive to not reduce datapath performance at all here.
> > >
> > >
> > > > > In this particular case, we have a counter for number of
> > > > > iterations that a
> > > > Which counter are you thinking about?
> > > > All the counters I checked are not atomic operations currently. If
> > > > we are going to use counters they have to be atomic, which means
> > > > additional cycles in the data path.
> > >
> > > I'll try to explain the concept better, take this example:
I tried to implement this algorithm, but there are few issues, please see below.

> > >  - One service core is mapped to a MT_UNSAFE service, like event/sw
> > > pmd
> > >  - Application wants to map a 2nd lcore to the same service
> > >  - You point out that today there is a race over the lock
> > >     -- the lock is not taken if (num_mapped_lcores == 1)
> > >     -- this avoids an atomic lock/unlock on the datapath
> > >
> > > To achieve our desired goal;
> > >  - control thread doing mapping performs the following operations
> > >     -- write service->num_mapped_lcores++ (atomic not required, only
> > > single- writer allowed by APIs)
> > This has to be atomic because of rte_service_run_iter_on_app_lcore API.
> > Performance should be fine as this API is not called frequently. But
> > need to consider the implications of more than one thread updating
> num_mapped_cores.
> >
> > >     -- MFENCE (full st-ld barrier) to flush write, and force later
> > > loads to
> > issue
> > > after
> > I am not exactly sure what MFENCE on x86 does. On Arm platforms, the
> > full barrier (DMB ISH) just makes sure that memory operations are not
> > re-ordered around it. It does not say anything about when that store
> > is visible to other cores. It will be visible at some point in time to cores.
> > But, I do not think we need to be worried about flushing to memory.
> >
> > >     -- read the "calls_per_service" counter for each lcores, add them up.
> > This can be trimmed down to the single core the service is mapped to
> > currently, no need to add all the counters.
> 
> Correct - however that requires figuring out which lcore is running the service.
> Anyway, agree - it's an implementation detail as to exactly how we detect it.
> 
> >
> > >     ---- Wait :)
> > >     -- re-read the "calls_per_service", and ensure the count has changed.
Here, there is an assumption that the service core function is running on the service core. If the service core is not running, the code will be stuck in this polling loop.

I could not come up with a good way to check if the service core is running. Checking the app_runstate and comp_runstate is not enough as they just indicate that the service is ready to run. Using the counter 'calls_per_service' introduces race conditions.

Only way I can think of is asking the user to follow a specific sequence of APIs to ensure the service core is running before calling rte_service_map_lcore_set.


> > Basically, polling. This causes more traffic on the interconnect
> > between the cores. But might be ok since this API might not be called
> frequently.
> 
> Agree this will not be called frequently, and that some polling here will not be
> a problem.
> 
> 
> > >     ---- The fact that the calls_per_service has changed ensures the
> > service-
> > > lcore
> > >          has seen the new "num_mapped_cores" value, and has now
> > > taken the lock!
> > >     -- *now* it is safe to map the 2nd lcore to the service
> > >
> > > There is a caveat here that the increments to the "calls_per_service"
> > variable
> > > must become globally-observable. To force this immediately would
> > > require a write memory barrier, which could impact datapath
> > > performance. Given the service is now taking a lock, the unlock()
> > > thereof would ensure the "calls_per_service"
> > > is flushed to memory.
> > If we increment this variable only when the lock is held, we should be fine.
> > We could have a separate variable.
> 
> Sure, if a separate variable is preferred that's fine with me.
> 
> 
> > > Note: we could use calls_per_service, or add a new variable to the
> > > service struct.
> > > Writes to this do not need to be atomic, as it is either mapped to a
> > > single
> > core,
> > > or else there's a lock around it.
> > I think it is better to have a separate variable that is updated only
> > when the lock is held.
> > I do not see any change in API sequence. We do this hand-shake only if
> > the service is running (which is all controlled in the writer thread), correct?
> 
> Yes this increment can be localized to just the branch when the unlock()
> occurs, as that is the only time it could make a difference.
> 
> > This does not solve the problem with rte_service_run_iter_on_app_lcore
> > getting called on multiple cores concurrently for the same service.
> 
> Agreed. This "on_app_lcore" API was an addition required to enable unit-
> testing in a sane way, to run iterations of eg Eventdev PMD.
> 
> I am in favor of documenting that the application is responsible to ensure the
> service being run on a specific application lcore is not concurrently running on
> another application lcore.
> 
> 
> > > > > service has done. If this increments we know that the lcore
> > > > > running the service has re-entered the critical section, so
> > > > > would see an updated "needs atomic" flag.
> > > > >
> > > > > This approach may introduce a predictable branch on the
> > > > > datapath, however the cost of a predictable branch vs always
> > > > > taking an atomic is order(s?) of magnitude, so a branch is much
> preferred.
> > > > >
> > > > > It must be possible to avoid the datapath overhead using a
> > > > > scheme like
> > > this.
> > > > It
> > > > > will likely be more complex than your proposed change below,
> > > > > however if it avoids datapath performance drops I feel that a
> > > > > more complex solution is worth investigating at least.
> > >
> > > > I do not completely understand the approach you are proposing, may
> > > > be you can elaborate more.
> > >
> > > Expanded above, showing a possible solution that does not require
> > > additional atomics on the datapath.
> > >
> > >
> > > > But, it seems to be based on a counter approach. Following is my
> > > > assessment on what happens if we use a counter. Let us say we kept
> > > > track of how many cores are running the service currently. We need
> > > > an atomic counter other than 'num_mapped_cores'. Let us call that
> > > > counter
> > > 'num_current_cores'.
> > > > The code to call the service would look like below.
> > > >
> > > > 1) rte_atomic32_inc(&num_current_cores); /* this results in a full
> > > > memory barrier */
> > > > 2) if (__atomic_load_n(&num_current_cores, __ATOMIC_ACQUIRE) == 1)
> > > > {
> > > > /* rte_atomic_read is not enough here as it does not provide the
> > > > required memory barrier for any architecture */
> > > > 3) 	run_service(); /* Call the service */
> > > > 4) }
> > > > 5) rte_atomic32_sub(&num_current_cores); /* Calling
> > > > rte_atomic32_clear is not enough as it is not an atomic operation
> > > > and does not provide the required memory barrier */
> > > >
> > > > But the above code has race conditions in lines 1 and 2. It is
> > > > possible that none of the cores will ever get to run the service
> > > > as they all could simultaneously increment the counter. Hence
> > > > lines 1 and
> > > > 2 together need to be atomic, which is nothing but 'compare-exchange'
> > > operation.
> > > >
> > > > BTW, the current code has a bug where it calls
> > > > 'rte_atomic_clear(&s-
> > > > >execute_lock)', it is missing memory barriers which results in
> > > > >clearing the
> > > > execute_lock before the service has completed running. I suggest
> > > > changing the 'execute_lock' to rte_spinlock_t and using
> > > > rte_spinlock_try_lock and rte_spinlock_unlock APIs.
> > >
> > > I don't think a spinlock is what we want here:
> > >
> > > The idea is that a service-lcore can be mapped to multiple services.
> > > If one service is already being run (by another thread), we do not
> > > want to
> > spin
> > > here waiting for it to become "free" to run by this thread, it
> > > should
> > continue
> > > to the next service that it is mapped to.
> > Agree. I am suggesting to use 'rte_spinlock_try_lock' (does not spin)
> > which is nothing but 'compare-exchange'. Since the API is available,
> > we should make use of it instead of repeating the code.
> 
> Ah apologies, I misread the spinlock usage. Sure if the spinlock_t code is
> preferred I'm ok with a change. It would be clean to have a separate patch in
> the patchset to make this change, and have it later in the set than the changes
> for backporting to ease integration with stable branch.
> 
> 
> > > > > A unit test is required to validate a fix like this - although
> > > > > perhaps found
> > > > by
> > > > > inspection/review, a real-world test to validate would give confidence.
> > > > Agree, need to have a test case.
> > > >
> > > > >
> > > > >
> > > > > Thoughts on such an approach?
> > > > >
> > > <snip patch contents>


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [dpdk-stable] [PATCH v3 09/12] service: avoid race condition for MT unsafe service
  2020-04-18  6:21                 ` Honnappa Nagarahalli
@ 2020-04-21 17:43                   ` Van Haaren, Harry
  0 siblings, 0 replies; 51+ messages in thread
From: Van Haaren, Harry @ 2020-04-21 17:43 UTC (permalink / raw)
  To: Honnappa Nagarahalli, Phil Yang, thomas, Ananyev, Konstantin,
	stephen, maxime.coquelin, dev
  Cc: david.marchand, jerinj, hemant.agrawal, Gavin Hu, Ruifeng Wang,
	Joyce Kong, nd, stable, nd, nd

> -----Original Message-----
> From: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>
> Sent: Saturday, April 18, 2020 7:22 AM
> To: Van Haaren, Harry <harry.van.haaren@intel.com>; Phil Yang
> <Phil.Yang@arm.com>; thomas@monjalon.net; Ananyev, Konstantin
> <konstantin.ananyev@intel.com>; stephen@networkplumber.org;
> maxime.coquelin@redhat.com; dev@dpdk.org
> Cc: david.marchand@redhat.com; jerinj@marvell.com;
> hemant.agrawal@nxp.com; Gavin Hu <Gavin.Hu@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; Joyce Kong <Joyce.Kong@arm.com>; nd
> <nd@arm.com>; stable@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>
> Subject: RE: [PATCH v3 09/12] service: avoid race condition for MT unsafe service
> 
> <snip>
<snip snip>
> > > > To achieve our desired goal;
> > > >  - control thread doing mapping performs the following operations
> > > >     -- write service->num_mapped_lcores++ (atomic not required, only
> > > > single- writer allowed by APIs)
> > > This has to be atomic because of rte_service_run_iter_on_app_lcore API.
> > > Performance should be fine as this API is not called frequently. But
> > > need to consider the implications of more than one thread updating
> > num_mapped_cores.
> > >
> > > >     -- MFENCE (full st-ld barrier) to flush write, and force later
> > > > loads to
> > > issue
> > > > after
> > > I am not exactly sure what MFENCE on x86 does. On Arm platforms, the
> > > full barrier (DMB ISH) just makes sure that memory operations are not
> > > re-ordered around it. It does not say anything about when that store
> > > is visible to other cores. It will be visible at some point in time to cores.
> > > But, I do not think we need to be worried about flushing to memory.
> > >
> > > >     -- read the "calls_per_service" counter for each lcores, add them up.
> > > This can be trimmed down to the single core the service is mapped to
> > > currently, no need to add all the counters.
> >
> > Correct - however that requires figuring out which lcore is running the service.
> > Anyway, agree - it's an implementation detail as to exactly how we detect it.
> >
> > >
> > > >     ---- Wait :)
> > > >     -- re-read the "calls_per_service", and ensure the count has changed.
> Here, there is an assumption that the service core function is running on the
> service core. If the service core is not running, the code will be stuck in this
> polling loop.

Right - we could add a timeout ehre, but that just moves the problem somewhere
else (the application) which now needs to handle error rets, and possibly retries.
It could be a possible solution.. I'm not in favour of it at the moment, but it
needs some more time to think.

> I could not come up with a good way to check if the service core is running.
> Checking the app_runstate and comp_runstate is not enough as they just
> indicate that the service is ready to run. Using the counter 'calls_per_service'
> introduces race conditions.
> 
> Only way I can think of is asking the user to follow a specific sequence of APIs to
> ensure the service core is running before calling rte_service_map_lcore_set.

Good point - I'm thinking about this - but haven't come to an obvious conclusion yet.
I'm considering other ways to detect the core is/isn't running, and also considering
just "high-jacking" the service function pointer temporarily with a CAS, which gives
some new options on avoiding threads entering the critical section.

As above, I don't have a good solution yet.

<snip irrelevant to above discussion stuff>

^ permalink raw reply	[flat|nested] 51+ messages in thread

* [dpdk-stable] [PATCH v2 1/6] service: fix race condition for MT unsafe service
       [not found]       ` <1587659482-27133-1-git-send-email-phil.yang@arm.com>
@ 2020-04-23 16:31         ` Phil Yang
  2020-04-29 16:51           ` Van Haaren, Harry
  2020-04-23 16:31         ` [dpdk-stable] [PATCH v2 2/6] service: identify service running on another core correctly Phil Yang
                           ` (2 subsequent siblings)
  3 siblings, 1 reply; 51+ messages in thread
From: Phil Yang @ 2020-04-23 16:31 UTC (permalink / raw)
  To: harry.van.haaren, dev
  Cc: thomas, david.marchand, konstantin.ananyev, jerinj,
	hemant.agrawal, Honnappa.Nagarahalli, gavin.hu, nd,
	Honnappa Nagarahalli, stable

From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

The MT unsafe service might get configured to run on another core
while the service is running currently. This might result in the
MT unsafe service running on multiple cores simultaneously. Use
'execute_lock' always when the service is MT unsafe.

Fixes: e9139a32f6e8 ("service: add function to run on app lcore")
Cc: stable@dpdk.org

Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
---
 lib/librte_eal/common/rte_service.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index 70d17a5..b8c465e 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -50,6 +50,10 @@ struct rte_service_spec_impl {
 	uint8_t internal_flags;
 
 	/* per service statistics */
+	/* Indicates how many cores the service is mapped to run on.
+	 * It does not indicate the number of cores the service is running
+	 * on currently.
+	 */
 	rte_atomic32_t num_mapped_cores;
 	uint64_t calls;
 	uint64_t cycles_spent;
@@ -370,12 +374,7 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 
 	cs->service_active_on_lcore[i] = 1;
 
-	/* check do we need cmpset, if MT safe or <= 1 core
-	 * mapped, atomic ops are not required.
-	 */
-	const int use_atomics = (service_mt_safe(s) == 0) &&
-				(rte_atomic32_read(&s->num_mapped_cores) > 1);
-	if (use_atomics) {
+	if (service_mt_safe(s) == 0) {
 		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
 			return -EBUSY;
 
-- 
2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* [dpdk-stable] [PATCH v2 2/6] service: identify service running on another core correctly
       [not found]       ` <1587659482-27133-1-git-send-email-phil.yang@arm.com>
  2020-04-23 16:31         ` [dpdk-stable] [PATCH v2 1/6] service: fix race condition for MT unsafe service Phil Yang
@ 2020-04-23 16:31         ` Phil Yang
       [not found]         ` <20200502000245.11071-1-honnappa.nagarahalli@arm.com>
       [not found]         ` <20200505211732.25291-1-honnappa.nagarahalli@arm.com>
  3 siblings, 0 replies; 51+ messages in thread
From: Phil Yang @ 2020-04-23 16:31 UTC (permalink / raw)
  To: harry.van.haaren, dev
  Cc: thomas, david.marchand, konstantin.ananyev, jerinj,
	hemant.agrawal, Honnappa.Nagarahalli, gavin.hu, nd,
	Honnappa Nagarahalli, stable

From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

The logic to identify if the MT unsafe service is running on another
core can return -EBUSY spuriously. In such cases, running the service
becomes costlier than using atomic operations. Assume that the
application passes the right parameters and reduces the number of
instructions for all cases.

Cc: stable@dpdk.org
Fixes: 8d39d3e237c2 ("service: fix race in service on app lcore function")

Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
---
 lib/librte_eal/common/rte_service.c | 26 ++++++++------------------
 1 file changed, 8 insertions(+), 18 deletions(-)

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index b8c465e..c89472b 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -360,7 +360,7 @@ rte_service_runner_do_callback(struct rte_service_spec_impl *s,
 /* Expects the service 's' is valid. */
 static int32_t
 service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
-	    struct rte_service_spec_impl *s)
+	    struct rte_service_spec_impl *s, uint32_t serialize_mt_unsafe)
 {
 	if (!s)
 		return -EINVAL;
@@ -374,7 +374,7 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 
 	cs->service_active_on_lcore[i] = 1;
 
-	if (service_mt_safe(s) == 0) {
+	if ((service_mt_safe(s) == 0) && (serialize_mt_unsafe == 1)) {
 		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
 			return -EBUSY;
 
@@ -412,24 +412,14 @@ rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe)
 
 	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
 
-	/* Atomically add this core to the mapped cores first, then examine if
-	 * we can run the service. This avoids a race condition between
-	 * checking the value, and atomically adding to the mapped count.
+	/* Increment num_mapped_cores to indicate that the service
+	 * is running on a core.
 	 */
-	if (serialize_mt_unsafe)
-		rte_atomic32_inc(&s->num_mapped_cores);
+	rte_atomic32_inc(&s->num_mapped_cores);
 
-	if (service_mt_safe(s) == 0 &&
-			rte_atomic32_read(&s->num_mapped_cores) > 1) {
-		if (serialize_mt_unsafe)
-			rte_atomic32_dec(&s->num_mapped_cores);
-		return -EBUSY;
-	}
-
-	int ret = service_run(id, cs, UINT64_MAX, s);
+	int ret = service_run(id, cs, UINT64_MAX, s, serialize_mt_unsafe);
 
-	if (serialize_mt_unsafe)
-		rte_atomic32_dec(&s->num_mapped_cores);
+	rte_atomic32_dec(&s->num_mapped_cores);
 
 	return ret;
 }
@@ -449,7 +439,7 @@ rte_service_runner_func(void *arg)
 			if (!service_valid(i))
 				continue;
 			/* return value ignored as no change to code flow */
-			service_run(i, cs, service_mask, service_get(i));
+			service_run(i, cs, service_mask, service_get(i), 1);
 		}
 
 		cs->loops++;
-- 
2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [dpdk-stable] [PATCH v2 1/6] service: fix race condition for MT unsafe service
  2020-04-23 16:31         ` [dpdk-stable] [PATCH v2 1/6] service: fix race condition for MT unsafe service Phil Yang
@ 2020-04-29 16:51           ` Van Haaren, Harry
  2020-04-29 22:48             ` Honnappa Nagarahalli
  0 siblings, 1 reply; 51+ messages in thread
From: Van Haaren, Harry @ 2020-04-29 16:51 UTC (permalink / raw)
  To: Phil Yang, dev
  Cc: thomas, david.marchand, Ananyev, Konstantin, jerinj,
	hemant.agrawal, Honnappa.Nagarahalli, gavin.hu, nd,
	Honnappa Nagarahalli, stable, Eads, Gage, Richardson, Bruce

> -----Original Message-----
> From: Phil Yang <phil.yang@arm.com>
> Sent: Thursday, April 23, 2020 5:31 PM
> To: Van Haaren, Harry <harry.van.haaren@intel.com>; dev@dpdk.org
> Cc: thomas@monjalon.net; david.marchand@redhat.com; Ananyev, Konstantin
> <konstantin.ananyev@intel.com>; jerinj@marvell.com;
> hemant.agrawal@nxp.com; Honnappa.Nagarahalli@arm.com;
> gavin.hu@arm.com; nd@arm.com; Honnappa Nagarahalli
> <honnappa.nagarahalli@arm.com>; stable@dpdk.org
> Subject: [PATCH v2 1/6] service: fix race condition for MT unsafe service
> 
> From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> 
> The MT unsafe service might get configured to run on another core
> while the service is running currently. This might result in the
> MT unsafe service running on multiple cores simultaneously. Use
> 'execute_lock' always when the service is MT unsafe.
> 
> Fixes: e9139a32f6e8 ("service: add function to run on app lcore")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Reviewed-by: Phil Yang <phil.yang@arm.com>
> ---

Thanks for spinning a new revision - based on ML discussion previously,
it seems like the "use service-run-count" to avoid this race would be a
complex solution.

Suggesting the following;
1) Take the approach as per this patch, to always take the atomic, fixing the race condition.
2) Add an API to service-cores, which allows "committing" of mappings. Committing the mapping would imply that the mappings will not be changed in future. With runtime-remapping being removed from the equation, the existing branch-over-atomic optimization is valid again.

So this would offer applications two situations
A) No application change: possible performance regression due to atomic always taken.
B) Call "commit" API, and regain the performance as per previous DPDK versions.

Thoughts/opinions on the above?  I've flagged the rest of the patchset for review ASAP. Regards, -Harry

>  lib/librte_eal/common/rte_service.c | 11 +++++------
>  1 file changed, 5 insertions(+), 6 deletions(-)
> 
> diff --git a/lib/librte_eal/common/rte_service.c
> b/lib/librte_eal/common/rte_service.c
> index 70d17a5..b8c465e 100644
> --- a/lib/librte_eal/common/rte_service.c
> +++ b/lib/librte_eal/common/rte_service.c
> @@ -50,6 +50,10 @@ struct rte_service_spec_impl {
>  	uint8_t internal_flags;
> 
>  	/* per service statistics */
> +	/* Indicates how many cores the service is mapped to run on.
> +	 * It does not indicate the number of cores the service is running
> +	 * on currently.
> +	 */
>  	rte_atomic32_t num_mapped_cores;
>  	uint64_t calls;
>  	uint64_t cycles_spent;
> @@ -370,12 +374,7 @@ service_run(uint32_t i, struct core_state *cs, uint64_t
> service_mask,
> 
>  	cs->service_active_on_lcore[i] = 1;
> 
> -	/* check do we need cmpset, if MT safe or <= 1 core
> -	 * mapped, atomic ops are not required.
> -	 */
> -	const int use_atomics = (service_mt_safe(s) == 0) &&
> -				(rte_atomic32_read(&s->num_mapped_cores) > 1);
> -	if (use_atomics) {
> +	if (service_mt_safe(s) == 0) {
>  		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
>  			return -EBUSY;
> 
> --
> 2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [dpdk-stable] [PATCH v2 1/6] service: fix race condition for MT unsafe service
  2020-04-29 16:51           ` Van Haaren, Harry
@ 2020-04-29 22:48             ` Honnappa Nagarahalli
  2020-05-01 14:21               ` Van Haaren, Harry
  0 siblings, 1 reply; 51+ messages in thread
From: Honnappa Nagarahalli @ 2020-04-29 22:48 UTC (permalink / raw)
  To: Van Haaren, Harry, Phil Yang, dev
  Cc: thomas, david.marchand, Ananyev, Konstantin, jerinj,
	hemant.agrawal, Gavin Hu, nd, stable, Eads, Gage, Richardson,
	Bruce, Honnappa Nagarahalli, nd

Hi Harry,
	Thanks for getting back on this.

<snip>

> > Subject: [PATCH v2 1/6] service: fix race condition for MT unsafe
> > service
> >
> > From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> >
> > The MT unsafe service might get configured to run on another core
> > while the service is running currently. This might result in the MT
> > unsafe service running on multiple cores simultaneously. Use
> > 'execute_lock' always when the service is MT unsafe.
> >
> > Fixes: e9139a32f6e8 ("service: add function to run on app lcore")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > Reviewed-by: Phil Yang <phil.yang@arm.com>
> > ---
> 
> Thanks for spinning a new revision - based on ML discussion previously, it
> seems like the "use service-run-count" to avoid this race would be a complex
> solution.
> 
> Suggesting the following;
> 1) Take the approach as per this patch, to always take the atomic, fixing the
> race condition.
Ok

> 2) Add an API to service-cores, which allows "committing" of mappings.
> Committing the mapping would imply that the mappings will not be changed
> in future. With runtime-remapping being removed from the equation, the
> existing branch-over-atomic optimization is valid again.
Ok. Just to make sure I understand this:
a) on the data plane, if commit API is called (probably a new state variable) and num_mapped_cores is set to 1, there is no need to take the lock.
b) possible implementation of the commit API would check if num_mapped_cores for the service is set to 1 and set a variable to indicate that the lock is not required.

What do you think about asking the application to set  the service capability to MT_SAFE if it knows that the service will run on a single core? This would require us to change the documentation and does not require additional code.

> 
> So this would offer applications two situations
> A) No application change: possible performance regression due to atomic
> always taken.
> B) Call "commit" API, and regain the performance as per previous DPDK
> versions.
> 
> Thoughts/opinions on the above?  I've flagged the rest of the patchset for
> review ASAP. Regards, -Harry
> 
> >  lib/librte_eal/common/rte_service.c | 11 +++++------
> >  1 file changed, 5 insertions(+), 6 deletions(-)
> >
> > diff --git a/lib/librte_eal/common/rte_service.c
> > b/lib/librte_eal/common/rte_service.c
> > index 70d17a5..b8c465e 100644
> > --- a/lib/librte_eal/common/rte_service.c
> > +++ b/lib/librte_eal/common/rte_service.c
> > @@ -50,6 +50,10 @@ struct rte_service_spec_impl {
> >  	uint8_t internal_flags;
> >
> >  	/* per service statistics */
> > +	/* Indicates how many cores the service is mapped to run on.
> > +	 * It does not indicate the number of cores the service is running
> > +	 * on currently.
> > +	 */
> >  	rte_atomic32_t num_mapped_cores;
> >  	uint64_t calls;
> >  	uint64_t cycles_spent;
> > @@ -370,12 +374,7 @@ service_run(uint32_t i, struct core_state *cs,
> > uint64_t service_mask,
> >
> >  	cs->service_active_on_lcore[i] = 1;
> >
> > -	/* check do we need cmpset, if MT safe or <= 1 core
> > -	 * mapped, atomic ops are not required.
> > -	 */
> > -	const int use_atomics = (service_mt_safe(s) == 0) &&
> > -				(rte_atomic32_read(&s-
> >num_mapped_cores) > 1);
> > -	if (use_atomics) {
> > +	if (service_mt_safe(s) == 0) {
> >  		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
> >  			return -EBUSY;
> >
> > --
> > 2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [dpdk-stable] [PATCH v2 1/6] service: fix race condition for MT unsafe service
  2020-04-29 22:48             ` Honnappa Nagarahalli
@ 2020-05-01 14:21               ` Van Haaren, Harry
  2020-05-01 14:56                 ` Honnappa Nagarahalli
  0 siblings, 1 reply; 51+ messages in thread
From: Van Haaren, Harry @ 2020-05-01 14:21 UTC (permalink / raw)
  To: Honnappa Nagarahalli, Phil Yang, dev
  Cc: thomas, david.marchand, Ananyev, Konstantin, jerinj,
	hemant.agrawal, Gavin Hu, nd, stable, Eads, Gage, Richardson,
	Bruce, nd

> -----Original Message-----
> From: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>
> Sent: Wednesday, April 29, 2020 11:49 PM
> To: Van Haaren, Harry <harry.van.haaren@intel.com>; Phil Yang
> <Phil.Yang@arm.com>; dev@dpdk.org
> Cc: thomas@monjalon.net; david.marchand@redhat.com; Ananyev, Konstantin
> <konstantin.ananyev@intel.com>; jerinj@marvell.com;
> hemant.agrawal@nxp.com; Gavin Hu <Gavin.Hu@arm.com>; nd
> <nd@arm.com>; stable@dpdk.org; Eads, Gage <gage.eads@intel.com>;
> Richardson, Bruce <bruce.richardson@intel.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>
> Subject: RE: [PATCH v2 1/6] service: fix race condition for MT unsafe service
> 
> Hi Harry,
> 	Thanks for getting back on this.
> 
> <snip>
> 
> > > Subject: [PATCH v2 1/6] service: fix race condition for MT unsafe
> > > service
> > >
> > > From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > >
> > > The MT unsafe service might get configured to run on another core
> > > while the service is running currently. This might result in the MT
> > > unsafe service running on multiple cores simultaneously. Use
> > > 'execute_lock' always when the service is MT unsafe.
> > >
> > > Fixes: e9139a32f6e8 ("service: add function to run on app lcore")
> > > Cc: stable@dpdk.org
> > >
> > > Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > > Reviewed-by: Phil Yang <phil.yang@arm.com>
> > > ---
> >
> > Thanks for spinning a new revision - based on ML discussion previously, it
> > seems like the "use service-run-count" to avoid this race would be a complex
> > solution.
> >
> > Suggesting the following;
> > 1) Take the approach as per this patch, to always take the atomic, fixing the
> > race condition.
> Ok

I've micro-benchmarked this code change inside the service cores autotest, and it
introduces around 35 cycles of overhead per service call.  This is not ideal, but given
it's a bugfix, and by far the simplest method to fix this race-condition. Having
discussed and investigated multiple other solutions, I believe this is the right solution.
Thanks Honnappa and Phil for identifying and driving a solution.

I suggest to post the benchmarking unit-test addition patch, and integrate that
*before* the series under review here gets merged? This makes benchmarking
of the "before bugfix" performance in future easier should it be required.


> > 2) Add an API to service-cores, which allows "committing" of mappings.
> > Committing the mapping would imply that the mappings will not be changed
> > in future. With runtime-remapping being removed from the equation, the
> > existing branch-over-atomic optimization is valid again.
> Ok. Just to make sure I understand this:
> a) on the data plane, if commit API is called (probably a new state variable) and
> num_mapped_cores is set to 1, there is no need to take the lock.
> b) possible implementation of the commit API would check if
> num_mapped_cores for the service is set to 1 and set a variable to indicate that
> the lock is not required.
> 
> What do you think about asking the application to set  the service capability to
> MT_SAFE if it knows that the service will run on a single core? This would
> require us to change the documentation and does not require additional code.

That's a nice idea - I like that if applications want to micro-optimize around
the atomic, that they have a workaround/solution to do so, particularly that it
doesn't require code-changes and backporting.

Will send review and send feedback on the patches themselves.
Regards, -Harry

> > So this would offer applications two situations
> > A) No application change: possible performance regression due to atomic
> > always taken.
> > B) Call "commit" API, and regain the performance as per previous DPDK
> > versions.
> >
> > Thoughts/opinions on the above?  I've flagged the rest of the patchset for
> > review ASAP. Regards, -Harry
> >
> > >  lib/librte_eal/common/rte_service.c | 11 +++++------
> > >  1 file changed, 5 insertions(+), 6 deletions(-)
<snip patch changes>


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [dpdk-stable] [PATCH v2 1/6] service: fix race condition for MT unsafe service
  2020-05-01 14:21               ` Van Haaren, Harry
@ 2020-05-01 14:56                 ` Honnappa Nagarahalli
  2020-05-01 17:51                   ` Van Haaren, Harry
  0 siblings, 1 reply; 51+ messages in thread
From: Honnappa Nagarahalli @ 2020-05-01 14:56 UTC (permalink / raw)
  To: Van Haaren, Harry, Phil Yang, dev
  Cc: thomas, david.marchand, Ananyev, Konstantin, jerinj,
	hemant.agrawal, Gavin Hu, nd, stable, Eads, Gage, Richardson,
	Bruce, nd, Honnappa Nagarahalli, nd

<snip>
> >
> > > > Subject: [PATCH v2 1/6] service: fix race condition for MT unsafe
> > > > service
> > > >
> > > > From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > > >
> > > > The MT unsafe service might get configured to run on another core
> > > > while the service is running currently. This might result in the
> > > > MT unsafe service running on multiple cores simultaneously. Use
> > > > 'execute_lock' always when the service is MT unsafe.
> > > >
> > > > Fixes: e9139a32f6e8 ("service: add function to run on app lcore")
> > > > Cc: stable@dpdk.org
> > > >
> > > > Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > > > Reviewed-by: Phil Yang <phil.yang@arm.com>
> > > > ---
> > >
> > > Thanks for spinning a new revision - based on ML discussion
> > > previously, it seems like the "use service-run-count" to avoid this
> > > race would be a complex solution.
> > >
> > > Suggesting the following;
> > > 1) Take the approach as per this patch, to always take the atomic,
> > > fixing the race condition.
> > Ok
> 
> I've micro-benchmarked this code change inside the service cores autotest,
> and it introduces around 35 cycles of overhead per service call.  This is not
> ideal, but given it's a bugfix, and by far the simplest method to fix this race-
> condition. Having discussed and investigated multiple other solutions, I
> believe this is the right solution.
> Thanks Honnappa and Phil for identifying and driving a solution.
You are welcome. Thank you for your timely responses.

> 
> I suggest to post the benchmarking unit-test addition patch, and integrate
> that
> *before* the series under review here gets merged? This makes
> benchmarking of the "before bugfix" performance in future easier should it be
> required.
I do not see any issues, would be happy to review. I think we still have time to catch up with RC2 (May 8th).
You had also mentioned about calling out that, the control plane APIs are not MT safe. Should I add that to this patch?

> 
> 
> > > 2) Add an API to service-cores, which allows "committing" of mappings.
> > > Committing the mapping would imply that the mappings will not be
> > > changed in future. With runtime-remapping being removed from the
> > > equation, the existing branch-over-atomic optimization is valid again.
> > Ok. Just to make sure I understand this:
> > a) on the data plane, if commit API is called (probably a new state
> > variable) and num_mapped_cores is set to 1, there is no need to take the
> lock.
> > b) possible implementation of the commit API would check if
> > num_mapped_cores for the service is set to 1 and set a variable to
> > indicate that the lock is not required.
> >
> > What do you think about asking the application to set  the service
> > capability to MT_SAFE if it knows that the service will run on a
> > single core? This would require us to change the documentation and does
> not require additional code.
> 
> That's a nice idea - I like that if applications want to micro-optimize around
> the atomic, that they have a workaround/solution to do so, particularly that it
> doesn't require code-changes and backporting.
> 
> Will send review and send feedback on the patches themselves.
> Regards, -Harry
> 
> > > So this would offer applications two situations
> > > A) No application change: possible performance regression due to
> > > atomic always taken.
> > > B) Call "commit" API, and regain the performance as per previous
> > > DPDK versions.
> > >
> > > Thoughts/opinions on the above?  I've flagged the rest of the
> > > patchset for review ASAP. Regards, -Harry
> > >
> > > >  lib/librte_eal/common/rte_service.c | 11 +++++------
> > > >  1 file changed, 5 insertions(+), 6 deletions(-)
> <snip patch changes>


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [dpdk-stable] [PATCH v2 1/6] service: fix race condition for MT unsafe service
  2020-05-01 14:56                 ` Honnappa Nagarahalli
@ 2020-05-01 17:51                   ` Van Haaren, Harry
  0 siblings, 0 replies; 51+ messages in thread
From: Van Haaren, Harry @ 2020-05-01 17:51 UTC (permalink / raw)
  To: Honnappa Nagarahalli, Phil Yang, dev
  Cc: thomas, david.marchand, Ananyev, Konstantin, jerinj,
	hemant.agrawal, Gavin Hu, nd, stable, Eads, Gage, Richardson,
	Bruce, nd, nd

> -----Original Message-----
> From: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>
> Sent: Friday, May 1, 2020 3:56 PM
> To: Van Haaren, Harry <harry.van.haaren@intel.com>; Phil Yang
> <Phil.Yang@arm.com>; dev@dpdk.org
> Cc: thomas@monjalon.net; david.marchand@redhat.com; Ananyev, Konstantin
> <konstantin.ananyev@intel.com>; jerinj@marvell.com;
> hemant.agrawal@nxp.com; Gavin Hu <Gavin.Hu@arm.com>; nd
> <nd@arm.com>; stable@dpdk.org; Eads, Gage <gage.eads@intel.com>;
> Richardson, Bruce <bruce.richardson@intel.com>; nd <nd@arm.com>;
> Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>
> Subject: RE: [PATCH v2 1/6] service: fix race condition for MT unsafe service
> 
> <snip>
> > >
> > > > > Subject: [PATCH v2 1/6] service: fix race condition for MT unsafe
> > > > > service
> > > > >
> > > > > From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > > > >
> > > > > The MT unsafe service might get configured to run on another core
> > > > > while the service is running currently. This might result in the
> > > > > MT unsafe service running on multiple cores simultaneously. Use
> > > > > 'execute_lock' always when the service is MT unsafe.
> > > > >
> > > > > Fixes: e9139a32f6e8 ("service: add function to run on app lcore")
> > > > > Cc: stable@dpdk.org
> > > > >
> > > > > Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > > > > Reviewed-by: Phil Yang <phil.yang@arm.com>
> > > > > ---
> > > >
> > > > Thanks for spinning a new revision - based on ML discussion
> > > > previously, it seems like the "use service-run-count" to avoid this
> > > > race would be a complex solution.
> > > >
> > > > Suggesting the following;
> > > > 1) Take the approach as per this patch, to always take the atomic,
> > > > fixing the race condition.
> > > Ok
> >
> > I've micro-benchmarked this code change inside the service cores autotest,
> > and it introduces around 35 cycles of overhead per service call.  This is not
> > ideal, but given it's a bugfix, and by far the simplest method to fix this race-
> > condition. Having discussed and investigated multiple other solutions, I
> > believe this is the right solution.
> > Thanks Honnappa and Phil for identifying and driving a solution.
> You are welcome. Thank you for your timely responses.

Perhaps not so timely after all ... I'll review C11 patches Tuesday morning,
it's a long weekend in Ireland!

> > I suggest to post the benchmarking unit-test addition patch, and integrate
> > that
> > *before* the series under review here gets merged? This makes
> > benchmarking of the "before bugfix" performance in future easier should it be
> > required.
> I do not see any issues, would be happy to review.

Thanks for volunteering, you're on CC when sent, for convenience:
http://patches.dpdk.org/patch/69651/

> I think we still have time to catch up with RC2 (May 8th).

Agree, merge into RC2 would be great.

> You had also mentioned about calling out that, the control plane APIs are not
> MT safe. Should I add that to this patch?

Yes, that'd be great.

<snip discussion details>

Cheers, -Harry

^ permalink raw reply	[flat|nested] 51+ messages in thread

* [dpdk-stable] [PATCH v3 1/6] service: fix race condition for MT unsafe service
       [not found]         ` <20200502000245.11071-1-honnappa.nagarahalli@arm.com>
@ 2020-05-02  0:02           ` Honnappa Nagarahalli
  2020-05-05 14:48             ` Van Haaren, Harry
  2020-05-02  0:02           ` [dpdk-stable] [PATCH v3 2/6] service: identify service running on another core correctly Honnappa Nagarahalli
  1 sibling, 1 reply; 51+ messages in thread
From: Honnappa Nagarahalli @ 2020-05-02  0:02 UTC (permalink / raw)
  To: dev, phil.yang, harry.van.haaren
  Cc: thomas, david.marchand, konstantin.ananyev, jerinj,
	hemant.agrawal, gage.eads, bruce.richardson,
	honnappa.nagarahalli, nd, stable

The MT unsafe service might get configured to run on another core
while the service is running currently. This might result in the
MT unsafe service running on multiple cores simultaneously. Use
'execute_lock' always when the service is MT unsafe.

If the service is known to be mmapped on a single lcore,
setting the service capability to MT safe will avoid taking
the lock and improve the performance.

Fixes: e9139a32f6e8 ("service: add function to run on app lcore")
Cc: stable@dpdk.org

Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
---
 lib/librte_eal/common/rte_service.c            | 11 +++++------
 lib/librte_eal/include/rte_service.h           |  8 ++++++--
 lib/librte_eal/include/rte_service_component.h |  6 +++++-
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index 70d17a5d7..b8c465eb9 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -50,6 +50,10 @@ struct rte_service_spec_impl {
 	uint8_t internal_flags;
 
 	/* per service statistics */
+	/* Indicates how many cores the service is mapped to run on.
+	 * It does not indicate the number of cores the service is running
+	 * on currently.
+	 */
 	rte_atomic32_t num_mapped_cores;
 	uint64_t calls;
 	uint64_t cycles_spent;
@@ -370,12 +374,7 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 
 	cs->service_active_on_lcore[i] = 1;
 
-	/* check do we need cmpset, if MT safe or <= 1 core
-	 * mapped, atomic ops are not required.
-	 */
-	const int use_atomics = (service_mt_safe(s) == 0) &&
-				(rte_atomic32_read(&s->num_mapped_cores) > 1);
-	if (use_atomics) {
+	if (service_mt_safe(s) == 0) {
 		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
 			return -EBUSY;
 
diff --git a/lib/librte_eal/include/rte_service.h b/lib/librte_eal/include/rte_service.h
index d8701dd4c..3a1c735c5 100644
--- a/lib/librte_eal/include/rte_service.h
+++ b/lib/librte_eal/include/rte_service.h
@@ -104,12 +104,16 @@ int32_t rte_service_probe_capability(uint32_t id, uint32_t capability);
  * Each core can be added or removed from running a specific service. This
  * function enables or disables *lcore* to run *service_id*.
  *
- * If multiple cores are enabled on a service, an atomic is used to ensure that
- * only one cores runs the service at a time. The exception to this is when
+ * If multiple cores are enabled on a service, a lock is used to ensure that
+ * only one core runs the service at a time. The exception to this is when
  * a service indicates that it is multi-thread safe by setting the capability
  * called RTE_SERVICE_CAP_MT_SAFE. With the multi-thread safe capability set,
  * the service function can be run on multiple threads at the same time.
  *
+ * If the service is known to be mapped to a single lcore, setting the
+ * capability of the service to RTE_SERVICE_CAP_MT_SAFE can achieve
+ * better performance by avoiding the use of lock.
+ *
  * @param service_id the service to apply the lcore to
  * @param lcore The lcore that will be mapped to service
  * @param enable Zero to unmap or disable the core, non-zero to enable
diff --git a/lib/librte_eal/include/rte_service_component.h b/lib/librte_eal/include/rte_service_component.h
index 16eab79ee..b75aba11b 100644
--- a/lib/librte_eal/include/rte_service_component.h
+++ b/lib/librte_eal/include/rte_service_component.h
@@ -43,7 +43,7 @@ struct rte_service_spec {
 /**
  * Register a new service.
  *
- * A service represents a component that the requires CPU time periodically to
+ * A service represents a component that requires CPU time periodically to
  * achieve its purpose.
  *
  * For example the eventdev SW PMD requires CPU cycles to perform its
@@ -56,6 +56,10 @@ struct rte_service_spec {
  * *rte_service_component_runstate_set*, which indicates that the service
  * component is ready to be executed.
  *
+ * If the service is known to be mapped to a single lcore, setting the
+ * capability of the service to RTE_SERVICE_CAP_MT_SAFE can achieve
+ * better performance.
+ *
  * @param spec The specification of the service to register
  * @param[out] service_id A pointer to a uint32_t, which will be filled in
  *             during registration of the service. It is set to the integers
-- 
2.17.1


^ permalink raw reply	[flat|nested] 51+ messages in thread

* [dpdk-stable] [PATCH v3 2/6] service: identify service running on another core correctly
       [not found]         ` <20200502000245.11071-1-honnappa.nagarahalli@arm.com>
  2020-05-02  0:02           ` [dpdk-stable] [PATCH v3 1/6] service: fix race condition for MT unsafe service Honnappa Nagarahalli
@ 2020-05-02  0:02           ` Honnappa Nagarahalli
  2020-05-05 14:48             ` Van Haaren, Harry
  1 sibling, 1 reply; 51+ messages in thread
From: Honnappa Nagarahalli @ 2020-05-02  0:02 UTC (permalink / raw)
  To: dev, phil.yang, harry.van.haaren
  Cc: thomas, david.marchand, konstantin.ananyev, jerinj,
	hemant.agrawal, gage.eads, bruce.richardson,
	honnappa.nagarahalli, nd, stable

The logic to identify if the MT unsafe service is running on another
core can return -EBUSY spuriously. In such cases, running the service
becomes costlier than using atomic operations. Assume that the
application passes the right parameters and reduces the number of
instructions for all cases.

Cc: stable@dpdk.org
Fixes: 8d39d3e237c2 ("service: fix race in service on app lcore function")

Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
---
 lib/librte_eal/common/rte_service.c | 26 ++++++++------------------
 1 file changed, 8 insertions(+), 18 deletions(-)

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index b8c465eb9..c89472b83 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -360,7 +360,7 @@ rte_service_runner_do_callback(struct rte_service_spec_impl *s,
 /* Expects the service 's' is valid. */
 static int32_t
 service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
-	    struct rte_service_spec_impl *s)
+	    struct rte_service_spec_impl *s, uint32_t serialize_mt_unsafe)
 {
 	if (!s)
 		return -EINVAL;
@@ -374,7 +374,7 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 
 	cs->service_active_on_lcore[i] = 1;
 
-	if (service_mt_safe(s) == 0) {
+	if ((service_mt_safe(s) == 0) && (serialize_mt_unsafe == 1)) {
 		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
 			return -EBUSY;
 
@@ -412,24 +412,14 @@ rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe)
 
 	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
 
-	/* Atomically add this core to the mapped cores first, then examine if
-	 * we can run the service. This avoids a race condition between
-	 * checking the value, and atomically adding to the mapped count.
+	/* Increment num_mapped_cores to indicate that the service
+	 * is running on a core.
 	 */
-	if (serialize_mt_unsafe)
-		rte_atomic32_inc(&s->num_mapped_cores);
+	rte_atomic32_inc(&s->num_mapped_cores);
 
-	if (service_mt_safe(s) == 0 &&
-			rte_atomic32_read(&s->num_mapped_cores) > 1) {
-		if (serialize_mt_unsafe)
-			rte_atomic32_dec(&s->num_mapped_cores);
-		return -EBUSY;
-	}
-
-	int ret = service_run(id, cs, UINT64_MAX, s);
+	int ret = service_run(id, cs, UINT64_MAX, s, serialize_mt_unsafe);
 
-	if (serialize_mt_unsafe)
-		rte_atomic32_dec(&s->num_mapped_cores);
+	rte_atomic32_dec(&s->num_mapped_cores);
 
 	return ret;
 }
@@ -449,7 +439,7 @@ rte_service_runner_func(void *arg)
 			if (!service_valid(i))
 				continue;
 			/* return value ignored as no change to code flow */
-			service_run(i, cs, service_mask, service_get(i));
+			service_run(i, cs, service_mask, service_get(i), 1);
 		}
 
 		cs->loops++;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [dpdk-stable] [PATCH v3 1/6] service: fix race condition for MT unsafe service
  2020-05-02  0:02           ` [dpdk-stable] [PATCH v3 1/6] service: fix race condition for MT unsafe service Honnappa Nagarahalli
@ 2020-05-05 14:48             ` Van Haaren, Harry
  0 siblings, 0 replies; 51+ messages in thread
From: Van Haaren, Harry @ 2020-05-05 14:48 UTC (permalink / raw)
  To: Honnappa Nagarahalli, dev, phil.yang
  Cc: thomas, david.marchand, Ananyev, Konstantin, jerinj,
	hemant.agrawal, Eads, Gage, Richardson, Bruce, nd, stable

> -----Original Message-----
> From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Sent: Saturday, May 2, 2020 1:03 AM
> To: dev@dpdk.org; phil.yang@arm.com; Van Haaren, Harry
> <harry.van.haaren@intel.com>
> Cc: thomas@monjalon.net; david.marchand@redhat.com; Ananyev, Konstantin
> <konstantin.ananyev@intel.com>; jerinj@marvell.com;
> hemant.agrawal@nxp.com; Eads, Gage <gage.eads@intel.com>; Richardson,
> Bruce <bruce.richardson@intel.com>; honnappa.nagarahalli@arm.com;
> nd@arm.com; stable@dpdk.org
> Subject: [PATCH v3 1/6] service: fix race condition for MT unsafe service
> 
> The MT unsafe service might get configured to run on another core
> while the service is running currently. This might result in the
> MT unsafe service running on multiple cores simultaneously. Use
> 'execute_lock' always when the service is MT unsafe.
> 
> If the service is known to be mmapped on a single lcore,

mmapped is a typo? Just mapped.

> setting the service capability to MT safe will avoid taking
> the lock and improve the performance.
>
> Fixes: e9139a32f6e8 ("service: add function to run on app lcore")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Reviewed-by: Phil Yang <phil.yang@arm.com>

Acked-by: Harry van Haaren <harry.van.haaren@intel.com>

<snip diff>

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [dpdk-stable] [PATCH v3 2/6] service: identify service running on another core correctly
  2020-05-02  0:02           ` [dpdk-stable] [PATCH v3 2/6] service: identify service running on another core correctly Honnappa Nagarahalli
@ 2020-05-05 14:48             ` Van Haaren, Harry
  0 siblings, 0 replies; 51+ messages in thread
From: Van Haaren, Harry @ 2020-05-05 14:48 UTC (permalink / raw)
  To: Honnappa Nagarahalli, dev, phil.yang
  Cc: thomas, david.marchand, Ananyev, Konstantin, jerinj,
	hemant.agrawal, Eads, Gage, Richardson, Bruce, nd, stable

> -----Original Message-----
> From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Sent: Saturday, May 2, 2020 1:03 AM
> To: dev@dpdk.org; phil.yang@arm.com; Van Haaren, Harry
> <harry.van.haaren@intel.com>
> Cc: thomas@monjalon.net; david.marchand@redhat.com; Ananyev, Konstantin
> <konstantin.ananyev@intel.com>; jerinj@marvell.com;
> hemant.agrawal@nxp.com; Eads, Gage <gage.eads@intel.com>; Richardson,
> Bruce <bruce.richardson@intel.com>; honnappa.nagarahalli@arm.com;
> nd@arm.com; stable@dpdk.org
> Subject: [PATCH v3 2/6] service: identify service running on another core
> correctly
>
> The logic to identify if the MT unsafe service is running on another
> core can return -EBUSY spuriously. In such cases, running the service
> becomes costlier than using atomic operations. Assume that the
> application passes the right parameters and reduces the number of
> instructions for all cases.
> 
> Cc: stable@dpdk.org
> Fixes: 8d39d3e237c2 ("service: fix race in service on app lcore function")

Add "fix" to the title, suggestion:
service: fix identification of service running on other lcore

> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Reviewed-by: Phil Yang <phil.yang@arm.com>

I believe there may be some optimizations we can apply after this patchset
as the "num_mapped_cores" variable is no longer used in a significant way 
for the atomic selection, however lets leave that optimization outside
of 20.05 scope.

With title (see above) & comment (see below) updated.
Acked-by: Harry van Haaren <harry.van.haaren@intel.com>

> ---
<snip some diff>
> @@ -412,24 +412,14 @@ rte_service_run_iter_on_app_lcore(uint32_t id,
> uint32_t serialize_mt_unsafe)
> 
>  	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
> 
> -	/* Atomically add this core to the mapped cores first, then examine if
> -	 * we can run the service. This avoids a race condition between
> -	 * checking the value, and atomically adding to the mapped count.
> +	/* Increment num_mapped_cores to indicate that the service
> +	 * is running on a core.
>  	 */
> -	if (serialize_mt_unsafe)
> -		rte_atomic32_inc(&s->num_mapped_cores);
> +	rte_atomic32_inc(&s->num_mapped_cores);

The comment for the added lines here are a little confusing to me,
the "num_mapped_cores" does not indicate that the service "is running on a core",
it indicates the number of mapped lcores to that service. Suggestion below?

/* Increment num_mapped_cores to reflect that this core is
 * now mapped capable of running the service.
 */

> -	if (service_mt_safe(s) == 0 &&
> -			rte_atomic32_read(&s->num_mapped_cores) > 1) {
> -		if (serialize_mt_unsafe)
> -			rte_atomic32_dec(&s->num_mapped_cores);
> -		return -EBUSY;
> -	}
> -
> -	int ret = service_run(id, cs, UINT64_MAX, s);
> +	int ret = service_run(id, cs, UINT64_MAX, s, serialize_mt_unsafe);

<snip rest of diff>


^ permalink raw reply	[flat|nested] 51+ messages in thread

* [dpdk-stable] [PATCH v4 1/6] service: fix race condition for MT unsafe service
       [not found]         ` <20200505211732.25291-1-honnappa.nagarahalli@arm.com>
@ 2020-05-05 21:17           ` Honnappa Nagarahalli
  2020-05-05 21:17           ` [dpdk-stable] [PATCH v4 2/6] service: fix identification of service running on other lcore Honnappa Nagarahalli
       [not found]           ` <1588760683-11027-1-git-send-email-phil.yang@arm.com>
  2 siblings, 0 replies; 51+ messages in thread
From: Honnappa Nagarahalli @ 2020-05-05 21:17 UTC (permalink / raw)
  To: dev, phil.yang, harry.van.haaren
  Cc: thomas, david.marchand, konstantin.ananyev, jerinj,
	hemant.agrawal, gage.eads, bruce.richardson,
	honnappa.nagarahalli, nd, stable

The MT unsafe service might get configured to run on another core
while the service is running currently. This might result in the
MT unsafe service running on multiple cores simultaneously. Use
'execute_lock' always when the service is MT unsafe.

If the service is known to be mmapped on a single lcore,
setting the service capability to MT safe will avoid taking
the lock and improve the performance.

Fixes: e9139a32f6e8 ("service: add function to run on app lcore")
Cc: stable@dpdk.org

Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
Acked-by: Harry van Haaren <harry.van.haaren@intel.com>
---
 lib/librte_eal/common/rte_service.c            | 11 +++++------
 lib/librte_eal/include/rte_service.h           |  8 ++++++--
 lib/librte_eal/include/rte_service_component.h |  6 +++++-
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index 70d17a5d7..b8c465eb9 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -50,6 +50,10 @@ struct rte_service_spec_impl {
 	uint8_t internal_flags;
 
 	/* per service statistics */
+	/* Indicates how many cores the service is mapped to run on.
+	 * It does not indicate the number of cores the service is running
+	 * on currently.
+	 */
 	rte_atomic32_t num_mapped_cores;
 	uint64_t calls;
 	uint64_t cycles_spent;
@@ -370,12 +374,7 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 
 	cs->service_active_on_lcore[i] = 1;
 
-	/* check do we need cmpset, if MT safe or <= 1 core
-	 * mapped, atomic ops are not required.
-	 */
-	const int use_atomics = (service_mt_safe(s) == 0) &&
-				(rte_atomic32_read(&s->num_mapped_cores) > 1);
-	if (use_atomics) {
+	if (service_mt_safe(s) == 0) {
 		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
 			return -EBUSY;
 
diff --git a/lib/librte_eal/include/rte_service.h b/lib/librte_eal/include/rte_service.h
index d8701dd4c..3a1c735c5 100644
--- a/lib/librte_eal/include/rte_service.h
+++ b/lib/librte_eal/include/rte_service.h
@@ -104,12 +104,16 @@ int32_t rte_service_probe_capability(uint32_t id, uint32_t capability);
  * Each core can be added or removed from running a specific service. This
  * function enables or disables *lcore* to run *service_id*.
  *
- * If multiple cores are enabled on a service, an atomic is used to ensure that
- * only one cores runs the service at a time. The exception to this is when
+ * If multiple cores are enabled on a service, a lock is used to ensure that
+ * only one core runs the service at a time. The exception to this is when
  * a service indicates that it is multi-thread safe by setting the capability
  * called RTE_SERVICE_CAP_MT_SAFE. With the multi-thread safe capability set,
  * the service function can be run on multiple threads at the same time.
  *
+ * If the service is known to be mapped to a single lcore, setting the
+ * capability of the service to RTE_SERVICE_CAP_MT_SAFE can achieve
+ * better performance by avoiding the use of lock.
+ *
  * @param service_id the service to apply the lcore to
  * @param lcore The lcore that will be mapped to service
  * @param enable Zero to unmap or disable the core, non-zero to enable
diff --git a/lib/librte_eal/include/rte_service_component.h b/lib/librte_eal/include/rte_service_component.h
index 16eab79ee..b75aba11b 100644
--- a/lib/librte_eal/include/rte_service_component.h
+++ b/lib/librte_eal/include/rte_service_component.h
@@ -43,7 +43,7 @@ struct rte_service_spec {
 /**
  * Register a new service.
  *
- * A service represents a component that the requires CPU time periodically to
+ * A service represents a component that requires CPU time periodically to
  * achieve its purpose.
  *
  * For example the eventdev SW PMD requires CPU cycles to perform its
@@ -56,6 +56,10 @@ struct rte_service_spec {
  * *rte_service_component_runstate_set*, which indicates that the service
  * component is ready to be executed.
  *
+ * If the service is known to be mapped to a single lcore, setting the
+ * capability of the service to RTE_SERVICE_CAP_MT_SAFE can achieve
+ * better performance.
+ *
  * @param spec The specification of the service to register
  * @param[out] service_id A pointer to a uint32_t, which will be filled in
  *             during registration of the service. It is set to the integers
-- 
2.17.1


^ permalink raw reply	[flat|nested] 51+ messages in thread

* [dpdk-stable] [PATCH v4 2/6] service: fix identification of service running on other lcore
       [not found]         ` <20200505211732.25291-1-honnappa.nagarahalli@arm.com>
  2020-05-05 21:17           ` [dpdk-stable] [PATCH v4 1/6] service: fix race condition for MT unsafe service Honnappa Nagarahalli
@ 2020-05-05 21:17           ` Honnappa Nagarahalli
       [not found]           ` <1588760683-11027-1-git-send-email-phil.yang@arm.com>
  2 siblings, 0 replies; 51+ messages in thread
From: Honnappa Nagarahalli @ 2020-05-05 21:17 UTC (permalink / raw)
  To: dev, phil.yang, harry.van.haaren
  Cc: thomas, david.marchand, konstantin.ananyev, jerinj,
	hemant.agrawal, gage.eads, bruce.richardson,
	honnappa.nagarahalli, nd, stable

The logic to identify if the MT unsafe service is running on another
core can return -EBUSY spuriously. In such cases, running the service
becomes costlier than using atomic operations. Assume that the
application passes the right parameters and reduce the number of
instructions for all cases.

Cc: stable@dpdk.org
Fixes: 8d39d3e237c2 ("service: fix race in service on app lcore function")

Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
Acked-by: Harry van Haaren <harry.van.haaren@intel.com>
---
 lib/librte_eal/common/rte_service.c | 26 ++++++++------------------
 1 file changed, 8 insertions(+), 18 deletions(-)

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index b8c465eb9..c283408cf 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -360,7 +360,7 @@ rte_service_runner_do_callback(struct rte_service_spec_impl *s,
 /* Expects the service 's' is valid. */
 static int32_t
 service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
-	    struct rte_service_spec_impl *s)
+	    struct rte_service_spec_impl *s, uint32_t serialize_mt_unsafe)
 {
 	if (!s)
 		return -EINVAL;
@@ -374,7 +374,7 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 
 	cs->service_active_on_lcore[i] = 1;
 
-	if (service_mt_safe(s) == 0) {
+	if ((service_mt_safe(s) == 0) && (serialize_mt_unsafe == 1)) {
 		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
 			return -EBUSY;
 
@@ -412,24 +412,14 @@ rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe)
 
 	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
 
-	/* Atomically add this core to the mapped cores first, then examine if
-	 * we can run the service. This avoids a race condition between
-	 * checking the value, and atomically adding to the mapped count.
+	/* Increment num_mapped_cores to reflect that this core is
+	 * now mapped capable of running the service.
 	 */
-	if (serialize_mt_unsafe)
-		rte_atomic32_inc(&s->num_mapped_cores);
+	rte_atomic32_inc(&s->num_mapped_cores);
 
-	if (service_mt_safe(s) == 0 &&
-			rte_atomic32_read(&s->num_mapped_cores) > 1) {
-		if (serialize_mt_unsafe)
-			rte_atomic32_dec(&s->num_mapped_cores);
-		return -EBUSY;
-	}
-
-	int ret = service_run(id, cs, UINT64_MAX, s);
+	int ret = service_run(id, cs, UINT64_MAX, s, serialize_mt_unsafe);
 
-	if (serialize_mt_unsafe)
-		rte_atomic32_dec(&s->num_mapped_cores);
+	rte_atomic32_dec(&s->num_mapped_cores);
 
 	return ret;
 }
@@ -449,7 +439,7 @@ rte_service_runner_func(void *arg)
 			if (!service_valid(i))
 				continue;
 			/* return value ignored as no change to code flow */
-			service_run(i, cs, service_mask, service_get(i));
+			service_run(i, cs, service_mask, service_get(i), 1);
 		}
 
 		cs->loops++;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 51+ messages in thread

* [dpdk-stable] [PATCH v5 1/6] service: fix race condition for MT unsafe service
       [not found]           ` <1588760683-11027-1-git-send-email-phil.yang@arm.com>
@ 2020-05-06 10:24             ` Phil Yang
  2020-05-06 10:24             ` [dpdk-stable] [PATCH v5 2/6] service: fix identification of service running on other lcore Phil Yang
       [not found]             ` <1588778884-13047-1-git-send-email-phil.yang@arm.com>
  2 siblings, 0 replies; 51+ messages in thread
From: Phil Yang @ 2020-05-06 10:24 UTC (permalink / raw)
  To: dev, harry.van.haaren
  Cc: thomas, david.marchand, konstantin.ananyev, jerinj,
	hemant.agrawal, gage.eads, bruce.richardson,
	Honnappa.Nagarahalli, nd, Honnappa Nagarahalli, stable

From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

The MT unsafe service might get configured to run on another core
while the service is running currently. This might result in the
MT unsafe service running on multiple cores simultaneously. Use
'execute_lock' always when the service is MT unsafe.

If the service is known to be mmapped on a single lcore,
setting the service capability to MT safe will avoid taking
the lock and improve the performance.

Fixes: e9139a32f6e8 ("service: add function to run on app lcore")
Cc: stable@dpdk.org

Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
Acked-by: Harry van Haaren <harry.van.haaren@intel.com>
---
 lib/librte_eal/common/rte_service.c            | 11 +++++------
 lib/librte_eal/include/rte_service.h           |  8 ++++++--
 lib/librte_eal/include/rte_service_component.h |  6 +++++-
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index 70d17a5..b8c465e 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -50,6 +50,10 @@ struct rte_service_spec_impl {
 	uint8_t internal_flags;
 
 	/* per service statistics */
+	/* Indicates how many cores the service is mapped to run on.
+	 * It does not indicate the number of cores the service is running
+	 * on currently.
+	 */
 	rte_atomic32_t num_mapped_cores;
 	uint64_t calls;
 	uint64_t cycles_spent;
@@ -370,12 +374,7 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 
 	cs->service_active_on_lcore[i] = 1;
 
-	/* check do we need cmpset, if MT safe or <= 1 core
-	 * mapped, atomic ops are not required.
-	 */
-	const int use_atomics = (service_mt_safe(s) == 0) &&
-				(rte_atomic32_read(&s->num_mapped_cores) > 1);
-	if (use_atomics) {
+	if (service_mt_safe(s) == 0) {
 		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
 			return -EBUSY;
 
diff --git a/lib/librte_eal/include/rte_service.h b/lib/librte_eal/include/rte_service.h
index d8701dd..3a1c735 100644
--- a/lib/librte_eal/include/rte_service.h
+++ b/lib/librte_eal/include/rte_service.h
@@ -104,12 +104,16 @@ int32_t rte_service_probe_capability(uint32_t id, uint32_t capability);
  * Each core can be added or removed from running a specific service. This
  * function enables or disables *lcore* to run *service_id*.
  *
- * If multiple cores are enabled on a service, an atomic is used to ensure that
- * only one cores runs the service at a time. The exception to this is when
+ * If multiple cores are enabled on a service, a lock is used to ensure that
+ * only one core runs the service at a time. The exception to this is when
  * a service indicates that it is multi-thread safe by setting the capability
  * called RTE_SERVICE_CAP_MT_SAFE. With the multi-thread safe capability set,
  * the service function can be run on multiple threads at the same time.
  *
+ * If the service is known to be mapped to a single lcore, setting the
+ * capability of the service to RTE_SERVICE_CAP_MT_SAFE can achieve
+ * better performance by avoiding the use of lock.
+ *
  * @param service_id the service to apply the lcore to
  * @param lcore The lcore that will be mapped to service
  * @param enable Zero to unmap or disable the core, non-zero to enable
diff --git a/lib/librte_eal/include/rte_service_component.h b/lib/librte_eal/include/rte_service_component.h
index 16eab79..b75aba1 100644
--- a/lib/librte_eal/include/rte_service_component.h
+++ b/lib/librte_eal/include/rte_service_component.h
@@ -43,7 +43,7 @@ struct rte_service_spec {
 /**
  * Register a new service.
  *
- * A service represents a component that the requires CPU time periodically to
+ * A service represents a component that requires CPU time periodically to
  * achieve its purpose.
  *
  * For example the eventdev SW PMD requires CPU cycles to perform its
@@ -56,6 +56,10 @@ struct rte_service_spec {
  * *rte_service_component_runstate_set*, which indicates that the service
  * component is ready to be executed.
  *
+ * If the service is known to be mapped to a single lcore, setting the
+ * capability of the service to RTE_SERVICE_CAP_MT_SAFE can achieve
+ * better performance.
+ *
  * @param spec The specification of the service to register
  * @param[out] service_id A pointer to a uint32_t, which will be filled in
  *             during registration of the service. It is set to the integers
-- 
2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* [dpdk-stable] [PATCH v5 2/6] service: fix identification of service running on other lcore
       [not found]           ` <1588760683-11027-1-git-send-email-phil.yang@arm.com>
  2020-05-06 10:24             ` [dpdk-stable] [PATCH v5 1/6] service: fix race condition for MT unsafe service Phil Yang
@ 2020-05-06 10:24             ` Phil Yang
       [not found]             ` <1588778884-13047-1-git-send-email-phil.yang@arm.com>
  2 siblings, 0 replies; 51+ messages in thread
From: Phil Yang @ 2020-05-06 10:24 UTC (permalink / raw)
  To: dev, harry.van.haaren
  Cc: thomas, david.marchand, konstantin.ananyev, jerinj,
	hemant.agrawal, gage.eads, bruce.richardson,
	Honnappa.Nagarahalli, nd, Honnappa Nagarahalli, stable

From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

The logic to identify if the MT unsafe service is running on another
core can return -EBUSY spuriously. In such cases, running the service
becomes costlier than using atomic operations. Assume that the
application passes the right parameters and reduce the number of
instructions for all cases.

Cc: stable@dpdk.org
Fixes: 8d39d3e237c2 ("service: fix race in service on app lcore function")

Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
Acked-by: Harry van Haaren <harry.van.haaren@intel.com>
---
 lib/librte_eal/common/rte_service.c | 26 ++++++++------------------
 1 file changed, 8 insertions(+), 18 deletions(-)

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index b8c465e..c283408 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -360,7 +360,7 @@ rte_service_runner_do_callback(struct rte_service_spec_impl *s,
 /* Expects the service 's' is valid. */
 static int32_t
 service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
-	    struct rte_service_spec_impl *s)
+	    struct rte_service_spec_impl *s, uint32_t serialize_mt_unsafe)
 {
 	if (!s)
 		return -EINVAL;
@@ -374,7 +374,7 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 
 	cs->service_active_on_lcore[i] = 1;
 
-	if (service_mt_safe(s) == 0) {
+	if ((service_mt_safe(s) == 0) && (serialize_mt_unsafe == 1)) {
 		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
 			return -EBUSY;
 
@@ -412,24 +412,14 @@ rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe)
 
 	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
 
-	/* Atomically add this core to the mapped cores first, then examine if
-	 * we can run the service. This avoids a race condition between
-	 * checking the value, and atomically adding to the mapped count.
+	/* Increment num_mapped_cores to reflect that this core is
+	 * now mapped capable of running the service.
 	 */
-	if (serialize_mt_unsafe)
-		rte_atomic32_inc(&s->num_mapped_cores);
+	rte_atomic32_inc(&s->num_mapped_cores);
 
-	if (service_mt_safe(s) == 0 &&
-			rte_atomic32_read(&s->num_mapped_cores) > 1) {
-		if (serialize_mt_unsafe)
-			rte_atomic32_dec(&s->num_mapped_cores);
-		return -EBUSY;
-	}
-
-	int ret = service_run(id, cs, UINT64_MAX, s);
+	int ret = service_run(id, cs, UINT64_MAX, s, serialize_mt_unsafe);
 
-	if (serialize_mt_unsafe)
-		rte_atomic32_dec(&s->num_mapped_cores);
+	rte_atomic32_dec(&s->num_mapped_cores);
 
 	return ret;
 }
@@ -449,7 +439,7 @@ rte_service_runner_func(void *arg)
 			if (!service_valid(i))
 				continue;
 			/* return value ignored as no change to code flow */
-			service_run(i, cs, service_mask, service_get(i));
+			service_run(i, cs, service_mask, service_get(i), 1);
 		}
 
 		cs->loops++;
-- 
2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* [dpdk-stable] [PATCH v6 1/6] service: fix race condition for MT unsafe service
       [not found]             ` <1588778884-13047-1-git-send-email-phil.yang@arm.com>
@ 2020-05-06 15:27               ` Phil Yang
  2020-05-06 15:28               ` [dpdk-stable] [PATCH v6 2/6] service: fix identification of service running on other lcore Phil Yang
  1 sibling, 0 replies; 51+ messages in thread
From: Phil Yang @ 2020-05-06 15:27 UTC (permalink / raw)
  To: dev, harry.van.haaren
  Cc: thomas, david.marchand, konstantin.ananyev, jerinj,
	hemant.agrawal, gage.eads, bruce.richardson,
	Honnappa.Nagarahalli, nd, Honnappa Nagarahalli, stable

From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

The MT unsafe service might get configured to run on another core
while the service is running currently. This might result in the
MT unsafe service running on multiple cores simultaneously. Use
'execute_lock' always when the service is MT unsafe.

If the service is known to be mmapped on a single lcore,
setting the service capability to MT safe will avoid taking
the lock and improve the performance.

Fixes: e9139a32f6e8 ("service: add function to run on app lcore")
Cc: stable@dpdk.org

Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
Acked-by: Harry van Haaren <harry.van.haaren@intel.com>
---
 lib/librte_eal/common/rte_service.c            | 11 +++++------
 lib/librte_eal/include/rte_service.h           |  8 ++++++--
 lib/librte_eal/include/rte_service_component.h |  6 +++++-
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index 70d17a5..b8c465e 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -50,6 +50,10 @@ struct rte_service_spec_impl {
 	uint8_t internal_flags;
 
 	/* per service statistics */
+	/* Indicates how many cores the service is mapped to run on.
+	 * It does not indicate the number of cores the service is running
+	 * on currently.
+	 */
 	rte_atomic32_t num_mapped_cores;
 	uint64_t calls;
 	uint64_t cycles_spent;
@@ -370,12 +374,7 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 
 	cs->service_active_on_lcore[i] = 1;
 
-	/* check do we need cmpset, if MT safe or <= 1 core
-	 * mapped, atomic ops are not required.
-	 */
-	const int use_atomics = (service_mt_safe(s) == 0) &&
-				(rte_atomic32_read(&s->num_mapped_cores) > 1);
-	if (use_atomics) {
+	if (service_mt_safe(s) == 0) {
 		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
 			return -EBUSY;
 
diff --git a/lib/librte_eal/include/rte_service.h b/lib/librte_eal/include/rte_service.h
index d8701dd..3a1c735 100644
--- a/lib/librte_eal/include/rte_service.h
+++ b/lib/librte_eal/include/rte_service.h
@@ -104,12 +104,16 @@ int32_t rte_service_probe_capability(uint32_t id, uint32_t capability);
  * Each core can be added or removed from running a specific service. This
  * function enables or disables *lcore* to run *service_id*.
  *
- * If multiple cores are enabled on a service, an atomic is used to ensure that
- * only one cores runs the service at a time. The exception to this is when
+ * If multiple cores are enabled on a service, a lock is used to ensure that
+ * only one core runs the service at a time. The exception to this is when
  * a service indicates that it is multi-thread safe by setting the capability
  * called RTE_SERVICE_CAP_MT_SAFE. With the multi-thread safe capability set,
  * the service function can be run on multiple threads at the same time.
  *
+ * If the service is known to be mapped to a single lcore, setting the
+ * capability of the service to RTE_SERVICE_CAP_MT_SAFE can achieve
+ * better performance by avoiding the use of lock.
+ *
  * @param service_id the service to apply the lcore to
  * @param lcore The lcore that will be mapped to service
  * @param enable Zero to unmap or disable the core, non-zero to enable
diff --git a/lib/librte_eal/include/rte_service_component.h b/lib/librte_eal/include/rte_service_component.h
index 16eab79..b75aba1 100644
--- a/lib/librte_eal/include/rte_service_component.h
+++ b/lib/librte_eal/include/rte_service_component.h
@@ -43,7 +43,7 @@ struct rte_service_spec {
 /**
  * Register a new service.
  *
- * A service represents a component that the requires CPU time periodically to
+ * A service represents a component that requires CPU time periodically to
  * achieve its purpose.
  *
  * For example the eventdev SW PMD requires CPU cycles to perform its
@@ -56,6 +56,10 @@ struct rte_service_spec {
  * *rte_service_component_runstate_set*, which indicates that the service
  * component is ready to be executed.
  *
+ * If the service is known to be mapped to a single lcore, setting the
+ * capability of the service to RTE_SERVICE_CAP_MT_SAFE can achieve
+ * better performance.
+ *
  * @param spec The specification of the service to register
  * @param[out] service_id A pointer to a uint32_t, which will be filled in
  *             during registration of the service. It is set to the integers
-- 
2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

* [dpdk-stable] [PATCH v6 2/6] service: fix identification of service running on other lcore
       [not found]             ` <1588778884-13047-1-git-send-email-phil.yang@arm.com>
  2020-05-06 15:27               ` [dpdk-stable] [PATCH v6 1/6] service: fix race condition for MT unsafe service Phil Yang
@ 2020-05-06 15:28               ` Phil Yang
  1 sibling, 0 replies; 51+ messages in thread
From: Phil Yang @ 2020-05-06 15:28 UTC (permalink / raw)
  To: dev, harry.van.haaren
  Cc: thomas, david.marchand, konstantin.ananyev, jerinj,
	hemant.agrawal, gage.eads, bruce.richardson,
	Honnappa.Nagarahalli, nd, Honnappa Nagarahalli, stable

From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

The logic to identify if the MT unsafe service is running on another
core can return -EBUSY spuriously. In such cases, running the service
becomes costlier than using atomic operations. Assume that the
application passes the right parameters and reduce the number of
instructions for all cases.

Cc: stable@dpdk.org
Fixes: 8d39d3e237c2 ("service: fix race in service on app lcore function")

Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
Acked-by: Harry van Haaren <harry.van.haaren@intel.com>
---
 lib/librte_eal/common/rte_service.c | 26 ++++++++------------------
 1 file changed, 8 insertions(+), 18 deletions(-)

diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index b8c465e..c283408 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -360,7 +360,7 @@ rte_service_runner_do_callback(struct rte_service_spec_impl *s,
 /* Expects the service 's' is valid. */
 static int32_t
 service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
-	    struct rte_service_spec_impl *s)
+	    struct rte_service_spec_impl *s, uint32_t serialize_mt_unsafe)
 {
 	if (!s)
 		return -EINVAL;
@@ -374,7 +374,7 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
 
 	cs->service_active_on_lcore[i] = 1;
 
-	if (service_mt_safe(s) == 0) {
+	if ((service_mt_safe(s) == 0) && (serialize_mt_unsafe == 1)) {
 		if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
 			return -EBUSY;
 
@@ -412,24 +412,14 @@ rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe)
 
 	SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
 
-	/* Atomically add this core to the mapped cores first, then examine if
-	 * we can run the service. This avoids a race condition between
-	 * checking the value, and atomically adding to the mapped count.
+	/* Increment num_mapped_cores to reflect that this core is
+	 * now mapped capable of running the service.
 	 */
-	if (serialize_mt_unsafe)
-		rte_atomic32_inc(&s->num_mapped_cores);
+	rte_atomic32_inc(&s->num_mapped_cores);
 
-	if (service_mt_safe(s) == 0 &&
-			rte_atomic32_read(&s->num_mapped_cores) > 1) {
-		if (serialize_mt_unsafe)
-			rte_atomic32_dec(&s->num_mapped_cores);
-		return -EBUSY;
-	}
-
-	int ret = service_run(id, cs, UINT64_MAX, s);
+	int ret = service_run(id, cs, UINT64_MAX, s, serialize_mt_unsafe);
 
-	if (serialize_mt_unsafe)
-		rte_atomic32_dec(&s->num_mapped_cores);
+	rte_atomic32_dec(&s->num_mapped_cores);
 
 	return ret;
 }
@@ -449,7 +439,7 @@ rte_service_runner_func(void *arg)
 			if (!service_valid(i))
 				continue;
 			/* return value ignored as no change to code flow */
-			service_run(i, cs, service_mask, service_get(i));
+			service_run(i, cs, service_mask, service_get(i), 1);
 		}
 
 		cs->loops++;
-- 
2.7.4


^ permalink raw reply	[flat|nested] 51+ messages in thread

end of thread, back to index

Thread overview: 51+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1583862551-2049-1-git-send-email-phil.yang@arm.com>
2020-03-10 17:49 ` [dpdk-stable] [PATCH 05/10] service: remove rte prefix from static functions Phil Yang
2020-03-10 17:49 ` [dpdk-stable] [PATCH 06/10] service: remove redundant code Phil Yang
2020-03-10 17:49 ` [dpdk-stable] [PATCH 07/10] service: avoid race condition for MT unsafe service Phil Yang
2020-03-10 17:49 ` [dpdk-stable] [PATCH 08/10] service: identify service running on another core correctly Phil Yang
     [not found] ` <1583993624-20446-1-git-send-email-phil.yang@arm.com>
2020-03-12  6:13   ` [dpdk-stable] [PATCH v2 05/10] service: remove rte prefix from static functions Phil Yang
2020-03-12  6:13   ` [dpdk-stable] [PATCH v2 06/10] service: remove redundant code Phil Yang
2020-03-12  6:13   ` [dpdk-stable] [PATCH v2 07/10] service: avoid race condition for MT unsafe service Phil Yang
2020-03-12  6:13   ` [dpdk-stable] [PATCH v2 08/10] service: identify service running on another core correctly Phil Yang
     [not found] ` <1583999071-22872-1-git-send-email-phil.yang@arm.com>
2020-03-12  7:44   ` [dpdk-stable] [PATCH v2 05/10] service: remove rte prefix from static functions Phil Yang
2020-03-12  7:44   ` [dpdk-stable] [PATCH v2 06/10] service: remove redundant code Phil Yang
2020-03-12  7:44   ` [dpdk-stable] [PATCH v2 07/10] service: avoid race condition for MT unsafe service Phil Yang
2020-03-12  7:44   ` [dpdk-stable] [PATCH v2 08/10] service: identify service running on another core correctly Phil Yang
     [not found]   ` <1584407863-774-1-git-send-email-phil.yang@arm.com>
2020-03-17  1:17     ` [dpdk-stable] [PATCH v3 07/12] service: remove rte prefix from static functions Phil Yang
2020-04-03 11:57       ` Van Haaren, Harry
2020-04-08 10:14         ` Phil Yang
2020-04-08 10:36           ` Van Haaren, Harry
2020-04-08 10:49             ` Phil Yang
2020-04-05 21:35       ` Honnappa Nagarahalli
2020-04-08 10:14         ` Phil Yang
     [not found]       ` <1587659482-27133-1-git-send-email-phil.yang@arm.com>
2020-04-23 16:31         ` [dpdk-stable] [PATCH v2 1/6] service: fix race condition for MT unsafe service Phil Yang
2020-04-29 16:51           ` Van Haaren, Harry
2020-04-29 22:48             ` Honnappa Nagarahalli
2020-05-01 14:21               ` Van Haaren, Harry
2020-05-01 14:56                 ` Honnappa Nagarahalli
2020-05-01 17:51                   ` Van Haaren, Harry
2020-04-23 16:31         ` [dpdk-stable] [PATCH v2 2/6] service: identify service running on another core correctly Phil Yang
     [not found]         ` <20200502000245.11071-1-honnappa.nagarahalli@arm.com>
2020-05-02  0:02           ` [dpdk-stable] [PATCH v3 1/6] service: fix race condition for MT unsafe service Honnappa Nagarahalli
2020-05-05 14:48             ` Van Haaren, Harry
2020-05-02  0:02           ` [dpdk-stable] [PATCH v3 2/6] service: identify service running on another core correctly Honnappa Nagarahalli
2020-05-05 14:48             ` Van Haaren, Harry
     [not found]         ` <20200505211732.25291-1-honnappa.nagarahalli@arm.com>
2020-05-05 21:17           ` [dpdk-stable] [PATCH v4 1/6] service: fix race condition for MT unsafe service Honnappa Nagarahalli
2020-05-05 21:17           ` [dpdk-stable] [PATCH v4 2/6] service: fix identification of service running on other lcore Honnappa Nagarahalli
     [not found]           ` <1588760683-11027-1-git-send-email-phil.yang@arm.com>
2020-05-06 10:24             ` [dpdk-stable] [PATCH v5 1/6] service: fix race condition for MT unsafe service Phil Yang
2020-05-06 10:24             ` [dpdk-stable] [PATCH v5 2/6] service: fix identification of service running on other lcore Phil Yang
     [not found]             ` <1588778884-13047-1-git-send-email-phil.yang@arm.com>
2020-05-06 15:27               ` [dpdk-stable] [PATCH v6 1/6] service: fix race condition for MT unsafe service Phil Yang
2020-05-06 15:28               ` [dpdk-stable] [PATCH v6 2/6] service: fix identification of service running on other lcore Phil Yang
2020-03-17  1:17     ` [dpdk-stable] [PATCH v3 08/12] service: remove redundant code Phil Yang
2020-04-03 11:58       ` Van Haaren, Harry
2020-04-05 18:35         ` Honnappa Nagarahalli
2020-04-08 10:15           ` Phil Yang
2020-03-17  1:17     ` [dpdk-stable] [PATCH v3 09/12] service: avoid race condition for MT unsafe service Phil Yang
2020-04-03 11:58       ` Van Haaren, Harry
2020-04-04 18:03         ` Honnappa Nagarahalli
2020-04-08 18:05           ` Van Haaren, Harry
2020-04-09  1:31             ` Honnappa Nagarahalli
2020-04-09 16:46               ` Van Haaren, Harry
2020-04-18  6:21                 ` Honnappa Nagarahalli
2020-04-21 17:43                   ` Van Haaren, Harry
2020-03-17  1:17     ` [dpdk-stable] [PATCH v3 10/12] service: identify service running on another core correctly Phil Yang
2020-04-03 11:58       ` Van Haaren, Harry
2020-04-05  2:43         ` Honnappa Nagarahalli

patches for DPDK stable branches

Archives are clonable:
	git clone --mirror http://inbox.dpdk.org/stable/0 stable/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 stable stable/ http://inbox.dpdk.org/stable \
		stable@dpdk.org
	public-inbox-index stable


Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.stable


AGPL code for this site: git clone https://public-inbox.org/ public-inbox