* [dpdk-dev] [PATCH 1/3] timer: fix stress test 2 synchronization bug
2015-07-23 22:42 [dpdk-dev] [PATCH 0/3] timer: fix rte_timer_manage and improve unit tests rsanford2
@ 2015-07-23 22:42 ` rsanford2
2015-07-23 22:42 ` [dpdk-dev] [PATCH 2/3] timer: add timer-manage race condition test rsanford2
` (6 subsequent siblings)
7 siblings, 0 replies; 12+ messages in thread
From: rsanford2 @ 2015-07-23 22:42 UTC (permalink / raw)
To: dev
From: Robert Sanford <rsanford@akamai.com>
Signed-off-by: Robert Sanford <rsanford@akamai.com>
---
app/test/test_timer.c | 149 ++++++++++++++++++++++++++++++++++++++-----------
1 files changed, 116 insertions(+), 33 deletions(-)
diff --git a/app/test/test_timer.c b/app/test/test_timer.c
index 73da5b6..dd63421 100644
--- a/app/test/test_timer.c
+++ b/app/test/test_timer.c
@@ -137,13 +137,13 @@
#include <rte_random.h>
#include <rte_malloc.h>
-
#define TEST_DURATION_S 20 /* in seconds */
#define NB_TIMER 4
#define RTE_LOGTYPE_TESTTIMER RTE_LOGTYPE_USER3
static volatile uint64_t end_time;
+static volatile int test_failed;
struct mytimerinfo {
struct rte_timer tim;
@@ -230,6 +230,59 @@ timer_stress_main_loop(__attribute__((unused)) void *arg)
return 0;
}
+/* Need to synchronize slave lcores through multiple steps. */
+enum { SLAVE_WAITING=1, SLAVE_RUN_SIGNAL, SLAVE_RUNNING, SLAVE_FINISHED };
+static rte_atomic16_t test_slave_state[RTE_MAX_LCORE];
+
+static void
+master_init_slaves(void)
+{
+ unsigned i;
+ RTE_LCORE_FOREACH_SLAVE(i) {
+ rte_atomic16_set(&test_slave_state[i], SLAVE_WAITING);
+ }
+}
+
+static void
+master_start_slaves(void)
+{
+ unsigned i;
+ RTE_LCORE_FOREACH_SLAVE(i) {
+ rte_atomic16_set(&test_slave_state[i], SLAVE_RUN_SIGNAL);
+ }
+ RTE_LCORE_FOREACH_SLAVE(i) {
+ while (rte_atomic16_read(&test_slave_state[i]) != SLAVE_RUNNING)
+ rte_pause();
+ }
+}
+
+static void
+master_wait_for_slaves(void)
+{
+ unsigned i;
+ RTE_LCORE_FOREACH_SLAVE(i) {
+ while (rte_atomic16_read(&test_slave_state[i]) != SLAVE_FINISHED)
+ rte_pause();
+ }
+}
+
+static void
+slave_wait_to_start(void)
+{
+ unsigned lcore_id = rte_lcore_id();
+ while (rte_atomic16_read(&test_slave_state[lcore_id]) != SLAVE_RUN_SIGNAL)
+ rte_pause();
+ rte_atomic16_set(&test_slave_state[lcore_id], SLAVE_RUNNING);
+}
+
+static void
+slave_finish(void)
+{
+ unsigned lcore_id = rte_lcore_id();
+ rte_atomic16_set(&test_slave_state[lcore_id], SLAVE_FINISHED);
+}
+
+
static volatile int cb_count = 0;
/* callback for second stress test. will only be called
@@ -247,31 +300,37 @@ timer_stress2_main_loop(__attribute__((unused)) void *arg)
{
static struct rte_timer *timers;
int i, ret;
- static volatile int ready = 0;
uint64_t delay = rte_get_timer_hz() / 4;
unsigned lcore_id = rte_lcore_id();
+ unsigned master = rte_get_master_lcore();
int32_t my_collisions = 0;
- static rte_atomic32_t collisions = RTE_ATOMIC32_INIT(0);
+ static rte_atomic32_t collisions;
- if (lcore_id == rte_get_master_lcore()) {
+ if (lcore_id == master) {
cb_count = 0;
+ test_failed = 0;
+ rte_atomic32_set(&collisions, 0);
+ master_init_slaves();
timers = rte_malloc(NULL, sizeof(*timers) * NB_STRESS2_TIMERS, 0);
if (timers == NULL) {
printf("Test Failed\n");
printf("- Cannot allocate memory for timers\n" );
- return -1;
+ test_failed = 1;
+ master_start_slaves();
+ goto cleanup;
}
for (i = 0; i < NB_STRESS2_TIMERS; i++)
rte_timer_init(&timers[i]);
- ready = 1;
+ master_start_slaves();
} else {
- while (!ready)
- rte_pause();
+ slave_wait_to_start();
+ if (test_failed)
+ goto cleanup;
}
/* have all cores schedule all timers on master lcore */
for (i = 0; i < NB_STRESS2_TIMERS; i++) {
- ret = rte_timer_reset(&timers[i], delay, SINGLE, rte_get_master_lcore(),
+ ret = rte_timer_reset(&timers[i], delay, SINGLE, master,
timer_stress2_cb, NULL);
/* there will be collisions when multiple cores simultaneously
* configure the same timers */
@@ -281,11 +340,18 @@ timer_stress2_main_loop(__attribute__((unused)) void *arg)
if (my_collisions != 0)
rte_atomic32_add(&collisions, my_collisions);
- ready = 0;
+ /* wait long enough for timers to expire */
rte_delay_ms(500);
+ /* all cores rendezvous */
+ if (lcore_id == master) {
+ master_wait_for_slaves();
+ } else {
+ slave_finish();
+ }
+
/* now check that we get the right number of callbacks */
- if (lcore_id == rte_get_master_lcore()) {
+ if (lcore_id == master) {
my_collisions = rte_atomic32_read(&collisions);
if (my_collisions != 0)
printf("- %d timer reset collisions (OK)\n", my_collisions);
@@ -295,49 +361,63 @@ timer_stress2_main_loop(__attribute__((unused)) void *arg)
printf("- Stress test 2, part 1 failed\n");
printf("- Expected %d callbacks, got %d\n", NB_STRESS2_TIMERS,
cb_count);
- return -1;
+ test_failed = 1;
+ master_start_slaves();
+ goto cleanup;
}
- ready = 1;
+ cb_count = 0;
+
+ /* proceed */
+ master_start_slaves();
} else {
- while (!ready)
- rte_pause();
+ /* proceed */
+ slave_wait_to_start();
+ if (test_failed)
+ goto cleanup;
}
/* now test again, just stop and restart timers at random after init*/
for (i = 0; i < NB_STRESS2_TIMERS; i++)
- rte_timer_reset(&timers[i], delay, SINGLE, rte_get_master_lcore(),
+ rte_timer_reset(&timers[i], delay, SINGLE, master,
timer_stress2_cb, NULL);
- cb_count = 0;
/* pick random timer to reset, stopping them first half the time */
for (i = 0; i < 100000; i++) {
int r = rand() % NB_STRESS2_TIMERS;
if (i % 2)
rte_timer_stop(&timers[r]);
- rte_timer_reset(&timers[r], delay, SINGLE, rte_get_master_lcore(),
+ rte_timer_reset(&timers[r], delay, SINGLE, master,
timer_stress2_cb, NULL);
}
+ /* wait long enough for timers to expire */
rte_delay_ms(500);
/* now check that we get the right number of callbacks */
- if (lcore_id == rte_get_master_lcore()) {
- rte_timer_manage();
-
- /* clean up statics, in case we run again */
- rte_free(timers);
- timers = NULL;
- ready = 0;
- rte_atomic32_set(&collisions, 0);
+ if (lcore_id == master) {
+ master_wait_for_slaves();
+ rte_timer_manage();
if (cb_count != NB_STRESS2_TIMERS) {
printf("Test Failed\n");
printf("- Stress test 2, part 2 failed\n");
printf("- Expected %d callbacks, got %d\n", NB_STRESS2_TIMERS,
cb_count);
- return -1;
+ test_failed = 1;
+ } else {
+ printf("Test OK\n");
}
- printf("Test OK\n");
+ }
+
+cleanup:
+ if (lcore_id == master) {
+ master_wait_for_slaves();
+ if (timers != NULL) {
+ rte_free(timers);
+ timers = NULL;
+ }
+ } else {
+ slave_finish();
}
return 0;
@@ -485,12 +565,12 @@ test_timer(void)
/* sanity check our timer sources and timer config values */
if (timer_sanity_check() < 0) {
printf("Timer sanity checks failed\n");
- return -1;
+ return TEST_FAILED;
}
if (rte_lcore_count() < 2) {
printf("not enough lcores for this test\n");
- return -1;
+ return TEST_FAILED;
}
/* init timer */
@@ -514,9 +594,12 @@ test_timer(void)
rte_timer_stop_sync(&mytiminfo[0].tim);
/* run a second, slightly different set of stress tests */
- printf("Start timer stress tests 2\n");
+ printf("\nStart timer stress tests 2\n");
+ test_failed = 0;
rte_eal_mp_remote_launch(timer_stress2_main_loop, NULL, CALL_MASTER);
rte_eal_mp_wait_lcore();
+ if (test_failed)
+ return TEST_FAILED;
/* calculate the "end of test" time */
cur_time = rte_get_timer_cycles();
@@ -524,7 +607,7 @@ test_timer(void)
end_time = cur_time + (hz * TEST_DURATION_S);
/* start other cores */
- printf("Start timer basic tests (%d seconds)\n", TEST_DURATION_S);
+ printf("\nStart timer basic tests (%d seconds)\n", TEST_DURATION_S);
rte_eal_mp_remote_launch(timer_basic_main_loop, NULL, CALL_MASTER);
rte_eal_mp_wait_lcore();
@@ -535,7 +618,7 @@ test_timer(void)
rte_timer_dump_stats(stdout);
- return 0;
+ return TEST_SUCCESS;
}
static struct test_command timer_cmd = {
--
1.7.1
* [dpdk-dev] [PATCH 2/3] timer: add timer-manage race condition test
2015-07-23 22:42 [dpdk-dev] [PATCH 0/3] timer: fix rte_timer_manage and improve unit tests rsanford2
2015-07-23 22:42 ` [dpdk-dev] [PATCH 1/3] timer: fix stress test 2 synchronization bug rsanford2
@ 2015-07-23 22:42 ` rsanford2
2015-07-23 22:42 ` [dpdk-dev] [PATCH 3/3] timer: fix race condition in rte_timer_manage() rsanford2
` (5 subsequent siblings)
7 siblings, 0 replies; 12+ messages in thread
From: rsanford2 @ 2015-07-23 22:42 UTC (permalink / raw)
To: dev
From: Robert Sanford <rsanford@akamai.com>
Signed-off-by: Robert Sanford <rsanford@akamai.com>
---
app/test/Makefile | 1 +
app/test/test_timer_racecond.c | 209 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 210 insertions(+), 0 deletions(-)
create mode 100644 app/test/test_timer_racecond.c
diff --git a/app/test/Makefile b/app/test/Makefile
index caa359c..e7f148f 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -71,6 +71,7 @@ SRCS-y += test_rwlock.c
SRCS-$(CONFIG_RTE_LIBRTE_TIMER) += test_timer.c
SRCS-$(CONFIG_RTE_LIBRTE_TIMER) += test_timer_perf.c
+SRCS-$(CONFIG_RTE_LIBRTE_TIMER) += test_timer_racecond.c
SRCS-y += test_mempool.c
SRCS-y += test_mempool_perf.c
diff --git a/app/test/test_timer_racecond.c b/app/test/test_timer_racecond.c
new file mode 100644
index 0000000..6f60a7a
--- /dev/null
+++ b/app/test/test_timer_racecond.c
@@ -0,0 +1,209 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Akamai Technologies.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "test.h"
+
+#include <stdio.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <rte_cycles.h>
+#include <rte_timer.h>
+#include <rte_common.h>
+#include <rte_lcore.h>
+#include <rte_random.h>
+#include <rte_malloc.h>
+
+#undef TEST_TIMER_RACECOND_VERBOSE
+
+#ifdef RTE_EXEC_ENV_LINUXAPP
+#define usec_delay(us) usleep(us)
+#else
+#define usec_delay(us) rte_delay_us(us)
+#endif
+
+#define BILLION (1UL << 30)
+
+#define TEST_DURATION_S 20 /* in seconds */
+#define N_TIMERS 50
+
+static struct rte_timer timer[N_TIMERS];
+static unsigned timer_lcore_id[N_TIMERS];
+
+static unsigned master;
+static volatile unsigned stop_slaves;
+
+static int reload_timer(struct rte_timer *tim);
+
+static void
+timer_cb(struct rte_timer *tim, void *arg __rte_unused)
+{
+ /* Simulate slow callback function, 100 us. */
+ rte_delay_us(100);
+
+#ifdef TEST_TIMER_RACECOND_VERBOSE
+ if (tim == &timer[0])
+ printf("------------------------------------------------\n");
+ printf("timer_cb: core %u timer %lu\n",
+ rte_lcore_id(), tim - timer);
+#endif
+ (void)reload_timer(tim);
+}
+
+RTE_DEFINE_PER_LCORE(unsigned, n_reset_collisions);
+
+static int
+reload_timer(struct rte_timer *tim)
+{
+ /* Make timer expire roughly when the TSC hits the next BILLION multiple.
+ * Add in timer's index to make them expire in somewhat sorted order.
+ * This makes all timers somewhat synchronized, firing ~2-3 times per
+ * second, assuming 2-3 GHz TSCs.
+ */
+ uint64_t ticks = BILLION - (rte_get_timer_cycles() % BILLION) +
+ (tim - timer);
+ int ret;
+
+ ret = rte_timer_reset(tim, ticks, PERIODICAL, master, timer_cb, NULL);
+ if (ret != 0) {
+#ifdef TEST_TIMER_RACECOND_VERBOSE
+ printf("- core %u failed to reset timer %lu (OK)\n",
+ rte_lcore_id(), tim - timer);
+#endif
+ RTE_PER_LCORE(n_reset_collisions) += 1;
+ }
+ return ret;
+}
+
+static int
+slave_main_loop(__attribute__((unused)) void *arg)
+{
+ unsigned lcore_id = rte_lcore_id();
+ unsigned i;
+
+ RTE_PER_LCORE(n_reset_collisions) = 0;
+
+ printf("Starting main loop on core %u\n", lcore_id);
+
+ while (!stop_slaves) {
+ /* Wait until the timer manager is running.
+ * We know it's running when we see timer[0] NOT pending.
+ */
+ if (rte_timer_pending(&timer[0])) {
+ rte_pause();
+ continue;
+ }
+
+ /* Now, go cause some havoc!
+ * Reload our timers.
+ */
+ for (i = 0; i < N_TIMERS; i++) {
+ if (timer_lcore_id[i] == lcore_id)
+ (void)reload_timer(&timer[i]);
+ }
+ usec_delay(100*1000); /* sleep 100 ms */
+ }
+
+ if (RTE_PER_LCORE(n_reset_collisions) != 0) {
+ printf("- core %u, %u reset collisions (OK)\n",
+ lcore_id, RTE_PER_LCORE(n_reset_collisions));
+ }
+ return 0;
+}
+
+static int
+test_timer_racecond(void)
+{
+ int ret;
+ uint64_t hz;
+ uint64_t cur_time;
+ uint64_t end_time;
+ int64_t diff = 0;
+ unsigned lcore_id;
+ unsigned i;
+
+ master = lcore_id = rte_lcore_id();
+ hz = rte_get_timer_hz();
+
+ /* init and start timers */
+ for (i = 0; i < N_TIMERS; i++) {
+ rte_timer_init(&timer[i]);
+ ret = reload_timer(&timer[i]);
+ TEST_ASSERT(ret == 0, "reload_timer failed");
+
+ /* Distribute timers to slaves.
+ * Note that we assign timer[0] to the master.
+ */
+ timer_lcore_id[i] = lcore_id;
+ lcore_id = rte_get_next_lcore(lcore_id, 1, 1);
+ }
+
+ /* calculate the "end of test" time */
+ cur_time = rte_get_timer_cycles();
+ end_time = cur_time + (hz * TEST_DURATION_S);
+
+ /* start slave cores */
+ stop_slaves = 0;
+ printf("Start timer manage race condition test (%u seconds)\n",
+ TEST_DURATION_S);
+ rte_eal_mp_remote_launch(slave_main_loop, NULL, SKIP_MASTER);
+
+ while (diff >= 0) {
+ /* run the timers */
+ rte_timer_manage();
+
+ /* wait 100 ms */
+ usec_delay(100*1000);
+
+ cur_time = rte_get_timer_cycles();
+ diff = end_time - cur_time;
+ }
+
+ /* stop slave cores */
+ printf("Stopping timer manage race condition test\n");
+ stop_slaves = 1;
+ rte_eal_mp_wait_lcore();
+
+ /* stop timers */
+ for (i = 0; i < N_TIMERS; i++) {
+ ret = rte_timer_stop(&timer[i]);
+ TEST_ASSERT(ret == 0, "rte_timer_stop failed");
+ }
+
+ return TEST_SUCCESS;
+}
+
+static struct test_command timer_racecond_cmd = {
+ .command = "timer_racecond_autotest",
+ .callback = test_timer_racecond,
+};
+REGISTER_TEST_COMMAND(timer_racecond_cmd);
--
1.7.1
* [dpdk-dev] [PATCH 3/3] timer: fix race condition in rte_timer_manage()
2015-07-23 22:42 [dpdk-dev] [PATCH 0/3] timer: fix rte_timer_manage and improve unit tests rsanford2
2015-07-23 22:42 ` [dpdk-dev] [PATCH 1/3] timer: fix stress test 2 synchronization bug rsanford2
2015-07-23 22:42 ` [dpdk-dev] [PATCH 2/3] timer: add timer-manage race condition test rsanford2
@ 2015-07-23 22:42 ` rsanford2
2015-07-26 14:11 ` [dpdk-dev] [PATCH 0/3] timer: fix rte_timer_manage and improve unit tests Thomas Monjalon
` (4 subsequent siblings)
7 siblings, 0 replies; 12+ messages in thread
From: rsanford2 @ 2015-07-23 22:42 UTC (permalink / raw)
To: dev
From: Robert Sanford <rsanford@akamai.com>
Signed-off-by: Robert Sanford <rsanford@akamai.com>
---
lib/librte_timer/rte_timer.c | 45 +++++++++++++++++++++++++++--------------
1 files changed, 29 insertions(+), 16 deletions(-)
diff --git a/lib/librte_timer/rte_timer.c b/lib/librte_timer/rte_timer.c
index 8e9243a..51e6038 100644
--- a/lib/librte_timer/rte_timer.c
+++ b/lib/librte_timer/rte_timer.c
@@ -504,6 +504,7 @@ void rte_timer_manage(void)
{
union rte_timer_status status;
struct rte_timer *tim, *next_tim;
+ struct rte_timer *run_first_tim, **pprev;
unsigned lcore_id = rte_lcore_id();
struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
uint64_t cur_time;
@@ -531,8 +532,10 @@ void rte_timer_manage(void)
/* if nothing to do just unlock and return */
if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL ||
- priv_timer[lcore_id].pending_head.sl_next[0]->expire > cur_time)
- goto done;
+ priv_timer[lcore_id].pending_head.sl_next[0]->expire > cur_time) {
+ rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
+ return;
+ }
/* save start of list of expired timers */
tim = priv_timer[lcore_id].pending_head.sl_next[0];
@@ -546,24 +549,40 @@ void rte_timer_manage(void)
prev[i] ->sl_next[i] = NULL;
}
- /* now scan expired list and call callbacks */
+ /* transition run-list from PENDING to RUNNING */
+ run_first_tim = tim;
+ pprev = &run_first_tim;
+
for ( ; tim != NULL; tim = next_tim) {
next_tim = tim->sl_next[0];
ret = timer_set_running_state(tim);
+ if (likely(ret == 0)) {
+ pprev = &tim->sl_next[0];
+ } else {
+ /* another core is trying to re-config this one,
+ * remove it from local expired list and put it
+ * back on the priv_timer[] skip list */
+ *pprev = next_tim;
+ timer_add(tim, lcore_id, 1);
+ }
+ }
- /* this timer was not pending, continue */
- if (ret < 0)
- continue;
+ /* update the next to expire timer value */
+ priv_timer[lcore_id].pending_head.expire =
+ (priv_timer[lcore_id].pending_head.sl_next[0] == NULL) ? 0 :
+ priv_timer[lcore_id].pending_head.sl_next[0]->expire;
- rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
+ rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
+ /* now scan expired list and call callbacks */
+ for (tim = run_first_tim; tim != NULL; tim = next_tim) {
+ next_tim = tim->sl_next[0];
priv_timer[lcore_id].updated = 0;
/* execute callback function with list unlocked */
tim->f(tim, tim->arg);
- rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
__TIMER_STAT_ADD(pending, -1);
/* the timer was stopped or reloaded by the callback
* function, we have nothing to do here */
@@ -579,6 +598,7 @@ void rte_timer_manage(void)
}
else {
/* keep it in list and mark timer as pending */
+ rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
status.state = RTE_TIMER_PENDING;
__TIMER_STAT_ADD(pending, 1);
status.owner = (int16_t)lcore_id;
@@ -586,16 +606,9 @@ void rte_timer_manage(void)
tim->status.u32 = status.u32;
__rte_timer_reset(tim, cur_time + tim->period,
tim->period, lcore_id, tim->f, tim->arg, 1);
+ rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
}
}
-
- /* update the next to expire timer value */
- priv_timer[lcore_id].pending_head.expire =
- (priv_timer[lcore_id].pending_head.sl_next[0] == NULL) ? 0 :
- priv_timer[lcore_id].pending_head.sl_next[0]->expire;
-done:
- /* job finished, unlock the list lock */
- rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
}
/* dump statistics about timers */
--
1.7.1
* Re: [dpdk-dev] [PATCH 0/3] timer: fix rte_timer_manage and improve unit tests
2015-07-23 22:42 [dpdk-dev] [PATCH 0/3] timer: fix rte_timer_manage and improve unit tests rsanford2
` (2 preceding siblings ...)
2015-07-23 22:42 ` [dpdk-dev] [PATCH 3/3] timer: fix race condition in rte_timer_manage() rsanford2
@ 2015-07-26 14:11 ` Thomas Monjalon
2015-07-27 15:46 ` Sanford, Robert
2015-07-27 22:46 ` [dpdk-dev] [PATCH v2 " rsanford2
` (3 subsequent siblings)
7 siblings, 1 reply; 12+ messages in thread
From: Thomas Monjalon @ 2015-07-26 14:11 UTC (permalink / raw)
To: rsanford2; +Cc: dev
2015-07-23 18:42, rsanford2@gmail.com:
> From: Robert Sanford <rsanford@akamai.com>
>
> This patchset fixes a bug in timer stress test 2, adds a new stress test
> to expose a race condition bug in API rte_timer_manage(), and then fixes
> the rte_timer_manage() bug.
> --
>
> Patch 1, app/test timer stress test 2: Sometimes this test fails and
> seg-faults because the slave lcores get out of phase with the master.
> The master uses a single int, 'ready', to synchronize multiple slave
> lcores through multiple phases of the test.
>
> To resolve, we construct simple synchronization primitives that use one
> atomic-int state variable per slave. The master tells the slaves when to
> start, and then waits for all of them to finish. Each slave waits for
> the master to tell it to start, and then tells the master when it has
> finished.
> --
>
> Description of rte_timer_manage() race condition bug: Through code
> inspection, we noticed a potential problem in rte_timer_manage() that
> leads to corruption of per-lcore pending-lists (implemented as
> skip-lists). The race condition occurs when rte_timer_manage() expires
> multiple timers on lcore A, while lcore B simultaneously invokes
> rte_timer_reset() for one of the expiring timers (other than the first
> one).
>
> Lcore A splits its pending-list, creating a local list of expired timers
> linked through their sl_next[0] pointers, and sets the first expired
> timer to the RUNNING state, all during one list-lock round trip. Lcore A
> then unlocks the list-lock to run the first callback, and that is when
> lcore B can misinterpret the subsequent expired timers' true states.
> Lcore B sees an expired timer still in the PENDING state, locks A's
> list-lock, and reinserts the timer into A's pending-list. We are trying
> to use the same pointer to belong to both lists!
>
> One solution is to remove timers from the pending-list and set them to
> the RUNNING state in one atomic step, i.e., we should perform these two
> actions within one ownership of the list-lock.
> --
>
> Patch 2, new timer-manage race-condition test: We wrote a test to
> confirm our suspicion that we could crash rte_timer_manage() (RTM) under
> the right circumstances. We repeatedly set several timers to expire at
> roughly the same time on the master core. The master lcore just delays
> and runs RTM about ten times per second. The slave lcores all watch the
> first timer (timer-0) to see when RTM is running on the master, i.e.,
> timer-0's state is not PENDING. At this point, each slave attempts to
> reset a subset of the timers to a later expiration time. The goal here
> is to have the slaves moving most of the timers to a different place in
> the master's pending-list, while the master is working with the same
> next-pointers and running callback functions. This eventually results in
> the master traversing a corrupted linked-list. In our observations, it
> results in an infinite loop.
> --
>
> Patch 3, eliminate race condition in rte_timer_manage(): After splitting
> the pending-list at the current time point, we try to set all expired
> timers to the RUNNING state, and put back into the pending-list any
> timers that we fail to set to the RUNNING state, all during one
> list-lock round trip. It is then safe to run the callback functions
> for all expired timers that remain on our local list, without the lock.
Please, could you re-send this series after having added the description of
each patch in the commit messages?
It seems you fix 2 bugs in the first patch. It may be clearer to split it
in 2 patches with separate explanations.
Thanks
* Re: [dpdk-dev] [PATCH 0/3] timer: fix rte_timer_manage and improve unit tests
2015-07-26 14:11 ` [dpdk-dev] [PATCH 0/3] timer: fix rte_timer_manage and improve unit tests Thomas Monjalon
@ 2015-07-27 15:46 ` Sanford, Robert
2015-07-27 15:53 ` Thomas Monjalon
0 siblings, 1 reply; 12+ messages in thread
From: Sanford, Robert @ 2015-07-27 15:46 UTC (permalink / raw)
To: Thomas Monjalon, dev
Hi Thomas,
> Please, could you re-send this series after having added the description
> of each patch in the commit messages?
Yes, I will move the paragraphs that begin with "Patch n" from patch 0 to
their respective patches.
> It seems you fix 2 bugs in the first patch. It may be clearer to split it
> in 2 patches with separate explanations.
No, I respectfully disagree. The only bug that we address in patch 1 is
that the slaves become out of sync with the master.
The section that begins with "Description of rte_timer_manage() race
condition bug" is a general description to give background info for both
patches 2 and 3. I would like to leave that section in part 0, if it's OK
with you.
--
Thanks,
Robert
* Re: [dpdk-dev] [PATCH 0/3] timer: fix rte_timer_manage and improve unit tests
2015-07-27 15:46 ` Sanford, Robert
@ 2015-07-27 15:53 ` Thomas Monjalon
0 siblings, 0 replies; 12+ messages in thread
From: Thomas Monjalon @ 2015-07-27 15:53 UTC (permalink / raw)
To: Sanford, Robert; +Cc: dev
2015-07-27 15:46, Sanford, Robert:
> Hi Thomas,
>
> > Please, could you re-send this series after having added the description
> > of each patch in the commit messages?
>
> Yes, I will move the paragraphs that begin with "Patch n" from patch 0 to
> their respective patches.
>
> > It seems you fix 2 bugs in the first patch. It may be clearer to split it
> > in 2 patches with separate explanations.
>
> No, I respectfully disagree. The only bug that we address in patch 1 is
> that the slaves become out of sync with the master.
> The section that begins with "Description of rte_timer_manage() race
> condition bug" is a general description to give background info for both
> patches 2 and 3. I would like to leave that section in part 0, if it's OK
> with you.
OK perfect, thanks.
* [dpdk-dev] [PATCH v2 0/3] timer: fix rte_timer_manage and improve unit tests
2015-07-23 22:42 [dpdk-dev] [PATCH 0/3] timer: fix rte_timer_manage and improve unit tests rsanford2
` (3 preceding siblings ...)
2015-07-26 14:11 ` [dpdk-dev] [PATCH 0/3] timer: fix rte_timer_manage and improve unit tests Thomas Monjalon
@ 2015-07-27 22:46 ` rsanford2
2015-08-02 22:06 ` Thomas Monjalon
2015-07-27 22:46 ` [dpdk-dev] [PATCH v2 1/3] timer: fix stress test 2 synchronization bug rsanford2
` (2 subsequent siblings)
7 siblings, 1 reply; 12+ messages in thread
From: rsanford2 @ 2015-07-27 22:46 UTC (permalink / raw)
To: dev
From: Robert Sanford <rsanford@akamai.com>
This patchset fixes a bug in timer stress test 2, adds a new stress test
to expose a race condition bug in API rte_timer_manage(), and then fixes
the rte_timer_manage() bug.
Description of rte_timer_manage() race condition bug: Through code
inspection, we notice a potential problem in rte_timer_manage() that
leads to corruption of per-lcore pending-lists (implemented as
skip-lists). The race condition occurs when rte_timer_manage() expires
multiple timers on lcore A, while lcore B simultaneously invokes
rte_timer_reset() for one of the expiring timers (other than the first
one).
Lcore A splits its pending-list, creating a local list of expired timers
linked through their sl_next[0] pointers, and sets the first expired
timer to the RUNNING state, all during one list-lock round trip.
Lcore A then unlocks the list-lock to run the first callback, and that
is when A and B can have different interpretations of the subsequent
expired timers' true state. Lcore B sees an expired timer still in the
PENDING state, atomically changes the timer to the CONFIG state, locks
lcore A's list-lock, and reinserts the timer into A's pending-list.
The two lcores try to use the same next-pointers to maintain both lists!
v2 changes:
Move patch descriptions to their respective patches.
Correct checkpatch warnings.
Robert Sanford (3):
fix stress test 2 sync bug
add timer manage race condition test
fix race condition in rte_timer_manage
app/test/Makefile | 1 +
app/test/test_timer.c | 154 +++++++++++++++++++++++-------
app/test/test_timer_racecond.c | 209 ++++++++++++++++++++++++++++++++++++++++
lib/librte_timer/rte_timer.c | 56 +++++++----
4 files changed, 366 insertions(+), 54 deletions(-)
create mode 100644 app/test/test_timer_racecond.c
* Re: [dpdk-dev] [PATCH v2 0/3] timer: fix rte_timer_manage and improve unit tests
2015-07-27 22:46 ` [dpdk-dev] [PATCH v2 " rsanford2
@ 2015-08-02 22:06 ` Thomas Monjalon
0 siblings, 0 replies; 12+ messages in thread
From: Thomas Monjalon @ 2015-08-02 22:06 UTC (permalink / raw)
To: rsanford2; +Cc: dev
2015-07-27 18:46, rsanford2@gmail.com:
> From: Robert Sanford <rsanford@akamai.com>
>
> This patchset fixes a bug in timer stress test 2, adds a new stress test
> to expose a race condition bug in API rte_timer_manage(), and then fixes
> the rte_timer_manage() bug.
>
> Description of rte_timer_manage() race condition bug: Through code
> inspection, we notice a potential problem in rte_timer_manage() that
> leads to corruption of per-lcore pending-lists (implemented as
> skip-lists). The race condition occurs when rte_timer_manage() expires
> multiple timers on lcore A, while lcore B simultaneously invokes
> rte_timer_reset() for one of the expiring timers (other than the first
> one).
>
> Lcore A splits its pending-list, creating a local list of expired timers
> linked through their sl_next[0] pointers, and sets the first expired
> timer to the RUNNING state, all during one list-lock round trip.
> Lcore A then unlocks the list-lock to run the first callback, and that
> is when A and B can have different interpretations of the subsequent
> expired timers' true state. Lcore B sees an expired timer still in the
> PENDING state, atomically changes the timer to the CONFIG state, locks
> lcore A's list-lock, and reinserts the timer into A's pending-list.
> The two lcores try to use the same next-pointers to maintain both lists!
>
> v2 changes:
> Move patch descriptions to their respective patches.
> Correct checkpatch warnings.
Applied, thanks
* [dpdk-dev] [PATCH v2 1/3] timer: fix stress test 2 synchronization bug
2015-07-23 22:42 [dpdk-dev] [PATCH 0/3] timer: fix rte_timer_manage and improve unit tests rsanford2
` (4 preceding siblings ...)
2015-07-27 22:46 ` [dpdk-dev] [PATCH v2 " rsanford2
@ 2015-07-27 22:46 ` rsanford2
2015-07-27 22:46 ` [dpdk-dev] [PATCH v2 2/3] timer: add timer-manage race condition test rsanford2
2015-07-27 22:46 ` [dpdk-dev] [PATCH v2 3/3] timer: fix race condition in rte_timer_manage() rsanford2
7 siblings, 0 replies; 12+ messages in thread
From: rsanford2 @ 2015-07-27 22:46 UTC (permalink / raw)
To: dev
From: Robert Sanford <rsanford@akamai.com>
Fix app/test timer stress test 2: Sometimes this test fails and
seg-faults because the slave lcores get out of phase with the master.
The master uses a single int, 'ready', to synchronize multiple slave
lcores through multiple phases of the test.
To resolve, we construct simple synchronization primitives that use one
atomic-int state variable per slave. The master tells the slaves when to
start, and then waits for all of them to finish. Each slave waits for
the master to tell it to start, and then tells the master when it has
finished.
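For readers outside DPDK, the same per-slave state-machine idea can be
sketched with C11 atomics and plain pthreads. Everything below (worker_state,
NUM_WORKERS, the WAITING/RUN_SIGNAL/RUNNING/FINISHED names) is invented for
the sketch; the patch itself uses rte_atomic16_t, RTE_LCORE_FOREACH_SLAVE and
rte_pause():

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NUM_WORKERS 4
enum { WAITING = 1, RUN_SIGNAL, RUNNING, FINISHED };

static atomic_int worker_state[NUM_WORKERS];

static void master_start_workers(void)
{
	int i;

	for (i = 0; i < NUM_WORKERS; i++)
		atomic_store(&worker_state[i], RUN_SIGNAL);
	/* wait until every worker has acknowledged the start signal */
	for (i = 0; i < NUM_WORKERS; i++)
		while (atomic_load(&worker_state[i]) != RUNNING)
			;
}

static void master_wait_for_workers(void)
{
	int i;

	/* wait until every worker has reported completion */
	for (i = 0; i < NUM_WORKERS; i++)
		while (atomic_load(&worker_state[i]) != FINISHED)
			;
}

static void *worker(void *arg)
{
	int id = (int)(long)arg;

	/* wait for the start signal, then acknowledge it */
	while (atomic_load(&worker_state[id]) != RUN_SIGNAL)
		;
	atomic_store(&worker_state[id], RUNNING);

	/* ... per-worker phase of the test would run here ... */

	atomic_store(&worker_state[id], FINISHED);
	return NULL;
}

int main(void)
{
	pthread_t tid[NUM_WORKERS];
	int i;

	for (i = 0; i < NUM_WORKERS; i++) {
		atomic_store(&worker_state[i], WAITING);
		pthread_create(&tid[i], NULL, worker, (void *)(long)i);
	}
	master_start_workers();
	master_wait_for_workers();
	for (i = 0; i < NUM_WORKERS; i++)
		pthread_join(tid[i], NULL);
	printf("all %d workers finished\n", NUM_WORKERS);
	return 0;
}

Because each worker owns its own state word, the master can never mistake a
worker still in an earlier phase for one that is ready, which is the failure
mode of the single shared 'ready' flag.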
Signed-off-by: Robert Sanford <rsanford@akamai.com>
---
app/test/test_timer.c | 154 ++++++++++++++++++++++++++++++++++++++-----------
1 files changed, 121 insertions(+), 33 deletions(-)
diff --git a/app/test/test_timer.c b/app/test/test_timer.c
index 73da5b6..944e2ad 100644
--- a/app/test/test_timer.c
+++ b/app/test/test_timer.c
@@ -137,13 +137,13 @@
#include <rte_random.h>
#include <rte_malloc.h>
-
#define TEST_DURATION_S 20 /* in seconds */
#define NB_TIMER 4
#define RTE_LOGTYPE_TESTTIMER RTE_LOGTYPE_USER3
static volatile uint64_t end_time;
+static volatile int test_failed;
struct mytimerinfo {
struct rte_timer tim;
@@ -230,6 +230,64 @@ timer_stress_main_loop(__attribute__((unused)) void *arg)
return 0;
}
+/* Need to synchronize slave lcores through multiple steps. */
+enum { SLAVE_WAITING = 1, SLAVE_RUN_SIGNAL, SLAVE_RUNNING, SLAVE_FINISHED };
+static rte_atomic16_t slave_state[RTE_MAX_LCORE];
+
+static void
+master_init_slaves(void)
+{
+ unsigned i;
+
+ RTE_LCORE_FOREACH_SLAVE(i) {
+ rte_atomic16_set(&slave_state[i], SLAVE_WAITING);
+ }
+}
+
+static void
+master_start_slaves(void)
+{
+ unsigned i;
+
+ RTE_LCORE_FOREACH_SLAVE(i) {
+ rte_atomic16_set(&slave_state[i], SLAVE_RUN_SIGNAL);
+ }
+ RTE_LCORE_FOREACH_SLAVE(i) {
+ while (rte_atomic16_read(&slave_state[i]) != SLAVE_RUNNING)
+ rte_pause();
+ }
+}
+
+static void
+master_wait_for_slaves(void)
+{
+ unsigned i;
+
+ RTE_LCORE_FOREACH_SLAVE(i) {
+ while (rte_atomic16_read(&slave_state[i]) != SLAVE_FINISHED)
+ rte_pause();
+ }
+}
+
+static void
+slave_wait_to_start(void)
+{
+ unsigned lcore_id = rte_lcore_id();
+
+ while (rte_atomic16_read(&slave_state[lcore_id]) != SLAVE_RUN_SIGNAL)
+ rte_pause();
+ rte_atomic16_set(&slave_state[lcore_id], SLAVE_RUNNING);
+}
+
+static void
+slave_finish(void)
+{
+ unsigned lcore_id = rte_lcore_id();
+
+ rte_atomic16_set(&slave_state[lcore_id], SLAVE_FINISHED);
+}
+
+
static volatile int cb_count = 0;
/* callback for second stress test. will only be called
@@ -247,31 +305,37 @@ timer_stress2_main_loop(__attribute__((unused)) void *arg)
{
static struct rte_timer *timers;
int i, ret;
- static volatile int ready = 0;
uint64_t delay = rte_get_timer_hz() / 4;
unsigned lcore_id = rte_lcore_id();
+ unsigned master = rte_get_master_lcore();
int32_t my_collisions = 0;
- static rte_atomic32_t collisions = RTE_ATOMIC32_INIT(0);
+ static rte_atomic32_t collisions;
- if (lcore_id == rte_get_master_lcore()) {
+ if (lcore_id == master) {
cb_count = 0;
+ test_failed = 0;
+ rte_atomic32_set(&collisions, 0);
+ master_init_slaves();
timers = rte_malloc(NULL, sizeof(*timers) * NB_STRESS2_TIMERS, 0);
if (timers == NULL) {
printf("Test Failed\n");
printf("- Cannot allocate memory for timers\n" );
- return -1;
+ test_failed = 1;
+ master_start_slaves();
+ goto cleanup;
}
for (i = 0; i < NB_STRESS2_TIMERS; i++)
rte_timer_init(&timers[i]);
- ready = 1;
+ master_start_slaves();
} else {
- while (!ready)
- rte_pause();
+ slave_wait_to_start();
+ if (test_failed)
+ goto cleanup;
}
/* have all cores schedule all timers on master lcore */
for (i = 0; i < NB_STRESS2_TIMERS; i++) {
- ret = rte_timer_reset(&timers[i], delay, SINGLE, rte_get_master_lcore(),
+ ret = rte_timer_reset(&timers[i], delay, SINGLE, master,
timer_stress2_cb, NULL);
/* there will be collisions when multiple cores simultaneously
* configure the same timers */
@@ -281,11 +345,18 @@ timer_stress2_main_loop(__attribute__((unused)) void *arg)
if (my_collisions != 0)
rte_atomic32_add(&collisions, my_collisions);
- ready = 0;
+ /* wait long enough for timers to expire */
rte_delay_ms(500);
+ /* all cores rendezvous */
+ if (lcore_id == master) {
+ master_wait_for_slaves();
+ } else {
+ slave_finish();
+ }
+
/* now check that we get the right number of callbacks */
- if (lcore_id == rte_get_master_lcore()) {
+ if (lcore_id == master) {
my_collisions = rte_atomic32_read(&collisions);
if (my_collisions != 0)
printf("- %d timer reset collisions (OK)\n", my_collisions);
@@ -295,49 +366,63 @@ timer_stress2_main_loop(__attribute__((unused)) void *arg)
printf("- Stress test 2, part 1 failed\n");
printf("- Expected %d callbacks, got %d\n", NB_STRESS2_TIMERS,
cb_count);
- return -1;
+ test_failed = 1;
+ master_start_slaves();
+ goto cleanup;
}
- ready = 1;
+ cb_count = 0;
+
+ /* proceed */
+ master_start_slaves();
} else {
- while (!ready)
- rte_pause();
+ /* proceed */
+ slave_wait_to_start();
+ if (test_failed)
+ goto cleanup;
}
/* now test again, just stop and restart timers at random after init*/
for (i = 0; i < NB_STRESS2_TIMERS; i++)
- rte_timer_reset(&timers[i], delay, SINGLE, rte_get_master_lcore(),
+ rte_timer_reset(&timers[i], delay, SINGLE, master,
timer_stress2_cb, NULL);
- cb_count = 0;
/* pick random timer to reset, stopping them first half the time */
for (i = 0; i < 100000; i++) {
int r = rand() % NB_STRESS2_TIMERS;
if (i % 2)
rte_timer_stop(&timers[r]);
- rte_timer_reset(&timers[r], delay, SINGLE, rte_get_master_lcore(),
+ rte_timer_reset(&timers[r], delay, SINGLE, master,
timer_stress2_cb, NULL);
}
+ /* wait long enough for timers to expire */
rte_delay_ms(500);
/* now check that we get the right number of callbacks */
- if (lcore_id == rte_get_master_lcore()) {
- rte_timer_manage();
-
- /* clean up statics, in case we run again */
- rte_free(timers);
- timers = NULL;
- ready = 0;
- rte_atomic32_set(&collisions, 0);
+ if (lcore_id == master) {
+ master_wait_for_slaves();
+ rte_timer_manage();
if (cb_count != NB_STRESS2_TIMERS) {
printf("Test Failed\n");
printf("- Stress test 2, part 2 failed\n");
printf("- Expected %d callbacks, got %d\n", NB_STRESS2_TIMERS,
cb_count);
- return -1;
+ test_failed = 1;
+ } else {
+ printf("Test OK\n");
}
- printf("Test OK\n");
+ }
+
+cleanup:
+ if (lcore_id == master) {
+ master_wait_for_slaves();
+ if (timers != NULL) {
+ rte_free(timers);
+ timers = NULL;
+ }
+ } else {
+ slave_finish();
}
return 0;
@@ -485,12 +570,12 @@ test_timer(void)
/* sanity check our timer sources and timer config values */
if (timer_sanity_check() < 0) {
printf("Timer sanity checks failed\n");
- return -1;
+ return TEST_FAILED;
}
if (rte_lcore_count() < 2) {
printf("not enough lcores for this test\n");
- return -1;
+ return TEST_FAILED;
}
/* init timer */
@@ -514,9 +599,12 @@ test_timer(void)
rte_timer_stop_sync(&mytiminfo[0].tim);
/* run a second, slightly different set of stress tests */
- printf("Start timer stress tests 2\n");
+ printf("\nStart timer stress tests 2\n");
+ test_failed = 0;
rte_eal_mp_remote_launch(timer_stress2_main_loop, NULL, CALL_MASTER);
rte_eal_mp_wait_lcore();
+ if (test_failed)
+ return TEST_FAILED;
/* calculate the "end of test" time */
cur_time = rte_get_timer_cycles();
@@ -524,7 +612,7 @@ test_timer(void)
end_time = cur_time + (hz * TEST_DURATION_S);
/* start other cores */
- printf("Start timer basic tests (%d seconds)\n", TEST_DURATION_S);
+ printf("\nStart timer basic tests (%d seconds)\n", TEST_DURATION_S);
rte_eal_mp_remote_launch(timer_basic_main_loop, NULL, CALL_MASTER);
rte_eal_mp_wait_lcore();
@@ -535,7 +623,7 @@ test_timer(void)
rte_timer_dump_stats(stdout);
- return 0;
+ return TEST_SUCCESS;
}
static struct test_command timer_cmd = {
--
1.7.1
* [dpdk-dev] [PATCH v2 2/3] timer: add timer-manage race condition test
2015-07-23 22:42 [dpdk-dev] [PATCH 0/3] timer: fix rte_timer_manage and improve unit tests rsanford2
` (5 preceding siblings ...)
2015-07-27 22:46 ` [dpdk-dev] [PATCH v2 1/3] timer: fix stress test 2 synchronization bug rsanford2
@ 2015-07-27 22:46 ` rsanford2
2015-07-27 22:46 ` [dpdk-dev] [PATCH v2 3/3] timer: fix race condition in rte_timer_manage() rsanford2
7 siblings, 0 replies; 12+ messages in thread
From: rsanford2 @ 2015-07-27 22:46 UTC (permalink / raw)
To: dev
From: Robert Sanford <rsanford@akamai.com>
Add new timer-manage race-condition test: We wrote a test to confirm
our suspicion that we could crash rte_timer_manage() under the right
circumstances. We repeatedly set several timers to expire at roughly
the same time on the master core. The master lcore just delays and runs
rte_timer_manage() about ten times per second. The slave lcores all
watch the first timer (timer-0) to see when rte_timer_manage() is
running on the master, i.e., timer-0's state is not PENDING.
At this point, each slave attempts to reset a subset of the timers to
a later expiration time. The goal here is to have the slaves moving
most of the timers to a different place in the master's pending-list,
while the master is traversing the same next-pointers (the timers'
sl_next[0] pointers) and running callback functions. This eventually
results in the master traversing a corrupted linked-list.
In our observations, it results in an infinite loop.
Signed-off-by: Robert Sanford <rsanford@akamai.com>
---
app/test/Makefile | 1 +
app/test/test_timer_racecond.c | 209 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 210 insertions(+), 0 deletions(-)
create mode 100644 app/test/test_timer_racecond.c
diff --git a/app/test/Makefile b/app/test/Makefile
index caa359c..e7f148f 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -71,6 +71,7 @@ SRCS-y += test_rwlock.c
SRCS-$(CONFIG_RTE_LIBRTE_TIMER) += test_timer.c
SRCS-$(CONFIG_RTE_LIBRTE_TIMER) += test_timer_perf.c
+SRCS-$(CONFIG_RTE_LIBRTE_TIMER) += test_timer_racecond.c
SRCS-y += test_mempool.c
SRCS-y += test_mempool_perf.c
diff --git a/app/test/test_timer_racecond.c b/app/test/test_timer_racecond.c
new file mode 100644
index 0000000..32693d8
--- /dev/null
+++ b/app/test/test_timer_racecond.c
@@ -0,0 +1,209 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Akamai Technologies.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "test.h"
+
+#include <stdio.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <rte_cycles.h>
+#include <rte_timer.h>
+#include <rte_common.h>
+#include <rte_lcore.h>
+#include <rte_random.h>
+#include <rte_malloc.h>
+
+#undef TEST_TIMER_RACECOND_VERBOSE
+
+#ifdef RTE_EXEC_ENV_LINUXAPP
+#define usec_delay(us) usleep(us)
+#else
+#define usec_delay(us) rte_delay_us(us)
+#endif
+
+#define BILLION (1UL << 30)
+
+#define TEST_DURATION_S 20 /* in seconds */
+#define N_TIMERS 50
+
+static struct rte_timer timer[N_TIMERS];
+static unsigned timer_lcore_id[N_TIMERS];
+
+static unsigned master;
+static volatile unsigned stop_slaves;
+
+static int reload_timer(struct rte_timer *tim);
+
+static void
+timer_cb(struct rte_timer *tim, void *arg __rte_unused)
+{
+ /* Simulate slow callback function, 100 us. */
+ rte_delay_us(100);
+
+#ifdef TEST_TIMER_RACECOND_VERBOSE
+ if (tim == &timer[0])
+ printf("------------------------------------------------\n");
+ printf("timer_cb: core %u timer %lu\n",
+ rte_lcore_id(), tim - timer);
+#endif
+ (void)reload_timer(tim);
+}
+
+RTE_DEFINE_PER_LCORE(unsigned, n_reset_collisions);
+
+static int
+reload_timer(struct rte_timer *tim)
+{
+ /* Make timer expire roughly when the TSC hits the next BILLION
+ * multiple. Add in timer's index to make them expire in nearly
+ * sorted order. This makes all timers somewhat synchronized,
+ * firing ~2-3 times per second, assuming 2-3 GHz TSCs.
+ */
+ uint64_t ticks = BILLION - (rte_get_timer_cycles() % BILLION) +
+ (tim - timer);
+ int ret;
+
+ ret = rte_timer_reset(tim, ticks, PERIODICAL, master, timer_cb, NULL);
+ if (ret != 0) {
+#ifdef TEST_TIMER_RACECOND_VERBOSE
+ printf("- core %u failed to reset timer %lu (OK)\n",
+ rte_lcore_id(), tim - timer);
+#endif
+ RTE_PER_LCORE(n_reset_collisions) += 1;
+ }
+ return ret;
+}
+
+static int
+slave_main_loop(__attribute__((unused)) void *arg)
+{
+ unsigned lcore_id = rte_lcore_id();
+ unsigned i;
+
+ RTE_PER_LCORE(n_reset_collisions) = 0;
+
+ printf("Starting main loop on core %u\n", lcore_id);
+
+ while (!stop_slaves) {
+ /* Wait until the timer manager is running.
+ * We know it's running when we see timer[0] NOT pending.
+ */
+ if (rte_timer_pending(&timer[0])) {
+ rte_pause();
+ continue;
+ }
+
+ /* Now, go cause some havoc!
+ * Reload our timers.
+ */
+ for (i = 0; i < N_TIMERS; i++) {
+ if (timer_lcore_id[i] == lcore_id)
+ (void)reload_timer(&timer[i]);
+ }
+ usec_delay(100*1000); /* sleep 100 ms */
+ }
+
+ if (RTE_PER_LCORE(n_reset_collisions) != 0) {
+ printf("- core %u, %u reset collisions (OK)\n",
+ lcore_id, RTE_PER_LCORE(n_reset_collisions));
+ }
+ return 0;
+}
+
+static int
+test_timer_racecond(void)
+{
+ int ret;
+ uint64_t hz;
+ uint64_t cur_time;
+ uint64_t end_time;
+ int64_t diff = 0;
+ unsigned lcore_id;
+ unsigned i;
+
+ master = lcore_id = rte_lcore_id();
+ hz = rte_get_timer_hz();
+
+ /* init and start timers */
+ for (i = 0; i < N_TIMERS; i++) {
+ rte_timer_init(&timer[i]);
+ ret = reload_timer(&timer[i]);
+ TEST_ASSERT(ret == 0, "reload_timer failed");
+
+ /* Distribute timers to slaves.
+ * Note that we assign timer[0] to the master.
+ */
+ timer_lcore_id[i] = lcore_id;
+ lcore_id = rte_get_next_lcore(lcore_id, 1, 1);
+ }
+
+ /* calculate the "end of test" time */
+ cur_time = rte_get_timer_cycles();
+ end_time = cur_time + (hz * TEST_DURATION_S);
+
+ /* start slave cores */
+ stop_slaves = 0;
+ printf("Start timer manage race condition test (%u seconds)\n",
+ TEST_DURATION_S);
+ rte_eal_mp_remote_launch(slave_main_loop, NULL, SKIP_MASTER);
+
+ while (diff >= 0) {
+ /* run the timers */
+ rte_timer_manage();
+
+ /* wait 100 ms */
+ usec_delay(100*1000);
+
+ cur_time = rte_get_timer_cycles();
+ diff = end_time - cur_time;
+ }
+
+ /* stop slave cores */
+ printf("Stopping timer manage race condition test\n");
+ stop_slaves = 1;
+ rte_eal_mp_wait_lcore();
+
+ /* stop timers */
+ for (i = 0; i < N_TIMERS; i++) {
+ ret = rte_timer_stop(&timer[i]);
+ TEST_ASSERT(ret == 0, "rte_timer_stop failed");
+ }
+
+ return TEST_SUCCESS;
+}
+
+static struct test_command timer_racecond_cmd = {
+ .command = "timer_racecond_autotest",
+ .callback = test_timer_racecond,
+};
+REGISTER_TEST_COMMAND(timer_racecond_cmd);
--
1.7.1
* [dpdk-dev] [PATCH v2 3/3] timer: fix race condition in rte_timer_manage()
2015-07-23 22:42 [dpdk-dev] [PATCH 0/3] timer: fix rte_timer_manage and improve unit tests rsanford2
` (6 preceding siblings ...)
2015-07-27 22:46 ` [dpdk-dev] [PATCH v2 2/3] timer: add timer-manage race condition test rsanford2
@ 2015-07-27 22:46 ` rsanford2
7 siblings, 0 replies; 12+ messages in thread
From: rsanford2 @ 2015-07-27 22:46 UTC (permalink / raw)
To: dev
From: Robert Sanford <rsanford@akamai.com>
Eliminate problematic race condition in rte_timer_manage() that can
lead to corruption of per-lcore pending-lists (implemented as
skip-lists). The race condition occurs when rte_timer_manage() expires
multiple timers on lcore A, while lcore B simultaneously invokes
rte_timer_reset() for one of the expiring timers (other than the first
one).
Lcore A splits its pending-list, creating a local list of expired timers
linked through their sl_next[0] pointers, and sets the first expired
timer to the RUNNING state, all during one list-lock round trip.
Lcore A then unlocks the list-lock to run the first callback, and that
is when A and B can have different interpretations of the subsequent
expired timers' true state. Lcore B sees an expired timer still in the
PENDING state, atomically changes the timer to the CONFIG state, locks
lcore A's list-lock, and reinserts the timer into A's pending-list.
The two lcores try to use the same next-pointers to maintain both lists!
Our solution is to remove expired timers from the pending-list and try
to set them all to the RUNNING state in one atomic step, i.e.,
rte_timer_manage() should perform these two actions during a single
hold of the list-lock.
After splitting the pending-list at the current point in time and trying
to set all expired timers to the RUNNING state, we must put back into
the pending-list any timers that we failed to set to the RUNNING state,
all while still holding the list-lock. It is then safe to release the
lock and run the callback functions for all expired timers that remain
on our local run-list.
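To make the new locking discipline easier to see without the skip-list
machinery, here is a minimal self-contained sketch. All of it (struct tmr,
manage(), hello(), the single-linked pending list) is invented for the
example and is not DPDK code; the point it shares with the patch is that
entries are unlinked and moved from PENDING to RUNNING during one hold of
the list lock, and callbacks only run after the lock is released:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

enum { PENDING, CONFIG, RUNNING };	/* CONFIG: being reset by another thread */

struct tmr {
	struct tmr *next;		/* singly-linked pending list */
	atomic_int state;
	unsigned long expire;
	void (*cb)(struct tmr *);
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct tmr *pending_head;

static void manage(unsigned long now)
{
	struct tmr *expired = NULL, **etail = &expired;
	struct tmr *run_first = NULL, **pprev = &run_first;
	struct tmr *t, *next;

	pthread_mutex_lock(&list_lock);

	/* 1) detach every expired entry from the pending list */
	while (pending_head != NULL && pending_head->expire <= now) {
		t = pending_head;
		pending_head = t->next;
		t->next = NULL;
		*etail = t;
		etail = &t->next;
	}

	/* 2) still under the lock, transition each one PENDING -> RUNNING;
	 * an entry that another thread already moved to CONFIG goes back
	 * on the pending list instead of onto the run list */
	for (t = expired; t != NULL; t = next) {
		int expect = PENDING;

		next = t->next;
		if (atomic_compare_exchange_strong(&t->state, &expect,
						   RUNNING)) {
			*pprev = t;
			pprev = &t->next;
			t->next = NULL;
		} else {
			t->next = pending_head;	/* ordering ignored here */
			pending_head = t;
		}
	}

	pthread_mutex_unlock(&list_lock);

	/* 3) run callbacks with the list lock released */
	for (t = run_first; t != NULL; t = next) {
		next = t->next;
		t->cb(t);
	}
}

static void hello(struct tmr *t)
{
	printf("callback for entry expiring at %lu\n", t->expire);
}

int main(void)
{
	struct tmr a = { .expire = 1, .cb = hello };
	struct tmr b = { .expire = 5, .cb = hello };

	atomic_init(&a.state, PENDING);
	atomic_init(&b.state, PENDING);
	a.next = &b;
	pending_head = &a;
	manage(3);		/* expires a only; b stays pending */
	printf("still pending: %s\n", pending_head == &b ? "b" : "none");
	return 0;
}

A reset path on another thread would first move an entry from PENDING to
CONFIG (the compare-and-swap that manage() can lose above) before taking
list_lock and relinking it, so a timer can no longer sit on two lists
through the same next pointer.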
Signed-off-by: Robert Sanford <rsanford@akamai.com>
---
lib/librte_timer/rte_timer.c | 56 ++++++++++++++++++++++++++---------------
1 files changed, 35 insertions(+), 21 deletions(-)
diff --git a/lib/librte_timer/rte_timer.c b/lib/librte_timer/rte_timer.c
index 8e9243a..3dcdab5 100644
--- a/lib/librte_timer/rte_timer.c
+++ b/lib/librte_timer/rte_timer.c
@@ -504,6 +504,7 @@ void rte_timer_manage(void)
{
union rte_timer_status status;
struct rte_timer *tim, *next_tim;
+ struct rte_timer *run_first_tim, **pprev;
unsigned lcore_id = rte_lcore_id();
struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
uint64_t cur_time;
@@ -519,9 +520,9 @@ void rte_timer_manage(void)
cur_time = rte_get_timer_cycles();
#ifdef RTE_ARCH_X86_64
- /* on 64-bit the value cached in the pending_head.expired will be updated
- * atomically, so we can consult that for a quick check here outside the
- * lock */
+ /* on 64-bit the value cached in the pending_head.expired will be
+ * updated atomically, so we can consult that for a quick check here
+ * outside the lock */
if (likely(priv_timer[lcore_id].pending_head.expire > cur_time))
return;
#endif
@@ -531,8 +532,10 @@ void rte_timer_manage(void)
/* if nothing to do just unlock and return */
if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL ||
- priv_timer[lcore_id].pending_head.sl_next[0]->expire > cur_time)
- goto done;
+ priv_timer[lcore_id].pending_head.sl_next[0]->expire > cur_time) {
+ rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
+ return;
+ }
/* save start of list of expired timers */
tim = priv_timer[lcore_id].pending_head.sl_next[0];
@@ -540,30 +543,47 @@ void rte_timer_manage(void)
/* break the existing list at current time point */
timer_get_prev_entries(cur_time, lcore_id, prev);
for (i = priv_timer[lcore_id].curr_skiplist_depth -1; i >= 0; i--) {
- priv_timer[lcore_id].pending_head.sl_next[i] = prev[i]->sl_next[i];
+ priv_timer[lcore_id].pending_head.sl_next[i] =
+ prev[i]->sl_next[i];
if (prev[i]->sl_next[i] == NULL)
priv_timer[lcore_id].curr_skiplist_depth--;
prev[i] ->sl_next[i] = NULL;
}
- /* now scan expired list and call callbacks */
+ /* transition run-list from PENDING to RUNNING */
+ run_first_tim = tim;
+ pprev = &run_first_tim;
+
for ( ; tim != NULL; tim = next_tim) {
next_tim = tim->sl_next[0];
ret = timer_set_running_state(tim);
+ if (likely(ret == 0)) {
+ pprev = &tim->sl_next[0];
+ } else {
+ /* another core is trying to re-config this one,
+ * remove it from local expired list and put it
+ * back on the priv_timer[] skip list */
+ *pprev = next_tim;
+ timer_add(tim, lcore_id, 1);
+ }
+ }
- /* this timer was not pending, continue */
- if (ret < 0)
- continue;
+ /* update the next to expire timer value */
+ priv_timer[lcore_id].pending_head.expire =
+ (priv_timer[lcore_id].pending_head.sl_next[0] == NULL) ? 0 :
+ priv_timer[lcore_id].pending_head.sl_next[0]->expire;
- rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
+ rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
+ /* now scan expired list and call callbacks */
+ for (tim = run_first_tim; tim != NULL; tim = next_tim) {
+ next_tim = tim->sl_next[0];
priv_timer[lcore_id].updated = 0;
/* execute callback function with list unlocked */
tim->f(tim, tim->arg);
- rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
__TIMER_STAT_ADD(pending, -1);
/* the timer was stopped or reloaded by the callback
* function, we have nothing to do here */
@@ -579,23 +599,17 @@ void rte_timer_manage(void)
}
else {
/* keep it in list and mark timer as pending */
+ rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
status.state = RTE_TIMER_PENDING;
__TIMER_STAT_ADD(pending, 1);
status.owner = (int16_t)lcore_id;
rte_wmb();
tim->status.u32 = status.u32;
__rte_timer_reset(tim, cur_time + tim->period,
- tim->period, lcore_id, tim->f, tim->arg, 1);
+ tim->period, lcore_id, tim->f, tim->arg, 1);
+ rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
}
}
-
- /* update the next to expire timer value */
- priv_timer[lcore_id].pending_head.expire =
- (priv_timer[lcore_id].pending_head.sl_next[0] == NULL) ? 0 :
- priv_timer[lcore_id].pending_head.sl_next[0]->expire;
-done:
- /* job finished, unlock the list lock */
- rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
}
/* dump statistics about timers */
--
1.7.1