From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-ig0-f169.google.com (mail-ig0-f169.google.com [209.85.213.169]) by dpdk.org (Postfix) with ESMTP id 9EA57C5A8 for ; Tue, 28 Jul 2015 00:46:24 +0200 (CEST) Received: by igbij6 with SMTP id ij6so92476206igb.1 for ; Mon, 27 Jul 2015 15:46:24 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=from:to:cc:subject:date:message-id:in-reply-to:references; bh=mtddWISqhfEtYyxj8ghBrF7MoEdgVm39TrpuKN1ejkM=; b=hgZ6kzEnngdN2ykcybV6TzqQFsHTytZFLMUiuUMAvXt3EfuVarOizopmUaJTfMslEj GSELuZuBRn/y8PJDFywJxX9CX7VQ2F/JDfW391CQIcFlcSaAhcjVTBl6eL5xOYdJXomn tT1H+R4FYdYb1GfN8pJF8Ajlr3k26Ib9W8Te5MfRBoxebUPN6Zq4M+6QWynyU6+EE47u cT0aEJGIk5IodgI4WA2L1EI01bw29af6G0b8qgdfS0oOeK8jeMGiuqSra2+m/WxDsEds z5UQinRZrw8xOrOIM5IUWblXRSbHTZD7lzzICjxDruYMl14kmh51VlwDvrBQJ6giNxdw 1NmA== X-Received: by 10.107.137.87 with SMTP id l84mr48327828iod.119.1438037184216; Mon, 27 Jul 2015 15:46:24 -0700 (PDT) Received: from localhost.localdomain ([23.79.237.14]) by smtp.gmail.com with ESMTPSA id j18sm7063061igf.2.2015.07.27.15.46.23 (version=TLSv1.2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Mon, 27 Jul 2015 15:46:23 -0700 (PDT) From: rsanford2@gmail.com To: dev@dpdk.org Date: Mon, 27 Jul 2015 18:46:04 -0400 Message-Id: <1438037168-639-2-git-send-email-rsanford2@gmail.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1437691347-58708-1-git-send-email-rsanford2@gmail.com> References: <1437691347-58708-1-git-send-email-rsanford2@gmail.com> Subject: [dpdk-dev] [PATCH v2 1/3] timer: fix stress test 2 synchronization bug X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 27 Jul 2015 22:46:25 -0000 From: Robert Sanford Fix app/test timer stress test 2: Sometimes this test fails and seg-faults because the slave lcores get out of phase with the master. The master uses a single int, 'ready', to synchronize multiple slave lcores through multiple phases of the test. To resolve, we construct simple synchronization primitives that use one atomic-int state variable per slave. The master tells the slaves when to start, and then waits for all of them to finish. Each slave waits for the master to tell it to start, and then tells the master when it has finished. Signed-off-by: Robert Sanford --- app/test/test_timer.c | 154 ++++++++++++++++++++++++++++++++++++++----------- 1 files changed, 121 insertions(+), 33 deletions(-) diff --git a/app/test/test_timer.c b/app/test/test_timer.c index 73da5b6..944e2ad 100644 --- a/app/test/test_timer.c +++ b/app/test/test_timer.c @@ -137,13 +137,13 @@ #include #include - #define TEST_DURATION_S 20 /* in seconds */ #define NB_TIMER 4 #define RTE_LOGTYPE_TESTTIMER RTE_LOGTYPE_USER3 static volatile uint64_t end_time; +static volatile int test_failed; struct mytimerinfo { struct rte_timer tim; @@ -230,6 +230,64 @@ timer_stress_main_loop(__attribute__((unused)) void *arg) return 0; } +/* Need to synchronize slave lcores through multiple steps. */ +enum { SLAVE_WAITING = 1, SLAVE_RUN_SIGNAL, SLAVE_RUNNING, SLAVE_FINISHED }; +static rte_atomic16_t slave_state[RTE_MAX_LCORE]; + +static void +master_init_slaves(void) +{ + unsigned i; + + RTE_LCORE_FOREACH_SLAVE(i) { + rte_atomic16_set(&slave_state[i], SLAVE_WAITING); + } +} + +static void +master_start_slaves(void) +{ + unsigned i; + + RTE_LCORE_FOREACH_SLAVE(i) { + rte_atomic16_set(&slave_state[i], SLAVE_RUN_SIGNAL); + } + RTE_LCORE_FOREACH_SLAVE(i) { + while (rte_atomic16_read(&slave_state[i]) != SLAVE_RUNNING) + rte_pause(); + } +} + +static void +master_wait_for_slaves(void) +{ + unsigned i; + + RTE_LCORE_FOREACH_SLAVE(i) { + while (rte_atomic16_read(&slave_state[i]) != SLAVE_FINISHED) + rte_pause(); + } +} + +static void +slave_wait_to_start(void) +{ + unsigned lcore_id = rte_lcore_id(); + + while (rte_atomic16_read(&slave_state[lcore_id]) != SLAVE_RUN_SIGNAL) + rte_pause(); + rte_atomic16_set(&slave_state[lcore_id], SLAVE_RUNNING); +} + +static void +slave_finish(void) +{ + unsigned lcore_id = rte_lcore_id(); + + rte_atomic16_set(&slave_state[lcore_id], SLAVE_FINISHED); +} + + static volatile int cb_count = 0; /* callback for second stress test. will only be called @@ -247,31 +305,37 @@ timer_stress2_main_loop(__attribute__((unused)) void *arg) { static struct rte_timer *timers; int i, ret; - static volatile int ready = 0; uint64_t delay = rte_get_timer_hz() / 4; unsigned lcore_id = rte_lcore_id(); + unsigned master = rte_get_master_lcore(); int32_t my_collisions = 0; - static rte_atomic32_t collisions = RTE_ATOMIC32_INIT(0); + static rte_atomic32_t collisions; - if (lcore_id == rte_get_master_lcore()) { + if (lcore_id == master) { cb_count = 0; + test_failed = 0; + rte_atomic32_set(&collisions, 0); + master_init_slaves(); timers = rte_malloc(NULL, sizeof(*timers) * NB_STRESS2_TIMERS, 0); if (timers == NULL) { printf("Test Failed\n"); printf("- Cannot allocate memory for timers\n" ); - return -1; + test_failed = 1; + master_start_slaves(); + goto cleanup; } for (i = 0; i < NB_STRESS2_TIMERS; i++) rte_timer_init(&timers[i]); - ready = 1; + master_start_slaves(); } else { - while (!ready) - rte_pause(); + slave_wait_to_start(); + if (test_failed) + goto cleanup; } /* have all cores schedule all timers on master lcore */ for (i = 0; i < NB_STRESS2_TIMERS; i++) { - ret = rte_timer_reset(&timers[i], delay, SINGLE, rte_get_master_lcore(), + ret = rte_timer_reset(&timers[i], delay, SINGLE, master, timer_stress2_cb, NULL); /* there will be collisions when multiple cores simultaneously * configure the same timers */ @@ -281,11 +345,18 @@ timer_stress2_main_loop(__attribute__((unused)) void *arg) if (my_collisions != 0) rte_atomic32_add(&collisions, my_collisions); - ready = 0; + /* wait long enough for timers to expire */ rte_delay_ms(500); + /* all cores rendezvous */ + if (lcore_id == master) { + master_wait_for_slaves(); + } else { + slave_finish(); + } + /* now check that we get the right number of callbacks */ - if (lcore_id == rte_get_master_lcore()) { + if (lcore_id == master) { my_collisions = rte_atomic32_read(&collisions); if (my_collisions != 0) printf("- %d timer reset collisions (OK)\n", my_collisions); @@ -295,49 +366,63 @@ timer_stress2_main_loop(__attribute__((unused)) void *arg) printf("- Stress test 2, part 1 failed\n"); printf("- Expected %d callbacks, got %d\n", NB_STRESS2_TIMERS, cb_count); - return -1; + test_failed = 1; + master_start_slaves(); + goto cleanup; } - ready = 1; + cb_count = 0; + + /* proceed */ + master_start_slaves(); } else { - while (!ready) - rte_pause(); + /* proceed */ + slave_wait_to_start(); + if (test_failed) + goto cleanup; } /* now test again, just stop and restart timers at random after init*/ for (i = 0; i < NB_STRESS2_TIMERS; i++) - rte_timer_reset(&timers[i], delay, SINGLE, rte_get_master_lcore(), + rte_timer_reset(&timers[i], delay, SINGLE, master, timer_stress2_cb, NULL); - cb_count = 0; /* pick random timer to reset, stopping them first half the time */ for (i = 0; i < 100000; i++) { int r = rand() % NB_STRESS2_TIMERS; if (i % 2) rte_timer_stop(&timers[r]); - rte_timer_reset(&timers[r], delay, SINGLE, rte_get_master_lcore(), + rte_timer_reset(&timers[r], delay, SINGLE, master, timer_stress2_cb, NULL); } + /* wait long enough for timers to expire */ rte_delay_ms(500); /* now check that we get the right number of callbacks */ - if (lcore_id == rte_get_master_lcore()) { - rte_timer_manage(); - - /* clean up statics, in case we run again */ - rte_free(timers); - timers = NULL; - ready = 0; - rte_atomic32_set(&collisions, 0); + if (lcore_id == master) { + master_wait_for_slaves(); + rte_timer_manage(); if (cb_count != NB_STRESS2_TIMERS) { printf("Test Failed\n"); printf("- Stress test 2, part 2 failed\n"); printf("- Expected %d callbacks, got %d\n", NB_STRESS2_TIMERS, cb_count); - return -1; + test_failed = 1; + } else { + printf("Test OK\n"); } - printf("Test OK\n"); + } + +cleanup: + if (lcore_id == master) { + master_wait_for_slaves(); + if (timers != NULL) { + rte_free(timers); + timers = NULL; + } + } else { + slave_finish(); } return 0; @@ -485,12 +570,12 @@ test_timer(void) /* sanity check our timer sources and timer config values */ if (timer_sanity_check() < 0) { printf("Timer sanity checks failed\n"); - return -1; + return TEST_FAILED; } if (rte_lcore_count() < 2) { printf("not enough lcores for this test\n"); - return -1; + return TEST_FAILED; } /* init timer */ @@ -514,9 +599,12 @@ test_timer(void) rte_timer_stop_sync(&mytiminfo[0].tim); /* run a second, slightly different set of stress tests */ - printf("Start timer stress tests 2\n"); + printf("\nStart timer stress tests 2\n"); + test_failed = 0; rte_eal_mp_remote_launch(timer_stress2_main_loop, NULL, CALL_MASTER); rte_eal_mp_wait_lcore(); + if (test_failed) + return TEST_FAILED; /* calculate the "end of test" time */ cur_time = rte_get_timer_cycles(); @@ -524,7 +612,7 @@ test_timer(void) end_time = cur_time + (hz * TEST_DURATION_S); /* start other cores */ - printf("Start timer basic tests (%d seconds)\n", TEST_DURATION_S); + printf("\nStart timer basic tests (%d seconds)\n", TEST_DURATION_S); rte_eal_mp_remote_launch(timer_basic_main_loop, NULL, CALL_MASTER); rte_eal_mp_wait_lcore(); @@ -535,7 +623,7 @@ test_timer(void) rte_timer_dump_stats(stdout); - return 0; + return TEST_SUCCESS; } static struct test_command timer_cmd = { -- 1.7.1