From: Bruce Richardson
To: dev@dpdk.org
Cc: conor.walsh@intel.com, kevin.laatz@intel.com, fengchengwen@huawei.com,
 jerinj@marvell.com, Bruce Richardson
Date: Wed, 1 Sep 2021 17:32:15 +0100
Message-Id: <20210901163216.120087-6-bruce.richardson@intel.com>
X-Mailer: git-send-email 2.30.2
In-Reply-To: <20210901163216.120087-1-bruce.richardson@intel.com>
References: <20210826183301.333442-1-bruce.richardson@intel.com>
 <20210901163216.120087-1-bruce.richardson@intel.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Subject: [dpdk-dev] [PATCH v2 5/6] app/test: test dmadev instance failure handling

Add a series of tests to inject bad copy operations into a dmadev to
test the error handling and reporting capabilities. Various
combinations of errors in various positions in a burst are tested, as
are errors in bursts with the fence flag set, and multiple errors in a
single burst.

Signed-off-by: Bruce Richardson
---
 app/test/test_dmadev.c | 427 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 427 insertions(+)

diff --git a/app/test/test_dmadev.c b/app/test/test_dmadev.c
index 7a808a9cba..5d7b6ddd87 100644
--- a/app/test/test_dmadev.c
+++ b/app/test/test_dmadev.c
@@ -302,6 +302,414 @@ test_enqueue_copies(int dev_id, uint16_t vchan)
 		|| do_multi_copies(dev_id, vchan, 0, 0, 1);
 }
 
+/* Failure handling test cases - global macros and variables for those tests */
+#define COMP_BURST_SZ 16
+#define OPT_FENCE(idx) ((fence && idx == 8) ? RTE_DMA_OP_FLAG_FENCE : 0)
+
+static int
+test_failure_in_full_burst(int dev_id, uint16_t vchan, bool fence,
+		struct rte_mbuf **srcs, struct rte_mbuf **dsts, unsigned int fail_idx)
+{
+	/* Test single full batch statuses with failures */
+	enum rte_dma_status_code status[COMP_BURST_SZ];
+	struct rte_dmadev_stats baseline, stats;
+	uint16_t invalid_addr_id = 0;
+	uint16_t idx;
+	uint16_t count, status_count;
+	unsigned int i;
+	bool error = 0;
+	int err_count = 0;
+
+	rte_dmadev_stats_get(dev_id, vchan, &baseline); /* get a baseline set of stats */
+	for (i = 0; i < COMP_BURST_SZ; i++) {
+		int id = rte_dmadev_copy(dev_id, vchan,
+				(i == fail_idx ? 0 : (srcs[i]->buf_iova + srcs[i]->data_off)),
+				dsts[i]->buf_iova + dsts[i]->data_off,
+				COPY_LEN, OPT_FENCE(i));
+		if (id < 0) {
+			PRINT_ERR("Error with rte_dmadev_copy for buffer %u\n", i);
+			return -1;
+		}
+		if (i == fail_idx)
+			invalid_addr_id = id;
+	}
+	rte_dmadev_submit(dev_id, vchan);
+	rte_dmadev_stats_get(dev_id, vchan, &stats);
+	if (stats.submitted != baseline.submitted + COMP_BURST_SZ) {
+		PRINT_ERR("Submitted stats value not as expected, %"PRIu64" not %"PRIu64"\n",
+				stats.submitted, baseline.submitted + COMP_BURST_SZ);
+		return -1;
+	}
+
+	await_hw(dev_id, vchan);
+
+	count = rte_dmadev_completed(dev_id, vchan, COMP_BURST_SZ, &idx, &error);
+	if (count != fail_idx) {
+		PRINT_ERR("Error with rte_dmadev_completed for failure test. Got returned %u not %u.\n",
+				count, fail_idx);
+		rte_dmadev_dump(dev_id, stdout);
+		return -1;
+	}
+	if (error == false) {
+		PRINT_ERR("Error, missing expected failed copy, %u. has_error is not set\n",
+				fail_idx);
+		return -1;
+	}
+	if (idx != invalid_addr_id - 1) {
+		PRINT_ERR("Error, missing expected failed copy, %u. Got last idx %u, not %u\n",
+				fail_idx, idx, invalid_addr_id - 1);
+		return -1;
+	}
+
+	/* all checks ok, now verify calling completed() again always returns 0 */
+	for (i = 0; i < 10; i++) {
+		if (rte_dmadev_completed(dev_id, vchan, COMP_BURST_SZ, &idx, &error) != 0
+				|| error == false || idx != (invalid_addr_id - 1)) {
+			PRINT_ERR("Error with follow-up completed calls for fail idx %u\n",
+					fail_idx);
+			return -1;
+		}
+	}
+
+	status_count = rte_dmadev_completed_status(dev_id, vchan, COMP_BURST_SZ,
+			&idx, status);
+	/* some HW may stop on error and be restarted after getting the error status for a single
+	 * value. To handle this case, if we get just one error back, wait for more completions
+	 * and get the status for the rest of the burst
+	 */
+	if (status_count == 1) {
+		await_hw(dev_id, vchan);
+		status_count += rte_dmadev_completed_status(dev_id, vchan, COMP_BURST_SZ - 1,
+				&idx, &status[1]);
+	}
+	/* check that at this point we have all status values */
+	if (status_count != COMP_BURST_SZ - count) {
+		PRINT_ERR("Error with completed_status calls for fail idx %u. Got %u not %u\n",
+				fail_idx, status_count, COMP_BURST_SZ - count);
+		return -1;
+	}
+	/* now verify just one failure followed by multiple successful or skipped entries */
+	if (status[0] == RTE_DMA_STATUS_SUCCESSFUL) {
+		PRINT_ERR("Error with status returned for fail idx %u. First status was not failure\n",
+				fail_idx);
+		return -1;
+	}
+	for (i = 1; i < status_count; i++) {
+		/* after a failure in a burst, depending on ordering/fencing,
+		 * operations may be successful or skipped because of previous error.
+		 */
+		if (status[i] != RTE_DMA_STATUS_SUCCESSFUL
+				&& status[i] != RTE_DMA_STATUS_NOT_ATTEMPTED) {
+			PRINT_ERR("Error with status calls for fail idx %u. Status for job %u (of %u) is not successful\n",
+					fail_idx, count + i, COMP_BURST_SZ);
+			return -1;
+		}
+	}
+
+	/* check the completed + errors stats are as expected */
+	rte_dmadev_stats_get(dev_id, vchan, &stats);
+	if (stats.completed != baseline.completed + COMP_BURST_SZ) {
+		PRINT_ERR("Completed stats value not as expected, %"PRIu64" not %"PRIu64"\n",
+				stats.completed, baseline.completed + COMP_BURST_SZ);
+		return -1;
+	}
+	for (i = 0; i < status_count; i++)
+		err_count += (status[i] != RTE_DMA_STATUS_SUCCESSFUL);
+	if (stats.errors != baseline.errors + err_count) {
+		PRINT_ERR("'Errors' stats value not as expected, %"PRIu64" not %"PRIu64"\n",
+				stats.errors, baseline.errors + err_count);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+test_individual_status_query_with_failure(int dev_id, uint16_t vchan, bool fence,
+		struct rte_mbuf **srcs, struct rte_mbuf **dsts, unsigned int fail_idx)
+{
+	/* Test gathering batch statuses one at a time */
+	enum rte_dma_status_code status[COMP_BURST_SZ];
+	uint16_t invalid_addr_id = 0;
+	uint16_t idx;
+	uint16_t count = 0, status_count = 0;
+	unsigned int j;
+	bool error = false;
+
+	for (j = 0; j < COMP_BURST_SZ; j++) {
+		int id = rte_dmadev_copy(dev_id, vchan,
+				(j == fail_idx ? 0 : (srcs[j]->buf_iova + srcs[j]->data_off)),
+				dsts[j]->buf_iova + dsts[j]->data_off,
+				COPY_LEN, OPT_FENCE(j));
+		if (id < 0) {
+			PRINT_ERR("Error with rte_dmadev_copy for buffer %u\n", j);
+			return -1;
+		}
+		if (j == fail_idx)
+			invalid_addr_id = id;
+	}
+	rte_dmadev_submit(dev_id, vchan);
+	await_hw(dev_id, vchan);
+
+	/* use regular "completed" until we hit error */
+	while (!error) {
+		uint16_t n = rte_dmadev_completed(dev_id, vchan, 1, &idx, &error);
+		count += n;
+		if (n > 1 || count >= COMP_BURST_SZ) {
+			PRINT_ERR("Error - got too many completions\n");
+			return -1;
+		}
+		if (n == 0 && !error) {
+			PRINT_ERR("Error, unexpectedly got zero completions after %u completed\n",
+					count);
+			return -1;
+		}
+	}
+	if (idx != invalid_addr_id - 1) {
+		PRINT_ERR("Error, last successful index not as expected, got %u, expected %u\n",
+				idx, invalid_addr_id - 1);
+		return -1;
+	}
+
+	/* use completed_status until we hit end of burst */
+	while (count + status_count < COMP_BURST_SZ) {
+		uint16_t n = rte_dmadev_completed_status(dev_id, vchan, 1, &idx,
+				&status[status_count]);
+		await_hw(dev_id, vchan); /* allow delay to ensure jobs are completed */
+		status_count += n;
+		if (n != 1) {
+			PRINT_ERR("Error: unexpected number of completions received, %u, not 1\n",
+					n);
+			return -1;
+		}
+	}
+
+	/* check for single failure */
+	if (status[0] == RTE_DMA_STATUS_SUCCESSFUL) {
+		PRINT_ERR("Error, unexpected successful DMA transaction\n");
+		return -1;
+	}
+	for (j = 1; j < status_count; j++) {
+		if (status[j] != RTE_DMA_STATUS_SUCCESSFUL
+				&& status[j] != RTE_DMA_STATUS_NOT_ATTEMPTED) {
+			PRINT_ERR("Error, unexpected DMA error reported\n");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+static int
+test_single_item_status_query_with_failure(int dev_id, uint16_t vchan,
+		struct rte_mbuf **srcs, struct rte_mbuf **dsts, unsigned int fail_idx)
+{
+	/* When an error occurs, just collect a single error using "completed_status()"
+	 * before going back to completed() calls
+	 */
+	enum rte_dma_status_code status;
+	uint16_t invalid_addr_id = 0;
+	uint16_t idx;
+	uint16_t count, status_count, count2;
+	unsigned int j;
+	bool error = 0;
+
+	for (j = 0; j < COMP_BURST_SZ; j++) {
+		int id = rte_dmadev_copy(dev_id, vchan,
+				(j == fail_idx ? 0 : (srcs[j]->buf_iova + srcs[j]->data_off)),
+				dsts[j]->buf_iova + dsts[j]->data_off,
+				COPY_LEN, 0);
+		if (id < 0) {
+			PRINT_ERR("Error with rte_dmadev_copy for buffer %u\n", j);
+			return -1;
+		}
+		if (j == fail_idx)
+			invalid_addr_id = id;
+	}
+	rte_dmadev_submit(dev_id, vchan);
+	await_hw(dev_id, vchan);
+
+	/* get up to the error point */
+	count = rte_dmadev_completed(dev_id, vchan, COMP_BURST_SZ, &idx, &error);
+	if (count != fail_idx) {
+		PRINT_ERR("Error with rte_dmadev_completed for failure test. Got returned %u not %u.\n",
+				count, fail_idx);
+		rte_dmadev_dump(dev_id, stdout);
+		return -1;
+	}
+	if (error == false) {
+		PRINT_ERR("Error, missing expected failed copy, %u. has_error is not set\n",
+				fail_idx);
+		return -1;
+	}
+	if (idx != invalid_addr_id - 1) {
+		PRINT_ERR("Error, missing expected failed copy, %u. Got last idx %u, not %u\n",
+				fail_idx, idx, invalid_addr_id - 1);
+		return -1;
+	}
+
+	/* get the error code */
+	status_count = rte_dmadev_completed_status(dev_id, vchan, 1, &idx, &status);
+	if (status_count != 1) {
+		PRINT_ERR("Error with completed_status calls for fail idx %u. Got %u not %u\n",
+				fail_idx, status_count, COMP_BURST_SZ - count);
+		return -1;
+	}
+	if (status == RTE_DMA_STATUS_SUCCESSFUL) {
+		PRINT_ERR("Error with status returned for fail idx %u. First status was not failure\n",
+				fail_idx);
+		return -1;
+	}
+	/* delay in case time is needed after the error is handled to complete other jobs */
+	await_hw(dev_id, vchan);
+
+	/* get the rest of the completions without status */
+	count2 = rte_dmadev_completed(dev_id, vchan, COMP_BURST_SZ, &idx, &error);
+	if (error == true) {
+		PRINT_ERR("Error, got further errors post completed_status() call, for failure case %u.\n",
+				fail_idx);
+		return -1;
+	}
+	if (count + status_count + count2 != COMP_BURST_SZ) {
+		PRINT_ERR("Error, incorrect number of completions received, got %u not %u\n",
+				count + status_count + count2, COMP_BURST_SZ);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+test_multi_failure(int dev_id, uint16_t vchan, struct rte_mbuf **srcs, struct rte_mbuf **dsts,
+		const unsigned int *fail, size_t num_fail)
+{
+	/* test having multiple errors in one go */
+	enum rte_dma_status_code status[COMP_BURST_SZ];
+	unsigned int i, j;
+	uint16_t count, err_count = 0;
+	bool error = 0;
+
+	/* enqueue and gather completions in one go */
+	for (j = 0; j < COMP_BURST_SZ; j++) {
+		uintptr_t src = srcs[j]->buf_iova + srcs[j]->data_off;
+		/* set up for failure if the current index is anywhere in the fails array */
+		for (i = 0; i < num_fail; i++)
+			if (j == fail[i])
+				src = 0;
+
+		int id = rte_dmadev_copy(dev_id, vchan,
+				src, dsts[j]->buf_iova + dsts[j]->data_off,
+				COPY_LEN, 0);
+		if (id < 0) {
+			PRINT_ERR("Error with rte_dmadev_copy for buffer %u\n", j);
+			return -1;
+		}
+	}
+	rte_dmadev_submit(dev_id, vchan);
+	await_hw(dev_id, vchan);
+
+	count = rte_dmadev_completed_status(dev_id, vchan, COMP_BURST_SZ, NULL, status);
+	while (count < COMP_BURST_SZ) {
+		await_hw(dev_id, vchan);
+
+		uint16_t ret = rte_dmadev_completed_status(dev_id, vchan, COMP_BURST_SZ - count,
+				NULL, &status[count]);
+		if (ret == 0) {
+			PRINT_ERR("Error getting all completions for jobs. Got %u of %u\n",
+					count, COMP_BURST_SZ);
+			return -1;
+		}
+		count += ret;
+	}
+	for (i = 0; i < count; i++) {
+		if (status[i] != RTE_DMA_STATUS_SUCCESSFUL)
+			err_count++;
+	}
+	if (err_count != num_fail) {
+		PRINT_ERR("Error: Invalid number of failed completions returned, %u; expected %zu\n",
+				err_count, num_fail);
+		return -1;
+	}
+
+	/* enqueue and gather completions in bursts, but getting errors one at a time */
+	for (j = 0; j < COMP_BURST_SZ; j++) {
+		uintptr_t src = srcs[j]->buf_iova + srcs[j]->data_off;
+		/* set up for failure if the current index is anywhere in the fails array */
+		for (i = 0; i < num_fail; i++)
+			if (j == fail[i])
+				src = 0;
+
+		int id = rte_dmadev_copy(dev_id, vchan,
+				src, dsts[j]->buf_iova + dsts[j]->data_off,
+				COPY_LEN, 0);
+		if (id < 0) {
+			PRINT_ERR("Error with rte_dmadev_copy for buffer %u\n", j);
+			return -1;
+		}
+	}
+	rte_dmadev_submit(dev_id, vchan);
+	await_hw(dev_id, vchan);
+
+	count = 0;
+	err_count = 0;
+	while (count + err_count < COMP_BURST_SZ) {
+		count += rte_dmadev_completed(dev_id, vchan, COMP_BURST_SZ, NULL, &error);
+		if (error) {
+			uint16_t ret = rte_dmadev_completed_status(dev_id, vchan, 1,
+					NULL, status);
+			if (ret != 1) {
+				PRINT_ERR("Error getting error-status for completions\n");
+				return -1;
+			}
+			err_count += ret;
+			await_hw(dev_id, vchan);
+		}
+	}
+	if (err_count != num_fail) {
+		PRINT_ERR("Error: Incorrect number of failed completions received, got %u not %zu\n",
+				err_count, num_fail);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+test_completion_status(int dev_id, uint16_t vchan, bool fence)
+{
+	const unsigned int fail[] = {0, 7, 14, 15};
+	struct rte_mbuf *srcs[COMP_BURST_SZ], *dsts[COMP_BURST_SZ];
+	unsigned int i;
+
+	for (i = 0; i < COMP_BURST_SZ; i++) {
+		srcs[i] = rte_pktmbuf_alloc(pool);
+		dsts[i] = rte_pktmbuf_alloc(pool);
+	}
+
+	for (i = 0; i < RTE_DIM(fail); i++) {
+		if (test_failure_in_full_burst(dev_id, vchan, fence, srcs, dsts, fail[i]) < 0)
+			return -1;
+
+		if (test_individual_status_query_with_failure(dev_id, vchan, fence,
+				srcs, dsts, fail[i]) < 0)
+			return -1;
+
+		/* the test runs the same fenced or unfenced, but no harm in running it twice */
+		if (test_single_item_status_query_with_failure(dev_id, vchan,
+				srcs, dsts, fail[i]) < 0)
+			return -1;
+	}
+
+	if (test_multi_failure(dev_id, vchan, srcs, dsts, fail, RTE_DIM(fail)) < 0)
+		return -1;
+
+	for (i = 0; i < COMP_BURST_SZ; i++) {
+		rte_pktmbuf_free(srcs[i]);
+		rte_pktmbuf_free(dsts[i]);
+	}
+	return 0;
+}
+
 static int
 test_dmadev_instance(uint16_t dev_id)
 {
@@ -386,6 +794,25 @@ test_dmadev_instance(uint16_t dev_id)
 	if (check_stats(&stats, true) < 0)
 		goto err;
 
+	/* to test error handling we can provide null pointers for source or dest in copies. This
+	 * requires VA mode in DPDK, since NULL(0) is a valid physical address.
+	 */
+	if (rte_eal_iova_mode() == RTE_IOVA_VA) {
+		rte_dmadev_stats_reset(dev_id, vchan);
+		printf("DMA Dev: %u, Running Completion Handling Tests (errors expected)\n",
+				dev_id);
+		if (test_completion_status(dev_id, vchan, false) != 0) /* without fences */
+			goto err;
+		if (test_completion_status(dev_id, vchan, true) != 0) /* with fences */
+			goto err;
+		rte_dmadev_stats_get(dev_id, 0, &stats);
+		printf("Ops submitted: %"PRIu64"\t", stats.submitted);
+		printf("Ops completed: %"PRIu64"\t", stats.completed);
+		printf("Errors: %"PRIu64"\n", stats.errors);
+		if (check_stats(&stats, false) < 0) /* don't check stats.errors this time */
+			goto err;
+	}
+
 	rte_mempool_free(pool);
 	rte_dmadev_stop(dev_id);
 	rte_dmadev_stats_reset(dev_id, vchan);
-- 
2.30.2