Subject: Re: [dpdk-dev] [PATCH v2 5/6] app/test: test dmadev instance failure handling
From: Mattias Rönnblom
To: Bruce Richardson, dev@dpdk.org
Cc: conor.walsh@intel.com, kevin.laatz@intel.com, fengchengwen@huawei.com, jerinj@marvell.com
Date: Wed, 1 Sep 2021 21:53:54 +0200
References: <20210826183301.333442-1-bruce.richardson@intel.com> <20210901163216.120087-1-bruce.richardson@intel.com> <20210901163216.120087-6-bruce.richardson@intel.com>
In-Reply-To: <20210901163216.120087-6-bruce.richardson@intel.com>

On 2021-09-01 18:32, Bruce Richardson wrote:
> Add a series of tests to inject bad copy operations into a dmadev to
> test the error handling and reporting capabilities. Various combinations
> of errors in various positions in a burst are tested, as are errors in
> bursts with fence flag set, and multiple errors in a single burst.
>
> Signed-off-by: Bruce Richardson
> ---
>  app/test/test_dmadev.c | 427 +++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 427 insertions(+)
>
> diff --git a/app/test/test_dmadev.c b/app/test/test_dmadev.c
> index 7a808a9cba..5d7b6ddd87 100644
> --- a/app/test/test_dmadev.c
> +++ b/app/test/test_dmadev.c
> @@ -302,6 +302,414 @@ test_enqueue_copies(int dev_id, uint16_t vchan)
>          || do_multi_copies(dev_id, vchan, 0, 0, 1);
>  }
>
> +/* Failure handling test cases - global macros and variables for those tests*/
> +#define COMP_BURST_SZ 16
> +#define OPT_FENCE(idx) ((fence && idx == 8) ? RTE_DMA_OP_FLAG_FENCE : 0)
> +
> +static int
> +test_failure_in_full_burst(int dev_id, uint16_t vchan, bool fence,
> +        struct rte_mbuf **srcs, struct rte_mbuf **dsts, unsigned int fail_idx)
> +{
> +    /* Test single full batch statuses with failures */
> +    enum rte_dma_status_code status[COMP_BURST_SZ];
> +    struct rte_dmadev_stats baseline, stats;
> +    uint16_t invalid_addr_id = 0;
> +    uint16_t idx;
> +    uint16_t count, status_count;
> +    unsigned int i;
> +    bool error = 0;

error = false;

> +    int err_count = 0;
> +
> +    rte_dmadev_stats_get(dev_id, vchan, &baseline); /* get a baseline set of stats */
> +    for (i = 0; i < COMP_BURST_SZ; i++) {
> +        int id = rte_dmadev_copy(dev_id, vchan,
> +                (i == fail_idx ? 0 : (srcs[i]->buf_iova + srcs[i]->data_off)),
> +                dsts[i]->buf_iova + dsts[i]->data_off,
> +                COPY_LEN, OPT_FENCE(i));
> +        if (id < 0) {
> +            PRINT_ERR("Error with rte_dmadev_copy for buffer %u\n", i);
> +            return -1;
> +        }
> +        if (i == fail_idx)
> +            invalid_addr_id = id;
> +    }
> +    rte_dmadev_submit(dev_id, vchan);
> +    rte_dmadev_stats_get(dev_id, vchan, &stats);
> +    if (stats.submitted != baseline.submitted + COMP_BURST_SZ) {
> +        PRINT_ERR("Submitted stats value not as expected, %"PRIu64" not %"PRIu64"\n",
> +                stats.submitted, baseline.submitted + COMP_BURST_SZ);
> +        return -1;
> +    }
> +
> +    await_hw(dev_id, vchan);
> +
> +    count = rte_dmadev_completed(dev_id, vchan, COMP_BURST_SZ, &idx, &error);
> +    if (count != fail_idx) {
> +        PRINT_ERR("Error with rte_dmadev_completed for failure test. Got returned %u not %u.\n",
> +                count, fail_idx);
> +        rte_dmadev_dump(dev_id, stdout);
> +        return -1;
> +    }
> +    if (error == false) {

if (!error)

> +        PRINT_ERR("Error, missing expected failed copy, %u. has_error is not set\n",
> +                fail_idx);
> +        return -1;
> +    }
> +    if (idx != invalid_addr_id - 1) {
> +        PRINT_ERR("Error, missing expected failed copy, %u. Got last idx %u, not %u\n",
> +                fail_idx, idx, invalid_addr_id - 1);
> +        return -1;
> +    }
> +
> +    /* all checks ok, now verify calling completed() again always returns 0 */
> +    for (i = 0; i < 10; i++) {
> +        if (rte_dmadev_completed(dev_id, vchan, COMP_BURST_SZ, &idx, &error) != 0
> +                || error == false || idx != (invalid_addr_id - 1)) {
> +            PRINT_ERR("Error with follow-up completed calls for fail idx %u\n",
> +                    fail_idx);
> +            return -1;
> +        }
> +    }
> +
> +    status_count = rte_dmadev_completed_status(dev_id, vchan, COMP_BURST_SZ,
> +            &idx, status);
> +    /* some HW may stop on error and be restarted after getting error status for single value
> +     * To handle this case, if we get just one error back, wait for more completions and get
> +     * status for rest of the burst
> +     */
> +    if (status_count == 1) {
> +        await_hw(dev_id, vchan);
> +        status_count += rte_dmadev_completed_status(dev_id, vchan, COMP_BURST_SZ - 1,
> +                &idx, &status[1]);
> +    }
> +    /* check that at this point we have all status values */
> +    if (status_count != COMP_BURST_SZ - count) {
> +        PRINT_ERR("Error with completed_status calls for fail idx %u. Got %u not %u\n",
> +                fail_idx, status_count, COMP_BURST_SZ - count);
> +        return -1;
> +    }
> +    /* now verify just one failure followed by multiple successful or skipped entries */
> +    if (status[0] == RTE_DMA_STATUS_SUCCESSFUL) {
> +        PRINT_ERR("Error with status returned for fail idx %u. First status was not failure\n",
> +                fail_idx);
> +        return -1;
> +    }
> +    for (i = 1; i < status_count; i++) {
> +        /* after a failure in a burst, depending on ordering/fencing,
> +         * operations may be successful or skipped because of previous error.
> +         */
> +        if (status[i] != RTE_DMA_STATUS_SUCCESSFUL
> +                && status[i] != RTE_DMA_STATUS_NOT_ATTEMPTED) {
> +            PRINT_ERR("Error with status calls for fail idx %u. Status for job %u (of %u) is not successful\n",
> +                    fail_idx, count + i, COMP_BURST_SZ);
> +            return -1;
> +        }
> +    }
> +
> +    /* check the completed + errors stats are as expected */
> +    rte_dmadev_stats_get(dev_id, vchan, &stats);
> +    if (stats.completed != baseline.completed + COMP_BURST_SZ) {
> +        PRINT_ERR("Completed stats value not as expected, %"PRIu64" not %"PRIu64"\n",
> +                stats.completed, baseline.completed + COMP_BURST_SZ);
> +        return -1;
> +    }
> +    for (i = 0; i < status_count; i++)
> +        err_count += (status[i] != RTE_DMA_STATUS_SUCCESSFUL);
> +    if (stats.errors != baseline.errors + err_count) {
> +        PRINT_ERR("'Errors' stats value not as expected, %"PRIu64" not %"PRIu64"\n",
> +                stats.errors, baseline.errors + err_count);
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int
> +test_individual_status_query_with_failure(int dev_id, uint16_t vchan, bool fence,
> +        struct rte_mbuf **srcs, struct rte_mbuf **dsts, unsigned int fail_idx)
> +{
> +    /* Test gathering batch statuses one at a time */
> +    enum rte_dma_status_code status[COMP_BURST_SZ];
> +    uint16_t invalid_addr_id = 0;
> +    uint16_t idx;
> +    uint16_t count = 0, status_count = 0;
> +    unsigned int j;
> +    bool error = false;
> +
> +    for (j = 0; j < COMP_BURST_SZ; j++) {
> +        int id = rte_dmadev_copy(dev_id, vchan,
> +                (j == fail_idx ? 0 : (srcs[j]->buf_iova + srcs[j]->data_off)),
> +                dsts[j]->buf_iova + dsts[j]->data_off,
> +                COPY_LEN, OPT_FENCE(j));
> +        if (id < 0) {
> +            PRINT_ERR("Error with rte_dmadev_copy for buffer %u\n", j);
> +            return -1;
> +        }
> +        if (j == fail_idx)
> +            invalid_addr_id = id;
> +    }
> +    rte_dmadev_submit(dev_id, vchan);
> +    await_hw(dev_id, vchan);
> +
> +    /* use regular "completed" until we hit error */
> +    while (!error) {
> +        uint16_t n = rte_dmadev_completed(dev_id, vchan, 1, &idx, &error);
> +        count += n;
> +        if (n > 1 || count >= COMP_BURST_SZ) {
> +            PRINT_ERR("Error - too many completions got\n");
> +            return -1;
> +        }
> +        if (n == 0 && !error) {
> +            PRINT_ERR("Error, unexpectedly got zero completions after %u completed\n",
> +                    count);
> +            return -1;
> +        }
> +    }
> +    if (idx != invalid_addr_id - 1) {
> +        PRINT_ERR("Error, last successful index not as expected, got %u, expected %u\n",
> +                idx, invalid_addr_id - 1);
> +        return -1;
> +    }
> +
> +    /* use completed_status until we hit end of burst */
> +    while (count + status_count < COMP_BURST_SZ) {
> +        uint16_t n = rte_dmadev_completed_status(dev_id, vchan, 1, &idx,
> +                &status[status_count]);
> +        await_hw(dev_id, vchan); /* allow delay to ensure jobs are completed */
> +        status_count += n;
> +        if (n != 1) {
> +            PRINT_ERR("Error: unexpected number of completions received, %u, not 1\n",
> +                    n);
> +            return -1;
> +        }
> +    }
> +
> +    /* check for single failure */
> +    if (status[0] == RTE_DMA_STATUS_SUCCESSFUL) {
> +        PRINT_ERR("Error, unexpected successful DMA transaction\n");
> +        return -1;
> +    }
> +    for (j = 1; j < status_count; j++) {
> +        if (status[j] != RTE_DMA_STATUS_SUCCESSFUL
> +                && status[j] != RTE_DMA_STATUS_NOT_ATTEMPTED) {
> +            PRINT_ERR("Error, unexpected DMA error reported\n");
> +            return -1;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +static int
> +test_single_item_status_query_with_failure(int dev_id, uint16_t vchan,
> +        struct rte_mbuf **srcs, struct rte_mbuf **dsts, unsigned int fail_idx)
> +{
> +    /* When error occurs just collect a single error using "completed_status()"
> +     * before going to back to completed() calls
> +     */
> +    enum rte_dma_status_code status;
> +    uint16_t invalid_addr_id = 0;
> +    uint16_t idx;
> +    uint16_t count, status_count, count2;
> +    unsigned int j;
> +    bool error = 0;

Same here.

> +
> +    for (j = 0; j < COMP_BURST_SZ; j++) {
> +        int id = rte_dmadev_copy(dev_id, vchan,
> +                (j == fail_idx ? 0 : (srcs[j]->buf_iova + srcs[j]->data_off)),
> +                dsts[j]->buf_iova + dsts[j]->data_off,
> +                COPY_LEN, 0);
> +        if (id < 0) {
> +            PRINT_ERR("Error with rte_dmadev_copy for buffer %u\n", j);
> +            return -1;
> +        }
> +        if (j == fail_idx)
> +            invalid_addr_id = id;
> +    }
> +    rte_dmadev_submit(dev_id, vchan);
> +    await_hw(dev_id, vchan);
> +
> +    /* get up to the error point */
> +    count = rte_dmadev_completed(dev_id, vchan, COMP_BURST_SZ, &idx, &error);
> +    if (count != fail_idx) {
> +        PRINT_ERR("Error with rte_dmadev_completed for failure test. Got returned %u not %u.\n",
> +                count, fail_idx);
> +        rte_dmadev_dump(dev_id, stdout);
> +        return -1;
> +    }
> +    if (error == false) {

And here.

> +        PRINT_ERR("Error, missing expected failed copy, %u. has_error is not set\n",
> +                fail_idx);
> +        return -1;
> +    }
> +    if (idx != invalid_addr_id - 1) {
> +        PRINT_ERR("Error, missing expected failed copy, %u. Got last idx %u, not %u\n",
> +                fail_idx, idx, invalid_addr_id - 1);
> +        return -1;
> +    }
> +
> +    /* get the error code */
> +    status_count = rte_dmadev_completed_status(dev_id, vchan, 1, &idx, &status);
> +    if (status_count != 1) {
> +        PRINT_ERR("Error with completed_status calls for fail idx %u. Got %u not %u\n",
> +                fail_idx, status_count, COMP_BURST_SZ - count);
> +        return -1;
> +    }
> +    if (status == RTE_DMA_STATUS_SUCCESSFUL) {
> +        PRINT_ERR("Error with status returned for fail idx %u. First status was not failure\n",
> +                fail_idx);
> +        return -1;
> +    }
> +    /* delay in case time needed after err handled to complete other jobs */
> +    await_hw(dev_id, vchan);
> +
> +    /* get the rest of the completions without status */
> +    count2 = rte_dmadev_completed(dev_id, vchan, COMP_BURST_SZ, &idx, &error);
> +    if (error == true) {

if (error)

> +        PRINT_ERR("Error, got further errors post completed_status() call, for failure case %u.\n",
> +                fail_idx);
> +        return -1;
> +    }
> +    if (count + status_count + count2 != COMP_BURST_SZ) {
> +        PRINT_ERR("Error, incorrect number of completions received, got %u not %u\n",
> +                count + status_count + count2, COMP_BURST_SZ);
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int
> +test_multi_failure(int dev_id, uint16_t vchan, struct rte_mbuf **srcs, struct rte_mbuf **dsts,
> +        const unsigned int *fail, size_t num_fail)
> +{
> +    /* test having multiple errors in one go */
> +    enum rte_dma_status_code status[COMP_BURST_SZ];
> +    unsigned int i, j;
> +    uint16_t count, err_count = 0;
> +    bool error = 0;

false

> +
> +    /* enqueue and gather completions in one go */
> +    for (j = 0; j < COMP_BURST_SZ; j++) {
> +        uintptr_t src = srcs[j]->buf_iova + srcs[j]->data_off;
> +        /* set up for failure if the current index is anywhere is the fails array */
> +        for (i = 0; i < num_fail; i++)
> +            if (j == fail[i])
> +                src = 0;
> +
> +        int id = rte_dmadev_copy(dev_id, vchan,
> +                src, dsts[j]->buf_iova + dsts[j]->data_off,
> +                COPY_LEN, 0);
> +        if (id < 0) {
> +            PRINT_ERR("Error with rte_dmadev_copy for buffer %u\n", j);
> +            return -1;
> +        }
> +    }
> +    rte_dmadev_submit(dev_id, vchan);
> +    await_hw(dev_id, vchan);
> +
> +    count = rte_dmadev_completed_status(dev_id, vchan, COMP_BURST_SZ, NULL, status);
> +    while (count < COMP_BURST_SZ) {
> +        await_hw(dev_id, vchan);
> +
> +        uint16_t ret = rte_dmadev_completed_status(dev_id, vchan, COMP_BURST_SZ - count,
> +                NULL, &status[count]);
> +        if (ret == 0) {
> +            PRINT_ERR("Error getting all completions for jobs. Got %u of %u\n",
> +                    count, COMP_BURST_SZ);
> +            return -1;
> +        }
> +        count += ret;
> +    }
> +    for (i = 0; i < count; i++) {
> +        if (status[i] != RTE_DMA_STATUS_SUCCESSFUL)
> +            err_count++;
> +    }

Remove {} around the loop?

> +    if (err_count != num_fail) {
> +        PRINT_ERR("Error: Invalid number of failed completions returned, %u; expected %zu\n",
> +                err_count, num_fail);
> +        return -1;
> +    }
> +
> +    /* enqueue and gather completions in bursts, but getting errors one at a time */
> +    for (j = 0; j < COMP_BURST_SZ; j++) {
> +        uintptr_t src = srcs[j]->buf_iova + srcs[j]->data_off;
> +        /* set up for failure if the current index is anywhere is the fails array */
> +        for (i = 0; i < num_fail; i++)
> +            if (j == fail[i])
> +                src = 0;
> +
> +        int id = rte_dmadev_copy(dev_id, vchan,
> +                src, dsts[j]->buf_iova + dsts[j]->data_off,
> +                COPY_LEN, 0);
> +        if (id < 0) {
> +            PRINT_ERR("Error with rte_dmadev_copy for buffer %u\n", j);
> +            return -1;
> +        }
> +    }
> +    rte_dmadev_submit(dev_id, vchan);
> +    await_hw(dev_id, vchan);
> +
> +    count = 0;
> +    err_count = 0;
> +    while (count + err_count < COMP_BURST_SZ) {
> +        count += rte_dmadev_completed(dev_id, vchan, COMP_BURST_SZ, NULL, &error);
> +        if (error) {
> +            uint16_t ret = rte_dmadev_completed_status(dev_id, vchan, 1,
> +                    NULL, status);
> +            if (ret != 1) {
> +                PRINT_ERR("Error getting error-status for completions\n");
> +                return -1;
> +            }
> +            err_count += ret;
> +            await_hw(dev_id, vchan);
> +        }
> +    }
> +    if (err_count != num_fail) {
> +        PRINT_ERR("Error: Incorrect number of failed completions received, got %u not %zu\n",
> +                err_count, num_fail);
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int
> +test_completion_status(int dev_id, uint16_t vchan, bool fence)
> +{
> +    const unsigned int fail[] = {0, 7, 14, 15};
> +    struct rte_mbuf *srcs[COMP_BURST_SZ], *dsts[COMP_BURST_SZ];
> +    unsigned int i;
> +
> +    for (i = 0; i < COMP_BURST_SZ; i++) {
> +        srcs[i] = rte_pktmbuf_alloc(pool);
> +        dsts[i] = rte_pktmbuf_alloc(pool);
> +    }
> +
> +    for (i = 0; i < RTE_DIM(fail); i++) {
> +        if (test_failure_in_full_burst(dev_id, vchan, fence, srcs, dsts, fail[i]) < 0)
> +            return -1;
> +
> +        if (test_individual_status_query_with_failure(dev_id, vchan, fence,
> +                srcs, dsts, fail[i]) < 0)
> +            return -1;
> +
> +        /* test is run the same fenced, or unfenced, but no harm in running it twice */
> +        if (test_single_item_status_query_with_failure(dev_id, vchan,
> +                srcs, dsts, fail[i]) < 0)
> +            return -1;
> +    }
> +
> +    if (test_multi_failure(dev_id, vchan, srcs, dsts, fail, RTE_DIM(fail)) < 0)
> +        return -1;
> +
> +    for (i = 0; i < COMP_BURST_SZ; i++) {
> +        rte_pktmbuf_free(srcs[i]);
> +        rte_pktmbuf_free(dsts[i]);
> +    }
> +    return 0;
> +}
> +
>  static int
>  test_dmadev_instance(uint16_t dev_id)
>  {
> @@ -386,6 +794,25 @@ test_dmadev_instance(uint16_t dev_id)
>      if (check_stats(&stats, true) < 0)
>          goto err;
>
> +    /* to test error handling we can provide null pointers for source or dest in copies. This
> +     * requires VA mode in DPDK, since NULL(0) is a valid physical address.
> +     */
> +    if (rte_eal_iova_mode() == RTE_IOVA_VA) {
> +        rte_dmadev_stats_reset(dev_id, vchan);
> +        printf("DMA Dev: %u, Running Completion Handling Tests (errors expected)\n",
> +                dev_id);
> +        if (test_completion_status(dev_id, vchan, false) != 0) /* without fences */
> +            goto err;
> +        if (test_completion_status(dev_id, vchan, true) != 0) /* with fences */
> +            goto err;
> +        rte_dmadev_stats_get(dev_id, 0, &stats);
> +        printf("Ops submitted: %"PRIu64"\t", stats.submitted);
> +        printf("Ops completed: %"PRIu64"\t", stats.completed);
> +        printf("Errors: %"PRIu64"\n", stats.errors);
> +        if (check_stats(&stats, false) < 0) /* don't check stats.errors this time */
> +            goto err;
> +    }
> +
>      rte_mempool_free(pool);
>      rte_dmadev_stop(dev_id);
>      rte_dmadev_stats_reset(dev_id, vchan);
>
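
To be concrete about the nits above, something along these lines is what I had in
mind (just a sketch against this patch, using the same PRINT_ERR()/status code as
in the functions above; not compiled):

    bool error = false; /* initialize a C99 bool with a boolean literal, not 0 */

    /* ... */

    /* a single-statement loop body doesn't need braces */
    for (i = 0; i < count; i++)
        if (status[i] != RTE_DMA_STATUS_SUCCESSFUL)
            err_count++;

    /* test the flag directly instead of comparing against false/true */
    if (!error) {
        PRINT_ERR("Error, missing expected failed copy, %u. has_error is not set\n",
                fail_idx);
        return -1;
    }

Only style issues; the test logic itself reads fine to me.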