From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: <dev-bounces@dpdk.org> Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 7AE68A0C41; Mon, 2 Aug 2021 12:21:14 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 60CD64113D; Mon, 2 Aug 2021 12:21:14 +0200 (CEST) Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by mails.dpdk.org (Postfix) with ESMTP id 7BB3A41134 for <dev@dpdk.org>; Mon, 2 Aug 2021 12:21:13 +0200 (CEST) Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 091A511D4; Mon, 2 Aug 2021 03:21:13 -0700 (PDT) Received: from net-arm-n1sdp.shanghai.arm.com (net-arm-n1sdp.shanghai.arm.com [10.169.208.222]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id 50E653F70D; Mon, 2 Aug 2021 03:21:06 -0700 (PDT) From: Joyce Kong <joyce.kong@arm.com> To: thomas@monjalon.net, david.marchand@redhat.com, honnappa.nagarahalli@arm.com, ruifeng.wang@arm.com, konstantin.ananyev@intel.com, rsanford@akamai.com, erik.g.carrillo@intel.com, olivier.matz@6wind.com, yipeng1.wang@intel.com, sameh.gobriel@intel.com, bruce.richardson@intel.com, vladimir.medvedkin@intel.com, anatoly.burakov@intel.com, andrew.rybchenko@oktetlabs.ru, jerinj@marvell.com, declan.doherty@intel.com, ciara.power@intel.com, xiaoyun.li@intel.com, nicolas.chautru@intel.com, maryam.tahhan@intel.com, reshma.pattan@intel.com, cristian.dumitrescu@intel.com Cc: dev@dpdk.org, nd@arm.com Date: Mon, 2 Aug 2021 05:18:46 -0500 Message-Id: <20210802101847.3462-12-joyce.kong@arm.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20210802101847.3462-1-joyce.kong@arm.com> References: <20210802101847.3462-1-joyce.kong@arm.com> Subject: [dpdk-dev] [PATCH v1 11/12] app/bbdev: use compiler atomics for thread sync X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: 
List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" <dev-bounces@dpdk.org> Convert rte_atomic usages to compiler atomic built-ins for thread params sync in bbdev cases. Signed-off-by: Joyce Kong <joyce.kong@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com> --- app/test-bbdev/test_bbdev_perf.c | 135 ++++++++++++++----------------- 1 file changed, 59 insertions(+), 76 deletions(-) diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c index 469597b8b3..dc62e16216 100644 --- a/app/test-bbdev/test_bbdev_perf.c +++ b/app/test-bbdev/test_bbdev_perf.c @@ -133,7 +133,7 @@ struct test_op_params { uint16_t num_to_process; uint16_t num_lcores; int vector_mask; - rte_atomic16_t sync; + uint16_t sync; struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES]; }; @@ -148,9 +148,9 @@ struct thread_params { uint8_t iter_count; double iter_average; double bler; - rte_atomic16_t nb_dequeued; - rte_atomic16_t processing_status; - rte_atomic16_t burst_sz; + uint16_t nb_dequeued; + int16_t processing_status; + uint16_t burst_sz; struct test_op_params *op_params; struct rte_bbdev_dec_op *dec_ops[MAX_BURST]; struct rte_bbdev_enc_op *enc_ops[MAX_BURST]; @@ -2594,46 +2594,46 @@ dequeue_event_callback(uint16_t dev_id, } if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) { - rte_atomic16_set(&tp->processing_status, TEST_FAILED); + __atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED); printf( "Dequeue interrupt handler called for incorrect event!\n"); return; } - burst_sz = rte_atomic16_read(&tp->burst_sz); + burst_sz = __atomic_load_n(&tp->burst_sz, __ATOMIC_RELAXED); num_ops = tp->op_params->num_to_process; if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id, &tp->dec_ops[ - rte_atomic16_read(&tp->nb_dequeued)], + __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)], burst_sz); else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, &tp->dec_ops[ - rte_atomic16_read(&tp->nb_dequeued)], + 
__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)], burst_sz); else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, &tp->enc_ops[ - rte_atomic16_read(&tp->nb_dequeued)], + __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)], burst_sz); else /*RTE_BBDEV_OP_TURBO_ENC*/ deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id, &tp->enc_ops[ - rte_atomic16_read(&tp->nb_dequeued)], + __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)], burst_sz); if (deq < burst_sz) { printf( "After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n", burst_sz, deq); - rte_atomic16_set(&tp->processing_status, TEST_FAILED); + __atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED); return; } - if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) { - rte_atomic16_add(&tp->nb_dequeued, deq); + if (__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) + deq < num_ops) { + __atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED); return; } @@ -2670,7 +2670,7 @@ dequeue_event_callback(uint16_t dev_id, if (ret) { printf("Buffers validation failed\n"); - rte_atomic16_set(&tp->processing_status, TEST_FAILED); + __atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED); } switch (test_vector.op_type) { @@ -2691,7 +2691,7 @@ dequeue_event_callback(uint16_t dev_id, break; default: printf("Unknown op type: %d\n", test_vector.op_type); - rte_atomic16_set(&tp->processing_status, TEST_FAILED); + __atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED); return; } @@ -2700,7 +2700,7 @@ dequeue_event_callback(uint16_t dev_id, tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) / ((double)total_time / (double)rte_get_tsc_hz()); - rte_atomic16_add(&tp->nb_dequeued, deq); + __atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED); } static int @@ -2738,11 +2738,10 @@ throughput_intr_lcore_ldpc_dec(void *arg) bufs = 
&tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; - rte_atomic16_clear(&tp->processing_status); - rte_atomic16_clear(&tp->nb_dequeued); + __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED); + __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); - while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) - rte_pause(); + rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops, num_to_process); @@ -2790,17 +2789,15 @@ throughput_intr_lcore_ldpc_dec(void *arg) * the number of operations is not a multiple of * burst size. */ - rte_atomic16_set(&tp->burst_sz, num_to_enq); + __atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED); /* Wait until processing of previous batch is * completed */ - while (rte_atomic16_read(&tp->nb_dequeued) != - (int16_t) enqueued) - rte_pause(); + rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED); } if (j != TEST_REPETITIONS - 1) - rte_atomic16_clear(&tp->nb_dequeued); + __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); } return TEST_SUCCESS; @@ -2835,11 +2832,10 @@ throughput_intr_lcore_dec(void *arg) bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; - rte_atomic16_clear(&tp->processing_status); - rte_atomic16_clear(&tp->nb_dequeued); + __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED); + __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); - while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) - rte_pause(); + rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops, num_to_process); @@ -2880,17 +2876,15 @@ throughput_intr_lcore_dec(void *arg) * the number of operations is not a multiple of * burst size. 
*/ - rte_atomic16_set(&tp->burst_sz, num_to_enq); + __atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED); /* Wait until processing of previous batch is * completed */ - while (rte_atomic16_read(&tp->nb_dequeued) != - (int16_t) enqueued) - rte_pause(); + rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED); } if (j != TEST_REPETITIONS - 1) - rte_atomic16_clear(&tp->nb_dequeued); + __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); } return TEST_SUCCESS; @@ -2925,11 +2919,10 @@ throughput_intr_lcore_enc(void *arg) bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; - rte_atomic16_clear(&tp->processing_status); - rte_atomic16_clear(&tp->nb_dequeued); + __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED); + __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); - while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) - rte_pause(); + rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops, num_to_process); @@ -2969,17 +2962,15 @@ throughput_intr_lcore_enc(void *arg) * the number of operations is not a multiple of * burst size. 
*/ - rte_atomic16_set(&tp->burst_sz, num_to_enq); + __atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED); /* Wait until processing of previous batch is * completed */ - while (rte_atomic16_read(&tp->nb_dequeued) != - (int16_t) enqueued) - rte_pause(); + rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED); } if (j != TEST_REPETITIONS - 1) - rte_atomic16_clear(&tp->nb_dequeued); + __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); } return TEST_SUCCESS; @@ -3015,11 +3006,10 @@ throughput_intr_lcore_ldpc_enc(void *arg) bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; - rte_atomic16_clear(&tp->processing_status); - rte_atomic16_clear(&tp->nb_dequeued); + __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED); + __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); - while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) - rte_pause(); + rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops, num_to_process); @@ -3061,17 +3051,15 @@ throughput_intr_lcore_ldpc_enc(void *arg) * the number of operations is not a multiple of * burst size. 
*/ - rte_atomic16_set(&tp->burst_sz, num_to_enq); + __atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED); /* Wait until processing of previous batch is * completed */ - while (rte_atomic16_read(&tp->nb_dequeued) != - (int16_t) enqueued) - rte_pause(); + rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED); } if (j != TEST_REPETITIONS - 1) - rte_atomic16_clear(&tp->nb_dequeued); + __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); } return TEST_SUCCESS; @@ -3105,8 +3093,7 @@ throughput_pmd_lcore_dec(void *arg) bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; - while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) - rte_pause(); + rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); @@ -3209,8 +3196,7 @@ bler_pmd_lcore_ldpc_dec(void *arg) bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; - while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) - rte_pause(); + rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); @@ -3339,8 +3325,7 @@ throughput_pmd_lcore_ldpc_dec(void *arg) bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; - while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) - rte_pause(); + rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); @@ -3456,8 +3441,7 @@ throughput_pmd_lcore_enc(void *arg) bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; - while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) - rte_pause(); + 
rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); @@ -3547,8 +3531,7 @@ throughput_pmd_lcore_ldpc_enc(void *arg) bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; - while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) - rte_pause(); + rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); @@ -3731,7 +3714,7 @@ bler_test(struct active_device *ad, else return TEST_SKIPPED; - rte_atomic16_set(&op_params->sync, SYNC_WAIT); + __atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED); /* Main core is set at first entry */ t_params[0].dev_id = ad->dev_id; @@ -3754,7 +3737,7 @@ bler_test(struct active_device *ad, &t_params[used_cores++], lcore_id); } - rte_atomic16_set(&op_params->sync, SYNC_START); + __atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED); ret = bler_function(&t_params[0]); /* Main core is always used */ @@ -3849,7 +3832,7 @@ throughput_test(struct active_device *ad, throughput_function = throughput_pmd_lcore_enc; } - rte_atomic16_set(&op_params->sync, SYNC_WAIT); + __atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED); /* Main core is set at first entry */ t_params[0].dev_id = ad->dev_id; @@ -3872,7 +3855,7 @@ throughput_test(struct active_device *ad, &t_params[used_cores++], lcore_id); } - rte_atomic16_set(&op_params->sync, SYNC_START); + __atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED); ret = throughput_function(&t_params[0]); /* Main core is always used */ @@ -3902,29 +3885,29 @@ throughput_test(struct active_device *ad, * Wait for main lcore operations. 
*/ tp = &t_params[0]; - while ((rte_atomic16_read(&tp->nb_dequeued) < - op_params->num_to_process) && - (rte_atomic16_read(&tp->processing_status) != - TEST_FAILED)) + while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) < + op_params->num_to_process) && + (__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) != + TEST_FAILED)) rte_pause(); tp->ops_per_sec /= TEST_REPETITIONS; tp->mbps /= TEST_REPETITIONS; - ret |= (int)rte_atomic16_read(&tp->processing_status); + ret |= (int)__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED); /* Wait for worker lcores operations */ for (used_cores = 1; used_cores < num_lcores; used_cores++) { tp = &t_params[used_cores]; - while ((rte_atomic16_read(&tp->nb_dequeued) < - op_params->num_to_process) && - (rte_atomic16_read(&tp->processing_status) != - TEST_FAILED)) + while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) < + op_params->num_to_process) && + (__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) != + TEST_FAILED)) rte_pause(); tp->ops_per_sec /= TEST_REPETITIONS; tp->mbps /= TEST_REPETITIONS; - ret |= (int)rte_atomic16_read(&tp->processing_status); + ret |= (int)__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED); } /* Print throughput if test passed */ -- 2.17.1