DPDK patches and discussions
From: Tyler Retzlaff <roretzla@linux.microsoft.com>
To: dev@dpdk.org
Cc: Gaetan Rivet <grive@u256.net>,
	Bruce Richardson <bruce.richardson@intel.com>,
	Thomas Monjalon <thomas@monjalon.net>,
	Nicolas Chautru <nicolas.chautru@intel.com>,
	Yipeng Wang <yipeng1.wang@intel.com>,
	Sameh Gobriel <sameh.gobriel@intel.com>,
	Vladimir Medvedkin <vladimir.medvedkin@intel.com>,
	Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>,
	Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>,
	Anatoly Burakov <anatoly.burakov@intel.com>,
	Olivier Matz <olivier.matz@6wind.com>,
	Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>,
	Joyce Kong <joyce.kong@arm.com>,
	Erik Gabriel Carrillo <erik.g.carrillo@intel.com>,
	Liang Ma <liangma@liangbit.com>,
	Peter Mccarthy <peter.mccarthy@intel.com>,
	Jerin Jacob <jerinj@marvell.com>,
	Maciej Czekaj <mczekaj@marvell.com>,
	David Hunt <david.hunt@intel.com>,
	Ruifeng Wang <ruifeng.wang@arm.com>,
	Min Zhou <zhoumin@loongson.cn>,
	David Christensen <drc@linux.vnet.ibm.com>,
	Stanislaw Kardach <kda@semihalf.com>,
	david.marchand@redhat.com, stephen@networkplumber.org,
	mb@smartsharesystems.com,
	Tyler Retzlaff <roretzla@linux.microsoft.com>
Subject: [PATCH v2 3/4] eal: adapt rte pause APIs to use C11 atomics
Date: Mon, 31 Jul 2023 22:03:53 -0700
Message-ID: <1690866234-28365-4-git-send-email-roretzla@linux.microsoft.com>
In-Reply-To: <1690866234-28365-1-git-send-email-roretzla@linux.microsoft.com>

Adapt the rte_pause.h APIs to use standard C11 atomics: waited-on
variables become _Atomic-qualified and the memory order parameter now
takes the stdatomic.h memory_order enum instead of the GCC __ATOMIC_*
constants. Update consumers of the pause APIs for the API break.
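
As a rough illustration of the consumer-side change (a minimal sketch
distilled from the app/test updates below, not a verbatim excerpt;
function names are illustrative), a synchronization flag polled with
the pause APIs now reads:

  #include <stdatomic.h>
  #include <rte_pause.h>

  /* was: static uint32_t synchro; */
  static uint32_t _Atomic synchro;

  static void
  worker_wait(void)
  {
  	/* was: rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED); */
  	rte_wait_until_equal_32(&synchro, 1, memory_order_relaxed);
  }

  static void
  main_release(void)
  {
  	/* was: __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED); */
  	atomic_store_explicit(&synchro, 1, memory_order_relaxed);
  }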

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 app/test-bbdev/test_bbdev_perf.c         | 123 ++++++++++++++++++++-----------
 app/test/test_func_reentrancy.c          |   8 +-
 app/test/test_mcslock.c                  |  12 +--
 app/test/test_mempool_perf.c             |   8 +-
 app/test/test_pflock.c                   |  12 +--
 app/test/test_pmd_perf.c                 |  10 +--
 app/test/test_ring_perf.c                |   8 +-
 app/test/test_rwlock.c                   |   8 +-
 app/test/test_spinlock.c                 |   8 +-
 app/test/test_stack_perf.c               |  12 +--
 app/test/test_ticketlock.c               |   8 +-
 app/test/test_timer.c                    |  16 ++--
 drivers/event/opdl/opdl_ring.c           |  47 ++++++------
 drivers/net/thunderx/nicvf_rxtx.c        |   5 +-
 drivers/net/thunderx/nicvf_struct.h      |   2 +-
 lib/bpf/bpf_pkt.c                        |   4 +-
 lib/distributor/distributor_private.h    |   2 +-
 lib/distributor/rte_distributor_single.c |  44 +++++------
 lib/eal/arm/include/rte_pause_64.h       |  28 +++----
 lib/eal/common/eal_memcfg.h              |   2 +-
 lib/eal/include/generic/rte_pause.h      |  52 ++++++-------
 lib/eal/include/rte_mcslock.h            |  12 +--
 lib/eal/include/rte_pflock.h             |  22 +++---
 lib/eal/include/rte_ticketlock.h         |   8 +-
 lib/eal/loongarch/include/rte_pause.h    |   2 -
 lib/eal/ppc/include/rte_pause.h          |   2 -
 lib/eal/riscv/include/rte_pause.h        |   2 -
 lib/ring/rte_ring_c11_pvt.h              |  28 +++----
 lib/ring/rte_ring_core.h                 |   4 +-
 lib/ring/rte_ring_generic_pvt.h          |  19 +++--
 lib/ring/rte_ring_peek_elem_pvt.h        |   2 +-
 31 files changed, 280 insertions(+), 240 deletions(-)

diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c
index 276bbf0..c3a3a28 100644
--- a/app/test-bbdev/test_bbdev_perf.c
+++ b/app/test-bbdev/test_bbdev_perf.c
@@ -143,7 +143,7 @@ struct test_op_params {
 	uint16_t num_to_process;
 	uint16_t num_lcores;
 	int vector_mask;
-	uint16_t sync;
+	uint16_t _Atomic sync;
 	struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
 };
 
@@ -158,7 +158,7 @@ struct thread_params {
 	uint8_t iter_count;
 	double iter_average;
 	double bler;
-	uint16_t nb_dequeued;
+	uint16_t _Atomic nb_dequeued;
 	int16_t processing_status;
 	uint16_t burst_sz;
 	struct test_op_params *op_params;
@@ -3021,27 +3021,32 @@ typedef int (test_case_function)(struct active_device *ad,
 	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
 		deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
 				&tp->dec_ops[
-					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
+					atomic_load_explicit(&tp->nb_dequeued,
+						memory_order_relaxed)],
 				burst_sz);
 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
 		deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
 				&tp->dec_ops[
-					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
+					atomic_load_explicit(&tp->nb_dequeued,
+						memory_order_relaxed)],
 				burst_sz);
 	else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
 		deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
 				&tp->enc_ops[
-					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
+					atomic_load_explicit(&tp->nb_dequeued,
+						memory_order_relaxed)],
 				burst_sz);
 	else if (test_vector.op_type == RTE_BBDEV_OP_FFT)
 		deq = rte_bbdev_dequeue_fft_ops(dev_id, queue_id,
 				&tp->fft_ops[
-					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
+					atomic_load_explicit(&tp->nb_dequeued,
+						memory_order_relaxed)],
 				burst_sz);
 	else /*RTE_BBDEV_OP_TURBO_ENC*/
 		deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
 				&tp->enc_ops[
-					__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)],
+					atomic_load_explicit(&tp->nb_dequeued,
+						memory_order_relaxed)],
 				burst_sz);
 
 	if (deq < burst_sz) {
@@ -3052,8 +3057,9 @@ typedef int (test_case_function)(struct active_device *ad,
 		return;
 	}
 
-	if (__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) + deq < num_ops) {
-		__atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED);
+	if (atomic_load_explicit(&tp->nb_dequeued, memory_order_relaxed) + deq < num_ops) {
+		atomic_fetch_add_explicit(&tp->nb_dequeued, deq,
+			memory_order_relaxed);
 		return;
 	}
 
@@ -3126,7 +3132,8 @@ typedef int (test_case_function)(struct active_device *ad,
 	tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) /
 			((double)total_time / (double)rte_get_tsc_hz());
 
-	__atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED);
+	atomic_fetch_add_explicit(&tp->nb_dequeued, deq,
+		memory_order_relaxed);
 }
 
 static int
@@ -3165,9 +3172,10 @@ typedef int (test_case_function)(struct active_device *ad,
 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
+	atomic_store_explicit(&tp->nb_dequeued, 0, memory_order_relaxed);
 
-	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
+		memory_order_relaxed);
 
 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
 				num_to_process);
@@ -3222,10 +3230,12 @@ typedef int (test_case_function)(struct active_device *ad,
 			/* Wait until processing of previous batch is
 			 * completed
 			 */
-			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
+			rte_wait_until_equal_16(&tp->nb_dequeued,
+				enqueued, memory_order_relaxed);
 		}
 		if (j != TEST_REPETITIONS - 1)
-			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
+			atomic_store_explicit(&tp->nb_dequeued, 0,
+				memory_order_relaxed);
 	}
 
 	return TEST_SUCCESS;
@@ -3262,9 +3272,10 @@ typedef int (test_case_function)(struct active_device *ad,
 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
+	atomic_store_explicit(&tp->nb_dequeued, 0, memory_order_relaxed);
 
-	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
+		memory_order_relaxed);
 
 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops,
 				num_to_process);
@@ -3313,10 +3324,12 @@ typedef int (test_case_function)(struct active_device *ad,
 			/* Wait until processing of previous batch is
 			 * completed
 			 */
-			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
+			rte_wait_until_equal_16(&tp->nb_dequeued,
+				enqueued, memory_order_relaxed);
 		}
 		if (j != TEST_REPETITIONS - 1)
-			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
+			atomic_store_explicit(&tp->nb_dequeued, 0,
+				memory_order_relaxed);
 	}
 
 	return TEST_SUCCESS;
@@ -3352,9 +3365,10 @@ typedef int (test_case_function)(struct active_device *ad,
 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
+	atomic_store_explicit(&tp->nb_dequeued, 0, memory_order_relaxed);
 
-	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
+		memory_order_relaxed);
 
 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
 			num_to_process);
@@ -3399,10 +3413,12 @@ typedef int (test_case_function)(struct active_device *ad,
 			/* Wait until processing of previous batch is
 			 * completed
 			 */
-			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
+			rte_wait_until_equal_16(&tp->nb_dequeued,
+				enqueued, memory_order_relaxed);
 		}
 		if (j != TEST_REPETITIONS - 1)
-			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
+			atomic_store_explicit(&tp->nb_dequeued, 0,
+				memory_order_relaxed);
 	}
 
 	return TEST_SUCCESS;
@@ -3439,9 +3455,10 @@ typedef int (test_case_function)(struct active_device *ad,
 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
+	atomic_store_explicit(&tp->nb_dequeued, 0, memory_order_relaxed);
 
-	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
+		memory_order_relaxed);
 
 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops,
 			num_to_process);
@@ -3488,10 +3505,12 @@ typedef int (test_case_function)(struct active_device *ad,
 			/* Wait until processing of previous batch is
 			 * completed
 			 */
-			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
+			rte_wait_until_equal_16(&tp->nb_dequeued,
+				enqueued, memory_order_relaxed);
 		}
 		if (j != TEST_REPETITIONS - 1)
-			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
+			atomic_store_explicit(&tp->nb_dequeued, 0,
+				memory_order_relaxed);
 	}
 
 	return TEST_SUCCESS;
@@ -3528,9 +3547,10 @@ typedef int (test_case_function)(struct active_device *ad,
 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
 	__atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
+	atomic_store_explicit(&tp->nb_dequeued, 0, memory_order_relaxed);
 
-	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
+		memory_order_relaxed);
 
 	ret = rte_bbdev_fft_op_alloc_bulk(tp->op_params->mp, ops,
 			num_to_process);
@@ -3575,10 +3595,12 @@ typedef int (test_case_function)(struct active_device *ad,
 			/* Wait until processing of previous batch is
 			 * completed
 			 */
-			rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED);
+			rte_wait_until_equal_16(&tp->nb_dequeued,
+				enqueued, memory_order_relaxed);
 		}
 		if (j != TEST_REPETITIONS - 1)
-			__atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED);
+			atomic_store_explicit(&tp->nb_dequeued, 0,
+				memory_order_relaxed);
 	}
 
 	return TEST_SUCCESS;
@@ -3613,7 +3635,8 @@ typedef int (test_case_function)(struct active_device *ad,
 
 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
-	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
+		memory_order_relaxed);
 
 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
@@ -3732,7 +3755,8 @@ typedef int (test_case_function)(struct active_device *ad,
 
 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
-	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
+		memory_order_relaxed);
 
 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
@@ -3867,7 +3891,8 @@ typedef int (test_case_function)(struct active_device *ad,
 
 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
-	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
+		memory_order_relaxed);
 
 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
@@ -3990,7 +4015,8 @@ typedef int (test_case_function)(struct active_device *ad,
 
 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
-	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
+		memory_order_relaxed);
 
 	ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
@@ -4121,7 +4147,8 @@ typedef int (test_case_function)(struct active_device *ad,
 
 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
-	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
+		memory_order_relaxed);
 
 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
 			num_ops);
@@ -4222,7 +4249,8 @@ typedef int (test_case_function)(struct active_device *ad,
 
 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
-	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
+		memory_order_relaxed);
 
 	ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq,
 			num_ops);
@@ -4323,7 +4351,8 @@ typedef int (test_case_function)(struct active_device *ad,
 
 	bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];
 
-	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+	rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START,
+		memory_order_relaxed);
 
 	ret = rte_bbdev_fft_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops);
 	TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops);
@@ -4519,7 +4548,8 @@ typedef int (test_case_function)(struct active_device *ad,
 	else
 		return TEST_SKIPPED;
 
-	__atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED);
+	atomic_store_explicit(&op_params->sync, SYNC_WAIT,
+		memory_order_relaxed);
 
 	/* Main core is set at first entry */
 	t_params[0].dev_id = ad->dev_id;
@@ -4542,7 +4572,8 @@ typedef int (test_case_function)(struct active_device *ad,
 				&t_params[used_cores++], lcore_id);
 	}
 
-	__atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+	atomic_store_explicit(&op_params->sync, SYNC_START,
+		memory_order_relaxed);
 	ret = bler_function(&t_params[0]);
 
 	/* Main core is always used */
@@ -4641,7 +4672,8 @@ typedef int (test_case_function)(struct active_device *ad,
 			throughput_function = throughput_pmd_lcore_enc;
 	}
 
-	__atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED);
+	atomic_store_explicit(&op_params->sync, SYNC_WAIT,
+		memory_order_relaxed);
 
 	/* Main core is set at first entry */
 	t_params[0].dev_id = ad->dev_id;
@@ -4664,7 +4696,8 @@ typedef int (test_case_function)(struct active_device *ad,
 				&t_params[used_cores++], lcore_id);
 	}
 
-	__atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED);
+	atomic_store_explicit(&op_params->sync, SYNC_START,
+		memory_order_relaxed);
 	ret = throughput_function(&t_params[0]);
 
 	/* Main core is always used */
@@ -4694,8 +4727,8 @@ typedef int (test_case_function)(struct active_device *ad,
 	 * Wait for main lcore operations.
 	 */
 	tp = &t_params[0];
-	while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) <
-		op_params->num_to_process) &&
+	while ((atomic_load_explicit(&tp->nb_dequeued,
+		memory_order_relaxed) < op_params->num_to_process) &&
 		(__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) !=
 		TEST_FAILED))
 		rte_pause();
@@ -4708,8 +4741,8 @@ typedef int (test_case_function)(struct active_device *ad,
 	for (used_cores = 1; used_cores < num_lcores; used_cores++) {
 		tp = &t_params[used_cores];
 
-		while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) <
-			op_params->num_to_process) &&
+		while ((atomic_load_explicit(&tp->nb_dequeued,
+			memory_order_relaxed) < op_params->num_to_process) &&
 			(__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) !=
 			TEST_FAILED))
 			rte_pause();
diff --git a/app/test/test_func_reentrancy.c b/app/test/test_func_reentrancy.c
index ae9de6f..833ceaf 100644
--- a/app/test/test_func_reentrancy.c
+++ b/app/test/test_func_reentrancy.c
@@ -54,11 +54,11 @@
 #define MAX_LCORES	(rte_memzone_max_get() / (MAX_ITER_MULTI * 4U))
 
 static uint32_t obj_count;
-static uint32_t synchro;
+static uint32_t _Atomic synchro;
 
 #define WAIT_SYNCHRO_FOR_WORKERS()   do { \
 	if (lcore_self != rte_get_main_lcore())                  \
-		rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED); \
+		rte_wait_until_equal_32(&synchro, 1, memory_order_relaxed); \
 } while(0)
 
 /*
@@ -438,7 +438,7 @@ struct test_case test_cases[] = {
 		return -1;
 
 	__atomic_store_n(&obj_count, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+	atomic_store_explicit(&synchro, 0, memory_order_relaxed);
 
 	cores = RTE_MIN(rte_lcore_count(), MAX_LCORES);
 	RTE_LCORE_FOREACH_WORKER(lcore_id) {
@@ -448,7 +448,7 @@ struct test_case test_cases[] = {
 		rte_eal_remote_launch(pt_case->func, pt_case->arg, lcore_id);
 	}
 
-	__atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+	atomic_store_explicit(&synchro, 1, memory_order_relaxed);
 
 	if (pt_case->func(pt_case->arg) < 0)
 		ret = -1;
diff --git a/app/test/test_mcslock.c b/app/test/test_mcslock.c
index 52e45e7..3c26c69 100644
--- a/app/test/test_mcslock.c
+++ b/app/test/test_mcslock.c
@@ -42,7 +42,7 @@
 
 static unsigned int count;
 
-static uint32_t synchro;
+static uint32_t _Atomic synchro;
 
 static int
 test_mcslock_per_core(__rte_unused void *arg)
@@ -75,7 +75,7 @@
 	rte_mcslock_t ml_perf_me;
 
 	/* wait synchro */
-	rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED);
+	rte_wait_until_equal_32(&synchro, 1, memory_order_relaxed);
 
 	begin = rte_get_timer_cycles();
 	while (lcount < MAX_LOOP) {
@@ -100,14 +100,14 @@
 	const unsigned int lcore = rte_lcore_id();
 
 	printf("\nTest with no lock on single core...\n");
-	__atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+	atomic_store_explicit(&synchro, 1, memory_order_relaxed);
 	load_loop_fn(&lock);
 	printf("Core [%u] Cost Time = %"PRIu64" us\n",
 			lcore, time_count[lcore]);
 	memset(time_count, 0, sizeof(time_count));
 
 	printf("\nTest with lock on single core...\n");
-	__atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+	atomic_store_explicit(&synchro, 1, memory_order_relaxed);
 	lock = 1;
 	load_loop_fn(&lock);
 	printf("Core [%u] Cost Time = %"PRIu64" us\n",
@@ -116,11 +116,11 @@
 
 	printf("\nTest with lock on %u cores...\n", (rte_lcore_count()));
 
-	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+	atomic_store_explicit(&synchro, 0, memory_order_relaxed);
 	rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MAIN);
 
 	/* start synchro and launch test on main */
-	__atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+	atomic_store_explicit(&synchro, 1, memory_order_relaxed);
 	load_loop_fn(&lock);
 
 	rte_eal_mp_wait_lcore();
diff --git a/app/test/test_mempool_perf.c b/app/test/test_mempool_perf.c
index ce7c624..06f9fc1 100644
--- a/app/test/test_mempool_perf.c
+++ b/app/test/test_mempool_perf.c
@@ -88,7 +88,7 @@
 static int use_external_cache;
 static unsigned external_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;
 
-static uint32_t synchro;
+static uint32_t _Atomic synchro;
 
 /* number of objects in one bulk operation (get or put) */
 static unsigned n_get_bulk;
@@ -188,7 +188,7 @@ struct mempool_test_stats {
 
 	/* wait synchro for workers */
 	if (lcore_id != rte_get_main_lcore())
-		rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED);
+		rte_wait_until_equal_32(&synchro, 1, memory_order_relaxed);
 
 	start_cycles = rte_get_timer_cycles();
 
@@ -233,7 +233,7 @@ struct mempool_test_stats {
 	int ret;
 	unsigned cores_save = cores;
 
-	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+	atomic_store_explicit(&synchro, 0, memory_order_relaxed);
 
 	/* reset stats */
 	memset(stats, 0, sizeof(stats));
@@ -258,7 +258,7 @@ struct mempool_test_stats {
 	}
 
 	/* start synchro and launch test on main */
-	__atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+	atomic_store_explicit(&synchro, 1, memory_order_relaxed);
 
 	ret = per_lcore_mempool_test(mp);
 
diff --git a/app/test/test_pflock.c b/app/test/test_pflock.c
index 38da6bc..28addf8 100644
--- a/app/test/test_pflock.c
+++ b/app/test/test_pflock.c
@@ -31,7 +31,7 @@
 
 static rte_pflock_t sl;
 static rte_pflock_t sl_tab[RTE_MAX_LCORE];
-static uint32_t synchro;
+static uint32_t _Atomic synchro;
 
 static int
 test_pflock_per_core(__rte_unused void *arg)
@@ -69,7 +69,7 @@
 
 	/* wait synchro for workers */
 	if (lcore != rte_get_main_lcore())
-		rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED);
+		rte_wait_until_equal_32(&synchro, 1, memory_order_relaxed);
 
 	begin = rte_rdtsc_precise();
 	while (lcount < MAX_LOOP) {
@@ -99,7 +99,7 @@
 	const unsigned int lcore = rte_lcore_id();
 
 	printf("\nTest with no lock on single core...\n");
-	__atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+	atomic_store_explicit(&synchro, 1, memory_order_relaxed);
 	load_loop_fn(&lock);
 	printf("Core [%u] Cost Time = %"PRIu64" us\n",
 			lcore, time_count[lcore]);
@@ -107,7 +107,7 @@
 
 	printf("\nTest with phase-fair lock on single core...\n");
 	lock = 1;
-	__atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+	atomic_store_explicit(&synchro, 1, memory_order_relaxed);
 	load_loop_fn(&lock);
 	printf("Core [%u] Cost Time = %"PRIu64" us\n",
 			lcore, time_count[lcore]);
@@ -116,12 +116,12 @@
 	printf("\nPhase-fair test on %u cores...\n", rte_lcore_count());
 
 	/* clear synchro and start workers */
-	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+	atomic_store_explicit(&synchro, 0, memory_order_relaxed);
 	if (rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MAIN) < 0)
 		return -1;
 
 	/* start synchro and launch test on main */
-	__atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+	atomic_store_explicit(&synchro, 1, memory_order_relaxed);
 	load_loop_fn(&lock);
 
 	rte_eal_mp_wait_lcore();
diff --git a/app/test/test_pmd_perf.c b/app/test/test_pmd_perf.c
index 3ef590c..1562bbb 100644
--- a/app/test/test_pmd_perf.c
+++ b/app/test/test_pmd_perf.c
@@ -537,7 +537,7 @@ enum {
 	return 0;
 }
 
-static uint64_t start;
+static uint64_t _Atomic start;
 
 static inline int
 poll_burst(void *args)
@@ -575,7 +575,7 @@ enum {
 		num[portid] = pkt_per_port;
 	}
 
-	rte_wait_until_equal_64(&start, 1, __ATOMIC_ACQUIRE);
+	rte_wait_until_equal_64(&start, 1, memory_order_acquire);
 
 	cur_tsc = rte_rdtsc();
 	while (total) {
@@ -629,9 +629,9 @@ enum {
 
 	/* only when polling first */
 	if (flags == SC_BURST_POLL_FIRST)
-		__atomic_store_n(&start, 1, __ATOMIC_RELAXED);
+		atomic_store_explicit(&start, 1, memory_order_relaxed);
 	else
-		__atomic_store_n(&start, 0, __ATOMIC_RELAXED);
+		atomic_store_explicit(&start, 0, memory_order_relaxed);
 
 	/* start polling thread
 	 * if in POLL_FIRST mode, poll once launched;
@@ -655,7 +655,7 @@ enum {
 
 	/* only when polling second  */
 	if (flags == SC_BURST_XMIT_FIRST)
-		__atomic_store_n(&start, 1, __ATOMIC_RELEASE);
+		atomic_store_explicit(&start, 1, memory_order_release);
 
 	/* wait for polling finished */
 	diff_tsc = rte_eal_wait_lcore(lcore);
diff --git a/app/test/test_ring_perf.c b/app/test/test_ring_perf.c
index 3972fd9..f0f5ef1 100644
--- a/app/test/test_ring_perf.c
+++ b/app/test/test_ring_perf.c
@@ -320,7 +320,7 @@ struct thread_params {
 	return 0;
 }
 
-static uint32_t synchro;
+static uint32_t _Atomic synchro;
 static uint64_t queue_count[RTE_MAX_LCORE];
 
 #define TIME_MS 100
@@ -342,7 +342,7 @@ struct thread_params {
 
 	/* wait synchro for workers */
 	if (lcore != rte_get_main_lcore())
-		rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED);
+		rte_wait_until_equal_32(&synchro, 1, memory_order_relaxed);
 
 	begin = rte_get_timer_cycles();
 	while (time_diff < hz * TIME_MS / 1000) {
@@ -397,12 +397,12 @@ struct thread_params {
 		param.r = r;
 
 		/* clear synchro and start workers */
-		__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+		atomic_store_explicit(&synchro, 0, memory_order_relaxed);
 		if (rte_eal_mp_remote_launch(lcore_f, &param, SKIP_MAIN) < 0)
 			return -1;
 
 		/* start synchro and launch test on main */
-		__atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+		atomic_store_explicit(&synchro, 1, memory_order_relaxed);
 		lcore_f(&param);
 
 		rte_eal_mp_wait_lcore();
diff --git a/app/test/test_rwlock.c b/app/test/test_rwlock.c
index 4ae0bf8..dfbd0d6 100644
--- a/app/test/test_rwlock.c
+++ b/app/test/test_rwlock.c
@@ -35,7 +35,7 @@
 
 static rte_rwlock_t sl;
 static rte_rwlock_t sl_tab[RTE_MAX_LCORE];
-static uint32_t synchro;
+static uint32_t _Atomic synchro;
 
 enum {
 	LC_TYPE_RDLOCK,
@@ -101,7 +101,7 @@ struct try_rwlock_lcore {
 
 	/* wait synchro for workers */
 	if (lcore != rte_get_main_lcore())
-		rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED);
+		rte_wait_until_equal_32(&synchro, 1, memory_order_relaxed);
 
 	begin = rte_rdtsc_precise();
 	while (lcount < MAX_LOOP) {
@@ -134,12 +134,12 @@ struct try_rwlock_lcore {
 	printf("\nRwlock Perf Test on %u cores...\n", rte_lcore_count());
 
 	/* clear synchro and start workers */
-	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+	atomic_store_explicit(&synchro, 0, memory_order_relaxed);
 	if (rte_eal_mp_remote_launch(load_loop_fn, NULL, SKIP_MAIN) < 0)
 		return -1;
 
 	/* start synchro and launch test on main */
-	__atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+	atomic_store_explicit(&synchro, 1, memory_order_relaxed);
 	load_loop_fn(NULL);
 
 	rte_eal_mp_wait_lcore();
diff --git a/app/test/test_spinlock.c b/app/test/test_spinlock.c
index 3f59372..38724a1 100644
--- a/app/test/test_spinlock.c
+++ b/app/test/test_spinlock.c
@@ -48,7 +48,7 @@
 static rte_spinlock_recursive_t slr;
 static unsigned count = 0;
 
-static uint32_t synchro;
+static uint32_t _Atomic synchro;
 
 static int
 test_spinlock_per_core(__rte_unused void *arg)
@@ -110,7 +110,7 @@
 
 	/* wait synchro for workers */
 	if (lcore != rte_get_main_lcore())
-		rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED);
+		rte_wait_until_equal_32(&synchro, 1, memory_order_relaxed);
 
 	begin = rte_get_timer_cycles();
 	while (lcount < MAX_LOOP) {
@@ -149,11 +149,11 @@
 	printf("\nTest with lock on %u cores...\n", rte_lcore_count());
 
 	/* Clear synchro and start workers */
-	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+	atomic_store_explicit(&synchro, 0, memory_order_relaxed);
 	rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MAIN);
 
 	/* start synchro and launch test on main */
-	__atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+	atomic_store_explicit(&synchro, 1, memory_order_relaxed);
 	load_loop_fn(&lock);
 
 	rte_eal_mp_wait_lcore();
diff --git a/app/test/test_stack_perf.c b/app/test/test_stack_perf.c
index 1eae00a..67510d6 100644
--- a/app/test/test_stack_perf.c
+++ b/app/test/test_stack_perf.c
@@ -23,7 +23,7 @@
  */
 static volatile unsigned int bulk_sizes[] = {8, MAX_BURST};
 
-static uint32_t lcore_barrier;
+static uint32_t _Atomic lcore_barrier;
 
 struct lcore_pair {
 	unsigned int c1;
@@ -143,8 +143,8 @@ struct thread_args {
 	s = args->s;
 	size = args->sz;
 
-	__atomic_fetch_sub(&lcore_barrier, 1, __ATOMIC_RELAXED);
-	rte_wait_until_equal_32(&lcore_barrier, 0, __ATOMIC_RELAXED);
+	atomic_fetch_sub_explicit(&lcore_barrier, 1, memory_order_relaxed);
+	rte_wait_until_equal_32(&lcore_barrier, 0, memory_order_relaxed);
 
 	uint64_t start = rte_rdtsc();
 
@@ -173,7 +173,7 @@ struct thread_args {
 	unsigned int i;
 
 	for (i = 0; i < RTE_DIM(bulk_sizes); i++) {
-		__atomic_store_n(&lcore_barrier, 2, __ATOMIC_RELAXED);
+		atomic_store_explicit(&lcore_barrier, 2, memory_order_relaxed);
 
 		args[0].sz = args[1].sz = bulk_sizes[i];
 		args[0].s = args[1].s = s;
@@ -206,7 +206,7 @@ struct thread_args {
 		int cnt = 0;
 		double avg;
 
-		__atomic_store_n(&lcore_barrier, n, __ATOMIC_RELAXED);
+		atomic_store_explicit(&lcore_barrier, n, memory_order_relaxed);
 
 		RTE_LCORE_FOREACH_WORKER(lcore_id) {
 			if (++cnt >= n)
@@ -300,7 +300,7 @@ struct thread_args {
 	struct lcore_pair cores;
 	struct rte_stack *s;
 
-	__atomic_store_n(&lcore_barrier, 0, __ATOMIC_RELAXED);
+	atomic_store_explicit(&lcore_barrier, 0, memory_order_relaxed);
 
 	s = rte_stack_create(STACK_NAME, STACK_SIZE, rte_socket_id(), flags);
 	if (s == NULL) {
diff --git a/app/test/test_ticketlock.c b/app/test/test_ticketlock.c
index 242c136..f12d1e5 100644
--- a/app/test/test_ticketlock.c
+++ b/app/test/test_ticketlock.c
@@ -48,7 +48,7 @@
 static rte_ticketlock_recursive_t tlr;
 static unsigned int count;
 
-static uint32_t synchro;
+static uint32_t _Atomic synchro;
 
 static int
 test_ticketlock_per_core(__rte_unused void *arg)
@@ -111,7 +111,7 @@
 
 	/* wait synchro for workers */
 	if (lcore != rte_get_main_lcore())
-		rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED);
+		rte_wait_until_equal_32(&synchro, 1, memory_order_relaxed);
 
 	begin = rte_rdtsc_precise();
 	while (lcore_count[lcore] < MAX_LOOP) {
@@ -153,11 +153,11 @@
 	printf("\nTest with lock on %u cores...\n", rte_lcore_count());
 
 	/* Clear synchro and start workers */
-	__atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+	atomic_store_explicit(&synchro, 0, memory_order_relaxed);
 	rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MAIN);
 
 	/* start synchro and launch test on main */
-	__atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+	atomic_store_explicit(&synchro, 1, memory_order_relaxed);
 	load_loop_fn(&lock);
 
 	rte_eal_mp_wait_lcore();
diff --git a/app/test/test_timer.c b/app/test/test_timer.c
index 0c36dc9..cf89a19 100644
--- a/app/test/test_timer.c
+++ b/app/test/test_timer.c
@@ -202,7 +202,7 @@ struct mytimerinfo {
 
 /* Need to synchronize worker lcores through multiple steps. */
 enum { WORKER_WAITING = 1, WORKER_RUN_SIGNAL, WORKER_RUNNING, WORKER_FINISHED };
-static uint16_t lcore_state[RTE_MAX_LCORE];
+static uint16_t _Atomic lcore_state[RTE_MAX_LCORE];
 
 static void
 main_init_workers(void)
@@ -210,7 +210,7 @@ struct mytimerinfo {
 	unsigned i;
 
 	RTE_LCORE_FOREACH_WORKER(i) {
-		__atomic_store_n(&lcore_state[i], WORKER_WAITING, __ATOMIC_RELAXED);
+		atomic_store_explicit(&lcore_state[i], WORKER_WAITING, memory_order_relaxed);
 	}
 }
 
@@ -220,10 +220,10 @@ struct mytimerinfo {
 	unsigned i;
 
 	RTE_LCORE_FOREACH_WORKER(i) {
-		__atomic_store_n(&lcore_state[i], WORKER_RUN_SIGNAL, __ATOMIC_RELEASE);
+		atomic_store_explicit(&lcore_state[i], WORKER_RUN_SIGNAL, memory_order_release);
 	}
 	RTE_LCORE_FOREACH_WORKER(i) {
-		rte_wait_until_equal_16(&lcore_state[i], WORKER_RUNNING, __ATOMIC_ACQUIRE);
+		rte_wait_until_equal_16(&lcore_state[i], WORKER_RUNNING, memory_order_acquire);
 	}
 }
 
@@ -233,7 +233,7 @@ struct mytimerinfo {
 	unsigned i;
 
 	RTE_LCORE_FOREACH_WORKER(i) {
-		rte_wait_until_equal_16(&lcore_state[i], WORKER_FINISHED, __ATOMIC_ACQUIRE);
+		rte_wait_until_equal_16(&lcore_state[i], WORKER_FINISHED, memory_order_acquire);
 	}
 }
 
@@ -242,8 +242,8 @@ struct mytimerinfo {
 {
 	unsigned lcore_id = rte_lcore_id();
 
-	rte_wait_until_equal_16(&lcore_state[lcore_id], WORKER_RUN_SIGNAL, __ATOMIC_ACQUIRE);
-	__atomic_store_n(&lcore_state[lcore_id], WORKER_RUNNING, __ATOMIC_RELEASE);
+	rte_wait_until_equal_16(&lcore_state[lcore_id], WORKER_RUN_SIGNAL, memory_order_acquire);
+	atomic_store_explicit(&lcore_state[lcore_id], WORKER_RUNNING, memory_order_release);
 }
 
 static void
@@ -251,7 +251,7 @@ struct mytimerinfo {
 {
 	unsigned lcore_id = rte_lcore_id();
 
-	__atomic_store_n(&lcore_state[lcore_id], WORKER_FINISHED, __ATOMIC_RELEASE);
+	atomic_store_explicit(&lcore_state[lcore_id], WORKER_FINISHED, memory_order_release);
 }
 
 
diff --git a/drivers/event/opdl/opdl_ring.c b/drivers/event/opdl/opdl_ring.c
index 69392b5..e1fa674 100644
--- a/drivers/event/opdl/opdl_ring.c
+++ b/drivers/event/opdl/opdl_ring.c
@@ -52,7 +52,7 @@ struct shared_state {
 	uint32_t head;  /* Head sequence number (for multi thread operation) */
 	char _pad2[RTE_CACHE_LINE_SIZE * 3];
 	struct opdl_stage *stage;  /* back pointer */
-	uint32_t tail;  /* Tail sequence number */
+	uint32_t _Atomic tail;  /* Tail sequence number */
 	char _pad3[RTE_CACHE_LINE_SIZE * 2];
 } __rte_cache_aligned;
 
@@ -169,7 +169,7 @@ struct opdl_ring {
 {
 	uint32_t i;
 	uint32_t this_tail = s->shared.tail;
-	uint32_t min_seq = __atomic_load_n(&s->deps[0]->tail, __ATOMIC_ACQUIRE);
+	uint32_t min_seq = atomic_load_explicit(&s->deps[0]->tail, memory_order_acquire);
 	/* Input stage sequence numbers are greater than the sequence numbers of
 	 * its dependencies so an offset of t->num_slots is needed when
 	 * calculating available slots and also the condition which is used to
@@ -180,16 +180,16 @@ struct opdl_ring {
 	if (is_input_stage(s)) {
 		wrap = s->num_slots;
 		for (i = 1; i < s->num_deps; i++) {
-			uint32_t seq = __atomic_load_n(&s->deps[i]->tail,
-					__ATOMIC_ACQUIRE);
+			uint32_t seq = atomic_load_explicit(&s->deps[i]->tail,
+					memory_order_acquire);
 			if ((this_tail - seq) > (this_tail - min_seq))
 				min_seq = seq;
 		}
 	} else {
 		wrap = 0;
 		for (i = 1; i < s->num_deps; i++) {
-			uint32_t seq = __atomic_load_n(&s->deps[i]->tail,
-					__ATOMIC_ACQUIRE);
+			uint32_t seq = atomic_load_explicit(&s->deps[i]->tail,
+					memory_order_acquire);
 			if ((seq - this_tail) < (min_seq - this_tail))
 				min_seq = seq;
 		}
@@ -299,7 +299,8 @@ struct opdl_ring {
 	copy_entries_in(t, head, entries, num_entries);
 
 	s->head += num_entries;
-	__atomic_store_n(&s->shared.tail, s->head, __ATOMIC_RELEASE);
+	atomic_store_explicit(&s->shared.tail, s->head,
+		memory_order_release);
 
 	return num_entries;
 }
@@ -382,18 +383,18 @@ struct opdl_ring {
 		/* There should be no race condition here. If shared.tail
 		 * matches, no other core can update it until this one does.
 		 */
-		if (__atomic_load_n(&s->shared.tail, __ATOMIC_ACQUIRE) ==
-				tail) {
+		if (atomic_load_explicit(&s->shared.tail,
+			memory_order_acquire) == tail) {
 			if (num_entries >= (head - tail)) {
 				claim_mgr_remove(disclaims);
-				__atomic_store_n(&s->shared.tail, head,
-						__ATOMIC_RELEASE);
+				atomic_store_explicit(&s->shared.tail, head,
+						memory_order_release);
 				num_entries -= (head - tail);
 			} else {
 				claim_mgr_move_tail(disclaims, num_entries);
-				__atomic_store_n(&s->shared.tail,
+				atomic_store_explicit(&s->shared.tail,
 						num_entries + tail,
-						__ATOMIC_RELEASE);
+						memory_order_release);
 				num_entries = 0;
 			}
 		} else if (block == false)
@@ -473,10 +474,11 @@ struct opdl_ring {
 	/* If another thread started inputting before this one, but hasn't
 	 * finished, we need to wait for it to complete to update the tail.
 	 */
-	rte_wait_until_equal_32(&s->shared.tail, old_head, __ATOMIC_ACQUIRE);
+	rte_wait_until_equal_32(&s->shared.tail, old_head,
+		memory_order_acquire);
 
-	__atomic_store_n(&s->shared.tail, old_head + num_entries,
-			__ATOMIC_RELEASE);
+	atomic_store_explicit(&s->shared.tail, old_head + num_entries,
+			memory_order_release);
 
 	return num_entries;
 }
@@ -628,8 +630,8 @@ struct opdl_ring {
 				num_entries, s->head - old_tail);
 		num_entries = s->head - old_tail;
 	}
-	__atomic_store_n(&s->shared.tail, num_entries + old_tail,
-			__ATOMIC_RELEASE);
+	atomic_store_explicit(&s->shared.tail, num_entries + old_tail,
+			memory_order_release);
 }
 
 uint32_t
@@ -658,7 +660,8 @@ struct opdl_ring {
 	copy_entries_in(t, head, entries, num_entries);
 
 	s->head += num_entries;
-	__atomic_store_n(&s->shared.tail, s->head, __ATOMIC_RELEASE);
+	atomic_store_explicit(&s->shared.tail, s->head,
+		memory_order_release);
 
 	return num_entries;
 
@@ -677,7 +680,8 @@ struct opdl_ring {
 	copy_entries_out(t, head, entries, num_entries);
 
 	s->head += num_entries;
-	__atomic_store_n(&s->shared.tail, s->head, __ATOMIC_RELEASE);
+	atomic_store_explicit(&s->shared.tail, s->head,
+		memory_order_release);
 
 	return num_entries;
 }
@@ -756,7 +760,8 @@ struct opdl_ring {
 		return 0;
 	}
 	if (s->threadsafe == false) {
-		__atomic_store_n(&s->shared.tail, s->head, __ATOMIC_RELEASE);
+		atomic_store_explicit(&s->shared.tail, s->head,
+			memory_order_release);
 		s->seq += s->num_claimed;
 		s->shadow_head = s->head;
 		s->num_claimed = 0;
diff --git a/drivers/net/thunderx/nicvf_rxtx.c b/drivers/net/thunderx/nicvf_rxtx.c
index defa551..0db9505 100644
--- a/drivers/net/thunderx/nicvf_rxtx.c
+++ b/drivers/net/thunderx/nicvf_rxtx.c
@@ -385,9 +385,10 @@
 		ltail++;
 	}
 
-	rte_wait_until_equal_32(&rbdr->tail, next_tail, __ATOMIC_RELAXED);
+	rte_wait_until_equal_32(&rbdr->tail, next_tail,
+		memory_order_relaxed);
 
-	__atomic_store_n(&rbdr->tail, ltail, __ATOMIC_RELEASE);
+	atomic_store_explicit(&rbdr->tail, ltail, memory_order_release);
 	nicvf_addr_write(door, to_fill);
 	return to_fill;
 }
diff --git a/drivers/net/thunderx/nicvf_struct.h b/drivers/net/thunderx/nicvf_struct.h
index 13cf8fe..38c72b6 100644
--- a/drivers/net/thunderx/nicvf_struct.h
+++ b/drivers/net/thunderx/nicvf_struct.h
@@ -20,7 +20,7 @@ struct nicvf_rbdr {
 	struct rbdr_entry_t *desc;
 	nicvf_iova_addr_t phys;
 	uint32_t buffsz;
-	uint32_t tail;
+	uint32_t _Atomic tail;
 	uint32_t next_tail;
 	uint32_t head;
 	uint32_t qlen_mask;
diff --git a/lib/bpf/bpf_pkt.c b/lib/bpf/bpf_pkt.c
index ffd2db7..f5765c2 100644
--- a/lib/bpf/bpf_pkt.c
+++ b/lib/bpf/bpf_pkt.c
@@ -25,7 +25,7 @@
 
 struct bpf_eth_cbi {
 	/* used by both data & control path */
-	uint32_t use;    /*usage counter */
+	uint32_t _Atomic use;    /*usage counter */
 	const struct rte_eth_rxtx_callback *cb;  /* callback handle */
 	struct rte_bpf *bpf;
 	struct rte_bpf_jit jit;
@@ -110,7 +110,7 @@ struct bpf_eth_cbh {
 
 	/* in use, busy wait till current RX/TX iteration is finished */
 	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
-		RTE_WAIT_UNTIL_MASKED((uint32_t *)(uintptr_t)&cbi->use,
+		RTE_WAIT_UNTIL_MASKED(&cbi->use,
 			UINT32_MAX, !=, puse, __ATOMIC_RELAXED);
 	}
 }
diff --git a/lib/distributor/distributor_private.h b/lib/distributor/distributor_private.h
index 7101f63..3b43d3d 100644
--- a/lib/distributor/distributor_private.h
+++ b/lib/distributor/distributor_private.h
@@ -52,7 +52,7 @@
  * Only 64-bits of the memory is actually used though.
  */
 union rte_distributor_buffer_single {
-	volatile int64_t bufptr64;
+	int64_t _Atomic bufptr64;
 	char pad[RTE_CACHE_LINE_SIZE*3];
 } __rte_cache_aligned;
 
diff --git a/lib/distributor/rte_distributor_single.c b/lib/distributor/rte_distributor_single.c
index 2c77ac4..7a9a3d9 100644
--- a/lib/distributor/rte_distributor_single.c
+++ b/lib/distributor/rte_distributor_single.c
@@ -32,10 +32,10 @@
 	int64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
 			| RTE_DISTRIB_GET_BUF;
 	RTE_WAIT_UNTIL_MASKED(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK,
-		==, 0, __ATOMIC_RELAXED);
+		==, 0, memory_order_relaxed);
 
 	/* Sync with distributor on GET_BUF flag. */
-	__atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
+	atomic_store_explicit(&buf->bufptr64, req, memory_order_release);
 }
 
 struct rte_mbuf *
@@ -44,7 +44,7 @@ struct rte_mbuf *
 {
 	union rte_distributor_buffer_single *buf = &d->bufs[worker_id];
 	/* Sync with distributor. Acquire bufptr64. */
-	if (__atomic_load_n(&buf->bufptr64, __ATOMIC_ACQUIRE)
+	if (atomic_load_explicit(&buf->bufptr64, memory_order_acquire)
 		& RTE_DISTRIB_GET_BUF)
 		return NULL;
 
@@ -72,10 +72,10 @@ struct rte_mbuf *
 	uint64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
 			| RTE_DISTRIB_RETURN_BUF;
 	RTE_WAIT_UNTIL_MASKED(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK,
-		==, 0, __ATOMIC_RELAXED);
+		==, 0, memory_order_relaxed);
 
 	/* Sync with distributor on RETURN_BUF flag. */
-	__atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
+	atomic_store_explicit(&buf->bufptr64, req, memory_order_release);
 	return 0;
 }
 
@@ -119,7 +119,7 @@ struct rte_mbuf *
 	d->in_flight_tags[wkr] = 0;
 	d->in_flight_bitmask &= ~(1UL << wkr);
 	/* Sync with worker. Release bufptr64. */
-	__atomic_store_n(&(d->bufs[wkr].bufptr64), 0, __ATOMIC_RELEASE);
+	atomic_store_explicit(&d->bufs[wkr].bufptr64, 0, memory_order_release);
 	if (unlikely(d->backlog[wkr].count != 0)) {
 		/* On return of a packet, we need to move the
 		 * queued packets for this core elsewhere.
@@ -165,21 +165,21 @@ struct rte_mbuf *
 	for (wkr = 0; wkr < d->num_workers; wkr++) {
 		uintptr_t oldbuf = 0;
 		/* Sync with worker. Acquire bufptr64. */
-		const int64_t data = __atomic_load_n(&(d->bufs[wkr].bufptr64),
-							__ATOMIC_ACQUIRE);
+		const int64_t data = atomic_load_explicit(&d->bufs[wkr].bufptr64,
+							memory_order_acquire);
 
 		if (data & RTE_DISTRIB_GET_BUF) {
 			flushed++;
 			if (d->backlog[wkr].count)
 				/* Sync with worker. Release bufptr64. */
-				__atomic_store_n(&(d->bufs[wkr].bufptr64),
+				atomic_store_explicit(&d->bufs[wkr].bufptr64,
 					backlog_pop(&d->backlog[wkr]),
-					__ATOMIC_RELEASE);
+					memory_order_release);
 			else {
 				/* Sync with worker on GET_BUF flag. */
-				__atomic_store_n(&(d->bufs[wkr].bufptr64),
+				atomic_store_explicit(&d->bufs[wkr].bufptr64,
 					RTE_DISTRIB_GET_BUF,
-					__ATOMIC_RELEASE);
+					memory_order_release);
 				d->in_flight_tags[wkr] = 0;
 				d->in_flight_bitmask &= ~(1UL << wkr);
 			}
@@ -217,8 +217,8 @@ struct rte_mbuf *
 	while (next_idx < num_mbufs || next_mb != NULL) {
 		uintptr_t oldbuf = 0;
 		/* Sync with worker. Acquire bufptr64. */
-		int64_t data = __atomic_load_n(&(d->bufs[wkr].bufptr64),
-						__ATOMIC_ACQUIRE);
+		int64_t data = atomic_load_explicit(&d->bufs[wkr].bufptr64,
+						memory_order_acquire);
 
 		if (!next_mb) {
 			next_mb = mbufs[next_idx++];
@@ -264,15 +264,15 @@ struct rte_mbuf *
 
 			if (d->backlog[wkr].count)
 				/* Sync with worker. Release bufptr64. */
-				__atomic_store_n(&(d->bufs[wkr].bufptr64),
+				atomic_store_explicit(&d->bufs[wkr].bufptr64,
 						backlog_pop(&d->backlog[wkr]),
-						__ATOMIC_RELEASE);
+						memory_order_release);
 
 			else {
 				/* Sync with worker. Release bufptr64.  */
-				__atomic_store_n(&(d->bufs[wkr].bufptr64),
+				atomic_store_explicit(&d->bufs[wkr].bufptr64,
 						next_value,
-						__ATOMIC_RELEASE);
+						memory_order_release);
 				d->in_flight_tags[wkr] = new_tag;
 				d->in_flight_bitmask |= (1UL << wkr);
 				next_mb = NULL;
@@ -294,8 +294,8 @@ struct rte_mbuf *
 	for (wkr = 0; wkr < d->num_workers; wkr++)
 		if (d->backlog[wkr].count &&
 				/* Sync with worker. Acquire bufptr64. */
-				(__atomic_load_n(&(d->bufs[wkr].bufptr64),
-				__ATOMIC_ACQUIRE) & RTE_DISTRIB_GET_BUF)) {
+				(atomic_load_explicit(&d->bufs[wkr].bufptr64,
+				memory_order_acquire) & RTE_DISTRIB_GET_BUF)) {
 
 			int64_t oldbuf = d->bufs[wkr].bufptr64 >>
 					RTE_DISTRIB_FLAG_BITS;
@@ -303,9 +303,9 @@ struct rte_mbuf *
 			store_return(oldbuf, d, &ret_start, &ret_count);
 
 			/* Sync with worker. Release bufptr64. */
-			__atomic_store_n(&(d->bufs[wkr].bufptr64),
+			atomic_store_explicit(&d->bufs[wkr].bufptr64,
 				backlog_pop(&d->backlog[wkr]),
-				__ATOMIC_RELEASE);
+				memory_order_release);
 		}
 
 	d->returns.start = ret_start;
diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h
index 5f70e97..96ad050 100644
--- a/lib/eal/arm/include/rte_pause_64.h
+++ b/lib/eal/arm/include/rte_pause_64.h
@@ -148,13 +148,13 @@ static inline void rte_pause(void)
 }
 
 static __rte_always_inline void
-rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
-		int memorder)
+rte_wait_until_equal_16(volatile uint16_t _Atomic *addr, uint16_t expected,
+		memory_order memorder)
 {
 	uint16_t value;
 
-	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
-		memorder != __ATOMIC_RELAXED);
+	RTE_BUILD_BUG_ON(memorder != memory_order_acquire &&
+		memorder != memory_order_relaxed);
 
 	__RTE_ARM_LOAD_EXC_16(addr, value, memorder)
 	if (value != expected) {
@@ -167,13 +167,13 @@ static inline void rte_pause(void)
 }
 
 static __rte_always_inline void
-rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
-		int memorder)
+rte_wait_until_equal_32(volatile uint32_t _Atomic *addr, uint32_t expected,
+		memory_order memorder)
 {
 	uint32_t value;
 
-	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
-		memorder != __ATOMIC_RELAXED);
+	RTE_BUILD_BUG_ON(memorder != memory_order_acquire &&
+		memorder != memory_order_relaxed);
 
 	__RTE_ARM_LOAD_EXC_32(addr, value, memorder)
 	if (value != expected) {
@@ -186,13 +186,13 @@ static inline void rte_pause(void)
 }
 
 static __rte_always_inline void
-rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
-		int memorder)
+rte_wait_until_equal_64(volatile uint64_t _Atomic *addr, uint64_t expected,
+		memory_order memorder)
 {
 	uint64_t value;
 
-	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&
-		memorder != __ATOMIC_RELAXED);
+	RTE_BUILD_BUG_ON(memorder != memory_order_acquire &&
+		memorder != memory_order_relaxed);
 
 	__RTE_ARM_LOAD_EXC_64(addr, value, memorder)
 	if (value != expected) {
@@ -206,8 +206,8 @@ static inline void rte_pause(void)
 
 #define RTE_WAIT_UNTIL_MASKED(addr, mask, cond, expected, memorder) do {  \
 	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));                \
-	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&                  \
-		memorder != __ATOMIC_RELAXED);                            \
+	RTE_BUILD_BUG_ON(memorder != memory_order_acquire &&              \
+		memorder != memory_order_relaxed);                        \
 	const uint32_t size = sizeof(*(addr)) << 3;                       \
 	typeof(*(addr)) expected_value = (expected);                      \
 	typeof(*(addr)) value;                                            \
diff --git a/lib/eal/common/eal_memcfg.h b/lib/eal/common/eal_memcfg.h
index 8889ba0..0decc29 100644
--- a/lib/eal/common/eal_memcfg.h
+++ b/lib/eal/common/eal_memcfg.h
@@ -18,7 +18,7 @@
  * Memory configuration shared across multiple processes.
  */
 struct rte_mem_config {
-	volatile uint32_t magic;   /**< Magic number - sanity check. */
+	uint32_t _Atomic magic;   /**< Magic number - sanity check. */
 	uint32_t version;
 	/**< Prevent secondary processes using different DPDK versions. */
 
diff --git a/lib/eal/include/generic/rte_pause.h b/lib/eal/include/generic/rte_pause.h
index ec1f418..19a000e 100644
--- a/lib/eal/include/generic/rte_pause.h
+++ b/lib/eal/include/generic/rte_pause.h
@@ -12,10 +12,10 @@
  * CPU pause operation.
  */
 
+#include <stdatomic.h>
 #include <stdint.h>
 #include <assert.h>
 #include <rte_common.h>
-#include <rte_atomic.h>
 
 /**
  * Pause CPU execution for a short while
@@ -35,13 +35,13 @@
  *  A 16-bit expected value to be in the memory location.
  * @param memorder
  *  Two different memory orders that can be specified:
- *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  memory_order_acquire and memory_order_relaxed. These map to
  *  C++11 memory orders with the same names, see the C++11 standard or
  *  the GCC wiki on atomic synchronization for detailed definition.
  */
 static __rte_always_inline void
-rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
-		int memorder);
+rte_wait_until_equal_16(volatile uint16_t _Atomic *addr, uint16_t expected,
+		memory_order memorder);
 
 /**
  * Wait for *addr to be updated with a 32-bit expected value, with a relaxed
@@ -53,13 +53,13 @@
  *  A 32-bit expected value to be in the memory location.
  * @param memorder
  *  Two different memory orders that can be specified:
- *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  memory_order_acquire and memory_order_relaxed. These map to
  *  C++11 memory orders with the same names, see the C++11 standard or
  *  the GCC wiki on atomic synchronization for detailed definition.
  */
 static __rte_always_inline void
-rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
-		int memorder);
+rte_wait_until_equal_32(volatile uint32_t _Atomic *addr, uint32_t expected,
+		memory_order memorder);
 
 /**
  * Wait for *addr to be updated with a 64-bit expected value, with a relaxed
@@ -71,42 +71,42 @@
  *  A 64-bit expected value to be in the memory location.
  * @param memorder
  *  Two different memory orders that can be specified:
- *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  memory_order_acquire and memory_order_relaxed. These map to
  *  C++11 memory orders with the same names, see the C++11 standard or
  *  the GCC wiki on atomic synchronization for detailed definition.
  */
 static __rte_always_inline void
-rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
-		int memorder);
+rte_wait_until_equal_64(volatile uint64_t _Atomic *addr, uint64_t expected,
+		memory_order memorder);
 
 #ifndef RTE_WAIT_UNTIL_EQUAL_ARCH_DEFINED
 static __rte_always_inline void
-rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
-		int memorder)
+rte_wait_until_equal_16(volatile uint16_t _Atomic *addr, uint16_t expected,
+		memory_order memorder)
 {
-	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+	assert(memorder == memory_order_acquire || memorder == memory_order_relaxed);
 
-	while (__atomic_load_n(addr, memorder) != expected)
+	while (atomic_load_explicit(addr, memorder) != expected)
 		rte_pause();
 }
 
 static __rte_always_inline void
-rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
-		int memorder)
+rte_wait_until_equal_32(volatile uint32_t _Atomic *addr, uint32_t expected,
+		memory_order memorder)
 {
-	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+	assert(memorder == memory_order_acquire || memorder == memory_order_relaxed);
 
-	while (__atomic_load_n(addr, memorder) != expected)
+	while (atomic_load_explicit(addr, memorder) != expected)
 		rte_pause();
 }
 
 static __rte_always_inline void
-rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
-		int memorder)
+rte_wait_until_equal_64(volatile uint64_t _Atomic *addr, uint64_t expected,
+		memory_order memorder)
 {
-	assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+	assert(memorder == memory_order_acquire || memorder == memory_order_relaxed);
 
-	while (__atomic_load_n(addr, memorder) != expected)
+	while (atomic_load_explicit(addr, memorder) != expected)
 		rte_pause();
 }
 
@@ -124,16 +124,16 @@
  *  An expected value to be in the memory location.
  * @param memorder
  *  Two different memory orders that can be specified:
- *  __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ *  memory_order_acquire and memory_order_relaxed. These map to
  *  C++11 memory orders with the same names, see the C++11 standard or
  *  the GCC wiki on atomic synchronization for detailed definition.
  */
 #define RTE_WAIT_UNTIL_MASKED(addr, mask, cond, expected, memorder) do { \
 	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder));               \
-	RTE_BUILD_BUG_ON(memorder != __ATOMIC_ACQUIRE &&                 \
-		memorder != __ATOMIC_RELAXED);                           \
+	RTE_BUILD_BUG_ON(memorder != memory_order_acquire &&             \
+		memorder != memory_order_relaxed);                       \
 	typeof(*(addr)) expected_value = (expected);                     \
-	while (!((__atomic_load_n((addr), (memorder)) & (mask)) cond     \
+	while (!((atomic_load_explicit((addr), (memorder)) & (mask)) cond \
 			expected_value))                                 \
 		rte_pause();                                             \
 } while (0)
diff --git a/lib/eal/include/rte_mcslock.h b/lib/eal/include/rte_mcslock.h
index a805cb2..7c227ec 100644
--- a/lib/eal/include/rte_mcslock.h
+++ b/lib/eal/include/rte_mcslock.h
@@ -33,7 +33,7 @@
  */
 typedef struct rte_mcslock {
 	struct rte_mcslock *next;
-	int locked; /* 1 if the queue locked, 0 otherwise */
+	uint32_t _Atomic locked; /* 1 if the queue locked, 0 otherwise */
 } rte_mcslock_t;
 
 /**
@@ -53,7 +53,7 @@
 	rte_mcslock_t *prev;
 
 	/* Init me node */
-	__atomic_store_n(&me->locked, 1, __ATOMIC_RELAXED);
+	atomic_store_explicit(&me->locked, 1, memory_order_relaxed);
 	__atomic_store_n(&me->next, NULL, __ATOMIC_RELAXED);
 
 	/* If the queue is empty, the exchange operation is enough to acquire
@@ -88,7 +88,7 @@
 	 * to spin on me->locked until the previous lock holder resets
 	 * the me->locked using mcslock_unlock().
 	 */
-	rte_wait_until_equal_32((uint32_t *)&me->locked, 0, __ATOMIC_ACQUIRE);
+	rte_wait_until_equal_32(&me->locked, 0, memory_order_acquire);
 }
 
 /**
@@ -120,14 +120,14 @@
 		/* More nodes added to the queue by other CPUs.
 		 * Wait until the next pointer is set.
 		 */
-		uintptr_t *next;
-		next = (uintptr_t *)&me->next;
+		uintptr_t _Atomic *next;
+		next = (uintptr_t _Atomic *)&me->next;
 		RTE_WAIT_UNTIL_MASKED(next, UINTPTR_MAX, !=, 0,
 			__ATOMIC_RELAXED);
 	}
 
 	/* Pass lock to next waiter. */
-	__atomic_store_n(&me->next->locked, 0, __ATOMIC_RELEASE);
+	atomic_store_explicit(&me->next->locked, 0, memory_order_release);
 }
 
 /**
diff --git a/lib/eal/include/rte_pflock.h b/lib/eal/include/rte_pflock.h
index a3f7291..1c32512 100644
--- a/lib/eal/include/rte_pflock.h
+++ b/lib/eal/include/rte_pflock.h
@@ -40,8 +40,8 @@
  */
 struct rte_pflock {
 	struct {
-		uint16_t in;
-		uint16_t out;
+		uint16_t _Atomic in;
+		uint16_t _Atomic out;
 	} rd, wr;
 };
 typedef struct rte_pflock rte_pflock_t;
@@ -116,14 +116,14 @@ struct rte_pflock {
 	 * If no writer is present, then the operation has completed
 	 * successfully.
 	 */
-	w = __atomic_fetch_add(&pf->rd.in, RTE_PFLOCK_RINC, __ATOMIC_ACQUIRE)
+	w = atomic_fetch_add_explicit(&pf->rd.in, RTE_PFLOCK_RINC, memory_order_acquire)
 		& RTE_PFLOCK_WBITS;
 	if (w == 0)
 		return;
 
 	/* Wait for current write phase to complete. */
 	RTE_WAIT_UNTIL_MASKED(&pf->rd.in, RTE_PFLOCK_WBITS, !=, w,
-		__ATOMIC_ACQUIRE);
+		memory_order_acquire);
 }
 
 /**
@@ -139,7 +139,7 @@ struct rte_pflock {
 static inline void
 rte_pflock_read_unlock(rte_pflock_t *pf)
 {
-	__atomic_fetch_add(&pf->rd.out, RTE_PFLOCK_RINC, __ATOMIC_RELEASE);
+	atomic_fetch_add_explicit(&pf->rd.out, RTE_PFLOCK_RINC, memory_order_release);
 }
 
 /**
@@ -160,8 +160,8 @@ struct rte_pflock {
 	/* Acquire ownership of write-phase.
 	 * This is same as rte_ticketlock_lock().
 	 */
-	ticket = __atomic_fetch_add(&pf->wr.in, 1, __ATOMIC_RELAXED);
-	rte_wait_until_equal_16(&pf->wr.out, ticket, __ATOMIC_ACQUIRE);
+	ticket = atomic_fetch_add_explicit(&pf->wr.in, 1, memory_order_relaxed);
+	rte_wait_until_equal_16(&pf->wr.out, ticket, memory_order_acquire);
 
 	/*
 	 * Acquire ticket on read-side in order to allow them
@@ -172,10 +172,10 @@ struct rte_pflock {
 	 * speculatively.
 	 */
 	w = RTE_PFLOCK_PRES | (ticket & RTE_PFLOCK_PHID);
-	ticket = __atomic_fetch_add(&pf->rd.in, w, __ATOMIC_RELAXED);
+	ticket = atomic_fetch_add_explicit(&pf->rd.in, w, memory_order_relaxed);
 
 	/* Wait for any pending readers to flush. */
-	rte_wait_until_equal_16(&pf->rd.out, ticket, __ATOMIC_ACQUIRE);
+	rte_wait_until_equal_16(&pf->rd.out, ticket, memory_order_acquire);
 }
 
 /**
@@ -192,10 +192,10 @@ struct rte_pflock {
 rte_pflock_write_unlock(rte_pflock_t *pf)
 {
 	/* Migrate from write phase to read phase. */
-	__atomic_fetch_and(&pf->rd.in, RTE_PFLOCK_LSB, __ATOMIC_RELEASE);
+	atomic_fetch_and_explicit(&pf->rd.in, RTE_PFLOCK_LSB, memory_order_release);
 
 	/* Allow other writers to continue. */
-	__atomic_fetch_add(&pf->wr.out, 1, __ATOMIC_RELEASE);
+	atomic_fetch_add_explicit(&pf->wr.out, 1, memory_order_release);
 }
 
 #ifdef __cplusplus
diff --git a/lib/eal/include/rte_ticketlock.h b/lib/eal/include/rte_ticketlock.h
index 5db0d8a..5206b62 100644
--- a/lib/eal/include/rte_ticketlock.h
+++ b/lib/eal/include/rte_ticketlock.h
@@ -31,7 +31,7 @@
 typedef union {
 	uint32_t tickets;
 	struct {
-		uint16_t current;
+		uint16_t _Atomic current;
 		uint16_t next;
 	} s;
 } rte_ticketlock_t;
@@ -63,7 +63,7 @@
 rte_ticketlock_lock(rte_ticketlock_t *tl)
 {
 	uint16_t me = __atomic_fetch_add(&tl->s.next, 1, __ATOMIC_RELAXED);
-	rte_wait_until_equal_16(&tl->s.current, me, __ATOMIC_ACQUIRE);
+	rte_wait_until_equal_16(&tl->s.current, me, memory_order_acquire);
 }
 
 /**
@@ -75,8 +75,8 @@
 static inline void
 rte_ticketlock_unlock(rte_ticketlock_t *tl)
 {
-	uint16_t i = __atomic_load_n(&tl->s.current, __ATOMIC_RELAXED);
-	__atomic_store_n(&tl->s.current, i + 1, __ATOMIC_RELEASE);
+	uint16_t i = atomic_load_explicit(&tl->s.current, memory_order_relaxed);
+	atomic_store_explicit(&tl->s.current, i + 1, memory_order_release);
 }
 
 /**
diff --git a/lib/eal/loongarch/include/rte_pause.h b/lib/eal/loongarch/include/rte_pause.h
index 4302e1b..2987a1b 100644
--- a/lib/eal/loongarch/include/rte_pause.h
+++ b/lib/eal/loongarch/include/rte_pause.h
@@ -9,8 +9,6 @@
 extern "C" {
 #endif
 
-#include "rte_atomic.h"
-
 #include "generic/rte_pause.h"
 
 static inline void rte_pause(void)
diff --git a/lib/eal/ppc/include/rte_pause.h b/lib/eal/ppc/include/rte_pause.h
index 16e47ce..54bbbc7 100644
--- a/lib/eal/ppc/include/rte_pause.h
+++ b/lib/eal/ppc/include/rte_pause.h
@@ -9,8 +9,6 @@
 extern "C" {
 #endif
 
-#include "rte_atomic.h"
-
 #include "generic/rte_pause.h"
 
 static inline void rte_pause(void)
diff --git a/lib/eal/riscv/include/rte_pause.h b/lib/eal/riscv/include/rte_pause.h
index cb8e9ca..ffa7158 100644
--- a/lib/eal/riscv/include/rte_pause.h
+++ b/lib/eal/riscv/include/rte_pause.h
@@ -11,8 +11,6 @@
 extern "C" {
 #endif
 
-#include "rte_atomic.h"
-
 #include "generic/rte_pause.h"
 
 static inline void rte_pause(void)
diff --git a/lib/ring/rte_ring_c11_pvt.h b/lib/ring/rte_ring_c11_pvt.h
index f895950..402f819 100644
--- a/lib/ring/rte_ring_c11_pvt.h
+++ b/lib/ring/rte_ring_c11_pvt.h
@@ -91,14 +91,15 @@
 			return 0;
 
 		*new_head = *old_head + n;
-		if (is_sp)
-			r->prod.head = *new_head, success = 1;
-		else
+		if (is_sp) {
+			r->prod.head = *new_head;
+			success = 1;
+		} else
 			/* on failure, *old_head is updated */
-			success = __atomic_compare_exchange_n(&r->prod.head,
+			success = atomic_compare_exchange_strong_explicit(&r->prod.head,
 					old_head, *new_head,
-					0, __ATOMIC_RELAXED,
-					__ATOMIC_RELAXED);
+					memory_order_relaxed,
+					memory_order_relaxed);
 	} while (unlikely(success == 0));
 	return n;
 }
@@ -137,7 +138,7 @@
 	int success;
 
 	/* move cons.head atomically */
-	*old_head = __atomic_load_n(&r->cons.head, __ATOMIC_RELAXED);
+	*old_head = atomic_load_explicit(&r->cons.head, memory_order_relaxed);
 	do {
 		/* Restore n as it may change every loop */
 		n = max;
@@ -166,14 +167,15 @@
 			return 0;
 
 		*new_head = *old_head + n;
-		if (is_sc)
-			r->cons.head = *new_head, success = 1;
-		else
+		if (is_sc) {
+			r->cons.head = *new_head;
+			success = 1;
+		} else
 			/* on failure, *old_head will be updated */
-			success = __atomic_compare_exchange_n(&r->cons.head,
+			success = atomic_compare_exchange_strong_explicit(&r->cons.head,
 							old_head, *new_head,
-							0, __ATOMIC_RELAXED,
-							__ATOMIC_RELAXED);
+							memory_order_relaxed,
+							memory_order_relaxed);
 	} while (unlikely(success == 0));
 	return n;
 }
diff --git a/lib/ring/rte_ring_core.h b/lib/ring/rte_ring_core.h
index 82b2370..cf3cb84 100644
--- a/lib/ring/rte_ring_core.h
+++ b/lib/ring/rte_ring_core.h
@@ -66,8 +66,8 @@ enum rte_ring_sync_type {
  * but offset for *sync_type* and *tail* values should remain the same.
  */
 struct rte_ring_headtail {
-	volatile uint32_t head;      /**< prod/consumer head. */
-	volatile uint32_t tail;      /**< prod/consumer tail. */
+	uint32_t _Atomic head;      /**< prod/consumer head. */
+	uint32_t _Atomic tail;      /**< prod/consumer tail. */
 	RTE_STD_C11
 	union {
 		/** sync type of prod/cons */
diff --git a/lib/ring/rte_ring_generic_pvt.h b/lib/ring/rte_ring_generic_pvt.h
index 5acb6e5..12a3ca8 100644
--- a/lib/ring/rte_ring_generic_pvt.h
+++ b/lib/ring/rte_ring_generic_pvt.h
@@ -89,11 +89,14 @@
 			return 0;
 
 		*new_head = *old_head + n;
-		if (is_sp)
-			r->prod.head = *new_head, success = 1;
-		else
-			success = rte_atomic32_cmpset(&r->prod.head,
-					*old_head, *new_head);
+		if (is_sp) {
+			r->prod.head = *new_head;
+			success = 1;
+		} else
+			/* NOTE: review for potential ordering optimization */
+			success = atomic_compare_exchange_strong_explicit(&r->prod.head,
+					old_head, *new_head,
+					memory_order_seq_cst, memory_order_seq_cst);
 	} while (unlikely(success == 0));
 	return n;
 }
@@ -162,8 +165,10 @@
 			rte_smp_rmb();
 			success = 1;
 		} else {
-			success = rte_atomic32_cmpset(&r->cons.head, *old_head,
-					*new_head);
+			/* NOTE: review for potential ordering optimization */
+			success = atomic_compare_exchange_strong_explicit(&r->cons.head,
+					old_head, *new_head,
+					memory_order_seq_cst, memory_order_seq_cst);
 		}
 	} while (unlikely(success == 0));
 	return n;
diff --git a/lib/ring/rte_ring_peek_elem_pvt.h b/lib/ring/rte_ring_peek_elem_pvt.h
index bb0a7d5..6707e38 100644
--- a/lib/ring/rte_ring_peek_elem_pvt.h
+++ b/lib/ring/rte_ring_peek_elem_pvt.h
@@ -59,7 +59,7 @@
 
 	pos = tail + num;
 	ht->head = pos;
-	__atomic_store_n(&ht->tail, pos, __ATOMIC_RELEASE);
+	atomic_store_explicit(&ht->tail, pos, memory_order_release);
 }
 
 /**
-- 
1.8.3.1
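
For reference, the conversion pattern applied throughout the hunks above maps each GCC
__atomic_* builtin and __ATOMIC_* ordering constant onto its <stdatomic.h> equivalent,
with the affected fields gaining the _Atomic qualifier. A minimal standalone sketch of
that mapping, modeled on the rte_ticketlock_unlock() hunk (illustrative only; the names
below are placeholders, not part of the patch):

	#include <stdatomic.h>
	#include <stdint.h>

	/* was: uint16_t current; */
	static uint16_t _Atomic current;

	static inline void example_unlock(void)
	{
		/* was: __atomic_load_n(&current, __ATOMIC_RELAXED) */
		uint16_t i = atomic_load_explicit(&current, memory_order_relaxed);

		/* was: __atomic_store_n(&current, i + 1, __ATOMIC_RELEASE) */
		atomic_store_explicit(&current, i + 1, memory_order_release);
	}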


Thread overview: 22+ messages
2023-07-31 21:07 [PATCH 0/4] eal: update public API to use stdatomic atomics Tyler Retzlaff
2023-07-31 21:07 ` [PATCH 1/4] build: require minimum c11 compiler Tyler Retzlaff
2023-07-31 21:07 ` [PATCH 2/4] devtools: forbid use of GCC atomic builtins Tyler Retzlaff
2023-07-31 21:07 ` [PATCH 3/4] eal: adapt rte pause APIs to use C11 atomics Tyler Retzlaff
2023-07-31 21:07 ` [PATCH 4/4] eal: adapt rte spinlock and rwlock " Tyler Retzlaff
2023-08-01  5:03 ` [PATCH v2 0/4] eal: update public API to use stdatomic atomics Tyler Retzlaff
2023-08-01  5:03   ` [PATCH v2 1/4] build: require minimum c11 compiler Tyler Retzlaff
2023-08-01  5:03   ` [PATCH v2 2/4] devtools: forbid use of GCC atomic builtins Tyler Retzlaff
2023-08-01  5:03   ` Tyler Retzlaff [this message]
2023-08-01  5:03   ` [PATCH v2 4/4] eal: adapt rte spinlock and rwlock APIs to use C11 atomics Tyler Retzlaff
2023-08-01  7:33   ` [PATCH v2 0/4] eal: update public API to use stdatomic atomics Morten Brørup
2023-08-01 17:07   ` Tyler Retzlaff
2023-08-02  5:13 ` [PATCH v3 " Tyler Retzlaff
2023-08-02  5:13   ` [PATCH v3 1/4] build: require minimum c11 compiler Tyler Retzlaff
2023-08-02  5:13   ` [PATCH v3 2/4] devtools: forbid use of GCC atomic builtins Tyler Retzlaff
2023-08-02  5:13   ` [PATCH v3 3/4] eal: adapt rte pause APIs to use C11 atomics Tyler Retzlaff
2023-08-02  5:13   ` [PATCH v3 4/4] eal: adapt rte spinlock and rwlock " Tyler Retzlaff
2023-08-02  5:31 ` [PATCH v4 0/4] eal: update public API to use stdatomic atomics Tyler Retzlaff
2023-08-02  5:31   ` [PATCH v4 1/4] build: require minimum c11 compiler Tyler Retzlaff
2023-08-02  5:31   ` [PATCH v4 2/4] devtools: forbid use of GCC atomic builtins Tyler Retzlaff
2023-08-02  5:31   ` [PATCH v4 3/4] eal: adapt rte pause APIs to use C11 atomics Tyler Retzlaff
2023-08-02  5:31   ` [PATCH v4 4/4] eal: adapt rte spinlock and rwlock " Tyler Retzlaff
